Multiple Linear Regression
"""Multiple linear regression model for estimating a car's CO2 emissions.

The independent variables are engine size, number of cylinders, and fuel
consumption (the ``FUELCONSUMPTION_COMB`` column); the dependent variable
is ``CO2EMISSIONS``.  The data is read from ``FuelConsumptionCo2.csv``,
resolved relative to the current working directory.
"""
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pylab as pl  # not used below; kept in case other cells rely on it
from sklearn import linear_model

# ``%matplotlib inline`` is IPython-only syntax and a SyntaxError in plain
# Python.  This is the equivalent call; it is skipped when the script is not
# running under IPython/Jupyter (where ``get_ipython`` is not defined).
try:
    get_ipython().run_line_magic("matplotlib", "inline")
except NameError:
    pass

# Column names used throughout, defined once so the feature list cannot
# drift between fitting, prediction and scoring.
FEATURES = ['ENGINESIZE', 'CYLINDERS', 'FUELCONSUMPTION_COMB']
TARGET = 'CO2EMISSIONS'

df = pd.read_csv("FuelConsumptionCo2.csv")
cdf = df[FEATURES + [TARGET]]

# Plot emissions against engine size for the full data set.
# No plt.show() is issued; call it yourself when running outside a notebook.
plt.figure()
plt.scatter(cdf.ENGINESIZE, cdf.CO2EMISSIONS, color='blue')
plt.xlabel('Engine Size')
plt.ylabel('Emissions')

# Random train/test split: each row lands in the training set with
# probability 0.8.  The generator is not seeded, so the split (and therefore
# the fitted coefficients and the score) varies from run to run.
msk = np.random.rand(len(cdf)) < 0.8
train = cdf[msk]
test = cdf[~msk]

# Same plot restricted to the training rows, on its own figure so it does
# not overlay the full-data plot above.
plt.figure()
plt.scatter(train.ENGINESIZE, train.CO2EMISSIONS, color='blue')
plt.xlabel("Engine size")
plt.ylabel("Emission")

#### MULTIPLE REGRESSION MODEL ####
regr = linear_model.LinearRegression()
x_train = np.asanyarray(train[FEATURES])
y_train = np.asanyarray(train[[TARGET]])
regr.fit(x_train, y_train)
# The fitted coefficients are available as regr.coef_.

# Prediction on the held-out rows.  The model was fitted on plain arrays, so
# predict on arrays as well; passing a DataFrame here makes scikit-learn warn
# that the input has feature names the estimator was fitted without.
x_test = np.asanyarray(test[FEATURES])
y_test = np.asanyarray(test[[TARGET]])
y_hat = regr.predict(x_test)
# Mean squared error, if wanted: np.mean((y_hat - y_test) ** 2)

# LinearRegression.score returns the coefficient of determination (R^2) on
# the given data; 1.0 is a perfect prediction.  The printed label is kept
# as-is so existing output does not change.
print('Variance score: %.2f' % regr.score(x_test, y_test))
Comments
Post a Comment