Friday, February 3, 2017

Linear Regression Visualisation using Seaborn



In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from mpl_toolkits.basemap import Basemap
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
import warnings
# Ignore every warning raised from this point on.
# NOTE(review): this blanket filter also hides deprecation warnings emitted by
# the plotting libraries — consider narrowing it to specific categories.
warnings.filterwarnings('ignore')
# Request the 'retina' figure format from the inline backend.
%config InlineBackend.figure_format = 'retina'
In [2]:
# Read the cars dataset from its remote CSV location into a DataFrame.
CARS_CSV_URL = "http://web.pdx.edu/~gerbing/data/cars.csv"
cars = pd.read_csv(CARS_CSV_URL)
In [3]:
# Preview the first five rows of the loaded frame.
cars.head(n=5)
Out[3]:
Model MPG Cylinders Engine Disp Horsepower Weight Accelerate Year Origin
0 amc ambassador dpl 15.0 8 390.0 190 3850 8.5 70 American
1 amc gremlin 21.0 6 199.0 90 2648 15.0 70 American
2 amc hornet 18.0 6 199.0 97 2774 15.5 70 American
3 amc rebel sst 16.0 8 304.0 150 3433 12.0 70 American
4 buick estate wagon (sw) 14.0 8 455.0 225 3086 10.0 70 American
In [8]:
# Swap the header names for short lowercase ones.
# The assignment is positional: names are applied to the columns left to right.
cars.columns = [
    "model",
    "mpg",
    "cylinder",
    "engine",
    "hp",
    "weight",
    "accelerate",
    "year",
    "origin",
]
In [18]:
# Preview the first five rows again to check the column names.
cars.head(n=5)
Out[18]:
model mpg cylinder engine hp weight accelerate year origin
0 amc ambassador dpl 15.0 8 390.0 190 3850 8.5 70 American
1 amc gremlin 21.0 6 199.0 90 2648 15.0 70 American
2 amc hornet 18.0 6 199.0 97 2774 15.5 70 American
3 amc rebel sst 16.0 8 304.0 150 3433 12.0 70 American
4 buick estate wagon (sw) 14.0 8 455.0 225 3086 10.0 70 American

Regression Plot

In [17]:
# Scatter 'hp' against 'weight' and overlay the fitted linear regression line.
# `height` (facet height in inches) is the current name of the keyword that
# older seaborn called `size`; `size` was renamed in seaborn 0.9 and is no
# longer accepted, so using it makes this cell fail on current releases.
sns.lmplot(x='weight', y='hp', data=cars, height=5, aspect=2)
plt.title("Weight vs HP")
plt.show()

Generating a residual plot for the regression line

In [25]:
# Residuals of the linear fit of 'hp' on 'weight', drawn in green on a 10x8 figure.
fig, ax = plt.subplots(figsize=(10, 8))
sns.residplot(x='weight', y='hp', data=cars, color='green', ax=ax)
Out[25]:
<matplotlib.axes._subplots.AxesSubplot at 0x11f1618d0>
In [24]:
# Residuals of the linear fit of 'mpg' on 'hp', drawn in green on a 10x8 figure.
fig, ax = plt.subplots(figsize=(10, 8))
sns.residplot(x='hp', y='mpg', data=cars, color='green', ax=ax)
Out[24]:
<matplotlib.axes._subplots.AxesSubplot at 0x11ef3f4e0>

Higher order regressions

In [27]:
# Plain matplotlib scatter of 'mpg' against 'weight', drawn as red circles.
plt.scatter(
    cars['weight'],
    cars['mpg'],
    marker='o',
    color='red',
    label='data',
)
Out[27]:
<matplotlib.collections.PathCollection at 0x11fe96588>
In [35]:
# Draw the raw data first: 'mpg' against 'weight' as red circles on a 10x8 figure
plt.figure(figsize=(10,8))
plt.scatter(cars['weight'], cars['mpg'], label='data', color='red', marker='o')
# Overlay in blue the order-1 (linear) regression of 'mpg' on 'weight'.
# scatter=False is the documented boolean for skipping seaborn's own scatter
# layer; the points are already drawn by plt.scatter above.
sns.regplot(x='weight', y='mpg', data=cars, scatter=False, color='blue', label='order 1')
# Overlay in green the order-2 (quadratic) regression of 'mpg' on 'weight'
sns.regplot(x='weight', y='mpg', data=cars, scatter=False, order=2, color='green', label='order 2')

plt.legend(loc='upper right')
Out[35]:
<matplotlib.legend.Legend at 0x1212b50f0>
In [37]:
# One regression of 'hp' on 'weight' per 'origin' value, all on the same axes,
# coloured from the 'Set1' palette.
sns.lmplot(
    data=cars,
    x='weight',
    y='hp',
    hue='origin',
    palette='Set1',
)
Out[37]:
<seaborn.axisgrid.FacetGrid at 0x121515ef0>
<matplotlib.figure.Figure at 0x121bcdac8>

Grouping linear regressions by row or column

In [40]:
# One regression of 'hp' on 'weight' per 'origin' value, each in its own column facet.
sns.lmplot(
    data=cars,
    x='weight',
    y='hp',
    col='origin',
)
Out[40]:
<seaborn.axisgrid.FacetGrid at 0x117ccacc0>