In [2]:
# Read the remote cars CSV into the `cars` DataFrame.
cars = pd.read_csv(
    "http://web.pdx.edu/~gerbing/data/cars.csv"
)
In [3]:
# Display the first five rows of `cars` as the cell output.
cars.head(n=5)
Out[3]:
In [8]:
# Overwrite the column labels of `cars` positionally with nine short lowercase names.
cars.columns = [
    "model",
    "mpg",
    "cylinder",
    "engine",
    "hp",
    "weight",
    "accelerate",
    "year",
    "origin",
]
In [18]:
# Show the top five rows of `cars`.
cars.head(n=5)
Out[18]:
Regression Plot¶
In [17]:
# Scatter of 'hp' against 'weight' with a fitted linear regression line.
# `height` is the current name of the facet-size keyword; the older `size`
# spelling was deprecated by seaborn in favour of `height` (seaborn 0.9).
sns.lmplot(x='weight', y='hp', data=cars, height=5, aspect=2)
plt.title("Weight vs HP")
plt.show()
Generating a residual plot of the regression line¶
In [25]:
# Open a 10x8 figure, then draw the residuals of regressing 'hp' on 'weight' in green.
fig = plt.figure(figsize=(10, 8))
sns.residplot(
    data=cars,
    x="weight",
    y="hp",
    color="green",
)
Out[25]:
In [24]:
# Open a 10x8 figure, then draw the residuals of regressing 'mpg' on 'hp' in green.
fig = plt.figure(figsize=(10, 8))
sns.residplot(
    data=cars,
    x="hp",
    y="mpg",
    color="green",
)
Out[24]:
Higher order regressions¶
In [27]:
# Red-circle scatter plot of 'mpg' (y) against 'weight' (x), labelled 'data'.
plt.scatter(
    cars["weight"],
    cars["mpg"],
    label="data",
    color="red",
    marker="o",
)
Out[27]:
In [35]:
# Overlay first- and second-order regression fits of 'mpg' on 'weight' on one figure.
plt.figure(figsize=(10,8))
# Generate a scatter plot of 'weight' and 'mpg' using red circles
plt.scatter(cars['weight'], cars['mpg'], label='data', color='red', marker='o')
# Plot in blue a linear regression of order 1 between 'weight' and 'mpg'
# (scatter=False: the points are already drawn above, so only the fit is added)
sns.regplot(x='weight', y='mpg', data=cars, scatter=False, color='blue', label='order 1')
# Plot in green a linear regression of order 2 between 'weight' and 'mpg'
sns.regplot(x='weight', y='mpg', data=cars, scatter=False, order=2, color='green', label='order 2')
# The legend picks up the 'data', 'order 1' and 'order 2' labels set above.
plt.legend(loc='upper right')
Out[35]:
In [37]:
# Regression of 'hp' on 'weight', with points and fits coloured by 'origin' (Set1 palette).
sns.lmplot(
    data=cars,
    x="weight",
    y="hp",
    hue="origin",
    palette="Set1",
)
Out[37]:
Grouping linear regressions by row or column¶
In [40]:
# Regression of 'hp' on 'weight', drawn in a separate column facet per 'origin' value.
sns.lmplot(
    data=cars,
    x="weight",
    y="hp",
    col="origin",
)
Out[40]: