In [4]:
# Download the cars CSV and parse it into a DataFrame.
CARS_CSV_URL = "http://web.pdx.edu/~gerbing/data/cars.csv"
cars = pd.read_csv(CARS_CSV_URL)
In [9]:
# Relabel the columns with short lowercase names; labels are assigned by position.
short_names = [
    "model", "mpg", "cyl", "engine", "hp",
    "weight", "accelerate", "year", "origin",
]
cars.columns = short_names
In [10]:
# Preview the first five rows of the frame.
cars.head(n=5)
Out[10]:
Strip plots
(One continuous variable plotted against a categorical variable)
In [16]:
# Strip plot of 'hp' for each 'cyl' category.
# The figure holds exactly one Axes: the earlier 2x1 subplot grid only ever had
# its first slot drawn in this cell, so a single-panel figure is created instead
# and the Axes is handed to seaborn explicitly.
fig, ax = plt.subplots(figsize=(10, 4))
sns.stripplot(x='cyl', y='hp', data=cars, ax=ax)
ax.set_title("Strip plot: hp by cyl")
plt.show()  # render the figure rather than echoing the Axes repr as cell output
Out[16]:
In [17]:
# Strip plot of 'hp' for each 'cyl' category with jitter requested explicitly
# and a marker size of 5.
# The figure holds exactly one Axes: the earlier code selected slot 2 of a fresh
# 2x1 grid whose first slot was never drawn, so a single-panel figure is created
# instead and the Axes is handed to seaborn explicitly.
fig, ax = plt.subplots(figsize=(10, 4))
sns.stripplot(x='cyl', y='hp', data=cars, jitter=True, size=5, ax=ax)
ax.set_title("Strip plot with jitter: hp by cyl")
plt.show()  # render the figure rather than echoing the Axes repr as cell output
Out[17]:
Swarm plots
- One continuous variable plotted against a categorical variable
- Spreads the data points out so that they do not overlap, which makes the distribution easier to see
In [19]:
# Swarm plot of 'hp' for each 'cyl' category.
# The figure holds exactly one Axes: the earlier 2x1 subplot grid only ever had
# its first slot drawn in this cell, so a single-panel figure is created instead
# and the Axes is handed to seaborn explicitly.
fig, ax = plt.subplots(figsize=(10, 4))
sns.swarmplot(x='cyl', y='hp', data=cars, ax=ax)
ax.set_title("Swarm plot: hp by cyl")
plt.show()  # render the figure rather than echoing the Axes repr as cell output
Out[19]:
In [22]:
# Horizontal swarm plot: 'hp' on the x-axis, one row per 'cyl' value, points
# colored by 'origin'.
# The figure holds exactly one Axes: the earlier code selected slot 2 of a fresh
# 2x1 grid whose first slot was never drawn, so a single-panel figure is created
# instead and the Axes is handed to seaborn explicitly.
fig, ax = plt.subplots(figsize=(10, 5))
sns.swarmplot(x='hp', y='cyl', data=cars, hue='origin', orient='h', ax=ax)
ax.set_title("Swarm plot: hp by cyl, colored by origin")
plt.show()  # render the figure rather than echoing the Axes repr as cell output
Out[22]:
Both strip plots and swarm plots become hard to read on large datasets because the points overlap (overplotting).
Violin plots
- Similar to box plots, but they also show the shape of the distribution as a density curve
In [26]:
# Violin plot of 'hp' for each 'cyl' category.
# The figure holds exactly one Axes: the earlier 2x1 subplot grid only ever had
# its first slot drawn in this cell, so a single-panel figure is created instead
# and the Axes is handed to seaborn explicitly.
fig, ax = plt.subplots(figsize=(10, 5))
sns.violinplot(x='cyl', y='hp', data=cars, ax=ax)
ax.set_title("Violin plot: hp by cyl")
plt.show()  # render the figure rather than echoing the Axes repr as cell output
Out[26]:
Combining a strip plot and a violin plot
In [27]:
# Violin plot of 'hp' per 'cyl' with the individual observations drawn on top.
# Both seaborn calls receive the same Axes, so the overlay no longer depends on
# pyplot's "current axes" state. The figure holds exactly one Axes: the earlier
# code selected slot 2 of a fresh 2x1 grid whose first slot was never drawn.
fig, ax = plt.subplots(figsize=(10, 5))
# Light gray violins with no inner annotation serve as the background layer.
sns.violinplot(x='cyl', y='hp', data=cars, inner=None, color='lightgray', ax=ax)
# Overlay a strip plot on the violin plot
sns.stripplot(x='cyl', y='hp', data=cars, jitter=True, size=1.5, ax=ax)
ax.set_title("Violin plot with strip plot overlay: hp by cyl")
plt.show()  # render the figure rather than echoing the Axes repr as cell output
Out[27]:
No comments :
Post a Comment