In [5]:
# Read the iris CSV from its remote location into a DataFrame.
IRIS_CSV_URL = "https://raw.githubusercontent.com/uiuc-cse/data-fa14/gh-pages/data/iris.csv"
iris = pd.read_csv(IRIS_CSV_URL)
In [6]:
# Preview the first five rows of the loaded table.
iris.head(5)
Out[6]:
In [10]:
# Number of rows (observations) in the table.
len(iris)
Out[10]:
In [8]:
# Histogram of petal length using the default number of bins.
plt.hist(iris["petal_length"])
# Label the axes so the figure can be read on its own, matching the
# labelling used for the ECDF plot of the same column.
plt.xlabel("petal length (cm)")
plt.ylabel("count")
Out[8]:
Square root rule¶
A common rule of thumb is to set the number of bins to the square root of the number of samples.
In [12]:
# Square-root rule: number of bins = sqrt(number of rows), truncated to an int.
n_bins = int(np.sqrt(iris.shape[0]))
plt.hist(iris["petal_length"], bins=n_bins)
# Label the axes so the figure can be read on its own, matching the
# labelling used for the ECDF plot of the same column.
plt.xlabel("petal length (cm)")
plt.ylabel("count")
Out[12]:
Swarm plot¶
In [13]:
# One point per flower: petal length on the y-axis, grouped by species on the x-axis.
sns.swarmplot(
    data=iris,
    x="species",
    y="petal_length",
)
Out[13]:
Empirical Cumulative Distribution Function¶
In [14]:
def ecdf(data):
    """Compute the ECDF for a one-dimensional array of measurements.

    Parameters
    ----------
    data : sequence or array-like
        One-dimensional collection of measurements; must support ``len``.

    Returns
    -------
    x : numpy.ndarray
        The measurements sorted in ascending order.
    y : numpy.ndarray
        Float array ``1/n, 2/n, ..., 1`` of the same length as ``x``;
        ``y[i]`` is the fraction of measurements less than or equal to
        ``x[i]``.
    """
    # Number of data points: n
    n = len(data)
    # x-data for the ECDF: the sorted measurements
    x = np.sort(data)
    # y-data for the ECDF: build the ranks as floats so the division always
    # yields fractions, regardless of integer-division semantics.
    y = np.arange(1, n + 1, dtype=float) / n
    return x, y
ECDF plot for petal length¶
In [16]:
# Sorted petal lengths (x) and their cumulative fractions (y).
petal_lengths = iris["petal_length"]
x, y = ecdf(petal_lengths)
In [24]:
# Draw the ECDF as unconnected dots: one marker per observation, no line.
ecdf_point_style = {"marker": '.', "linestyle": 'none'}
plt.plot(x, y, **ecdf_point_style)
plt.xlabel('petal length (cm)')
# Small margin around the data before the final label call.
plt.margins(0.02)
plt.ylabel("ECDF")
Out[24]:
No comments :
Post a Comment