## Friday, February 3, 2017

### Empirical Cumulative Distribution Function

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from mpl_toolkits.basemap import Basemap
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')
%config InlineBackend.figure_format = 'retina'

In [5]:
# Load the iris measurements from the CSV hosted in the uiuc-cse/data-fa14 repository.
iris_csv_url = "https://raw.githubusercontent.com/uiuc-cse/data-fa14/gh-pages/data/iris.csv"
iris = pd.read_csv(iris_csv_url)

In [6]:
# Preview the first five rows (the default for head) to check the loaded columns.
iris.head(n=5)

Out[6]:
sepal_length sepal_width petal_length petal_width species
0 5.1 3.5 1.4 0.2 setosa
1 4.9 3.0 1.4 0.2 setosa
2 4.7 3.2 1.3 0.2 setosa
3 4.6 3.1 1.5 0.2 setosa
4 5.0 3.6 1.4 0.2 setosa
In [10]:
# Number of rows (observations) in the data set.
len(iris)

Out[10]:
150
In [8]:
# Histogram of petal length using matplotlib's default binning.
petal_lengths = iris["petal_length"]
plt.hist(petal_lengths)

Out[8]:
(array([ 37.,  13.,   0.,   3.,   8.,  26.,  29.,  18.,  11.,   5.]),
array([ 1.  ,  1.59,  2.18,  2.77,  3.36,  3.95,  4.54,  5.13,  5.72,
6.31,  6.9 ]),
<a list of 10 Patch objects>)

### Square root rule

A common rule of thumb is to set the number of histogram bins to the square root of the number of observations (here, √150 ≈ 12 bins).
In [12]:
# Square-root rule: the bin count is the truncated square root of the number of rows.
n_bins = int(np.sqrt(len(iris)))
plt.hist(iris["petal_length"], bins=n_bins)

Out[12]:
(array([ 23.,  27.,   0.,   0.,   3.,   8.,  18.,  25.,  18.,  17.,   7.,
4.]),
array([ 1.        ,  1.49166667,  1.98333333,  2.475     ,  2.96666667,
3.45833333,  3.95      ,  4.44166667,  4.93333333,  5.425     ,
5.91666667,  6.40833333,  6.9       ]),
<a list of 12 Patch objects>)

### Swarm plot

In [13]:
# One point per row of iris: petal length on the y-axis, grouped by species on the x-axis.
sns.swarmplot(data=iris, x="species", y="petal_length")

Out[13]:
<matplotlib.axes._subplots.AxesSubplot at 0x11ce6f550>

### Empirical Cumulative Distribution Function

In [14]:
def ecdf(data):
    """Compute the ECDF for a one-dimensional array of measurements.

    Parameters
    ----------
    data : array-like
        One-dimensional sequence of numeric observations.

    Returns
    -------
    x : numpy.ndarray
        The observations sorted in ascending order.
    y : numpy.ndarray
        Float array of the same length where ``y[i] == (i + 1) / n`` is the
        cumulative fraction assigned to the i-th sorted observation.
    """
    # Number of data points: n
    n = len(data)

    # x-data for the ECDF: the sorted observations
    x = np.sort(data)

    # y-data for the ECDF: evenly spaced steps 1/n, 2/n, ..., 1.
    # Building the numerator as floats guarantees true division even where
    # integer / integer would otherwise floor (e.g. Python 2 semantics).
    y = np.arange(1, n + 1, dtype=float) / n

    return x, y


### ECDF plot for petal length

In [16]:
# Sorted petal lengths (x) and their cumulative fractions (y).
x, y = ecdf(data=iris["petal_length"])

In [24]:
# Draw the ECDF as unconnected dots, with 2% padding around the data.
plt.plot(x, y, linestyle="none", marker=".")
plt.margins(0.02)
plt.xlabel("petal length (cm)")
plt.ylabel("ECDF")

Out[24]:
<matplotlib.text.Text at 0x11e7d9978>