Wednesday, July 12, 2017

Python for Data Analysis: Matplotlib



In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
import warnings
warnings.filterwarnings('ignore')
In [2]:
# Line plot of np.arange(10), i.e. the integers 0 through 9, on the current pyplot axes.
plt.plot(np.arange(10))
Out[2]:
[<matplotlib.lines.Line2D at 0x1170dd4e0>]
In [19]:
# Build a 10x8 figure and fill three of the four cells of a 2x2 grid
# (slot 4 is intentionally left empty).
fig = plt.figure(figsize=(10, 8))
ax1, ax2, ax3 = (fig.add_subplot(2, 2, slot) for slot in (1, 2, 3))

# Each axes receives its own, independently drawn cumulative sum of
# 50 normal samples. The final call stays last so its return value is
# what the cell displays.
ax1.plot(np.cumsum(np.random.randn(50)))
ax2.plot(np.cumsum(np.random.randn(50)))
ax3.plot(np.cumsum(np.random.randn(50)))
Out[19]:
[<matplotlib.lines.Line2D at 0x117ef47f0>]
In [10]:
# Cumulative sum of 50 random normal draws, styled with the matplotlib
# format string 'k--' (black, dashed line).
plt.plot(np.random.randn(50).cumsum(), 'k--')
Out[10]:
[<matplotlib.lines.Line2D at 0x117823630>]
In [20]:
# Display the raw numbers behind the plots above: the running total of 50
# fresh random draws. No seed is set in this notebook, so the values differ
# on every run.
np.random.randn(50).cumsum()
Out[20]:
array([ -1.27151916,  -2.59263093,  -1.36963461,  -0.15767807,
        -0.46038967,  -0.88472351,  -1.72576629,  -2.35962792,
        -2.4149889 ,  -3.99999618,  -5.55828585,  -4.00123898,
        -4.09655013,  -4.18725737,  -3.58542347,  -3.43221186,
        -5.71828916,  -4.83495968,  -6.00132637,  -4.26417814,
        -4.00288293,  -4.03792049,  -4.42462774,  -4.31498023,
        -4.48951022,  -5.780037  ,  -6.05237602,  -4.84322739,
        -4.99295429,  -2.88437542,  -4.2114043 ,  -3.24408027,
        -3.44759988,  -4.8351261 ,  -6.50245802,  -6.50555987,
        -6.39274789,  -6.96087746,  -5.88146577,  -5.68982717,
        -7.22333554,  -7.35737438,  -8.59295647,  -9.43071727,
       -10.01216583,  -9.35799485,  -9.85610915, -12.04219967,
       -10.07705358,  -8.81087437])
In [21]:
# Display 50 fresh random draws on their own (no cumulative sum), for comparison.
np.random.randn(50)
Out[21]:
array([ 1.28679649,  0.23974566,  1.1491516 ,  0.02834109,  0.23616748,
       -1.00200392, -0.64689202, -0.63444447,  0.58756627,  0.43588888,
        0.39807528,  0.32829297, -0.47143378, -0.64964832, -2.1330079 ,
       -0.21682732,  1.03542918, -1.99394021, -2.5156884 , -1.34445941,
       -0.83866025, -0.35098097,  0.51865451, -0.28584907, -0.09132289,
        0.44870196,  0.69724048, -0.4476485 , -0.87657024,  0.2530584 ,
       -1.48125762,  0.7465853 , -0.58607457, -1.23233546,  0.59483498,
       -0.44849835,  0.41740588, -0.04229125,  0.34400788, -2.75247057,
       -1.9850818 ,  0.12443813,  1.72909423, -0.55339689, -0.52656201,
        0.73816988, -0.40378774, -0.41787295, -0.58145532,  0.05892673])
In [29]:
# Compact format string: "og--" combines marker 'o', colour 'g' and line style '--'.
# The following cell spells the same styling out with explicit keywords.
plt.plot(np.random.randn(30), "og--")
Out[29]:
[<matplotlib.lines.Line2D at 0x11d186828>]
In [32]:
# Same look as a compact format string, but with each style component
# passed as its own keyword argument.
plt.plot(
    np.random.randn(30),
    color="green",
    linestyle="--",
    marker="o",
)
Out[32]:
[<matplotlib.lines.Line2D at 0x11d5bf668>]
In [33]:
# Running total of 30 random normal draws; reused by the next plotting cell.
data = np.cumsum(np.random.randn(30))
In [37]:
# Draw the same `data` twice on the current axes so the two renderings can be
# compared: once as a plain dashed line, once with drawstyle='steps-post'.
plt.plot(data, 'k--', label='Default')
plt.plot(data, 'k-', drawstyle='steps-post', label='steps-post')
# The labels passed above are what the legend shows; loc='best' leaves the
# placement to matplotlib.
plt.legend(loc='best')
Out[37]:
<matplotlib.legend.Legend at 0x11dca8710>
In [38]:
from numpy.random import randn
In [48]:
# One figure, one axes, three independent 1000-step cumulative sums.
fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(1, 1, 1)

# (format string, legend label) for each series, drawn in this order.
for fmt, name in (('g', 'one'), ('r--', 'two'), ('k.', 'three')):
    ax.plot(randn(1000).cumsum(), fmt, label=name)

# Legend entries come from the labels given to ax.plot above.
ax.legend(loc='best')
Out[48]:
<matplotlib.legend.Legend at 0x1200cf5c0>

Annotate

In [79]:
from datetime import datetime

# Plot the SPX series, label three events of the financial crisis with
# arrows, and zoom the view in on 2007-2010.
fig = plt.figure(figsize=(18, 10))
ax = fig.add_subplot(1, 1, 1)

# The first CSV column is used as the index and parsed as dates, so the
# series is indexed by timestamps.
data = pd.read_csv('examples/spx.csv', index_col=0, parse_dates=True)
spx = data['SPX']

ax.plot(spx, "k-")

crisis_data = [
    (datetime(2007, 10, 11), 'Peak of bull market'),
    (datetime(2008, 3, 12), 'Bear Stearns Fails'),
    (datetime(2008, 9, 15), 'Lehman Bankruptcy')
]

for date, label in crisis_data:
    # asof() gives the last available value at or before `date`, so the
    # lookup works even if the event date itself is not in the index.
    price = spx.asof(date)
    # The arrow tip sits 50 points above the curve and the text 200 points
    # above it, so neither overlaps the line.
    ax.annotate(label, xy=(date, price + 50),
                xytext=(date, price + 200),
                arrowprops=dict(facecolor='red'),
                horizontalalignment='left', verticalalignment='top')

# Zoom in on 2007-2010.
# The limits are datetime objects rather than strings such as '1/1/2007':
# the line was drawn with ax.plot (not Series.plot), and a plain matplotlib
# date axis cannot convert date strings unless pandas' converters happen
# to be registered.
ax.set_xlim([datetime(2007, 1, 1), datetime(2011, 1, 1)])
ax.set_ylim([600, 1800])

ax.set_title('Important dates in 2008-2009 financial crisis')
Out[79]:
<matplotlib.text.Text at 0x12771e978>

Bar plots

In [62]:
# Two stacked panels: vertical bars on top, horizontal bars below.
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 8))

# 16 uniform random values labelled 'a' through 'p'.
data = pd.Series(np.random.rand(16), index=list('abcdefghijklmnop'))

# Same series, same styling, two orientations. The barh call stays last
# so its return value is what the cell displays.
data.plot.bar(alpha=0.7, color='k', ax=ax1)
data.plot.barh(alpha=0.7, color='k', ax=ax2)
Out[62]:
<matplotlib.axes._subplots.AxesSubplot at 0x12336bb70>
In [65]:
# 6x4 frame of uniform random values. The column index is named 'Genus',
# which pandas uses as the legend title when the frame is plotted.
df = pd.DataFrame(
    np.random.rand(6, 4),
    index=['one', 'two', 'three', 'four', 'five', 'six'],
    columns=pd.Index(['A', 'B', 'C', 'D'], name='Genus'),
)
In [66]:
# Display the frame so the values behind the bar plots below are visible.
df
Out[66]:
Genus A B C D
one 0.937337 0.601791 0.771729 0.582258
two 0.022157 0.057519 0.171492 0.730448
three 0.835999 0.433387 0.074650 0.561878
four 0.214437 0.080940 0.170560 0.582689
five 0.318198 0.980626 0.469795 0.937800
six 0.478719 0.799570 0.419184 0.448964
In [69]:
# Grouped bar chart: one group per row of `df`, one bar per column.
df.plot(kind='bar', figsize=(10, 8))
Out[69]:
<matplotlib.axes._subplots.AxesSubplot at 0x123ece390>
In [71]:
# Horizontal bars with the columns stacked end-to-end within each row;
# alpha=0.5 makes the segments semi-transparent.
df.plot.barh(
    stacked=True,
    alpha=0.5,
    figsize=(10, 8),
)
Out[71]:
<matplotlib.axes._subplots.AxesSubplot at 0x1241292b0>
In [72]:
# Two samples of 200 draws each, used to build a bimodal mixture:
#   comp1 ~ Normal(mean 0,  std 1)
#   comp2 ~ Normal(mean 10, std 2)
comp1 = np.random.normal(loc=0, scale=1, size=200)
comp2 = np.random.normal(loc=10, scale=2, size=200)
In [73]:
# Join the two samples end-to-end into a single Series (comp1 first, then comp2).
values = pd.Series(np.concatenate([comp1, comp2]))
In [74]:
# Distribution plot of the combined sample, using 100 bins and drawn in black.
# NOTE(review): distplot is deprecated in seaborn >= 0.11 in favour of
# histplot/displot. Confirm the installed seaborn version before re-running.
sns.distplot(values, bins=100, color='k')
Out[74]:
<matplotlib.axes._subplots.AxesSubplot at 0x12482de10>
In [ ]:
 

No comments:

Post a Comment