Friday, January 20, 2017

Numpy and Pandas for 2D data


2D Numpy array

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
# Subway ridership for 5 stations on 10 different days
ridership = np.array([
    [   0,    0,    2,    5,    0],
    [1478, 3877, 3674, 2328, 2539],
    [1613, 4088, 3991, 6461, 2691],
    [1560, 3392, 3826, 4787, 2613],
    [1608, 4802, 3932, 4477, 2705],
    [1576, 3933, 3909, 4979, 2685],
    [  95,  229,  255,  496,  201],
    [   2,    0,    1,   27,    0],
    [1438, 3785, 3589, 4174, 2215],
    [1342, 4043, 4009, 4665, 3033]
])

Accessing Numpy array:

In [2]:
print(ridership[1,4])  # In Python list of lists, we can access using a[i][j]. But, in numpy, it's a[i,j]
2539
In [3]:
print(len(ridership))
10
In [4]:
print(len(ridership[0]))
5
In [5]:
print(ridership[1:6,0:4])
[[1478 3877 3674 2328]
 [1613 4088 3991 6461]
 [1560 3392 3826 4787]
 [1608 4802 3932 4477]
 [1576 3933 3909 4979]]

vectorized operations

In [6]:
a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
b = np.array([[1, 1, 1], [2, 2, 2], [3, 3, 3]])
In [7]:
print(a)
print(b)
[[1 2 3]
 [4 5 6]
 [7 8 9]]
[[1 1 1]
 [2 2 2]
 [3 3 3]]
In [8]:
print(a+b)
[[ 2  3  4]
 [ 6  7  8]
 [10 11 12]]
In [9]:
first_col = ridership[:,0:1]
In [10]:
print(first_col)
[[   0]
 [1478]
 [1613]
 [1560]
 [1608]
 [1576]
 [  95]
 [   2]
 [1438]
 [1342]]
In [11]:
print(first_col.mean())
1071.2
In [12]:
print(ridership.mean(axis=0)) #mean for column
[ 1071.2  2814.9  2718.8  3239.9  1868.2]
In [13]:
print(ridership.mean(axis = 1)) #mean for row
[  1.40000000e+00   2.77920000e+03   3.76880000e+03   3.23560000e+03
   3.50480000e+03   3.41640000e+03   2.55200000e+02   6.00000000e+00
   3.04020000e+03   3.41840000e+03]

pandas DataFrame

In [14]:
student = pd.DataFrame({"name":["Arun","Prakash","Balaji","TR","Chidu"],
                       "city":["chennai","chennai","trichy","madurai","madurai"],
                       "age":[25,24,25,26,27]})
In [15]:
print(student)
   age     city     name
0   25  chennai     Arun
1   24  chennai  Prakash
2   25   trichy   Balaji
3   26  madurai       TR
4   27  madurai    Chidu
In [16]:
student = pd.DataFrame(data = student, index=["student1","student2","student3","student4","student5"])
print(student)
          age city name
student1  NaN  NaN  NaN
student2  NaN  NaN  NaN
student3  NaN  NaN  NaN
student4  NaN  NaN  NaN
student5  NaN  NaN  NaN
In [17]:
student = {"name":["Arun","Prakash","Balaji","TR","Chidu"],
                       "city":["chennai","chennai","trichy","madurai","madurai"],
                       "age":[25,24,25,26,27]}
In [18]:
student = pd.DataFrame(data = student, index=["student1","student2","student3","student4","student5"])
print(student)
          age     city     name
student1   25  chennai     Arun
student2   24  chennai  Prakash
student3   25   trichy   Balaji
student4   26  madurai       TR
student5   27  madurai    Chidu
In [19]:
student.loc["student1"]
Out[19]:
age          25
city    chennai
name       Arun
Name: student1, dtype: object
In [20]:
student.iloc[0]
Out[20]:
age          25
city    chennai
name       Arun
Name: student1, dtype: object
In [21]:
student.loc["student1","name"]
Out[21]:
'Arun'

converting pandas dataframe to numpy array

In [22]:
print(student.values)
[[25 'chennai' 'Arun']
 [24 'chennai' 'Prakash']
 [25 'trichy' 'Balaji']
 [26 'madurai' 'TR']
 [27 'madurai' 'Chidu']]
In [23]:
print(type(student.values))
<class 'numpy.ndarray'>

pandas practice

In [24]:
df_2 = pd.DataFrame([[0, 1, 2], [3, 4, 5]], columns=['A', 'B', 'C'])
In [25]:
print(df_2)
   A  B  C
0  0  1  2
1  3  4  5
In [26]:
df_2 = pd.DataFrame(data = df_2.values, columns=["A","B","C"],index= ["row1","row2"])
In [27]:
print(df_2)
      A  B  C
row1  0  1  2
row2  3  4  5

Calculating correlation between variables (Pearson's r method)

Earlier, we calculated the correlation by just checking the relationship between an increase or decrease in the value of one variable and another variable. Here, we are going to calculate the correlation between two variables using Pearson's r method.
  • Standardize the variable values (Pandas tip: use var.std(ddof=0), the uncorrected standard deviation).
  • Multiply each pair of values and then take average.
  • The value of r ranges between -1 and +1.
  • If the value is positive, the variables are positively correlated; if it's negative, they are negatively correlated.
In [28]:
subway_df = pd.read_csv("nyc_subway_weather.csv")
In [29]:
print(subway_df.head())
   UNIT     DATEn     TIMEn  ENTRIESn   EXITSn  ENTRIESn_hourly  \
0  R003  05-01-11  00:00:00   4388333  2911002              0.0   
1  R003  05-01-11  04:00:00   4388333  2911002              0.0   
2  R003  05-01-11  12:00:00   4388333  2911002              0.0   
3  R003  05-01-11  16:00:00   4388333  2911002              0.0   
4  R003  05-01-11  20:00:00   4388333  2911002              0.0   

   EXITSn_hourly             datetime  hour  day_week     ...       pressurei  \
0            0.0  2011-05-01 00:00:00     0         6     ...           30.22   
1            0.0  2011-05-01 04:00:00     4         6     ...           30.25   
2            0.0  2011-05-01 12:00:00    12         6     ...           30.28   
3            0.0  2011-05-01 16:00:00    16         6     ...           30.26   
4            0.0  2011-05-01 20:00:00    20         6     ...           30.28   

  rain  tempi  wspdi meanprecipi  meanpressurei  meantempi  meanwspdi  \
0    0   55.9    3.5         0.0         30.258      55.98       7.86   
1    0   52.0    3.5         0.0         30.258      55.98       7.86   
2    0   62.1    6.9         0.0         30.258      55.98       7.86   
3    0   57.9   15.0         0.0         30.258      55.98       7.86   
4    0   52.0   10.4         0.0         30.258      55.98       7.86   

   weather_lat  weather_lon  
0    40.700348   -73.887177  
1    40.700348   -73.887177  
2    40.700348   -73.887177  
3    40.700348   -73.887177  
4    40.700348   -73.887177  

[5 rows x 27 columns]
In [30]:
entries = subway_df['ENTRIESn_hourly']
cum_entries = subway_df['ENTRIESn']
rain = subway_df['meanprecipi']
temp = subway_df['meantempi']
In [31]:
print(entries.head(20))
0       0.0
1       0.0
2       0.0
3       0.0
4       0.0
5      15.0
6      19.0
7     488.0
8     490.0
9     231.0
10    235.0
11     74.0
12     20.0
13    975.0
14    267.0
15    277.0
16     83.0
17     24.0
18    532.0
19    454.0
Name: ENTRIESn_hourly, dtype: float64
In [32]:
print(cum_entries.head())
0    4388333
1    4388333
2    4388333
3    4388333
4    4388333
Name: ENTRIESn, dtype: int64
In [33]:
print(rain.head(20))
0     0.00
1     0.00
2     0.00
3     0.00
4     0.00
5     0.00
6     0.00
7     0.00
8     0.00
9     0.00
10    0.00
11    0.00
12    0.00
13    0.00
14    0.00
15    0.00
16    0.01
17    0.01
18    0.01
19    0.01
Name: meanprecipi, dtype: float64
In [34]:
print(temp.head())
0    55.98
1    55.98
2    55.98
3    55.98
4    55.98
Name: meantempi, dtype: float64

Correlation function

In [35]:
def correlation(x, y):
    """
    Compute Pearson's r between two pandas Series.

    Each series is standardized with the uncorrected (population)
    standard deviation (ddof=0); r is then the mean of the
    element-wise product of the standardized values. The result
    lies in [-1, +1].
    """
    # Step 1: standardize both variables.
    x_std = (x - x.mean()) / x.std(ddof=0)
    y_std = (y - y.mean()) / y.std(ddof=0)
    # Step 2: average the pairwise products of the standardized values.
    return (x_std * y_std).mean()
In [36]:
print(correlation(entries, rain))
print(correlation(entries, temp))
print(correlation(rain, temp))
print(correlation(entries, cum_entries))
0.03564851577223041
-0.026693348321569912
-0.22903432340833663
0.5858954707662182

Positive correlation example

In [37]:
plt.plot(cum_entries,entries)
Out[37]:
[<matplotlib.lines.Line2D at 0x11b22c860>]

Negative correlation example

In [38]:
plt.plot(temp,rain)
Out[38]:
[<matplotlib.lines.Line2D at 0x11bbc46d8>]

applymap( ) function

In [40]:
#Example 1
df = pd.DataFrame({
        'a': [1, 2, 3],
        'b': [10, 20, 30],
        'c': [5, 10, 15]
    })
    
def add_one(x):
    """Return the given value incremented by one."""
    return 1 + x
        
print(df.applymap(add_one))
   a   b   c
0  2  11   6
1  3  21  11
2  4  31  16
In [42]:
#Example 2
grades_df = pd.DataFrame(
    data={'exam1': [43, 81, 78, 75, 89, 70, 91, 65, 98, 87],
          'exam2': [24, 63, 56, 56, 67, 51, 79, 46, 72, 60]},
    index=['Andre', 'Barry', 'Chris', 'Dan', 'Emilio', 
           'Fred', 'Greta', 'Humbert', 'Ivan', 'James']
)
    
def convert_grades(grades):
    '''
    Convert a single numerical grade to a letter grade.

    The conversion rule is:
        90-100 -> A
        80-89  -> B
        70-79  -> C
        60-69  -> D
        0-59   -> F

    Intended for use with DataFrame.applymap, which passes one
    scalar value at a time.
    '''
    # Walk the cutoffs from highest to lowest; the first one the
    # grade meets or exceeds determines the letter.
    for cutoff, letter in ((90, "A"), (80, "B"), (70, "C"), (60, "D")):
        if grades >= cutoff:
            return letter
    return "F"
In [43]:
print(grades_df)
         exam1  exam2
Andre       43     24
Barry       81     63
Chris       78     56
Dan         75     56
Emilio      89     67
Fred        70     51
Greta       91     79
Humbert     65     46
Ivan        98     72
James       87     60
In [44]:
print(grades_df.applymap(convert_grades))
        exam1 exam2
Andre       F     F
Barry       B     D
Chris       C     F
Dan         C     F
Emilio      B     D
Fred        C     F
Greta       A     C
Humbert     D     F
Ivan        A     C
James       B     D

apply( ) function

In [45]:
df = pd.DataFrame({
    'a': [4, 5, 3, 1, 2],
    'b': [20, 10, 40, 50, 30],
    'c': [25, 20, 5, 15, 10]
})
In [46]:
print(df)
   a   b   c
0  4  20  25
1  5  10  20
2  3  40   5
3  1  50  15
4  2  30  10
In [51]:
print(df.apply(np.mean))
a     3.0
b    30.0
c    15.0
dtype: float64
In [52]:
print(df.apply(np.max))
a     5
b    50
c    25
dtype: int64
Now let's try to find the second max element in each column of a dataframe using the apply function.
In [92]:
def second_max(data):
    """
    Return the second-largest value in a pandas Series
    (duplicates count separately, so [5, 5, 3] -> 5).

    The previous implementation located the max positionally and then
    dropped that position as an index *label*, which only worked for a
    default RangeIndex, and it raised an error whenever the maximum
    value occurred more than once (get_loc then returns a slice/mask
    that drop cannot use). Sorting by value avoids both problems and
    works with any index.

    Raises IndexError if the series has fewer than two elements.
    """
    # Sort descending and take the element in position 1.
    return data.sort_values(ascending=False).iloc[1]
In [93]:
check =  df["a"]
print(check)
print(max(check))
pos = pd.Index(check).get_loc(max(check))
aa = check.drop(pos)
print(check)
print(aa)
0    4
1    5
2    3
3    1
4    2
Name: a, dtype: int64
5
0    4
1    5
2    3
3    1
4    2
Name: a, dtype: int64
0    4
2    3
3    1
4    2
Name: a, dtype: int64
In [94]:
print(df.apply(second_max))
a     4
b    40
c    20
dtype: int64

Adding a series to a dataframe

In [95]:
s = pd.Series([1, 2, 3, 4])
df = pd.DataFrame({
        0: [10, 20, 30, 40],
        1: [50, 60, 70, 80],
        2: [90, 100, 110, 120],
        3: [130, 140, 150, 160]
    })
In [96]:
print(df)
    0   1    2    3
0  10  50   90  130
1  20  60  100  140
2  30  70  110  150
3  40  80  120  160
In [97]:
print(s)
0    1
1    2
2    3
3    4
dtype: int64
In [98]:
print(df+s)
    0   1    2    3
0  11  52   93  134
1  21  62  103  144
2  31  72  113  154
3  41  82  123  164
In [99]:
s = pd.Series([1, 2, 3, 4])
df = pd.DataFrame({0: [10], 1: [20], 2: [30], 3: [40]})
In [102]:
print(df)
print(s)
    0   1   2   3
0  10  20  30  40
0    1
1    2
2    3
3    4
dtype: int64
In [101]:
print(df+s)
    0   1   2   3
0  11  22  33  44
In [105]:
s = pd.Series([1, 2, 3, 4])
df = pd.DataFrame({0: [10, 20, 30, 40]})
print(df)
print(s)
    0
0  10
1  20
2  30
3  40
0    1
1    2
2    3
3    4
dtype: int64
In [104]:
print(df+s)
    0   1   2   3
0  11 NaN NaN NaN
1  21 NaN NaN NaN
2  31 NaN NaN NaN
3  41 NaN NaN NaN
In [106]:
s = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
df = pd.DataFrame({
    'a': [10, 20, 30, 40],
    'b': [50, 60, 70, 80],
    'c': [90, 100, 110, 120],
    'd': [130, 140, 150, 160]
})
print(df)
print(s)
    a   b    c    d
0  10  50   90  130
1  20  60  100  140
2  30  70  110  150
3  40  80  120  160
a    1
b    2
c    3
d    4
dtype: int64
In [107]:
print(df+s)
    a   b    c    d
0  11  52   93  134
1  21  62  103  144
2  31  72  113  154
3  41  82  123  164

groupby( ) function

In [108]:
values = np.array([1, 3, 2, 4, 1, 6, 4])
example_df = pd.DataFrame({
    'value': values,
    'even': values % 2 == 0,
    'above_three': values > 3 
}, index=['a', 'b', 'c', 'd', 'e', 'f', 'g'])
In [109]:
print(example_df)
  above_three   even  value
a       False  False      1
b       False  False      3
c       False   True      2
d        True   True      4
e       False  False      1
f        True   True      6
g        True   True      4
In [119]:
print(example_df.groupby("even"))
print(example_df.groupby("even").sum())
<pandas.core.groupby.DataFrameGroupBy object at 0x11bd642e8>
       above_three  value
even                     
False          0.0      5
True           3.0     16
In [116]:
print(example_df.groupby("even").groups)
{False: ['a', 'b', 'e'], True: ['c', 'd', 'f', 'g']}
In [117]:
# groupby multiple columns 

grouped_data = example_df.groupby(['even', 'above_three'])
print(grouped_data.groups)
{(True, False): ['c'], (False, False): ['a', 'b', 'e'], (True, True): ['d', 'f', 'g']}

Merging multiple pandas Dataframes

In [131]:
subway_df = pd.DataFrame({
    'UNIT': ['R003', 'R003', 'R003', 'R003', 'R003', 'R004', 'R004', 'R004',
             'R004', 'R004'],
    'DATEn': ['05-01-11', '05-02-11', '05-03-11', '05-04-11', '05-05-11',
              '05-01-11', '05-02-11', '05-03-11', '05-04-11', '05-05-11'],
    'hour': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
    'ENTRIESn': [ 4388333,  4388348,  4389885,  4391507,  4393043, 14656120,
                 14656174, 14660126, 14664247, 14668301],
    'EXITSn': [ 2911002,  2911036,  2912127,  2913223,  2914284, 14451774,
               14451851, 14454734, 14457780, 14460818],
    'latitude': [ 40.689945,  40.689945,  40.689945,  40.689945,  40.689945,
                  40.69132 ,  40.69132 ,  40.69132 ,  40.69132 ,  40.69132 ],
    'longitude': [-73.872564, -73.872564, -73.872564, -73.872564, -73.872564,
                  -73.867135, -73.867135, -73.867135, -73.867135, -73.867135]
})

weather_df = pd.DataFrame({
    'DATEn': ['05-01-11', '05-01-11', '05-02-11', '05-02-11', '05-03-11',
              '05-03-11', '05-04-11', '05-04-11', '05-05-11', '05-05-11'],
    'hour': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
    'latitude': [ 40.689945,  40.69132 ,  40.689945,  40.69132 ,  40.689945,
                  40.69132 ,  40.689945,  40.69132 ,  40.689945,  40.69132 ],
    'longitude': [-73.872564, -73.867135, -73.872564, -73.867135, -73.872564,
                  -73.867135, -73.872564, -73.867135, -73.872564, -73.867135],
    'pressurei': [ 30.24,  30.24,  30.32,  30.32,  30.14,  30.14,  29.98,  29.98,
                   30.01,  30.01],
    'fog': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
    'rain': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
    'tempi': [ 52. ,  52. ,  48.9,  48.9,  54. ,  54. ,  57.2,  57.2,  48.9,  48.9],
    'wspdi': [  8.1,   8.1,   6.9,   6.9,   3.5,   3.5,  15. ,  15. ,  15. ,  15. ]
})
In [132]:
subway_df
Out[132]:
DATEn ENTRIESn EXITSn UNIT hour latitude longitude
0 05-01-11 4388333 2911002 R003 0 40.689945 -73.872564
1 05-02-11 4388348 2911036 R003 0 40.689945 -73.872564
2 05-03-11 4389885 2912127 R003 0 40.689945 -73.872564
3 05-04-11 4391507 2913223 R003 0 40.689945 -73.872564
4 05-05-11 4393043 2914284 R003 0 40.689945 -73.872564
5 05-01-11 14656120 14451774 R004 0 40.691320 -73.867135
6 05-02-11 14656174 14451851 R004 0 40.691320 -73.867135
7 05-03-11 14660126 14454734 R004 0 40.691320 -73.867135
8 05-04-11 14664247 14457780 R004 0 40.691320 -73.867135
9 05-05-11 14668301 14460818 R004 0 40.691320 -73.867135
In [133]:
weather_df
Out[133]:
DATEn fog hour latitude longitude pressurei rain tempi wspdi
0 05-01-11 0 0 40.689945 -73.872564 30.24 0 52.0 8.1
1 05-01-11 0 0 40.691320 -73.867135 30.24 0 52.0 8.1
2 05-02-11 0 0 40.689945 -73.872564 30.32 0 48.9 6.9
3 05-02-11 0 0 40.691320 -73.867135 30.32 0 48.9 6.9
4 05-03-11 0 0 40.689945 -73.872564 30.14 0 54.0 3.5
5 05-03-11 0 0 40.691320 -73.867135 30.14 0 54.0 3.5
6 05-04-11 0 0 40.689945 -73.872564 29.98 0 57.2 15.0
7 05-04-11 0 0 40.691320 -73.867135 29.98 0 57.2 15.0
8 05-05-11 0 0 40.689945 -73.872564 30.01 0 48.9 15.0
9 05-05-11 0 0 40.691320 -73.867135 30.01 0 48.9 15.0
In [134]:
subway_df.merge(weather_df, on = ["DATEn","hour","latitude","longitude"],how = "inner")
Out[134]:
DATEn ENTRIESn EXITSn UNIT hour latitude longitude fog pressurei rain tempi wspdi
0 05-01-11 4388333 2911002 R003 0 40.689945 -73.872564 0 30.24 0 52.0 8.1
1 05-02-11 4388348 2911036 R003 0 40.689945 -73.872564 0 30.32 0 48.9 6.9
2 05-03-11 4389885 2912127 R003 0 40.689945 -73.872564 0 30.14 0 54.0 3.5
3 05-04-11 4391507 2913223 R003 0 40.689945 -73.872564 0 29.98 0 57.2 15.0
4 05-05-11 4393043 2914284 R003 0 40.689945 -73.872564 0 30.01 0 48.9 15.0
5 05-01-11 14656120 14451774 R004 0 40.691320 -73.867135 0 30.24 0 52.0 8.1
6 05-02-11 14656174 14451851 R004 0 40.691320 -73.867135 0 30.32 0 48.9 6.9
7 05-03-11 14660126 14454734 R004 0 40.691320 -73.867135 0 30.14 0 54.0 3.5
8 05-04-11 14664247 14457780 R004 0 40.691320 -73.867135 0 29.98 0 57.2 15.0
9 05-05-11 14668301 14460818 R004 0 40.691320 -73.867135 0 30.01 0 48.9 15.0

Plotting

In [136]:
subway_df = pd.read_csv("nyc_subway_weather.csv")
subway_df.head()
Out[136]:
UNIT DATEn TIMEn ENTRIESn EXITSn ENTRIESn_hourly EXITSn_hourly datetime hour day_week ... pressurei rain tempi wspdi meanprecipi meanpressurei meantempi meanwspdi weather_lat weather_lon
0 R003 05-01-11 00:00:00 4388333 2911002 0.0 0.0 2011-05-01 00:00:00 0 6 ... 30.22 0 55.9 3.5 0.0 30.258 55.98 7.86 40.700348 -73.887177
1 R003 05-01-11 04:00:00 4388333 2911002 0.0 0.0 2011-05-01 04:00:00 4 6 ... 30.25 0 52.0 3.5 0.0 30.258 55.98 7.86 40.700348 -73.887177
2 R003 05-01-11 12:00:00 4388333 2911002 0.0 0.0 2011-05-01 12:00:00 12 6 ... 30.28 0 62.1 6.9 0.0 30.258 55.98 7.86 40.700348 -73.887177
3 R003 05-01-11 16:00:00 4388333 2911002 0.0 0.0 2011-05-01 16:00:00 16 6 ... 30.26 0 57.9 15.0 0.0 30.258 55.98 7.86 40.700348 -73.887177
4 R003 05-01-11 20:00:00 4388333 2911002 0.0 0.0 2011-05-01 20:00:00 20 6 ... 30.28 0 52.0 10.4 0.0 30.258 55.98 7.86 40.700348 -73.887177
5 rows × 27 columns
In [139]:
data_by_location = subway_df.groupby(["latitude","longitude"],as_index=False).mean()
In [140]:
data_by_location.head()
Out[140]:
latitude longitude ENTRIESn EXITSn ENTRIESn_hourly EXITSn_hourly hour day_week weekday fog ... pressurei rain tempi wspdi meanprecipi meanpressurei meantempi meanwspdi weather_lat weather_lon
0 40.576152 -73.975925 9.659049e+06 8.641132e+06 403.896175 325.956284 10.032787 2.907104 0.715847 0.010929 ... 29.972568 0.229508 63.383607 5.553005 0.006284 29.972568 63.383607 5.553005 40.603489 -73.958763
1 40.576298 -73.968523 8.306897e+06 6.646823e+06 526.697297 419.562162 9.989189 2.951351 0.708108 0.010811 ... 29.973297 0.227027 63.375135 5.517838 0.006216 29.973297 63.375135 5.517838 40.603489 -73.958763
2 40.577961 -73.961806 4.552910e+07 4.612408e+07 1950.295699 1930.483871 10.000000 2.935484 0.709677 0.010753 ... 29.973118 0.225806 63.394086 5.531720 0.006183 29.973118 63.394086 5.531720 40.603489 -73.958763
3 40.589547 -73.974295 7.268214e+06 7.961334e+06 485.382353 362.941176 10.164706 2.905882 0.705882 0.011765 ... 29.971176 0.200000 63.650588 5.630588 0.006118 29.971176 63.650588 5.630588 40.603489 -73.958763
4 40.590867 -73.797011 6.477945e+06 5.994957e+06 500.725610 374.628049 10.097561 2.951220 0.719512 0.024390 ... 29.981098 0.195122 61.721341 9.945122 0.002744 29.981098 61.721341 9.945122 40.660004 -73.844849
5 rows × 21 columns
In [142]:
plt.scatter(data_by_location["latitude"],data_by_location["longitude"], s=data_by_location["ENTRIESn_hourly"])
Out[142]:
<matplotlib.collections.PathCollection at 0x12410de48>
In [145]:
scaled_entries = (data_by_location["ENTRIESn_hourly"]/data_by_location["ENTRIESn_hourly"].std())
In [146]:
plt.scatter(data_by_location["latitude"],data_by_location["longitude"], s=scaled_entries)
Out[146]:
<matplotlib.collections.PathCollection at 0x129498898>

No comments :

Post a Comment