Monday, January 30, 2017

Data Visualisation: Women's Bachelor's Degrees Dataset



In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

Data

This dataset contains, for each field of study, the percentage of bachelor's degrees awarded to women in the USA for each year from 1970 to 2011 (42 yearly rows).
In [2]:
# Read the CSV (resolved relative to the notebook's working directory) into a DataFrame.
csv_path = "percent-bachelors-degrees-women-usa.csv"
women_degrees = pd.read_csv(csv_path)
In [3]:
# Plain-text preview of the first rows (head() defaults to five rows).
preview = women_degrees.head()
print(preview)
   Year  Agriculture  Architecture  Art and Performance    Biology   Business  \
0  1970     4.229798     11.921005                 59.7  29.088363   9.064439   
1  1971     5.452797     12.003106                 59.9  29.394403   9.503187   
2  1972     7.420710     13.214594                 60.4  29.810221  10.558962   
3  1973     9.653602     14.791613                 60.2  31.147915  12.804602   
4  1974    14.074623     17.444688                 61.9  32.996183  16.204850   

   Communications and Journalism  Computer Science  Education  Engineering  \
0                           35.3              13.6  74.535328          0.8   
1                           35.5              13.6  74.149204          1.0   
2                           36.6              14.9  73.554520          1.2   
3                           38.4              16.4  73.501814          1.6   
4                           40.5              18.9  73.336811          2.2   

     English  Foreign Languages  Health Professions  Math and Statistics  \
0  65.570923               73.8                77.1                 38.0   
1  64.556485               73.9                75.5                 39.0   
2  63.664263               74.6                76.9                 40.2   
3  62.941502               74.9                77.4                 40.9   
4  62.413412               75.3                77.9                 41.8   

   Physical Sciences  Psychology  Public Administration  \
0               13.8        44.4                   68.4   
1               14.9        46.2                   65.5   
2               14.8        47.6                   62.6   
3               16.5        50.4                   64.3   
4               18.2        52.6                   66.1   

   Social Sciences and History  
0                         36.8  
1                         36.2  
2                         36.1  
3                         36.4  
4                         37.3  
In [4]:
# Display the DataFrame's dimensions as a (rows, columns) tuple.
women_degrees.shape
Out[4]:
(42, 18)

Plotting: Biology degrees awarded to women

In [5]:
# Line chart of the "Biology" column against "Year".
years = women_degrees["Year"]
biology_share = women_degrees["Biology"]
plt.plot(years, biology_share)
Out[5]:
[<matplotlib.lines.Line2D at 0x11bfe7978>]

Plotting: Men vs. women in Biology degrees

In [6]:
# Compare both genders on one chart. The men's series is taken as the
# complement of the women's percentage (100 - women).
years = women_degrees["Year"]
women_share = women_degrees["Biology"]
men_share = 100 - women_share

plt.plot(years, men_share, c='red', label='Men')
plt.plot(years, women_share, c='blue', label='Women')
plt.legend(loc="upper right")
plt.title('Percentage of Biology Degrees Awarded By Gender')
Out[6]:
<matplotlib.text.Text at 0x11c788438>

Multiple plots

In [7]:
# 2x2 grid of gender-gap charts, one panel per major.
fig = plt.figure(figsize=(12, 8))
# Subplot positions are 1-based; panels are created in reading order.
ax = [fig.add_subplot(2, 2, position) for position in range(1, 5)]
major_cats = ['Biology', 'Computer Science', 'Engineering', 'Math and Statistics']

for panel, major in zip(ax, major_cats):
    women_share = women_degrees[major]
    panel.plot(women_degrees["Year"], women_share, c="red", label="women")
    # Men's percentage is the complement of the women's percentage.
    panel.plot(women_degrees["Year"], 100 - women_share, c="blue", label="men")
    panel.set_title(major)
    panel.set_ylim(0, 100)

# pyplot-level legend: applies to the current axes rather than to every panel.
plt.legend()
Out[7]:
<matplotlib.legend.Legend at 0x11ca93dd8>

Selecting colors that are friendly to color-blind readers:

In [8]:
# Same 2x2 comparison, redrawn with a dark-blue / orange color pair.
fig = plt.figure(figsize=(12, 8))
ax = [fig.add_subplot(2, 2, position) for position in range(1, 5)]
major_cats = ['Biology', 'Computer Science', 'Engineering', 'Math and Statistics']

# RGB channels are written on a 0-255 scale and divided down to fractions,
# the form in which the tuples are handed to matplotlib below.
cb_dark_blue = tuple(channel / 255 for channel in (0, 107, 164))
cb_orange = tuple(channel / 255 for channel in (255, 128, 14))

for panel, major in zip(ax, major_cats):
    women_share = women_degrees[major]
    panel.plot(women_degrees["Year"], women_share, c=cb_dark_blue, label="women")
    # Men's percentage is the complement of the women's percentage.
    panel.plot(women_degrees["Year"], 100 - women_share, c=cb_orange, label="men")
    panel.set_title(major)
    panel.set_ylim(0, 100)

# pyplot-level legend: applies to the current axes rather than to every panel.
plt.legend()
Out[8]:
<matplotlib.legend.Legend at 0x11dba1278>

Adding Line width

In [9]:
# 2x2 comparison again, this time with thicker lines (linewidth=3).
fig = plt.figure(figsize=(12, 8))
ax = [fig.add_subplot(2, 2, position) for position in range(1, 5)]
major_cats = ['Biology', 'Computer Science', 'Engineering', 'Math and Statistics']

# RGB channels are written on a 0-255 scale and divided down to fractions.
cb_dark_blue = tuple(channel / 255 for channel in (0, 107, 164))
cb_orange = tuple(channel / 255 for channel in (255, 128, 14))

for panel, major in zip(ax, major_cats):
    women_share = women_degrees[major]
    panel.plot(women_degrees["Year"], women_share,
               c=cb_dark_blue, label="women", linewidth=3)
    # Men's percentage is the complement of the women's percentage.
    panel.plot(women_degrees["Year"], 100 - women_share,
               c=cb_orange, label="men", linewidth=3)
    panel.set_title(major)
    panel.set_ylim(0, 100)

# pyplot-level legend: applies to the current axes rather than to every panel.
plt.legend()
Out[9]:
<matplotlib.legend.Legend at 0x11e602828>
In [10]:
# Single row of six panels, one per major, sharing the same axis limits.
fig = plt.figure(figsize=(18, 3))
# Subplot positions are 1-based, so the six columns are numbered 1..6.
ax = [fig.add_subplot(1, 6, position) for position in range(1, 7)]
major_cats = ['Engineering', 'Computer Science', 'Psychology', 'Biology', 'Physical Sciences', 'Math and Statistics']

# RGB channels are written on a 0-255 scale and divided down to fractions.
cb_dark_blue = tuple(channel / 255 for channel in (0, 107, 164))
cb_orange = tuple(channel / 255 for channel in (255, 128, 14))

for panel, major in zip(ax, major_cats):
    women_share = women_degrees[major]
    panel.plot(women_degrees["Year"], women_share,
               c=cb_dark_blue, label="women", linewidth=3)
    # Men's percentage is the complement of the women's percentage.
    panel.plot(women_degrees["Year"], 100 - women_share,
               c=cb_orange, label="men", linewidth=3)
    panel.set_title(major)
    # Identical x and y ranges on every panel.
    panel.set_xlim(1968, 2011)
    panel.set_ylim(0, 100)

# pyplot-level legend: applies to the current axes rather than to every panel.
plt.legend()
Out[10]:
<matplotlib.legend.Legend at 0x11ed982b0>

Replacing the legend with annotations

In [11]:
# Six-panel row where the legend is replaced by text labels placed directly
# on the first and last panels.
fig = plt.figure(figsize=(18, 3))
# Subplot positions are 1-based, so the six columns are numbered 1..6.
ax = [fig.add_subplot(1, 6, position) for position in range(1, 7)]
major_cats = ['Engineering', 'Computer Science', 'Psychology', 'Biology', 'Physical Sciences', 'Math and Statistics']

# RGB channels are written on a 0-255 scale and divided down to fractions.
cb_dark_blue = tuple(channel / 255 for channel in (0, 107, 164))
cb_orange = tuple(channel / 255 for channel in (255, 128, 14))

# Panel index -> (x, y, text) labels, positioned in data coordinates
# (x is a year, y a percentage). Only the outermost panels are labelled.
line_labels = {
    0: [(2003, 87, 'Men'), (2000, 8, 'Women')],
    5: [(2003, 62, 'Men'), (2000, 35, 'Women')],
}

for idx, (panel, major) in enumerate(zip(ax, major_cats)):
    women_share = women_degrees[major]
    panel.plot(women_degrees["Year"], women_share,
               c=cb_dark_blue, label="women", linewidth=3)
    # Men's percentage is the complement of the women's percentage.
    panel.plot(women_degrees["Year"], 100 - women_share,
               c=cb_orange, label="men", linewidth=3)
    panel.set_title(major)
    panel.set_xlim(1968, 2011)
    panel.set_ylim(0, 100)
    for x_pos, y_pos, caption in line_labels.get(idx, ()):
        panel.text(x_pos, y_pos, caption)

No comments :

Post a Comment