In [2]:
# Load all-ages.csv (path is relative to the notebook's working directory).
# Assumes `pd` is pandas, imported in an earlier cell not shown here.
all_ages = pd.read_csv("all-ages.csv")
In [3]:
# Load recent-grads.csv (path is relative to the notebook's working directory).
# Assumes `pd` is pandas, imported in an earlier cell not shown here.
recent_grads = pd.read_csv("recent-grads.csv")
In [4]:
all_ages.head()
Out[4]:
In [5]:
all_ages.describe()
Out[5]:
In [6]:
recent_grads.head()
Out[6]:
In [7]:
recent_grads.describe()
Out[7]:
Summarizing each category¶
In [8]:
# Empty containers for the per-category totals of each dataset.
aa_cat_counts = {}
rg_cat_counts = {}

# Distinct Major_category values found in each DataFrame.
all_cat = all_ages["Major_category"].unique()
all_rec = recent_grads["Major_category"].unique()
In [9]:
all_cat
Out[9]:
In [10]:
all_rec
Out[10]:
In [11]:
# Sum the "Total" column per "Major_category" for each dataset.
#
# groupby aggregates every row, including row 0, and does not rely on the
# index labels being 0..n-1. Rebinding the dicts (instead of adding into
# dicts created in another cell) keeps this cell idempotent when re-run.
# sort=False keeps the categories in order of first appearance.
# Missing "Total" values are skipped by sum() rather than turning the whole
# category total into NaN.
rg_cat_counts = (
    recent_grads.groupby("Major_category", sort=False)["Total"].sum().to_dict()
)
aa_cat_counts = (
    all_ages.groupby("Major_category", sort=False)["Total"].sum().to_dict()
)
In [12]:
aa_cat_counts
Out[12]:
In [13]:
rg_cat_counts
Out[13]:
In [14]:
# Show the distinct values of the Major_category column.
print(all_ages['Major_category'].unique())

# Reset both result dicts; they are rebound further down in this cell.
aa_cat_counts = {}
rg_cat_counts = {}
def calculate_major_cat_totals(df):
    """Return the summed "Total" for every distinct "Major_category" in df.

    Parameters
    ----------
    df : DataFrame
        Must contain the columns "Major_category" and "Total".

    Returns
    -------
    dict
        Maps each unique category (in order of first appearance) to the sum
        of "Total" over the rows belonging to that category.
    """
    return {
        category: df.loc[df["Major_category"] == category, "Total"].sum()
        for category in df["Major_category"].unique()
    }
# Build the category -> summed "Total" dict for each dataset.
aa_cat_counts = calculate_major_cat_totals(all_ages)
rg_cat_counts = calculate_major_cat_totals(recent_grads)
In [15]:
aa_cat_counts
Out[15]:
In [16]:
rg_cat_counts
Out[16]:
In [25]:
# Count the majors whose recent-graduate unemployment rate is lower than the
# all-ages rate. Majors are taken from recent_grads and looked up by name in
# both DataFrames.
majors = recent_grads["Major"].unique()
rg_lower_count = 0
for m in majors:
    # Rows matching this major in each dataset; only the first match is used.
    recent = recent_grads.loc[recent_grads["Major"] == m]
    all_a = all_ages.loc[all_ages["Major"] == m]
    recent_rate = recent.iloc[0]["Unemployment_rate"]
    all_ages_rate = all_a.iloc[0]["Unemployment_rate"]
    if recent_rate < all_ages_rate:
        rg_lower_count += 1
In [26]:
recent.iloc[0]["Unemployment_rate"]
Out[26]:
In [33]:
# Count the majors whose recent-graduate unemployment rate is lower than the
# all-ages rate. Majors are taken from recent_grads and looked up by name in
# both DataFrames.
majors = recent_grads['Major'].unique()
print(len(majors))

rg_lower_count = 0
for m in majors:
    # Rows matching this major in each dataset; only the first match is used.
    recent_grads_row = recent_grads[recent_grads['Major'] == m]
    all_ages_row = all_ages[all_ages['Major'] == m]
    rg_unemp_rate = recent_grads_row.iloc[0]['Unemployment_rate']
    aa_unemp_rate = all_ages_row.iloc[0]['Unemployment_rate']
    if rg_unemp_rate < aa_unemp_rate:
        rg_lower_count += 1

# Report the final tally once, after every major has been compared.
print(rg_lower_count)
In [31]:
recent_grads_row.iloc[0]['Unemployment_rate']
Out[31]:
In [32]:
recent_grads_row
Out[32]:
No comments :
Post a Comment