Monday, January 30, 2017

Data Visualisation - Histograms and Boxplots



In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

Data

In [2]:
# Load the score-comparison CSV into a DataFrame. The path is relative, so the
# file must sit in the notebook's working directory.
reviews = pd.read_csv("fandango_score_comparison.csv")
In [4]:
# Preview the first rows to check which columns were loaded.
reviews.head()
Out[4]:
FILM RottenTomatoes RottenTomatoes_User Metacritic Metacritic_User IMDB Fandango_Stars Fandango_Ratingvalue RT_norm RT_user_norm ... IMDB_norm RT_norm_round RT_user_norm_round Metacritic_norm_round Metacritic_user_norm_round IMDB_norm_round Metacritic_user_vote_count IMDB_user_vote_count Fandango_votes Fandango_Difference
0 Avengers: Age of Ultron (2015) 74 86 66 7.1 7.8 5.0 4.5 3.70 4.3 ... 3.90 3.5 4.5 3.5 3.5 4.0 1330 271107 14846 0.5
1 Cinderella (2015) 85 80 67 7.5 7.1 5.0 4.5 4.25 4.0 ... 3.55 4.5 4.0 3.5 4.0 3.5 249 65709 12640 0.5
2 Ant-Man (2015) 80 90 64 8.1 7.8 5.0 4.5 4.00 4.5 ... 3.90 4.0 4.5 3.0 4.0 4.0 627 103660 12055 0.5
3 Do You Believe? (2015) 18 84 22 4.7 5.4 5.0 4.5 0.90 4.2 ... 2.70 1.0 4.0 1.0 2.5 2.5 31 3136 1793 0.5
4 Hot Tub Time Machine 2 (2015) 14 28 29 3.4 5.1 3.5 3.0 0.70 1.4 ... 2.55 0.5 1.5 1.5 1.5 2.5 88 19560 1021 0.5
5 rows × 22 columns
In [7]:
# Narrow the frame to the film title plus the four per-site score columns that
# the rest of the notebook plots on a 0-5 axis.
cols = [
    'FILM',
    'RT_user_norm',
    'Metacritic_user_nom',  # spelled "nom" in the data's own header; not a typo here
    'IMDB_norm',
    'Fandango_Ratingvalue',
]
norm_reviews = reviews.loc[:, cols]
norm_reviews.head()
Out[7]:
FILM RT_user_norm Metacritic_user_nom IMDB_norm Fandango_Ratingvalue
0 Avengers: Age of Ultron (2015) 4.3 3.55 3.90 4.5
1 Cinderella (2015) 4.0 3.75 3.55 4.5
2 Ant-Man (2015) 4.5 4.05 3.90 4.5
3 Do You Believe? (2015) 4.2 2.35 2.70 4.5
4 Hot Tub Time Machine 2 (2015) 1.4 1.70 2.55 3.0

Frequency counts

In [14]:
# Count how many films received each distinct score, then order the result by
# score (the index) instead of by frequency so it reads like a distribution.
fandango_distribution = (
    norm_reviews['Fandango_Ratingvalue']
    .value_counts()
    .sort_index()
)
imdb_distribution = (
    norm_reviews['IMDB_norm']
    .value_counts()
    .sort_index()
)
# One print call with a newline separator emits the same text as two prints.
print(fandango_distribution, imdb_distribution, sep="\n")
2.7     2
2.8     2
2.9     5
3.0     4
3.1     3
3.2     5
3.3     4
3.4     9
3.5     9
3.6     8
3.7     9
3.8     5
3.9    12
4.0     7
4.1    16
4.2    12
4.3    11
4.4     7
4.5     9
4.6     4
4.8     3
Name: Fandango_Ratingvalue, dtype: int64
2.00     1
2.10     1
2.15     1
2.20     1
2.30     2
2.45     2
2.50     1
2.55     1
2.60     2
2.70     4
2.75     5
2.80     2
2.85     1
2.90     1
2.95     3
3.00     2
3.05     4
3.10     1
3.15     9
3.20     6
3.25     4
3.30     9
3.35     7
3.40     1
3.45     7
3.50     4
3.55     7
3.60    10
3.65     5
3.70     8
3.75     6
3.80     3
3.85     4
3.90     9
3.95     2
4.00     1
4.05     1
4.10     4
4.15     1
4.20     2
4.30     1
Name: IMDB_norm, dtype: int64

Histogram

In [18]:
# Histogram of Fandango scores using matplotlib's default bin count, with the
# x-range pinned to 0-5 rather than to the observed min/max.
fig, ax = plt.subplots(nrows=1, ncols=1)
ax.hist(x=norm_reviews['Fandango_Ratingvalue'], range=(0, 5))
plt.show()
In [19]:
# Same histogram with a finer resolution: 20 bins over 0-5, i.e. 0.25 per bin.
fig, ax = plt.subplots(nrows=1, ncols=1)
ax.hist(x=norm_reviews['Fandango_Ratingvalue'], bins=20, range=(0, 5))
plt.show()
In [25]:
# Compare the score distributions of the four sites in a 2x2 grid.
# Every panel uses the same bins (20 over 0-5) and the same y-limits (0-50),
# so bar heights can be compared directly across panels.
panels = [
    ("Fandango_Ratingvalue", "Distribution of Fandango Ratings"),
    ("RT_user_norm", "Distribution of Rotten Tomatoes Ratings"),
    ("Metacritic_user_nom", "Distribution of Metacritic Ratings"),
    ("IMDB_norm", "Distribution of IMDB Ratings"),
]

fig = plt.figure(figsize=(15, 8))
axes = [fig.add_subplot(2, 2, position) for position in range(1, len(panels) + 1)]
# Keep the per-panel names bound in case a later cell refers to them.
ax1, ax2, ax3, ax4 = axes

# One loop replaces four copy-pasted hist/set_ylim/set_title stanzas.
for panel_ax, (column, title) in zip(axes, panels):
    panel_ax.hist(norm_reviews[column], range=(0, 5), bins=20)
    panel_ax.set_ylim(0, 50)
    panel_ax.set_title(title)

# Render explicitly so the cell does not echo the repr of the last set_title()
# call, matching the other plotting cells that end with plt.show().
plt.show()
Out[25]:
<matplotlib.text.Text at 0x122a497f0>

Boxplot

In [26]:
# Single boxplot of the Rotten Tomatoes user scores on a fixed 0-5 y-axis.
fig, ax = plt.subplots(nrows=1, ncols=1)
ax.boxplot(x=norm_reviews["RT_user_norm"])
ax.set_ylim(bottom=0, top=5)
# Replace the default numeric tick label with the site name.
ax.set_xticklabels(["Rotten Tomatoes"])
Out[26]:
[<matplotlib.text.Text at 0x122aa8550>]

Multiple Boxplots

In [29]:
# Side-by-side boxplots, one per score column, on a shared 0-5 y-axis.
num_cols = [
    'RT_user_norm',
    'Metacritic_user_nom',
    'IMDB_norm',
    'Fandango_Ratingvalue',
]
fig, ax = plt.subplots(figsize=(10, 8))
# A 2-D array gives one box per column, in the same order as num_cols.
ax.boxplot(norm_reviews.loc[:, num_cols].values)
ax.set_ylim(bottom=0, top=5)
# Rotate the labels so the long column names do not overlap.
ax.set_xticklabels(num_cols, rotation=90)
plt.show()

No comments :

Post a Comment