Sunday, January 29, 2017

Data Visualisation - Bars and Scatter Plots



In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Render matplotlib figures inline, directly below the cell that creates them.
%matplotlib inline
# Ask the inline backend for 'retina' (high-DPI) figure output.
%config InlineBackend.figure_format = 'retina'

Data

In [2]:
# Load the Fandango score-comparison dataset from the working directory.
reviews = pd.read_csv(filepath_or_buffer="fandango_score_comparison.csv")
In [3]:
# Preview the first five rows of the raw data.
reviews.head(n=5)
Out[3]:
FILM RottenTomatoes RottenTomatoes_User Metacritic Metacritic_User IMDB Fandango_Stars Fandango_Ratingvalue RT_norm RT_user_norm ... IMDB_norm RT_norm_round RT_user_norm_round Metacritic_norm_round Metacritic_user_norm_round IMDB_norm_round Metacritic_user_vote_count IMDB_user_vote_count Fandango_votes Fandango_Difference
0 Avengers: Age of Ultron (2015) 74 86 66 7.1 7.8 5.0 4.5 3.70 4.3 ... 3.90 3.5 4.5 3.5 3.5 4.0 1330 271107 14846 0.5
1 Cinderella (2015) 85 80 67 7.5 7.1 5.0 4.5 4.25 4.0 ... 3.55 4.5 4.0 3.5 4.0 3.5 249 65709 12640 0.5
2 Ant-Man (2015) 80 90 64 8.1 7.8 5.0 4.5 4.00 4.5 ... 3.90 4.0 4.5 3.0 4.0 4.0 627 103660 12055 0.5
3 Do You Believe? (2015) 18 84 22 4.7 5.4 5.0 4.5 0.90 4.2 ... 2.70 1.0 4.0 1.0 2.5 2.5 31 3136 1793 0.5
4 Hot Tub Time Machine 2 (2015) 14 28 29 3.4 5.1 3.5 3.0 0.70 1.4 ... 2.55 0.5 1.5 1.5 1.5 2.5 88 19560 1021 0.5
5 rows × 22 columns
In [4]:
# List every column label in the raw frame.
reviews.columns
Out[4]:
Index(['FILM', 'RottenTomatoes', 'RottenTomatoes_User', 'Metacritic',
       'Metacritic_User', 'IMDB', 'Fandango_Stars', 'Fandango_Ratingvalue',
       'RT_norm', 'RT_user_norm', 'Metacritic_norm', 'Metacritic_user_nom',
       'IMDB_norm', 'RT_norm_round', 'RT_user_norm_round',
       'Metacritic_norm_round', 'Metacritic_user_norm_round',
       'IMDB_norm_round', 'Metacritic_user_vote_count', 'IMDB_user_vote_count',
       'Fandango_votes', 'Fandango_Difference'],
      dtype='object')
In [12]:
# Column labels containing the substring "norm".
# Note: 'Metacritic_user_nom' is spelled without the "r" in this dataset,
# so this filter does not pick it up.
list(filter(lambda label: "norm" in label, reviews.columns))
Out[12]:
['RT_norm',
 'RT_user_norm',
 'Metacritic_norm',
 'IMDB_norm',
 'RT_norm_round',
 'RT_user_norm_round',
 'Metacritic_norm_round',
 'Metacritic_user_norm_round',
 'IMDB_norm_round']
In [13]:
# Film title plus the user-rating columns to compare.
# 'Metacritic_user_nom' is the dataset's own spelling of that column.
columns = [
    "FILM",
    "RT_user_norm",
    "Metacritic_user_nom",
    "IMDB_norm",
    "Fandango_Ratingvalue",
    "Fandango_Stars",
]

norm_reviews = reviews.loc[:, columns]
In [14]:
# Preview the first five rows of the trimmed frame.
norm_reviews.head(n=5)
Out[14]:
FILM RT_user_norm Metacritic_user_nom IMDB_norm Fandango_Ratingvalue Fandango_Stars
0 Avengers: Age of Ultron (2015) 4.3 3.55 3.90 4.5 5.0
1 Cinderella (2015) 4.0 3.75 3.55 4.5 5.0
2 Ant-Man (2015) 4.5 4.05 3.90 4.5 5.0
3 Do You Believe? (2015) 4.2 2.35 2.70 4.5 5.0
4 Hot Tub Time Machine 2 (2015) 1.4 1.70 2.55 3.0 3.5
In [31]:
# One bar per rating source for the first film in the dataset (row label 0).
num_cols = [
    'RT_user_norm',
    'Metacritic_user_nom',
    'IMDB_norm',
    'Fandango_Ratingvalue',
    'Fandango_Stars',
]

# x coordinate handed to each bar: [0.75, 1.75, 2.75, 3.75, 4.75]
# (these are the bars' left edges when bars are drawn with align='edge').
bar_positions = 0.75 + np.arange(5)

# Bar heights: that film's rating from each source.
first_film_ratings = norm_reviews.loc[0, num_cols]
bar_heights = first_film_ratings.values

fig, ax = plt.subplots()
ax.bar(bar_positions, bar_heights)
Out[31]:
<Container object of 5 artists>
In [22]:
# Same bar chart drawn through the pyplot interface; bar_positions and
# bar_heights must already exist from the bar-plot cell.
plt.bar(bar_positions,bar_heights)
Out[22]:
<Container object of 5 artists>
In [28]:
# Draw the same bar chart twice, in the first two slots of a 2x2 subplot grid.
fig = plt.figure()
ax, ax2 = (fig.add_subplot(2, 2, slot) for slot in (1, 2))

ax.bar(bar_positions, bar_heights)
ax2.bar(bar_positions, bar_heights)
Out[28]:
<Container object of 5 artists>

Axis ticks: ax.set_xticks() and ax.set_xticklabels()

In [39]:
# Bar chart of the first film's ratings, with one labelled x tick per source.
num_cols = [
    'RT_user_norm',
    'Metacritic_user_nom',
    'IMDB_norm',
    'Fandango_Ratingvalue',
    'Fandango_Stars',
]

# x coordinate handed to each bar: [0.75, 1.75, 2.75, 3.75, 4.75]
# (these are the bars' left edges when bars are drawn with align='edge').
bar_positions = 0.75 + np.arange(5)

# Ticks sit at x = 1, 2, 3, 4, 5.
tick_positions = np.arange(1, 6)

# Bar heights: ratings of the film in row 0, one per source.
first_film_ratings = norm_reviews.loc[0, num_cols]
bar_heights = first_film_ratings.values

fig, ax = plt.subplots()
ax.bar(bar_positions, bar_heights)
ax.set_xticks(tick_positions)
# Rotate the labels so the long column names do not overlap.
ax.set_xticklabels(num_cols, rotation=90)
Out[39]:
[<matplotlib.text.Text at 0x11f330c50>,
 <matplotlib.text.Text at 0x11f30a8d0>,
 <matplotlib.text.Text at 0x11f4e9518>,
 <matplotlib.text.Text at 0x11f4ed048>,
 <matplotlib.text.Text at 0x11f4edb38>]

Full Example

In [40]:
# Complete vertical bar chart: user ratings of the first film (row label 0)
# from each source, with tick labels, axis labels and a title.
num_cols = ['RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']

# .loc replaces the removed .ix indexer and matches the earlier cells.
bar_heights = norm_reviews.loc[0, num_cols].values
# np.arange rather than a bare arange, which only exists after a pylab star-import.
bar_positions = np.arange(5) + 0.75
tick_positions = range(1,6)
fig, ax = plt.subplots()

# bar_positions are LEFT edges: with width 0.5 each bar spans [k - 0.25, k + 0.25],
# i.e. it is centred on tick k. align="edge" pins that down regardless of the
# matplotlib version's default alignment.
ax.bar(bar_positions, bar_heights, 0.5, align="edge")
ax.set_xticks(tick_positions)
# Rotate the labels so the long column names do not overlap.
ax.set_xticklabels(num_cols, rotation=90)

ax.set_xlabel('Rating Source')
ax.set_ylabel('Average Rating')
ax.set_title('Average User Rating For Avengers: Age of Ultron (2015)')
plt.show()

Horizontal bar plot

In [47]:
# Horizontal version of the full example: one bar per rating source for the
# first film (row label 0), with the sources on the y axis.
num_cols = ['RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']

# .loc replaces the removed .ix indexer and matches the earlier cells.
# For barh these values are the bar lengths along x; the name is kept for
# consistency with the vertical example.
bar_heights = norm_reviews.loc[0, num_cols].values
# np.arange rather than a bare arange, which only exists after a pylab star-import.
bar_positions = np.arange(5) + 0.75
tick_positions = range(1,6)
fig, ax = plt.subplots()

# bar_positions are the BOTTOM edges: with thickness 0.5 each bar spans
# [k - 0.25, k + 0.25] and is centred on tick k. align="edge" pins that down
# regardless of the matplotlib version's default alignment.
ax.barh(bar_positions, bar_heights, 0.5, align="edge")
ax.set_yticks(tick_positions)
ax.set_yticklabels(num_cols)

ax.set_ylabel('Rating Source')
ax.set_xlabel('Average Rating')
ax.set_title('Average User Rating For Avengers: Age of Ultron (2015)')
plt.show()
In [49]:
# Scatter plot of Fandango ratings (x) against Rotten Tomatoes user ratings (y).
fandango_scores = norm_reviews["Fandango_Ratingvalue"]
rt_user_scores = norm_reviews["RT_user_norm"]

fig, ax = plt.subplots()
ax.scatter(fandango_scores, rt_user_scores)
ax.set_xlabel("Fandango")
ax.set_ylabel("Rotten Tomatoes")
Out[49]:
<matplotlib.text.Text at 0x12083e8d0>
In [50]:
# Two stacked scatter plots of the same pair of columns, with the axes
# swapped in the second panel.
fig = plt.figure(figsize=(5,10))
ax1 = fig.add_subplot(2,1,1)
ax2 = fig.add_subplot(2,1,2)

# Top panel: Fandango on x, Rotten Tomatoes on y.
ax1.scatter(norm_reviews["Fandango_Ratingvalue"],norm_reviews["RT_user_norm"])
ax1.set_xlabel("Fandango")
ax1.set_ylabel("Rotten Tomatoes")

# Bottom panel: Rotten Tomatoes on x, Fandango on y.
# The labels go on ax2; setting them on ax1 would overwrite the top panel's
# labels and leave this panel unlabelled.
ax2.scatter(norm_reviews["RT_user_norm"],norm_reviews["Fandango_Ratingvalue"])
ax2.set_ylabel("Fandango")
ax2.set_xlabel("Rotten Tomatoes")
Out[50]:
<matplotlib.text.Text at 0x1205ffc88>

Benchmarking correlation

In [51]:
# Compare Fandango ratings against each other site's user ratings, one
# scatter panel per site, all on the same 0-5 scale so the panels are
# directly comparable.
# (matplotlib.pyplot is already imported as plt in the notebook's import cell.)

# (column in norm_reviews, y-axis label) for each panel, top to bottom.
comparison_panels = [
    ("RT_user_norm", "Rotten Tomatoes"),
    ("Metacritic_user_nom", "Metacritic"),
    ("IMDB_norm", "IMDB"),
]

fig = plt.figure(figsize=(5,10))
ax1, ax2, ax3 = panel_axes = [fig.add_subplot(3, 1, slot) for slot in (1, 2, 3)]

for panel_ax, (rating_column, site_label) in zip(panel_axes, comparison_panels):
    panel_ax.scatter(norm_reviews["Fandango_Ratingvalue"], norm_reviews[rating_column])
    panel_ax.set_xlabel("Fandango")
    panel_ax.set_ylabel(site_label)
    # Fix both axes to the full rating range.
    panel_ax.set_xlim(0, 5)
    panel_ax.set_ylim(0, 5)

plt.show()
Out[51]:
(0, 5)

No comments :

Post a Comment