Data¶
In [2]:
# Load the score-comparison CSV into a DataFrame; the path is relative to the
# notebook's working directory.
reviews = pd.read_csv("fandango_score_comparison.csv")
In [3]:
# Preview the first rows of the loaded data as a quick sanity check.
reviews.head()
Out[3]:
In [4]:
# Show every column label available in the raw data.
reviews.columns
Out[4]:
In [12]:
# Column labels that contain the substring "norm".
# NOTE(review): "Metacritic_user_nom", which the column selection further down
# relies on, has no "r" and therefore will not show up in this list.
[label for label in reviews.columns if "norm" in label]
Out[12]:
In [13]:
# Film title plus the five rating columns used by the plots in this notebook.
# NOTE(review): "Metacritic_user_nom" is missing the "r" -- presumably this
# mirrors the header in the CSV; confirm against reviews.columns.
columns = [
    "FILM",
    "RT_user_norm",
    "Metacritic_user_nom",
    "IMDB_norm",
    "Fandango_Ratingvalue",
    "Fandango_Stars",
]
norm_reviews = reviews[columns]
In [14]:
# Preview the reduced frame to confirm only the selected columns remain.
norm_reviews.head()
Out[14]:
In [31]:
# Bar chart of the five rating columns for the row labelled 0.
fig, ax = plt.subplots()

# Left edges of the bars: [0.75, 1.75, 2.75, 3.75, 4.75].
bar_positions = np.arange(5) + 0.75

# Heights of the bars: the value of each rating column in the first row.
num_cols = ['RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']
bar_heights = norm_reviews.loc[0, num_cols].values

# align="edge" is spelled out so that bar_positions really are the left edges:
# matplotlib >= 2.0 defaults to align="center", which would silently shift
# every bar half a width to the left of what the comment above describes.
ax.bar(bar_positions, bar_heights, align="edge")
Out[31]:
In [22]:
# Same bars drawn through the pyplot state-machine interface instead of an
# explicit Axes; reuses bar_positions / bar_heights from the previous cell.
plt.bar(bar_positions,bar_heights)
Out[22]:
In [28]:
# One figure on a 2x2 grid, with the same bar chart drawn in the first two
# slots; reuses bar_positions / bar_heights computed in an earlier cell.
fig = plt.figure()

# Slot 1 (top-left).
ax = fig.add_subplot(2, 2, 1)
ax.bar(bar_positions, bar_heights)

# Slot 2 (top-right).
ax2 = fig.add_subplot(2, 2, 2)
ax2.bar(bar_positions, bar_heights)
Out[28]:
Axis ticks: ax.set_xticks() and ax.set_xticklabels()¶
In [39]:
# Bar chart for the row labelled 0, with one labelled x tick per rating column.
fig, ax = plt.subplots()

# Left edges of the bars: [0.75, 1.75, 2.75, 3.75, 4.75].
bar_positions = np.arange(5) + 0.75

# Heights of the bars: the value of each rating column in the first row.
num_cols = ['RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']
bar_heights = norm_reviews.loc[0, num_cols].values

# align="edge" is spelled out so that bar_positions really are the left edges:
# matplotlib >= 2.0 defaults to align="center", which would move the bars
# away from the integer tick positions set below.
ax.bar(bar_positions, bar_heights, align="edge")

# Put a tick at 1..5 and label each one with its column name, rotated so the
# long names do not overlap.
tick_positions = np.arange(1,6)
ax.set_xticks(tick_positions)
ax.set_xticklabels(num_cols, rotation = 90)
Out[39]:
Full Example¶
In [40]:
# Complete vertical bar chart: the five rating columns for the row labelled 0,
# with tick labels, axis labels and a title.
num_cols = ['RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']

# .loc replaces the removed DataFrame.ix indexer (gone since pandas 1.0); it is
# also what the earlier cells in this notebook already use.
bar_heights = norm_reviews.loc[0, num_cols].values

# np.arange instead of a bare arange, which only resolves after a star import
# or %pylab.
bar_positions = np.arange(5) + 0.75
tick_positions = range(1,6)

fig, ax = plt.subplots()

# Width 0.5 with left edges at k + 0.75 centres each bar on tick k + 1.
# align="edge" is explicit because matplotlib >= 2.0 defaults to "center".
ax.bar(bar_positions, bar_heights, 0.5, align="edge")
ax.set_xticks(tick_positions)
ax.set_xticklabels(num_cols, rotation=90)
ax.set_xlabel('Rating Source')
ax.set_ylabel('Average Rating')
ax.set_title('Average User Rating For Avengers: Age of Ultron (2015)')
plt.show()
Horizontal bar plot¶
In [47]:
# Horizontal version of the bar chart: rating sources on the y axis, the
# rating value as bar length on the x axis.
num_cols = ['RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']

# .loc replaces the removed DataFrame.ix indexer (gone since pandas 1.0).
# The name bar_heights is kept from the vertical example; here the values are
# used as bar lengths.
bar_heights = norm_reviews.loc[0, num_cols].values

# np.arange instead of a bare arange, which only resolves after a star import
# or %pylab.
bar_positions = np.arange(5) + 0.75
tick_positions = range(1,6)

fig, ax = plt.subplots()

# Thickness 0.5 with bottom edges at k + 0.75 centres each bar on tick k + 1.
# align="edge" is explicit so bar_positions are treated as edges, not centres.
ax.barh(bar_positions, bar_heights, 0.5, align="edge")
ax.set_yticks(tick_positions)
ax.set_yticklabels(num_cols)
ax.set_ylabel('Rating Source')
ax.set_xlabel('Average Rating')
ax.set_title('Average User Rating For Avengers: Age of Ultron (2015)')
plt.show()
In [49]:
# Scatter plot with one point per row: the Fandango_Ratingvalue column on x
# against the RT_user_norm column on y.
fig, ax = plt.subplots()
ax.scatter(
    x=norm_reviews["Fandango_Ratingvalue"],
    y=norm_reviews["RT_user_norm"],
)
ax.set_xlabel("Fandango")
ax.set_ylabel("Rotten Tomatoes")
Out[49]:
In [50]:
# Two stacked scatter plots of the same pair of columns with the axes swapped.
fig = plt.figure(figsize=(5,10))
ax1 = fig.add_subplot(2,1,1)
ax2 = fig.add_subplot(2,1,2)

# Top panel: Fandango on x, Rotten Tomatoes on y.
ax1.scatter(norm_reviews["Fandango_Ratingvalue"],norm_reviews["RT_user_norm"])
ax1.set_xlabel("Fandango")
ax1.set_ylabel("Rotten Tomatoes")

# Bottom panel: Rotten Tomatoes on x, Fandango on y.
# The labels go on ax2; setting them on ax1 again would overwrite the top
# panel's correct labels and leave this panel unlabelled.
ax2.scatter(norm_reviews["RT_user_norm"],norm_reviews["Fandango_Ratingvalue"])
ax2.set_xlabel("Rotten Tomatoes")
ax2.set_ylabel("Fandango")
Out[50]:
Benchmarking correlation¶
In [51]:
# Three stacked scatter plots comparing the Fandango_Ratingvalue column (x)
# with each other site's score column (y), all on a shared 0-5 scale.
import matplotlib.pyplot as plt

fig = plt.figure(figsize=(5,10))
ax1 = fig.add_subplot(3,1,1)
ax2 = fig.add_subplot(3,1,2)
ax3 = fig.add_subplot(3,1,3)

# One entry per panel: (axes, column plotted on y, y-axis label). Driving the
# panels from this table keeps the three plots from drifting apart.
panels = [
    (ax1, "RT_user_norm", "Rotten Tomatoes"),
    (ax2, "Metacritic_user_nom", "Metacritic"),
    (ax3, "IMDB_norm", "IMDB"),
]

for panel_ax, score_col, site_label in panels:
    panel_ax.scatter(norm_reviews["Fandango_Ratingvalue"], norm_reviews[score_col])
    panel_ax.set_xlabel("Fandango")
    panel_ax.set_ylabel(site_label)
    # Identical limits on both axes of every panel so the point clouds can be
    # compared directly.
    panel_ax.set_xlim(0,5)
    panel_ax.set_ylim(0,5)
Out[51]:
No comments :
Post a Comment