%reload_ext pretty_jupyter
%%jinja markdown
<style>
div.main-container {width:98%;max-width:100%;}
.btn {display:none;}
.col-md-3 {width:20%;}
.col-md-9 {width:80%;}
h2 {margin-top:30px;}
.tocify-extend-page {height: 50px !important;}
</style>
%%html
<head>
<link rel="icon" type="image/x-icon" href="favicon.ico">
</head>
from mpl_toolkits.axes_grid1 import ImageGrid
from itables import init_notebook_mode, show
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import datetime
import warnings
import os
# Notebook-wide configuration.
warnings.simplefilter(action='ignore', category=FutureWarning)
# itables: tables are static by default; interactivity is opt-in via show().
init_notebook_mode(all_interactive=False)
# Wide score tables: allow up to 50 columns before truncation.
pd.set_option('display.max_columns', 50)
# Collage of every album cover collected so far, laid out on a 7x9 grid.
files = sorted("covers/" + str(name) for name in os.listdir("covers") if ".webp" in name)
images = [plt.imread(path) for path in files]
fig = plt.figure(figsize=(16, 8))
grid = ImageGrid(fig, 111, nrows_ncols=(7, 9), axes_pad=0.1)
for axis, image in zip(grid, images):
    axis.imshow(image)
plt.show()
The Schedule
# Rotate the advocate rotation so whoever's turn it is comes first.
order = ['RL', 'WS', 'MT', 'TL', 'JP', 'RW', 'MJ', 'SF', 'RS']
album_count = len(images) - 1
current_turn = album_count % 9
order = order[current_turn:] + order[:current_turn]

# Club started 2024-02-05; one album per week (plus a fixed 5-week offset).
started = datetime.datetime(2024, 2, 5).date()
time_elapsed = datetime.timedelta(weeks=album_count + 5)
one_week = datetime.timedelta(weeks=1)
current_week = started + time_elapsed

# Next four weeks of the rotation.
schedule = pd.DataFrame(
    {'From': order[:4],
     'Album #': [album_count + offset for offset in range(4)],
     'Week Of': [current_week + offset * one_week for offset in range(4)]},
    index=['This Week', 'Next Week', 'Coming Up', 'Next Month'])
schedule['Week Of'] = schedule['Week Of'].apply(lambda date: date.strftime('%d/%m'))
show(schedule)
# Load album metadata and the per-judge score sheet.
albums = pd.read_csv('albums.csv')
scores = pd.read_csv('scores.csv')
# Retrospective score changes (corrections agreed after the fact).
scores.at[5,'MT'] = 8
# Rows 1 and 9 both change to 3. The original `scores.at[(1,9),'MT'] = 3`
# performed setting-with-enlargement on the flat RangeIndex, silently ADDING
# a phantom row labelled (1, 9) instead of updating rows 1 and 9.
scores.loc[[1, 9], 'MT'] = 3
scores.at[2,'JP'] = 8
scores.at[13,'MJ'] = 5.75
scores.at[25,'TL'] = 7.5
# Tighten dtypes: yes/no flags -> bool, low-cardinality text -> category.
albums[['EDA','POC','Group']] = albums[['EDA','POC','Group']].astype(bool)
albums[['From','Genre','Decade','Origin','Gender']] = albums[['From','Genre','Decade','Origin','Gender']].astype('category')
scores.iloc[:,1:] = scores.iloc[:,1:].astype(float)
# Per-album aggregates over the nine judge columns (columns 1-9; the new
# aggregate columns land at position 10+ so this slice stays stable).
player_cols = scores.columns[1:10]
scores['AVG'] = scores[player_cols].mean(axis=1).round(decimals=2)
scores['MED'] = scores[player_cols].median(axis=1).round(decimals=2)
scores['MAX'] = scores[player_cols].max(axis=1).round(decimals=2)
scores['MIN'] = scores[player_cols].min(axis=1).round(decimals=2)
scores['SUM'] = scores[player_cols].sum(axis=1).round(decimals=2)
scores['STD'] = scores[player_cols].std(axis=1).round(decimals=2)
players = list(scores.columns[1:10])
aggs = list(scores.columns[10:])
scores_players = scores.loc[:,(["#"]+players)]
scores_aggs = scores.loc[:,(["#"]+aggs)]
# One row per scored album: metadata + scores, joined on the shared '#' column.
master = pd.merge(left=albums,right=scores,how='inner')
Get the Tables!
Album Details
# Shift to 1-based numbering so the table index matches album numbers.
albums.index = albums.index + 1
show(albums.iloc[:, 1:])
Individual Scores
# Judge-by-judge scores next to basic album metadata, shaded per column.
combined = pd.merge(left=albums.iloc[:, 1:5], right=scores_players, how='inner')
combined.index += 1
styled_players = (combined.style
                  .background_gradient(subset=players, cmap='Blues')
                  .format(precision=2))
# Blank out cells for albums a judge has not scored yet.
show(styled_players.map(lambda v: 'color: transparent; background-color: transparent' if pd.isnull(v) else ''))
Summary Scores
# Aggregate scores next to album metadata, AVG and MAX highlighted.
combined_aggs = pd.merge(left=albums.iloc[:, 1:7], right=scores_aggs, how='inner')
combined_aggs.index += 1
styled_aggs = (combined_aggs.style
               .background_gradient(subset=['AVG', 'MAX'], cmap='Greens')
               .format(precision=2))
# Blank out cells with no score yet.
show(styled_aggs.map(lambda v: 'color: transparent; background-color: transparent' if pd.isnull(v) else ''))
Aggregations
Average Scores by Advocate
# How many albums each advocate has brought, plus mean aggregate scores
# over their picks (pivot_table's default aggfunc is mean).
stat_cols = ['AVG', 'MAX', 'MED', 'MIN', 'SUM']
advocates_counts = master.groupby('From')['#'].agg('count').reset_index()
advocates_averages = pd.pivot_table(master, values=stat_cols, index="From", columns=None).reset_index()
advocates_combined = (pd.merge(advocates_counts, advocates_averages, how="inner")
                      .rename(columns={'#': 'Albums'}))
show(advocates_combined.style
     .background_gradient(subset=['AVG', 'MAX', 'MIN'], cmap='Blues')
     .format(precision=2))
Average Scores by Decade
# Album counts and mean scores grouped by release decade.
album_counts_by_decade = master.groupby('Decade',observed=True)['#'].agg('count').reset_index()
scores_by_decade = master.groupby(['Decade'], observed=True)[players+aggs].agg('mean').fillna('').reset_index()
# Only '#' needs renaming; the grouped frame has no 'Year' column, so the
# original {'Year': 'Decade'} rename entry was dead code and is dropped.
combined_by_decade = pd.merge(album_counts_by_decade,scores_by_decade,how="inner").rename(columns={'#':'Albums'})
show(combined_by_decade.style \
    .background_gradient(subset=['Albums','AVG','MAX','MIN'], cmap='Greens') \
    .format(precision=2))
Average Proximities
# Signed distance of each judge's score from the album average.
dists_from_avg = pd.merge(left=scores_aggs.iloc[:, :2], right=scores_players, how='inner')
for player in order:
    dists_from_avg[player] = dists_from_avg[player] - dists_from_avg['AVG']

# How often each player scored above / exactly at / below the average
# (NaN scores fall into none of the three buckets, as before).
avg_calcs = []
for player in order:
    diffs = dists_from_avg[player]
    avg_calcs.append([player,
                      int((diffs > 0).sum()),
                      int((diffs == 0).sum()),
                      int((diffs < 0).sum())])
avg_calcs = pd.DataFrame(avg_calcs, columns=["Player", "Above Average", "Exactly Average", "Below Average"])

# Per album, count the judge(s) whose score sat closest to the average.
melt_for_avg_calcs = dists_from_avg.melt(id_vars=['#'], value_vars=players, var_name='Judge', value_name='Score')
melt_for_avg_calcs['Score'] = melt_for_avg_calcs['Score'].abs()
# Single pass over albums instead of re-scanning them once per player;
# replaces the bare `except: pass` that hid the missing-judge KeyError.
closest_counts = dict.fromkeys(players, 0)
for album_num in range(1, album_count + 1):
    album_scores = melt_for_avg_calcs[melt_for_avg_calcs['#'] == album_num]
    min_scores = album_scores[album_scores.Score == album_scores.Score.min()]
    for judge in min_scores['Judge']:
        if judge in closest_counts:
            closest_counts[judge] += 1
# Keep the counts numeric: the original stored str(count), producing an
# object column that the numeric background_gradient below cannot shade.
closest_to_avg_counts = pd.DataFrame(closest_counts.items(), columns=["Player", "Closest to Average"])
combined_avg_calcs = pd.merge(avg_calcs, closest_to_avg_counts, how="inner").sort_values('Player').set_index("Player")
combined_avg_calcs = combined_avg_calcs.style \
    .background_gradient(subset=["Above Average", "Exactly Average", "Below Average", "Closest to Average"], cmap='Blues') \
    .format(precision=2)
show(combined_avg_calcs)
Relationship Matrix
# Mean score each judge (rows) gives to each advocate's picks (columns).
scored_albums = pd.merge(left=albums.iloc[:, :4], right=scores_players, how='inner')
matrix_rows = []
for judge in players:
    row = [judge]
    for advocate in players:
        picks = scored_albums[scored_albums["From"] == advocate]
        row.append(picks[judge].mean())
    matrix_rows.append(row)
relationship_matrix = pd.DataFrame(matrix_rows, columns=["Judge"] + players).set_index("Judge").style \
    .background_gradient(cmap='Greens') \
    .format(precision=2)
# Hide cells with no judge/advocate overlap yet.
show(relationship_matrix.map(lambda v: 'color: transparent; background-color: transparent' if pd.isnull(v) else ''))
Scores by Round
Aggregate Scores
# Aggregate-score summary (mean/max/min) for each round of the club.
scores_by_round = master.groupby(['Round'], observed=True)[aggs].agg(['mean', 'max', 'min']).fillna('')
show(scores_by_round.style
     .background_gradient(cmap='Blues')
     .format(precision=2), classes="compact")
Players
# Per-judge summary per round, split across two tables so they fit on screen.
# The original sliced players[:4] then players[5:], silently dropping the
# fifth judge (index 4); players[4:] keeps everyone.
scores_by_round = master.groupby(['Round'], observed=True)[players[:4]].agg(['mean','max','min']).fillna('')
show(scores_by_round.style \
    .background_gradient(cmap='Greens') \
    .format(precision=2),classes="compact")
scores_by_round = master.groupby(['Round'], observed=True)[players[4:]].agg(['mean','max','min']).fillna('')
show(scores_by_round.style \
    .background_gradient(cmap='Greens') \
    .format(precision=2),classes="compact")
Visualisations
# Long format (one row per judge/album score) plus the per-judge statistics
# used below to order the bar charts.
melted = scores.melt(id_vars=['#'], value_vars=players, var_name='Judge', value_name='Score')
judge_scores = melted.groupby(["Judge"])["Score"]
params = {
    "AVG": ["mean", judge_scores.mean()],
    "MED": ["median", judge_scores.median()],
    "SUM": ["sum", judge_scores.sum()],
    "MAX": ["max", judge_scores.max()],
    "MIN": ["min", judge_scores.min()],
    "STD": ["std", judge_scores.std()],
}
Scores by Judge
# One bar chart per aggregate statistic, judges ordered by that statistic
# in descending order.
plt.figure(figsize=(16, 8))
for i, agg in enumerate(aggs):
    plt.subplot(2, 3, i + 1)
    estimator, per_judge = params[agg]
    ax = sns.barplot(data=melted, x='Judge', y='Score',
                     estimator=estimator,
                     order=per_judge.sort_values(ascending=False).index,
                     alpha=0.65, errorbar=None)
    ax.margins(y=0.10)
    plt.title(agg)
    plt.xlabel('')
    plt.ylabel('')
    # Print the value just above each bar.
    for patch in ax.patches:
        ax.annotate(format(patch.get_height(), '.2f'),
                    (patch.get_x() + patch.get_width() / 2., patch.get_height()),
                    ha='center', va='center',
                    size=8, xytext=(0, 5),
                    textcoords='offset points')
plt.subplots_adjust(left=0.1, bottom=0.1, right=1, top=0.75, wspace=0.1, hspace=0.3)
plt.show()
Scores by Year and Runtime (Mins)
# Average score vs release year and vs runtime, coloured by advocate.
plt.figure(figsize=(16, 5))
for i, param in enumerate(["Year", "Mins"]):
    plt.subplot(1, 2, i + 1)
    sns.scatterplot(data=master, x='AVG', y=param, hue='From', s=200)
    plt.xlabel('')
    plt.ylabel('')
    plt.title(f'{param} / Avg. Score')
    plt.grid(True, axis="y")
plt.subplots_adjust(left=0.1, bottom=0.1, right=1, top=0.75, wspace=0.1, hspace=0.3)
plt.show()
Scores by Album
# Mean score per album, with the full min-max range shown as a band
# (percentile interval 100 covers every judge's score).
plt.figure(figsize=(16, 4))
sns.lineplot(data=melted, x="#", y="Score", linewidth=2.5, marker='o', errorbar=('pi', 100))
plt.xlabel('')
plt.ylabel('')
plt.grid(True)
plt.title('Average Score / Album')
plt.show()

# Every individual judge's score, one line per judge.
plt.figure(figsize=(16, 5))
sns.lineplot(data=melted, x="#", y="Score", hue='Judge', linewidth=1.5, marker='o')
plt.xlabel('')
plt.ylabel('')
plt.grid(True)
plt.title('Judge\'s Scores / Album')
plt.show()
Scores by Genre
# Mean average-score per genre (extra-wide figure: many genre categories).
plt.figure(figsize=(28, 3))
ax = sns.barplot(data=master, x="Genre", y="AVG", alpha=0.65, errorbar=None)
# Value label tucked inside the top of each bar.
for patch in ax.patches:
    ax.annotate(format(patch.get_height(), '.2f'),
                (patch.get_x() + patch.get_width() / 2., patch.get_height()),
                ha='center', va='center',
                size=10, xytext=(0, -10),
                textcoords='offset points')
plt.xlabel('')
plt.ylabel('')
plt.show()
Scores by Categories
# Mean average-score across four categorical breakdowns of the albums.
plt.figure(figsize=(16, 8))
for i, cat in enumerate(["Origin", "Gender", "Group", "POC"]):
    plt.subplot(2, 2, i + 1)
    ax = sns.barplot(data=master, x=cat, y="AVG", alpha=0.65, errorbar=None)
    plt.title("Score / " + cat)
    plt.xlabel('')
    plt.ylabel('')
    # Value label tucked inside the top of each bar.
    for patch in ax.patches:
        ax.annotate(format(patch.get_height(), '.2f'),
                    (patch.get_x() + patch.get_width() / 2., patch.get_height()),
                    ha='center', va='center',
                    size=10, xytext=(0, -10),
                    textcoords='offset points')
plt.subplots_adjust(left=0.1, bottom=0.1, right=1, top=0.75, wspace=0.1, hspace=0.3)
plt.show()