Spaces:
Runtime error
Runtime error
import json
import operator
import warnings
from collections import Counter

import altair as alt
import numpy as np
import pandas as pd
import panel as pn

warnings.filterwarnings('ignore')
pn.extension('vega')
# Names of the per-competition data files; each name has one events file and
# one matches file.
nations = ['Italy', 'England', 'Germany', 'France', 'Spain', 'European_Championship', 'World_Cup']


def _load_json(path):
    """Return the parsed content of the JSON file at ``path``."""
    # Read as UTF-8 explicitly instead of relying on the platform default.
    with open(path, encoding='utf-8') as json_data:
        return json.load(json_data)


# loading the events data
events = {nation: _load_json(f'events/events_{nation}.json') for nation in nations}

# loading the match data
matches = {nation: _load_json(f'matches/matches_{nation}.json') for nation in nations}

# loading the players data
players = _load_json('players.json')

# loading the competitions data
competitions = _load_json('competitions.json')
# drawing the first chart
# Count every event name over all data files without building an intermediate
# list of names.
count = Counter(ev['eventName'] for nation in nations for ev in events[nation])
total = sum(count.values())
# Share of each event name as a whole-number percentage. int() truncates, so a
# name that accounts for less than 1% of the events is reported as 0.
counter = {event: int((occurrences / total) * 100) for event, occurrences in count.items()}
sorted_counter = sorted(counter.items(), key=operator.itemgetter(1))
data = pd.DataFrame(sorted_counter, columns=['Event', 'Percentage'])

# Interval selection restricted to the y channel (the event names).
brush = alt.selection_interval(encodings=['y'])

max_value = data['Percentage'].max()
# One axis tick every 10 percentage points, reaching at least the largest bar.
tick_values = list(range(0, int(max_value) + 10, 10))

# Bars are coloured per event while selected and grey otherwise.
bars = alt.Chart(data).mark_bar().encode(
    y=alt.Y('Event:N', title=None, sort='-x'),
    x=alt.X('Percentage:Q', title='events(%)', axis=alt.Axis(values=tick_values)),
    color=alt.condition(brush, alt.Color('Event:N', legend=None), alt.value('lightgray')),
).add_selection(
    brush
)

# Vertical rule at the mean percentage of the rows inside the selection.
average_rule = alt.Chart(data).mark_rule(color='firebrick', strokeWidth=2).encode(
    x='mean(Percentage):Q'
).transform_filter(
    brush
)

# Numeric label for that mean, placed at the rule's x position.
average_text = alt.Chart(data).mark_text(
    dx=5, dy=-5, color='firebrick', align='left', fontWeight='bold'
).encode(
    x=alt.X('mean(Percentage):Q'),
    text=alt.Text('mean(Percentage):Q', format='.1f'),
).transform_filter(
    brush
)

chart1 = alt.layer(bars, average_rule, average_text).properties(
    width=600,
    height=500,
    title='Events Distribution',
)

# Display the combined chart
# (a bare expression only renders in a notebook cell; it does nothing in a script)
chart1
# drawing the second chart
# Number of events recorded for each match id, over all data files.
match_ev_count = Counter(ev['matchId'] for nation in nations for ev in events[nation])

data = pd.DataFrame({
    'Event Count': list(match_ev_count.values())
})

# Mean and standard deviation (np.std defaults), truncated to whole numbers
# for the text label.
mean_val = int(np.mean(data['Event Count']))
std_val = int(np.std(data['Event Count']))

# Histogram of events per match, with one axis tick every 100 events.
hist = alt.Chart(data).mark_bar().encode(
    alt.X('Event Count:Q', bin=alt.Bin(maxbins=20), title='events (n)',
          axis=alt.Axis(values=np.arange(0, max(data['Event Count']) + 100, 100))),
    alt.Y('count()', title='frequency (n)'),
).properties(
    width=600,
    height=400,
)

# Single-row dataset that anchors the mean / standard deviation label; dx and dy
# then shift the label away from that anchor point.
label_data = pd.DataFrame({
    'x': [mean_val + std_val],
    'y': [1],
    'text': [f'μ = {mean_val} \n σ = {std_val}'],
})
text = alt.Chart(label_data).mark_text(
    align='left',
    baseline='top',
    fontSize=20,
    dx=-120,
    dy=-300,
).encode(
    x='x:Q',
    y='y:Q',
    text='text:N',
)

chart2 = hist + text

# Display the combined chart
# NOTE(review): Chart.display() targets notebook front ends — confirm it is
# still wanted when this file is served as a Panel app.
chart2.display()
# drawing the third chart
# Charts 1 and 2 placed side by side; the colour scale is resolved
# independently for each of them.
first_row = alt.hconcat(chart1, chart2, spacing=10)
combined_chart1 = first_row.resolve_scale(color='independent')
combined_chart1
# drawing the fourth chart
match_id = 2576335

# Every event whose matchId equals match_id, searched across all data files.
a_match = [
    ev
    for nation in nations
    for ev in events[nation]
    if ev['matchId'] == match_id
]

# Match record whose wyId equals match_id; the last hit wins and the name stays
# unbound when no record matches.
for nation in nations:
    for candidate in matches[nation]:
        if candidate['wyId'] == match_id:
            match_f = candidate

df_a_match = pd.DataFrame(a_match)
# Static pitch drawing. All geometry below is expressed on a 0-100 by 0-100
# coordinate grid and layered into one chart later in the file.
background_data = pd.DataFrame({
    'x': [0],
    'y': [0],
    'x2': [100],
    'y2': [100]
})

# Create the background
background = alt.Chart(background_data).mark_rect(
    color='#195905'
).encode(
    x='x:Q',
    y='y:Q',
    x2='x2:Q',
    y2='y2:Q'
)

# Define the center circle (an unfilled white point mark at the middle of the grid)
center_circle = alt.Chart(pd.DataFrame({'x': [50], 'y': [50]})).mark_point(
    size=12000,
    color='white',
    strokeWidth=3
).encode(
    x='x:Q',
    y='y:Q'
)

# Create the border lines
# Each row is one segment from (x, y) to (x2, y2).
border_lines_data = pd.DataFrame({
    'x': [1, 1, 99.5, 99.5, 1],
    'y': [1, 99.5, 99.5, 1, 1],
    'x2': [1, 99.5, 99.5, 1, 1],
    'y2': [99.5, 99.5, 1, 1, 1]
})

# A rule mark draws every row as its own (x, y) -> (x2, y2) segment. A line mark
# would ignore x2/y2 and join all rows into one path ordered by x, which does
# not trace the rectangle described by this data.
border_lines = alt.Chart(border_lines_data).mark_rule(
    color='white',
    strokeWidth=3
).encode(
    x=alt.X('x:Q', scale=alt.Scale(domain=[1, 99.5])),
    y=alt.Y('y:Q', scale=alt.Scale(domain=[1, 99.5])),
    x2='x2:Q',
    y2='y2:Q'
)

midline_data = pd.DataFrame({
    'x': [50, 50],
    'y': [1, 99]
})

# Create the line using `mark_line`
# (two points sharing the same x, so the path is a single vertical stroke)
midline = alt.Chart(midline_data).mark_line(
    color='white',
    strokeWidth=3
).encode(
    x='x:Q',
    y='y:Q'
)

# Segments of the boxes at both ends of the pitch, one (x, y) -> (x2, y2)
# segment per row.
lines_data = pd.DataFrame({
    'x': [1, 17.5, 17.5, 1, 82.5, 82.5, 99, 1, 6.5, 6.5, 1, 99, 93.5, 93.5],
    'y': [21.3, 21.3, 77.7, 77.7, 21.3, 77.7, 77.7, 37.5, 37.5, 62.5, 62.5, 37.5, 37.5, 62.5],
    'x2': [17.5, 17.5, 1, 17.5, 99, 82.5, 82.5, 6.5, 6.5, 1, 6.5, 93.5, 93.5, 99],
    'y2': [21.3, 77.7, 77.7, 77.7, 21.3, 21.3, 77.7, 37.5, 62.5, 62.5, 62.5, 37.5, 62.5, 62.5]
})

# Rule mark for the same reason as the border: the rows are independent segments.
lines = alt.Chart(lines_data).mark_rule(
    color='white',
    strokeWidth=3
).encode(
    x='x:Q',
    y='y:Q',
    x2='x2:Q',
    y2='y2:Q'
)

# Presumably the two penalty spots; the arcs below are centred on them.
dot_positions = pd.DataFrame({
    'x': [12, 87],
    'y': [50, 50]
})

# Create the white dots
white_dots = alt.Chart(dot_positions).mark_point(
    size=100,
    color='white',
    filled=True
).encode(
    x='x:Q',
    y='y:Q'
)


def _box_arc(center_x, x_sign, y_sign, box_edge, **channel_options):
    """Return a white line chart for the part of a half-circle beyond a box edge.

    The half-circle has radius 9.5 and is centred on ``(center_x, 50)``.
    ``x_sign`` (+1 or -1) mirrors it horizontally and ``y_sign`` (+1 or -1)
    picks the half above or below y = 50. Points are kept only on the far side
    of ``box_edge``: ``x >= box_edge`` when ``x_sign`` is positive, otherwise
    ``x <= box_edge``. ``channel_options`` are forwarded to both the x and the
    y channel definitions.
    """
    theta = np.linspace(0, np.pi, 100)
    points = pd.DataFrame({
        'x': center_x + x_sign * 9.5 * np.cos(theta),
        'y': 50 + y_sign * 9.5 * np.sin(theta),
    })
    if x_sign > 0:
        points = points[points['x'] >= box_edge]
    else:
        points = points[points['x'] <= box_edge]
    # x changes monotonically along each half-circle, so a line mark (which
    # orders its path by x) follows the curve.
    return alt.Chart(points).mark_line(
        color='white',
        strokeWidth=3
    ).encode(
        x=alt.X('x', scale=alt.Scale(domain=[0, 100]), **channel_options),
        y=alt.Y('y', scale=alt.Scale(domain=[0, 100]), **channel_options),
    )


# Left end: upper and lower halves of the arc to the right of x = 17.5.
arc1 = _box_arc(12, 1, 1, 17.5)
arc2 = _box_arc(12, 1, -1, 17.5)
# Right end: upper and lower halves of the arc to the left of x = 82.5.
arc3 = _box_arc(87, -1, 1, 82.5)
# Only this layer sets title=None on its channels.
arc4 = _box_arc(87, -1, -1, 82.5, title=None)
# Use the first entry of each event's positions list as its pitch coordinates.
first_positions = [positions[0] for positions in df_a_match['positions']]
df_a_match['x'] = [point['x'] for point in first_positions]
df_a_match['y'] = [point['y'] for point in first_positions]
# These two columns are not referenced by the charts built from this frame.
df_a_match.drop(columns=['positions', 'tags'], inplace=True)
# Interval selection over both pitch axes. It is created with
# alt.selection_interval, like the other selections in this file, and sets no
# custom `on` trigger.
brush2 = alt.selection_interval(encodings=['x', 'y'])

# One point per event of the selected match: coloured by team inside the brush,
# grey outside it.
team_ids = list(df_a_match['teamId'].unique())
team_event = alt.Chart(df_a_match).mark_point(
    size=50,
    opacity=1,
    filled=True
).encode(
    # Axis decorations are hidden for this layer.
    x=alt.X('x:Q', axis=alt.Axis(labels=False, ticks=False, grid=False)),
    y=alt.Y('y:Q', axis=alt.Axis(labels=False, ticks=False, grid=False)),
    color=alt.condition(
        brush2,
        alt.Color('teamId:N', legend=None,
                  scale=alt.Scale(domain=team_ids, range=['black', 'cyan'])),
        alt.value('lightgray')
    ),
    tooltip=['eventName:N', 'teamId:N', 'x:Q', 'y:Q']
).add_selection(
    brush2
)
# Interval selection bound to the scales; both event filters require that
# Shift is not held on mousedown.
zoom = alt.selection_interval(
    bind='scales',
    on="[mousedown[!event.shiftKey], mouseup] > mousemove",
    translate="[mousedown[!event.shiftKey], mouseup] > mousemove!",
)

# Layers in the order they are passed to alt.layer: pitch background first,
# event points last.
pitch_layers = [
    background,
    border_lines,
    midline,
    lines,
    white_dots,
    arc1,
    arc2,
    arc3,
    arc4,
    center_circle,
    team_event,
]
soccer_pitch = alt.layer(*pitch_layers).properties(
    width=700,
    height=440,
    title="Lazio - Internazionale, 2 - 3",
).add_selection(zoom)
# Horizontal bar chart counting events per event name, restricted to the rows
# selected by brush2.
event_channel = alt.Y('eventName:N', sort='-x', title=None)
frequency_channel = alt.X('count():Q', title='frequency')
colour_channel = alt.Color('eventName:N', legend=None)
bars = (
    alt.Chart(df_a_match)
    .mark_bar()
    .encode(y=event_channel, x=frequency_channel, color=colour_channel)
    .transform_filter(brush2)
)
# One-row dataset holding the usage hint; only the text column is encoded below.
annotations_df = pd.DataFrame({
    'text': ['You can select part of the graph to see distribution of events'],
    'x': [0],
    'y': [0]
})

# Italic hint rendered as a 700 x 20 text-only chart.
annotations_chart = alt.Chart(annotations_df).mark_text(
    align='left',
    baseline='middle',
    fontSize=12,
    fontStyle='italic'
).encode(
    text='text:N'
).properties(
    width=700,
    height=20
)
# Pitch stacked on top of its usage hint.
soccer_pitch_with_annotations = alt.vconcat(soccer_pitch, annotations_chart, spacing=5)

# That column next to the brush-filtered bar chart, with the colour scale
# resolved independently.
pitch_and_bars = alt.hconcat(soccer_pitch_with_annotations, bars, spacing=10)
combined_chart2 = pitch_and_bars.resolve_scale(color='independent')

combined_chart2.display()
# drawing the fifth chart
# Both combined rows stacked vertically, with the colour scale resolved
# independently.
stacked_rows = alt.vconcat(combined_chart1, combined_chart2, spacing=10)
combined_chart = stacked_rows.resolve_scale(color='independent')
combined_chart
# Dashboard: every description is followed by the chart it announces.
description1 = pn.pane.Markdown("Our Scientific Visualization Project is designed to provide a comprehensive overview of the process undertaken to reimagine the visual representations based on the scientific study. The visualizations selected for redesign include a bar chart detailing the percentage of different event types occurring within a soccer match and a histogram showcasing the distribution of event frequencies, alongside a spatial plot representing the locations of match events on a soccer field. Each graphic serves a distinct purpose: the bar chart allows for the quick comparison of event prevalence, the histogram provides an understanding of the variability and central tendency of event counts, and the spatial plot conveys the common areas of activity during a match.")

description2 = pn.pane.Markdown("The first chart below is a bar chart detailing the percentage of different event types occurring within a soccer match.")
chart1_panel = pn.pane.Vega(chart1, sizing_mode='stretch_width')

description3 = pn.pane.Markdown("The second chart below is a histogram showcasing the distribution of event frequencies.")
chart2_panel = pn.pane.Vega(chart2, sizing_mode='stretch_width')

description4 = pn.pane.Markdown("The third chart below is a spatial plot representing the locations of match events on a soccer field. We made a connection between this graph and the first graph. People can select the event points here with zooming, the right is the interactive bar chart that shows the distribution of event frequency.")
# The pitch with its linked bar chart is the chart description4 refers to.
combined_chart2_panel = pn.pane.Vega(combined_chart2, sizing_mode='stretch_width')

description5 = pn.pane.Markdown("We combined all the graphs we have and the chart below is what the whole things look like.")
combined_chart_panel = pn.pane.Vega(combined_chart, sizing_mode='stretch_width')

dashboard = pn.Column(
    "# Scientific Visualization Project",
    description1,
    description2,
    chart1_panel,
    description3,
    chart2_panel,
    description4,
    combined_chart2_panel,
    description5,
    combined_chart_panel,
)

dashboard.servable(title='Scientific Visualization Project')