"""Load the soccer event/match/player/competition JSON files and chart them.

Two interactive Altair charts are built and placed side by side:

* ``chart1`` - share (in percent) of every event type over all competitions,
  with a brush selection that drives a "mean of the selected bars" rule.
* ``chart2`` - histogram of the number of events recorded per match, with a
  click selection that drives a mean rule.
"""
import json
import operator
import warnings
from collections import Counter

import altair as alt
import numpy as np
import pandas as pd

# Silences every warning process-wide (kept from the original script).
warnings.filterwarnings('ignore')


def _load_json(path):
    """Return the parsed content of the JSON file at *path*."""
    # Explicit UTF-8 so decoding does not depend on the platform locale.
    with open(path, encoding='utf-8') as json_data:
        return json.load(json_data)


# Competitions for which an events file and a matches file are read.
nations = ['Italy', 'England', 'Germany', 'France', 'Spain',
           'European_Championship', 'World_Cup']

# loading the events and the match data, keyed by competition name
events = {}
matches = {}
for nation in nations:
    events[nation] = _load_json('events/events_%s.json' % nation)
    matches[nation] = _load_json('matches/matches_%s.json' % nation)

# loading the players data
players = _load_json('players.json')

# loading the competitions data
competitions = _load_json('competitions.json')

# ---------------------------------------------------------------------------
# Chart 1: distribution of event types
# ---------------------------------------------------------------------------

# Flat list with the 'eventName' of every event of every competition.
ev_all_nations = [
    ev['eventName'] for nation in nations for ev in events[nation]
]

count = Counter(ev_all_nations)
total = len(ev_all_nations)

# Integer percentage of each event type; int() truncates, it does not round.
counter = {
    event: int((n_events / total) * 100) for event, n_events in count.items()
}
sorted_counter = sorted(counter.items(), key=operator.itemgetter(1), reverse=False)

data = pd.DataFrame(sorted_counter, columns=['Event', 'Percentage'])

# NOTE(review): `add_selection` / `selection_single` used below are the
# Altair 4 spellings; Altair 5 deprecates them in favour of `add_params` /
# `selection_point`. Left unchanged because the installed version is unknown.
brush = alt.selection_interval(encodings=['y'])

max_value = data['Percentage'].max()
# One x-axis tick every 10 percentage points, reaching past the largest bar.
tick_values = list(range(0, int(max_value) + 10, 10))

bars = alt.Chart(data).mark_bar().encode(
    y=alt.Y('Event:N', title=None, sort='-x'),
    x=alt.X('Percentage:Q', title='events(%)', axis=alt.Axis(values=tick_values)),
    # Bars outside the brushed interval are drawn in light gray.
    color=alt.condition(brush, alt.Color('Event:N', legend=None), alt.value('lightgray')),
).add_selection(
    brush
)

# Vertical rule at the mean percentage of the rows that pass the brush filter.
average_rule = alt.Chart(data).mark_rule(color='firebrick', strokeWidth=2).encode(
    x='mean(Percentage):Q'
).transform_filter(
    brush
)

# Text label for that mean. The `mean(...)` shorthand already sets the
# aggregate, so no separate `aggregate='mean'` argument is needed.
average_text = alt.Chart(data).mark_text(
    dx=5,
    dy=-5,
    color='firebrick',
    align='left',
    fontWeight='bold'
).encode(
    x=alt.X('mean(Percentage):Q'),
    text=alt.Text('mean(Percentage):Q', format='.1f')
).transform_filter(
    brush
)

chart1 = alt.layer(bars, average_rule, average_text).properties(
    width=600,
    height=500,
    title='Events Distribution'
)

# Display the combined chart (bare expression: shown by a notebook, no effect
# when run as a plain script).
chart1

# ---------------------------------------------------------------------------
# Chart 2: histogram of events per match
# ---------------------------------------------------------------------------

# Number of events per 'matchId' over all competitions. Converted back to a
# plain dict so missing keys still raise KeyError as in the original.
match_ev_count = dict(
    Counter(ev['matchId'] for nation in nations for ev in events[nation])
)

event_count_values = list(match_ev_count.values())
data = pd.DataFrame({'Event Count': event_count_values})

min_value = min(event_count_values)
max_value = max(event_count_values)
# Axis ticks on multiples of 200 spanning the observed range of counts.
ticks = list(range((min_value // 200) * 200, (max_value // 200 + 1) * 200, 200))

click = alt.selection_single(encodings=['x'], nearest=True)

hist = alt.Chart(data).mark_bar().encode(
    alt.X('Event Count:Q', bin=alt.Bin(maxbins=20), title='events (n)',
          axis=alt.Axis(values=ticks)),
    alt.Y('count()', title='frequency (n)'),
    tooltip=[alt.Tooltip('mean(Event Count):Q', title='Mean', format='.2f')]
).properties(
    width=600,
    height=400
).add_selection(
    click
)

# Vertical rule at the mean event count of the rows that pass the click filter.
mean_rule = alt.Chart(data).transform_filter(
    click
).mark_rule(color='firebrick', size=3).encode(
    x='mean(Event Count):Q',
)

chart2 = alt.layer(hist, mean_rule).properties(
    title='Histogram of Event Counts with Click Interaction and Tooltip'
)

chart2

# ---------------------------------------------------------------------------
# Both charts side by side, each with its own colour scale
# ---------------------------------------------------------------------------

combined_chart1 = alt.hconcat(
    chart1,
    chart2,
    spacing=10
).resolve_scale(
    color='independent'
)

combined_chart1