Spaces:

siyuansc
/

scientific_viz_project

Runtime error

App Files Files Community

scientific_viz_project / app.py

siyuansc

Update app.py

6fc69ec verified 4 months ago

raw

history blame

No virus

3.71 kB

	import json
	from collections import Counter
	import numpy as np
	import pandas as pd
	import altair as alt
	import operator

	import warnings
	warnings.filterwarnings('ignore')

	# Competitions covered by the data set. Each one is read from its own
	# events_<name>.json and matches_<name>.json file, resolved relative to the
	# current working directory.
	nations = [
	    'Italy',
	    'England',
	    'Germany',
	    'France',
	    'Spain',
	    'European_Championship',
	    'World_Cup',
	]

	# loading the events data: competition name -> parsed content of its events file
	events = {}
	for nation in nations:
	    # JSON text is UTF-8 by specification, so do not rely on the locale default.
	    with open('./events_%s.json' % nation, encoding='utf-8') as json_data:
	        events[nation] = json.load(json_data)

	# loading the match data: competition name -> parsed content of its matches file
	matches = {}
	for nation in nations:
	    with open('./matches_%s.json' % nation, encoding='utf-8') as json_data:
	        matches[nation] = json.load(json_data)

	# loading the players data
	with open('players.json', encoding='utf-8') as json_data:
	    players = json.load(json_data)

	# loading the competitions data
	with open('competitions.json', encoding='utf-8') as json_data:
	    competitions = json.load(json_data)

	# Gather the name of every recorded event across all competitions.
	ev_all_nations = [ev['eventName'] for nation in nations for ev in events[nation]]
	count = Counter(ev_all_nations)
	total = len(ev_all_nations)

	# Share of each event type as a whole-number percentage. int() truncates, so
	# the shares do not necessarily add up to exactly 100.
	counter = {event: int((n_events / total) * 100) for event, n_events in count.items()}
	# Ascending by percentage; the bar chart itself orders the bars via sort='-x'.
	sorted_counter = sorted(counter.items(), key=operator.itemgetter(1), reverse=False)


	data = pd.DataFrame(sorted_counter, columns=['Event', 'Percentage'])

	# Interval selection restricted to the y encoding (the event categories).
	# NOTE(review): add_selection is deprecated in Altair 5 in favour of
	# add_params -- confirm the installed Altair version before migrating.
	brush = alt.selection_interval(encodings=['y'])

	# Axis ticks every 10 percentage points, extending past the largest bar.
	max_value = data['Percentage'].max()
	tick_values = list(range(0, int(max_value) + 10, 10))

	# One bar per event type; bars outside the brush are drawn in light gray.
	bars = alt.Chart(data).mark_bar().encode(
	    y=alt.Y('Event:N', title=None, sort='-x'),
	    x=alt.X('Percentage:Q', title='events(%)', axis=alt.Axis(values=tick_values)),
	    color=alt.condition(brush, alt.Color('Event:N', legend=None), alt.value('lightgray'))
	).add_selection(
	    brush
	)

	# Vertical rule at the mean percentage of the rows kept by the brush filter.
	average_rule = alt.Chart(data).mark_rule(color='firebrick', strokeWidth=2).encode(
	    x='mean(Percentage):Q'
	).transform_filter(
	    brush
	)

	# Text label showing that same mean, formatted to one decimal place.
	average_text = alt.Chart(data).mark_text(
	    dx=5, dy=-5, color='firebrick', align='left', fontWeight='bold'
	).encode(
	    x=alt.X('mean(Percentage):Q', aggregate='mean'),
	    text=alt.Text('mean(Percentage):Q', aggregate='mean', format='.1f')
	).transform_filter(
	    brush
	)

	chart1 = alt.layer(bars, average_rule, average_text).properties(
	    width=600,
	    height=500,
	    title='Events Distribution'
	)

	# Display the combined chart (a bare expression only renders in a notebook cell)
	chart1

	# Number of recorded events per match, keyed by match id. Converted back to a
	# plain dict so a lookup of an unknown match id still raises KeyError.
	match_ev_count = dict(
	    Counter(ev['matchId'] for nation in nations for ev in events[nation])
	)

	event_count_values = list(match_ev_count.values())
	data = pd.DataFrame({
	    'Event Count': event_count_values
	})

	min_value = min(event_count_values)
	max_value = max(event_count_values)


	# Axis ticks on multiples of 200, starting at or below the smallest count.
	ticks = list(range((min_value // 200) * 200, (max_value // 200 + 1) * 200, 200))

	# Single-point selection on the x encoding.
	# NOTE(review): selection_single / add_selection are deprecated in Altair 5
	# (selection_point / add_params) -- confirm the installed version before migrating.
	click = alt.selection_single(encodings=['x'], nearest=True)

	# Histogram of events per match; the tooltip reports the mean count per bar.
	hist = alt.Chart(data).mark_bar().encode(
	    alt.X('Event Count:Q', bin=alt.Bin(maxbins=20), title='events (n)', axis=alt.Axis(values=ticks)),
	    alt.Y('count()', title='frequency (n)'),
	    tooltip=[alt.Tooltip('mean(Event Count):Q', title='Mean', format='.2f')]
	).properties(
	    width=600,
	    height=400
	).add_selection(
	    click
	)

	# Vertical rule at the mean event count of the rows kept by the click filter.
	mean_rule = alt.Chart(data).transform_filter(
	    click
	).mark_rule(color='firebrick', size=3).encode(
	    x='mean(Event Count):Q',
	)

	chart2 = alt.layer(hist, mean_rule).properties(
	    title='Histogram of Event Counts with Click Interaction and Tooltip'
	)

	chart2

	# Lay the two charts out side by side with a 10-pixel gap, resolving the
	# color scale independently for each of them.
	combined_chart1 = alt.hconcat(chart1, chart2, spacing=10).resolve_scale(color='independent')
	combined_chart1