Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -1,27 +1,9 @@
|
|
1 |
import json
|
2 |
from collections import Counter
|
3 |
import numpy as np
|
4 |
-
import operator
|
5 |
-
import matplotlib.pyplot as plt
|
6 |
-
from matplotlib.ticker import FuncFormatter
|
7 |
-
from matplotlib.patches import Ellipse
|
8 |
-
import seaborn as sns
|
9 |
import pandas as pd
|
10 |
-
import networkx as nx
|
11 |
-
import base64
|
12 |
-
from collections import defaultdict
|
13 |
-
import sys,os
|
14 |
-
import math
|
15 |
-
import random
|
16 |
-
import operator
|
17 |
-
import csv
|
18 |
-
import matplotlib.pylab as pyl
|
19 |
-
import itertools
|
20 |
-
import scipy as sp
|
21 |
-
from scipy import stats
|
22 |
-
from scipy import optimize
|
23 |
-
from scipy.integrate import quad
|
24 |
import altair as alt
|
|
|
25 |
|
26 |
import warnings
|
27 |
warnings.filterwarnings('ignore')
|
@@ -30,14 +12,14 @@ warnings.filterwarnings('ignore')
|
|
30 |
events={}
|
31 |
nations = ['Italy','England','Germany','France','Spain','European_Championship','World_Cup']
|
32 |
for nation in nations:
|
33 |
-
with open('
|
34 |
events[nation] = json.load(json_data)
|
35 |
|
36 |
# loading the match data
|
37 |
matches={}
|
38 |
nations = ['Italy','England','Germany','France','Spain','European_Championship','World_Cup']
|
39 |
for nation in nations:
|
40 |
-
with open('
|
41 |
matches[nation] = json.load(json_data)
|
42 |
|
43 |
# loading the players data
|
@@ -50,7 +32,6 @@ competitions={}
|
|
50 |
with open('competitions.json') as json_data:
|
51 |
competitions = json.load(json_data)
|
52 |
|
53 |
-
|
54 |
ev_all_nations = []
|
55 |
for nation in nations:
|
56 |
for i in range(len(events[nation])):
|
@@ -61,310 +42,96 @@ total = len(ev_all_nations)
|
|
61 |
counter = {event: int((count / total) * 100) for event, count in count.items()}
|
62 |
sorted_counter = sorted(counter.items(), key=operator.itemgetter(1), reverse=False)
|
63 |
|
64 |
-
# Convert to DataFrame for Altair plotting.
|
65 |
-
data = pd.DataFrame(sorted_counter, columns=['Event', 'Percentage'])
|
66 |
-
|
67 |
-
# Generate the bar chart using Altair.
|
68 |
-
chart = alt.Chart(data).mark_bar().encode(
|
69 |
-
y=alt.Y('Event:N', title=None, sort='-x'), # Sort is handled by the data frame's order.
|
70 |
-
x='Percentage:Q',
|
71 |
-
).properties(
|
72 |
-
width=600,
|
73 |
-
height=400
|
74 |
-
)
|
75 |
-
|
76 |
-
# Display the chart.
|
77 |
-
chart.display()
|
78 |
-
|
79 |
-
|
80 |
-
data = pd.DataFrame({
|
81 |
-
'Event Count': list(match_ev_count.values())
|
82 |
-
})
|
83 |
-
|
84 |
-
mean_val = int(np.mean(data['Event Count']))
|
85 |
-
std_val = int(np.std(data['Event Count']))
|
86 |
-
|
87 |
-
hist = alt.Chart(data).mark_bar().encode(
|
88 |
-
alt.X('Event Count:Q', bin=alt.Bin(maxbins=20), title='events (n)',
|
89 |
-
axis=alt.Axis(values=np.arange(0, max(data['Event Count']) + 100, 100))),
|
90 |
-
alt.Y('count()', title='frequency (n)')
|
91 |
-
).properties(
|
92 |
-
width=600,
|
93 |
-
height=400
|
94 |
-
)
|
95 |
-
|
96 |
-
text = alt.Chart(pd.DataFrame({'x': [mean_val + std_val], 'y': [1], 'text': [f'μ = {mean_val} \n σ = {std_val}']})).mark_text(
|
97 |
-
align='left',
|
98 |
-
baseline='top',
|
99 |
-
fontSize=20,
|
100 |
-
dx=-120,
|
101 |
-
dy=-300
|
102 |
-
).encode(
|
103 |
-
x='x:Q',
|
104 |
-
y='y:Q',
|
105 |
-
text='text:N'
|
106 |
-
)
|
107 |
-
|
108 |
-
chart = hist + text
|
109 |
-
chart.display()
|
110 |
-
|
111 |
|
112 |
-
|
113 |
-
import pandas as pd
|
114 |
-
|
115 |
-
|
116 |
-
match_id = 2576335
|
117 |
-
a_match = []
|
118 |
-
for nation in nations:
|
119 |
-
for ev in events[nation]:
|
120 |
-
if ev['matchId'] == match_id:
|
121 |
-
a_match.append(ev)
|
122 |
-
|
123 |
-
for nation in nations:
|
124 |
-
for match in matches[nation]:
|
125 |
-
if match['wyId'] == match_id:
|
126 |
-
match_f = match
|
127 |
-
|
128 |
-
df_a_match = pd.DataFrame(a_match)
|
129 |
-
|
130 |
-
background_data = pd.DataFrame({
|
131 |
-
'x': [0],
|
132 |
-
'y': [0],
|
133 |
-
'x2': [100],
|
134 |
-
'y2': [100]
|
135 |
-
})
|
136 |
-
|
137 |
-
# Create the background using `mark_rect`
|
138 |
-
background = alt.Chart(background_data).mark_rect(
|
139 |
-
color='#195905' # Soccer field green color
|
140 |
-
).encode(
|
141 |
-
x='x:Q',
|
142 |
-
y='y:Q',
|
143 |
-
x2='x2:Q',
|
144 |
-
y2='y2:Q'
|
145 |
-
)
|
146 |
-
|
147 |
-
|
148 |
-
#Define the center circle
|
149 |
-
center_circle = alt.Chart(pd.DataFrame({'x': [50], 'y': [50]})).mark_point(
|
150 |
-
size=12000, # Adjust size as necessary
|
151 |
-
color='white',
|
152 |
-
strokeWidth=3
|
153 |
-
).encode(
|
154 |
-
x='x:Q',
|
155 |
-
y='y:Q'
|
156 |
-
)
|
157 |
-
|
158 |
-
border_lines_data = pd.DataFrame({
|
159 |
-
'x': [1, 1, 99.5, 99.5, 1],
|
160 |
-
'y': [1, 99.5, 99.5, 1, 1],
|
161 |
-
'x2': [1, 99.5, 99.5, 1, 1],
|
162 |
-
'y2': [99.5, 99.5, 1, 1, 1]
|
163 |
-
})
|
164 |
-
|
165 |
-
# Create the border lines using `mark_line`
|
166 |
-
|
167 |
-
border_lines = alt.Chart(border_lines_data).mark_line(
|
168 |
-
color='white',
|
169 |
-
strokeWidth=3 # This controls the thickness of the line
|
170 |
-
).encode(
|
171 |
-
x=alt.X('x:Q', scale=alt.Scale(domain=[1, 99.5])),
|
172 |
-
y=alt.Y('y:Q', scale=alt.Scale(domain=[1, 99.5])),
|
173 |
-
x2='x2:Q',
|
174 |
-
y2='y2:Q'
|
175 |
-
)
|
176 |
-
|
177 |
-
midline_data = pd.DataFrame({
|
178 |
-
'x': [50, 50,],
|
179 |
-
'y': [1, 99, ]
|
180 |
-
})
|
181 |
-
|
182 |
-
# Create the line using `mark_line`
|
183 |
-
midline = alt.Chart(midline_data).mark_line(
|
184 |
-
color='white', # Color of the line
|
185 |
-
strokeWidth=3 # Thickness of the line
|
186 |
-
).encode(
|
187 |
-
x='x:Q',
|
188 |
-
y='y:Q'
|
189 |
-
)
|
190 |
-
lines_data = pd.DataFrame({
|
191 |
-
'x': [1, 17.5, 17.5, 1, 82.5, 82.5, 99,1,6.5,6.5,1, 99,93.5,93.5],
|
192 |
-
'y': [21.3, 21.3, 77.7, 77.7, 21.3, 77.7, 77.7,37.5,37.5,62.5,62.5,37.5,37.5,62.5],
|
193 |
-
'x2': [17.5, 17.5, 1, 17.5, 99, 82.5, 82.5, 6.5,6.5,1,6.5,93.5,93.5,99],
|
194 |
-
'y2': [21.3, 77.7, 77.7, 77.7, 21.3, 21.3,77.7,37.5,62.5,62.5,62.5,37.5,62.5,62.5]
|
195 |
-
})
|
196 |
|
197 |
-
|
198 |
-
color='white', # Color of the lines
|
199 |
-
strokeWidth=3 # Thickness of the lines
|
200 |
-
).encode(
|
201 |
-
x='x:Q',
|
202 |
-
y='y:Q',
|
203 |
-
x2='x2:Q',
|
204 |
-
y2='y2:Q'
|
205 |
-
)
|
206 |
|
207 |
-
|
208 |
-
|
209 |
-
'y': [50, 50]
|
210 |
-
})
|
211 |
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
-
color='
|
216 |
-
|
217 |
-
|
218 |
-
x='x:Q',
|
219 |
-
y='y:Q'
|
220 |
)
|
221 |
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
-
semicircle_y = 50 + 9.5 * np.sin(theta)
|
227 |
-
|
228 |
-
# Convert the points to a dataframe
|
229 |
-
semicircle_data = pd.DataFrame({
|
230 |
-
'x': semicircle_x,
|
231 |
-
'y': semicircle_y
|
232 |
-
})
|
233 |
-
|
234 |
-
# Filter to keep the right side of the semicircle only
|
235 |
-
semicircle_data = semicircle_data[semicircle_data['x'] >= 17.5]
|
236 |
-
|
237 |
-
# Create the semicircle line
|
238 |
-
arc1 = alt.Chart(semicircle_data).mark_line(
|
239 |
-
color='white',
|
240 |
-
strokeWidth=3
|
241 |
-
).encode(
|
242 |
-
x=alt.X('x', scale=alt.Scale(domain=[0, 100])),
|
243 |
-
y=alt.Y('y', scale=alt.Scale(domain=[0, 100]))
|
244 |
)
|
245 |
|
246 |
-
|
247 |
-
|
248 |
-
theta = np.linspace(0, np.pi, 100) # Radians from 0 to pi for a full semicircle
|
249 |
-
semicircle_x2 = 12 + 9.5 * np.cos(theta) # Shift the semicircle to the right of x=12
|
250 |
-
semicircle_y2 = 50 - 9.5 * np.sin(theta)
|
251 |
-
|
252 |
-
# Convert the points to a dataframe
|
253 |
-
semicircle_data2 = pd.DataFrame({
|
254 |
-
'x': semicircle_x2,
|
255 |
-
'y': semicircle_y2
|
256 |
-
})
|
257 |
-
|
258 |
-
# Filter to keep the right side of the semicircle only
|
259 |
-
semicircle_data2 = semicircle_data2[semicircle_data2['x'] >= 17.5]
|
260 |
-
|
261 |
-
# Create the semicircle line
|
262 |
-
arc2 = alt.Chart(semicircle_data2).mark_line(
|
263 |
-
color='white',
|
264 |
-
strokeWidth=3
|
265 |
).encode(
|
266 |
-
x=alt.X('
|
267 |
-
|
|
|
|
|
268 |
)
|
269 |
|
270 |
-
|
271 |
-
|
272 |
-
|
273 |
-
|
274 |
-
|
275 |
-
# Convert the points to a dataframe
|
276 |
-
semicircle_data3 = pd.DataFrame({
|
277 |
-
'x': semicircle_x3,
|
278 |
-
'y': semicircle_y3
|
279 |
-
})
|
280 |
-
|
281 |
-
# Filter to keep the right side of the semicircle only
|
282 |
-
semicircle_data3 = semicircle_data3[semicircle_data3['x'] <= 82.5]
|
283 |
-
|
284 |
-
# Create the semicircle line
|
285 |
-
arc3 = alt.Chart(semicircle_data3).mark_line(
|
286 |
-
color='white',
|
287 |
-
strokeWidth=3
|
288 |
-
).encode(
|
289 |
-
x=alt.X('x', scale=alt.Scale(domain=[0, 100])),
|
290 |
-
y=alt.Y('y', scale=alt.Scale(domain=[0, 100]))
|
291 |
)
|
292 |
|
|
|
|
|
293 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
294 |
|
295 |
-
|
296 |
-
|
297 |
-
semicircle_y4 = 50 - 9.5 * np.sin(theta)
|
298 |
-
|
299 |
-
# Convert the points to a dataframe
|
300 |
-
semicircle_data4 = pd.DataFrame({
|
301 |
-
'x': semicircle_x4,
|
302 |
-
'y': semicircle_y4
|
303 |
})
|
304 |
|
305 |
-
|
306 |
-
|
307 |
-
|
308 |
-
# Create the semicircle line
|
309 |
-
arc4 = alt.Chart(semicircle_data4).mark_line(
|
310 |
-
color='white',
|
311 |
-
strokeWidth=3
|
312 |
-
).encode(
|
313 |
-
x=alt.X('x', scale=alt.Scale(domain=[0, 100]), title=None ),
|
314 |
-
y=alt.Y('y', scale=alt.Scale(domain=[0, 100]), title=None)
|
315 |
-
)
|
316 |
|
317 |
|
318 |
-
|
319 |
-
df_a_match['y'] = [pos[0]['y'] for pos in df_a_match['positions']]
|
320 |
|
321 |
-
|
322 |
-
#Create scatter plots for events and color by teamId
|
323 |
|
324 |
-
|
325 |
-
|
326 |
-
|
327 |
-
|
328 |
-
).
|
329 |
-
|
330 |
-
|
331 |
-
#color=alt.Color('teamId:N', legend=None, scale=alt.Scale(domain=list(df_a_match['teamId'].unique()), range=['black', 'cyan'])),
|
332 |
-
color=alt.condition(brush, # Only apply color to selected points
|
333 |
-
alt.Color('teamId:N', legend=None, scale=alt.Scale(domain=list(df_a_match['teamId'].unique()), range=['black', 'cyan'])),
|
334 |
-
alt.value('lightgray')),
|
335 |
-
tooltip=['eventName:N', 'teamId:N', 'x:Q', 'y:Q']
|
336 |
).add_selection(
|
337 |
-
|
338 |
)
|
339 |
|
340 |
-
|
341 |
-
|
342 |
-
|
343 |
-
|
344 |
-
soccer_pitch = alt.layer(background, border_lines, midline, lines, white_dots, arc1, arc2, arc3, arc4, center_circle, team_event).properties(
|
345 |
-
width=700,
|
346 |
-
height=440,
|
347 |
-
title="Lazio - Internazionale, 2 - 3"
|
348 |
)
|
349 |
|
350 |
-
|
351 |
-
|
352 |
-
x=alt.X('count():Q', title=None),
|
353 |
-
# This specifies a separate color encoding for the bar chart based on eventName
|
354 |
-
color=alt.Color('eventName:N',legend=None)
|
355 |
-
).transform_filter(
|
356 |
-
brush
|
357 |
)
|
358 |
|
|
|
359 |
|
360 |
-
|
361 |
-
|
362 |
-
|
363 |
-
|
364 |
).resolve_scale(
|
365 |
color='independent'
|
366 |
)
|
367 |
-
|
368 |
-
combined_chart.display()
|
369 |
-
|
370 |
|
|
|
1 |
import json
|
2 |
from collections import Counter
|
3 |
import numpy as np
|
|
|
|
|
|
|
|
|
|
|
4 |
import pandas as pd
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
import altair as alt
|
6 |
+
import operator
|
7 |
|
8 |
import warnings
|
9 |
warnings.filterwarnings('ignore')
|
|
|
12 |
events={}
|
13 |
nations = ['Italy','England','Germany','France','Spain','European_Championship','World_Cup']
|
14 |
for nation in nations:
|
15 |
+
with open('events/events_%s.json' %nation) as json_data:
|
16 |
events[nation] = json.load(json_data)
|
17 |
|
18 |
# loading the match data
|
19 |
matches={}
|
20 |
nations = ['Italy','England','Germany','France','Spain','European_Championship','World_Cup']
|
21 |
for nation in nations:
|
22 |
+
with open('matches/matches_%s.json' %nation) as json_data:
|
23 |
matches[nation] = json.load(json_data)
|
24 |
|
25 |
# loading the players data
|
|
|
32 |
with open('competitions.json') as json_data:
|
33 |
competitions = json.load(json_data)
|
34 |
|
|
|
35 |
ev_all_nations = []
|
36 |
for nation in nations:
|
37 |
for i in range(len(events[nation])):
|
|
|
42 |
counter = {event: int((count / total) * 100) for event, count in count.items()}
|
43 |
sorted_counter = sorted(counter.items(), key=operator.itemgetter(1), reverse=False)
|
44 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
|
46 |
+
data = pd.DataFrame(sorted_counter, columns=['Event', 'Percentage'])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
|
48 |
+
brush = alt.selection_interval(encodings=['y'])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
|
50 |
+
max_value = data['Percentage'].max()
|
51 |
+
tick_values = list(range(0, int(max_value) + 10, 10))
|
|
|
|
|
52 |
|
53 |
+
bars = alt.Chart(data).mark_bar().encode(
|
54 |
+
y=alt.Y('Event:N', title=None, sort='-x'),
|
55 |
+
x=alt.X('Percentage:Q', title='events(%)', axis=alt.Axis(values=tick_values)),
|
56 |
+
color=alt.condition(brush, alt.Color('Event:N', legend=None), alt.value('lightgray'))
|
57 |
+
).add_selection(
|
58 |
+
brush
|
|
|
|
|
59 |
)
|
60 |
|
61 |
+
average_rule = alt.Chart(data).mark_rule(color='firebrick', strokeWidth=2).encode(
|
62 |
+
x='mean(Percentage):Q'
|
63 |
+
).transform_filter(
|
64 |
+
brush
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
65 |
)
|
66 |
|
67 |
+
average_text = alt.Chart(data).mark_text(
|
68 |
+
dx=5, dy=-5, color='firebrick', align='left', fontWeight='bold'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
).encode(
|
70 |
+
x=alt.X('mean(Percentage):Q', aggregate='mean'),
|
71 |
+
text=alt.Text('mean(Percentage):Q', aggregate='mean', format='.1f')
|
72 |
+
).transform_filter(
|
73 |
+
brush
|
74 |
)
|
75 |
|
76 |
+
chart1 = alt.layer(bars, average_rule, average_text).properties(
|
77 |
+
width=600,
|
78 |
+
height=500,
|
79 |
+
title='Events Distribution'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
80 |
)
|
81 |
|
82 |
+
# Display the combined chart
|
83 |
+
chart1
|
84 |
|
85 |
+
match_ev_count = {}
|
86 |
+
for nation in nations:
|
87 |
+
for ev in events[nation]:
|
88 |
+
if ev['matchId'] not in match_ev_count:
|
89 |
+
match_ev_count[ev['matchId']] = 1
|
90 |
+
else:
|
91 |
+
match_ev_count[ev['matchId']] += 1
|
92 |
|
93 |
+
data = pd.DataFrame({
|
94 |
+
'Event Count': list(match_ev_count.values())
|
|
|
|
|
|
|
|
|
|
|
|
|
95 |
})
|
96 |
|
97 |
+
event_count_values = list(match_ev_count.values())
|
98 |
+
min_value = min(event_count_values)
|
99 |
+
max_value = max(event_count_values)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
100 |
|
101 |
|
102 |
+
ticks = list(range((min_value // 200) * 200, (max_value // 200 + 1) * 200, 200))
|
|
|
103 |
|
104 |
+
click = alt.selection_single(encodings=['x'], nearest=True)
|
|
|
105 |
|
106 |
+
hist = alt.Chart(data).mark_bar().encode(
|
107 |
+
alt.X('Event Count:Q', bin=alt.Bin(maxbins=20), title='events (n)', axis=alt.Axis(values=ticks)),
|
108 |
+
alt.Y('count()', title='frequency (n)'),
|
109 |
+
tooltip=[alt.Tooltip('mean(Event Count):Q', title='Mean', format='.2f')]
|
110 |
+
).properties(
|
111 |
+
width=600,
|
112 |
+
height=400
|
|
|
|
|
|
|
|
|
|
|
113 |
).add_selection(
|
114 |
+
click
|
115 |
)
|
116 |
|
117 |
+
mean_rule = alt.Chart(data).transform_filter(
|
118 |
+
click
|
119 |
+
).mark_rule(color='firebrick', size=3).encode(
|
120 |
+
x='mean(Event Count):Q',
|
|
|
|
|
|
|
|
|
121 |
)
|
122 |
|
123 |
+
chart2 = alt.layer(hist, mean_rule).properties(
|
124 |
+
title='Histogram of Event Counts with Click Interaction and Tooltip'
|
|
|
|
|
|
|
|
|
|
|
125 |
)
|
126 |
|
127 |
+
chart2
|
128 |
|
129 |
+
combined_chart1 = alt.hconcat(
|
130 |
+
chart1,
|
131 |
+
chart2,
|
132 |
+
spacing=10
|
133 |
).resolve_scale(
|
134 |
color='independent'
|
135 |
)
|
136 |
+
combined_chart1
|
|
|
|
|
137 |
|