siyuansc committed on
Commit
dc79570
1 Parent(s): a805f20

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +63 -296
app.py CHANGED
@@ -1,27 +1,9 @@
1
  import json
2
  from collections import Counter
3
  import numpy as np
4
- import operator
5
- import matplotlib.pyplot as plt
6
- from matplotlib.ticker import FuncFormatter
7
- from matplotlib.patches import Ellipse
8
- import seaborn as sns
9
  import pandas as pd
10
- import networkx as nx
11
- import base64
12
- from collections import defaultdict
13
- import sys,os
14
- import math
15
- import random
16
- import operator
17
- import csv
18
- import matplotlib.pylab as pyl
19
- import itertools
20
- import scipy as sp
21
- from scipy import stats
22
- from scipy import optimize
23
- from scipy.integrate import quad
24
  import altair as alt
 
25
 
26
  import warnings
27
  warnings.filterwarnings('ignore')
@@ -30,14 +12,14 @@ warnings.filterwarnings('ignore')
30
  events={}
31
  nations = ['Italy','England','Germany','France','Spain','European_Championship','World_Cup']
32
  for nation in nations:
33
- with open('./events_%s.json' %nation) as json_data:
34
  events[nation] = json.load(json_data)
35
 
36
  # loading the match data
37
  matches={}
38
  nations = ['Italy','England','Germany','France','Spain','European_Championship','World_Cup']
39
  for nation in nations:
40
- with open('./matches_%s.json' %nation) as json_data:
41
  matches[nation] = json.load(json_data)
42
 
43
  # loading the players data
@@ -50,7 +32,6 @@ competitions={}
50
  with open('competitions.json') as json_data:
51
  competitions = json.load(json_data)
52
 
53
-
54
  ev_all_nations = []
55
  for nation in nations:
56
  for i in range(len(events[nation])):
@@ -61,310 +42,96 @@ total = len(ev_all_nations)
61
  counter = {event: int((count / total) * 100) for event, count in count.items()}
62
  sorted_counter = sorted(counter.items(), key=operator.itemgetter(1), reverse=False)
63
 
64
- # Convert to DataFrame for Altair plotting.
65
- data = pd.DataFrame(sorted_counter, columns=['Event', 'Percentage'])
66
-
67
- # Generate the bar chart using Altair.
68
- chart = alt.Chart(data).mark_bar().encode(
69
- y=alt.Y('Event:N', title=None, sort='-x'), # Sort is handled by the data frame's order.
70
- x='Percentage:Q',
71
- ).properties(
72
- width=600,
73
- height=400
74
- )
75
-
76
- # Display the chart.
77
- chart.display()
78
-
79
-
80
- data = pd.DataFrame({
81
- 'Event Count': list(match_ev_count.values())
82
- })
83
-
84
- mean_val = int(np.mean(data['Event Count']))
85
- std_val = int(np.std(data['Event Count']))
86
-
87
- hist = alt.Chart(data).mark_bar().encode(
88
- alt.X('Event Count:Q', bin=alt.Bin(maxbins=20), title='events (n)',
89
- axis=alt.Axis(values=np.arange(0, max(data['Event Count']) + 100, 100))),
90
- alt.Y('count()', title='frequency (n)')
91
- ).properties(
92
- width=600,
93
- height=400
94
- )
95
-
96
- text = alt.Chart(pd.DataFrame({'x': [mean_val + std_val], 'y': [1], 'text': [f'μ = {mean_val} \n σ = {std_val}']})).mark_text(
97
- align='left',
98
- baseline='top',
99
- fontSize=20,
100
- dx=-120,
101
- dy=-300
102
- ).encode(
103
- x='x:Q',
104
- y='y:Q',
105
- text='text:N'
106
- )
107
-
108
- chart = hist + text
109
- chart.display()
110
-
111
 
112
- import altair as alt
113
- import pandas as pd
114
-
115
-
116
- match_id = 2576335
117
- a_match = []
118
- for nation in nations:
119
- for ev in events[nation]:
120
- if ev['matchId'] == match_id:
121
- a_match.append(ev)
122
-
123
- for nation in nations:
124
- for match in matches[nation]:
125
- if match['wyId'] == match_id:
126
- match_f = match
127
-
128
- df_a_match = pd.DataFrame(a_match)
129
-
130
- background_data = pd.DataFrame({
131
- 'x': [0],
132
- 'y': [0],
133
- 'x2': [100],
134
- 'y2': [100]
135
- })
136
-
137
- # Create the background using `mark_rect`
138
- background = alt.Chart(background_data).mark_rect(
139
- color='#195905' # Soccer field green color
140
- ).encode(
141
- x='x:Q',
142
- y='y:Q',
143
- x2='x2:Q',
144
- y2='y2:Q'
145
- )
146
-
147
-
148
- #Define the center circle
149
- center_circle = alt.Chart(pd.DataFrame({'x': [50], 'y': [50]})).mark_point(
150
- size=12000, # Adjust size as necessary
151
- color='white',
152
- strokeWidth=3
153
- ).encode(
154
- x='x:Q',
155
- y='y:Q'
156
- )
157
-
158
- border_lines_data = pd.DataFrame({
159
- 'x': [1, 1, 99.5, 99.5, 1],
160
- 'y': [1, 99.5, 99.5, 1, 1],
161
- 'x2': [1, 99.5, 99.5, 1, 1],
162
- 'y2': [99.5, 99.5, 1, 1, 1]
163
- })
164
-
165
- # Create the border lines using `mark_line`
166
-
167
- border_lines = alt.Chart(border_lines_data).mark_line(
168
- color='white',
169
- strokeWidth=3 # This controls the thickness of the line
170
- ).encode(
171
- x=alt.X('x:Q', scale=alt.Scale(domain=[1, 99.5])),
172
- y=alt.Y('y:Q', scale=alt.Scale(domain=[1, 99.5])),
173
- x2='x2:Q',
174
- y2='y2:Q'
175
- )
176
-
177
- midline_data = pd.DataFrame({
178
- 'x': [50, 50,],
179
- 'y': [1, 99, ]
180
- })
181
-
182
- # Create the line using `mark_line`
183
- midline = alt.Chart(midline_data).mark_line(
184
- color='white', # Color of the line
185
- strokeWidth=3 # Thickness of the line
186
- ).encode(
187
- x='x:Q',
188
- y='y:Q'
189
- )
190
- lines_data = pd.DataFrame({
191
- 'x': [1, 17.5, 17.5, 1, 82.5, 82.5, 99,1,6.5,6.5,1, 99,93.5,93.5],
192
- 'y': [21.3, 21.3, 77.7, 77.7, 21.3, 77.7, 77.7,37.5,37.5,62.5,62.5,37.5,37.5,62.5],
193
- 'x2': [17.5, 17.5, 1, 17.5, 99, 82.5, 82.5, 6.5,6.5,1,6.5,93.5,93.5,99],
194
- 'y2': [21.3, 77.7, 77.7, 77.7, 21.3, 21.3,77.7,37.5,62.5,62.5,62.5,37.5,62.5,62.5]
195
- })
196
 
197
- lines = alt.Chart(lines_data).mark_line(
198
- color='white', # Color of the lines
199
- strokeWidth=3 # Thickness of the lines
200
- ).encode(
201
- x='x:Q',
202
- y='y:Q',
203
- x2='x2:Q',
204
- y2='y2:Q'
205
- )
206
 
207
- dot_positions = pd.DataFrame({
208
- 'x': [12, 87],
209
- 'y': [50, 50]
210
- })
211
 
212
- # Create the white dots using `mark_point`
213
- white_dots = alt.Chart(dot_positions).mark_point(
214
- size=100, # Adjust the size as needed
215
- color='white',
216
- filled=True
217
- ).encode(
218
- x='x:Q',
219
- y='y:Q'
220
  )
221
 
222
-
223
-
224
- theta = np.linspace(0, np.pi, 100) # Radians from 0 to pi for a full semicircle
225
- semicircle_x = 12 + 9.5 * np.cos(theta) # Shift the semicircle to the right of x=12
226
- semicircle_y = 50 + 9.5 * np.sin(theta)
227
-
228
- # Convert the points to a dataframe
229
- semicircle_data = pd.DataFrame({
230
- 'x': semicircle_x,
231
- 'y': semicircle_y
232
- })
233
-
234
- # Filter to keep the right side of the semicircle only
235
- semicircle_data = semicircle_data[semicircle_data['x'] >= 17.5]
236
-
237
- # Create the semicircle line
238
- arc1 = alt.Chart(semicircle_data).mark_line(
239
- color='white',
240
- strokeWidth=3
241
- ).encode(
242
- x=alt.X('x', scale=alt.Scale(domain=[0, 100])),
243
- y=alt.Y('y', scale=alt.Scale(domain=[0, 100]))
244
  )
245
 
246
-
247
-
248
- theta = np.linspace(0, np.pi, 100) # Radians from 0 to pi for a full semicircle
249
- semicircle_x2 = 12 + 9.5 * np.cos(theta) # Shift the semicircle to the right of x=12
250
- semicircle_y2 = 50 - 9.5 * np.sin(theta)
251
-
252
- # Convert the points to a dataframe
253
- semicircle_data2 = pd.DataFrame({
254
- 'x': semicircle_x2,
255
- 'y': semicircle_y2
256
- })
257
-
258
- # Filter to keep the right side of the semicircle only
259
- semicircle_data2 = semicircle_data2[semicircle_data2['x'] >= 17.5]
260
-
261
- # Create the semicircle line
262
- arc2 = alt.Chart(semicircle_data2).mark_line(
263
- color='white',
264
- strokeWidth=3
265
  ).encode(
266
- x=alt.X('x', scale=alt.Scale(domain=[0, 100])),
267
- y=alt.Y('y', scale=alt.Scale(domain=[0, 100]))
 
 
268
  )
269
 
270
-
271
- theta = np.linspace(0, np.pi, 100) # Radians from 0 to pi for a full semicircle
272
- semicircle_x3 = 87 - 9.5 * np.cos(theta) # Shift the semicircle to the right of x=12
273
- semicircle_y3 = 50 + 9.5 * np.sin(theta)
274
-
275
- # Convert the points to a dataframe
276
- semicircle_data3 = pd.DataFrame({
277
- 'x': semicircle_x3,
278
- 'y': semicircle_y3
279
- })
280
-
281
- # Filter to keep the right side of the semicircle only
282
- semicircle_data3 = semicircle_data3[semicircle_data3['x'] <= 82.5]
283
-
284
- # Create the semicircle line
285
- arc3 = alt.Chart(semicircle_data3).mark_line(
286
- color='white',
287
- strokeWidth=3
288
- ).encode(
289
- x=alt.X('x', scale=alt.Scale(domain=[0, 100])),
290
- y=alt.Y('y', scale=alt.Scale(domain=[0, 100]))
291
  )
292
 
 
 
293
 
 
 
 
 
 
 
 
294
 
295
- theta = np.linspace(0, np.pi, 100) # Radians from 0 to pi for a full semicircle
296
- semicircle_x4 = 87 - 9.5 * np.cos(theta) # Shift the semicircle to the right of x=12
297
- semicircle_y4 = 50 - 9.5 * np.sin(theta)
298
-
299
- # Convert the points to a dataframe
300
- semicircle_data4 = pd.DataFrame({
301
- 'x': semicircle_x4,
302
- 'y': semicircle_y4
303
  })
304
 
305
- # Filter to keep the right side of the semicircle only
306
- semicircle_data4 = semicircle_data4[semicircle_data4['x'] <= 82.5]
307
-
308
- # Create the semicircle line
309
- arc4 = alt.Chart(semicircle_data4).mark_line(
310
- color='white',
311
- strokeWidth=3
312
- ).encode(
313
- x=alt.X('x', scale=alt.Scale(domain=[0, 100]), title=None ),
314
- y=alt.Y('y', scale=alt.Scale(domain=[0, 100]), title=None)
315
- )
316
 
317
 
318
- df_a_match['x'] = [pos[0]['x'] for pos in df_a_match['positions']]
319
- df_a_match['y'] = [pos[0]['y'] for pos in df_a_match['positions']]
320
 
321
- brush = alt.selection(type='interval', encodings=['x','y'])
322
- #Create scatter plots for events and color by teamId
323
 
324
- team_event = alt.Chart(df_a_match).mark_point(
325
- size=50, # Adjust the size as necessary
326
- opacity=1,
327
- filled=True
328
- ).encode(
329
- x=alt.X('x:Q', axis=alt.Axis(labels=False, ticks=False, grid=False)),
330
- y=alt.Y('y:Q', axis=alt.Axis(labels=False, ticks=False, grid=False)),
331
- #color=alt.Color('teamId:N', legend=None, scale=alt.Scale(domain=list(df_a_match['teamId'].unique()), range=['black', 'cyan'])),
332
- color=alt.condition(brush, # Only apply color to selected points
333
- alt.Color('teamId:N', legend=None, scale=alt.Scale(domain=list(df_a_match['teamId'].unique()), range=['black', 'cyan'])),
334
- alt.value('lightgray')),
335
- tooltip=['eventName:N', 'teamId:N', 'x:Q', 'y:Q']
336
  ).add_selection(
337
- brush
338
  )
339
 
340
-
341
-
342
-
343
- # Combine the pitch background, center dot, and events
344
- soccer_pitch = alt.layer(background, border_lines, midline, lines, white_dots, arc1, arc2, arc3, arc4, center_circle, team_event).properties(
345
- width=700,
346
- height=440,
347
- title="Lazio - Internazionale, 2 - 3"
348
  )
349
 
350
- bars = alt.Chart(df_a_match).mark_bar().encode(
351
- y=alt.Y('eventName:N', sort='-x', title=None),
352
- x=alt.X('count():Q', title=None),
353
- # This specifies a separate color encoding for the bar chart based on eventName
354
- color=alt.Color('eventName:N',legend=None)
355
- ).transform_filter(
356
- brush
357
  )
358
 
 
359
 
360
-
361
- combined_chart = alt.hconcat(
362
- soccer_pitch,
363
- bars
364
  ).resolve_scale(
365
  color='independent'
366
  )
367
- # Display the combined chart
368
- combined_chart.display()
369
-
370
 
 
1
  import json
2
  from collections import Counter
3
  import numpy as np
 
 
 
 
 
4
  import pandas as pd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  import altair as alt
6
+ import operator
7
 
8
  import warnings
9
  warnings.filterwarnings('ignore')
 
12
  events={}
13
  nations = ['Italy','England','Germany','France','Spain','European_Championship','World_Cup']
14
  for nation in nations:
15
+ with open('events/events_%s.json' %nation) as json_data:
16
  events[nation] = json.load(json_data)
17
 
18
  # loading the match data
19
  matches={}
20
  nations = ['Italy','England','Germany','France','Spain','European_Championship','World_Cup']
21
  for nation in nations:
22
+ with open('matches/matches_%s.json' %nation) as json_data:
23
  matches[nation] = json.load(json_data)
24
 
25
  # loading the players data
 
32
  with open('competitions.json') as json_data:
33
  competitions = json.load(json_data)
34
 
 
35
  ev_all_nations = []
36
  for nation in nations:
37
  for i in range(len(events[nation])):
 
42
  counter = {event: int((count / total) * 100) for event, count in count.items()}
43
  sorted_counter = sorted(counter.items(), key=operator.itemgetter(1), reverse=False)
44
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
+ data = pd.DataFrame(sorted_counter, columns=['Event', 'Percentage'])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
+ brush = alt.selection_interval(encodings=['y'])
 
 
 
 
 
 
 
 
49
 
50
+ max_value = data['Percentage'].max()
51
+ tick_values = list(range(0, int(max_value) + 10, 10))
 
 
52
 
53
+ bars = alt.Chart(data).mark_bar().encode(
54
+ y=alt.Y('Event:N', title=None, sort='-x'),
55
+ x=alt.X('Percentage:Q', title='events(%)', axis=alt.Axis(values=tick_values)),
56
+ color=alt.condition(brush, alt.Color('Event:N', legend=None), alt.value('lightgray'))
57
+ ).add_selection(
58
+ brush
 
 
59
  )
60
 
61
+ average_rule = alt.Chart(data).mark_rule(color='firebrick', strokeWidth=2).encode(
62
+ x='mean(Percentage):Q'
63
+ ).transform_filter(
64
+ brush
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  )
66
 
67
+ average_text = alt.Chart(data).mark_text(
68
+ dx=5, dy=-5, color='firebrick', align='left', fontWeight='bold'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  ).encode(
70
+ x=alt.X('mean(Percentage):Q', aggregate='mean'),
71
+ text=alt.Text('mean(Percentage):Q', aggregate='mean', format='.1f')
72
+ ).transform_filter(
73
+ brush
74
  )
75
 
76
+ chart1 = alt.layer(bars, average_rule, average_text).properties(
77
+ width=600,
78
+ height=500,
79
+ title='Events Distribution'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  )
81
 
82
+ # Display the combined chart
83
+ chart1
84
 
85
+ match_ev_count = {}
86
+ for nation in nations:
87
+ for ev in events[nation]:
88
+ if ev['matchId'] not in match_ev_count:
89
+ match_ev_count[ev['matchId']] = 1
90
+ else:
91
+ match_ev_count[ev['matchId']] += 1
92
 
93
+ data = pd.DataFrame({
94
+ 'Event Count': list(match_ev_count.values())
 
 
 
 
 
 
95
  })
96
 
97
+ event_count_values = list(match_ev_count.values())
98
+ min_value = min(event_count_values)
99
+ max_value = max(event_count_values)
 
 
 
 
 
 
 
 
100
 
101
 
102
+ ticks = list(range((min_value // 200) * 200, (max_value // 200 + 1) * 200, 200))
 
103
 
104
+ click = alt.selection_single(encodings=['x'], nearest=True)
 
105
 
106
+ hist = alt.Chart(data).mark_bar().encode(
107
+ alt.X('Event Count:Q', bin=alt.Bin(maxbins=20), title='events (n)', axis=alt.Axis(values=ticks)),
108
+ alt.Y('count()', title='frequency (n)'),
109
+ tooltip=[alt.Tooltip('mean(Event Count):Q', title='Mean', format='.2f')]
110
+ ).properties(
111
+ width=600,
112
+ height=400
 
 
 
 
 
113
  ).add_selection(
114
+ click
115
  )
116
 
117
+ mean_rule = alt.Chart(data).transform_filter(
118
+ click
119
+ ).mark_rule(color='firebrick', size=3).encode(
120
+ x='mean(Event Count):Q',
 
 
 
 
121
  )
122
 
123
+ chart2 = alt.layer(hist, mean_rule).properties(
124
+ title='Histogram of Event Counts with Click Interaction and Tooltip'
 
 
 
 
 
125
  )
126
 
127
+ chart2
128
 
129
+ combined_chart1 = alt.hconcat(
130
+ chart1,
131
+ chart2,
132
+ spacing=10
133
  ).resolve_scale(
134
  color='independent'
135
  )
136
+ combined_chart1
 
 
137