Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,897 +1,335 @@
|
|
1 |
import streamlit as st
|
2 |
-
|
3 |
-
import numpy as np
|
4 |
import pandas as pd
|
5 |
-
import
|
6 |
-
import os
|
7 |
-
import warnings
|
8 |
-
|
9 |
-
warnings.filterwarnings('ignore')
|
10 |
-
|
11 |
-
|
12 |
-
# In[90]:
|
13 |
-
|
14 |
-
|
15 |
-
# In[91]:
|
16 |
|
17 |
def main():
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
# fifa_rank_top10 = fifa_rank.groupby('team').first().sort_values('rank', ascending=True)[0:10].reset_index()
|
59 |
-
|
60 |
-
|
61 |
-
# # fifa_rank_top10
|
62 |
-
|
63 |
-
|
64 |
-
# # ### Top 10 teams with the highest winning percentage at home and away
|
65 |
-
|
66 |
-
# # In[95]:
|
67 |
-
|
68 |
-
|
69 |
-
# def home_percentage(team):
|
70 |
-
# score = len(df[(df['home_team'] == team) & (df['home_team_result'] == "Win")]) / len(
|
71 |
-
# df[df['home_team'] == team]) * 100
|
72 |
-
# return round(score)
|
73 |
-
|
74 |
-
|
75 |
-
# def away_percentage(team):
|
76 |
-
# score = len(df[(df['away_team'] == team) & (df['home_team_result'] == "Lose")]) / len(
|
77 |
-
# df[df['away_team'] == team]) * 100
|
78 |
-
# return round(score)
|
79 |
-
|
80 |
-
|
81 |
-
# # In[96]:
|
82 |
-
|
83 |
-
|
84 |
-
# fifa_rank_top10['Home_win_Per'] = np.vectorize(home_percentage)(fifa_rank_top10['team'])
|
85 |
-
# fifa_rank_top10['Away_win_Per'] = np.vectorize(away_percentage)(fifa_rank_top10['team'])
|
86 |
-
# fifa_rank_top10['Average_win_Per'] = round((fifa_rank_top10['Home_win_Per'] + fifa_rank_top10['Away_win_Per']) / 2)
|
87 |
-
# fifa_rank_win = fifa_rank_top10.sort_values('Average_win_Per', ascending=False)
|
88 |
-
# # fifa_rank_win
|
89 |
-
|
90 |
-
|
91 |
-
# # ### Top 10 attacking teams in the last FIFA date
|
92 |
-
|
93 |
-
# # In[97]:
|
94 |
-
|
95 |
-
|
96 |
-
# fifa_offense = df[['date', 'home_team', 'away_team', 'home_team_mean_offense_score', 'away_team_mean_offense_score']]
|
97 |
-
# home = fifa_offense[['date', 'home_team', 'home_team_mean_offense_score']].rename(
|
98 |
-
# columns={"home_team": "team", "home_team_mean_offense_score": "offense_score"})
|
99 |
-
# away = fifa_offense[['date', 'away_team', 'away_team_mean_offense_score']].rename(
|
100 |
-
# columns={"away_team": "team", "away_team_mean_offense_score": "offense_score"})
|
101 |
-
# fifa_offense = pd.concat([home, away])
|
102 |
-
# fifa_offense = fifa_offense.sort_values(['date', 'team'], ascending=[False, True])
|
103 |
-
# last_offense = fifa_offense
|
104 |
-
# fifa_offense_top10 = fifa_offense.groupby('team').first().sort_values('offense_score', ascending=False)[
|
105 |
-
# 0:10].reset_index()
|
106 |
-
# # fifa_offense_top10
|
107 |
-
|
108 |
-
# import plotly.graph_objs as go
|
109 |
-
# import plotly.figure_factory as ff
|
110 |
-
|
111 |
-
# # In[99]:
|
112 |
-
|
113 |
-
# # Display the data for the bar chart
|
114 |
-
# st.write("Top 10 Attacking Teams")
|
115 |
-
# st.write(fifa_offense_top10)
|
116 |
-
|
117 |
-
# # Create a horizontal bar chart
|
118 |
-
# fig_bar = go.Figure(data=[go.Bar(y=fifa_offense_top10['team'], x=fifa_offense_top10['offense_score'], orientation='h')])
|
119 |
-
# # Update layout to include title, x-label, and y-label
|
120 |
-
# fig_bar.update_layout(title='Top 10 Attacking Teams',
|
121 |
-
# xaxis_title='Offense Score',
|
122 |
-
# yaxis_title='Team')
|
123 |
-
# st.plotly_chart(fig_bar)
|
124 |
-
|
125 |
-
# # Display the data for the bar chart
|
126 |
-
# # st.write("Top 10 Offense Teams")
|
127 |
-
# # st.write(fifa_offense_top10)
|
128 |
-
|
129 |
-
# # sns.barplot(data=fifa_offense_top10, x='offense_score', y='team', color="#7F1431")
|
130 |
-
# # plt.xlabel('Offense Score', size = 20)
|
131 |
-
# # plt.ylabel('Team', size = 20)
|
132 |
-
# # plt.title("Top 10 Attacking teams");
|
133 |
-
|
134 |
-
|
135 |
-
# # ### Top 10 Midfield teams in the last FIFA date
|
136 |
-
|
137 |
-
# # In[100]:
|
138 |
-
|
139 |
-
|
140 |
-
# fifa_midfield = df[['date', 'home_team', 'away_team', 'home_team_mean_midfield_score', 'away_team_mean_midfield_score']]
|
141 |
-
# home = fifa_midfield[['date', 'home_team', 'home_team_mean_midfield_score']].rename(
|
142 |
-
# columns={"home_team": "team", "home_team_mean_midfield_score": "midfield_score"})
|
143 |
-
# away = fifa_midfield[['date', 'away_team', 'away_team_mean_midfield_score']].rename(
|
144 |
-
# columns={"away_team": "team", "away_team_mean_midfield_score": "midfield_score"})
|
145 |
-
# fifa_midfield = pd.concat([home, away])
|
146 |
-
# fifa_midfield = fifa_midfield.sort_values(['date', 'team'], ascending=[False, True])
|
147 |
-
# last_midfield = fifa_midfield
|
148 |
-
# fifa_midfield_top10 = fifa_midfield.groupby('team').first().sort_values('midfield_score', ascending=False)[
|
149 |
-
# 0:10].reset_index()
|
150 |
-
# # fifa_midfield_top10
|
151 |
-
|
152 |
-
|
153 |
-
# # In[101]:
|
154 |
-
|
155 |
-
# # Display the data for the bar chart
|
156 |
-
# st.write("Top 10 Midfield Teams")
|
157 |
-
# st.write(fifa_midfield_top10)
|
158 |
-
|
159 |
-
# # Create a horizontal bar chart
|
160 |
-
# fig_bar = go.Figure(
|
161 |
-
# data=[go.Bar(y=fifa_midfield_top10['team'], x=fifa_midfield_top10['midfield_score'], orientation='h')])
|
162 |
-
# # Update layout to include title, x-label, and y-label
|
163 |
-
# fig_bar.update_layout(title='Top 10 Midfield Teams', # Set the title
|
164 |
-
# xaxis_title='Midfield Score', # Set the x-axis label
|
165 |
-
# yaxis_title='Team') # Set the y-axis label
|
166 |
-
|
167 |
-
# # Display the bar chart
|
168 |
-
# st.plotly_chart(fig_bar)
|
169 |
-
|
170 |
-
# # sns.barplot(data=fifa_midfield_top10, x='midfield_score', y='team', color="#7F1431")
|
171 |
-
# # plt.xlabel('Midfield Score', size = 20)
|
172 |
-
# # plt.ylabel('Team', size = 20)
|
173 |
-
# # plt.title("Top 10 Midfield teams");
|
174 |
-
|
175 |
-
|
176 |
-
# # ### Top 10 defending teams in the last FIFA date
|
177 |
-
|
178 |
-
# # In[102]:
|
179 |
-
|
180 |
-
|
181 |
-
# fifa_defense = df[['date', 'home_team', 'away_team', 'home_team_mean_defense_score', 'away_team_mean_defense_score']]
|
182 |
-
# home = fifa_defense[['date', 'home_team', 'home_team_mean_defense_score']].rename(
|
183 |
-
# columns={"home_team": "team", "home_team_mean_defense_score": "defense_score"})
|
184 |
-
# away = fifa_defense[['date', 'away_team', 'away_team_mean_defense_score']].rename(
|
185 |
-
# columns={"away_team": "team", "away_team_mean_defense_score": "defense_score"})
|
186 |
-
# fifa_defense = pd.concat([home, away])
|
187 |
-
# fifa_defense = fifa_defense.sort_values(['date', 'team'], ascending=[False, True])
|
188 |
-
# last_defense = fifa_defense
|
189 |
-
# fifa_defense_top10 = fifa_defense.groupby('team').first().sort_values('defense_score', ascending=False)[
|
190 |
-
# 0:10].reset_index()
|
191 |
-
# # fifa_defense_top10
|
192 |
-
|
193 |
-
|
194 |
-
# # In[103]:
|
195 |
-
|
196 |
-
# # Display the data for the bar chart
|
197 |
-
# st.write("Top 10 Defensive Teams")
|
198 |
-
# st.write(fifa_defense_top10)
|
199 |
-
|
200 |
-
# # Create the horizontal bar chart
|
201 |
-
# fig_bar = go.Figure(data=[go.Bar(y=fifa_defense_top10['team'], x=fifa_defense_top10['defense_score'], orientation='h')])
|
202 |
-
|
203 |
-
# # Update layout to include title, x-label, and y-label
|
204 |
-
# fig_bar.update_layout(title='Top 10 Defensive Teams', # Set the title
|
205 |
-
# xaxis_title='Defense Score', # Set the x-axis label
|
206 |
-
# yaxis_title='Team') # Set the y-axis label
|
207 |
-
|
208 |
-
# # Display the bar chart
|
209 |
-
# st.plotly_chart(fig_bar)
|
210 |
-
|
211 |
-
# # sns.barplot(data=fifa_defense_top10, x='defense_score', y='team', color="#7F1431")
|
212 |
-
# # plt.xlabel('Defense Score', size=20)
|
213 |
-
# # plt.ylabel('Team', size=20)
|
214 |
-
# # plt.title("Top 10 Defense Teams")
|
215 |
-
|
216 |
-
# # ### Do Home teams have any advantage?
|
217 |
-
|
218 |
-
# # In[104]:
|
219 |
-
|
220 |
-
|
221 |
-
# # Select all matches played at non-neutral locations
|
222 |
-
# home_team_advantage = df[df['neutral_location'] == False]['home_team_result'].value_counts(normalize=True)
|
223 |
-
|
224 |
-
# # # Plot
|
225 |
-
# # fig, axes = plt.subplots(1, 1, figsize=(8,8))
|
226 |
-
# # ax =plt.pie(home_team_advantage ,labels = ['Win', 'Lose', 'Draw'], autopct='%.0f%%')
|
227 |
-
# # plt.title('Home team match result', fontsize = 15)
|
228 |
-
# # plt.show()
|
229 |
-
|
230 |
-
|
231 |
-
# # As the graph shows, the home team has an advantage over the away team. This is due to factors such as the fans, the weather and the confidence of the players. For this reason, in the World Cup, those teams that sit at home will have an advantage.
|
232 |
-
|
233 |
-
# # # DATA PREPARATION AND FEATURE ENGINEERING
|
234 |
-
# # In this section, I will fill in the empty fields in the dataset and clean up the data for teams that did not qualify for the World Cup. Then, I will use the correlation matrix to choose the characteristics that will define the training dataset of the Machine Learning model. Finally, I will use the ratings of the teams in their last match to define the "Last Team Scores" dataset (i.e., the dataset that I will use to predict the World Cup matches).
|
235 |
-
|
236 |
-
# # ### Analyze and fill na's
|
237 |
-
|
238 |
-
# # In[105]:
|
239 |
-
|
240 |
-
# #
|
241 |
-
# # df.isnull().sum()
|
242 |
-
|
243 |
-
|
244 |
-
# # In[106]:
|
245 |
-
|
246 |
-
|
247 |
-
# # We can fill mean for na's in goal_keeper_score
|
248 |
-
# df[df['home_team'] == "Brazil"]['home_team_goalkeeper_score'].describe()
|
249 |
-
|
250 |
-
# # In[107]:
|
251 |
-
|
252 |
-
|
253 |
-
# df['home_team_goalkeeper_score'] = round(
|
254 |
-
# df.groupby("home_team")["home_team_goalkeeper_score"].transform(lambda x: x.fillna(x.mean())))
|
255 |
-
# df['away_team_goalkeeper_score'] = round(
|
256 |
-
# df.groupby("away_team")["away_team_goalkeeper_score"].transform(lambda x: x.fillna(x.mean())))
|
257 |
-
|
258 |
-
# # In[108]:
|
259 |
-
|
260 |
-
|
261 |
-
# # We can fill mean for na's in defense score
|
262 |
-
# df[df['away_team'] == "Uruguay"]['home_team_mean_defense_score'].describe()
|
263 |
-
|
264 |
-
# # In[65]:
|
265 |
-
|
266 |
-
|
267 |
-
# df['home_team_mean_defense_score'] = round(
|
268 |
-
# df.groupby('home_team')['home_team_mean_defense_score'].transform(lambda x: x.fillna(x.mean())))
|
269 |
-
# df['away_team_mean_defense_score'] = round(
|
270 |
-
# df.groupby('away_team')['away_team_mean_defense_score'].transform(lambda x: x.fillna(x.mean())))
|
271 |
-
|
272 |
-
# # In[109]:
|
273 |
-
|
274 |
-
|
275 |
-
# # We can fill mean for na's in offense score
|
276 |
-
# df[df['away_team'] == "Uruguay"]['home_team_mean_offense_score'].describe()
|
277 |
-
|
278 |
-
# # In[67]:
|
279 |
-
|
280 |
-
|
281 |
-
# df['home_team_mean_offense_score'] = round(
|
282 |
-
# df.groupby('home_team')['home_team_mean_offense_score'].transform(lambda x: x.fillna(x.mean())))
|
283 |
-
# df['away_team_mean_offense_score'] = round(
|
284 |
-
# df.groupby('away_team')['away_team_mean_offense_score'].transform(lambda x: x.fillna(x.mean())))
|
285 |
-
|
286 |
-
# # In[110]:
|
287 |
-
|
288 |
-
|
289 |
-
# # We can fill mean for na's in midfield score
|
290 |
-
# df[df['away_team'] == "Uruguay"]['home_team_mean_midfield_score'].describe()
|
291 |
-
|
292 |
-
# # In[111]:
|
293 |
-
|
294 |
-
|
295 |
-
# df['home_team_mean_midfield_score'] = round(
|
296 |
-
# df.groupby('home_team')['home_team_mean_midfield_score'].transform(lambda x: x.fillna(x.mean())))
|
297 |
-
# df['away_team_mean_midfield_score'] = round(
|
298 |
-
# df.groupby('away_team')['away_team_mean_midfield_score'].transform(lambda x: x.fillna(x.mean())))
|
299 |
-
|
300 |
-
# # In[112]:
|
301 |
-
|
302 |
-
|
303 |
-
# df.isnull().sum()
|
304 |
-
|
305 |
-
# # In[113]:
|
306 |
-
|
307 |
-
|
308 |
-
# # Teams that are not available in the FIFA game itself are assumed to be no worse than average, so we give them all an average score of 50.
|
309 |
-
# df.fillna(50, inplace=True)
|
310 |
-
|
311 |
-
# # ### Filter the teams participating in QATAR - World cup 2022
|
312 |
-
|
313 |
-
# # In[115]:
|
314 |
-
|
315 |
-
|
316 |
-
# list_2022 = ['Qatar', 'Germany', 'Denmark', 'Brazil', 'France', 'Belgium', 'Croatia', 'Spain', 'Serbia', 'England',
|
317 |
-
# 'Switzerland', 'Netherlands', 'Argentina', 'IR Iran', 'Korea Republic', 'Japan', 'Saudi Arabia', 'Ecuador',
|
318 |
-
# 'Uruguay', 'Canada', 'Ghana', 'Senegal', 'Portugal', 'Poland', 'Tunisia', 'Morocco', 'Cameroon', 'USA',
|
319 |
-
# 'Mexico', 'Wales', 'Australia', 'Costa Rica']
|
320 |
-
# final_df = df[(df["home_team"].apply(lambda x: x in list_2022)) | (df["away_team"].apply(lambda x: x in list_2022))]
|
321 |
-
|
322 |
-
# # **Top 10 teams in QATAR 2022**
|
323 |
-
|
324 |
-
# # In[116]:
|
325 |
-
|
326 |
-
|
327 |
-
# rank = final_df[['date', 'home_team', 'away_team', 'home_team_fifa_rank', 'away_team_fifa_rank']]
|
328 |
-
# home = rank[['date', 'home_team', 'home_team_fifa_rank']].rename(
|
329 |
-
# columns={"home_team": "team", "home_team_fifa_rank": "rank"})
|
330 |
-
# away = rank[['date', 'away_team', 'away_team_fifa_rank']].rename(
|
331 |
-
# columns={"away_team": "team", "away_team_fifa_rank": "rank"})
|
332 |
-
# rank = pd.concat([home, away])
|
333 |
-
|
334 |
-
# # Select each country latest match
|
335 |
-
# rank = rank.sort_values(['team', 'date'], ascending=[True, False])
|
336 |
-
# rank_top10 = rank.groupby('team').first().sort_values('rank', ascending=True).reset_index()
|
337 |
-
# rank_top10 = rank_top10[(rank_top10["team"].apply(lambda x: x in list_2022))][0:10]
|
338 |
-
|
339 |
-
# st.write("Top 10 Countries by Rank - Latest Match")
|
340 |
-
# rank_top10
|
341 |
-
|
342 |
-
# # # Create a scatter plot
|
343 |
-
# # fig_scatter = go.Figure(data=go.Scatter(x=rank_top10['team'], y=rank_top10['rank'], mode='markers', marker=dict(color='lightskyblue', size=12)))
|
344 |
-
# #
|
345 |
-
# # # Update layout to include title and labels
|
346 |
-
# # fig_scatter.update_layout(title='Top 10 Countries by Rank - Latest Match',
|
347 |
-
# # xaxis_title='Country',
|
348 |
-
# # yaxis_title='Rank')
|
349 |
-
# #
|
350 |
-
# # # Display the scatter plot
|
351 |
-
# # st.plotly_chart(fig_scatter)
|
352 |
-
|
353 |
-
# # **Top 10 teams with the highest winning percentage in QATAR 2022**
|
354 |
-
|
355 |
-
# # In[117]:
|
356 |
-
|
357 |
-
|
358 |
-
# rank_top10['Home_win_Per'] = np.vectorize(home_percentage)(rank_top10['team'])
|
359 |
-
# rank_top10['Away_win_Per'] = np.vectorize(away_percentage)(rank_top10['team'])
|
360 |
-
# rank_top10['Average_win_Per'] = round((rank_top10['Home_win_Per'] + rank_top10['Away_win_Per']) / 2)
|
361 |
-
# rank_top10_Win = rank_top10.sort_values('Average_win_Per', ascending=False)
|
362 |
-
|
363 |
-
# # st.write("Top 10 Countries by Rank - Latest Match")
|
364 |
-
# # rank_top10_Win
|
365 |
-
|
366 |
-
|
367 |
-
# # In[118]:
|
368 |
-
|
369 |
-
# # Display the data for the bar chart
|
370 |
-
# st.write("Top 10 Average Win Per game Teams")
|
371 |
-
# st.write(rank_top10_Win)
|
372 |
-
|
373 |
-
# # Create a horizontal bar chart
|
374 |
-
# # Create a horizontal bar chart
|
375 |
-
# fig_bar = go.Figure(data=[go.Bar(y=rank_top10_Win['team'], x=rank_top10_Win['Average_win_Per'], orientation='h')])
|
376 |
-
|
377 |
-
# # Update layout to include title and labels
|
378 |
-
# fig_bar.update_layout(title='Top 10 Countries by Average Win Percentage',
|
379 |
-
# xaxis_title='Average Win Percentage',
|
380 |
-
# yaxis_title='Country')
|
381 |
-
|
382 |
-
# # Display the horizontal bar chart
|
383 |
-
# st.plotly_chart(fig_bar)
|
384 |
-
|
385 |
-
# sns.barplot(data=rank_top10_Win,x='Average_win_Per',y='team',color="#7F1431")
|
386 |
-
# plt.xticks()
|
387 |
-
# plt.xlabel('Win Average', size = 20)
|
388 |
-
# plt.ylabel('Team', size = 20)
|
389 |
-
# plt.title('Top 10 QATAR 2022 teams with the highest winning percentage')
|
390 |
-
|
391 |
-
#
|
392 |
-
# # ### Correlation Matrix
|
393 |
-
#
|
394 |
-
# # In[124]:
|
395 |
-
#
|
396 |
-
#
|
397 |
-
# final_df['home_team_result'].values
|
398 |
-
# # for index, value in final_df['home_team_result'].items():
|
399 |
-
# # print(f"Row {index}: {value}")
|
400 |
-
#
|
401 |
-
#
|
402 |
-
# # In[125]:
|
403 |
-
#
|
404 |
-
#
|
405 |
-
# team_result_df = final_df
|
406 |
-
# # for index, value in team_result_df['home_team_result'].items():
|
407 |
-
# # print(f"Row {index}: {value}")
|
408 |
-
#
|
409 |
-
#
|
410 |
-
# # In[151]:
|
411 |
-
#
|
412 |
-
#
|
413 |
-
# # Mapping numeric values for home_team_result to find the correlations
|
414 |
-
# final_df['home_team_result'] = final_df['home_team_result'].map({'Win':1, 'Draw':2, 'Lose':0})
|
415 |
-
#
|
416 |
-
#
|
417 |
-
# # In[145]:
|
418 |
-
#
|
419 |
-
#
|
420 |
-
#
|
421 |
-
#
|
422 |
-
#
|
423 |
-
# # In[150]:
|
424 |
-
#
|
425 |
-
#
|
426 |
-
# final_df['home_team_result'].head(1)
|
427 |
-
#
|
428 |
-
#
|
429 |
-
# # In[152]:
|
430 |
-
#
|
431 |
-
#
|
432 |
-
# final_df['home_team_result'] = pd.to_numeric(final_df['home_team_result'], errors='coerce')
|
433 |
-
#
|
434 |
-
#
|
435 |
-
# # In[155]:
|
436 |
-
#
|
437 |
-
#
|
438 |
-
# # df.head()
|
439 |
-
#
|
440 |
-
#
|
441 |
-
# # In[156]:
|
442 |
-
#
|
443 |
-
#
|
444 |
-
# # final_df.head()
|
445 |
-
#
|
446 |
-
#
|
447 |
-
# # In[157]:
|
448 |
-
#
|
449 |
-
#
|
450 |
-
# numerical_df = final_df.select_dtypes(include=['number'])
|
451 |
-
#
|
452 |
-
#
|
453 |
-
# # In[158]:
|
454 |
-
#
|
455 |
-
#
|
456 |
-
# numerical_df.corr()['home_team_result'].sort_values(ascending=False)
|
457 |
-
#
|
458 |
-
#
|
459 |
-
# # In[153]:
|
460 |
-
#
|
461 |
-
#
|
462 |
-
# # final_df.corr()['home_team_result'].sort_values(ascending=False)
|
463 |
-
#
|
464 |
-
#
|
465 |
-
# # Dropping unnecessary columns.
|
466 |
-
#
|
467 |
-
# # In[ ]:
|
468 |
-
#
|
469 |
-
#
|
470 |
-
# # Dropping unnecessary columns
|
471 |
-
# final_df = final_df.drop(['date', 'home_team_continent', 'away_team_continent', 'home_team_total_fifa_points', 'away_team_total_fifa_points', 'home_team_score', 'away_team_score', 'tournament', 'city', 'country', 'neutral_location', 'shoot_out'],axis=1)
|
472 |
-
#
|
473 |
-
#
|
474 |
-
# # In[ ]:
|
475 |
-
#
|
476 |
-
#
|
477 |
-
# # final_df.columns
|
478 |
-
#
|
479 |
-
#
|
480 |
-
# # In[ ]:
|
481 |
-
#
|
482 |
-
#
|
483 |
-
# # Change column names
|
484 |
-
# final_df.rename(columns={"home_team":"Team1", "away_team":"Team2", "home_team_fifa_rank":"Team1_FIFA_RANK",
|
485 |
-
# "away_team_fifa_rank":"Team2_FIFA_RANK", "home_team_result":"Team1_Result", "home_team_goalkeeper_score":"Team1_Goalkeeper_Score",
|
486 |
-
# "away_team_goalkeeper_score":"Team2_Goalkeeper_Score", "home_team_mean_defense_score":"Team1_Defense",
|
487 |
-
# "home_team_mean_offense_score":"Team1_Offense", "home_team_mean_midfield_score":"Team1_Midfield",
|
488 |
-
# "away_team_mean_defense_score":"Team2_Defense", "away_team_mean_offense_score":"Team2_Offense",
|
489 |
-
# "away_team_mean_midfield_score":"Team2_Midfield"}, inplace=True)
|
490 |
-
#
|
491 |
-
#
|
492 |
-
# # In[ ]:
|
493 |
-
#
|
494 |
-
#
|
495 |
-
# plt.figure(figsize=(10, 4), dpi=200)
|
496 |
-
# sns.heatmap(final_df.corr(), annot=True)
|
497 |
-
#
|
498 |
-
#
|
499 |
-
# # In[ ]:
|
500 |
-
#
|
501 |
-
#
|
502 |
-
# # final_df.info()
|
503 |
-
#
|
504 |
-
#
|
505 |
-
# # In[ ]:
|
506 |
-
#
|
507 |
-
#
|
508 |
-
# # final_df
|
509 |
-
#
|
510 |
-
#
|
511 |
-
# # Exporting the training dataset.
|
512 |
-
#
|
513 |
-
# # In[ ]:
|
514 |
-
#
|
515 |
-
#
|
516 |
-
# # final_df.to_csv("./data/training.csv", index = False)
|
517 |
-
#
|
518 |
-
#
|
519 |
-
# # ### Creating "Last Team Scores" dataset
|
520 |
-
# # This dataset contains the qualifications of each team on the previous FIFA date and will be used to predict the World Cup matches.
|
521 |
-
#
|
522 |
-
# # In[ ]:
|
523 |
-
#
|
524 |
-
#
|
525 |
-
# last_goalkeeper = df[['date', 'home_team', 'away_team', 'home_team_goalkeeper_score', 'away_team_goalkeeper_score']]
|
526 |
-
# home = last_goalkeeper[['date', 'home_team', 'home_team_goalkeeper_score']].rename(columns={"home_team":"team", "home_team_goalkeeper_score":"goalkeeper_score"})
|
527 |
-
# away = last_goalkeeper[['date', 'away_team', 'away_team_goalkeeper_score']].rename(columns={"away_team":"team", "away_team_goalkeeper_score":"goalkeeper_score"})
|
528 |
-
# last_goalkeeper = pd.concat([home,away])
|
529 |
-
#
|
530 |
-
# last_goalkeeper = last_goalkeeper.sort_values(['date', 'team'],ascending=[False, True])
|
531 |
-
#
|
532 |
-
# list_2022 = ['Qatar', 'Germany', 'Denmark', 'Brazil', 'France', 'Belgium', 'Croatia', 'Spain', 'Serbia', 'England', 'Switzerland', 'Netherlands', 'Argentina', 'IR Iran', 'Korea Republic', 'Japan', 'Saudi Arabia', 'Ecuador', 'Uruguay', 'Canada', 'Ghana', 'Senegal', 'Portugal', 'Poland', 'Tunisia', 'Morocco', 'Cameroon', 'USA', 'Mexico', 'Wales', 'Australia', 'Costa Rica']
|
533 |
-
#
|
534 |
-
# rank_qatar = last_rank[(last_rank["team"].apply(lambda x: x in list_2022))]
|
535 |
-
# rank_qatar = rank_qatar.groupby('team').first().reset_index()
|
536 |
-
# goal_qatar = last_goalkeeper[(last_goalkeeper["team"].apply(lambda x: x in list_2022))]
|
537 |
-
# goal_qatar = goal_qatar.groupby('team').first().reset_index()
|
538 |
-
# goal_qatar = goal_qatar.drop(['date'], axis = 1)
|
539 |
-
# off_qatar = last_offense[(last_offense["team"].apply(lambda x: x in list_2022))]
|
540 |
-
# off_qatar = off_qatar.groupby('team').first().reset_index()
|
541 |
-
# off_qatar = off_qatar.drop(['date'], axis = 1)
|
542 |
-
# mid_qatar = last_midfield[(last_midfield["team"].apply(lambda x: x in list_2022))]
|
543 |
-
# mid_qatar = mid_qatar.groupby('team').first().reset_index()
|
544 |
-
# mid_qatar = mid_qatar.drop(['date'], axis = 1)
|
545 |
-
# def_qatar = last_defense[(last_defense["team"].apply(lambda x: x in list_2022))]
|
546 |
-
# def_qatar = def_qatar.groupby('team').first().reset_index()
|
547 |
-
# def_qatar = def_qatar.drop(['date'], axis = 1)
|
548 |
-
#
|
549 |
-
# qatar = pd.merge(rank_qatar, goal_qatar, on = 'team')
|
550 |
-
# qatar = pd.merge(qatar, def_qatar, on ='team')
|
551 |
-
# qatar = pd.merge(qatar, off_qatar, on ='team')
|
552 |
-
# qatar = pd.merge(qatar, mid_qatar, on ='team')
|
553 |
-
#
|
554 |
-
# qatar['goalkeeper_score'] = round(qatar["goalkeeper_score"].transform(lambda x: x.fillna(x.mean())))
|
555 |
-
# qatar['offense_score'] = round(qatar["offense_score"].transform(lambda x: x.fillna(x.mean())))
|
556 |
-
# qatar['midfield_score'] = round(qatar["midfield_score"].transform(lambda x: x.fillna(x.mean())))
|
557 |
-
# qatar['defense_score'] = round(qatar["defense_score"].transform(lambda x: x.fillna(x.mean())))
|
558 |
-
# # qatar.head(5)
|
559 |
-
#
|
560 |
-
#
|
561 |
-
# # Exporting the "Last Team Scores" dataset.
|
562 |
-
#
|
563 |
-
# # In[ ]:
|
564 |
-
#
|
565 |
-
|
566 |
-
|
567 |
-
st.title("FIFA winner predication")
|
568 |
-
st.write('This app predict 2022 FIFA winner')
|
569 |
-
|
570 |
-
if st.button("Predict FIFA Winner"):
|
571 |
-
|
572 |
-
last_team_scores = pd.read_csv('./data/last_team_scores.csv')
|
573 |
-
last_team_scores.tail()
|
574 |
-
|
575 |
-
squad_stats = pd.read_csv('./data/squad_stats.csv')
|
576 |
-
squad_stats.tail()
|
577 |
-
|
578 |
-
group_matches = pd.read_csv('./data/Qatar_group_stage.csv')
|
579 |
-
round_16 = group_matches.iloc[48:56, :]
|
580 |
-
quarter_finals = group_matches.iloc[56:60, :]
|
581 |
-
semi_finals = group_matches.iloc[60:62, :]
|
582 |
-
final = group_matches.iloc[62:63, :]
|
583 |
-
second_final = group_matches.iloc[63:64, :]
|
584 |
-
group_matches = group_matches.iloc[:48, :]
|
585 |
-
group_matches.tail()
|
586 |
-
|
587 |
-
xgb_gs_model = joblib.load("./groups_stage_prediction.pkl")
|
588 |
-
|
589 |
-
xgb_ks_model = joblib.load("./knockout_stage_prediction.pkl")
|
590 |
-
|
591 |
-
team_group = group_matches.drop(['country2'], axis=1)
|
592 |
-
team_group = team_group.drop_duplicates().reset_index(drop=True)
|
593 |
-
team_group = team_group.rename(columns={"country1": "team"})
|
594 |
-
team_group.head(5)
|
595 |
-
|
596 |
-
def matches(g_matches):
    """Build the model feature table for a set of fixtures.

    Adds squad potential and FIFA rank columns to ``g_matches`` (in place, at
    column positions 2-5), decides which side of every fixture becomes
    ``Team1``, and then attaches each side's rank and goalkeeper / defense /
    offense / midfield scores.

    Reads ``squad_stats`` and ``last_team_scores`` from the enclosing scope.

    Args:
        g_matches: DataFrame with ``country1`` and ``country2`` columns. It is
            modified in place, so it must not already contain ``potential1``,
            ``potential2``, ``rank1`` or ``rank2``.

    Returns:
        DataFrame with one row per fixture and the columns ``Team1``,
        ``Team2``, ``Team1_FIFA_RANK``, ``Team2_FIFA_RANK``,
        ``Team1_Goalkeeper_Score``, ``Team2_Goalkeeper_Score``,
        ``Team1_Defense``, ``Team1_Offense``, ``Team1_Midfield``,
        ``Team2_Defense``, ``Team2_Offense``, ``Team2_Midfield`` (in that
        order).
    """
    # Build the lookup tables once instead of re-indexing for every column.
    potential = squad_stats.set_index('nationality_name')['potential']
    scores = last_team_scores.set_index('team')

    g_matches.insert(2, 'potential1', g_matches['country1'].map(potential))
    g_matches.insert(3, 'potential2', g_matches['country2'].map(potential))
    g_matches.insert(4, 'rank1', g_matches['country1'].map(scores['rank']))
    g_matches.insert(5, 'rank2', g_matches['country2'].map(scores['rank']))

    pred_set = []
    for _, row in g_matches.iterrows():
        side1, side2 = row['country1'], row['country2']
        gap = row['potential1'] - row['potential2']
        if gap > 2:
            # country1 has clearly higher squad potential.
            first, second = side1, side2
        elif gap < -2:
            # country2 has clearly higher squad potential.
            first, second = side2, side1
        elif row['rank1'] > row['rank2']:
            # Potentials are within 2 points (or unknown): order by rank.
            first, second = side1, side2
        else:
            first, second = side2, side1
        pred_set.append({'Team1': first, 'Team2': second})

    # Explicit columns keep the lookups below valid for an empty fixture list.
    pred_set = pd.DataFrame(pred_set, columns=['Team1', 'Team2'])

    # (output column, side it describes, source column in last_team_scores);
    # the order is the feature order of the returned table.
    feature_columns = [
        ('Team1_FIFA_RANK', 'Team1', 'rank'),
        ('Team2_FIFA_RANK', 'Team2', 'rank'),
        ('Team1_Goalkeeper_Score', 'Team1', 'goalkeeper_score'),
        ('Team2_Goalkeeper_Score', 'Team2', 'goalkeeper_score'),
        ('Team1_Defense', 'Team1', 'defense_score'),
        ('Team1_Offense', 'Team1', 'offense_score'),
        ('Team1_Midfield', 'Team1', 'midfield_score'),
        ('Team2_Defense', 'Team2', 'defense_score'),
        ('Team2_Offense', 'Team2', 'offense_score'),
        ('Team2_Midfield', 'Team2', 'midfield_score'),
    ]
    for name, side, source in feature_columns:
        pred_set[name] = pred_set[side].map(scores[source])
    return pred_set
|
632 |
-
|
633 |
-
def print_results(dataset, y_pred, matches, proba):
    """Print every predicted fixture and collect the predicted outcomes.

    Args:
        dataset: Feature table; column 0 holds Team1 and column 1 holds Team2.
        y_pred: Predicted class per row: 2 means draw, 1 means Team1 wins,
            any other value means Team2 wins.
        matches: Fixture table; its first two columns are printed as the
            pairing and its ``group`` column labels each outcome.
        proba: Per-row class probabilities (index 0 = Team2 wins,
            1 = Team1 wins, 2 = draw). Rows without a draw entry are accepted.

    Returns:
        DataFrame with the ``group`` column of ``matches`` and a ``result``
        column holding the winning team name or ``'Draw'``, indexed 0..n-1.
    """
    results = []
    for i in range(dataset.shape[0]):
        team1, team2 = dataset.iloc[i, 0], dataset.iloc[i, 1]
        fixture = matches.iloc[i, 0] + " vs. " + matches.iloc[i, 1]

        print()
        if y_pred[i] == 2:
            print(fixture + " => Draw")
            outcome = 'Draw'
        elif y_pred[i] == 1:
            print(fixture + " => Winner: " + team1)
            outcome = team1
        else:
            print(fixture + " => Winner: " + team2)
            outcome = team2
        results.append({'result': outcome})

        try:
            print('Probability of ' + team1 + ' winning: ', f"{proba[i][1]:.3f}")
            print('Probability of Draw: ', f"{proba[i][2]:.3f}")
            print('Probability of ' + team2 + ' winning: ', f"{proba[i][0]:.3f}")
        except IndexError:
            # Two-class predictions have no draw probability; finish the
            # report with Team2's probability only.
            print('Probability of ' + team2 + ' winning: ', f"{proba[i][0]:.3f}")
        print("")

    results = pd.DataFrame(results, columns=['result'])
    # Join positionally: the fixture table may be a slice whose index does not
    # start at 0, which would otherwise misalign the two columns.
    groups = matches['group'].reset_index(drop=True)
    matches = pd.concat([groups, results], axis=1)
    return matches
|
656 |
-
|
657 |
-
def winner_to_match(round, prev_match):
    """Fill a round's fixture slots with the winners of the previous round.

    ``country1`` / ``country2`` in ``round`` hold labels that are looked up in
    the ``group`` column of ``prev_match``; each label is replaced by the
    matching ``result`` value. Labels with no match become NaN.

    The caller's ``round`` frame is left untouched, so the function can be
    called repeatedly on the same input.

    Args:
        round: Fixture table with ``country1`` and ``country2`` columns.
            (The name shadows the builtin; kept for caller compatibility.)
        prev_match: Table with ``group`` and ``result`` columns.

    Returns:
        New DataFrame with resolved ``country1`` and ``country2`` as the first
        two columns, followed by the remaining columns of ``round``, with a
        fresh 0..n-1 index.
    """
    winners = prev_match.set_index('group')['result']
    # drop() returns a new frame, so the inserts below never touch the input.
    next_round = round.drop(columns=['country1', 'country2'])
    next_round.insert(0, 'country1', round['country1'].map(winners))
    next_round.insert(1, 'country2', round['country2'].map(winners))
    return next_round.reset_index(drop=True)
|
663 |
-
|
664 |
-
def prediction_knockout(round):
    """Predict every fixture of one knockout round.

    Builds the feature table with ``matches``, scores it with the knockout
    model ``xgb_ks_model`` (both taken from the enclosing scope) and hands the
    predictions and class probabilities to ``print_results``.

    Args:
        round: Fixture table of the round; passed to ``matches`` and to
            ``print_results``.

    Returns:
        The value returned by ``print_results`` for this round.
    """
    features = matches(round)
    predicted = xgb_ks_model.predict(features)
    probabilities = xgb_ks_model.predict_proba(features)
    return print_results(features, predicted, round, probabilities)
|
677 |
-
|
678 |
-
def center_str(round):
|
679 |
-
spaces = ['', ' ', ' ', ' ', ' ', ' ', ]
|
680 |
-
for j in range(2):
|
681 |
-
for i in range(round.shape[0]):
|
682 |
-
if (13 - len(round.iloc[i, j])) % 2 == 0:
|
683 |
-
round.iloc[i, j] = spaces[int((13 - len(round.iloc[i, j])) / 2)] + round.iloc[i, j] + spaces[
|
684 |
-
int((13 - len(round.iloc[i, j])) / 2)]
|
685 |
else:
|
686 |
-
|
687 |
-
|
688 |
-
|
689 |
-
|
690 |
-
|
691 |
-
|
692 |
-
|
693 |
-
|
694 |
-
|
695 |
-
|
696 |
-
|
697 |
-
|
698 |
-
|
699 |
-
|
700 |
-
|
701 |
-
|
702 |
-
|
703 |
-
|
704 |
-
|
705 |
-
|
706 |
-
|
707 |
-
|
708 |
-
|
709 |
-
|
710 |
-
|
711 |
-
|
712 |
-
|
713 |
-
|
714 |
-
|
715 |
-
|
716 |
-
|
717 |
-
|
718 |
-
|
719 |
-
|
720 |
-
|
721 |
-
|
722 |
-
|
723 |
-
|
724 |
-
|
725 |
-
|
726 |
-
|
727 |
-
|
728 |
-
|
729 |
-
|
730 |
-
|
731 |
-
|
732 |
-
|
733 |
-
|
734 |
-
|
735 |
-
|
736 |
-
|
737 |
-
|
738 |
-
|
739 |
-
|
740 |
-
|
741 |
-
|
742 |
-
|
743 |
-
|
744 |
-
|
745 |
-
|
746 |
-
|
747 |
-
|
748 |
-
|
749 |
-
|
750 |
-
|
751 |
-
|
752 |
-
|
753 |
-
|
754 |
-
|
755 |
-
|
756 |
-
|
757 |
-
|
758 |
-
|
759 |
-
|
760 |
-
|
761 |
-
|
762 |
-
|
763 |
-
|
764 |
-
|
765 |
-
|
766 |
-
|
767 |
-
|
768 |
-
'
|
769 |
-
|
770 |
-
|
771 |
-
|
772 |
-
|
773 |
-
|
774 |
-
|
775 |
-
|
776 |
-
|
777 |
-
|
778 |
-
|
779 |
-
|
780 |
-
|
781 |
-
|
782 |
-
|
783 |
-
|
784 |
-
|
785 |
-
|
786 |
-
|
787 |
-
|
788 |
-
|
789 |
-
|
790 |
-
|
791 |
-
|
792 |
-
|
793 |
-
|
794 |
-
|
795 |
-
|
796 |
-
|
797 |
-
|
798 |
-
|
799 |
-
|
800 |
-
|
801 |
-
|
802 |
-
|
803 |
-
|
804 |
-
|
805 |
-
|
806 |
-
|
807 |
-
|
808 |
-
|
809 |
-
|
810 |
-
|
811 |
-
|
812 |
-
|
813 |
-
|
814 |
-
|
815 |
-
|
816 |
-
|
817 |
-
)
|
818 |
-
|
819 |
-
|
820 |
-
|
821 |
-
|
822 |
-
|
823 |
-
|
824 |
-
|
825 |
-
|
826 |
-
|
827 |
-
|
828 |
-
|
829 |
-
|
830 |
-
|
831 |
-
|
832 |
-
|
833 |
-
|
834 |
-
|
835 |
-
|
836 |
-
|
837 |
-
|
838 |
-
|
839 |
-
|
840 |
-
|
841 |
-
|
842 |
-
|
843 |
-
|
844 |
-
|
845 |
-
|
846 |
-
|
847 |
-
|
848 |
-
|
849 |
-
|
850 |
-
|
851 |
-
|
852 |
-
|
853 |
-
|
854 |
-
|
855 |
-
|
856 |
-
|
857 |
-
|
858 |
-
|
859 |
-
|
860 |
-
|
861 |
-
|
862 |
-
|
863 |
-
|
864 |
-
|
865 |
-
|
866 |
-
|
867 |
-
|
868 |
-
|
869 |
-
|
870 |
-
|
871 |
-
|
872 |
-
|
873 |
-
|
874 |
-
|
875 |
-
|
876 |
-
|
877 |
-
|
878 |
-
|
879 |
-
|
880 |
-
|
881 |
-
|
882 |
-
|
883 |
-
|
884 |
-
|
885 |
-
|
886 |
-
|
887 |
-
|
888 |
-
|
889 |
-
|
890 |
-
|
891 |
-
|
892 |
-
|
893 |
-
|
894 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
895 |
|
896 |
|
897 |
if __name__ == "__main__":
|
|
|
1 |
import streamlit as st
|
|
|
|
|
2 |
import pandas as pd
|
3 |
+
import joblib
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
|
5 |
def main():
|
6 |
+
st.title("FIFA winner predication")
|
7 |
+
st.write('This app predict 2022 FIFA winner')
|
8 |
+
|
9 |
+
if st.button("Predict FIFA Winner"):
|
10 |
+
|
11 |
+
last_team_scores = pd.read_csv('./data/last_team_scores.csv')
|
12 |
+
last_team_scores.tail()
|
13 |
+
|
14 |
+
squad_stats = pd.read_csv('./data/squad_stats.csv')
|
15 |
+
squad_stats.tail()
|
16 |
+
|
17 |
+
group_matches = pd.read_csv('./data/Qatar_group_stage.csv')
|
18 |
+
round_16 = group_matches.iloc[48:56, :]
|
19 |
+
quarter_finals = group_matches.iloc[56:60, :]
|
20 |
+
semi_finals = group_matches.iloc[60:62, :]
|
21 |
+
final = group_matches.iloc[62:63, :]
|
22 |
+
second_final = group_matches.iloc[63:64, :]
|
23 |
+
group_matches = group_matches.iloc[:48, :]
|
24 |
+
group_matches.tail()
|
25 |
+
|
26 |
+
xgb_gs_model = joblib.load("./groups_stage_prediction.pkl")
|
27 |
+
|
28 |
+
xgb_ks_model = joblib.load("./knockout_stage_prediction.pkl")
|
29 |
+
|
30 |
+
team_group = group_matches.drop(['country2'], axis=1)
|
31 |
+
team_group = team_group.drop_duplicates().reset_index(drop=True)
|
32 |
+
team_group = team_group.rename(columns={"country1": "team"})
|
33 |
+
team_group.head(5)
|
34 |
+
|
35 |
+
def matches(g_matches):
    """Build the model feature table for a set of fixtures.

    ``g_matches`` must have ``country1`` and ``country2`` columns. It is
    annotated in place with ``potential1``, ``potential2``, ``rank1`` and
    ``rank2`` (inserted at positions 2-5), looked up from the enclosing
    scope's ``squad_stats`` (by ``nationality_name``) and
    ``last_team_scores`` (by ``team``).

    Each fixture is then ordered into a (Team1, Team2) pair:

    * a side whose potential exceeds the other's by more than 2 is Team1;
    * otherwise ``country1`` is Team1 only when ``rank1 > rank2``.

    Teams missing from the lookup tables map to NaN; every NaN comparison
    is False, so such fixtures end up with ``country2`` as Team1.

    Returns a new DataFrame with Team1, Team2 and ten score columns taken
    from ``last_team_scores``. The column order is significant, since the
    frame is fed straight to the fitted models by the callers.
    """
    # Build each lookup once instead of re-indexing the tables per column.
    potential_by_team = squad_stats.set_index('nationality_name')['potential']
    scores_by_team = last_team_scores.set_index('team')
    rank_by_team = scores_by_team['rank']

    # In-place inserts are kept on purpose: the original mutated the
    # caller's frame, and callers pass that same frame on afterwards.
    g_matches.insert(2, 'potential1', g_matches['country1'].map(potential_by_team))
    g_matches.insert(3, 'potential2', g_matches['country2'].map(potential_by_team))
    g_matches.insert(4, 'rank1', g_matches['country1'].map(rank_by_team))
    g_matches.insert(5, 'rank2', g_matches['country2'].map(rank_by_team))

    pairs = []
    for _, row in g_matches.iterrows():
        gap = row['potential1'] - row['potential2']
        if gap > 2:
            country1_first = True
        elif gap < -2:
            country1_first = False
        else:
            # Potentials too close to call: fall back to the rank comparison.
            country1_first = row['rank1'] > row['rank2']

        if country1_first:
            pairs.append({'Team1': row['country1'], 'Team2': row['country2']})
        else:
            pairs.append({'Team1': row['country2'], 'Team2': row['country1']})

    # Explicit columns keep an empty fixture list from losing Team1/Team2.
    pred_set = pd.DataFrame(pairs, columns=['Team1', 'Team2'])

    # (output column, side it describes, source column) in model order.
    feature_spec = [
        ('Team1_FIFA_RANK', 'Team1', 'rank'),
        ('Team2_FIFA_RANK', 'Team2', 'rank'),
        ('Team1_Goalkeeper_Score', 'Team1', 'goalkeeper_score'),
        ('Team2_Goalkeeper_Score', 'Team2', 'goalkeeper_score'),
        ('Team1_Defense', 'Team1', 'defense_score'),
        ('Team1_Offense', 'Team1', 'offense_score'),
        ('Team1_Midfield', 'Team1', 'midfield_score'),
        ('Team2_Defense', 'Team2', 'defense_score'),
        ('Team2_Offense', 'Team2', 'offense_score'),
        ('Team2_Midfield', 'Team2', 'midfield_score'),
    ]
    for column, side, source in feature_spec:
        pred_set[column] = pred_set[side].map(scores_by_team[source])
    return pred_set
|
71 |
+
|
72 |
+
def print_results(dataset, y_pred, matches, proba):
    """Print every predicted fixture and return group labels with results.

    Parameters
    ----------
    dataset : DataFrame whose first two columns are the ordered team names
        (Team1, Team2) the prediction refers to.
    y_pred : indexable of class labels per row; 2 means draw, 1 means the
        first team of ``dataset`` wins, anything else means the second.
    matches : DataFrame of the fixtures as scheduled; its first two columns
        are printed as "A vs. B" and its ``group`` column is returned.
    proba : indexable of per-row class probabilities, indexed like the
        labels (0 = second team, 1 = first team, 2 = draw). Rows with only
        two entries (no draw class) are supported.

    Returns
    -------
    DataFrame with the ``group`` column of ``matches`` and a ``result``
    column holding the winning team name or ``'Draw'``.
    """
    outcomes = []
    for i in range(dataset.shape[0]):
        first_team = dataset.iloc[i, 0]
        second_team = dataset.iloc[i, 1]
        fixture = matches.iloc[i, 0] + " vs. " + matches.iloc[i, 1]

        print()
        if y_pred[i] == 2:
            outcome = 'Draw'
            print(fixture + " => Draw")
        elif y_pred[i] == 1:
            outcome = first_team
            print(fixture + " => Winner: " + first_team)
        else:
            outcome = second_team
            print(fixture + " => Winner: " + second_team)
        outcomes.append(outcome)

        # Check the row length explicitly rather than relying on a bare
        # ``except`` to notice that a two-class model has no draw column.
        class_proba = proba[i]
        if len(class_proba) > 1:
            print('Probability of ' + first_team + ' winning: ', '%.3f' % (class_proba[1]))
        if len(class_proba) > 2:
            print('Probability of Draw: ', '%.3f' % (class_proba[2]))
        print('Probability of ' + second_team + ' winning: ', '%.3f' % (class_proba[0]))
        print("")

    # Building from a dict keeps the 'result' column even with no rows.
    results = pd.DataFrame({'result': outcomes})
    # Item access instead of attribute access to read the column.
    return pd.concat([matches['group'], results], axis=1)
|
95 |
+
|
96 |
+
def winner_to_match(round, prev_match):
    """Fill a round's fixture slots with the teams that qualified for them.

    ``round`` holds slot labels in its ``country1`` / ``country2`` columns;
    ``prev_match`` maps those labels (column ``group``) to team names
    (column ``result``). Labels with no entry in ``prev_match`` become NaN.

    Returns a new frame with ``country1`` and ``country2`` as the first two
    columns, followed by the remaining columns of ``round``, on a fresh
    0-based index. ``round`` itself is left untouched, so the function is
    safe to call on an ``iloc`` slice and can be called repeatedly.
    """
    # One lookup Series serves both columns.
    team_by_slot = prev_match.set_index('group')['result']

    # ``drop`` returns a new frame, so the inserts never reach the caller's data.
    filled = round.drop(['country1', 'country2'], axis=1)
    filled.insert(0, 'country1', round['country1'].map(team_by_slot))
    filled.insert(1, 'country2', round['country2'].map(team_by_slot))
    return filled.reset_index(drop=True)
|
102 |
+
|
103 |
+
def prediction_knockout(round):
    """Predict one knockout round and return its results table.

    The fixtures in ``round`` are turned into feature rows by ``matches``,
    scored with the ``xgb_ks_model`` classifier from the enclosing scope
    (both hard predictions and class probabilities), and then reported
    through ``print_results``, whose return value is passed back unchanged.
    """
    features = matches(round)
    predicted = xgb_ks_model.predict(features)
    probabilities = xgb_ks_model.predict_proba(features)
    # The original kept commented-out variants of the two calls above that
    # used ada_ks_model / rf_ks_model instead of the XGBoost model.
    return print_results(features, predicted, round, probabilities)
|
116 |
+
|
117 |
+
def center_str(round, width=13):
    """Centre the strings in the first two columns of ``round`` in place.

    Every cell in columns 0 and 1 is padded with spaces to ``width``
    characters. When the padding cannot be split evenly the extra space
    goes on the right. Cells already ``width`` characters or longer are
    left unchanged.

    The padding is computed rather than read from a fixed table of space
    strings, so names of any length are handled: a table lookup gives a
    negative index for over-long names and runs off the end for very
    short ones.

    Returns the same (mutated) DataFrame that was passed in.
    """
    for col in range(2):
        for i in range(round.shape[0]):
            name = round.iloc[i, col]
            missing = max(width - len(name), 0)
            left = missing // 2
            round.iloc[i, col] = ' ' * left + name + ' ' * (missing - left)
    return round
|
128 |
+
|
129 |
+
def center2(a, width=29):
    """Return ``a`` centred in a field of ``width`` characters.

    Spaces are added on both sides; when the padding is odd the extra
    space goes on the right. Text that is already ``width`` characters or
    longer is returned unchanged.

    The padding is computed rather than read from a fixed table of space
    strings, because a table lookup gives a negative index once the text
    is longer than the field.
    """
    missing = max(width - len(a), 0)
    left = missing // 2
    return ' ' * left + a + ' ' * (missing - left)
|
139 |
+
|
140 |
+
dataset_groups = matches(group_matches)
|
141 |
+
dataset_groups.tail()
|
142 |
+
print(dataset_groups)
|
143 |
+
|
144 |
+
prediction_groups = xgb_gs_model.predict(dataset_groups)
|
145 |
+
proba = xgb_gs_model.predict_proba(dataset_groups)
|
146 |
+
|
147 |
+
# prediction_groups = ada_gs_model.predict(dataset_groups)
|
148 |
+
# proba = ada_gs_model.predict_proba(dataset_groups)
|
149 |
+
|
150 |
+
# prediction_groups = rf_gs_model.predict(dataset_groups)
|
151 |
+
# proba = rf_gs_model.predict_proba(dataset_groups)
|
152 |
+
|
153 |
+
results = print_results(dataset_groups, prediction_groups, group_matches, proba)
|
154 |
+
|
155 |
+
team_group['points'] = 0
|
156 |
+
team_group
|
157 |
+
for i in range(results.shape[0]):
|
158 |
+
for j in range(team_group.shape[0]):
|
159 |
+
if results.iloc[i, 1] == team_group.iloc[j, 0]:
|
160 |
+
team_group.iloc[j, 2] += 3
|
161 |
+
|
162 |
+
print(team_group.groupby(['group', 'team']).mean().astype(int))
|
163 |
+
|
164 |
+
round_of_16 = team_group[team_group['points'] > 5].reset_index(drop=True)
|
165 |
+
round_of_16['group'] = (4 - 1 / 3 * round_of_16.points).astype(int).astype(str) + round_of_16.group
|
166 |
+
round_of_16 = round_of_16.rename(columns={"team": "result"})
|
167 |
+
|
168 |
+
round_16 = winner_to_match(round_16, round_of_16)
|
169 |
+
results_round_16 = prediction_knockout(round_16)
|
170 |
+
|
171 |
+
quarter_finals = winner_to_match(quarter_finals, results_round_16)
|
172 |
+
results_quarter_finals = prediction_knockout(quarter_finals)
|
173 |
+
|
174 |
+
semi_finals = winner_to_match(semi_finals, results_quarter_finals)
|
175 |
+
results_finals = prediction_knockout(semi_finals)
|
176 |
+
|
177 |
+
final = winner_to_match(final, results_finals)
|
178 |
+
winner = prediction_knockout(final)
|
179 |
+
|
180 |
+
second = results_finals[~results_finals.result.isin(winner.result)]
|
181 |
+
results_finals_3 = results_quarter_finals[~results_quarter_finals.result.isin(results_finals.result)]
|
182 |
+
results_finals_3.iloc[0, 0] = 'z1'
|
183 |
+
results_finals_3.iloc[1, 0] = 'z2'
|
184 |
+
second_final = winner_to_match(second_final, results_finals_3)
|
185 |
+
third = prediction_knockout(second_final)
|
186 |
+
|
187 |
+
round_16 = center_str(round_16)
|
188 |
+
quarter_finals = center_str(quarter_finals)
|
189 |
+
semi_finals = center_str(semi_finals)
|
190 |
+
final = center_str(final)
|
191 |
+
group_matches = center_str(group_matches)
|
192 |
+
|
193 |
+
# Function to center align text
|
194 |
+
def center(text):
    """Wrap ``text`` in a ``<div>`` whose inline style centre-aligns it."""
    template = "<div style='text-align: center;'>{}</div>"
    return template.format(text)
|
196 |
+
|
197 |
+
# Function to generate the formatted text
|
198 |
+
def generate_text(round_16, quarter_finals, semi_finals, final):
    """Assemble the knockout bracket and podium as one multi-line string.

    Cells are read positionally with ``iloc`` (column 0 and column 1 of each
    frame) and joined with fixed connector strings and newlines:

    * ``round_16``       - rows 0-7 (rows 0-3 on the left, 4-7 on the right)
    * ``quarter_finals`` - rows 0-3
    * ``semi_finals``    - rows 0-1
    * ``final``          - row 0

    The last three lines are built from ``winner``, ``second`` and ``third``
    (row 0, column 1 of each), which are NOT parameters: they are read from
    the enclosing scope and must exist before this function is called.

    NOTE(review): the connector literals below show up as runs of 'β' with
    collapsed spacing in this copy - presumably mis-encoded box-drawing
    characters; confirm against the original file before editing them.
    NOTE(review): ``center`` returns an HTML ``<div>`` wrapper; if the result
    is shown as plain text the markup will presumably appear literally -
    confirm against how the caller displays it.
    """
    formatted_text = (
        # --- bracket body: one source line per printed row ---
        round_16.iloc[
            0, 0] + 'βββββ βββββ' +
        round_16.iloc[4, 0] + '\n' +
        ' β β\n' +
        ' βββββ' + quarter_finals.iloc[
            0, 0] + 'βββββ βββββ' +
        quarter_finals.iloc[2, 0] + 'βββββ\n' +
        ' β β β β\n' +
        round_16.iloc[
            0, 1] + 'βββββ β β βββββ' +
        round_16.iloc[4, 1] + '\n' +
        ' βββββ' + semi_finals.iloc[
            0, 0] + 'βββββ βββββ' + semi_finals.iloc[1, 0] + 'βββββ\n' +
        round_16.iloc[
            1, 0] + 'βββββ β β β β βββββ' +
        round_16.iloc[5, 0] + '\n' +
        ' β β β β β β\n' +
        ' βββββ' + quarter_finals.iloc[
            0, 1] + 'βββββ β β βββββ' +
        quarter_finals.iloc[2, 1] + 'βββββ\n' +
        ' β β β β\n' +
        round_16.iloc[
            1, 1] + 'βββββ β β βββββ' +
        round_16.iloc[5, 1] + '\n' +
        # middle row: the two finalists
        ' βββββ' + final.iloc[0, 0] + 'vs.' +
        final.iloc[0, 1] + 'βββββ\n' +
        round_16.iloc[
            2, 0] + 'βββββ β β βββββ' +
        round_16.iloc[6, 0] + '\n' +
        ' β β β β\n' +
        ' βββββ' + quarter_finals.iloc[
            1, 0] + 'βββββ β β βββββ' +
        quarter_finals.iloc[3, 0] + 'βββββ\n' +
        ' β β β β β β\n' +
        round_16.iloc[
            2, 1] + 'βββββ β β β β βββββ' +
        round_16.iloc[6, 1] + '\n' +
        ' βββββ' + semi_finals.iloc[
            0, 1] + 'βββββ βββββ' + semi_finals.iloc[1, 1] + 'βββββ\n' +
        round_16.iloc[
            3, 0] + 'βββββ β β βββββ' +
        round_16.iloc[7, 0] + '\n' +
        ' β β β β\n' +
        ' βββββ' + quarter_finals.iloc[
            1, 1] + 'βββββ βββββ' +
        quarter_finals.iloc[3, 1] + 'βββββ\n' +
        ' β β\n' +
        round_16.iloc[
            3, 1] + 'βββββ βββββ' +
        round_16.iloc[7, 1] + '\n' +
        # --- podium: U+1F947 / U+1F948 / U+1F949 are the 1st / 2nd / 3rd
        # place medal emoji, each followed by a name from the enclosing scope ---
        " " + center(
            "\U0001F947" + winner.iloc[0, 1]) + '\n' +
        " " + center(
            "\U0001F948" + second.iloc[0, 1]) + '\n' +
        " " + center(
            "\U0001F949" + third.iloc[0, 1])
    )
    return formatted_text
|
258 |
+
|
259 |
+
# Generate the formatted text
|
260 |
+
formatted_text = generate_text(round_16, quarter_finals, semi_finals, final)
|
261 |
+
|
262 |
+
# Define the round_16, quarter_finals, semi_finals, final DataFrames
|
263 |
+
# Replace the DataFrame creation with your actual data
|
264 |
+
|
265 |
+
# Display the formatted text
|
266 |
+
st.text(formatted_text)
|
267 |
+
# st.markdown(formatted_text)
|
268 |
+
|
269 |
+
print(round_16.iloc[
|
270 |
+
0, 0] + 'βββββ βββββ' +
|
271 |
+
round_16.iloc[4, 0])
|
272 |
+
print(
|
273 |
+
' β β')
|
274 |
+
print(' βββββ' + quarter_finals.iloc[
|
275 |
+
0, 0] + 'βββββ βββββ' +
|
276 |
+
quarter_finals.iloc[2, 0] + 'βββββ')
|
277 |
+
print(
|
278 |
+
' β β β β')
|
279 |
+
print(round_16.iloc[
|
280 |
+
0, 1] + 'βββββ β β βββββ' +
|
281 |
+
round_16.iloc[4, 1])
|
282 |
+
print(' βββββ' + semi_finals.iloc[
|
283 |
+
0, 0] + 'βββββ βββββ' + semi_finals.iloc[1, 0] + 'βββββ')
|
284 |
+
print(round_16.iloc[
|
285 |
+
1, 0] + 'βββββ β β β β βββββ' +
|
286 |
+
round_16.iloc[5, 0])
|
287 |
+
print(
|
288 |
+
' β β β β β β')
|
289 |
+
print(' βββββ' + quarter_finals.iloc[
|
290 |
+
0, 1] + 'βββββ β β βββββ' +
|
291 |
+
quarter_finals.iloc[2, 1] + 'βββββ')
|
292 |
+
print(
|
293 |
+
' β β β β')
|
294 |
+
print(round_16.iloc[
|
295 |
+
1, 1] + 'βββββ β β βββββ' +
|
296 |
+
round_16.iloc[5, 1])
|
297 |
+
print(' βββββ' + final.iloc[0, 0] + 'vs.' + final.iloc[
|
298 |
+
0, 1] + 'βββββ')
|
299 |
+
print(round_16.iloc[
|
300 |
+
2, 0] + 'βββββ β β βββββ' +
|
301 |
+
round_16.iloc[6, 0])
|
302 |
+
print(
|
303 |
+
' β β β β')
|
304 |
+
print(' βββββ' + quarter_finals.iloc[
|
305 |
+
1, 0] + 'βββββ β β βββββ' +
|
306 |
+
quarter_finals.iloc[3, 0] + 'βββββ')
|
307 |
+
print(
|
308 |
+
' β β β β β β')
|
309 |
+
print(round_16.iloc[
|
310 |
+
2, 1] + 'βββββ β β β β βββββ' +
|
311 |
+
round_16.iloc[6, 1])
|
312 |
+
print(' βββββ' + semi_finals.iloc[
|
313 |
+
0, 1] + 'βββββ βββββ' + semi_finals.iloc[1, 1] + 'βββββ')
|
314 |
+
print(round_16.iloc[
|
315 |
+
3, 0] + 'βββββ β β βββββ' +
|
316 |
+
round_16.iloc[7, 0])
|
317 |
+
print(
|
318 |
+
' β β β β')
|
319 |
+
print(' βββββ' + quarter_finals.iloc[
|
320 |
+
1, 1] + 'βββββ βββββ' +
|
321 |
+
quarter_finals.iloc[3, 1] + 'βββββ')
|
322 |
+
print(
|
323 |
+
' β β')
|
324 |
+
print(round_16.iloc[
|
325 |
+
3, 1] + 'βββββ βββββ' +
|
326 |
+
round_16.iloc[7, 1])
|
327 |
+
print(
|
328 |
+
" " + center2("\U0001F947" + winner.iloc[0, 1]))
|
329 |
+
print(
|
330 |
+
" " + center2("\U0001F948" + second.iloc[0, 1]))
|
331 |
+
print(
|
332 |
+
" " + center2("\U0001F949" + third.iloc[0, 1]))
|
333 |
|
334 |
|
335 |
if __name__ == "__main__":
|