cassiomo committed on
Commit
39c83c9
1 Parent(s): 54e963e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +239 -239
app.py CHANGED
@@ -15,372 +15,372 @@ warnings.filterwarnings('ignore')
15
  # In[91]:
16
 
17
  def main():
18
- st.title("FIFA Data visualization")
19
 
20
 
21
- df = pd.read_csv('./data/international_matches.csv', parse_dates=['date'])
22
- # df.tail()
23
- #
24
 
25
- # In[92]:
26
 
27
 
28
- # df.columns
29
 
30
 
31
- # In[93]:
32
 
33
 
34
- # df.isnull().sum()
35
 
36
 
37
- # # PRE-ANALYSIS
38
- # The dataset has a lot of blank fields that need to be fixed.
39
- # However, before modifying any field, I want to analyze the teams' qualifications on the last FIFA date (June 2022). This is important because, from these qualifications, I will create the inference dataset that enters the machine learning algorithm that predicts the World Cup matches.
40
 
41
- # ### Top 10 FIFA Ranking
42
- # Top 10 national teams to date FIFA June 2022.
43
- # **ref:** https://www.fifa.com/fifa-world-ranking/men?dateId=id13603
44
 
45
 
46
- # In[94]:
47
 
48
 
49
- fifa_rank = df[['date', 'home_team', 'away_team', 'home_team_fifa_rank', 'away_team_fifa_rank']]
50
- home = fifa_rank[['date', 'home_team', 'home_team_fifa_rank']].rename(
51
- columns={"home_team": "team", "home_team_fifa_rank": "rank"})
52
- away = fifa_rank[['date', 'away_team', 'away_team_fifa_rank']].rename(
53
- columns={"away_team": "team", "away_team_fifa_rank": "rank"})
54
- fifa_rank = pd.concat([home, away])
55
- # Select each country latest match
56
- fifa_rank = fifa_rank.sort_values(['team', 'date'], ascending=[True, False])
57
- last_rank = fifa_rank
58
- fifa_rank_top10 = fifa_rank.groupby('team').first().sort_values('rank', ascending=True)[0:10].reset_index()
59
 
60
 
61
- # fifa_rank_top10
62
 
63
 
64
- # ### Top 10 teams with the highest winning percentage at home and away
65
 
66
- # In[95]:
67
 
68
 
69
- def home_percentage(team):
70
- score = len(df[(df['home_team'] == team) & (df['home_team_result'] == "Win")]) / len(
71
- df[df['home_team'] == team]) * 100
72
- return round(score)
73
 
74
 
75
- def away_percentage(team):
76
- score = len(df[(df['away_team'] == team) & (df['home_team_result'] == "Lose")]) / len(
77
- df[df['away_team'] == team]) * 100
78
- return round(score)
79
 
80
 
81
- # In[96]:
82
 
83
 
84
- fifa_rank_top10['Home_win_Per'] = np.vectorize(home_percentage)(fifa_rank_top10['team'])
85
- fifa_rank_top10['Away_win_Per'] = np.vectorize(away_percentage)(fifa_rank_top10['team'])
86
- fifa_rank_top10['Average_win_Per'] = round((fifa_rank_top10['Home_win_Per'] + fifa_rank_top10['Away_win_Per']) / 2)
87
- fifa_rank_win = fifa_rank_top10.sort_values('Average_win_Per', ascending=False)
88
- # fifa_rank_win
89
 
90
 
91
- # ### Top 10 attacking teams in the last FIFA date
92
 
93
- # In[97]:
94
 
95
 
96
- fifa_offense = df[['date', 'home_team', 'away_team', 'home_team_mean_offense_score', 'away_team_mean_offense_score']]
97
- home = fifa_offense[['date', 'home_team', 'home_team_mean_offense_score']].rename(
98
- columns={"home_team": "team", "home_team_mean_offense_score": "offense_score"})
99
- away = fifa_offense[['date', 'away_team', 'away_team_mean_offense_score']].rename(
100
- columns={"away_team": "team", "away_team_mean_offense_score": "offense_score"})
101
- fifa_offense = pd.concat([home, away])
102
- fifa_offense = fifa_offense.sort_values(['date', 'team'], ascending=[False, True])
103
- last_offense = fifa_offense
104
- fifa_offense_top10 = fifa_offense.groupby('team').first().sort_values('offense_score', ascending=False)[
105
- 0:10].reset_index()
106
- # fifa_offense_top10
107
 
108
- import plotly.graph_objs as go
109
- import plotly.figure_factory as ff
110
 
111
- # In[99]:
112
 
113
- # Display the data for the bar chart
114
- st.write("Top 10 Attacking Teams")
115
- st.write(fifa_offense_top10)
116
 
117
- # Create a horizontal bar chart
118
- fig_bar = go.Figure(data=[go.Bar(y=fifa_offense_top10['team'], x=fifa_offense_top10['offense_score'], orientation='h')])
119
- # Update layout to include title, x-label, and y-label
120
- fig_bar.update_layout(title='Top 10 Attacking Teams',
121
- xaxis_title='Offense Score',
122
- yaxis_title='Team')
123
- st.plotly_chart(fig_bar)
124
 
125
- # Display the data for the bar chart
126
- # st.write("Top 10 Offense Teams")
127
- # st.write(fifa_offense_top10)
128
 
129
- # sns.barplot(data=fifa_offense_top10, x='offense_score', y='team', color="#7F1431")
130
- # plt.xlabel('Offense Score', size = 20)
131
- # plt.ylabel('Team', size = 20)
132
- # plt.title("Top 10 Attacking teams");
133
 
134
 
135
- # ### Top 10 Midfield teams in the last FIFA date
136
 
137
- # In[100]:
138
 
139
 
140
- fifa_midfield = df[['date', 'home_team', 'away_team', 'home_team_mean_midfield_score', 'away_team_mean_midfield_score']]
141
- home = fifa_midfield[['date', 'home_team', 'home_team_mean_midfield_score']].rename(
142
- columns={"home_team": "team", "home_team_mean_midfield_score": "midfield_score"})
143
- away = fifa_midfield[['date', 'away_team', 'away_team_mean_midfield_score']].rename(
144
- columns={"away_team": "team", "away_team_mean_midfield_score": "midfield_score"})
145
- fifa_midfield = pd.concat([home, away])
146
- fifa_midfield = fifa_midfield.sort_values(['date', 'team'], ascending=[False, True])
147
- last_midfield = fifa_midfield
148
- fifa_midfield_top10 = fifa_midfield.groupby('team').first().sort_values('midfield_score', ascending=False)[
149
- 0:10].reset_index()
150
- # fifa_midfield_top10
151
 
152
 
153
- # In[101]:
154
 
155
- # Display the data for the bar chart
156
- st.write("Top 10 Midfield Teams")
157
- st.write(fifa_midfield_top10)
158
 
159
- # Create a horizontal bar chart
160
- fig_bar = go.Figure(
161
- data=[go.Bar(y=fifa_midfield_top10['team'], x=fifa_midfield_top10['midfield_score'], orientation='h')])
162
- # Update layout to include title, x-label, and y-label
163
- fig_bar.update_layout(title='Top 10 Midfield Teams', # Set the title
164
- xaxis_title='Midfield Score', # Set the x-axis label
165
- yaxis_title='Team') # Set the y-axis label
166
 
167
- # Display the bar chart
168
- st.plotly_chart(fig_bar)
169
 
170
- # sns.barplot(data=fifa_midfield_top10, x='midfield_score', y='team', color="#7F1431")
171
- # plt.xlabel('Midfield Score', size = 20)
172
- # plt.ylabel('Team', size = 20)
173
- # plt.title("Top 10 Midfield teams");
174
 
175
 
176
- # ### Top 10 defending teams in the last FIFA date
177
 
178
- # In[102]:
179
 
180
 
181
- fifa_defense = df[['date', 'home_team', 'away_team', 'home_team_mean_defense_score', 'away_team_mean_defense_score']]
182
- home = fifa_defense[['date', 'home_team', 'home_team_mean_defense_score']].rename(
183
- columns={"home_team": "team", "home_team_mean_defense_score": "defense_score"})
184
- away = fifa_defense[['date', 'away_team', 'away_team_mean_defense_score']].rename(
185
- columns={"away_team": "team", "away_team_mean_defense_score": "defense_score"})
186
- fifa_defense = pd.concat([home, away])
187
- fifa_defense = fifa_defense.sort_values(['date', 'team'], ascending=[False, True])
188
- last_defense = fifa_defense
189
- fifa_defense_top10 = fifa_defense.groupby('team').first().sort_values('defense_score', ascending=False)[
190
- 0:10].reset_index()
191
- # fifa_defense_top10
192
 
193
 
194
- # In[103]:
195
 
196
- # Display the data for the bar chart
197
- st.write("Top 10 Defensive Teams")
198
- st.write(fifa_defense_top10)
199
 
200
- # Create the horizontal bar chart
201
- fig_bar = go.Figure(data=[go.Bar(y=fifa_defense_top10['team'], x=fifa_defense_top10['defense_score'], orientation='h')])
202
 
203
- # Update layout to include title, x-label, and y-label
204
- fig_bar.update_layout(title='Top 10 Defensive Teams', # Set the title
205
- xaxis_title='Defense Score', # Set the x-axis label
206
- yaxis_title='Team') # Set the y-axis label
207
 
208
- # Display the bar chart
209
- st.plotly_chart(fig_bar)
210
 
211
- # sns.barplot(data=fifa_defense_top10, x='defense_score', y='team', color="#7F1431")
212
- # plt.xlabel('Defense Score', size=20)
213
- # plt.ylabel('Team', size=20)
214
- # plt.title("Top 10 Defense Teams")
215
 
216
- # ### Do Home teams have any advantage?
217
 
218
- # In[104]:
219
 
220
 
221
- # Select all matches played at non-neutral locations
222
- home_team_advantage = df[df['neutral_location'] == False]['home_team_result'].value_counts(normalize=True)
223
 
224
- # # Plot
225
- # fig, axes = plt.subplots(1, 1, figsize=(8,8))
226
- # ax =plt.pie(home_team_advantage ,labels = ['Win', 'Lose', 'Draw'], autopct='%.0f%%')
227
- # plt.title('Home team match result', fontsize = 15)
228
- # plt.show()
229
 
230
 
231
- # As the graph shows, the home team has an advantage over the away team. This is due to factors such as the fans, the weather and the confidence of the players. For this reason, in the World Cup, teams that play at home will have an advantage.
232
 
233
- # # DATA PREPARATION AND FEATURE ENGINEERING
234
- # In this section, I will fill in the empty fields in the dataset and clean up the data for teams that did not qualify for the World Cup. Then, I will use the correlation matrix to choose the characteristics that will define the training dataset of the Machine Learning model. Finally, I will use the ratings of the teams in their last match to define the "Last Team Scores" dataset (i.e., the dataset that I will use to predict the World Cup matches).
235
 
236
- # ### Analyze and fill na's
237
 
238
- # In[105]:
239
 
240
- #
241
- # df.isnull().sum()
242
 
243
 
244
- # In[106]:
245
 
246
 
247
- # We can fill mean for na's in goal_keeper_score
248
- df[df['home_team'] == "Brazil"]['home_team_goalkeeper_score'].describe()
249
 
250
- # In[107]:
251
 
252
 
253
- df['home_team_goalkeeper_score'] = round(
254
- df.groupby("home_team")["home_team_goalkeeper_score"].transform(lambda x: x.fillna(x.mean())))
255
- df['away_team_goalkeeper_score'] = round(
256
- df.groupby("away_team")["away_team_goalkeeper_score"].transform(lambda x: x.fillna(x.mean())))
257
 
258
- # In[108]:
259
 
260
 
261
- # We can fill mean for na's in defense score
262
- df[df['away_team'] == "Uruguay"]['home_team_mean_defense_score'].describe()
263
 
264
- # In[65]:
265
 
266
 
267
- df['home_team_mean_defense_score'] = round(
268
- df.groupby('home_team')['home_team_mean_defense_score'].transform(lambda x: x.fillna(x.mean())))
269
- df['away_team_mean_defense_score'] = round(
270
- df.groupby('away_team')['away_team_mean_defense_score'].transform(lambda x: x.fillna(x.mean())))
271
 
272
- # In[109]:
273
 
274
 
275
- # We can fill mean for na's in offense score
276
- df[df['away_team'] == "Uruguay"]['home_team_mean_offense_score'].describe()
277
 
278
- # In[67]:
279
 
280
 
281
- df['home_team_mean_offense_score'] = round(
282
- df.groupby('home_team')['home_team_mean_offense_score'].transform(lambda x: x.fillna(x.mean())))
283
- df['away_team_mean_offense_score'] = round(
284
- df.groupby('away_team')['away_team_mean_offense_score'].transform(lambda x: x.fillna(x.mean())))
285
 
286
- # In[110]:
287
 
288
 
289
- # We can fill mean for na's in midfield score
290
- df[df['away_team'] == "Uruguay"]['home_team_mean_midfield_score'].describe()
291
 
292
- # In[111]:
293
 
294
 
295
- df['home_team_mean_midfield_score'] = round(
296
- df.groupby('home_team')['home_team_mean_midfield_score'].transform(lambda x: x.fillna(x.mean())))
297
- df['away_team_mean_midfield_score'] = round(
298
- df.groupby('away_team')['away_team_mean_midfield_score'].transform(lambda x: x.fillna(x.mean())))
299
 
300
- # In[112]:
301
 
302
 
303
- df.isnull().sum()
304
 
305
- # In[113]:
306
 
307
 
308
- # These teams are not available in the FIFA game itself, so they are assumed to be no worse than average-performing teams and are given an average score of 50.
309
- df.fillna(50, inplace=True)
310
 
311
- # ### Filter the teams participating in QATAR - World cup 2022
312
 
313
- # In[115]:
314
 
315
 
316
- list_2022 = ['Qatar', 'Germany', 'Denmark', 'Brazil', 'France', 'Belgium', 'Croatia', 'Spain', 'Serbia', 'England',
317
- 'Switzerland', 'Netherlands', 'Argentina', 'IR Iran', 'Korea Republic', 'Japan', 'Saudi Arabia', 'Ecuador',
318
- 'Uruguay', 'Canada', 'Ghana', 'Senegal', 'Portugal', 'Poland', 'Tunisia', 'Morocco', 'Cameroon', 'USA',
319
- 'Mexico', 'Wales', 'Australia', 'Costa Rica']
320
- final_df = df[(df["home_team"].apply(lambda x: x in list_2022)) | (df["away_team"].apply(lambda x: x in list_2022))]
321
 
322
- # **Top 10 teams in QATAR 2022**
323
 
324
- # In[116]:
325
 
326
 
327
- rank = final_df[['date', 'home_team', 'away_team', 'home_team_fifa_rank', 'away_team_fifa_rank']]
328
- home = rank[['date', 'home_team', 'home_team_fifa_rank']].rename(
329
- columns={"home_team": "team", "home_team_fifa_rank": "rank"})
330
- away = rank[['date', 'away_team', 'away_team_fifa_rank']].rename(
331
- columns={"away_team": "team", "away_team_fifa_rank": "rank"})
332
- rank = pd.concat([home, away])
333
 
334
- # Select each country latest match
335
- rank = rank.sort_values(['team', 'date'], ascending=[True, False])
336
- rank_top10 = rank.groupby('team').first().sort_values('rank', ascending=True).reset_index()
337
- rank_top10 = rank_top10[(rank_top10["team"].apply(lambda x: x in list_2022))][0:10]
338
 
339
- st.write("Top 10 Countries by Rank - Latest Match")
340
- rank_top10
341
 
342
- # # Create a scatter plot
343
- # fig_scatter = go.Figure(data=go.Scatter(x=rank_top10['team'], y=rank_top10['rank'], mode='markers', marker=dict(color='lightskyblue', size=12)))
344
- #
345
- # # Update layout to include title and labels
346
- # fig_scatter.update_layout(title='Top 10 Countries by Rank - Latest Match',
347
- # xaxis_title='Country',
348
- # yaxis_title='Rank')
349
- #
350
- # # Display the scatter plot
351
- # st.plotly_chart(fig_scatter)
352
 
353
- # **Top 10 teams with the highest winning percentage in QATAR 2022**
354
 
355
- # In[117]:
356
 
357
 
358
- rank_top10['Home_win_Per'] = np.vectorize(home_percentage)(rank_top10['team'])
359
- rank_top10['Away_win_Per'] = np.vectorize(away_percentage)(rank_top10['team'])
360
- rank_top10['Average_win_Per'] = round((rank_top10['Home_win_Per'] + rank_top10['Away_win_Per']) / 2)
361
- rank_top10_Win = rank_top10.sort_values('Average_win_Per', ascending=False)
362
 
363
- # st.write("Top 10 Countries by Rank - Latest Match")
364
- # rank_top10_Win
365
 
366
 
367
- # In[118]:
368
 
369
- # Display the data for the bar chart
370
- st.write("Top 10 Average Win Per game Teams")
371
- st.write(rank_top10_Win)
372
 
373
- # Create a horizontal bar chart
374
- # Create a horizontal bar chart
375
- fig_bar = go.Figure(data=[go.Bar(y=rank_top10_Win['team'], x=rank_top10_Win['Average_win_Per'], orientation='h')])
376
 
377
- # Update layout to include title and labels
378
- fig_bar.update_layout(title='Top 10 Countries by Average Win Percentage',
379
- xaxis_title='Average Win Percentage',
380
- yaxis_title='Country')
381
 
382
- # Display the horizontal bar chart
383
- st.plotly_chart(fig_bar)
384
 
385
  # sns.barplot(data=rank_top10_Win,x='Average_win_Per',y='team',color="#7F1431")
386
  # plt.xticks()
 
15
  # In[91]:
16
 
17
  def main():
18
+ # st.title("FIFA Data visualization")
19
 
20
 
21
+ # df = pd.read_csv('./data/international_matches.csv', parse_dates=['date'])
22
+ # # df.tail()
23
+ # #
24
 
25
+ # # In[92]:
26
 
27
 
28
+ # # df.columns
29
 
30
 
31
+ # # In[93]:
32
 
33
 
34
+ # # df.isnull().sum()
35
 
36
 
37
+ # # # PRE-ANALYSIS
38
+ # # The dataset has a lot of blank fields that need to be fixed.
39
+ # # However, before modifying any field, I want to analyze the teams' qualifications on the last FIFA date (June 2022). This is important because, from these qualifications, I will create the inference dataset that enters the machine learning algorithm that predicts the World Cup matches.
40
 
41
+ # # ### Top 10 FIFA Ranking
42
+ # # Top 10 national teams to date FIFA June 2022.
43
+ # # **ref:** https://www.fifa.com/fifa-world-ranking/men?dateId=id13603
44
 
45
 
46
+ # # In[94]:
47
 
48
 
49
+ # fifa_rank = df[['date', 'home_team', 'away_team', 'home_team_fifa_rank', 'away_team_fifa_rank']]
50
+ # home = fifa_rank[['date', 'home_team', 'home_team_fifa_rank']].rename(
51
+ # columns={"home_team": "team", "home_team_fifa_rank": "rank"})
52
+ # away = fifa_rank[['date', 'away_team', 'away_team_fifa_rank']].rename(
53
+ # columns={"away_team": "team", "away_team_fifa_rank": "rank"})
54
+ # fifa_rank = pd.concat([home, away])
55
+ # # Select each country latest match
56
+ # fifa_rank = fifa_rank.sort_values(['team', 'date'], ascending=[True, False])
57
+ # last_rank = fifa_rank
58
+ # fifa_rank_top10 = fifa_rank.groupby('team').first().sort_values('rank', ascending=True)[0:10].reset_index()
59
 
60
 
61
+ # # fifa_rank_top10
62
 
63
 
64
+ # # ### Top 10 teams with the highest winning percentage at home and away
65
 
66
+ # # In[95]:
67
 
68
 
69
+ # def home_percentage(team):
70
+ # score = len(df[(df['home_team'] == team) & (df['home_team_result'] == "Win")]) / len(
71
+ # df[df['home_team'] == team]) * 100
72
+ # return round(score)
73
 
74
 
75
+ # def away_percentage(team):
76
+ # score = len(df[(df['away_team'] == team) & (df['home_team_result'] == "Lose")]) / len(
77
+ # df[df['away_team'] == team]) * 100
78
+ # return round(score)
79
 
80
 
81
+ # # In[96]:
82
 
83
 
84
+ # fifa_rank_top10['Home_win_Per'] = np.vectorize(home_percentage)(fifa_rank_top10['team'])
85
+ # fifa_rank_top10['Away_win_Per'] = np.vectorize(away_percentage)(fifa_rank_top10['team'])
86
+ # fifa_rank_top10['Average_win_Per'] = round((fifa_rank_top10['Home_win_Per'] + fifa_rank_top10['Away_win_Per']) / 2)
87
+ # fifa_rank_win = fifa_rank_top10.sort_values('Average_win_Per', ascending=False)
88
+ # # fifa_rank_win
89
 
90
 
91
+ # # ### Top 10 attacking teams in the last FIFA date
92
 
93
+ # # In[97]:
94
 
95
 
96
+ # fifa_offense = df[['date', 'home_team', 'away_team', 'home_team_mean_offense_score', 'away_team_mean_offense_score']]
97
+ # home = fifa_offense[['date', 'home_team', 'home_team_mean_offense_score']].rename(
98
+ # columns={"home_team": "team", "home_team_mean_offense_score": "offense_score"})
99
+ # away = fifa_offense[['date', 'away_team', 'away_team_mean_offense_score']].rename(
100
+ # columns={"away_team": "team", "away_team_mean_offense_score": "offense_score"})
101
+ # fifa_offense = pd.concat([home, away])
102
+ # fifa_offense = fifa_offense.sort_values(['date', 'team'], ascending=[False, True])
103
+ # last_offense = fifa_offense
104
+ # fifa_offense_top10 = fifa_offense.groupby('team').first().sort_values('offense_score', ascending=False)[
105
+ # 0:10].reset_index()
106
+ # # fifa_offense_top10
107
 
108
+ # import plotly.graph_objs as go
109
+ # import plotly.figure_factory as ff
110
 
111
+ # # In[99]:
112
 
113
+ # # Display the data for the bar chart
114
+ # st.write("Top 10 Attacking Teams")
115
+ # st.write(fifa_offense_top10)
116
 
117
+ # # Create a horizontal bar chart
118
+ # fig_bar = go.Figure(data=[go.Bar(y=fifa_offense_top10['team'], x=fifa_offense_top10['offense_score'], orientation='h')])
119
+ # # Update layout to include title, x-label, and y-label
120
+ # fig_bar.update_layout(title='Top 10 Attacking Teams',
121
+ # xaxis_title='Offense Score',
122
+ # yaxis_title='Team')
123
+ # st.plotly_chart(fig_bar)
124
 
125
+ # # Display the data for the bar chart
126
+ # # st.write("Top 10 Offense Teams")
127
+ # # st.write(fifa_offense_top10)
128
 
129
+ # # sns.barplot(data=fifa_offense_top10, x='offense_score', y='team', color="#7F1431")
130
+ # # plt.xlabel('Offense Score', size = 20)
131
+ # # plt.ylabel('Team', size = 20)
132
+ # # plt.title("Top 10 Attacking teams");
133
 
134
 
135
+ # # ### Top 10 Midfield teams in the last FIFA date
136
 
137
+ # # In[100]:
138
 
139
 
140
+ # fifa_midfield = df[['date', 'home_team', 'away_team', 'home_team_mean_midfield_score', 'away_team_mean_midfield_score']]
141
+ # home = fifa_midfield[['date', 'home_team', 'home_team_mean_midfield_score']].rename(
142
+ # columns={"home_team": "team", "home_team_mean_midfield_score": "midfield_score"})
143
+ # away = fifa_midfield[['date', 'away_team', 'away_team_mean_midfield_score']].rename(
144
+ # columns={"away_team": "team", "away_team_mean_midfield_score": "midfield_score"})
145
+ # fifa_midfield = pd.concat([home, away])
146
+ # fifa_midfield = fifa_midfield.sort_values(['date', 'team'], ascending=[False, True])
147
+ # last_midfield = fifa_midfield
148
+ # fifa_midfield_top10 = fifa_midfield.groupby('team').first().sort_values('midfield_score', ascending=False)[
149
+ # 0:10].reset_index()
150
+ # # fifa_midfield_top10
151
 
152
 
153
+ # # In[101]:
154
 
155
+ # # Display the data for the bar chart
156
+ # st.write("Top 10 Midfield Teams")
157
+ # st.write(fifa_midfield_top10)
158
 
159
+ # # Create a horizontal bar chart
160
+ # fig_bar = go.Figure(
161
+ # data=[go.Bar(y=fifa_midfield_top10['team'], x=fifa_midfield_top10['midfield_score'], orientation='h')])
162
+ # # Update layout to include title, x-label, and y-label
163
+ # fig_bar.update_layout(title='Top 10 Midfield Teams', # Set the title
164
+ # xaxis_title='Midfield Score', # Set the x-axis label
165
+ # yaxis_title='Team') # Set the y-axis label
166
 
167
+ # # Display the bar chart
168
+ # st.plotly_chart(fig_bar)
169
 
170
+ # # sns.barplot(data=fifa_midfield_top10, x='midfield_score', y='team', color="#7F1431")
171
+ # # plt.xlabel('Midfield Score', size = 20)
172
+ # # plt.ylabel('Team', size = 20)
173
+ # # plt.title("Top 10 Midfield teams");
174
 
175
 
176
+ # # ### Top 10 defending teams in the last FIFA date
177
 
178
+ # # In[102]:
179
 
180
 
181
+ # fifa_defense = df[['date', 'home_team', 'away_team', 'home_team_mean_defense_score', 'away_team_mean_defense_score']]
182
+ # home = fifa_defense[['date', 'home_team', 'home_team_mean_defense_score']].rename(
183
+ # columns={"home_team": "team", "home_team_mean_defense_score": "defense_score"})
184
+ # away = fifa_defense[['date', 'away_team', 'away_team_mean_defense_score']].rename(
185
+ # columns={"away_team": "team", "away_team_mean_defense_score": "defense_score"})
186
+ # fifa_defense = pd.concat([home, away])
187
+ # fifa_defense = fifa_defense.sort_values(['date', 'team'], ascending=[False, True])
188
+ # last_defense = fifa_defense
189
+ # fifa_defense_top10 = fifa_defense.groupby('team').first().sort_values('defense_score', ascending=False)[
190
+ # 0:10].reset_index()
191
+ # # fifa_defense_top10
192
 
193
 
194
+ # # In[103]:
195
 
196
+ # # Display the data for the bar chart
197
+ # st.write("Top 10 Defensive Teams")
198
+ # st.write(fifa_defense_top10)
199
 
200
+ # # Create the horizontal bar chart
201
+ # fig_bar = go.Figure(data=[go.Bar(y=fifa_defense_top10['team'], x=fifa_defense_top10['defense_score'], orientation='h')])
202
 
203
+ # # Update layout to include title, x-label, and y-label
204
+ # fig_bar.update_layout(title='Top 10 Defensive Teams', # Set the title
205
+ # xaxis_title='Defense Score', # Set the x-axis label
206
+ # yaxis_title='Team') # Set the y-axis label
207
 
208
+ # # Display the bar chart
209
+ # st.plotly_chart(fig_bar)
210
 
211
+ # # sns.barplot(data=fifa_defense_top10, x='defense_score', y='team', color="#7F1431")
212
+ # # plt.xlabel('Defense Score', size=20)
213
+ # # plt.ylabel('Team', size=20)
214
+ # # plt.title("Top 10 Defense Teams")
215
 
216
+ # # ### Do Home teams have any advantage?
217
 
218
+ # # In[104]:
219
 
220
 
221
+ # # Select all matches played at non-neutral locations
222
+ # home_team_advantage = df[df['neutral_location'] == False]['home_team_result'].value_counts(normalize=True)
223
 
224
+ # # # Plot
225
+ # # fig, axes = plt.subplots(1, 1, figsize=(8,8))
226
+ # # ax =plt.pie(home_team_advantage ,labels = ['Win', 'Lose', 'Draw'], autopct='%.0f%%')
227
+ # # plt.title('Home team match result', fontsize = 15)
228
+ # # plt.show()
229
 
230
 
231
+ # # As the graph shows, the home team has an advantage over the away team. This is due to factors such as the fans, the weather and the confidence of the players. For this reason, in the World Cup, teams that play at home will have an advantage.
232
 
233
+ # # # DATA PREPARATION AND FEATURE ENGINEERING
234
+ # # In this section, I will fill in the empty fields in the dataset and clean up the data for teams that did not qualify for the World Cup. Then, I will use the correlation matrix to choose the characteristics that will define the training dataset of the Machine Learning model. Finally, I will use the ratings of the teams in their last match to define the "Last Team Scores" dataset (i.e., the dataset that I will use to predict the World Cup matches).
235
 
236
+ # # ### Analyze and fill na's
237
 
238
+ # # In[105]:
239
 
240
+ # #
241
+ # # df.isnull().sum()
242
 
243
 
244
+ # # In[106]:
245
 
246
 
247
+ # # We can fill mean for na's in goal_keeper_score
248
+ # df[df['home_team'] == "Brazil"]['home_team_goalkeeper_score'].describe()
249
 
250
+ # # In[107]:
251
 
252
 
253
+ # df['home_team_goalkeeper_score'] = round(
254
+ # df.groupby("home_team")["home_team_goalkeeper_score"].transform(lambda x: x.fillna(x.mean())))
255
+ # df['away_team_goalkeeper_score'] = round(
256
+ # df.groupby("away_team")["away_team_goalkeeper_score"].transform(lambda x: x.fillna(x.mean())))
257
 
258
+ # # In[108]:
259
 
260
 
261
+ # # We can fill mean for na's in defense score
262
+ # df[df['away_team'] == "Uruguay"]['home_team_mean_defense_score'].describe()
263
 
264
+ # # In[65]:
265
 
266
 
267
+ # df['home_team_mean_defense_score'] = round(
268
+ # df.groupby('home_team')['home_team_mean_defense_score'].transform(lambda x: x.fillna(x.mean())))
269
+ # df['away_team_mean_defense_score'] = round(
270
+ # df.groupby('away_team')['away_team_mean_defense_score'].transform(lambda x: x.fillna(x.mean())))
271
 
272
+ # # In[109]:
273
 
274
 
275
+ # # We can fill mean for na's in offense score
276
+ # df[df['away_team'] == "Uruguay"]['home_team_mean_offense_score'].describe()
277
 
278
+ # # In[67]:
279
 
280
 
281
+ # df['home_team_mean_offense_score'] = round(
282
+ # df.groupby('home_team')['home_team_mean_offense_score'].transform(lambda x: x.fillna(x.mean())))
283
+ # df['away_team_mean_offense_score'] = round(
284
+ # df.groupby('away_team')['away_team_mean_offense_score'].transform(lambda x: x.fillna(x.mean())))
285
 
286
+ # # In[110]:
287
 
288
 
289
+ # # We can fill mean for na's in midfield score
290
+ # df[df['away_team'] == "Uruguay"]['home_team_mean_midfield_score'].describe()
291
 
292
+ # # In[111]:
293
 
294
 
295
+ # df['home_team_mean_midfield_score'] = round(
296
+ # df.groupby('home_team')['home_team_mean_midfield_score'].transform(lambda x: x.fillna(x.mean())))
297
+ # df['away_team_mean_midfield_score'] = round(
298
+ # df.groupby('away_team')['away_team_mean_midfield_score'].transform(lambda x: x.fillna(x.mean())))
299
 
300
+ # # In[112]:
301
 
302
 
303
+ # df.isnull().sum()
304
 
305
+ # # In[113]:
306
 
307
 
308
+ # # These teams are not available in the FIFA game itself, so they are assumed to be no worse than average-performing teams and are given an average score of 50.
309
+ # df.fillna(50, inplace=True)
310
 
311
+ # # ### Filter the teams participating in QATAR - World cup 2022
312
 
313
+ # # In[115]:
314
 
315
 
316
+ # list_2022 = ['Qatar', 'Germany', 'Denmark', 'Brazil', 'France', 'Belgium', 'Croatia', 'Spain', 'Serbia', 'England',
317
+ # 'Switzerland', 'Netherlands', 'Argentina', 'IR Iran', 'Korea Republic', 'Japan', 'Saudi Arabia', 'Ecuador',
318
+ # 'Uruguay', 'Canada', 'Ghana', 'Senegal', 'Portugal', 'Poland', 'Tunisia', 'Morocco', 'Cameroon', 'USA',
319
+ # 'Mexico', 'Wales', 'Australia', 'Costa Rica']
320
+ # final_df = df[(df["home_team"].apply(lambda x: x in list_2022)) | (df["away_team"].apply(lambda x: x in list_2022))]
321
 
322
+ # # **Top 10 teams in QATAR 2022**
323
 
324
+ # # In[116]:
325
 
326
 
327
+ # rank = final_df[['date', 'home_team', 'away_team', 'home_team_fifa_rank', 'away_team_fifa_rank']]
328
+ # home = rank[['date', 'home_team', 'home_team_fifa_rank']].rename(
329
+ # columns={"home_team": "team", "home_team_fifa_rank": "rank"})
330
+ # away = rank[['date', 'away_team', 'away_team_fifa_rank']].rename(
331
+ # columns={"away_team": "team", "away_team_fifa_rank": "rank"})
332
+ # rank = pd.concat([home, away])
333
 
334
+ # # Select each country latest match
335
+ # rank = rank.sort_values(['team', 'date'], ascending=[True, False])
336
+ # rank_top10 = rank.groupby('team').first().sort_values('rank', ascending=True).reset_index()
337
+ # rank_top10 = rank_top10[(rank_top10["team"].apply(lambda x: x in list_2022))][0:10]
338
 
339
+ # st.write("Top 10 Countries by Rank - Latest Match")
340
+ # rank_top10
341
 
342
+ # # # Create a scatter plot
343
+ # # fig_scatter = go.Figure(data=go.Scatter(x=rank_top10['team'], y=rank_top10['rank'], mode='markers', marker=dict(color='lightskyblue', size=12)))
344
+ # #
345
+ # # # Update layout to include title and labels
346
+ # # fig_scatter.update_layout(title='Top 10 Countries by Rank - Latest Match',
347
+ # # xaxis_title='Country',
348
+ # # yaxis_title='Rank')
349
+ # #
350
+ # # # Display the scatter plot
351
+ # # st.plotly_chart(fig_scatter)
352
 
353
+ # # **Top 10 teams with the highest winning percentage in QATAR 2022**
354
 
355
+ # # In[117]:
356
 
357
 
358
+ # rank_top10['Home_win_Per'] = np.vectorize(home_percentage)(rank_top10['team'])
359
+ # rank_top10['Away_win_Per'] = np.vectorize(away_percentage)(rank_top10['team'])
360
+ # rank_top10['Average_win_Per'] = round((rank_top10['Home_win_Per'] + rank_top10['Away_win_Per']) / 2)
361
+ # rank_top10_Win = rank_top10.sort_values('Average_win_Per', ascending=False)
362
 
363
+ # # st.write("Top 10 Countries by Rank - Latest Match")
364
+ # # rank_top10_Win
365
 
366
 
367
+ # # In[118]:
368
 
369
+ # # Display the data for the bar chart
370
+ # st.write("Top 10 Average Win Per game Teams")
371
+ # st.write(rank_top10_Win)
372
 
373
+ # # Create a horizontal bar chart
374
+ # # Create a horizontal bar chart
375
+ # fig_bar = go.Figure(data=[go.Bar(y=rank_top10_Win['team'], x=rank_top10_Win['Average_win_Per'], orientation='h')])
376
 
377
+ # # Update layout to include title and labels
378
+ # fig_bar.update_layout(title='Top 10 Countries by Average Win Percentage',
379
+ # xaxis_title='Average Win Percentage',
380
+ # yaxis_title='Country')
381
 
382
+ # # Display the horizontal bar chart
383
+ # st.plotly_chart(fig_bar)
384
 
385
  # sns.barplot(data=rank_top10_Win,x='Average_win_Per',y='team',color="#7F1431")
386
  # plt.xticks()