Jon Solow committed on
Commit
77fb55b
1 Parent(s): 1f1a2a2

Parse out team names of practice reports to fix bug where only opponent report exists

Browse files
src/queries/nfl_teams/practice_reports.py CHANGED
@@ -1,8 +1,10 @@
 
1
  import datetime
2
  from multiprocessing import Pool
3
  import numpy as np
4
  import pandas as pd
5
  from pydantic import BaseModel, Field
 
6
  from typing import Optional
7
  from urllib.parse import urljoin
8
 
@@ -72,11 +74,23 @@ class PracticeReportRawRow(BaseModel):
72
  return cls(**{DAY_OF_WEEK_STRING_MAPPING.get(k, k): cls.replace_nan(v) for k, v in input_dict.items()})
73
 
74
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  def scrape_team_injury_report(team: NFLTeam) -> pd.DataFrame:
76
  print(f"Scraping Injury Report for: {team.team_full_name}")
77
- injury_report_url = urljoin(team.injury_report_url, f"week/REG-{CURRENT_WEEK}")
78
  try:
79
- team_report = pd.read_html(injury_report_url)[0]
80
  except Exception:
81
  print(f"Failed to scrape practice report for: {team.team_full_name}")
82
  return pd.DataFrame()
 
1
+ from bs4 import BeautifulSoup
2
  import datetime
3
  from multiprocessing import Pool
4
  import numpy as np
5
  import pandas as pd
6
  from pydantic import BaseModel, Field
7
+ import requests
8
  from typing import Optional
9
  from urllib.parse import urljoin
10
 
 
74
  return cls(**{DAY_OF_WEEK_STRING_MAPPING.get(k, k): cls.replace_nan(v) for k, v in input_dict.items()})
75
 
76
 
77
def get_injury_report_dataframe(team: NFLTeam, timeout: float = 30.0) -> pd.DataFrame:
    """Download and parse the current-week injury report table for ``team``.

    The report page is fetched from ``team.injury_report_url`` joined with
    ``week/REG-{CURRENT_WEEK}``. The first club name shown on the page must
    equal ``team.team_full_name``; otherwise the first table on the page
    does not belong to the requested team and the report is rejected.

    Args:
        team: Team whose ``injury_report_url`` and ``team_full_name`` are used.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The first HTML table of the report page as a DataFrame.

    Raises:
        requests.RequestException: The request failed, timed out, or the
            server answered with an error status.
        ValueError: The page has no club-name span, its first club name is
            not this team, or it contains no parseable table.
    """
    # Local import: pandas deprecates passing literal HTML strings to
    # read_html, so the markup is wrapped in a file-like object below.
    from io import StringIO

    injury_report_url = urljoin(team.injury_report_url, f"week/REG-{CURRENT_WEEK}")

    # A timeout keeps a stalled connection from blocking the caller forever.
    report_request = requests.get(injury_report_url, timeout=timeout)
    report_request.raise_for_status()

    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    report_soup = BeautifulSoup(report_request.content, "html.parser")

    team_names_spans = report_soup.find_all("span", {"class": "nfl-o-injury-report__club-name"})
    # Explicit raises instead of ``assert``: assertions vanish under ``-O``.
    if not team_names_spans:
        raise ValueError(f"No club names found in injury report: {injury_report_url}")

    first_club_name = team_names_spans[0].get_text()
    if first_club_name != team.team_full_name:
        raise ValueError(
            f"First report on page is for {first_club_name!r}, "
            f"expected {team.team_full_name!r}"
        )

    tables = report_soup.find_all("table")
    if not tables:
        raise ValueError(f"No tables found in injury report: {injury_report_url}")

    # Parse only the first table so a later table on the page cannot be
    # picked up instead.
    df_report = pd.read_html(StringIO(str(tables[0])))[0]
    return df_report
88
+
89
+
90
  def scrape_team_injury_report(team: NFLTeam) -> pd.DataFrame:
91
  print(f"Scraping Injury Report for: {team.team_full_name}")
 
92
  try:
93
+ team_report = get_injury_report_dataframe(team)
94
  except Exception:
95
  print(f"Failed to scrape practice report for: {team.team_full_name}")
96
  return pd.DataFrame()