Jon Solow committed on
Commit
c412d07
1 Parent(s): e4a5a25

Add 24 hour player news

Browse files
src/pages/10_Player_News.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import datetime
2
+ import streamlit as st
3
+
4
+ from config import DEFAULT_ICON
5
+ from shared_page import common_page_config
6
+
7
+ from queries.nbcsports.player_news import get_player_news_window_hours
8
+
9
+
10
@st.cache_data(ttl=60 * 60 * 24)
def load_data():
    """Fetch the last 24 hours of player news and derive filter options.

    Cached for 24 hours via ``st.cache_data`` (cleared by the page's
    "Refresh Data" button).

    Returns:
        tuple: ``(data, teams_list, position_list, data_load_time_str)`` —
        the raw news DataFrame, the sorted list of non-empty team
        abbreviations, the unique positions, and the UTC load-time string
        shown next to the cached result.
    """
    data = get_player_news_window_hours(24)
    # Drop empty/None team values before sorting so the multiselect is clean.
    teams_list = sorted(filter(None, data.Team.unique()))
    position_list = data.Position.unique()
    # datetime.utcnow() is deprecated and returns a naive datetime; an aware
    # UTC timestamp renders the identical string here.
    data_load_time_str = datetime.datetime.now(datetime.timezone.utc).strftime("%m/%d/%Y %I:%M %p")
    return data, teams_list, position_list, data_load_time_str
17
+
18
+
19
def get_page():
    """Render the 'Player News - Last 24 Hours' page: header, refresh
    control, team filter, and the news table."""
    page_title = "Player News - Last 24 Hours"
    st.set_page_config(page_title=page_title, page_icon=DEFAULT_ICON, layout="wide")
    common_page_config()
    st.title(page_title)

    # A manual refresh simply drops the cache; load_data() below refetches.
    if st.button("Refresh Data"):
        st.cache_data.clear()

    data, teams_list, position_list, data_load_time_str = load_data()
    st.write(f"Data loaded as of: {data_load_time_str} UTC")

    # An empty selection means "no filter" — fall back to every team.
    teams_selected = st.multiselect("Team:", teams_list, placeholder="Select a team to filter") or teams_list

    with st.container():
        filtered_data = data[data.Team.isin(teams_selected)]
        # Size the table so every row is visible without an inner scrollbar.
        row_height = 35
        table_height = row_height * (len(filtered_data) + 1) + 12
        st.dataframe(
            filtered_data,
            hide_index=True,
            height=table_height,
            use_container_width=True,
            column_order=["Date/Time", "Name", "Team", "Position", "Headline"],
            column_config={"Date/Time": st.column_config.DatetimeColumn()},
        )
47
+
48
+
49
# Streamlit executes page files as scripts, so build the page on direct run.
if __name__ == "__main__":
    get_page()
src/queries/nbcsports/player_news.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from bs4 import BeautifulSoup
2
+ import datetime
3
+ import pandas as pd
4
+ import requests
5
+ from typing import Mapping
6
+
7
+ NEWS_URL = "https://www.nbcsports.com/fantasy/football/player-news"
8
+
9
+
10
def get_text_from_find_all(soup, element: str, find_search_map: Mapping[str, str]) -> list[str]:
    """Find all matching elements and return their stripped text.

    Args:
        soup: A BeautifulSoup document (or any object exposing ``find_all``).
        element: Tag name to search for, e.g. ``"div"``.
        find_search_map: Attribute filters, e.g. ``{"class": "PlayerNewsPost-name"}``.

    Returns:
        ``.text.strip()`` of every match, in document order.

    Raises:
        ValueError: If no elements match — the page layout likely changed.
    """
    find_list = soup.find_all(element, find_search_map)
    # An empty result means the scraper's selectors no longer match the page;
    # raise explicitly instead of `assert`, which is stripped under `python -O`.
    if not find_list:
        raise ValueError(f"No <{element}> elements matched {find_search_map} on the page")
    return [x.text.strip() for x in find_list]
15
+
16
+
17
def get_nfl_player_news(page_number: int = 1) -> pd.DataFrame:
    """Scrape one page of NBC Sports NFL player news.

    Args:
        page_number: 1-based page index of the news feed.

    Returns:
        DataFrame with columns Date/Time (parsed to datetime), Name, Team,
        Position, Headline, Analysis — one row per news post on the page.

    Raises:
        requests.HTTPError: If the news page returns an error status.
        ValueError: If expected elements are missing or the per-field counts
            disagree (page layout changed).
    """
    url = f"{NEWS_URL}?p={page_number}"
    # Bound the request so a stalled site can't hang the app, and fail loudly
    # on HTTP errors instead of silently parsing an error page.
    request_page = requests.get(url, timeout=30)
    request_page.raise_for_status()
    # Name the parser explicitly; bare BeautifulSoup(content) emits a warning
    # and may choose different parsers on different installs.
    soup = BeautifulSoup(request_page.content, "html.parser")
    player_names_list = get_text_from_find_all(soup, "div", {"class": "PlayerNewsPost-name"})
    team_abbr_list = get_text_from_find_all(soup, "span", {"class": "PlayerNewsPost-team-abbr"})
    position_list = get_text_from_find_all(soup, "span", {"class": "PlayerNewsPost-position"})
    headline_list = get_text_from_find_all(soup, "div", {"class": "PlayerNewsPost-headline"})
    analysis_list = get_text_from_find_all(soup, "div", {"class": "PlayerNewsPost-analysis"})
    datetime_div_list = soup.find_all("div", {"class": "PlayerNewsPost-date"})
    if not datetime_div_list:
        raise ValueError("No PlayerNewsPost-date elements found on the page")
    datetime_list = [x["data-date"] for x in datetime_div_list]
    # All per-post field lists must line up one-to-one. Include the dates
    # (the original check omitted them) and raise instead of `assert` so the
    # check survives `python -O`.
    lengths = {
        len(datetime_list),
        len(player_names_list),
        len(team_abbr_list),
        len(position_list),
        len(headline_list),
        len(analysis_list),
    }
    if len(lengths) != 1:
        raise ValueError(f"Mismatched field counts scraped from {url}")
    df = pd.DataFrame(
        zip(datetime_list, player_names_list, team_abbr_list, position_list, headline_list, analysis_list),
        columns=["Date/Time", "Name", "Team", "Position", "Headline", "Analysis"],
    )
    df["Date/Time"] = pd.to_datetime(df["Date/Time"])
    return df
38
+
39
+
40
def get_player_news_window_hours(hours: int = 1) -> pd.DataFrame:
    """Collect player news covering the last ``hours`` hours.

    Pages through the news feed (newest first) until a post older than the
    window is seen or the page cap is reached, then trims the combined
    result to the requested window.

    Args:
        hours: Size of the look-back window in hours.

    Returns:
        DataFrame of news rows whose Date/Time falls within the window
        (subject to the max-page safety cap); columns as in
        ``get_nfl_player_news``.
    """
    cutoff = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(hours=hours)
    max_pages = 20  # safety cap so a bad date comparison can't loop forever
    df_list = []
    # Pages are newest-first: once a page contains a post older than the
    # cutoff, every later page is entirely outside the window. range() also
    # fixes the original off-by-one (`page < max_pages` fetched at most 19
    # of the intended 20 pages).
    for page in range(1, max_pages + 1):
        last_news = get_nfl_player_news(page)
        df_list.append(last_news)
        if min(last_news["Date/Time"]) < cutoff:
            break
    df = pd.concat(df_list)
    # The final page usually straddles the cutoff; drop rows older than the
    # window so callers get exactly the advertised time range.
    return df[df["Date/Time"] >= cutoff]
tests/contract/test_nbcsports_player_news.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pytest
2
+
3
+ from queries.nbcsports import player_news
4
+
5
+
6
@pytest.mark.parametrize("page_number", [1, 2])
def test_get_nfl_player_news(page_number: int):
    # Contract check: scraping these live pages should not raise.
    player_news.get_nfl_player_news(page_number)
9
+
10
+
11
@pytest.mark.parametrize("hours", [1, 10])
def test_get_player_news_window_hours(hours: int):
    # Contract check: paging through a live news window should not raise.
    player_news.get_player_news_window_hours(hours)