lalithadevi
committed on
Commit
•
c9fe461
1
Parent(s):
c54be12
Update news_extractor/news_extractor.py
Browse files
news_extractor/news_extractor.py
CHANGED
@@ -93,7 +93,7 @@ def news_agg(rss):
|
|
93 |
timeout = 5
|
94 |
|
95 |
resp = r.get(rss, timeout=timeout, headers=headers)
|
96 |
-
|
97 |
b = BeautifulSoup(resp.content, "xml")
|
98 |
items = b.find_all("item")
|
99 |
for i in items:
|
@@ -135,7 +135,7 @@ def get_news():
|
|
135 |
final_df = pd.concat([final_df, news_agg(i)], axis=0)
|
136 |
final_df.reset_index(drop=True, inplace=True)
|
137 |
|
138 |
-
|
139 |
|
140 |
final_df.sort_values(by="elapsed_time", inplace=True)
|
141 |
# final_df['src_time'] = final_df['src'] + (" " * 5) + final_df["elapsed_time_str"]
|
|
|
93 |
timeout = 5
|
94 |
|
95 |
resp = r.get(rss, timeout=timeout, headers=headers)
|
96 |
+
print(f'{rss}: {resp.status_code}')
|
97 |
b = BeautifulSoup(resp.content, "xml")
|
98 |
items = b.find_all("item")
|
99 |
for i in items:
|
|
|
135 |
final_df = pd.concat([final_df, news_agg(i)], axis=0)
|
136 |
final_df.reset_index(drop=True, inplace=True)
|
137 |
|
138 |
+
print(final_df['src'].unique())
|
139 |
|
140 |
final_df.sort_values(by="elapsed_time", inplace=True)
|
141 |
# final_df['src_time'] = final_df['src'] + (" " * 5) + final_df["elapsed_time_str"]
|