Commit
•
b125f18
1
Parent(s):
0ddd0af
Update news_extractor/news_extractor.py
Browse files
news_extractor/news_extractor.py
CHANGED
@@ -121,11 +121,13 @@ def get_news():
|
|
121 |
final_df = pd.concat([final_df, news_agg(i)], axis=0)
|
122 |
final_df.reset_index(drop=True, inplace=True)
|
123 |
|
|
|
|
|
124 |
final_df.sort_values(by="elapsed_time", inplace=True)
|
125 |
# final_df['src_time'] = final_df['src'] + (" " * 5) + final_df["elapsed_time_str"]
|
126 |
# final_df.drop(columns=['date', 'parsed_date', 'src', 'elapsed_time', 'elapsed_time_str'], inplace=True)
|
127 |
final_df.drop(columns=['elapsed_time'], inplace=True)
|
128 |
final_df.drop_duplicates(subset='description', inplace=True)
|
129 |
final_df = final_df.loc[(final_df["title"] != ""), :].copy()
|
130 |
-
|
131 |
return final_df
|
|
|
121 |
final_df = pd.concat([final_df, news_agg(i)], axis=0)
|
122 |
final_df.reset_index(drop=True, inplace=True)
|
123 |
|
124 |
+
logging.warning(final_df['src'].unique())
|
125 |
+
|
126 |
final_df.sort_values(by="elapsed_time", inplace=True)
|
127 |
# final_df['src_time'] = final_df['src'] + (" " * 5) + final_df["elapsed_time_str"]
|
128 |
# final_df.drop(columns=['date', 'parsed_date', 'src', 'elapsed_time', 'elapsed_time_str'], inplace=True)
|
129 |
final_df.drop(columns=['elapsed_time'], inplace=True)
|
130 |
final_df.drop_duplicates(subset='description', inplace=True)
|
131 |
final_df = final_df.loc[(final_df["title"] != ""), :].copy()
|
132 |
+
|
133 |
return final_df
|