Update app.py
app.py
CHANGED
@@ -5,10 +5,11 @@ import functools
 from transformers import pipeline, BartForConditionalGeneration, BartTokenizer
 from sentence_transformers import SentenceTransformer, util
 from datetime import datetime
+from concurrent.futures import ThreadPoolExecutor
 import pandas as pd
 import time
-import gradio as gr
 import sys
+import gradio as gr

 # Define sentiment analysis pipeline
 sentiment_analysis = pipeline("sentiment-analysis", model="ProsusAI/finbert")
@@ -108,10 +109,10 @@ def fetch_and_analyze_news(company_name, company_ticker, event_name, start_date=

     analyzed_news_name = []

-    # Fetch and analyze news entries for company name
-
-
-    analyzed_news_name.
+    # Fetch and analyze news entries for company name in parallel
+    with ThreadPoolExecutor() as executor:
+        analyze_news_entry_func = functools.partial(fetch_and_analyze_news_entry, company_name=company_name, company_ticker=company_ticker, location=location)
+        analyzed_news_name = list(executor.map(analyze_news_entry_func, news_entries_name))

     return analyzed_news_name
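Note on the added lines: functools.partial pins the company-specific keyword arguments, and executor.map fans the remaining per-entry argument out across worker threads, returning results in input order. A minimal, self-contained sketch of that pattern, where analyze_one and sample_entries are hypothetical stand-ins for fetch_and_analyze_news_entry and news_entries_name:

import functools
from concurrent.futures import ThreadPoolExecutor

def analyze_one(entry, company_name, company_ticker, location):
    # Hypothetical stand-in for the per-article fetch-and-score step.
    return {"entry": entry, "company": company_name, "ticker": company_ticker, "location": location}

sample_entries = ["headline one", "headline two", "headline three"]

with ThreadPoolExecutor() as executor:
    # partial() fixes the keyword arguments, so executor.map only has to
    # supply the one positional argument that varies per news entry.
    analyze_func = functools.partial(analyze_one, company_name="Acme Corp", company_ticker="ACME", location="US")
    results = list(executor.map(analyze_func, sample_entries))

print(results)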
@@ -136,20 +137,17 @@ def calculate_similarity(company_name, company_ticker, title, threshold=0.4):
     return weighted_similarity_score

 def analyze_sentiment(title):
+    print("Analyzing sentiment...")
     # Perform sentiment analysis on the input title
     result = sentiment_analysis(title)
     # Extract sentiment label and score from the result
     labels = result[0]['label']
     scores = result[0]['score']
+    print("Sentiment analyzed successfully.")
     return labels, scores

 def fetch_news(company_name, company_ticker, event_name, start_date, end_date, location, num_news, include_domains, exclude_domains):
-
-
-    include_domains = [domain.strip() for domain in include_domains.split(',')] if include_domains else None
-    exclude_domains = [domain.strip() for domain in exclude_domains.split(',')] if exclude_domains else None
-
-    analyzed_news_name = fetch_and_analyze_news(company_name, company_ticker, event_name, start_date, end_date, location, int(num_news), include_domains=include_domains, exclude_domains=exclude_domains)
+    analyzed_news_name = fetch_and_analyze_news(company_name, company_ticker, event_name, start_date, end_date, location, num_news, include_domains, exclude_domains)

     above_threshold_news = [news for news in analyzed_news_name if news is not None and news['similarity_score'] >= 0.3]
     below_threshold_news = [news for news in analyzed_news_name if news is not None and news['similarity_score'] < 0.3]
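A side note on analyze_sentiment above: a Hugging Face pipeline returns a list with one dict per input, which is why the code reads result[0]['label'] and result[0]['score']. A small sketch of that output shape (assumes the ProsusAI/finbert weights can be downloaded; FinBERT's labels are positive, negative, and neutral):

from transformers import pipeline

sentiment_analysis = pipeline("sentiment-analysis", model="ProsusAI/finbert")

# One dict per input string, e.g. [{'label': 'positive', 'score': 0.95}]
result = sentiment_analysis("Acme Corp shares surge after record quarterly earnings")
print(result[0]['label'], result[0]['score'])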
@@ -157,35 +155,40 @@ def fetch_news(company_name, company_ticker, event_name, start_date, end_date, l
     above_threshold_df = pd.DataFrame(above_threshold_news)
     below_threshold_df = pd.DataFrame(below_threshold_news)

-    file_name = f"{company_name}
-
+    file_name = f"{company_name}_News_Data.xlsx"
+
     with pd.ExcelWriter(file_name) as writer:
         above_threshold_df.to_excel(writer, sheet_name='Above_Threshold', index=False)
         below_threshold_df.to_excel(writer, sheet_name='Below_Threshold', index=False)

-    (old lines 166-185 removed by this commit; their content is not shown in this view)
+    return file_name
+
+# Gradio Interface
+def gradio_fetch_news(company_name, company_ticker, event_name, start_date, end_date, location, num_news, include_domains, exclude_domains):
+    file_name = fetch_news(company_name, company_ticker, event_name, start_date, end_date, location, num_news, include_domains, exclude_domains)
+    return file_name
+
+inputs = [
+    gr.Textbox(label="Company Name"),
+    gr.Textbox(label="Company Ticker"),
+    gr.Textbox(label="Event Name"),
+    gr.Textbox(label="Start Date (optional)"),
+    gr.Textbox(label="End Date (optional)"),
+    gr.Textbox(label="Location (optional)"),
+    gr.Number(label="Number of News to Fetch", default=5),
+    gr.Textbox(label="Include Domains (comma-separated)", placeholder="e.g., example.com,example.org"),
+    gr.Textbox(label="Exclude Domains (comma-separated)", placeholder="e.g., example.net,example.info")
+]
+
+outputs = gr.File(label="Download Excel File")
+
+interface = gr.Interface(
+    fn=gradio_fetch_news,
+    inputs=inputs,
+    outputs=outputs,
     title="News Fetcher",
-    description="Fetch and analyze news articles
+    description="Fetch and analyze news articles based on company name, event, and other criteria, and download the results as an Excel file."
 )

 if __name__ == "__main__":
-
+    interface.launch()
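For context on the new Gradio wiring: the callback returns a file path, and the gr.File output component serves that path as a download. A stripped-down sketch of the same flow, where make_report is a hypothetical stand-in for gradio_fetch_news (writing .xlsx with pd.ExcelWriter assumes openpyxl is installed). One version note: gr.Number(..., default=5) matches the older Gradio 2.x API; on recent Gradio releases the initial value is usually passed as value= instead.

import gradio as gr
import pandas as pd

def make_report(company_name):
    # Hypothetical stand-in for gradio_fetch_news: write a tiny one-sheet report.
    file_name = f"{company_name}_News_Data.xlsx"
    df = pd.DataFrame({"title": ["example headline"], "similarity_score": [0.42]})
    with pd.ExcelWriter(file_name) as writer:
        df.to_excel(writer, sheet_name="Above_Threshold", index=False)
    return file_name  # gr.File serves the returned path as a downloadable file

demo = gr.Interface(
    fn=make_report,
    inputs=gr.Textbox(label="Company Name"),
    outputs=gr.File(label="Download Excel File"),
    title="News Fetcher (sketch)",
)

if __name__ == "__main__":
    demo.launch()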