Shreyas94 committed on
Commit
1fbd756
·
verified ·
1 Parent(s): e30e9e6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -35
app.py CHANGED
@@ -5,10 +5,11 @@ import functools
5
  from transformers import pipeline, BartForConditionalGeneration, BartTokenizer
6
  from sentence_transformers import SentenceTransformer, util
7
  from datetime import datetime
 
8
  import pandas as pd
9
  import time
10
- import gradio as gr
11
  import sys
 
12
 
13
  # Define sentiment analysis pipeline
14
  sentiment_analysis = pipeline("sentiment-analysis", model="ProsusAI/finbert")
@@ -108,10 +109,10 @@ def fetch_and_analyze_news(company_name, company_ticker, event_name, start_date=
108
 
109
  analyzed_news_name = []
110
 
111
- # Fetch and analyze news entries for company name
112
- analyze_news_entry_func = functools.partial(fetch_and_analyze_news_entry, company_name=company_name, company_ticker=company_ticker, location=location)
113
- for entry in news_entries_name:
114
- analyzed_news_name.append(analyze_news_entry_func(entry))
115
 
116
  return analyzed_news_name
117
 
@@ -136,20 +137,17 @@ def calculate_similarity(company_name, company_ticker, title, threshold=0.4):
136
  return weighted_similarity_score
137
 
138
def analyze_sentiment(title):
    """Run the FinBERT sentiment pipeline on a single headline.

    Returns a (label, score) pair taken from the first prediction the
    pipeline emits for *title*.
    """
    # The pipeline returns a list with one dict per input string; we pass
    # exactly one title, so index 0 holds its prediction.
    prediction = sentiment_analysis(title)[0]
    return prediction['label'], prediction['score']
145
 
146
  def fetch_news(company_name, company_ticker, event_name, start_date, end_date, location, num_news, include_domains, exclude_domains):
147
- start_time = time.time() # Record the start time
148
-
149
- include_domains = [domain.strip() for domain in include_domains.split(',')] if include_domains else None
150
- exclude_domains = [domain.strip() for domain in exclude_domains.split(',')] if exclude_domains else None
151
-
152
- analyzed_news_name = fetch_and_analyze_news(company_name, company_ticker, event_name, start_date, end_date, location, int(num_news), include_domains=include_domains, exclude_domains=exclude_domains)
153
 
154
  above_threshold_news = [news for news in analyzed_news_name if news is not None and news['similarity_score'] >= 0.3]
155
  below_threshold_news = [news for news in analyzed_news_name if news is not None and news['similarity_score'] < 0.3]
@@ -157,35 +155,40 @@ def fetch_news(company_name, company_ticker, event_name, start_date, end_date, l
157
  above_threshold_df = pd.DataFrame(above_threshold_news)
158
  below_threshold_df = pd.DataFrame(below_threshold_news)
159
 
160
- file_name = f"{company_name}_News_Data_10002.xlsx"
161
-
162
  with pd.ExcelWriter(file_name) as writer:
163
  above_threshold_df.to_excel(writer, sheet_name='Above_Threshold', index=False)
164
  below_threshold_df.to_excel(writer, sheet_name='Below_Threshold', index=False)
165
 
166
- end_time = time.time() # Record the end time
167
- elapsed_time = end_time - start_time # Calculate the elapsed time
168
-
169
- return f"News data saved to {file_name} with separate sheets for above and below threshold news.\nComputation Time: {elapsed_time:.2f} seconds"
170
-
171
- # Gradio interface setup
172
- iface = gr.Interface(
173
- fn=fetch_news,
174
- inputs=[
175
- gr.Textbox(label="Company Name"),
176
- gr.Textbox(label="Company Ticker"),
177
- gr.Textbox(label="Event Name"),
178
- gr.Textbox(label="Start Date (optional)"),
179
- gr.Textbox(label="End Date (optional)"),
180
- gr.Textbox(label="Location (optional)"),
181
- gr.Textbox(label="Number of News to Fetch"),
182
- gr.Textbox(label="Include Domains (comma-separated, optional)"),
183
- gr.Textbox(label="Exclude Domains (comma-separated, optional)")
184
- ],
185
- outputs="text",
 
 
 
 
 
186
  title="News Fetcher",
187
- description="Fetch and analyze news articles for a specific company and event."
188
  )
189
 
190
  if __name__ == "__main__":
191
- iface.launch()
 
5
  from transformers import pipeline, BartForConditionalGeneration, BartTokenizer
6
  from sentence_transformers import SentenceTransformer, util
7
  from datetime import datetime
8
+ from concurrent.futures import ThreadPoolExecutor
9
  import pandas as pd
10
  import time
 
11
  import sys
12
+ import gradio as gr
13
 
14
  # Define sentiment analysis pipeline
15
  sentiment_analysis = pipeline("sentiment-analysis", model="ProsusAI/finbert")
 
109
 
110
  analyzed_news_name = []
111
 
112
+ # Fetch and analyze news entries for company name in parallel
113
+ with ThreadPoolExecutor() as executor:
114
+ analyze_news_entry_func = functools.partial(fetch_and_analyze_news_entry, company_name=company_name, company_ticker=company_ticker, location=location)
115
+ analyzed_news_name = list(executor.map(analyze_news_entry_func, news_entries_name))
116
 
117
  return analyzed_news_name
118
 
 
137
  return weighted_similarity_score
138
 
139
def analyze_sentiment(title):
    """Run the FinBERT sentiment pipeline on *title*.

    Returns a (label, score) pair from the first prediction; progress is
    echoed to stdout for debugging.
    """
    print("Analyzing sentiment...")
    # One input string -> a list of one result dict from the pipeline.
    first = sentiment_analysis(title)[0]
    label, score = first['label'], first['score']
    print("Sentiment analyzed successfully.")
    return label, score
148
 
149
def fetch_news(company_name, company_ticker, event_name, start_date, end_date, location, num_news, include_domains, exclude_domains):
    """Fetch and analyze news, split by similarity threshold, and write an Excel file.

    Parameters mirror the Gradio inputs: free-text company/event fields,
    optional date range and location, a news count, and optional
    comma-separated domain include/exclude filters.

    Returns the name of the written workbook so the UI can serve it for
    download.
    """
    # Normalize UI-sourced inputs before handing them to the fetcher: the
    # count may arrive as a float/string from the widget, and the domain
    # filters arrive as comma-separated strings (empty -> no filter).
    # (This normalization existed in the pre-refactor version and was
    # dropped when the function was simplified.)
    num_news = int(num_news)
    include_domains = [d.strip() for d in include_domains.split(',')] if include_domains else None
    exclude_domains = [d.strip() for d in exclude_domains.split(',')] if exclude_domains else None

    analyzed_news_name = fetch_and_analyze_news(company_name, company_ticker, event_name, start_date, end_date, location, num_news, include_domains, exclude_domains)

    # Partition by relevance; entries that failed analysis come back as None.
    above_threshold_news = [news for news in analyzed_news_name if news is not None and news['similarity_score'] >= 0.3]
    below_threshold_news = [news for news in analyzed_news_name if news is not None and news['similarity_score'] < 0.3]

    above_threshold_df = pd.DataFrame(above_threshold_news)
    below_threshold_df = pd.DataFrame(below_threshold_news)

    file_name = f"{company_name}_News_Data.xlsx"

    # One workbook, two sheets: relevant vs. low-similarity articles.
    with pd.ExcelWriter(file_name) as writer:
        above_threshold_df.to_excel(writer, sheet_name='Above_Threshold', index=False)
        below_threshold_df.to_excel(writer, sheet_name='Below_Threshold', index=False)

    return file_name
165
+
166
# Gradio Interface
def gradio_fetch_news(company_name, company_ticker, event_name, start_date, end_date, location, num_news, include_domains, exclude_domains):
    """Thin UI adapter: run fetch_news and hand the Excel path to gr.File."""
    return fetch_news(company_name, company_ticker, event_name, start_date, end_date, location, num_news, include_domains, exclude_domains)

inputs = [
    gr.Textbox(label="Company Name"),
    gr.Textbox(label="Company Ticker"),
    gr.Textbox(label="Event Name"),
    gr.Textbox(label="Start Date (optional)"),
    gr.Textbox(label="End Date (optional)"),
    gr.Textbox(label="Location (optional)"),
    # Gradio components take their initial value via `value=`; `default=`
    # is not an accepted keyword and raises a TypeError on modern Gradio.
    gr.Number(label="Number of News to Fetch", value=5),
    gr.Textbox(label="Include Domains (comma-separated)", placeholder="e.g., example.com,example.org"),
    gr.Textbox(label="Exclude Domains (comma-separated)", placeholder="e.g., example.net,example.info")
]

outputs = gr.File(label="Download Excel File")

interface = gr.Interface(
    fn=gradio_fetch_news,
    inputs=inputs,
    outputs=outputs,
    title="News Fetcher",
    description="Fetch and analyze news articles based on company name, event, and other criteria, and download the results as an Excel file."
)

if __name__ == "__main__":
    interface.launch()