Shreyas94 committed on
Commit
1fbd756
·
verified ·
1 Parent(s): e30e9e6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -35
app.py CHANGED
@@ -5,10 +5,11 @@ import functools
5
  from transformers import pipeline, BartForConditionalGeneration, BartTokenizer
6
  from sentence_transformers import SentenceTransformer, util
7
  from datetime import datetime
 
8
  import pandas as pd
9
  import time
10
- import gradio as gr
11
  import sys
 
12
 
13
  # Define sentiment analysis pipeline
14
  sentiment_analysis = pipeline("sentiment-analysis", model="ProsusAI/finbert")
@@ -108,10 +109,10 @@ def fetch_and_analyze_news(company_name, company_ticker, event_name, start_date=
108
 
109
  analyzed_news_name = []
110
 
111
- # Fetch and analyze news entries for company name
112
- analyze_news_entry_func = functools.partial(fetch_and_analyze_news_entry, company_name=company_name, company_ticker=company_ticker, location=location)
113
- for entry in news_entries_name:
114
- analyzed_news_name.append(analyze_news_entry_func(entry))
115
 
116
  return analyzed_news_name
117
 
@@ -136,20 +137,17 @@ def calculate_similarity(company_name, company_ticker, title, threshold=0.4):
136
  return weighted_similarity_score
137
 
138
def analyze_sentiment(title):
    """Run the FinBERT sentiment pipeline on a single headline.

    Returns a (label, score) pair taken from the first prediction the
    pipeline emits for *title*.
    """
    # The pipeline returns a list with one dict per input string; we pass
    # exactly one title, so index 0 holds its prediction.
    prediction = sentiment_analysis(title)[0]
    return prediction['label'], prediction['score']
145
 
146
  def fetch_news(company_name, company_ticker, event_name, start_date, end_date, location, num_news, include_domains, exclude_domains):
147
- start_time = time.time() # Record the start time
148
-
149
- include_domains = [domain.strip() for domain in include_domains.split(',')] if include_domains else None
150
- exclude_domains = [domain.strip() for domain in exclude_domains.split(',')] if exclude_domains else None
151
-
152
- analyzed_news_name = fetch_and_analyze_news(company_name, company_ticker, event_name, start_date, end_date, location, int(num_news), include_domains=include_domains, exclude_domains=exclude_domains)
153
 
154
  above_threshold_news = [news for news in analyzed_news_name if news is not None and news['similarity_score'] >= 0.3]
155
  below_threshold_news = [news for news in analyzed_news_name if news is not None and news['similarity_score'] < 0.3]
@@ -157,35 +155,40 @@ def fetch_news(company_name, company_ticker, event_name, start_date, end_date, l
157
  above_threshold_df = pd.DataFrame(above_threshold_news)
158
  below_threshold_df = pd.DataFrame(below_threshold_news)
159
 
160
- file_name = f"{company_name}_News_Data_10002.xlsx"
161
-
162
  with pd.ExcelWriter(file_name) as writer:
163
  above_threshold_df.to_excel(writer, sheet_name='Above_Threshold', index=False)
164
  below_threshold_df.to_excel(writer, sheet_name='Below_Threshold', index=False)
165
 
166
- end_time = time.time() # Record the end time
167
- elapsed_time = end_time - start_time # Calculate the elapsed time
168
-
169
- return f"News data saved to {file_name} with separate sheets for above and below threshold news.\nComputation Time: {elapsed_time:.2f} seconds"
170
-
171
- # Gradio interface setup
172
- iface = gr.Interface(
173
- fn=fetch_news,
174
- inputs=[
175
- gr.Textbox(label="Company Name"),
176
- gr.Textbox(label="Company Ticker"),
177
- gr.Textbox(label="Event Name"),
178
- gr.Textbox(label="Start Date (optional)"),
179
- gr.Textbox(label="End Date (optional)"),
180
- gr.Textbox(label="Location (optional)"),
181
- gr.Textbox(label="Number of News to Fetch"),
182
- gr.Textbox(label="Include Domains (comma-separated, optional)"),
183
- gr.Textbox(label="Exclude Domains (comma-separated, optional)")
184
- ],
185
- outputs="text",
 
 
 
 
 
186
  title="News Fetcher",
187
- description="Fetch and analyze news articles for a specific company and event."
188
  )
189
 
190
  if __name__ == "__main__":
191
- iface.launch()
 
5
  from transformers import pipeline, BartForConditionalGeneration, BartTokenizer
6
  from sentence_transformers import SentenceTransformer, util
7
  from datetime import datetime
8
+ from concurrent.futures import ThreadPoolExecutor
9
  import pandas as pd
10
  import time
 
11
  import sys
12
+ import gradio as gr
13
 
14
  # Define sentiment analysis pipeline
15
  sentiment_analysis = pipeline("sentiment-analysis", model="ProsusAI/finbert")
 
109
 
110
  analyzed_news_name = []
111
 
112
+ # Fetch and analyze news entries for company name in parallel
113
+ with ThreadPoolExecutor() as executor:
114
+ analyze_news_entry_func = functools.partial(fetch_and_analyze_news_entry, company_name=company_name, company_ticker=company_ticker, location=location)
115
+ analyzed_news_name = list(executor.map(analyze_news_entry_func, news_entries_name))
116
 
117
  return analyzed_news_name
118
 
 
137
  return weighted_similarity_score
138
 
139
def analyze_sentiment(title):
    """Run the FinBERT sentiment pipeline on *title*.

    Returns a (label, score) pair from the first prediction; progress is
    echoed to stdout for debugging.
    """
    print("Analyzing sentiment...")
    # One input string -> a list of one result dict from the pipeline.
    first = sentiment_analysis(title)[0]
    label, score = first['label'], first['score']
    print("Sentiment analyzed successfully.")
    return label, score
148
 
149
def fetch_news(company_name, company_ticker, event_name, start_date, end_date, location, num_news, include_domains, exclude_domains):
    """Fetch and analyze news, split by similarity threshold, and write an Excel file.

    Parameters mirror the Gradio inputs: free-text company/event fields,
    optional date range and location, a news count, and optional
    comma-separated domain include/exclude filters.

    Returns the name of the written workbook so the UI can serve it for
    download.
    """
    # Normalize UI-sourced inputs before handing them to the fetcher: the
    # count may arrive as a float/string from the widget, and the domain
    # filters arrive as comma-separated strings (empty -> no filter).
    # (This normalization existed in the pre-refactor version and was
    # dropped when the function was simplified.)
    num_news = int(num_news)
    include_domains = [d.strip() for d in include_domains.split(',')] if include_domains else None
    exclude_domains = [d.strip() for d in exclude_domains.split(',')] if exclude_domains else None

    analyzed_news_name = fetch_and_analyze_news(company_name, company_ticker, event_name, start_date, end_date, location, num_news, include_domains, exclude_domains)

    # Partition by relevance; entries that failed analysis come back as None.
    above_threshold_news = [news for news in analyzed_news_name if news is not None and news['similarity_score'] >= 0.3]
    below_threshold_news = [news for news in analyzed_news_name if news is not None and news['similarity_score'] < 0.3]

    above_threshold_df = pd.DataFrame(above_threshold_news)
    below_threshold_df = pd.DataFrame(below_threshold_news)

    file_name = f"{company_name}_News_Data.xlsx"

    # One workbook, two sheets: relevant vs. low-similarity articles.
    with pd.ExcelWriter(file_name) as writer:
        above_threshold_df.to_excel(writer, sheet_name='Above_Threshold', index=False)
        below_threshold_df.to_excel(writer, sheet_name='Below_Threshold', index=False)

    return file_name
165
+
166
# Gradio Interface
def gradio_fetch_news(company_name, company_ticker, event_name, start_date, end_date, location, num_news, include_domains, exclude_domains):
    """Thin UI adapter: run fetch_news and hand the Excel path to gr.File."""
    return fetch_news(company_name, company_ticker, event_name, start_date, end_date, location, num_news, include_domains, exclude_domains)

inputs = [
    gr.Textbox(label="Company Name"),
    gr.Textbox(label="Company Ticker"),
    gr.Textbox(label="Event Name"),
    gr.Textbox(label="Start Date (optional)"),
    gr.Textbox(label="End Date (optional)"),
    gr.Textbox(label="Location (optional)"),
    # Gradio components take their initial value via `value=`; `default=`
    # is not an accepted keyword and raises a TypeError on modern Gradio.
    gr.Number(label="Number of News to Fetch", value=5),
    gr.Textbox(label="Include Domains (comma-separated)", placeholder="e.g., example.com,example.org"),
    gr.Textbox(label="Exclude Domains (comma-separated)", placeholder="e.g., example.net,example.info")
]

outputs = gr.File(label="Download Excel File")

interface = gr.Interface(
    fn=gradio_fetch_news,
    inputs=inputs,
    outputs=outputs,
    title="News Fetcher",
    description="Fetch and analyze news articles based on company name, event, and other criteria, and download the results as an Excel file."
)

if __name__ == "__main__":
    interface.launch()