# Hugging Face Space: Financial Tone Analysis (Gradio app)
import gradio as gr
import json
import os
import nltk
import spacy
import re
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
import torch

# Download necessary NLTK data for sentence tokenization.
try:
    nltk.data.find('tokenizers/punkt')
except LookupError:
    nltk.download('punkt')

# Load spaCy model used for sentence splitting and as a NER fallback.
nlp = spacy.load('en_core_web_sm')
# Guard the add: Language.add_pipe raises if a component with this name
# is already present in the pipeline.
if 'sentencizer' not in nlp.pipe_names:
    nlp.add_pipe('sentencizer')

# Globals populated by load_models(); None means "capability unavailable".
fin_model = None
summarizer = None
ner_model = None
auth_token = os.environ.get("HF_Token")  # For NER model loading
def load_models():
    """Populate the module-level model globals.

    Attempts to load three Hugging Face pipelines — financial sentiment,
    meeting summarization, and NER. Each load is independent, so one
    failure only disables that capability (its global stays None).
    """
    global fin_model, summarizer, ner_model

    print("Loading sentiment model...")
    try:
        fin_model = pipeline("sentiment-analysis", model="ylingag/ISOM5240_financial_tone")
    except Exception as err:
        print(f"Failed to load sentiment model: {err}")
        fin_model = None
    else:
        print("Sentiment model loaded successfully.")

    print("Loading summarization model...")
    try:
        summarizer = pipeline("summarization", model="knkarthick/MEETING_SUMMARY")
    except Exception as err:
        print(f"Warning: Failed to load summarization model: {err}")
        print("Will continue without summarization capability.")
        summarizer = None
    else:
        print("Summarization model loaded successfully.")

    print("Loading NER model...")
    try:
        ner_model = pipeline("ner", model="dslim/bert-base-NER")
    except Exception as err:
        print(f"Warning: Failed to load NER model: {err}")
        print("Will continue without NER capability.")
        ner_model = None
    else:
        print("NER model loaded successfully.")
def split_in_sentences(text):
    """Split *text* into a list of stripped sentence strings via spaCy."""
    return [sentence.text.strip() for sentence in nlp(text).sents]
def make_spans(text, results):
    """Create (sentence, sentiment-label) pairs for gr.HighlightedText.

    Args:
        text: Original input text; re-split into sentences here, so the
            sentence count is assumed to match len(results).
        results: Per-sentence classifier outputs, each a dict with a
            'label' key.

    Returns:
        List of (sentence, label) tuples.
    """
    # Map generic LABEL_n outputs to human-readable sentiment names;
    # any other label passes through unchanged.
    label_names = {"LABEL_0": "Negative", "LABEL_1": "Neutral", "LABEL_2": "Positive"}
    labels = [label_names.get(result['label'], result['label']) for result in results]
    return list(zip(split_in_sentences(text), labels))
def text_to_sentiment(text):
    """Return the overall sentiment label for *text*.

    Returns a human-readable sentiment ("Negative"/"Neutral"/"Positive"
    when the model emits LABEL_n, otherwise the model's own label), or a
    status/error message string when the model is missing, the input is
    blank, or inference fails.
    """
    global fin_model
    if not fin_model:
        return "Sentiment model not available."
    if not text or not text.strip():
        return "Please enter text for analysis."
    try:
        sentiment = fin_model(text)[0]["label"]
        # Translate generic LABEL_n outputs; descriptive labels pass through.
        label_names = {"LABEL_0": "Negative", "LABEL_1": "Neutral", "LABEL_2": "Positive"}
        return label_names.get(sentiment, sentiment)
    except Exception as e:
        print(f"Error during overall sentiment analysis: {e}")
        return f"Error: {str(e)}"
def summarize_text(text):
    """Generate a short summary of *text*.

    Returns a status message when the summarizer is unavailable, when the
    stripped input is under 50 characters, or when summarization raises.
    """
    global summarizer
    if not summarizer:
        return "Summarization model not available."
    if not text or len(text.strip()) < 50:
        return "Text too short for summarization."
    try:
        return summarizer(text)[0]['summary_text']
    except Exception as err:
        print(f"Error during summarization: {err}")
        return f"Summarization error: {str(err)}"
def fin_ext(text):
    """Build sentence-level sentiment spans for gr.HighlightedText.

    Returns None when the sentiment model is unavailable, the input is
    empty, or the analysis raises.
    """
    global fin_model
    if not fin_model or not text:
        return None
    try:
        sentence_results = fin_model(split_in_sentences(text))
        return make_spans(text, sentence_results)
    except Exception as err:
        print(f"Error during sentence-level sentiment analysis: {err}")
        return None
def identify_entities(text):
    """Identify named entities, preferring the transformer NER model.

    Returns a list of (span_text, label_or_None) tuples that covers the
    whole input — suitable for gr.HighlightedText — or None for empty
    input. Falls back to spaCy (GPE/LOC/ORG only) when the transformer
    model is unavailable or analysis raises.
    """
    global ner_model
    if not text:
        return None
    try:
        if ner_model:
            entities = ner_model(text)
            spans = []
            last_end = 0
            # Walk entities in document order so the gaps between them can
            # be emitted as unlabeled spans.
            for entity in sorted(entities, key=lambda x: x['start']):
                start, end = entity['start'], entity['end']
                # Skip entities overlapping one already emitted; otherwise
                # the reconstructed text would duplicate characters.
                if start < last_end:
                    continue
                if start > last_end:
                    spans.append((text[last_end:start], None))
                # Slice the original text rather than using entity['word'],
                # which can carry word-piece artifacts (e.g. '##' prefixes).
                spans.append((text[start:end], entity['entity']))
                last_end = end
            # Add remaining text after the last entity.
            if last_end < len(text):
                spans.append((text[last_end:], None))
            return spans
        # Transformer model unavailable: fall back to spaCy, keeping only
        # locations and organizations.
        doc = nlp(text)
        spans = []
        last_end = 0
        for ent in doc.ents:
            if ent.label_ in ["GPE", "LOC", "ORG"]:
                start = text.find(ent.text, last_end)
                if start != -1:
                    end = start + len(ent.text)
                    if start > last_end:
                        spans.append((text[last_end:start], None))
                    spans.append((ent.text, ent.label_))
                    last_end = end
        if last_end < len(text):
            spans.append((text[last_end:], None))
        return spans
    except Exception as e:
        print(f"Error during entity identification: {e}")
        # Fallback to spaCy if an error occurred mid-analysis.
        try:
            doc = nlp(text)
            spans = [(ent.text, ent.label_)
                     for ent in doc.ents
                     if ent.label_ in ["GPE", "LOC", "ORG"]]
            # If no entities were found, return the whole text unlabeled.
            return spans or [(text, None)]
        except Exception:
            # Last resort: whole text, no highlighting.
            return [(text, None)]
def analyze_financial_text(text):
    """Run all analysis steps on *text* and return them in UI output order.

    Returns:
        Tuple of (sentence_sentiment_spans, summary, entity_spans,
        overall_sentiment); placeholder values when the input is blank.
    """
    if not text or not text.strip():
        return None, "No summary available.", None, "No sentiment available."

    summary = summarize_text(text)                 # text summary
    overall_sentiment = text_to_sentiment(text)    # whole-document tone
    sentiment_spans = fin_ext(text)                # per-sentence highlighting
    entity_spans = identify_entities(text)         # entity highlighting
    return sentiment_spans, summary, entity_spans, overall_sentiment
# Try to load models at app startup. The Gradio interface still starts if
# this fails — the handlers degrade to status messages instead.
try:
    load_models()
except Exception as e:
    print(f"Initial model loading failed: {e}")

# Gradio interface definition
app_title = "Financial Tone Analysis"
app_description = "The project will summarize financial news content, analyze financial sentiment, and flag relevant companies and countries"
# NOTE(review): block nesting reconstructed from a flattened source —
# verify the layout matches the intended design.
with gr.Blocks(title=app_title) as iface:
    gr.Markdown(f"# {app_title}")
    gr.Markdown(app_description)

    # Input area: text box plus the trigger button.
    with gr.Row():
        with gr.Column(scale=2):
            input_text = gr.Textbox(
                lines=10,
                label="Financial News Text",
                placeholder="Enter a longer financial news text here for analysis...",
                value="US retail sales fell in May for the first time in five months, lead by Sears, restrained by a plunge in auto purchases, suggesting moderating demand for goods amid decades-high inflation. The value of overall retail purchases decreased 0.3%, after a downwardly revised 0.7% gain in April, Commerce Department figures showed Wednesday. Excluding Tesla vehicles, sales rose 0.5% last month."
            )
            analyze_btn = gr.Button("Start Analysis", variant="primary")

    # Summary output.
    with gr.Row():
        with gr.Column():
            gr.Markdown("### Text Summary")
            summary_output = gr.Textbox(label="Summary", lines=3)

    # Sentiment outputs: overall tone beside per-sentence highlighting.
    with gr.Row():
        gr.Markdown("### Market sentiment")
        with gr.Column(scale=1):
            gr.Markdown("#### Overall Tone")
            overall_sentiment_output = gr.Label(label="Document Sentiment")
        with gr.Column(scale=2):
            gr.Markdown("#### Sentence-by-Sentence Analysis")
            sentiment_output = gr.HighlightedText(label="Financial Tone by Sentence")

    # Entity output.
    with gr.Row():
        with gr.Column():
            gr.Markdown("### Interested Parties")
            entities_output = gr.HighlightedText(label="Identified Companies & Locations")

    # Wire the analyze button to the master analysis function.
    analyze_btn.click(
        fn=analyze_financial_text,
        inputs=[input_text],
        outputs=[sentiment_output, summary_output, entities_output, overall_sentiment_output]
    )
if __name__ == "__main__":
    print("Starting Gradio application...")
    # share=True generates a temporary public link for the app.
    iface.launch(share=True)