File size: 10,147 Bytes
ef62cb6
 
 
 
 
 
 
529c0e6
ef62cb6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
import gradio as gr
import json
import os
import nltk
import spacy
import re
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
import torch

# Make sure the NLTK 'punkt' tokenizer data is available, downloading it when
# the lookup fails.
# NOTE(review): the sentence splitting visible in this file goes through spaCy
# (`split_in_sentences`), not NLTK — confirm this download is still needed.
try:
    nltk.data.find('tokenizers/punkt')
except LookupError:
    nltk.download('punkt')

# Load the spaCy English pipeline once at import time and add the
# 'sentencizer' component to it; `split_in_sentences` reads `doc.sents` and
# the entity fallback reads `doc.ents` from this shared `nlp` object.
nlp = spacy.load('en_core_web_sm')
nlp.add_pipe('sentencizer')

# Module-level model handles. They start as None and are filled in by
# `load_models()`; the analysis functions treat a falsy handle as
# "model not available".
fin_model = None
summarizer = None
ner_model = None
# Value of the HF_Token environment variable (None when unset).
# NOTE(review): not passed to any loader in the visible code — confirm whether
# it is still required.
auth_token = os.environ.get("HF_Token")

def load_models():
    """Build the three Hugging Face pipelines and store them in module globals.

    Sets ``fin_model`` (sentiment), ``summarizer`` and ``ner_model``. Each
    pipeline is created independently: if one cannot be built the error is
    printed, the corresponding global is set to ``None`` and loading moves on
    to the next model. Nothing is returned.
    """
    global fin_model, summarizer, ner_model

    # Financial tone classifier.
    print("Loading sentiment model...")
    try:
        sentiment_pipe = pipeline("sentiment-analysis", model="ylingag/ISOM5240_financial_tone")
    except Exception as err:
        print(f"Failed to load sentiment model: {err}")
        sentiment_pipe = None
    else:
        print("Sentiment model loaded successfully.")
    fin_model = sentiment_pipe

    # Abstractive summarizer.
    print("Loading summarization model...")
    try:
        summary_pipe = pipeline("summarization", model="knkarthick/MEETING_SUMMARY")
    except Exception as err:
        print(f"Warning: Failed to load summarization model: {err}")
        print("Will continue without summarization capability.")
        summary_pipe = None
    else:
        print("Summarization model loaded successfully.")
    summarizer = summary_pipe

    # Named-entity recognizer.
    print("Loading NER model...")
    try:
        ner_pipe = pipeline("ner", model="dslim/bert-base-NER")
    except Exception as err:
        print(f"Warning: Failed to load NER model: {err}")
        print("Will continue without NER capability.")
        ner_pipe = None
    else:
        print("NER model loaded successfully.")
    ner_model = ner_pipe

def split_in_sentences(text):
    """Return the whitespace-stripped string of each sentence spaCy finds in ``text``."""
    sentences = []
    for sentence in nlp(text).sents:
        sentences.append(str(sentence).strip())
    return sentences

def make_spans(text, results):
    """Pair each sentence of ``text`` with the sentiment label predicted for it.

    ``results`` is a sequence of dicts carrying a ``'label'`` key, one per
    sentence. Generic ``LABEL_0`` / ``LABEL_1`` / ``LABEL_2`` ids are shown as
    Negative / Neutral / Positive; any other label is passed through as is.
    Returns a list of ``(sentence, label)`` tuples; pairing stops at the
    shorter of the two sequences.
    """
    readable = {
        "LABEL_0": "Negative",
        "LABEL_1": "Neutral",
        "LABEL_2": "Positive",
    }
    labels = [readable.get(item['label'], item['label']) for item in results]
    return list(zip(split_in_sentences(text), labels))

def text_to_sentiment(text):
    """Classify the overall sentiment of ``text`` with the global ``fin_model``.

    Args:
        text: The document to classify.

    Returns:
        The predicted label as a string. Generic ``LABEL_0`` / ``LABEL_1`` /
        ``LABEL_2`` ids are translated to Negative / Neutral / Positive; other
        labels are returned unchanged. A human-readable message is returned
        instead when the model is not loaded, the text is blank, or the
        pipeline raises (``"Error: ..."``). The function itself never raises
        for pipeline failures.
    """
    global fin_model
    if not fin_model:
        return "Sentiment model not available."

    if not text or not text.strip():
        return "Please enter text for analysis."

    readable = {
        "LABEL_0": "Negative",
        "LABEL_1": "Neutral",
        "LABEL_2": "Positive",
    }
    try:
        # The whole document is sent as one input. Without truncation, text
        # longer than the model's maximum sequence length makes the pipeline
        # raise, so the user would only ever see an error for long articles.
        sentiment = fin_model(text, truncation=True)[0]["label"]
        return readable.get(sentiment, sentiment)
    except Exception as e:
        print(f"Error during overall sentiment analysis: {e}")
        return f"Error: {str(e)}"

def summarize_text(text):
    """Summarize ``text`` with the global ``summarizer`` pipeline.

    Args:
        text: The document to summarize.

    Returns:
        The ``summary_text`` of the first pipeline result. A human-readable
        message is returned instead when the summarizer is not loaded, when
        the stripped text is shorter than 50 characters, or when the pipeline
        raises (``"Summarization error: ..."``).
    """
    global summarizer
    if not summarizer:
        return "Summarization model not available."

    if not text or len(text.strip()) < 50:
        return "Text too short for summarization."

    try:
        # Truncate over-long input to the model's maximum length; otherwise
        # long articles make the pipeline raise and no summary is produced.
        resp = summarizer(text, truncation=True)
        return resp[0]['summary_text']
    except Exception as e:
        print(f"Error during summarization: {e}")
        return f"Summarization error: {str(e)}"

def fin_ext(text):
    """Return per-sentence sentiment spans for ``text``.

    The text is split into sentences, the global ``fin_model`` classifies them
    as a batch, and ``make_spans`` pairs sentences with labels. Returns
    ``None`` when the model is not loaded, ``text`` is empty, or anything in
    that chain raises (the error is printed).
    """
    global fin_model
    if not (fin_model and text):
        return None

    try:
        sentences = split_in_sentences(text)
        predictions = fin_model(sentences)
        return make_spans(text, predictions)
    except Exception as err:
        print(f"Error during sentence-level sentiment analysis: {err}")
        return None

def _transformer_entity_spans(text, entities):
    """Convert NER pipeline predictions into spans that cover all of ``text``.

    ``entities`` is an iterable of dicts with ``'start'``, ``'end'`` and
    ``'entity'`` keys. Entity text is sliced from ``text`` by offset rather
    than taken from the prediction's ``'word'`` field, which is the
    tokenizer's rendering of the token and may not match the input verbatim.
    """
    spans = []
    last_end = 0
    for entity in sorted(entities, key=lambda item: item['start']):
        start, end = entity['start'], entity['end']
        if start < last_end:
            # Overlaps a span already emitted; skipping keeps the output a
            # faithful, non-repeating partition of the input text.
            continue
        if start > last_end:
            spans.append((text[last_end:start], None))
        spans.append((text[start:end], entity['entity']))
        last_end = end
    if last_end < len(text):
        spans.append((text[last_end:], None))
    return spans


def _spacy_entity_spans(text):
    """Build spans covering all of ``text`` from spaCy GPE/LOC/ORG entities."""
    spans = []
    last_end = 0
    for ent in nlp(text).ents:
        if ent.label_ not in ("GPE", "LOC", "ORG"):  # Only locations and organizations
            continue
        # Use the entity's own character offsets instead of searching for its
        # text, which can match an earlier, unrelated occurrence.
        start, end = ent.start_char, ent.end_char
        if start < last_end:
            continue
        if start > last_end:
            spans.append((text[last_end:start], None))
        spans.append((text[start:end], ent.label_))
        last_end = end
    if last_end < len(text):
        spans.append((text[last_end:], None))
    return spans


def identify_entities(text):
    """Identify entities in ``text`` for display in a HighlightedText widget.

    The global transformer ``ner_model`` is used when it is loaded. If it is
    not loaded, or if it raises, spaCy is used instead (restricted to GPE, LOC
    and ORG entities). If spaCy also fails, the whole text is returned as a
    single unlabeled span.

    Args:
        text: The document to analyze.

    Returns:
        ``None`` when ``text`` is empty; otherwise a list of
        ``(fragment, label_or_None)`` tuples. On every path the fragments are
        slices of the input in order, so non-entity text is never dropped.
    """
    global ner_model
    if not text:
        return None

    # First choice: the transformer-based NER model.
    if ner_model:
        try:
            return _transformer_entity_spans(text, ner_model(text))
        except Exception as e:
            print(f"Error during entity identification: {e}")

    # Model unavailable or failed: fall back to spaCy.
    try:
        return _spacy_entity_spans(text)
    except Exception as e:
        print(f"Error during entity identification: {e}")
        # Last resort: show the text with nothing highlighted.
        return [(text, None)]

def analyze_financial_text(text):
    """Run every analysis step on ``text`` and bundle the results.

    Returns a 4-tuple ``(sentence_sentiment_spans, summary, entity_spans,
    overall_sentiment)``. For empty or whitespace-only input no analysis is
    run and placeholder values are returned.
    """
    has_content = bool(text and text.strip())
    if not has_content:
        return (None, "No summary available.", None, "No sentiment available.")

    # Dict literals evaluate in source order, so the steps run as:
    # summary, overall sentiment, per-sentence sentiment, entities.
    report = {
        "summary": summarize_text(text),
        "overall": text_to_sentiment(text),
        "sentences": fin_ext(text),
        "entities": identify_entities(text),
    }
    return report["sentences"], report["summary"], report["entities"], report["overall"]

# Load all models once at import time, before the UI is built.
# `load_models` already catches failures per model; this outer handler is a
# safety net so that any other exception is printed instead of aborting startup.
try:
    load_models()
except Exception as e:
    print(f"Initial model loading failed: {e}")
    # Gradio interface will still start, but functionality will be limited

# Gradio interface definition: title/description text followed by the Blocks
# layout (input row, summary row, sentiment row, entities row) and the button
# wiring.
app_title = "Financial Tone Analysis"
app_description = "The project will summarize financial news content, analyze financial sentiment, and flag relevant companies and countries"

with gr.Blocks(title=app_title) as iface:
    gr.Markdown(f"# {app_title}")
    gr.Markdown(app_description)
    
    # Input area: a multi-line textbox pre-filled with a sample article, plus
    # the button that triggers the analysis.
    with gr.Row():
        with gr.Column(scale=2):
            input_text = gr.Textbox(
                lines=10, 
                label="Financial News Text", 
                placeholder="Enter a longer financial news text here for analysis...",
                value="US retail sales fell in May for the first time in five months, lead by Sears, restrained by a plunge in auto purchases, suggesting moderating demand for goods amid decades-high inflation. The value of overall retail purchases decreased 0.3%, after a downwardly revised 0.7% gain in April, Commerce Department figures showed Wednesday. Excluding Tesla vehicles, sales rose 0.5% last month."
            )
            analyze_btn = gr.Button("Start Analysis", variant="primary")
            
    # Summary output.
    with gr.Row():
        with gr.Column():
            gr.Markdown("### Text Summary")
            summary_output = gr.Textbox(label="Summary", lines=3)
            
    # Sentiment outputs: overall label on the left, per-sentence highlighting
    # on the right.
    with gr.Row():
        gr.Markdown("### Market sentiment")
        with gr.Column(scale=1):
            gr.Markdown("#### Overall Tone")
            overall_sentiment_output = gr.Label(label="Document Sentiment")
        with gr.Column(scale=2):
            gr.Markdown("#### Sentence-by-Sentence Analysis")
            sentiment_output = gr.HighlightedText(label="Financial Tone by Sentence")
            
    # Entity output: highlighted spans from `identify_entities`.
    with gr.Row():
        with gr.Column():
            gr.Markdown("### Interested Parties")
            entities_output = gr.HighlightedText(label="Identified Companies & Locations")
    
    # Set up the click event for the analyze button.
    # The order of `outputs` must match the tuple returned by
    # `analyze_financial_text`: (sentiment spans, summary, entity spans,
    # overall sentiment).
    analyze_btn.click(
        fn=analyze_financial_text, 
        inputs=[input_text], 
        outputs=[sentiment_output, summary_output, entities_output, overall_sentiment_output]
    )

# Launch the app only when this file is executed directly as a script.
if __name__ == "__main__":
    print("Starting Gradio application...")
    # share=True will generate a public link
    iface.launch(share=True)