Spaces:

MatteoFasulo
/

SubjectivityDetection

Running

App Files Community

MatteoFasulo committed 3 days ago

Commit

6c7f045

verified ·

1 Parent(s): da38f38

Dashboard-fixes (#1)

Browse files

- feat: enhanced GUI (63daa8ce6608610cb201ff8faeb918a65aa58d94)
- bug: removed share=True (ed1a96ef44b5680c941da2d9e14e98e4314f30a2)

Files changed (1) hide show

app.py +114 -82

app.py CHANGED Viewed

@@ -16,109 +16,102 @@ examples = [
     ["Boxing Day ambush & flagship attack Putin has long tried to downplay the true losses his army has faced in the Black Sea."],
 ]
-# Custom model class for combining sentiment analysis with subjectivity detection
 class CustomModel(PreTrainedModel):
     config_class = DebertaV2Config
     def __init__(self, config, sentiment_dim=3, num_labels=2, *args, **kwargs):
         super().__init__(config, *args, **kwargs)
         self.deberta = DebertaV2Model(config)
         self.pooler = ContextPooler(config)
         output_dim = self.pooler.output_dim
         self.dropout = nn.Dropout(0.1)
         self.classifier = nn.Linear(output_dim + sentiment_dim, num_labels)
     def forward(self, input_ids, positive, neutral, negative, token_type_ids=None, attention_mask=None, labels=None):
         outputs = self.deberta(input_ids=input_ids, attention_mask=attention_mask)
         encoder_layer = outputs[0]
         pooled_output = self.pooler(encoder_layer)
-        # Sentiment features as a single tensor
-        sentiment_features = torch.stack((positive, neutral, negative), dim=1)  # Shape: (batch_size, 3)
-        # Combine CLS embedding with sentiment features
         combined_features = torch.cat((pooled_output, sentiment_features), dim=1)
-        # Classification head
         logits = self.classifier(self.dropout(combined_features))
         return {'logits': logits}
-# Load the pre-trained tokenizer
 def load_tokenizer(model_name: str):
     return AutoTokenizer.from_pretrained(model_name)
-# Load the pre-trained model
 def load_model(model_name: str):
-    if 'sentiment' in model_name:
-        config = DebertaV2Config.from_pretrained(
-            model_name,
-            num_labels=2,
-            id2label={0: 'OBJ', 1: 'SUBJ'},
-            label2id={'OBJ': 0, 'SUBJ': 1},
-            output_attentions=False,
-            output_hidden_states=False
-        )
-        model = CustomModel(config=config, sentiment_dim=3, num_labels=2).from_pretrained(model_name)
-    else:
-        model = AutoModelForSequenceClassification.from_pretrained(
-                model_name,
-                num_labels=2,
-                id2label={0: 'OBJ', 1: 'SUBJ'},
-                label2id={'OBJ': 0, 'SUBJ': 1},
-                output_attentions=False,
-                output_hidden_states=False
             )
-    return model
-# Get sentiment values using a pre-trained sentiment analysis model
 def get_sentiment_values(text: str):
-    pipe = pipeline("sentiment-analysis", model="cardiffnlp/twitter-xlm-roberta-base-sentiment", tokenizer="cardiffnlp/twitter-xlm-roberta-base-sentiment", top_k=None)
-    sentiments = pipe(text)[0]
-    return {k:v for k,v in [(list(sentiment.values())[0], list(sentiment.values())[1]) for sentiment in sentiments]}
 def analyze(text):
-    # Extract sentiment values
-    sentiment_values = get_sentiment_values(text)
-    # Load the tokenizer and model
     tokenizer = load_tokenizer(model_card)
     model_with_sentiment = load_model(sentiment_model)
     model_without_sentiment = load_model(subjectivity_only_model)
-    # Tokenize
-    inputs = tokenizer(text, padding=True, truncation=True, max_length=256, return_tensors='pt')
-    # Get the subjectivity model outputs
-    outputs_base = model_without_sentiment(**inputs)
     logits_base = outputs_base.get('logits')
-    # Calculate probabilities using softmax
     prob_base = torch.nn.functional.softmax(logits_base, dim=1)[0]
-    # Get the sentiment values
-    positive = sentiment_values['positive']
-    neutral = sentiment_values['neutral']
-    negative = sentiment_values['negative']
-    # Convert sentiment values to tensors
-    inputs['positive'] = torch.tensor(positive).unsqueeze(0)
-    inputs['neutral'] = torch.tensor(neutral).unsqueeze(0)
-    inputs['negative'] = torch.tensor(negative).unsqueeze(0)
-    # Get the sentiment model outputs
-    outputs_sentiment = model_with_sentiment(**inputs)
-    logits_sentiment = outputs_sentiment.get('logits')
-    # Calculate probabilities using softmax
     prob_sentiment = torch.nn.functional.softmax(logits_sentiment, dim=1)[0]
-    # Prepare data for the Dataframe (string values)
     table_data = [
         ["Positive", f"{positive:.2%}"],
         ["Neutral", f"{neutral:.2%}"],
@@ -128,31 +121,70 @@ def analyze(text):
         ["TextOnly OBJ", f"{prob_base[0]:.2%}"],
         ["TextOnly SUBJ", f"{prob_base[1]:.2%}"]
     ]
     return table_data
-# Update the Gradio interface
-with gr.Blocks(theme=gr.themes.Base()) as demo:
-    gr.Markdown("🚀 Advanced Subjectivity & Sentiment Dashboard 🚀")
-    with gr.Row():
-        txt = gr.Textbox(label="Enter text to analyze", placeholder="Paste news sentence here...", lines=2)
-        btn = gr.Button("Analyze 🔍", variant="primary")
     with gr.Tabs():
         with gr.TabItem("Raw Scores 📋"):
-            table = gr.Dataframe(headers=["Metric", "Value"], datatype=["str","str"], interactive=False)
         with gr.TabItem("About ℹ️"):
-            gr.Markdown("This dashboard uses two DeBERTa-based models (with and without sentiment integration) to detect subjectivity, alongside sentiment scores from an XLM-RoBERTa model.")
     with gr.Row():
         gr.Markdown("### Examples:")
-        gr.Examples(
-            examples=examples,
-            inputs=txt,
-            outputs=[table],
-            fn=analyze,
-            label="Examples",
-            cache_examples=True,
-        )
-    # Link inputs to outputs
     btn.click(fn=analyze, inputs=txt, outputs=[table])
 demo.queue().launch()

     ["Boxing Day ambush & flagship attack Putin has long tried to downplay the true losses his army has faced in the Black Sea."],
 ]
 # Subjectivity classifier that concatenates three sentiment scores
 # (positive, neutral, negative) onto the pooled DeBERTa-v2 output and feeds
 # the result through dropout and a single linear layer.
 class CustomModel(PreTrainedModel):
     config_class = DebertaV2Config
     def __init__(self, config, sentiment_dim=3, num_labels=2, *args, **kwargs):
         super().__init__(config, *args, **kwargs)
         self.deberta = DebertaV2Model(config)
         self.pooler = ContextPooler(config)
         output_dim = self.pooler.output_dim
         self.dropout = nn.Dropout(0.1)
         # Input width of the head = pooled width + number of sentiment features.
         self.classifier = nn.Linear(output_dim + sentiment_dim, num_labels)
     # `token_type_ids` and `labels` are accepted but not used anywhere in the body.
     # Returns a dict with a single key, 'logits'.
     def forward(self, input_ids, positive, neutral, negative, token_type_ids=None, attention_mask=None, labels=None):
         outputs = self.deberta(input_ids=input_ids, attention_mask=attention_mask)
         encoder_layer = outputs[0]
         pooled_output = self.pooler(encoder_layer)
         # Stack the three score tensors along dim 1 and cast them to the pooled
         # output's dtype so the concatenation below does not mix dtypes.
         # assumes each score tensor is 1-D with one entry per batch item — TODO confirm
+        sentiment_features = torch.stack((positive, neutral, negative), dim=1).to(pooled_output.dtype)
         combined_features = torch.cat((pooled_output, sentiment_features), dim=1)
         logits = self.classifier(self.dropout(combined_features))
         return {'logits': logits}
 # Load the tokenizer for `model_name` through AutoTokenizer.
 # Unlike load_model, nothing is memoised here: each call invokes
 # from_pretrained again.
 def load_tokenizer(model_name: str):
     return AutoTokenizer.from_pretrained(model_name)
+load_model_cache = {}
 def load_model(model_name: str):
+    if model_name not in load_model_cache:
+        print(f"Loading model: {model_name}")
+        if 'sentiment' in model_name:
+            config = DebertaV2Config.from_pretrained(
+                model_name, num_labels=2, id2label={0: 'OBJ', 1: 'SUBJ'}, label2id={'OBJ': 0, 'SUBJ': 1},
+                output_attentions=False, output_hidden_states=False
             )
+            model_instance = CustomModel(config=config, sentiment_dim=3, num_labels=2).from_pretrained(model_name)
+        else:
+            model_instance = AutoModelForSequenceClassification.from_pretrained(
+                model_name, num_labels=2, id2label={0: 'OBJ', 1: 'SUBJ'}, label2id={'OBJ': 0, 'SUBJ': 1},
+                output_attentions=False, output_hidden_states=False
+            )
+        load_model_cache[model_name] = model_instance
+    return load_model_cache[model_name]
+sentiment_pipeline_cache = None # set by get_sentiment_values on its first call
 # Run the shared sentiment pipeline on `text` and return a dict mapping each
 # lower-cased label to its score. Returns {} when the pipeline output is
 # empty, is not a list, or has an empty first element.
 def get_sentiment_values(text: str):
+    global sentiment_pipeline_cache
     # Build the pipeline only once; later calls reuse the module-level object.
+    if sentiment_pipeline_cache is None:
+        print("Loading sentiment pipeline...")
+        sentiment_pipeline_cache = pipeline(
+            "sentiment-analysis",
+            model="cardiffnlp/twitter-xlm-roberta-base-sentiment",
+            tokenizer="cardiffnlp/twitter-xlm-roberta-base-sentiment",
+            top_k=None
+        )
+    sentiments_output = sentiment_pipeline_cache(text)
     # Only the first element of the output is read; each entry in it is
     # expected to expose 'label' and 'score' keys.
+    if sentiments_output and isinstance(sentiments_output, list) and sentiments_output[0]:
+        sentiments = sentiments_output[0]
+        return {s['label'].lower(): s['score'] for s in sentiments}
+    return {}
 # Build the rows of the "Metric"/"Value" table for `text`: sentiment scores
 # plus class probabilities from the two subjectivity models.
 # Returns a list of [label, formatted value] pairs.
 def analyze(text):
     # Blank or whitespace-only input short-circuits to a table of empty values.
+    if not text or not text.strip():
+        empty_data = [
+            ["Positive", ""], ["Neutral", ""], ["Negative", ""],
+            ["Sent-Subj OBJ", ""], ["Sent-Subj SUBJ", ""],
+            ["TextOnly OBJ", ""], ["TextOnly SUBJ", ""]
+        ]
+        return empty_data
+    sentiment_values = get_sentiment_values(text)
     tokenizer = load_tokenizer(model_card)
     model_with_sentiment = load_model(sentiment_model)
     model_without_sentiment = load_model(subjectivity_only_model)
+    inputs_dict = tokenizer(text, padding=True, truncation=True, max_length=256, return_tensors='pt')
     # The device is read from the text-only model's parameters and every
     # tokenizer tensor is moved there before the forward pass.
     # NOTE(review): the sentiment model receives tensors on this same device,
     # which assumes both models live on one device — confirm.
+    device = next(model_without_sentiment.parameters()).device
+    inputs_dict_on_device = {k: v.to(device) for k, v in inputs_dict.items()}
     # NOTE(review): neither forward call in this function is wrapped in
     # torch.no_grad().
+    outputs_base = model_without_sentiment(**inputs_dict_on_device)
     logits_base = outputs_base.get('logits')
     prob_base = torch.nn.functional.softmax(logits_base, dim=1)[0]
     # A label missing from the sentiment dict falls back to 0.0.
+    positive = sentiment_values.get('positive', 0.0)
+    neutral = sentiment_values.get('neutral', 0.0)
+    negative = sentiment_values.get('negative', 0.0)
     # Shallow copy: the three sentiment tensors are added to the copy only, so
     # the dict used for the text-only model is left untouched.
+    current_inputs_for_sentiment_model = inputs_dict_on_device.copy()
+    current_inputs_for_sentiment_model['positive'] = torch.tensor(positive, device=device).unsqueeze(0).float()
+    current_inputs_for_sentiment_model['neutral'] = torch.tensor(neutral, device=device).unsqueeze(0).float()
+    current_inputs_for_sentiment_model['negative'] = torch.tensor(negative, device=device).unsqueeze(0).float()
+    outputs_sentiment = model_with_sentiment(**current_inputs_for_sentiment_model)
+    logits_sentiment = outputs_sentiment.get('logits')
     prob_sentiment = torch.nn.functional.softmax(logits_sentiment, dim=1)[0]
     # NOTE(review): only four rows are visible here while empty_data above lists
     # seven; the remaining rows are presumably hidden by the diff hunk
     # boundary — confirm against the full file.
     table_data = [
         ["Positive", f"{positive:.2%}"],
         ["Neutral", f"{neutral:.2%}"],
         ["TextOnly OBJ", f"{prob_base[0]:.2%}"],
         ["TextOnly SUBJ", f"{prob_base[1]:.2%}"]
     ]
     return table_data
+def load_default_example_on_startup():
     # Produce the initial (text, table rows) pair for the page: analyse the
     # first entry of `examples` when one exists, otherwise fall back to an
     # empty string and whatever analyze returns for it.
+    print("Loading default example on startup...")
     # Requires a non-empty `examples` whose first item is a non-empty list;
     # the trailing `examples[0]` check repeats the earlier truthiness test.
+    if examples and examples[0] and isinstance(examples[0], list) and examples[0]:
+        default_text = examples[0][0]
+        default_analysis_results = analyze(default_text)
+        return default_text, default_analysis_results
+    print("Warning: No valid default example found. Loading empty.")
+    empty_text = ""
+    empty_results = analyze(empty_text)
+    return empty_text, empty_results
+with gr.Blocks(theme=gr.themes.Ocean(), title="Subjectivity & Sentiment Dashboard") as demo:
     # Page layout: heading, input box with an Analyze button, a tabbed results
     # area, the example list, and the event wiring.
+    gr.Markdown("# 🚀 Subjectivity & Sentiment Analysis Dashboard 🚀")
+    with gr.Column():
+        txt = gr.Textbox(
+            label="Enter text to analyze",
+            placeholder="Paste news sentence here...",
+            lines=2,
+        )
+        with gr.Row():
             # An empty growing column sits before the non-growing button;
             # presumably a spacer that pushes the button to the right — confirm.
+            gr.Column(scale=1, min_width=0)
+            btn = gr.Button(
+                "Analyze 🔍",
+                variant="primary",
+                size="md",
+                scale=0
+            )
     with gr.Tabs():
         with gr.TabItem("Raw Scores 📋"):
             # Read-only two-column table that receives the rows returned by analyze.
+            table = gr.Dataframe(
+                headers=["Metric", "Value"],
+                datatype=["str", "str"],
+                interactive=False
+            )
         with gr.TabItem("About ℹ️"):
+            gr.Markdown(
+                "This dashboard uses two DeBERTa-based models (with and without sentiment integration) "
+                "to detect subjectivity, alongside sentiment scores from an XLM-RoBERTa model."
+            )
     with gr.Row():
         gr.Markdown("### Examples:")
     # The examples component is declared at the Blocks level, outside the row
     # that holds the heading above.
+    gr.Examples(
+        examples=examples,
+        inputs=txt,
+        outputs=[table],
+        fn=analyze,
+        label="Click an example to analyze",
+        cache_examples=True,
+    )
     # Clicking the button runs analyze on the textbox content and fills the table.
     btn.click(fn=analyze, inputs=txt, outputs=[table])
     # The load event fills both the textbox and the table with the values
     # returned by load_default_example_on_startup.
+    demo.load(
+        fn=load_default_example_on_startup,
+        inputs=None,
+        outputs=[txt, table]
+    )
 demo.queue().launch()