Spaces: Running on Zero

Vadim Borisov committed · Commit 0e52f59
1 Parent(s): 88d5272

Update app.py
app.py CHANGED
@@ -31,12 +31,12 @@ def split_text(text, max_length=512):
     sentences = text.split('. ')
     chunks = []
     current_chunk = ""
-
+
     for sentence in sentences:
         # Add the period back if it was removed during splitting
         if not sentence.endswith('.'):
            sentence += '.'
-
+
         # Check if adding the sentence exceeds the max_length
         encoded = tokenizer.encode(current_chunk + " " + sentence, truncation=False)
         if len(encoded) > max_length:
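For readers skimming the diff: `split_text` (it continues in the next hunk) accumulates sentences into a chunk until the tokenized length would exceed `max_length`. The sketch below is only an illustration of that chunking idea, not the app's exact code: a whitespace word count stands in for `tokenizer.encode`, and the branch hidden between the two hunks (lines 43-51) is assumed to flush the current chunk and restart from the current sentence.

# Illustrative sketch only; NOT the exact app.py implementation.
def split_text_sketch(text, max_length=512):
    sentences = text.split('. ')
    chunks = []
    current_chunk = ""

    for sentence in sentences:
        # Add the period back if it was removed during splitting
        if not sentence.endswith('.'):
            sentence += '.'

        candidate = (current_chunk + " " + sentence).strip()
        if len(candidate.split()) > max_length:  # stand-in for len(tokenizer.encode(...))
            if current_chunk:
                chunks.append(current_chunk.strip())
            current_chunk = sentence  # assumed: restart the chunk from this sentence
        else:
            current_chunk = candidate

    if current_chunk:
        chunks.append(current_chunk.strip())
    return chunks

print(split_text_sketch("First sentence. Second sentence. Third sentence.", max_length=4))
# ['First sentence. Second sentence.', 'Third sentence.']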
@@ -52,38 +52,57 @@ def split_text(text, max_length=512):
             current_chunk = ""
         else:
             current_chunk += " " + sentence
-
+
     if current_chunk:
         chunks.append(current_chunk.strip())
-
+
     return chunks

+def aggregate_sentiments(all_probabilities, threshold=0.7):
+    """
+    Aggregates the sentiment probabilities from all chunks.
+    Prioritizes extreme sentiments if any chunk has a high confidence in them.
+    Otherwise, uses weighted voting based on confidence scores.
+    """
+    aggregated_probs = torch.tensor(all_probabilities).mean(dim=0).numpy()
+    aggregated_confidence = torch.tensor(all_probabilities).mean(dim=0).max().item()
+    predicted_class = aggregated_probs.argmax()
+    final_sentiment = SENTIMENT_MAP[predicted_class]
+    final_confidence = aggregated_probs[predicted_class]
+
+    # Check for extreme sentiments with high confidence
+    for idx, prob in enumerate(aggregated_probs):
+        if (idx == 0 or idx == 4) and prob > threshold:
+            final_sentiment = SENTIMENT_MAP[idx]
+            final_confidence = prob
+            break
+
+    return final_sentiment, final_confidence, aggregated_probs
+
 @spaces.GPU
 def analyze_sentiment(text, show_probabilities=False):
     """
     Analyzes the sentiment of the input text. If the text exceeds the token limit,
-    it splits the text into chunks and aggregates the results.
+    it splits the text into chunks and aggregates the results intelligently.
     """
     try:
         chunks = split_text(text)
         all_probabilities = []
-        all_predictions = []
         detailed_results = ""

         for idx, chunk in enumerate(chunks, 1):
             inputs = tokenizer(chunk, return_tensors="pt", truncation=True, padding=True, max_length=512).to(device)
-
+
             with torch.no_grad():
                 outputs = model(**inputs)
-
+
             probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1).cpu().numpy()[0]
             predicted_class = probabilities.argmax()
             predicted_sentiment = SENTIMENT_MAP[predicted_class]
             confidence = probabilities[predicted_class]
-
+
             all_probabilities.append(probabilities)
-
-
+
             if show_probabilities:
                 detailed_results += f"**Chunk {idx}: {predicted_sentiment} ({confidence:.2%})**\n"
                 for cls, prob in zip(SENTIMENT_MAP.values(), probabilities):
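The newly added `aggregate_sentiments` averages the per-chunk class probabilities and then lets a highly confident extreme class (index 0 or 4) override the averaged winner. Below is a self-contained run on toy numbers, assuming the five-class index-to-label mapping implied elsewhere in app.py (0 = Very Negative ... 4 = Very Positive); the function body mirrors the one added in this commit.

import torch

# Assumed to mirror app.py's SENTIMENT_MAP (not shown in this diff).
SENTIMENT_MAP = {0: "Very Negative", 1: "Negative", 2: "Neutral", 3: "Positive", 4: "Very Positive"}

def aggregate_sentiments(all_probabilities, threshold=0.7):
    # Same logic as the function added in this commit.
    aggregated_probs = torch.tensor(all_probabilities).mean(dim=0).numpy()
    predicted_class = aggregated_probs.argmax()
    final_sentiment = SENTIMENT_MAP[predicted_class]
    final_confidence = aggregated_probs[predicted_class]

    # An extreme class (Very Negative / Very Positive) wins outright above the threshold.
    for idx, prob in enumerate(aggregated_probs):
        if (idx == 0 or idx == 4) and prob > threshold:
            final_sentiment = SENTIMENT_MAP[idx]
            final_confidence = prob
            break

    return final_sentiment, final_confidence, aggregated_probs

# Two chunks: one strongly Very Positive, one mildly Neutral.
chunk_probs = [
    [0.01, 0.02, 0.05, 0.12, 0.80],
    [0.02, 0.08, 0.45, 0.30, 0.15],
]
sentiment, confidence, probs = aggregate_sentiments(chunk_probs)
print(sentiment, f"{confidence:.2%}")  # Very Positive 47.50% (below 0.7, so no extreme override)

Note that the committed version also computes an `aggregated_confidence` value that is never used afterwards; it is omitted from this sketch.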
@@ -91,20 +110,20 @@ def analyze_sentiment(text, show_probabilities=False):
                 detailed_results += "\n"
             else:
                 detailed_results += f"**Chunk {idx}: {predicted_sentiment} ({confidence:.2%})**\n"
-
-        # Aggregate results
-
-
-        final_sentiment = SENTIMENT_MAP[final_class]
-        final_confidence = avg_probabilities[final_class]
-
+
+        # Aggregate results
+        final_sentiment, final_confidence, aggregated_probs = aggregate_sentiments(all_probabilities)
+
         result = f"**Overall Sentiment: {final_sentiment}**\nConfidence: {final_confidence:.2%}\n\n"
-
+
         if show_probabilities:
             result += "### Detailed Analysis:\n" + detailed_results
+            result += "### Aggregated Probabilities:\n"
+            for cls, prob in zip(SENTIMENT_MAP.values(), aggregated_probs):
+                result += f"{cls}: {prob:.2%}\n"
         else:
             result += "### Detailed Analysis:\n" + detailed_results
-
+
         return result
     except Exception as e:
         return f"An error occurred during sentiment analysis: {str(e)}"
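With the aggregation wired in, the overall Markdown result is assembled from `final_sentiment`, `final_confidence`, and, when probabilities are requested, the aggregated distribution. A quick sketch of that formatting with hypothetical values (per-chunk details omitted):

# Hypothetical values, formatted the same way as the updated analyze_sentiment.
final_sentiment, final_confidence = "Very Positive", 0.475
aggregated_probs = [0.015, 0.05, 0.25, 0.21, 0.475]
labels = ["Very Negative", "Negative", "Neutral", "Positive", "Very Positive"]

result = f"**Overall Sentiment: {final_sentiment}**\nConfidence: {final_confidence:.2%}\n\n"
result += "### Aggregated Probabilities:\n"
for cls, prob in zip(labels, aggregated_probs):
    result += f"{cls}: {prob:.2%}\n"
print(result)
# **Overall Sentiment: Very Positive**
# Confidence: 47.50%
#
# ### Aggregated Probabilities:
# Very Negative: 1.50%
# ...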
@@ -117,7 +136,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         Discover the emotional tone behind any text with our advanced AI model! This app uses a state-of-the-art language model to analyze the sentiment of your text, classifying it into one of five categories: **Very Negative**, **Negative**, **Neutral**, **Positive**, or **Very Positive**.
         """
     )
-
+
     with gr.Row():
         with gr.Column():
             input_text = gr.Textbox(
@@ -132,27 +151,35 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
             analyze_button = gr.Button("Analyze Sentiment")
         with gr.Column():
             output = gr.Markdown(label="Result")
-
+
     with gr.Accordion("Examples", open=False):
         examples = [
             ["I absolutely loved this movie! The acting was superb and the plot was engaging.", True],
             ["The service at this restaurant was terrible. I'll never go back.", False],
             ["The product works as expected. Nothing special, but it gets the job done.", True],
             ["I'm somewhat disappointed with my purchase. It's not as good as I hoped.", False],
-            ["This book changed my life! I couldn't put it down and learned so much.", True]
+            ["This book changed my life! I couldn't put it down and learned so much.", True],
+            [
+                """Discover the emotional tone behind any text with our advanced AI model! This app uses a state-of-the-art language model to analyze the sentiment of your text, classifying it into one of five categories: Very Negative, Negative, Neutral, Positive, or Very Positive.
+
+                Discover the emotional tone behind any text with our advanced AI model! This app uses a state-of-the-art language model to analyze the sentiment of your text, classifying it into one of five categories: Very Negative, Negative, Neutral, Positive, or Very Positive.
+
+                FUCK YOU BITCH""",
+                True
+            ]
         ]
         gr.Examples(
             examples=examples,
             inputs=[input_text, show_probs],
             label="Predefined Examples"
         )
-
+
    analyze_button.click(
        fn=analyze_sentiment,
        inputs=[input_text, show_probs],
        outputs=output
    )
-
+
    gr.Markdown(
        """
        ---
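The UI hunk feeds each example pair into `gr.Examples` and routes the button click through `analyze_sentiment`. A minimal, self-contained sketch of that wiring follows; the `show_probs` control is assumed to be a `gr.Checkbox` (its definition is not shown in this diff), the labels are placeholders, and a stub replaces the model-backed function.

import gradio as gr

def analyze_sentiment(text, show_probabilities=False):
    # Stub standing in for the model-backed function in app.py.
    return f"**Overall Sentiment: Neutral**\nConfidence: 50.00%\n\n(probabilities shown: {show_probabilities})"

with gr.Blocks(theme=gr.themes.Soft()) as demo:
    with gr.Row():
        with gr.Column():
            input_text = gr.Textbox(label="Enter your text here", lines=5)  # placeholder label
            show_probs = gr.Checkbox(label="Show probabilities per class", value=False)  # assumed component type
            analyze_button = gr.Button("Analyze Sentiment")
        with gr.Column():
            output = gr.Markdown(label="Result")

    with gr.Accordion("Examples", open=False):
        gr.Examples(
            examples=[
                ["I absolutely loved this movie!", True],
                ["The service at this restaurant was terrible.", False],
            ],
            inputs=[input_text, show_probs],
            label="Predefined Examples",
        )

    analyze_button.click(fn=analyze_sentiment, inputs=[input_text, show_probs], outputs=output)

demo.launch()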