Vadim Borisov committed
Commit 0e52f59
1 Parent(s): 88d5272

Update app.py

Files changed (1)
  1. app.py +52 -25
app.py CHANGED
@@ -31,12 +31,12 @@ def split_text(text, max_length=512):
     sentences = text.split('. ')
     chunks = []
     current_chunk = ""
-
+
     for sentence in sentences:
         # Add the period back if it was removed during splitting
         if not sentence.endswith('.'):
             sentence += '.'
-
+
         # Check if adding the sentence exceeds the max_length
         encoded = tokenizer.encode(current_chunk + " " + sentence, truncation=False)
         if len(encoded) > max_length:
@@ -52,38 +52,57 @@ def split_text(text, max_length=512):
             current_chunk = ""
         else:
             current_chunk += " " + sentence
-
+
     if current_chunk:
         chunks.append(current_chunk.strip())
-
+
     return chunks

+def aggregate_sentiments(all_probabilities, threshold=0.7):
+    """
+    Aggregates the sentiment probabilities from all chunks.
+    Prioritizes extreme sentiments if any chunk has a high confidence in them.
+    Otherwise, uses weighted voting based on confidence scores.
+    """
+    aggregated_probs = torch.tensor(all_probabilities).mean(dim=0).numpy()
+    aggregated_confidence = torch.tensor(all_probabilities).mean(dim=0).max().item()
+    predicted_class = aggregated_probs.argmax()
+    final_sentiment = SENTIMENT_MAP[predicted_class]
+    final_confidence = aggregated_probs[predicted_class]
+
+    # Check for extreme sentiments with high confidence
+    for idx, prob in enumerate(aggregated_probs):
+        if (idx == 0 or idx == 4) and prob > threshold:
+            final_sentiment = SENTIMENT_MAP[idx]
+            final_confidence = prob
+            break
+
+    return final_sentiment, final_confidence, aggregated_probs
+
 @spaces.GPU
 def analyze_sentiment(text, show_probabilities=False):
     """
     Analyzes the sentiment of the input text. If the text exceeds the token limit,
-    it splits the text into chunks and aggregates the results.
+    it splits the text into chunks and aggregates the results intelligently.
     """
     try:
         chunks = split_text(text)
         all_probabilities = []
-        all_predictions = []
         detailed_results = ""

         for idx, chunk in enumerate(chunks, 1):
             inputs = tokenizer(chunk, return_tensors="pt", truncation=True, padding=True, max_length=512).to(device)
-
+
             with torch.no_grad():
                 outputs = model(**inputs)
-
+
             probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1).cpu().numpy()[0]
             predicted_class = probabilities.argmax()
             predicted_sentiment = SENTIMENT_MAP[predicted_class]
             confidence = probabilities[predicted_class]
-
+
             all_probabilities.append(probabilities)
-            all_predictions.append(predicted_class)
-
+
             if show_probabilities:
                 detailed_results += f"**Chunk {idx}: {predicted_sentiment} ({confidence:.2%})**\n"
                 for cls, prob in zip(SENTIMENT_MAP.values(), probabilities):
@@ -91,20 +110,20 @@ def analyze_sentiment(text, show_probabilities=False):
                 detailed_results += "\n"
             else:
                 detailed_results += f"**Chunk {idx}: {predicted_sentiment} ({confidence:.2%})**\n"
-
-        # Aggregate results by averaging probabilities
-        avg_probabilities = sum(all_probabilities) / len(all_probabilities)
-        final_class = avg_probabilities.argmax()
-        final_sentiment = SENTIMENT_MAP[final_class]
-        final_confidence = avg_probabilities[final_class]
-
+
+        # Aggregate results
+        final_sentiment, final_confidence, aggregated_probs = aggregate_sentiments(all_probabilities)
+
         result = f"**Overall Sentiment: {final_sentiment}**\nConfidence: {final_confidence:.2%}\n\n"
-
+
         if show_probabilities:
             result += "### Detailed Analysis:\n" + detailed_results
+            result += "### Aggregated Probabilities:\n"
+            for cls, prob in zip(SENTIMENT_MAP.values(), aggregated_probs):
+                result += f"{cls}: {prob:.2%}\n"
         else:
             result += "### Detailed Analysis:\n" + detailed_results
-
+
         return result
     except Exception as e:
         return f"An error occurred during sentiment analysis: {str(e)}"
@@ -117,7 +136,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         Discover the emotional tone behind any text with our advanced AI model! This app uses a state-of-the-art language model to analyze the sentiment of your text, classifying it into one of five categories: **Very Negative**, **Negative**, **Neutral**, **Positive**, or **Very Positive**.
         """
     )
-
+
     with gr.Row():
         with gr.Column():
             input_text = gr.Textbox(
@@ -132,27 +151,35 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
             analyze_button = gr.Button("Analyze Sentiment")
         with gr.Column():
            output = gr.Markdown(label="Result")
-
+
     with gr.Accordion("Examples", open=False):
         examples = [
             ["I absolutely loved this movie! The acting was superb and the plot was engaging.", True],
             ["The service at this restaurant was terrible. I'll never go back.", False],
             ["The product works as expected. Nothing special, but it gets the job done.", True],
             ["I'm somewhat disappointed with my purchase. It's not as good as I hoped.", False],
-            ["This book changed my life! I couldn't put it down and learned so much.", True]
+            ["This book changed my life! I couldn't put it down and learned so much.", True],
+            [
+                """Discover the emotional tone behind any text with our advanced AI model! This app uses a state-of-the-art language model to analyze the sentiment of your text, classifying it into one of five categories: Very Negative, Negative, Neutral, Positive, or Very Positive.
+
+Discover the emotional tone behind any text with our advanced AI model! This app uses a state-of-the-art language model to analyze the sentiment of your text, classifying it into one of five categories: Very Negative, Negative, Neutral, Positive, or Very Positive.
+
+FUCK YOU BITCH""",
+                True
+            ]
         ]
         gr.Examples(
             examples=examples,
             inputs=[input_text, show_probs],
             label="Predefined Examples"
         )
-
+
     analyze_button.click(
         fn=analyze_sentiment,
         inputs=[input_text, show_probs],
         outputs=output
     )
-
+
     gr.Markdown(
         """
         ---
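
For reference, below is a minimal standalone sketch (not part of the commit) of how the new aggregation step behaves. It mirrors aggregate_sentiments() from the diff above on hand-made chunk probabilities; the SENTIMENT_MAP ordering (0 = Very Negative ... 4 = Very Positive) and the example numbers are assumptions for illustration only.

import torch

# Assumed class ordering; adjust if app.py defines SENTIMENT_MAP differently.
SENTIMENT_MAP = {0: "Very Negative", 1: "Negative", 2: "Neutral", 3: "Positive", 4: "Very Positive"}

def aggregate_sentiments(all_probabilities, threshold=0.7):
    # Average the per-chunk softmax distributions (same as the committed version,
    # minus its unused aggregated_confidence variable).
    aggregated_probs = torch.tensor(all_probabilities).mean(dim=0).numpy()
    predicted_class = aggregated_probs.argmax()
    final_sentiment = SENTIMENT_MAP[predicted_class]
    final_confidence = aggregated_probs[predicted_class]
    # Very Negative / Very Positive override the average when their mean
    # probability clears the threshold.
    for idx, prob in enumerate(aggregated_probs):
        if (idx == 0 or idx == 4) and prob > threshold:
            final_sentiment = SENTIMENT_MAP[idx]
            final_confidence = prob
            break
    return final_sentiment, final_confidence, aggregated_probs

# Two mostly neutral chunks plus one strongly negative chunk (illustrative numbers):
# the averaged Very Negative mass (~0.29) stays below the 0.7 threshold,
# so the overall label remains Neutral at ~50% confidence.
chunk_probs = [
    [0.05, 0.10, 0.70, 0.10, 0.05],
    [0.02, 0.08, 0.75, 0.10, 0.05],
    [0.80, 0.10, 0.05, 0.03, 0.02],
]
sentiment, confidence, probs = aggregate_sentiments(chunk_probs)
print(sentiment, f"{confidence:.2%}")  # Neutral 50.00%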