call-sentiment-demo

Build error

App Files Files Community

enoreyes committed on Mar 20, 2023

Commit

6f93304

•

1 Parent(s): 5d9cd28

Update utils.py

Browse files

Files changed (1) hide show

utils.py +96 -78

utils.py CHANGED Viewed

@@ -47,116 +47,134 @@ def split_into_sentences(text):
     sentences = [s.strip() for s in sentences]
     return sentences
-def summarize(diarized, check, summarization_pipeline):
     """
     diarized: a list of tuples. Each tuple has a string to be displayed and a label for highlighting.
         The start/end times are not highlighted [(speaker text, speaker id), (start time/end time, None)]
-    check is a list of speaker ids whose speech will get summarized
-    """
-    if not check:
-        return ""
-    # Combine text based on the speaker id
-    text_lines = [f"{d[1]}: {d[0]}" if len(check) == 2 and d[1] is not None else d[0] for d in diarized if d[1] in check]
-    text = "\n".join(text_lines)
-    # Cache the inner function because the outer function cannot be cached
-    @functools.lru_cache(maxsize=128)
-    def call_summarize_api(text):
-        return summarization_pipeline(text)[0]["summary_text"]
-    return call_summarize_api(text)
-# display if the sentiment value is above these thresholds
-thresholds = {
-    "joy": 0.99,
-    "anger": 0.95,
-    "surprise": 0.95,
-    "sadness": 0.98,
-    "fear": 0.95,
-    "love": 0.99,
-}
-color_map = {
-    "joy": "green",
-    "anger": "red",
-    "surprise": "yellow",
-    "sadness": "blue",
-    "fear": "orange",
-    "love": "purple",
-}
-def sentiment(diarized, emotion_pipeline):
-    def split_into_intervals(speaker_speech, start_time, end_time):
-        sentences = split_into_sentences(speaker_speech)
-        interval_size = (end_time - start_time) / len(sentences)
-        return sentences, interval_size
-    def process_customer_emotion(outputs, sentences, start_time, interval_size):
-        sentiments = []
-        for idx, (o, t) in enumerate(zip(outputs, sentences)):
-            sent = "neutral"
-            if o["score"] > thresholds[o["label"]]:
-                sentiments.append((t + f"({round(idx*interval_size+start_time,1)} s)", o["label"]))
-                if o["label"] in {"joy", "love", "surprise"}:
-                    sent = "positive"
-                elif o["label"] in {"sadness", "anger", "fear"}:
-                    sent = "negative"
-            if sent != "neutral":
-                to_plot.append((start_time + idx * interval_size, sent))
-                plot_sentences.append(t)
-        return sentiments
     x_min = 100
     x_max = 0
-    customer_sentiments, to_plot, plot_sentences = [], [], []
     for i in range(0, len(diarized), 2):
         speaker_speech, speaker_id = diarized[i]
         times, _ = diarized[i + 1]
-        start_time, end_time = map(float, times[5:].split("-"))
-        x_min, x_max = min(x_min, start_time), max(x_max, end_time)
         if "Customer" in speaker_id:
-            sentences, interval_size = split_into_intervals(speaker_speech, start_time, end_time)
             outputs = emotion_pipeline(sentences)
-            customer_sentiments.extend(process_customer_emotion(outputs, sentences, start_time, interval_size))
-    plot_df = pd.DataFrame(data={"x": [x for x, _ in to_plot], "y": [y for _, y in to_plot], "sentence": plot_sentences})
-    fig = px.line(plot_df, x="x", y="y", hover_data={"sentence": True, "x": True, "y": False}, labels={"x": "time (seconds)", "y": "sentiment"}, title=f"Customer sentiment over time", markers=True)
-    fig.update_yaxes(categoryorder="category ascending")
-    fig.update_layout(font=dict(size=18), xaxis_range=[x_min - 5, x_max + 5])
     return customer_sentiments, fig
-def speech_to_text(speech_file, speaker_segmentation, whisper, alignment_model, metadata, whisper_device):
-    def process_chunks(turn, chunks):
-        diarized = ""
-        i = 0
-        while i < len(chunks) and chunks[i]["end"] <= turn.end:
-            diarized += chunks[i]["text"] + " "
-            i += 1
-        return diarized, i
     speaker_output = speaker_segmentation(speech_file)
     result = whisper.transcribe(speech_file)
     chunks = whisperx.align(result["segments"], alignment_model, metadata, speech_file, whisper_device)["word_segments"]
     diarized_output = []
     i = 0
     speaker_counter = 0
     for turn, _, _ in speaker_output.itertracks(yield_label=True):
         speaker = "Customer" if speaker_counter % 2 == 0 else "Support"
-        diarized, i = process_chunks(turn, chunks[i:])
-        if diarized:
-            diarized_output.extend([(diarized, speaker), (f"from {turn.start:.2f}-{turn.end:.2f}", None)])
             speaker_counter += 1
     return diarized_output

     sentences = [s.strip() for s in sentences]
     return sentences
# Display a detected emotion only if its score is above these thresholds.
thresholds = {
    "joy": 0.99,
    "anger": 0.95,
    "surprise": 0.95,
    "sadness": 0.98,
    "fear": 0.95,
    "love": 0.99,
}
# Colour assigned to each emotion label.
# NOTE(review): the consumer of this map is not visible here; presumably the
# UI uses it to highlight text -- confirm against the caller.
color_map = {
    "joy": "green",
    "anger": "red",
    "surprise": "yellow",
    "sadness": "blue",
    "fear": "orange",
    "love": "purple",
}
def create_fig(x_min, x_max, plot_sentences, to_plot=None):
    """
    Build the "Customer sentiment over time" line plot.

    x_min, x_max: bounds of the x axis, in seconds.
    plot_sentences: the sentence shown on hover for each plotted point.
    to_plot: a list of (time in seconds, sentiment) tuples, one per entry of
        plot_sentences. Omitting it plots no points.

    Returns the plotly figure.
    """
    # `to_plot` has to be passed in explicitly: it is local to `sentiment` and
    # is not visible from here as a global.
    points = to_plot or []
    # Comprehensions (rather than unpacking zip(*points)) also work when there
    # are no points at all.
    plot_df = pd.DataFrame(
        data={
            "x": [x for x, _ in points],
            "y": [y for _, y in points],
            "sentence": plot_sentences,
        }
    )
    fig = px.line(
        plot_df,
        x="x",
        y="y",
        hover_data={
            "sentence": True,
            "x": True,
            "y": False,
        },
        labels={"x": "time (seconds)", "y": "sentiment"},
        title="Customer sentiment over time",
        markers=True,
    )
    fig = fig.update_yaxes(categoryorder="category ascending")
    fig = fig.update_layout(
        font=dict(
            size=18,
        ),
        xaxis_range=[x_min, x_max],
    )
    return fig


def sentiment(diarized, emotion_pipeline):
    """
    diarized: a list of tuples. Each tuple has a string to be displayed and a label for highlighting.
        The start/end times are not highlighted [(speaker text, speaker id), (start time/end time, None)]
    emotion_pipeline: called with a list of sentences; must return, per sentence,
        a mapping with a "label" and a "score".

    This function gets the customer's sentiment and returns a list for highlighted text as well
    as a plot of sentiment over time.
    """
    # Emotion label -> coarse sentiment drawn on the plot.
    polarity = {
        "joy": "positive",
        "love": "positive",
        "surprise": "positive",
        "sadness": "negative",
        "anger": "negative",
        "fear": "negative",
    }
    customer_sentiments = []
    to_plot = []
    plot_sentences = []
    # used to set the x range of ticks on the plot
    x_min = 100
    x_max = 0
    # Entries come in pairs: (speech, speaker id) followed by (times, None).
    for (speaker_speech, speaker_id), (times, _) in zip(diarized[::2], diarized[1::2]):
        # Only the customer's turns are analysed.
        if "Customer" not in speaker_id:
            continue
        # `times` looks like "from 1.00-2.50"; drop the "from " prefix.
        start_time, end_time = map(float, times[5:].split("-"))
        x_min = min(x_min, start_time)
        x_max = max(x_max, end_time)
        sentences = split_into_sentences(speaker_speech)
        if not sentences:
            # Nothing to classify, and the interval size below would divide by zero.
            continue
        # Sentence timestamps are approximated by spreading the sentences
        # evenly over the turn.
        interval_size = (end_time - start_time) / len(sentences)
        outputs = emotion_pipeline(sentences)
        for idx, (o, t) in enumerate(zip(outputs, sentences)):
            label = o["label"]
            if o["score"] <= thresholds[label]:
                continue
            timestamp = start_time + idx * interval_size
            customer_sentiments.append((t + f"({round(timestamp, 1)} s)", label))
            sent = polarity.get(label)
            if sent is not None:
                to_plot.append((timestamp, sent))
                plot_sentences.append(t)
    # Leave a 5 second margin on both sides of the plotted range.
    fig = create_fig(x_min - 5, x_max + 5, plot_sentences, to_plot)
    return customer_sentiments, fig
def speech_to_text(speech_file, speaker_segmentation, whisper, alignment_model, metadata, whisper_device):
    """
    Transcribe speech_file and split the transcript into speaker turns.

    speaker_segmentation: called with speech_file; its result must provide
        itertracks(yield_label=True) yielding turns with .start and .end.
    whisper: object whose transcribe(speech_file) result has a "segments" entry.
    alignment_model, metadata, whisper_device: forwarded to whisperx.align.

    Returns a list of tuples in pairs: (speaker text, speaker id) followed by
    ("from <start>-<end>", None). Speakers alternate between "Customer" and
    "Support", starting with "Customer"; turns that receive no words are
    skipped and do not advance the alternation.
    """
    speaker_output = speaker_segmentation(speech_file)
    result = whisper.transcribe(speech_file)
    chunks = whisperx.align(result["segments"], alignment_model, metadata, speech_file, whisper_device)["word_segments"]
    diarized_output = []
    # Index of the next word chunk not yet assigned to a turn.
    i = 0
    speaker_counter = 0
    # New iteration every time the speaker changes
    for turn, _, _ in speaker_output.itertracks(yield_label=True):
        speaker = "Customer" if speaker_counter % 2 == 0 else "Support"
        # Collect every remaining word that ends before this turn does.
        words = []
        while i < len(chunks) and chunks[i]["end"] <= turn.end:
            words.append(chunks[i]["text"])
            i += 1
        if words:
            # Every word is followed by a space, including the last one.
            diarized = " ".join(words) + " "
            diarized_output.extend(
                [
                    (diarized, speaker),
                    (f"from {turn.start:.2f}-{turn.end:.2f}", None),
                ]
            )
            speaker_counter += 1
    return diarized_output