enoreyes committed on
Commit
6f93304
1 Parent(s): 5d9cd28

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +96 -78
utils.py CHANGED
@@ -47,116 +47,134 @@ def split_into_sentences(text):
47
  sentences = [s.strip() for s in sentences]
48
  return sentences
49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
def summarize(diarized, check, summarization_pipeline):
    """Summarize the speech of the selected speakers in a diarized transcript.

    Parameters
    ----------
    diarized : list[tuple]
        A list of tuples. Each tuple has a string to be displayed and a label
        for highlighting. The start/end times are not highlighted
        [(speaker text, speaker id), (start time/end time, None)].
    check : list
        Speaker ids whose speech will get summarized. When both speakers are
        selected (len == 2), each line is prefixed with its speaker id.
    summarization_pipeline : callable
        HF-style pipeline: pipeline(text) -> [{"summary_text": str}, ...].

    Returns
    -------
    str
        The summary text, or "" when no speaker is selected.
    """
    if not check:
        return ""

    # Keep only the selected speakers' turns; timing entries carry a None
    # label and are dropped because None is never in `check`.
    text_lines = [
        f"{d[1]}: {d[0]}" if len(check) == 2 and d[1] is not None else d[0]
        for d in diarized
        if d[1] in check
    ]
    text = "\n".join(text_lines)

    # BUG FIX: the original wrapped a *nested* function in
    # functools.lru_cache; the nested function (and therefore its cache) was
    # recreated on every call, so the cache never produced a hit. Persist the
    # cache on the function object instead, keyed by pipeline identity and
    # input text so different pipelines never share results.
    cache = summarize.__dict__.setdefault("_cache", {})
    key = (id(summarization_pipeline), text)
    if key not in cache:
        cache[key] = summarization_pipeline(text)[0]["summary_text"]
    return cache[key]
72
 
73
# An emotion is surfaced only when its model score clears the per-label
# threshold below; anything weaker is treated as neutral.
thresholds = dict(
    joy=0.99,
    anger=0.95,
    surprise=0.95,
    sadness=0.98,
    fear=0.95,
    love=0.99,
)

# Highlight colour used when displaying each emotion label.
color_map = dict(
    joy="green",
    anger="red",
    surprise="yellow",
    sadness="blue",
    fear="orange",
    love="purple",
)
91
 
 
92
 
93
def sentiment(diarized, emotion_pipeline):
    """Compute the customer's emotions over a diarized transcript.

    diarized: a list of tuples. Each tuple has a string to be displayed and a
    label for highlighting. The start/end times are not highlighted
    [(speaker text, speaker id), (start time/end time, None)].

    Returns a (customer_sentiments, fig) pair: a list of
    (sentence + timestamp, emotion label) tuples for highlighting, and a
    plotly line figure of sentiment over time.
    """

    def split_into_intervals(speaker_speech, start_time, end_time):
        # Spread the turn's sentences evenly over its time span.
        sentences = split_into_sentences(speaker_speech)
        # BUG FIX: guard the divisor — an empty sentence list previously
        # raised ZeroDivisionError.
        interval_size = (end_time - start_time) / max(len(sentences), 1)
        return sentences, interval_size

    def process_customer_emotion(outputs, sentences, start_time, interval_size):
        sentiments = []
        for idx, (o, t) in enumerate(zip(outputs, sentences)):
            sent = "neutral"
            # Only surface emotions the model is very confident about.
            if o["score"] > thresholds[o["label"]]:
                sentiments.append((t + f"({round(idx*interval_size+start_time,1)} s)", o["label"]))
                if o["label"] in {"joy", "love", "surprise"}:
                    sent = "positive"
                elif o["label"] in {"sadness", "anger", "fear"}:
                    sent = "negative"
            if sent != "neutral":
                to_plot.append((start_time + idx * interval_size, sent))
                plot_sentences.append(t)
        return sentiments

    # x range of the plot; widened below by every processed turn.
    x_min = 100
    x_max = 0

    customer_sentiments, to_plot, plot_sentences = [], [], []

    # diarized alternates (speech, speaker) and ("from start-end", None).
    for i in range(0, len(diarized), 2):
        speaker_speech, speaker_id = diarized[i]
        times, _ = diarized[i + 1]
        # times looks like "from <start>-<end>"; strip the "from " prefix.
        start_time, end_time = map(float, times[5:].split("-"))
        x_min, x_max = min(x_min, start_time), max(x_max, end_time)

        if "Customer" in speaker_id:
            sentences, interval_size = split_into_intervals(speaker_speech, start_time, end_time)
            outputs = emotion_pipeline(sentences)
            customer_sentiments.extend(process_customer_emotion(outputs, sentences, start_time, interval_size))

    plot_df = pd.DataFrame(data={"x": [x for x, _ in to_plot], "y": [y for _, y in to_plot], "sentence": plot_sentences})
    # FIX: dropped the no-op f-string prefix from the constant title.
    fig = px.line(plot_df, x="x", y="y", hover_data={"sentence": True, "x": True, "y": False}, labels={"x": "time (seconds)", "y": "sentiment"}, title="Customer sentiment over time", markers=True)
    fig.update_yaxes(categoryorder="category ascending")
    fig.update_layout(font=dict(size=18), xaxis_range=[x_min - 5, x_max + 5])

    return customer_sentiments, fig
136
 
137
def speech_to_text(speech_file, speaker_segmentation, whisper, alignment_model, metadata, whisper_device):
    """Transcribe speech_file with whisper, align it with whisperx, and label
    alternating speaker turns as "Customer" / "Support".

    Returns a flat list alternating (speech text, speaker label) and
    ("from <start>-<end>", None) timing entries.
    """

    def process_chunks(turn, chunks):
        # Consume the aligned word chunks that end within this turn; return
        # the concatenated text and the number of chunks consumed.
        diarized = ""
        i = 0
        while i < len(chunks) and chunks[i]["end"] <= turn.end:
            diarized += chunks[i]["text"] + " "
            i += 1
        return diarized, i

    speaker_output = speaker_segmentation(speech_file)
    result = whisper.transcribe(speech_file)
    chunks = whisperx.align(result["segments"], alignment_model, metadata, speech_file, whisper_device)["word_segments"]

    diarized_output = []
    i = 0  # absolute index of the next unconsumed chunk
    speaker_counter = 0

    # One iteration per speaker turn; speakers are assumed to alternate
    # Customer / Support.
    for turn, _, _ in speaker_output.itertracks(yield_label=True):
        speaker = "Customer" if speaker_counter % 2 == 0 else "Support"
        diarized, consumed = process_chunks(turn, chunks[i:])
        # BUG FIX: the helper returns a count relative to the slice, but the
        # original assigned it directly to `i` (the absolute index), so from
        # the third turn onward chunks were re-used or skipped. Accumulate
        # the consumed count instead.
        i += consumed
        if diarized:
            diarized_output.extend([(diarized, speaker), (f"from {turn.start:.2f}-{turn.end:.2f}", None)])
        speaker_counter += 1

    return diarized_output
 
47
  sentences = [s.strip() for s in sentences]
48
  return sentences
49
 
50
# display if the sentiment value is above these thresholds
thresholds = {
    "joy": 0.99,
    "anger": 0.95,
    "surprise": 0.95,
    "sadness": 0.98,
    "fear": 0.95,
    "love": 0.99,
}

# colour assigned to each emotion label when highlighting
color_map = {
    "joy": "green",
    "anger": "red",
    "surprise": "yellow",
    "sadness": "blue",
    "fear": "orange",
    "love": "purple",
}
54
+
55
+
56
def create_fig(x_min, x_max, plot_sentences, to_plot=None):
    """Build the customer-sentiment-over-time line plot.

    Parameters
    ----------
    x_min, x_max : float
        Bounds (in seconds) for the plot's x axis.
    plot_sentences : list[str]
        Sentence shown in the hover tooltip for each plotted point.
    to_plot : list[tuple[float, str]] | None
        (time, sentiment) points to plot. BUG FIX: the original read a free
        variable named `to_plot` that is local to sentiment(), so every call
        raised NameError; it is now an explicit (backward-compatible)
        parameter.

    Returns
    -------
    plotly.graph_objects.Figure
    """
    # Guard the empty case: `x, y = zip(*[])` would raise ValueError.
    if to_plot:
        x, y = zip(*to_plot)
    else:
        x, y = (), ()

    plot_df = pd.DataFrame(
        data={
            "x": list(x),
            "y": list(y),
            "sentence": plot_sentences,
        }
    )

    fig = px.line(
        plot_df,
        x="x",
        y="y",
        hover_data={
            "sentence": True,
            "x": True,
            "y": False,
        },
        labels={"x": "time (seconds)", "y": "sentiment"},
        # FIX: dropped the no-op f-string prefix from the constant title.
        title="Customer sentiment over time",
        markers=True,
    )

    fig = fig.update_yaxes(categoryorder="category ascending")
    fig = fig.update_layout(
        font=dict(size=18),
        xaxis_range=[x_min, x_max],
    )

    return fig
90
 
91
def sentiment(diarized, emotion_pipeline):
    """
    diarized: a list of tuples. Each tuple has a string to be displayed and a label for highlighting.
    The start/end times are not highlighted [(speaker text, speaker id), (start time/end time, None)]

    This function gets the customer's sentiment and returns a list for highlighted text as well
    as a plot of sentiment over time.
    """

    customer_sentiments = []  # (sentence + timestamp, emotion label) for highlighting
    to_plot = []              # (time, "positive"/"negative") points for the figure
    plot_sentences = []       # hover text for each plotted point

    # used to set the x range of ticks on the plot
    x_min = 100
    x_max = 0

    # diarized alternates (speech, speaker) and ("from start-end", None).
    for i in range(0, len(diarized), 2):
        speaker_speech, speaker_id = diarized[i]
        times, _ = diarized[i + 1]

        # times looks like "from <start>-<end>"; strip the "from " prefix.
        start_time, end_time = map(float, times[5:].split("-"))
        x_min = min(x_min, start_time)
        x_max = max(x_max, end_time)

        if "Customer" in speaker_id:
            sentences = split_into_sentences(speaker_speech)
            # BUG FIX: an empty sentence list previously caused
            # ZeroDivisionError below; skip such turns. (Splitting is also
            # now done only for Customer turns — other speakers never used
            # the result.)
            if not sentences:
                continue
            # Spread the sentences evenly over the turn's time span.
            interval_size = (end_time - start_time) / len(sentences)

            outputs = emotion_pipeline(sentences)

            for idx, (o, t) in enumerate(zip(outputs, sentences)):
                sent = "neutral"
                # Only keep emotions the model is very confident about.
                if o["score"] > thresholds[o["label"]]:
                    customer_sentiments.append(
                        (t + f"({round(idx*interval_size+start_time,1)} s)", o["label"])
                    )
                    if o["label"] in {"joy", "love", "surprise"}:
                        sent = "positive"
                    elif o["label"] in {"sadness", "anger", "fear"}:
                        sent = "negative"
                if sent != "neutral":
                    to_plot.append((start_time + idx * interval_size, sent))
                    plot_sentences.append(t)

    # Pad the plotted range slightly on both sides.
    x_min -= 5
    x_max += 5

    # BUG FIX: build the figure inline instead of calling create_fig(), which
    # read `to_plot` as an undefined free variable and raised NameError.
    plot_df = pd.DataFrame(data={"x": [x for x, _ in to_plot], "y": [y for _, y in to_plot], "sentence": plot_sentences})
    fig = px.line(plot_df, x="x", y="y", hover_data={"sentence": True, "x": True, "y": False}, labels={"x": "time (seconds)", "y": "sentiment"}, title="Customer sentiment over time", markers=True)
    fig.update_yaxes(categoryorder="category ascending")
    fig.update_layout(font=dict(size=18), xaxis_range=[x_min, x_max])

    return customer_sentiments, fig
147
 
 
 
 
 
 
 
 
 
 
148
 
149
def speech_to_text(speech_file, speaker_segmentation, whisper, alignment_model, metadata, whisper_device):
    """Transcribe speech_file with whisper, align it with whisperx, and label
    alternating speaker turns as "Customer" / "Support".

    Returns a flat list alternating (speech text, speaker label) and
    ("from <start>-<end>", None) timing entries.
    """
    speaker_output = speaker_segmentation(speech_file)
    result = whisper.transcribe(speech_file)

    chunks = whisperx.align(result["segments"], alignment_model, metadata, speech_file, whisper_device)["word_segments"]

    diarized_output = []
    next_chunk = 0    # index of the first aligned word not yet assigned
    turn_index = 0    # even turns -> Customer, odd turns -> Support

    # New iteration every time the speaker changes.
    for turn, _, _ in speaker_output.itertracks(yield_label=True):
        speaker = "Customer" if turn_index % 2 == 0 else "Support"

        # Collect every aligned word that ends inside this turn.
        words = []
        while next_chunk < len(chunks) and chunks[next_chunk]["end"] <= turn.end:
            words.append(chunks[next_chunk]["text"])
            next_chunk += 1

        if words:
            diarized_output.append((" ".join(words) + " ", speaker))
            diarized_output.append(("from {:.2f}-{:.2f}".format(turn.start, turn.end), None))

        turn_index += 1

    return diarized_output