Spaces:
Build error
Build error
Update utils.py
Browse files
utils.py
CHANGED
@@ -47,116 +47,134 @@ def split_into_sentences(text):
|
|
47 |
sentences = [s.strip() for s in sentences]
|
48 |
return sentences
|
49 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
|
51 |
-
def
|
52 |
"""
|
53 |
diarized: a list of tuples. Each tuple has a string to be displayed and a label for highlighting.
|
54 |
The start/end times are not highlighted [(speaker text, speaker id), (start time/end time, None)]
|
55 |
-
check is a list of speaker ids whose speech will get summarized
|
56 |
-
"""
|
57 |
-
|
58 |
-
if not check:
|
59 |
-
return ""
|
60 |
-
|
61 |
-
# Combine text based on the speaker id
|
62 |
-
text_lines = [f"{d[1]}: {d[0]}" if len(check) == 2 and d[1] is not None else d[0] for d in diarized if d[1] in check]
|
63 |
-
text = "\n".join(text_lines)
|
64 |
-
|
65 |
-
# Cache the inner function because the outer function cannot be cached
|
66 |
-
@functools.lru_cache(maxsize=128)
|
67 |
-
def call_summarize_api(text):
|
68 |
-
return summarization_pipeline(text)[0]["summary_text"]
|
69 |
-
|
70 |
-
return call_summarize_api(text)
|
71 |
-
|
72 |
|
73 |
-
|
74 |
-
|
75 |
-
"
|
76 |
-
"anger": 0.95,
|
77 |
-
"surprise": 0.95,
|
78 |
-
"sadness": 0.98,
|
79 |
-
"fear": 0.95,
|
80 |
-
"love": 0.99,
|
81 |
-
}
|
82 |
-
|
83 |
-
color_map = {
|
84 |
-
"joy": "green",
|
85 |
-
"anger": "red",
|
86 |
-
"surprise": "yellow",
|
87 |
-
"sadness": "blue",
|
88 |
-
"fear": "orange",
|
89 |
-
"love": "purple",
|
90 |
-
}
|
91 |
|
|
|
92 |
|
93 |
-
|
94 |
-
|
95 |
-
sentences = split_into_sentences(speaker_speech)
|
96 |
-
interval_size = (end_time - start_time) / len(sentences)
|
97 |
-
return sentences, interval_size
|
98 |
-
|
99 |
-
def process_customer_emotion(outputs, sentences, start_time, interval_size):
|
100 |
-
sentiments = []
|
101 |
-
for idx, (o, t) in enumerate(zip(outputs, sentences)):
|
102 |
-
sent = "neutral"
|
103 |
-
if o["score"] > thresholds[o["label"]]:
|
104 |
-
sentiments.append((t + f"({round(idx*interval_size+start_time,1)} s)", o["label"]))
|
105 |
-
if o["label"] in {"joy", "love", "surprise"}:
|
106 |
-
sent = "positive"
|
107 |
-
elif o["label"] in {"sadness", "anger", "fear"}:
|
108 |
-
sent = "negative"
|
109 |
-
if sent != "neutral":
|
110 |
-
to_plot.append((start_time + idx * interval_size, sent))
|
111 |
-
plot_sentences.append(t)
|
112 |
-
return sentiments
|
113 |
|
|
|
114 |
x_min = 100
|
115 |
x_max = 0
|
116 |
|
117 |
-
customer_sentiments, to_plot, plot_sentences = [], [], []
|
118 |
-
|
119 |
for i in range(0, len(diarized), 2):
|
120 |
speaker_speech, speaker_id = diarized[i]
|
121 |
times, _ = diarized[i + 1]
|
122 |
-
|
123 |
-
|
|
|
|
|
|
|
124 |
|
125 |
if "Customer" in speaker_id:
|
126 |
-
|
127 |
outputs = emotion_pipeline(sentences)
|
128 |
-
customer_sentiments.extend(process_customer_emotion(outputs, sentences, start_time, interval_size))
|
129 |
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
134 |
|
135 |
return customer_sentiments, fig
|
136 |
|
137 |
-
def speech_to_text(speech_file, speaker_segmentation, whisper, alignment_model, metadata, whisper_device):
|
138 |
-
|
139 |
-
def process_chunks(turn, chunks):
|
140 |
-
diarized = ""
|
141 |
-
i = 0
|
142 |
-
while i < len(chunks) and chunks[i]["end"] <= turn.end:
|
143 |
-
diarized += chunks[i]["text"] + " "
|
144 |
-
i += 1
|
145 |
-
return diarized, i
|
146 |
|
|
|
147 |
speaker_output = speaker_segmentation(speech_file)
|
148 |
result = whisper.transcribe(speech_file)
|
|
|
149 |
chunks = whisperx.align(result["segments"], alignment_model, metadata, speech_file, whisper_device)["word_segments"]
|
150 |
|
151 |
diarized_output = []
|
152 |
i = 0
|
153 |
speaker_counter = 0
|
154 |
|
|
|
155 |
for turn, _, _ in speaker_output.itertracks(yield_label=True):
|
|
|
156 |
speaker = "Customer" if speaker_counter % 2 == 0 else "Support"
|
157 |
-
diarized
|
158 |
-
|
159 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
160 |
speaker_counter += 1
|
161 |
|
162 |
return diarized_output
|
|
|
47 |
sentences = [s.strip() for s in sentences]
|
48 |
return sentences
|
49 |
|
50 |
+
# Per-emotion score cut-offs: a sentence is only reported when the classifier
# score for its predicted label is strictly greater than the value listed here.
thresholds = {
    "joy": 0.99,
    "anger": 0.95,
    "surprise": 0.95,
    "sadness": 0.98,
    "fear": 0.95,
    "love": 0.99,
}

# Colour name associated with each emotion label
# (presumably consumed by the UI highlighting code -- usage is not visible here).
color_map = {
    "joy": "green",
    "anger": "red",
    "surprise": "yellow",
    "sadness": "blue",
    "fear": "orange",
    "love": "purple",
}
|
54 |
+
|
55 |
+
|
56 |
+
def create_fig(x_min, x_max, plot_sentences, to_plot=None):
    """Build the "Customer sentiment over time" line chart.

    Args:
        x_min: Lower bound of the x axis, in seconds.
        x_max: Upper bound of the x axis, in seconds.
        plot_sentences: Sentence text for each plotted point (shown on hover).
            Must have one entry per element of ``to_plot``.
        to_plot: Sequence of ``(time_in_seconds, sentiment)`` pairs. ``None``
            or an empty sequence yields a chart without any points instead of
            raising.

    Returns:
        The figure produced by ``px.line`` after the axis and layout updates.
    """
    points = list(to_plot) if to_plot is not None else []

    # Build the columns explicitly: ``zip(*points)`` cannot be unpacked into
    # two names when there are no points.
    plot_df = pd.DataFrame(
        data={
            "x": [time for time, _ in points],
            "y": [polarity for _, polarity in points],
            "sentence": list(plot_sentences),
        }
    )

    fig = px.line(
        plot_df,
        x="x",
        y="y",
        hover_data={
            "sentence": True,
            "x": True,
            "y": False,
        },
        labels={"x": "time (seconds)", "y": "sentiment"},
        title="Customer sentiment over time",
        markers=True,
    )

    fig = fig.update_yaxes(categoryorder="category ascending")
    fig = fig.update_layout(
        font=dict(
            size=18,
        ),
        xaxis_range=[x_min, x_max],
    )

    return fig


def sentiment(diarized, emotion_pipeline):
    """Extract the customer's emotions from a diarized transcript.

    diarized: a list of tuples. Each tuple has a string to be displayed and a
    label for highlighting. The start/end times are not highlighted:
    [(speaker text, speaker id), (start time/end time, None), ...]
    The time entry is expected to look like ``"from <start>-<end>"``.

    emotion_pipeline: callable that takes a list of sentences and returns one
    mapping with ``"label"`` and ``"score"`` per sentence.

    This function gets the customer's sentiment and returns a list for
    highlighted text as well as a plot of sentiment over time.

    Returns:
        ``(customer_sentiments, fig)`` where ``customer_sentiments`` is a list
        of ``(sentence + "(<time> s)", emotion label)`` tuples for sentences
        whose score exceeds the label's threshold, and ``fig`` is the chart
        built by ``create_fig``.

    Raises:
        KeyError: if the pipeline emits a label that is missing from
            ``thresholds``.
        IndexError: if a text entry has no time entry following it.
    """
    positive_labels = {"joy", "love", "surprise"}
    negative_labels = {"sadness", "anger", "fear"}
    time_prefix_len = len("from ")  # time entries are formatted "from S-E"

    customer_sentiments = []
    to_plot = []
    plot_sentences = []

    # Used to set the x range of ticks on the plot. Tracked from the actual
    # turn times rather than from fixed sentinels so that a conversation whose
    # first turn starts late still gets a correct lower bound.
    earliest_start = None
    latest_end = None

    for i in range(0, len(diarized), 2):
        speaker_speech, speaker_id = diarized[i]
        times, _ = diarized[i + 1]

        start_time, end_time = times[time_prefix_len:].split("-")
        start_time, end_time = float(start_time), float(end_time)

        if earliest_start is None or start_time < earliest_start:
            earliest_start = start_time
        if latest_end is None or end_time > latest_end:
            latest_end = end_time

        if "Customer" not in speaker_id:
            continue

        sentences = split_into_sentences(speaker_speech)
        if not sentences:
            # Nothing to classify, and the interval below would divide by 0.
            continue

        # Sentences carry no timestamps of their own, so spread them evenly
        # over the duration of the turn.
        interval_size = (end_time - start_time) / len(sentences)
        outputs = emotion_pipeline(sentences)

        for idx, (output, sentence) in enumerate(zip(outputs, sentences)):
            label = output["label"]
            if not output["score"] > thresholds[label]:
                continue

            sentence_start = start_time + idx * interval_size
            customer_sentiments.append(
                (sentence + f"({round(sentence_start, 1)} s)", label)
            )

            if label in positive_labels:
                polarity = "positive"
            elif label in negative_labels:
                polarity = "negative"
            else:
                # Label has a threshold but no polarity: highlight only.
                continue

            to_plot.append((sentence_start, polarity))
            plot_sentences.append(sentence)

    # Pad the axis by 5 s on each side; fall back to 0 for an empty transcript.
    x_min = (earliest_start if earliest_start is not None else 0.0) - 5
    x_max = (latest_end if latest_end is not None else 0.0) + 5

    fig = create_fig(x_min, x_max, plot_sentences, to_plot)

    return customer_sentiments, fig
|
147 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
148 |
|
149 |
+
def speech_to_text(speech_file, speaker_segmentation, whisper, alignment_model, metadata, whisper_device):
    """Transcribe an audio file and split the transcript into speaker turns.

    The file is diarized with ``speaker_segmentation``, transcribed with
    ``whisper.transcribe`` and word-aligned with ``whisperx.align``. Words are
    then assigned to diarization turns in order: a word belongs to the current
    turn while its ``"end"`` time is not after the turn's end.

    Args:
        speech_file: Audio input passed unchanged to the segmentation,
            transcription and alignment calls.
        speaker_segmentation: Callable returning an object that provides
            ``itertracks(yield_label=True)`` (presumably a pyannote pipeline
            -- confirm against the caller).
        whisper: Object with a ``transcribe`` method whose result has a
            ``"segments"`` entry.
        alignment_model: Forwarded to ``whisperx.align``.
        metadata: Forwarded to ``whisperx.align``.
        whisper_device: Forwarded to ``whisperx.align``.

    Returns:
        A flat list alternating ``(turn text, speaker)`` and
        ``("from <start>-<end>", None)`` tuples, where ``speaker`` is
        ``"Customer"`` or ``"Support"``. Turns that receive no words are
        omitted. Words ending after the last turn are not included.
    """
    speaker_output = speaker_segmentation(speech_file)
    result = whisper.transcribe(speech_file)

    chunks = whisperx.align(result["segments"], alignment_model, metadata, speech_file, whisper_device)["word_segments"]

    diarized_output = []
    chunk_count = len(chunks)
    next_chunk = 0
    speaker_counter = 0

    # New iteration every time the speaker changes
    for turn, _, _ in speaker_output.itertracks(yield_label=True):
        # Roles are assigned by alternation: the first turn with text is the
        # customer, the next one support, and so on.
        speaker = "Customer" if speaker_counter % 2 == 0 else "Support"

        # Consume every not-yet-assigned word that ends within this turn.
        first_chunk = next_chunk
        while next_chunk < chunk_count and chunks[next_chunk]["end"] <= turn.end:
            next_chunk += 1

        if next_chunk == first_chunk:
            # No words fell into this turn; keep the current role so an empty
            # turn does not flip Customer/Support for the following turns.
            continue

        # Each word is followed by a single space (including the last one),
        # matching the text format produced so far.
        diarized = "".join(chunk["text"] + " " for chunk in chunks[first_chunk:next_chunk])

        # NOTE: punctuation restoration (rpunct) is currently disabled here.

        diarized_output.extend(
            [
                (diarized, speaker),
                (f"from {turn.start:.2f}-{turn.end:.2f}", None),
            ]
        )

        speaker_counter += 1

    return diarized_output
|