rajistics and enoreyes committed
Commit 6152174 (0 parents)

Duplicate from enoreyes/call-sentiment-demo


Co-authored-by: Eno Reyes <enoreyes@users.noreply.huggingface.co>

Files changed (9)
  1. .gitattributes +31 -0
  2. Customer_Support_Call.wav +3 -0
  3. README.md +13 -0
  4. app.py +117 -0
  5. example_audio.wav +3 -0
  6. packages.txt +2 -0
  7. requirements.txt +12 -0
  8. short-take-1.wav +3 -0
  9. utils.py +116 -0
.gitattributes ADDED
@@ -0,0 +1,31 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bin.* filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zstandard filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
+ example_audio.wav filter=lfs diff=lfs merge=lfs -text
+ short-take-1.wav filter=lfs diff=lfs merge=lfs -text
+ Customer_Support_Call.wav filter=lfs diff=lfs merge=lfs -text
Customer_Support_Call.wav ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:db6489658bb04f84503531d628a67028de9d754ee0b18cf229f39deec7828001
+ size 31497612
README.md ADDED
@@ -0,0 +1,13 @@
+ ---
+ title: Call Sentiment Blocks 2
+ emoji: 🐠
+ colorFrom: blue
+ colorTo: green
+ sdk: gradio
+ sdk_version: 3.11.0
+ app_file: app.py
+ pinned: false
+ duplicated_from: enoreyes/call-sentiment-demo
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces#reference
app.py ADDED
@@ -0,0 +1,117 @@
+ import os
+ import re
+ import functools
+ from functools import partial
+
+ import requests
+ import pandas as pd
+ import plotly.express as px
+
+ import torch
+ import gradio as gr
+ from transformers import pipeline, Wav2Vec2ProcessorWithLM
+ from pyannote.audio import Pipeline
+ import whisperx
+
+ from utils import split, create_fig
+ from utils import speech_to_text as stt
+
+ os.environ["TOKENIZERS_PARALLELISM"] = "false"
+ device = 0 if torch.cuda.is_available() else -1
+
+ # display if the sentiment value is above these thresholds
+ thresholds = {"joy": 0.99,"anger": 0.95,"surprise": 0.95,"sadness": 0.98,"fear": 0.95,"love": 0.99,}
+
+ color_map = {"joy": "green","anger": "red","surprise": "yellow","sadness": "blue","fear": "orange","love": "purple",}
+
+ # Audio components
+ whisper_device = "cuda" if torch.cuda.is_available() else "cpu"
+ whisper = whisperx.load_model("tiny.en", whisper_device)
+ alignment_model, metadata = whisperx.load_align_model(language_code="en", device=whisper_device)
+ speaker_segmentation = Pipeline.from_pretrained("pyannote/speaker-diarization@2.1",
+                                                 use_auth_token=os.environ['ENO_TOKEN'])
+
+
+ # Text components
+ emotion_pipeline = pipeline(
+     "text-classification",
+     model="bhadresh-savani/distilbert-base-uncased-emotion",
+     device=device,
+ )
+ summarization_pipeline = pipeline(
+     "summarization",
+     model="knkarthick/MEETING_SUMMARY",
+     device=device
+ )
+
+ EXAMPLES = [["Customer_Support_Call.wav"]]
+
+
+ speech_to_text = partial(
+     stt,
+     speaker_segmentation=speaker_segmentation,
+     whisper=whisper,
+     alignment_model=alignment_model,
+     metadata=metadata,
+     whisper_device=whisper_device
+ )
+
+ def summarize(diarized, summarization_pipeline):
+     text = ""
+     for d in diarized:
+         text += f"\n{d[1]}: {d[0]}"
+
+     return summarization_pipeline(text)[0]["summary_text"]
+
+ def sentiment(diarized, emotion_pipeline):
+     customer_sentiments = []
+
+     for i in range(0, len(diarized), 2):
+         speaker_speech, speaker_id = diarized[i]
+         sentences = split(speaker_speech)
+
+         if "Customer" in speaker_id:
+             outputs = emotion_pipeline(sentences)
+             for idx, (o, t) in enumerate(zip(outputs, sentences)):
+                 if o["score"] > thresholds[o["label"]]:
+                     customer_sentiments.append((t, o["label"]))
+
+     return customer_sentiments
+
+ with gr.Blocks() as demo:
+
+     with gr.Row():
+         with gr.Column():
+             audio = gr.Audio(label="Audio file", type="filepath")
+             btn = gr.Button("Transcribe and Diarize")
+
+             gr.Markdown("**Call Transcript:**")
+             diarized = gr.HighlightedText(label="Call Transcript")
+             gr.Markdown("Summarize Speaker")
+             sum_btn = gr.Button("Get Summary")
+             summary = gr.Textbox(lines=4)
+             sentiment_btn = gr.Button("Get Customer Sentiment")
+             analyzed = gr.HighlightedText(color_map=color_map)
+
+         with gr.Column():
+             gr.Markdown("## Example Files")
+             gr.Examples(
+                 examples=EXAMPLES,
+                 inputs=[audio],
+                 outputs=[diarized],
+                 fn=speech_to_text,
+                 cache_examples=True
+             )
+     # when the transcribe button is clicked, convert the audio file to text and diarize
+     btn.click(
+         fn=speech_to_text,
+         inputs=audio,
+         outputs=diarized,
+     )
+     # when the summary button is clicked, create the summary
+     sum_btn.click(fn=partial(summarize, summarization_pipeline=summarization_pipeline), inputs=[diarized], outputs=summary)
+
+     # when the sentiment button is clicked, display the highlighted customer sentences
+     sentiment_btn.click(fn=partial(sentiment, emotion_pipeline=emotion_pipeline), inputs=diarized, outputs=[analyzed])
+
+ demo.launch(debug=1)
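
For reference, the `sentiment()` helper above keeps only predictions whose score clears its label's threshold. The following is a minimal standalone sketch of that filtering step, reusing the same emotion model and thresholds as app.py; the two customer sentences are invented for illustration and are not part of the Space.

    from transformers import pipeline

    # Same emotion model and per-label thresholds as app.py
    emotion_pipeline = pipeline(
        "text-classification",
        model="bhadresh-savani/distilbert-base-uncased-emotion",
    )
    thresholds = {"joy": 0.99, "anger": 0.95, "surprise": 0.95,
                  "sadness": 0.98, "fear": 0.95, "love": 0.99}

    # Hypothetical customer utterances
    sentences = [
        "I have been on hold for an hour and nobody has helped me.",
        "Thank you so much, that completely solves my problem!",
    ]

    # Keep only (sentence, label) pairs whose top score clears the label's
    # threshold, mirroring the loop inside sentiment()
    for sentence, output in zip(sentences, emotion_pipeline(sentences)):
        if output["score"] > thresholds[output["label"]]:
            print((sentence, output["label"]))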
example_audio.wav ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:43166418f743e61807c7681944bf344c4720924adb4e5879dfa954dc7ecc82b2
+ size 3202638
packages.txt ADDED
@@ -0,0 +1,2 @@
+ libsndfile1
+ ffmpeg
requirements.txt ADDED
@@ -0,0 +1,12 @@
+ torch==1.11
+ transformers==4.26.1
+ torchvision==0.12.0
+ torchaudio==0.11.0
+ torchtext==0.12.0
+ speechbrain==0.5.12
+ pyannote.audio
+ librosa
+ requests
+ speechbrain
+ plotly
+ git+https://github.com/m-bain/whisperx.git
short-take-1.wav ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bf15193510fc5a5680fdfdffda6c7cc5b8595bdde3d267b9ef5223e62035a952
+ size 20079500
utils.py ADDED
@@ -0,0 +1,116 @@
+ import re
+ import functools
+ import requests
+ import pandas as pd
+ import plotly.express as px
+ import torch
+ import gradio as gr
+ from transformers import pipeline, Wav2Vec2ProcessorWithLM
+ from pyannote.audio import Pipeline
+ from librosa import load, resample
+ import whisperx
+
+ import re
+ alphabets= "([A-Za-z])"
+ prefixes = "(Mr|St|Mrs|Ms|Dr)[.]"
+ suffixes = "(Inc|Ltd|Jr|Sr|Co)"
+ starters = "(Mr|Mrs|Ms|Dr|He\s|She\s|It\s|They\s|Their\s|Our\s|We\s|But\s|However\s|That\s|This\s|Wherever)"
+ acronyms = "([A-Z][.][A-Z][.](?:[A-Z][.])?)"
+ websites = "[.](com|net|org|io|gov)"
+
+ def split(text):
+     text = " " + text + " "
+     text = text.replace("\n"," ")
+     text = re.sub(prefixes,"\\1<prd>",text)
+     text = re.sub(websites,"<prd>\\1",text)
+     if "Ph.D" in text: text = text.replace("Ph.D.","Ph<prd>D<prd>")
+     text = re.sub("\s" + alphabets + "[.] "," \\1<prd> ",text)
+     text = re.sub(acronyms+" "+starters,"\\1<stop> \\2",text)
+     text = re.sub(alphabets + "[.]" + alphabets + "[.]" + alphabets + "[.]","\\1<prd>\\2<prd>\\3<prd>",text)
+     text = re.sub(alphabets + "[.]" + alphabets + "[.]","\\1<prd>\\2<prd>",text)
+     text = re.sub(" "+suffixes+"[.] "+starters," \\1<stop> \\2",text)
+     text = re.sub(" "+suffixes+"[.]"," \\1<prd>",text)
+     text = re.sub(" " + alphabets + "[.]"," \\1<prd>",text)
+     if "”" in text: text = text.replace(".”","”.")
+     if "\"" in text: text = text.replace(".\"","\".")
+     if "!" in text: text = text.replace("!\"","\"!")
+     if "?" in text: text = text.replace("?\"","\"?")
+     text = text.replace(".",".<stop>")
+     text = text.replace("?","?<stop>")
+     text = text.replace("!","!<stop>")
+     text = text.replace("<prd>",".")
+     sentences = text.split("<stop>")
+     sentences = sentences[:-1]
+     sentences = [s.strip() for s in sentences]
+     return sentences
+
+ def create_fig(x_min, x_max, to_plot, plot_sentences):
+     x, y = list(zip(*to_plot))
+
+     x_min -= 5
+     x_max += 5
+
+     plot_df = pd.DataFrame(
+         data={
+             "x": x,
+             "y": y,
+             "sentence": plot_sentences,
+         }
+     )
+
+     fig = px.line(
+         plot_df,
+         x="x",
+         y="y",
+         hover_data={
+             "sentence": True,
+             "x": True,
+             "y": False,
+         },
+         labels={"x": "time (seconds)", "y": "sentiment"},
+         title=f"Customer sentiment over time",
+         markers=True,
+     )
+
+     fig = fig.update_yaxes(categoryorder="category ascending")
+     fig = fig.update_layout(
+         font=dict(
+             size=18,
+         ),
+         xaxis_range=[x_min, x_max],
+     )
+
+     return fig
+
+ def speech_to_text(speech_file, speaker_segmentation, whisper, alignment_model, metadata, whisper_device):
+     speaker_output = speaker_segmentation(speech_file)
+     result = whisper.transcribe(speech_file)
+
+     chunks = whisperx.align(result["segments"], alignment_model, metadata, speech_file, whisper_device)["word_segments"]
+
+     diarized_output = []
+     i = 0
+     speaker_counter = 0
+
+     # New iteration every time the speaker changes
+     for turn, _, _ in speaker_output.itertracks(yield_label=True):
+
+         speaker = "Customer" if speaker_counter % 2 == 0 else "Support"
+         diarized = ""
+         while i < len(chunks) and chunks[i]["end"] <= turn.end:
+             diarized += chunks[i]["text"] + " "
+             i += 1
+
+         if diarized != "":
+             # diarized = rpunct.punctuate(re.sub(eng_pattern, "", diarized), lang="en")
+
+             diarized_output.extend(
+                 [
+                     (diarized, speaker),
+                     ("from {:.2f}-{:.2f}".format(turn.start, turn.end), None),
+                 ]
+             )
+
+         speaker_counter += 1
+
+     return diarized_output
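
As a quick sanity check of the rule-based sentence splitter above, here is a small usage sketch. The input sentence is invented, and the expected output follows from the <prd>/<stop> substitutions; note that importing utils.py requires the Space's dependencies (whisperx, pyannote.audio, librosa) since they are imported at module level.

    from utils import split

    text = "Dr. Smith reviewed the invoice. Can you resend it? Thanks!"
    print(split(text))
    # Expected: ['Dr. Smith reviewed the invoice.', 'Can you resend it?', 'Thanks!']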