akashkumarbtc committed
Commit
4519e61
1 Parent(s): 7f0685b

app.py file

Files changed (1)
  1. app.py +153 -0
app.py ADDED
@@ -0,0 +1,153 @@
import os
import wave
import nltk
import torch
import openai
import whisper
import datetime
import requests
import subprocess
import contextlib
import numpy as np
import gradio as gr
from pyannote.audio import Audio
from pyannote.core import Segment
from sklearn.cluster import AgglomerativeClustering
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from pyannote.audio.pipelines.speaker_verification import PretrainedSpeakerEmbedding
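
# NOTE: openai.ChatCompletion (used below) is the pre-1.0 openai SDK interface;
# it was removed in openai>=1.0, so this code needs an SDK pinned below 1.0.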


# Speaker-embedding model used to tell speakers apart (CPU inference)
embedding_model = PretrainedSpeakerEmbedding(
    "speechbrain/spkrec-ecapa-voxceleb",
    device=torch.device("cpu"))

nltk.download('vader_lexicon')
sid = SentimentIntensityAnalyzer()              # VADER sentiment scorer
model = whisper.load_model('models/medium.pt')  # local Whisper "medium" checkpoint
audio = Audio()                                 # pyannote helper for cropping waveforms
openai.api_key = os.environ['OPEN_AI_API_KEY']

example_files = [
    "https://pdf.bluetickconsultants.com/customer_support.mp3",
    "https://pdf.bluetickconsultants.com/product_refund.mp3",
]

file_names = []


def download_file(url, save_name):
    # Fetch each example recording once and cache it locally
    if not os.path.exists(save_name):
        response = requests.get(url)
        with open(save_name, 'wb') as f:
            f.write(response.content)


for url in example_files:
    save_name = str(url).split("/")[-1]
    download_file(url, save_name)
    # Each Gradio example row is [audio path, default speaker count]
    file_names.append([save_name, 2])


def segment_embedding(segment, duration, audio_file):
    start = segment["start"]
    # Whisper overshoots the end timestamp in the last segment
    end = min(duration, segment["end"])
    clip = Segment(start, end)
    waveform, sample_rate = audio.crop(audio_file, clip)
    # Down-mix to mono, then add a batch dimension for the embedding model
    waveform = waveform.mean(dim=0, keepdim=True)
    return embedding_model(waveform.unsqueeze(0))
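
# speech_to_text_and_sentiment ties the pipeline together: re-encode the upload
# to WAV, transcribe it with Whisper, embed every segment with the ECAPA model,
# cluster the 192-dim embeddings into speakers, then score the conversation
# with VADER and summarize it with GPT-3.5.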


def speech_to_text_and_sentiment(audio_file, number_of_speakers=2):
    # Re-encode non-WAV uploads with ffmpeg so wave/pyannote can read them
    if audio_file[-3:] != 'wav':
        audio_file_name = audio_file.split("/")[-1]
        audio_file_name = audio_file_name.split(".")[0] + ".wav"
        subprocess.call(['ffmpeg', '-y', '-i', audio_file, audio_file_name])
        audio_file = audio_file_name

    result = model.transcribe(audio_file)
    segments = result["segments"]

    # Total duration of the WAV file, needed to clamp the final segment
    with contextlib.closing(wave.open(audio_file, 'r')) as f:
        frames = f.getnframes()
        rate = f.getframerate()
        duration = frames / float(rate)

    # One 192-dim ECAPA speaker embedding per transcribed segment
    embeddings = np.zeros(shape=(len(segments), 192))
    for i, segment in enumerate(segments):
        embeddings[i] = segment_embedding(segment, duration, audio_file)

    embeddings = np.nan_to_num(embeddings)

    # Group the segments into the requested number of speakers
    clustering = AgglomerativeClustering(
        int(number_of_speakers)).fit(embeddings)
    labels = clustering.labels_
    for i in range(len(segments)):
        segments[i]["speaker"] = 'SPEAKER ' + str(labels[i] + 1)

    def time(secs):
        return datetime.timedelta(seconds=round(secs))

    # Rebuild the conversation, emitting a "SPEAKER n H:MM:SS" header
    # whenever the speaker changes
    conv = ""
    for (i, segment) in enumerate(segments):
        if i == 0 or segments[i - 1]["speaker"] != segment["speaker"]:
            conv += "\n" + segment["speaker"] + ' ' + \
                str(time(segment["start"])) + '\n'
        conv += segment["text"][1:] + ' '
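
    # Illustrative shape of the transcript built above (dialogue is made up):
    #   SPEAKER 1 0:00:00
    #   Hi, thanks for calling. How can I help you today? ...
    #   SPEAKER 2 0:00:05
    #   I'm calling about a refund on my last order. ...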

    # Overall VADER polarity for the whole conversation
    sentiment_scores = sid.polarity_scores(conv)

    messages = [
        {
            "role": "system",
            "content": """You will be provided with a conversation. Your task is to give a summary and mention all the main details in bullet points.
            Replace speaker 1 and speaker 2 with the sales executive or company name and the customer name if available.
            """
        },
        {
            "role": "user",
            "content": conv
        }
    ]

    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=messages,
        temperature=0,
        max_tokens=1000,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0
    )

    call_summary = ""
    call_summary += f"Sentiment Analysis:\nPositive: {sentiment_scores['pos']} | Negative: {sentiment_scores['neg']} | Neutral: {sentiment_scores['neu']}\n\n"
    call_summary += response["choices"][0]["message"]["content"]

    return conv, call_summary
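

# The two returned strings populate the "Transcript" and "Analysis" boxes below.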
demo = gr.Interface(
    title="Bluetick Sales Call Evaluator",
    description="Upload a sales call audio file and get a transcription of the call along with sentiment analysis",
    fn=speech_to_text_and_sentiment,
    inputs=[
        gr.Audio(label="Select audio file", type="filepath"),
        gr.Number(label="Select number of speakers (1-5)",
                  value=2, minimum=1, maximum=5)
    ],
    outputs=[
        gr.Textbox(label="Transcript"),
        gr.Textbox(label="Analysis")
    ],
    examples=file_names,
    theme=gr.themes.Default(primary_hue=gr.themes.colors.red,
                            secondary_hue=gr.themes.colors.pink),
    css=" .gradio-title, .gradio-description {color: black;}",
)

demo.launch(debug=True)
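
For a quick smoke test without the UI, the pipeline can be exercised directly. A minimal sketch, assuming demo.launch(debug=True) were moved behind an "if __name__ == '__main__':" guard (so importing app.py does not start the server) and that "sample_call.mp3" stands in for any local recording; both names are illustrative:

    # smoke_test.py -- illustrative only, not part of this commit
    from app import speech_to_text_and_sentiment

    # "sample_call.mp3" is a hypothetical local file
    transcript, analysis = speech_to_text_and_sentiment(
        "sample_call.mp3", number_of_speakers=2)
    print(transcript)
    print(analysis)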