import os
import wave
import nltk
import torch
import openai
import whisper
import textstat
import datetime
import requests
import subprocess
import contextlib
import numpy as np
import gradio as gr
from pyannote.audio import Audio
from pyannote.core import Segment
from sklearn.cluster import AgglomerativeClustering
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from pyannote.audio.pipelines.speaker_verification import PretrainedSpeakerEmbedding


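# Pretrained ECAPA-TDNN speaker-embedding model (SpeechBrain), run on CPU;
# it embeds each transcript segment so segments can be clustered by speaker.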
embedding_model = PretrainedSpeakerEmbedding(
    "speechbrain/spkrec-ecapa-voxceleb",
    device=torch.device("cpu"))

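# One-time setup: VADER lexicon for sentiment, Whisper base model for
# transcription, pyannote Audio helper for cropping, and the OpenAI API key.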
nltk.download('vader_lexicon')
sid = SentimentIntensityAnalyzer()
model = whisper.load_model('base')
audio = Audio()
openai.api_key = os.environ['OPEN_AI_API_KEY']

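# Sample call recordings downloaded locally and exposed as Gradio examples.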
example_files = [
    "https://pdf.bluetickconsultants.com/e-commerce-call.mp3",
    "https://pdf.bluetickconsultants.com/customer_support.mp3",
    "https://pdf.bluetickconsultants.com/product_refund.mp3",
]


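# Each Gradio example row is [local audio path, default number of speakers].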
file_names = []


def download_file(url, save_name):
    """Download url to save_name unless the file already exists locally."""
    if not os.path.exists(save_name):
        response = requests.get(url)
        with open(save_name, 'wb') as f:
            f.write(response.content)


for url in example_files:
    save_name = url.split("/")[-1]
    download_file(url, save_name)
    file_names.append([save_name, 2])


def segment_embedding(segment, duration, audio_file):
    """Return the speaker embedding for one Whisper segment of the audio file."""
    start = segment["start"]
    # Whisper overshoots the end timestamp in the last segment
    end = min(duration, segment["end"])
    clip = Segment(start, end)
    waveform, sample_rate = audio.crop(audio_file, clip)
    # Collapse multi-channel audio to mono before embedding
    waveform = waveform.mean(dim=0, keepdim=True)
    return embedding_model(waveform.unsqueeze(0))


def speech_to_text_and_sentiment(audio_file, number_of_speakers=2):
    """Transcribe a call, attribute each segment to a speaker, and return the
    analysis summary plus the diarized transcript."""

    # Whisper and the wave module both need WAV input, so convert anything else with ffmpeg.
    if not audio_file.endswith('.wav'):
        audio_file_name = os.path.splitext(os.path.basename(audio_file))[0] + ".wav"
        subprocess.call(['ffmpeg', '-y', '-i', audio_file, audio_file_name])
        audio_file = audio_file_name

    result = model.transcribe(audio_file)
    segments = result["segments"]

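    # Read the clip duration so the final segment's end timestamp can be clamped.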
    with contextlib.closing(wave.open(audio_file, 'r')) as f:
        frames = f.getnframes()
        rate = f.getframerate()
        duration = frames / float(rate)

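    # Embed every segment; the ECAPA model produces 192-dimensional vectors.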
    embeddings = np.zeros(shape=(len(segments), 192))
    for i, segment in enumerate(segments):
        embeddings[i] = segment_embedding(segment, duration, audio_file)

    embeddings = np.nan_to_num(embeddings)

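    # Group segment embeddings into the requested number of speakers and label each segment.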
    clustering = AgglomerativeClustering(
        int(number_of_speakers)).fit(embeddings)
    labels = clustering.labels_
    for i in range(len(segments)):
        segments[i]["speaker"] = 'SPEAKER ' + str(labels[i] + 1)

    def time(secs):
        # Render a second count as an H:MM:SS timestamp for the transcript.
        return datetime.timedelta(seconds=round(secs))

    conv = ""

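    # Build the transcript, inserting a speaker header whenever the speaker changes.
    # Whisper prefixes each segment's text with a space, hence the [1:] slice.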
    for (i, segment) in enumerate(segments):
        if i == 0 or segments[i - 1]["speaker"] != segment["speaker"]:
            conv += "\n" + segment["speaker"] + ' ' + \
                str(time(segment["start"])) + '\n'
        conv += segment["text"][1:] + ' '

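    # Overall sentiment of the conversation (VADER polarity scores).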
    sentiment_scores = sid.polarity_scores(conv)

    messages = [
        {
            "role": "system",
            "content": """You will be provided with a conversation. Your task is to give a summary and mention all the main details in bullet points. 
        Replace speaker 1 and speaker 2 with sales excutive or comapny name and customer name if available.
        """
        },
        {
            "role": "user",
            "content": conv
        }
    ]

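    # Summarise the call with the legacy ChatCompletion endpoint (openai<1.0 client).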
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=messages,
        temperature=0,
        max_tokens=1000,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0
    )
    
    readability_score = textstat.flesch_reading_ease(conv)
    
    call_summary = ""
    call_summary += "Sentiment Analysis:\n" + "-------------------------------------\n"
    call_summary += f"Positive: {sentiment_scores['pos']} | Negative: {sentiment_scores['neg']} | Neutral: {sentiment_scores['neu']}\n\n"

    call_summary += "Readability/ Clarity of speach:\n" + "-------------------------------------\n"
    call_summary += f"Readability Score (Flesch-Kincaid): {readability_score}\n\n"

    call_summary += "Call Summary:\n" + "-------------------------------------\n"
    call_summary += response["choices"][0]["message"]["content"]

    return call_summary, conv


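# Gradio UI: audio file and speaker count in; analysis summary and transcript out.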
demo = gr.Interface(
    title="Bluetick Sales Call Evaluator",
    description="Upload a sales call audio file and get a transcription of the call along with sentiment analysis",
    fn=speech_to_text_and_sentiment,
    inputs=[
        gr.Audio(label="Select audio file", type="filepath"),
        gr.Number(label="Select number of speakers (1-5)", value=2, precision=0)
    ],
    outputs=[
        gr.Textbox(label="Analysis & Summary"),
        gr.Textbox(label="Transcript"),
    ],
    examples=file_names,
    theme=gr.themes.Soft().set(
        body_text_color="black"
    ),
    css=" .gradio-container {background-color: white !important;} .prose  h1{color: black !important;} p {color: black !important;}",

)

demo.launch(debug=True)