akashkumarbtc committed
Commit
4519e61
1 Parent(s): 7f0685b

app.py file

Files changed (1)
  1. app.py +153 -0
app.py ADDED
@@ -0,0 +1,153 @@
import os
import wave
import nltk
import torch
import openai
import whisper
import datetime
import requests
import subprocess
import contextlib
import numpy as np
import gradio as gr
from pyannote.audio import Audio
from pyannote.core import Segment
from sklearn.cluster import AgglomerativeClustering
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from pyannote.audio.pipelines.speaker_verification import PretrainedSpeakerEmbedding
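
# NOTE: openai.ChatCompletion (used below) is the pre-1.0 openai SDK interface;
# it was removed in openai>=1.0, so this code needs an SDK pinned below 1.0.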


# Speaker-embedding model used to tell speakers apart (CPU inference)
embedding_model = PretrainedSpeakerEmbedding(
    "speechbrain/spkrec-ecapa-voxceleb",
    device=torch.device("cpu"))

nltk.download('vader_lexicon')
sid = SentimentIntensityAnalyzer()              # VADER sentiment scorer
model = whisper.load_model('models/medium.pt')  # local Whisper "medium" checkpoint
audio = Audio()                                 # pyannote helper for cropping waveforms
openai.api_key = os.environ['OPEN_AI_API_KEY']

example_files = [
    "https://pdf.bluetickconsultants.com/customer_support.mp3",
    "https://pdf.bluetickconsultants.com/product_refund.mp3",
]

file_names = []


def download_file(url, save_name):
    # Fetch each example recording once and cache it locally
    if not os.path.exists(save_name):
        response = requests.get(url)
        with open(save_name, 'wb') as f:
            f.write(response.content)


for url in example_files:
    save_name = str(url).split("/")[-1]
    download_file(url, save_name)
    # Each Gradio example row is [audio path, default speaker count]
    file_names.append([save_name, 2])


def segment_embedding(segment, duration, audio_file):
    start = segment["start"]
    # Whisper overshoots the end timestamp in the last segment
    end = min(duration, segment["end"])
    clip = Segment(start, end)
    waveform, sample_rate = audio.crop(audio_file, clip)
    # Down-mix to mono, then add a batch dimension for the embedding model
    waveform = waveform.mean(dim=0, keepdim=True)
    return embedding_model(waveform.unsqueeze(0))
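
# speech_to_text_and_sentiment ties the pipeline together: re-encode the upload
# to WAV, transcribe it with Whisper, embed every segment with the ECAPA model,
# cluster the 192-dim embeddings into speakers, then score the conversation
# with VADER and summarize it with GPT-3.5.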


def speech_to_text_and_sentiment(audio_file, number_of_speakers=2):
    # Re-encode non-WAV uploads with ffmpeg so wave/pyannote can read them
    if audio_file[-3:] != 'wav':
        audio_file_name = audio_file.split("/")[-1]
        audio_file_name = audio_file_name.split(".")[0] + ".wav"
        subprocess.call(['ffmpeg', '-y', '-i', audio_file, audio_file_name])
        audio_file = audio_file_name

    result = model.transcribe(audio_file)
    segments = result["segments"]

    # Total duration of the WAV file, needed to clamp the final segment
    with contextlib.closing(wave.open(audio_file, 'r')) as f:
        frames = f.getnframes()
        rate = f.getframerate()
        duration = frames / float(rate)

    # One 192-dim ECAPA speaker embedding per transcribed segment
    embeddings = np.zeros(shape=(len(segments), 192))
    for i, segment in enumerate(segments):
        embeddings[i] = segment_embedding(segment, duration, audio_file)

    embeddings = np.nan_to_num(embeddings)

    # Group the segments into the requested number of speakers
    clustering = AgglomerativeClustering(
        int(number_of_speakers)).fit(embeddings)
    labels = clustering.labels_
    for i in range(len(segments)):
        segments[i]["speaker"] = 'SPEAKER ' + str(labels[i] + 1)

    def time(secs):
        return datetime.timedelta(seconds=round(secs))

    # Rebuild the conversation, emitting a "SPEAKER n H:MM:SS" header
    # whenever the speaker changes
    conv = ""
    for (i, segment) in enumerate(segments):
        if i == 0 or segments[i - 1]["speaker"] != segment["speaker"]:
            conv += "\n" + segment["speaker"] + ' ' + \
                str(time(segment["start"])) + '\n'
        conv += segment["text"][1:] + ' '
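
    # Illustrative shape of the transcript built above (dialogue is made up):
    #   SPEAKER 1 0:00:00
    #   Hi, thanks for calling. How can I help you today? ...
    #   SPEAKER 2 0:00:05
    #   I'm calling about a refund on my last order. ...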

    # Overall VADER polarity for the whole conversation
    sentiment_scores = sid.polarity_scores(conv)

    messages = [
        {
            "role": "system",
            "content": """You will be provided with a conversation. Your task is to give a summary and mention all the main details in bullet points.
            Replace speaker 1 and speaker 2 with the sales executive or company name and the customer name if available.
            """
        },
        {
            "role": "user",
            "content": conv
        }
    ]

    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=messages,
        temperature=0,
        max_tokens=1000,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0
    )

    call_summary = ""
    call_summary += f"Sentiment Analysis:\nPositive: {sentiment_scores['pos']} | Negative: {sentiment_scores['neg']} | Neutral: {sentiment_scores['neu']}\n\n"
    call_summary += response["choices"][0]["message"]["content"]

    return conv, call_summary
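

# The two returned strings populate the "Transcript" and "Analysis" boxes below.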
demo = gr.Interface(
    title="Bluetick Sales Call Evaluator",
    description="Upload a sales call audio file and get a transcription of the call along with sentiment analysis",
    fn=speech_to_text_and_sentiment,
    inputs=[
        gr.Audio(label="Select audio file", type="filepath"),
        gr.Number(label="Select number of speakers (1-5)",
                  value=2, minimum=1, maximum=5)
    ],
    outputs=[
        gr.Textbox(label="Transcript"),
        gr.Textbox(label="Analysis")
    ],
    examples=file_names,
    theme=gr.themes.Default(primary_hue=gr.themes.colors.red,
                            secondary_hue=gr.themes.colors.pink),
    css=" .gradio-title, .gradio-description {color: black;}",
)

demo.launch(debug=True)
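
For a quick smoke test without the UI, the pipeline can be exercised directly. A minimal sketch, assuming demo.launch(debug=True) were moved behind an "if __name__ == '__main__':" guard (so importing app.py does not start the server) and that "sample_call.mp3" stands in for any local recording; both names are illustrative:

    # smoke_test.py -- illustrative only, not part of this commit
    from app import speech_to_text_and_sentiment

    # "sample_call.mp3" is a hypothetical local file
    transcript, analysis = speech_to_text_and_sentiment(
        "sample_call.mp3", number_of_speakers=2)
    print(transcript)
    print(analysis)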