# Spaces: Runtime error
import os | |
import wave | |
import nltk | |
import torch | |
import torch | |
import openai | |
import whisper | |
import datetime | |
import requests | |
import subprocess | |
import contextlib | |
import numpy as np | |
import gradio as gr | |
from pyannote.audio import Audio | |
from pyannote.core import Segment | |
from sklearn.cluster import AgglomerativeClustering | |
from nltk.sentiment.vader import SentimentIntensityAnalyzer | |
from pyannote.audio.pipelines.speaker_verification import PretrainedSpeakerEmbedding | |
# ---------------------------------------------------------------------------
# Shared state, created once when the module is imported.
# ---------------------------------------------------------------------------

# Remote sample recordings offered as examples in the UI.
example_files = [
    "https://pdf.bluetickconsultants.com/e-commerce-call.mp3",
    "https://pdf.bluetickconsultants.com/customer_support.mp3",
    "https://pdf.bluetickconsultants.com/product_refund.mp3",
]
# Filled further down with one [local file name, speaker count] row per example.
file_names = []

# Speaker-embedding model built from the "speechbrain/spkrec-ecapa-voxceleb"
# checkpoint and pinned to the CPU.
embedding_model = PretrainedSpeakerEmbedding(
    "speechbrain/spkrec-ecapa-voxceleb",
    device=torch.device("cpu"),
)

# Fetch the VADER lexicon, then build the sentiment analyzer.
nltk.download('vader_lexicon')
sid = SentimentIntensityAnalyzer()

# Whisper "base" checkpoint used for transcription.
model = whisper.load_model('base')

# pyannote audio reader used to crop clips out of a file.
audio = Audio()

# Indexing os.environ directly raises KeyError at import time when the
# variable is not set.
openai.api_key = os.environ['OPEN_AI_API_KEY']
def download_file(url, save_name):
    """Download ``url`` to ``save_name`` unless that path already exists.

    Args:
        url: Address of the file to fetch.
        save_name: Local path the response body is written to.

    Returns:
        None.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection failures or timeouts.
    """
    # An existing file is treated as a cache hit; nothing is re-downloaded.
    if os.path.exists(save_name):
        return

    # A timeout keeps a stalled server from hanging start-up forever.
    response = requests.get(url, timeout=60)
    # Fail loudly instead of saving an HTML error page under an audio name.
    response.raise_for_status()

    # The context manager guarantees the handle is flushed and closed.
    with open(save_name, 'wb') as out_file:
        out_file.write(response.content)
# Download every example recording into the working directory and register
# it as an example row for the interface.
for url in example_files:
    # The local file name is the last path component of the URL.
    save_name = str(url).rsplit("/", 1)[-1]
    download_file(url, save_name)
    # Row layout: [audio file, number of speakers].
    file_names.append([save_name, 2])
def segment_embedding(segment, duration, audio_file):
    """Compute the speaker embedding for one transcript segment.

    Args:
        segment: Mapping that provides "start" and "end" entries.
        duration: Upper bound applied to the segment's end.
        audio_file: Audio source handed to ``audio.crop``.

    Returns:
        Whatever ``embedding_model`` produces for the cropped clip.
    """
    # Whisper overshoots the end timestamp in the last segment
    clip_end = min(duration, segment["end"])
    window = Segment(segment["start"], clip_end)

    # The second value returned by crop is not needed here.
    samples, _ = audio.crop(audio_file, window)

    # Average over dim 0 while keeping that axis (presumably a mono down-mix
    # of the channels -- confirm), then add a leading axis for the model.
    averaged = samples.mean(dim=0, keepdim=True)
    return embedding_model(averaged.unsqueeze(0))
def _ensure_wav(audio_file):
    """Return a path to a WAV version of ``audio_file``, converting if needed."""
    if audio_file.lower().endswith('.wav'):
        return audio_file
    # splitext keeps dots inside the stem ("call.v2.mp3" -> "call.v2.wav").
    stem = os.path.splitext(os.path.basename(audio_file))[0]
    wav_name = stem + ".wav"
    # "-y" (overwrite) goes before the files so it is not a trailing option;
    # check=True surfaces a failed conversion here instead of as a confusing
    # error when the missing/stale WAV is opened later.
    subprocess.run(['ffmpeg', '-y', '-i', audio_file, wav_name], check=True)
    return wav_name


def _wav_duration(wav_path):
    """Return the length of the WAV file at ``wav_path`` in seconds."""
    with contextlib.closing(wave.open(wav_path, 'r')) as wav:
        return wav.getnframes() / float(wav.getframerate())


def _assign_speakers(segments, duration, audio_file, number_of_speakers):
    """Cluster segment embeddings and store a "speaker" label on each segment."""
    # 192 is the embedding width the rows are allocated with; presumably the
    # output size of the embedding model -- confirm if the model changes.
    embeddings = np.zeros(shape=(len(segments), 192))
    for i, segment in enumerate(segments):
        embeddings[i] = segment_embedding(segment, duration, audio_file)
    embeddings = np.nan_to_num(embeddings)

    if len(segments) < 2:
        # Clustering needs at least two samples; a lone segment is speaker 1.
        labels = [0] * len(segments)
    else:
        # Never ask for fewer than one or more clusters than there are samples.
        n_clusters = max(1, min(int(number_of_speakers), len(segments)))
        labels = AgglomerativeClustering(n_clusters).fit(embeddings).labels_

    for segment, label in zip(segments, labels):
        segment["speaker"] = 'SPEAKER ' + str(label + 1)


def _format_transcript(segments):
    """Render labelled segments as text with a header at each speaker change."""
    parts = []
    previous_speaker = None
    for segment in segments:
        speaker = segment["speaker"]
        if speaker != previous_speaker:
            stamp = datetime.timedelta(seconds=round(segment["start"]))
            parts.append("\n" + speaker + ' ' + str(stamp) + '\n')
            previous_speaker = speaker
        # lstrip removes the leading blank without eating a real character
        # when a segment's text does not start with one.
        parts.append(segment["text"].lstrip() + ' ')
    return "".join(parts)


def _summarize(conv):
    """Ask the chat model for a bullet-point summary of ``conv``."""
    # The prompt wording is kept verbatim (spelling included) so the request
    # sent to the model is unchanged.
    system_prompt = (
        "You will be provided with a conversation. Your task is to give a "
        "summary and mention all the main details in bullet points.\n"
        "Replace speaker 1 and speaker 2 with sales excutive or comapny name "
        "and customer name if available.\n"
    )
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": conv},
    ]
    # NOTE(review): `openai.ChatCompletion` is the pre-1.0 client interface --
    # confirm the openai version this app is deployed with.
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=messages,
        temperature=0,
        max_tokens=1000,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
    )
    return response["choices"][0]["message"]["content"]


def speech_to_text_and_sentiment(audio_file, number_of_speakers=2):
    """Transcribe a call, label its speakers, and summarize it.

    The audio is converted to WAV when necessary, transcribed with the
    module-level Whisper model, split between speakers by clustering
    per-segment embeddings, scored with the module-level sentiment analyzer,
    and summarized through the OpenAI chat API.

    Args:
        audio_file: Path to the audio file to analyse.
        number_of_speakers: Number of speaker clusters to form. ``None`` falls
            back to 2; the value is limited to the range
            ``1..number of segments``.

    Returns:
        A ``(transcript, analysis)`` tuple of strings. ``analysis`` holds the
        sentiment scores followed by the generated summary.

    Raises:
        ValueError: If no audio file path is given.
        subprocess.CalledProcessError: If the ffmpeg conversion fails.
    """
    if not audio_file:
        raise ValueError("No audio file was provided.")
    # An empty number field in the UI arrives as None.
    if number_of_speakers is None:
        number_of_speakers = 2

    audio_file = _ensure_wav(audio_file)
    segments = model.transcribe(audio_file)["segments"]
    if not segments:
        # Nothing to cluster or summarize; skip the remote call entirely.
        return "", "No speech detected in the audio file."

    duration = _wav_duration(audio_file)
    _assign_speakers(segments, duration, audio_file, number_of_speakers)

    conv = _format_transcript(segments)
    sentiment_scores = sid.polarity_scores(conv)

    call_summary = ""
    call_summary += f"Sentiment Analysis:\nPositive: {sentiment_scores['pos']} | Negative: {sentiment_scores['neg']} | Neutral: {sentiment_scores['neu']}\n\n"
    call_summary += _summarize(conv)
    return conv, call_summary
# Web UI: one audio upload plus a speaker count in, transcript and analysis
# text out.
demo = gr.Interface(
    title="Bluetick Sales Call Evaluator",
    description="Upload a sales call audio file and get a transcription of the call along with sentiment analysis",
    fn=speech_to_text_and_sentiment,
    inputs=[
        gr.Audio(label="Select audio file", type="filepath"),
        # `value` is the parameter that sets the initial number, and
        # `minimum`/`maximum` are the names of the range bounds; the previous
        # `default`, `type`, `min` and `max` keywords are not Number
        # parameters. precision=0 keeps the input a whole number.
        gr.Number(
            label="Select number of speakers (1-5)",
            value=2,
            precision=0,
            minimum=1,
            maximum=5,
        ),
    ],
    outputs=[
        gr.Textbox(label="Transcript"),
        gr.Textbox(label="Analysis"),
    ],
    examples=file_names,
    theme=gr.themes.Default(
        primary_hue=gr.themes.colors.red,
        secondary_hue=gr.themes.colors.pink,
    ),
    css=" .gradio-title, .gradio-description {color: black;}",
)

# Start the server as soon as the script runs.
demo.launch(debug=True)