Spaces:

akashkumarbtc
/

bluetick-sales-call-evaluator

Runtime error

App Files Files Community

bluetick-sales-call-evaluator / app.py

akashkumarbtc

added new audio example

de695ce 12 months ago

raw

history blame

No virus

4.79 kB

	import os
	import wave
	import nltk
	import torch
	import torch
	import openai
	import whisper
	import datetime
	import requests
	import subprocess
	import contextlib
	import numpy as np
	import gradio as gr
	from pyannote.audio import Audio
	from pyannote.core import Segment
	from sklearn.cluster import AgglomerativeClustering
	from nltk.sentiment.vader import SentimentIntensityAnalyzer
	from pyannote.audio.pipelines.speaker_verification import PretrainedSpeakerEmbedding


	# Speaker-embedding model applied to each transcript segment; pinned to the CPU.
	embedding_model = PretrainedSpeakerEmbedding(
	    "speechbrain/spkrec-ecapa-voxceleb",
	    device=torch.device("cpu"),
	)

	# Fetch the VADER lexicon first, then build the sentiment analyzer on top of it.
	nltk.download('vader_lexicon')
	sid = SentimentIntensityAnalyzer()

	# Whisper "base" checkpoint used for transcription.
	model = whisper.load_model('base')

	# pyannote audio reader used to crop individual segments out of a file.
	audio = Audio()

	# The OpenAI key comes from the environment; a missing variable raises KeyError.
	openai.api_key = os.environ['OPEN_AI_API_KEY']

	# Remote sample recordings that are offered as ready-made examples in the UI.
	example_files = [
	    "https://pdf.bluetickconsultants.com/e-commerce-call.mp3",
	    "https://pdf.bluetickconsultants.com/customer_support.mp3",
	    "https://pdf.bluetickconsultants.com/product_refund.mp3",
	]

	# Filled at import time with one [local_file_name, speaker_count] row per example.
	file_names = []


	def download_file(url, save_name, timeout=30):
	    """Download ``url`` to ``save_name`` unless that file already exists.

	    Args:
	        url: Address of the file to fetch.
	        save_name: Local path the response body is written to.
	        timeout: Seconds to wait for the server before giving up, so a
	            stalled connection cannot hang the caller indefinitely.

	    Raises:
	        requests.RequestException: If the request fails, times out, or the
	            server answers with an HTTP error status.
	    """
	    if os.path.exists(save_name):
	        return

	    response = requests.get(url, timeout=timeout)
	    # Fail loudly instead of saving an error page under the target file name.
	    response.raise_for_status()

	    # The context manager guarantees the handle is flushed and closed.
	    with open(save_name, 'wb') as out_file:
	        out_file.write(response.content)


	# Fetch each example recording into the working directory and register it,
	# paired with a speaker count of 2, as an example row.
	for url in example_files:
	    # The local file name is the last path component of the URL.
	    save_name = str(url).rsplit("/", 1)[-1]
	    download_file(url, save_name)
	    file_names.append([save_name, 2])


	def segment_embedding(segment, duration, audio_file):
	    """Compute the speaker embedding for a single transcript segment.

	    Args:
	        segment: Mapping providing the segment's "start" and "end" times.
	        duration: Total length of the audio; used as an upper bound for the
	            segment end.
	        audio_file: Audio path handed to ``audio.crop``.

	    Returns:
	        The result of ``embedding_model`` for the cropped waveform after its
	        first dimension has been averaged.
	    """
	    # Whisper can report an end time beyond the end of the file for the
	    # final segment, so the end is capped at the audio duration.
	    window = Segment(segment["start"], min(duration, segment["end"]))
	    cropped, _sample_rate = audio.crop(audio_file, window)
	    # Average over dim 0 (presumably the channel axis), then add a leading
	    # batch dimension for the embedding model.
	    averaged = cropped.mean(dim=0, keepdim=True)
	    return embedding_model(averaged.unsqueeze(0))


	def _ensure_wav(audio_file):
	    """Return a path to a WAV version of ``audio_file``, converting via ffmpeg if needed."""
	    stem, extension = os.path.splitext(os.path.basename(audio_file))
	    if extension.lower() == ".wav":
	        return audio_file
	    # splitext keeps dots inside the stem, so "a.b.mp3" becomes "a.b.wav".
	    wav_name = stem + ".wav"
	    # check=True surfaces a failed conversion right here instead of letting a
	    # missing or stale output file break a later step.
	    subprocess.run(['ffmpeg', '-y', '-i', audio_file, wav_name], check=True)
	    return wav_name


	def _wav_duration(wav_path):
	    """Return the length of the WAV file at ``wav_path`` in seconds."""
	    with contextlib.closing(wave.open(wav_path, 'r')) as wav_file:
	        return wav_file.getnframes() / float(wav_file.getframerate())


	def _label_speakers(segments, duration, audio_file, number_of_speakers):
	    """Add a "speaker" entry to every segment by clustering segment embeddings."""
	    try:
	        requested = int(number_of_speakers)
	    except (TypeError, ValueError, OverflowError):
	        # E.g. the number field was cleared; fall back to the default of 2.
	        requested = 2
	    # Never ask for more clusters than there are segments, nor fewer than one.
	    n_clusters = max(1, min(requested, len(segments)))

	    if n_clusters == 1:
	        # A single cluster needs no embeddings; this also covers transcripts
	        # with only one segment, which cannot be clustered.
	        labels = [0] * len(segments)
	    else:
	        # 192 columns per row, presumably the embedding model's output size.
	        embeddings = np.zeros(shape=(len(segments), 192))
	        for i, segment in enumerate(segments):
	            embeddings[i] = segment_embedding(segment, duration, audio_file)
	        embeddings = np.nan_to_num(embeddings)
	        labels = AgglomerativeClustering(n_clusters).fit(embeddings).labels_

	    for segment, label in zip(segments, labels):
	        segment["speaker"] = 'SPEAKER ' + str(label + 1)


	def _format_transcript(segments):
	    """Join labelled segments into a transcript with a header per speaker turn."""
	    parts = []
	    previous_speaker = None
	    for segment in segments:
	        speaker = segment["speaker"]
	        if speaker != previous_speaker:
	            stamp = datetime.timedelta(seconds=round(segment["start"]))
	            parts.append("\n" + speaker + ' ' + str(stamp) + '\n')
	            previous_speaker = speaker
	        text = segment["text"]
	        # Drop only a leading space; never eat a real first character.
	        if text.startswith(" "):
	            text = text[1:]
	        parts.append(text + ' ')
	    return "".join(parts)


	def speech_to_text_and_sentiment(audio_file, number_of_speakers=2):
	    """Transcribe a call, label the speakers, and summarise it with sentiment scores.

	    Args:
	        audio_file: Path to the audio file. Anything that is not a ``.wav``
	            file is first converted with ffmpeg into the working directory.
	        number_of_speakers: Number of speaker clusters to form. It is capped
	            at the number of transcript segments; an unusable value falls
	            back to 2.

	    Returns:
	        A ``(transcript, analysis)`` tuple of strings. ``analysis`` holds the
	        VADER scores followed by the chat-model summary. When no audio is
	        given or no speech is found, ``transcript`` is empty and ``analysis``
	        explains why.

	    Raises:
	        subprocess.CalledProcessError: If the ffmpeg conversion fails.
	    """
	    if not audio_file:
	        return "", "No audio file provided."

	    audio_file = _ensure_wav(audio_file)

	    segments = model.transcribe(audio_file)["segments"]
	    if not segments:
	        # Nothing to cluster or summarise.
	        return "", "No speech was detected in the audio."

	    duration = _wav_duration(audio_file)
	    _label_speakers(segments, duration, audio_file, number_of_speakers)
	    conv = _format_transcript(segments)

	    sentiment_scores = sid.polarity_scores(conv)

	    # The prompt text is runtime data sent to the model and is kept verbatim.
	    messages = [
	        {
	            "role": "system",
	            "content": """You will be provided with a conversation. Your task is to give a summary and mention all the main details in bullet points.
	Replace speaker 1 and speaker 2 with sales excutive or comapny name and customer name if available.
	"""
	        },
	        {
	            "role": "user",
	            "content": conv
	        }
	    ]

	    response = openai.ChatCompletion.create(
	        model="gpt-3.5-turbo",
	        messages=messages,
	        temperature=0,
	        max_tokens=1000,
	        top_p=1,
	        frequency_penalty=0,
	        presence_penalty=0
	    )

	    # Plain "|" separators: "\|" is an invalid escape sequence and would put
	    # a literal backslash into the text shown to the user.
	    call_summary = (
	        f"Sentiment Analysis:\nPositive: {sentiment_scores['pos']} | Negative: {sentiment_scores['neg']} | Neutral: {sentiment_scores['neu']}\n\n"
	        + response["choices"][0]["message"]["content"]
	    )

	    return conv, call_summary


	# Gradio UI: an audio upload plus a speaker count go in; the transcript and
	# the analysis text come out.
	demo = gr.Interface(
	    title="Bluetick Sales Call Evaluator",
	    description="Upload a sales call audio file and get a transcription of the call along with sentiment analysis",
	    fn=speech_to_text_and_sentiment,
	    inputs=[
	        gr.Audio(label="Select audio file", type="filepath"),
	        # gr.Number takes `value`, `minimum` and `maximum`; the previous
	        # `default`, `type`, `min` and `max` keywords are not its parameters.
	        # precision=0 makes the field deliver whole numbers.
	        gr.Number(label="Select number of speakers (1-5)",
	                  value=2, minimum=1, maximum=5, precision=0),
	    ],
	    outputs=[
	        gr.Textbox(label="Transcript"),
	        gr.Textbox(label="Analysis"),
	    ],
	    examples=file_names,
	    theme=gr.themes.Default(primary_hue=gr.themes.colors.red,
	                            secondary_hue=gr.themes.colors.pink),
	    css=" .gradio-title, .gradio-description {color: black;}",
	)

	demo.launch(debug=True)