import os
import wave
import nltk
import torch
import openai
import whisper
import textstat
import datetime
import requests
import subprocess
import contextlib
import numpy as np
import gradio as gr
from pyannote.audio import Audio
from pyannote.core import Segment
from sklearn.cluster import AgglomerativeClustering
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from pyannote.audio.pipelines.speaker_verification import PretrainedSpeakerEmbedding
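# Pretrained ECAPA-TDNN speaker-embedding model (SpeechBrain, trained on VoxCeleb); CPU inference is enough here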
embedding_model = PretrainedSpeakerEmbedding(
    "speechbrain/spkrec-ecapa-voxceleb",
    device=torch.device("cpu"))
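# One-time setup: VADER lexicon and sentiment analyzer, plus the Whisper "base" speech-to-text model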
nltk.download('vader_lexicon')
sid = SentimentIntensityAnalyzer()
model = whisper.load_model('base')
audio = Audio()
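# The OpenAI key is read from the OPEN_AI_API_KEY environment variable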
openai.api_key = os.environ['OPEN_AI_API_KEY']
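# Example call recordings downloaded at startup and listed as Gradio examples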
example_files = [
"https://pdf.bluetickconsultants.com/e-commerce-call.mp3",
"https://pdf.bluetickconsultants.com/customer_support.mp3",
"https://pdf.bluetickconsultants.com/product_refund.mp3",
]
file_names = []
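# Download a remote file once and cache it next to the app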
def download_file(url, save_name):
    if not os.path.exists(save_name):
        response = requests.get(url)
        with open(save_name, 'wb') as f:
            f.write(response.content)
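# Fetch the example recordings and register each one (with a default of 2 speakers) as a Gradio example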
for url in example_files:
    save_name = str(url).split("/")[-1]
    download_file(url, save_name)
    file_names.append([save_name, 2])
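# Compute a speaker embedding for a single Whisper segment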
def segment_embedding(segment, duration, audio_file):
    start = segment["start"]
    # Whisper overshoots the end timestamp in the last segment
    end = min(duration, segment["end"])
    clip = Segment(start, end)
    waveform, sample_rate = audio.crop(audio_file, clip)
    # Average the channels to mono before embedding
    waveform = waveform.mean(dim=0, keepdim=True)
    return embedding_model(waveform.unsqueeze(0))
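# Main pipeline: transcribe, diarize, score sentiment and readability, then summarise with GPT-3.5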
def speech_to_text_and_sentiment(audio_file, number_of_speakers=2):
    # Convert non-WAV uploads to WAV so the wave module and pyannote can read them
    if not audio_file.endswith('.wav'):
        audio_file_name = audio_file.split("/")[-1]
        audio_file_name = audio_file_name.split(".")[0] + ".wav"
        subprocess.call(['ffmpeg', '-y', '-i', audio_file, audio_file_name])
        audio_file = audio_file_name
    # Transcribe with Whisper and read the clip duration from the WAV header
    result = model.transcribe(audio_file)
    segments = result["segments"]
    with contextlib.closing(wave.open(audio_file, 'r')) as f:
        frames = f.getnframes()
        rate = f.getframerate()
        duration = frames / float(rate)
    # Embed every segment, then cluster the embeddings into the requested number of speakers
    embeddings = np.zeros(shape=(len(segments), 192))
    for i, segment in enumerate(segments):
        embeddings[i] = segment_embedding(segment, duration, audio_file)
    embeddings = np.nan_to_num(embeddings)
    clustering = AgglomerativeClustering(
        int(number_of_speakers)).fit(embeddings)
    labels = clustering.labels_
    for i in range(len(segments)):
        segments[i]["speaker"] = 'SPEAKER ' + str(labels[i] + 1)
    def time(secs):
        return datetime.timedelta(seconds=round(secs))

    # Build a speaker-labelled transcript, adding a header whenever the speaker changes
    conv = ""
    for (i, segment) in enumerate(segments):
        if i == 0 or segments[i - 1]["speaker"] != segment["speaker"]:
            conv += "\n" + segment["speaker"] + ' ' + \
                str(time(segment["start"])) + '\n'
        conv += segment["text"][1:] + ' '
    # VADER sentiment over the full conversation, then ask GPT-3.5 for a bullet-point summary
    sentiment_scores = sid.polarity_scores(conv)
    messages = [
        {
            "role": "system",
            "content": """You will be provided with a conversation. Your task is to give a summary and mention all the main details in bullet points.
            Replace speaker 1 and speaker 2 with the sales executive or company name and the customer name if available.
            """
        },
        {
            "role": "user",
            "content": conv
        }
    ]
    # Legacy (openai<1.0) ChatCompletion API
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=messages,
        temperature=0,
        max_tokens=1000,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0
    )
    # Flesch Reading Ease as a rough measure of how clearly the call reads
    readability_score = textstat.flesch_reading_ease(conv)
    call_summary = ""
    call_summary += "Sentiment Analysis:\n" + "-------------------------------------\n"
    call_summary += f"Positive: {sentiment_scores['pos']} | Negative: {sentiment_scores['neg']} | Neutral: {sentiment_scores['neu']}\n\n"
    call_summary += "Readability / Clarity of speech:\n" + "-------------------------------------\n"
    call_summary += f"Readability Score (Flesch Reading Ease): {readability_score}\n\n"
    call_summary += "Call Summary:\n" + "-------------------------------------\n"
    call_summary += response["choices"][0]["message"]["content"]
    return call_summary, conv
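# Gradio UI: audio file and speaker count in, analysis summary and transcript out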
demo = gr.Interface(
    title="Bluetick Sales Call Evaluator",
    description="Upload a sales call recording to get a speaker-labelled transcript along with sentiment analysis, a readability score, and an AI-generated summary",
    fn=speech_to_text_and_sentiment,
    inputs=[
        gr.Audio(label="Select audio file", type="filepath"),
        gr.Number(label="Select number of speakers (1-5)", value=2, precision=0)
    ],
    outputs=[
        gr.Textbox(label="Analysis & Summary"),
        gr.Textbox(label="Transcript"),
    ],
    examples=file_names,
    theme=gr.themes.Soft().set(
        body_text_color="black"
    ),
    css=".gradio-container {background-color: white !important;} .prose h1{color: black !important;} p {color: black !important;}",
)
demo.launch(debug=True)