import functools
import re

import requests
import pandas as pd
import plotly.express as px
import torch
import gradio as gr
from transformers import pipeline, Wav2Vec2ProcessorWithLM
from pyannote.audio import Pipeline
from librosa import load, resample
import whisperx

# Regex building blocks used by split_into_sentences.  Raw strings keep the
# ``\s`` escapes from triggering invalid-escape warnings.
alphabets = r"([A-Za-z])"
prefixes = r"(Mr|St|Mrs|Ms|Dr)[.]"
suffixes = r"(Inc|Ltd|Jr|Sr|Co)"
starters = r"(Mr|Mrs|Ms|Dr|He\s|She\s|It\s|They\s|Their\s|Our\s|We\s|But\s|However\s|That\s|This\s|Wherever)"
acronyms = r"([A-Z][.][A-Z][.](?:[A-Z][.])?)"
websites = r"[.](com|net|org|io|gov)"

# Temporary placeholders used while splitting: _PERIOD protects a "." that
# must NOT end a sentence, _STOP marks a position where a sentence ends.
_PERIOD = "<prd>"
_STOP = "<stop>"


def split_into_sentences(text):
    """Split ``text`` into a list of stripped sentences.

    Periods that belong to titles (``Dr.``), single-letter initials,
    acronyms (``U.S.``), company suffixes (``Inc.``), web domains
    (``.com``) and ``Ph.D.`` are temporarily replaced by a placeholder so
    that they are not treated as sentence boundaries.  Every remaining
    ``.``, ``?`` and ``!`` then ends a sentence.

    Args:
        text: The string to split.  Newlines are treated as spaces.

    Returns:
        A list of sentences with surrounding whitespace removed.  Text after
        the last terminator is kept as a final sentence when it is not
        empty; an empty input yields an empty list.
    """
    text = " " + text + " "
    text = text.replace("\n", " ")

    # Protect periods that are part of an abbreviation rather than a stop.
    text = re.sub(prefixes, r"\1" + _PERIOD, text)
    text = re.sub(websites, _PERIOD + r"\1", text)
    if "Ph.D" in text:
        text = text.replace("Ph.D.", "Ph" + _PERIOD + "D" + _PERIOD)
    text = re.sub(r"\s" + alphabets + "[.] ", r" \1" + _PERIOD + " ", text)
    # An acronym directly followed by a sentence starter does end a sentence.
    text = re.sub(acronyms + " " + starters, r"\1" + _STOP + r" \2", text)
    text = re.sub(
        alphabets + "[.]" + alphabets + "[.]" + alphabets + "[.]",
        r"\1" + _PERIOD + r"\2" + _PERIOD + r"\3" + _PERIOD,
        text,
    )
    text = re.sub(
        alphabets + "[.]" + alphabets + "[.]",
        r"\1" + _PERIOD + r"\2" + _PERIOD,
        text,
    )
    # A suffix followed by a sentence starter ends a sentence; otherwise its
    # period is just part of the abbreviation.
    text = re.sub(" " + suffixes + "[.] " + starters, r" \1" + _STOP + r" \2", text)
    text = re.sub(" " + suffixes + "[.]", r" \1" + _PERIOD, text)
    text = re.sub(" " + alphabets + "[.]", r" \1" + _PERIOD, text)

    # Move terminators outside closing quotes so the quote stays attached to
    # the sentence it closes.
    if "”" in text:
        text = text.replace(".”", "”.")
    if "\"" in text:
        text = text.replace(".\"", "\".")
    if "!" in text:
        text = text.replace("!\"", "\"!")
    if "?" in text:
        text = text.replace("?\"", "\"?")

    # Mark every remaining terminator as a boundary, then restore the
    # protected periods.
    text = text.replace(".", "." + _STOP)
    text = text.replace("?", "?" + _STOP)
    text = text.replace("!", "!" + _STOP)
    text = text.replace(_PERIOD, ".")

    sentences = [piece.strip() for piece in text.split(_STOP)]
    # The remainder after the last terminator is normally just padding;
    # drop it only when it is empty so unterminated trailing text survives.
    if sentences and not sentences[-1]:
        sentences.pop()
    return sentences


# display if the sentiment value is above these thresholds
thresholds = {
    "joy": 0.99,
    "anger": 0.95,
    "surprise": 0.95,
    "sadness": 0.98,
    "fear": 0.95,
    "love": 0.99,
}

color_map = {
    "joy": "green",
    "anger": "red",
    "surprise": "yellow",
    "sadness": "blue",
    "fear": "orange",
    "love": "purple",
}


def create_fig(x_min, x_max, to_plot, plot_sentences):
    """Build a Plotly line chart titled "Customer sentiment over time".

    Args:
        x_min: Lower bound of the displayed x-axis range.
        x_max: Upper bound of the displayed x-axis range.
        to_plot: Non-empty sequence of ``(x, y)`` pairs; x is labelled
            "time (seconds)" and y is labelled "sentiment".
        plot_sentences: One entry per point in ``to_plot``; shown in the
            hover tooltip as ``sentence``.

    Returns:
        The configured Plotly figure.

    Raises:
        ValueError: If ``to_plot`` is empty (it cannot be unpacked into the
            x and y columns).
    """
    x, y = list(zip(*to_plot))
    plot_df = pd.DataFrame(
        data={
            "x": x,
            "y": y,
            "sentence": plot_sentences,
        }
    )
    fig = px.line(
        plot_df,
        x="x",
        y="y",
        # The tooltip shows the sentence and its x value but hides y.
        hover_data={
            "sentence": True,
            "x": True,
            "y": False,
        },
        labels={"x": "time (seconds)", "y": "sentiment"},
        title="Customer sentiment over time",
        markers=True,
    )
    fig = fig.update_yaxes(categoryorder="category ascending")
    fig = fig.update_layout(
        font=dict(
            size=18,
        ),
        xaxis_range=[x_min, x_max],
    )
    return fig


def speech_to_text(speech_file, speaker_segmentation, whisper, alignment_model, metadata, whisper_device):
    """Transcribe ``speech_file`` and group the words by speaker turn.

    Args:
        speech_file: Audio input passed unchanged to the segmentation,
            transcription and alignment steps.
        speaker_segmentation: Callable applied to ``speech_file``; its result
            must provide ``itertracks(yield_label=True)`` yielding turns with
            ``start`` and ``end`` attributes.
        whisper: Object whose ``transcribe(speech_file)`` returns a mapping
            with a ``"segments"`` entry.
        alignment_model: Forwarded to ``whisperx.align``.
        metadata: Forwarded to ``whisperx.align``.
        whisper_device: Forwarded to ``whisperx.align``.

    Returns:
        A flat list that holds, for every turn containing at least one word,
        the pair ``(text, speaker)`` followed by
        ``("from <start>-<end>", None)``.  ``speaker`` alternates between
        ``"Customer"`` and ``"Support"``, starting with ``"Customer"``.
    """
    speaker_output = speaker_segmentation(speech_file)
    result = whisper.transcribe(speech_file)

    chunks = whisperx.align(
        result["segments"], alignment_model, metadata, speech_file, whisper_device
    )["word_segments"]

    diarized_output = []
    i = 0
    speaker_counter = 0

    # New iteration every time the speaker changes
    for turn, _, _ in speaker_output.itertracks(yield_label=True):
        speaker = "Customer" if speaker_counter % 2 == 0 else "Support"

        # Consume every not-yet-used word that ends inside this turn.
        words = []
        while i < len(chunks) and chunks[i]["end"] <= turn.end:
            words.append(chunks[i]["text"])
            i += 1
        # Each word is followed by a single space, including the last one.
        diarized = "".join(word + " " for word in words)

        if diarized != "":
            # Punctuation restoration via rpunct is currently disabled.
            diarized_output.extend(
                [
                    (diarized, speaker),
                    ("from {:.2f}-{:.2f}".format(turn.start, turn.end), None),
                ]
            )
            # NOTE(review): the source lost its indentation; the speaker is
            # assumed to alternate only after a turn that produced text.
            # Confirm against the original layout.
            speaker_counter += 1

    return diarized_output