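# Customer-call analysis demo: transcribe audio with WhisperX, attribute
# speakers with pyannote.audio diarization, then summarize the call and
# classify the customer's emotions, all behind a Gradio UI.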
import os
from functools import partial

import torch
import gradio as gr
from transformers import pipeline
from pyannote.audio import Pipeline
import whisperx

from utils import split, speech_to_text as stt

os.environ["TOKENIZERS_PARALLELISM"] = "false"

# transformers pipelines take a device index: 0 = first GPU, -1 = CPU
device = 0 if torch.cuda.is_available() else -1
color_map = {
    "joy": "green",
    "anger": "red",
    "surprise": "yellow",
    "sadness": "blue",
    "fear": "orange",
    "love": "purple",
}
# Audio components
whisper_device = "cuda" if torch.cuda.is_available() else "cpu"
whisper = whisperx.load_model("tiny.en", whisper_device)
alignment_model, metadata = whisperx.load_align_model(language_code="en", device=whisper_device)
speaker_segmentation = Pipeline.from_pretrained(
    "pyannote/speaker-diarization@2.1",
    use_auth_token=os.environ["ENO_TOKEN"],  # gated model: needs a Hugging Face access token
)

# Bind the loaded models to the speech_to_text helper so the UI callbacks
# only need to pass the audio file path.
speech_to_text = partial(
    stt,
    speaker_segmentation=speaker_segmentation,
    whisper=whisper,
    alignment_model=alignment_model,
    metadata=metadata,
    whisper_device=whisper_device,
)
# Text components
emotion_pipeline = pipeline(
    "text-classification",
    model="bhadresh-savani/distilbert-base-uncased-emotion",
    device=device,
)
summarization_pipeline = pipeline(
    "summarization",
    model="knkarthick/MEETING_SUMMARY",
    device=device,
)
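# Callbacks wired to the Gradio buttons below. `diarized` arrives as the value
# of the HighlightedText component: a list of (text, speaker_label) tuples.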
def summarize(diarized, summarization_pipeline):
    """Flatten the diarized (text, speaker) pairs into a transcript and summarize it."""
    text = ""
    for speech, speaker in diarized:
        text += f"\n{speaker}: {speech}"
    return summarization_pipeline(text)[0]["summary_text"]
def sentiment(diarized, emotion_pipeline):
    """Classify the emotion of each customer sentence.

    Assumes speakers alternate, so every other diarized segment belongs to the
    same party; only segments whose label contains "Customer" are scored.
    """
    customer_sentiments = []
    for i in range(0, len(diarized), 2):
        speaker_speech, speaker_id = diarized[i]
        if "Customer" in speaker_id:
            sentences = split(speaker_speech)
            outputs = emotion_pipeline(sentences)
            for output, sentence in zip(outputs, sentences):
                customer_sentiments.append((sentence, output["label"]))
    return customer_sentiments
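# Example audio for gr.Examples; caching runs speech_to_text once so clicking
# the example does not rerun the models. (Assumes Customer_Support_Call.wav
# ships alongside app.py in the Space repository.)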
EXAMPLES = [["Customer_Support_Call.wav"]]
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            audio = gr.Audio(label="Audio file", type="filepath")
            btn = gr.Button("Transcribe and Diarize")
            gr.Markdown("**Call Transcript:**")
            diarized = gr.HighlightedText(label="Call Transcript")
            gr.Markdown("Summarize Speaker")
            sum_btn = gr.Button("Get Summary")
            summary = gr.Textbox(lines=4)
            sentiment_btn = gr.Button("Get Customer Sentiment")
            analyzed = gr.HighlightedText(color_map=color_map)
        with gr.Column():
            gr.Markdown("## Example Files")
            gr.Examples(
                examples=EXAMPLES,
                inputs=[audio],
                outputs=[diarized],
                fn=speech_to_text,
                cache_examples=True,
            )
    # when the transcribe button is clicked, convert the audio file to text and diarize
    btn.click(fn=speech_to_text, inputs=audio, outputs=diarized)
    # when the summary button is clicked, summarize the diarized transcript
    sum_btn.click(
        fn=partial(summarize, summarization_pipeline=summarization_pipeline),
        inputs=[diarized],
        outputs=summary,
    )
    # when the sentiment button is clicked, highlight each customer sentence with its emotion
    sentiment_btn.click(
        fn=partial(sentiment, emotion_pipeline=emotion_pipeline),
        inputs=diarized,
        outputs=[analyzed],
    )

demo.launch(debug=True)