from transformers import pipeline

import gradio as gr

# Speech recognizer backed by the Wav2Vec2 checkpoint named below.
asr = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h")

# Sentiment classifier. The checkpoint is pinned explicitly instead of relying
# on the task default: with no model given, transformers picks a default that
# may change between releases and emits a "not recommended" warning.
# NOTE(review): this id is believed to be the checkpoint the task default
# resolved to (DistilBERT fine-tuned on SST-2) - confirm before deploying.
classifier = pipeline(
    "text-classification",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)

def speech_to_text(mic=None, file=None):
    """Transcribe an audio input with the module-level ``asr`` pipeline.

    Args:
        mic: Audio input from the microphone widget, or ``None``.
        file: Audio input from the upload widget, or ``None``.

    Returns:
        The ``"text"`` field of the ``asr`` result for the chosen input.
        ``mic`` takes precedence when both are given. If both are ``None``,
        a fixed message asking for an input is returned instead.
    """
    # Prefer the microphone recording; fall back to the uploaded file.
    recording = mic if mic is not None else file
    if recording is None:
        return "You must either provide a mic recording or a file"

    transcription = asr(recording)
    return transcription["text"]


def text_to_sentiment(text):
    """Classify ``text`` with the module-level ``classifier`` pipeline.

    Args:
        text: The text to classify.

    Returns:
        The ``"label"`` field of the first entry in the classifier's result.
    """
    predictions = classifier(text)
    first_prediction = predictions[0]
    return first_prediction["label"]

# Page title handed to the Blocks constructor.
title = "Speech-Text-Sentiment"

# Description text. Every line ends in two newlines (the original combined an
# explicit "\n" escape with the literal line break), so each line forms its
# own paragraph when interpreted as Markdown.
description = (
    "\n"
    "Task: Speech to Text to Sentiment\n\n"
    "Model: \n\n"
    "speech to text (Wav2Vec2ForCTC)\n\n"
    "text to sentiment (DistilBertForSequenceClassification)\n\n"
)

# Theme identifier handed to the Blocks constructor.
theme = "freddyaboulton/dracula_revamped"

# Build the two-step UI: audio -> transcript -> sentiment label.
#
# `gr.Blocks` has no `description` parameter (that belongs to `gr.Interface`),
# so passing it to the constructor never displayed the text. The description
# is rendered explicitly as a Markdown element at the top of the page instead.
demo = gr.Blocks(title=title, theme=theme)

with demo:
    gr.Markdown(description)

    # Two alternative audio inputs; both hand a file path to the callback.
    # The deprecated `optional=True` flag was dropped: it has no effect, and
    # an unused input already reaches the callback as None.
    audio_file = [
        gr.Audio(source="microphone", type="filepath"),
        gr.Audio(source="upload", type="filepath"),
    ]
    text = gr.Textbox()
    label = gr.Label()

    b1 = gr.Button("Recognize Speech")
    b2 = gr.Button("Classify Sentiment")

    # Step 1 fills the textbox with the transcript; step 2 classifies
    # whatever is currently in the textbox.
    b1.click(speech_to_text, inputs=audio_file, outputs=text)
    b2.click(text_to_sentiment, inputs=text, outputs=label)

demo.launch()