import gradio as gr
import requests
import tensorflow as tf
from transformers import pipeline, AutoModelForCTC, AutoTokenizer

# Speech recognition pipeline for Spanish audio
model_name = "facebook/wav2vec2-large-xlsr-53-spanish"
model = AutoModelForCTC.from_pretrained(model_name, from_pt=True)
tokenizer = AutoTokenizer.from_pretrained(model_name)
trans = pipeline("automatic-speech-recognition", model=model, tokenizer=tokenizer)

def audio2text(audio):
    text = trans(audio)["text"]
    return text

# Sentiment analysis pipeline for Spanish text
classifier = pipeline("text-classification", model="pysentimiento/robertuito-sentiment-analysis")

def text2sentiment(text):
    return classifier(text)[0]["label"]

# Image classification with MobileNetV2 and the ImageNet labels
inception_net = tf.keras.applications.MobileNetV2()
answer = requests.get("https://git.io/JJkYN")
labels = answer.text.split("\n")

def image_classification(inp):
    inp = inp.reshape((-1, 224, 224, 3))
    inp = tf.keras.applications.mobilenet_v2.preprocess_input(inp)
    prediction = inception_net.predict(inp).flatten()
    confidences = {labels[i]: float(prediction[i]) for i in range(1000)}
    return confidences

# Build the Blocks demo with one tab per task
demo = gr.Blocks()

with demo:
    gr.Markdown("This is the second demo with Blocks")
    with gr.Tabs():
        with gr.TabItem("Transcribe audio in Spanish"):
            with gr.Row():
                audio = gr.Audio(source="microphone", type="filepath")
                transcription = gr.Textbox()
            b1 = gr.Button("Transcribe")
        with gr.TabItem("Sentiment analysis in Spanish"):
            with gr.Row():
                text = gr.Textbox()
                label_sentiment = gr.Label()
            b2 = gr.Button("Sentiment")
        with gr.TabItem("Image classification"):
            with gr.Row():
                image = gr.Image(shape=(224, 224))
                label_image = gr.Label(num_top_classes=3)
            b3 = gr.Button("Classify")

    # Wire each button to its function
    b1.click(audio2text, inputs=audio, outputs=transcription)
    b2.click(text2sentiment, inputs=text, outputs=label_sentiment)
    b3.click(image_classification, inputs=image, outputs=label_image)

demo.launch()
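
# Optional usage note (a minimal sketch, not part of the original demo):
# launch() also accepts share=True, which serves the same three-tab interface
# through a temporary public Gradio link so it can be tried from another device.
# demo.launch(share=True)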