import gradio as gr
import requests
import tensorflow as tf
from transformers import pipeline

# Fetch the ImageNet class labels (shortlink: https://git.io/JJkYN)
respuesta = requests.get("https://raw.githubusercontent.com/gradio-app/mobilenet-example/master/labels.txt")
etiquetas = respuesta.text.split("\n")

# Models: Spanish speech recognition, Spanish sentiment analysis, and image classification
trans = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-large-xlsr-53-spanish")
clasificador = pipeline("text-classification", model="pysentimiento/robertuito-sentiment-analysis")
inception_net = tf.keras.applications.MobileNetV2()


def clasifica_imagen(inp):
    # Reshape to a batch of one 224x224 RGB image and apply MobileNetV2 preprocessing
    inp = inp.reshape((-1, 224, 224, 3))
    inp = tf.keras.applications.mobilenet_v2.preprocess_input(inp)
    prediction = inception_net.predict(inp).flatten()
    # Map each of the 1000 ImageNet classes to its predicted confidence
    confidences = {etiquetas[i]: float(prediction[i]) for i in range(1000)}
    return confidences


def audio_a_text(audio):
    # Transcribe the recorded audio file to Spanish text
    text = trans(audio)["text"]
    return text


def texto_a_sentimiento(text):
    # Return only the top sentiment label (POS, NEG, or NEU)
    return clasificador(text)[0]["label"]


demo = gr.Blocks()

with demo:
    gr.Markdown("This is the second demo built with Blocks")
    with gr.Tabs():
        with gr.TabItem("Transcribe Spanish audio"):
            with gr.Row():
                audio = gr.Audio(source="microphone", type="filepath")
                transcripcion = gr.Textbox()
            b1 = gr.Button("Transcribe, please")
        with gr.TabItem("Spanish sentiment analysis"):
            with gr.Row():
                texto = gr.Textbox()
                label_sentimiento = gr.Label()
            b2 = gr.Button("Sentiment, please")
        with gr.TabItem("Image classification"):
            with gr.Row():
                image = gr.Image(shape=(224, 224))
                label_imagen = gr.Label(num_top_classes=3)
            b3 = gr.Button("Classify")

    # Wire each button to its function and its own output component,
    # so the sentiment and image tabs don't share a single Label
    b1.click(audio_a_text, inputs=audio, outputs=transcripcion)
    b2.click(texto_a_sentimiento, inputs=texto, outputs=label_sentimiento)
    b3.click(clasifica_imagen, inputs=image, outputs=label_imagen)

demo.launch()