import gradio as gr
import tensorflow as tf
from transformers import pipeline

# Image classifier shared by classify_image, built once at import time with the
# Keras default arguments.
# NOTE: despite the variable name, this is a MobileNetV2 model, not Inception.
inception_net = tf.keras.applications.mobilenet_v2.MobileNetV2()

def classify_image(inp):
  """Classify an image with the module-level ``inception_net`` model.

  Args:
    inp: Image pixel array that can be reshaped to (-1, 224, 224, 3), or
      None when no image was supplied.

  Returns:
    A dict mapping each of the top-100 decoded class names to its score as a
    Python float, or None when ``inp`` is None.
  """
  # Clicking the button with an empty image input hands the callback None;
  # return an empty result instead of failing on ``None.reshape``.
  if inp is None:
    return None

  batch = inp.reshape((-1, 224, 224, 3))
  batch = tf.keras.applications.mobilenet_v2.preprocess_input(batch)
  prediction = inception_net.predict(batch).reshape(1, 1000)
  decoded = tf.keras.applications.mobilenet_v2.decode_predictions(prediction, top=100)
  # Each decoded entry is a (class_id, class_name, score) tuple; unpack the
  # entries directly rather than indexing them through a hard-coded range(100).
  return {name: float(score) for _, name, score in decoded[0]}

# Speech-recognition pipeline used by audio_to_text; created once at import
# time so the model is not reloaded on every request.
trans = pipeline(
  task="automatic-speech-recognition",
  model="facebook/wav2vec2-large-xlsr-53-spanish",
)
def audio_to_text(audio):
  """Transcribe a recording with the module-level ``trans`` pipeline.

  Args:
    audio: Value forwarded unchanged to ``trans``; the UI in this file
      supplies a file path (``type="filepath"``). None when nothing was
      recorded.

  Returns:
    The ``"text"`` field of the pipeline result, or an empty string when
    ``audio`` is None.
  """
  # Nothing recorded: return an empty transcription rather than passing None
  # to the pipeline.
  if audio is None:
    return ""
  return trans(audio)["text"]

# Text-classification pipeline used by text_to_sentiment; created once at
# import time so the model is not reloaded on every request.
classify = pipeline(
  task="text-classification",
  model="pysentimiento/robertuito-sentiment-analysis",
)
def text_to_sentiment(text):
  """Return the label of the first result from the ``classify`` pipeline.

  Args:
    text: Input forwarded unchanged to the module-level ``classify`` pipeline.

  Returns:
    The ``"label"`` value of the first element of the pipeline output.
  """
  results = classify(text)
  first_result = results[0]
  return first_result["label"]

# Three-tab Blocks UI. Each tab pairs one input component with one output
# component and a button; the button's click handler is registered right after
# its tab so the wiring sits next to the widgets it connects.
with gr.Blocks() as demo:
  gr.Markdown("Second Demo with Blocks")
  with gr.Tabs():
    # Tab 1: microphone recording (delivered as a file path) -> audio_to_text.
    with gr.TabItem("Transcript audio in spanish"):
      with gr.Row():
        audio = gr.Audio(source="microphone", type="filepath")
        transcription = gr.Textbox()
      button1 = gr.Button("Please transcript")
    button1.click(fn=audio_to_text, inputs=audio, outputs=transcription)

    # Tab 2: free text -> text_to_sentiment, shown in a Label.
    with gr.TabItem("Sentiment analisys"):
      with gr.Row():
        text = gr.Textbox()
        label = gr.Label()
      button2 = gr.Button("Please sentiment")
    button2.click(fn=text_to_sentiment, inputs=text, outputs=label)

    # Tab 3: image -> classify_image; the Label is configured with
    # num_top_classes=3.
    with gr.TabItem("Image classify"):
      with gr.Row():
        image = gr.Image(shape=(224,224))
        labelImage = gr.Label(num_top_classes=3)
      button3 = gr.Button("Please classify Image")
    button3.click(fn=classify_image, inputs=image, outputs=labelImage)

# Start the web server; this runs at import time, as in the original script.
demo.launch()