import gradio as gr
import requests
import tensorflow as tf
from transformers import pipeline

# Fetch the ImageNet class labels (shortlink: https://git.io/JJkYN)
respuesta = requests.get("https://raw.githubusercontent.com/gradio-app/mobilenet-example/master/labels.txt")
etiquetas = respuesta.text.split("\n")

# Models: Spanish speech recognition, Spanish sentiment analysis, and image classification
trans = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-large-xlsr-53-spanish")
clasificador = pipeline("text-classification", model="pysentimiento/robertuito-sentiment-analysis")
inception_net = tf.keras.applications.MobileNetV2()


def clasifica_imagen(inp):
    # Reshape to a batch of one 224x224 RGB image and apply MobileNetV2 preprocessing
    inp = inp.reshape((-1, 224, 224, 3))
    inp = tf.keras.applications.mobilenet_v2.preprocess_input(inp)
    prediction = inception_net.predict(inp).flatten()
    # Map each of the 1000 ImageNet classes to its predicted confidence
    confidences = {etiquetas[i]: float(prediction[i]) for i in range(1000)}
    return confidences


def audio_a_text(audio):
    # Transcribe the recorded audio file to Spanish text
    text = trans(audio)["text"]
    return text


def texto_a_sentimiento(text):
    # Return only the top sentiment label (POS, NEG, or NEU)
    return clasificador(text)[0]["label"]


demo = gr.Blocks()

with demo:
    gr.Markdown("This is the second demo built with Blocks")
    with gr.Tabs():
        with gr.TabItem("Transcribe Spanish audio"):
            with gr.Row():
                audio = gr.Audio(source="microphone", type="filepath")
                transcripcion = gr.Textbox()
            b1 = gr.Button("Transcribe, please")
        with gr.TabItem("Spanish sentiment analysis"):
            with gr.Row():
                texto = gr.Textbox()
                label_sentimiento = gr.Label()
            b2 = gr.Button("Sentiment, please")
        with gr.TabItem("Image classification"):
            with gr.Row():
                image = gr.Image(shape=(224, 224))
                label_imagen = gr.Label(num_top_classes=3)
            b3 = gr.Button("Classify")

    # Wire each button to its function and its own output component,
    # so the sentiment and image tabs don't share a single Label
    b1.click(audio_a_text, inputs=audio, outputs=transcripcion)
    b2.click(texto_a_sentimiento, inputs=texto, outputs=label_sentimiento)
    b3.click(clasifica_imagen, inputs=image, outputs=label_imagen)

demo.launch()