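"""Gradio Blocks demo with three tabs: Spanish speech transcription,
Spanish sentiment analysis, and ImageNet image classification."""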
import gradio as gr
import requests
from transformers import pipeline
import tensorflow as tf
# Fetch the ImageNet class labels (originally hosted at "https://git.io/JJkYN")
respuesta = requests.get("https://raw.githubusercontent.com/gradio-app/mobilenet-example/master/labels.txt")
etiquetas = respuesta.text.split("\n")
# Models: speech recognition, sentiment analysis, and image classification
trans = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-large-xlsr-53-spanish")
clasificador = pipeline("text-classification", model="pysentimiento/robertuito-sentiment-analysis")
# MobileNetV2 pretrained on ImageNet (the variable name is historical)
inception_net = tf.keras.applications.MobileNetV2()
def clasifica_imagen(inp):
    # Reshape to a batch of one 224x224 RGB image and apply MobileNetV2 preprocessing
    inp = inp.reshape((-1, 224, 224, 3))
    inp = tf.keras.applications.mobilenet_v2.preprocess_input(inp)
    prediction = inception_net.predict(inp).flatten()
    # Map each of the 1000 ImageNet classes to its predicted confidence
    confidences = {etiquetas[i]: float(prediction[i]) for i in range(1000)}
    return confidences
def audio_a_text(audio):
    # Transcribe the recorded audio file to Spanish text
    text = trans(audio)["text"]
    return text

def texto_a_sentimiento(text):
    # Return the predicted sentiment label (POS/NEG/NEU)
    return clasificador(text)[0]["label"]
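# Blocks UI: three tabs, each pairing input/output components with a button
# that calls the matching function above.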
demo = gr.Blocks()

with demo:
    gr.Markdown("This is the second demo with Blocks")
    with gr.Tabs():
        with gr.TabItem("Transcribe Spanish audio"):
            with gr.Row():
                audio = gr.Audio(source="microphone", type="filepath")
                transcripcion = gr.Textbox()
            b1 = gr.Button("Transcribe")
        with gr.TabItem("Spanish sentiment analysis"):
            with gr.Row():
                texto = gr.Textbox()
                label_texto = gr.Label()
            b2 = gr.Button("Analyze sentiment")
        with gr.TabItem("Image classification"):
            with gr.Row():
                image = gr.Image(shape=(224, 224))
                # Renamed from `label` so it no longer shadows the sentiment tab's output
                label_imagen = gr.Label(num_top_classes=3)
            b3 = gr.Button("Classify")

    b1.click(audio_a_text, inputs=audio, outputs=transcripcion)
    b2.click(texto_a_sentimiento, inputs=texto, outputs=label_texto)
    b3.click(clasifica_imagen, inputs=image, outputs=label_imagen)

demo.launch()
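# Note: demo.launch(share=True) would additionally create a temporary public link.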