"""Gradio Blocks demo with three tabs.

The tabs are: Spanish speech-to-text, sentiment analysis of text, and
image classification with a Keras MobileNetV2 model.
"""
import gradio as gr
import tensorflow as tf
from transformers import pipeline

# Number of decoded classes requested from Keras for each image.
_TOP_PREDICTIONS = 100

# NOTE: the name is historical; the model actually loaded is MobileNetV2.
inception_net = tf.keras.applications.MobileNetV2()


def classify_imagen(inp):
    """Classify an image with MobileNetV2 and return per-class scores.

    Args:
        inp: Image data exposing ``.reshape``; it is reshaped to
            ``(-1, 224, 224, 3)`` before being fed to the model.
            (Presumably the NumPy array Gradio passes from ``gr.Image`` --
            confirm against the Gradio version in use.)

    Returns:
        dict: Maps each decoded class name to its score as a ``float``,
        for the top ``_TOP_PREDICTIONS`` classes.
    """
    batch = inp.reshape((-1, 224, 224, 3))
    batch = tf.keras.applications.mobilenet_v2.preprocess_input(batch)
    prediction = inception_net.predict(batch).reshape(1, 1000)
    decoded = tf.keras.applications.mobilenet_v2.decode_predictions(
        prediction, top=_TOP_PREDICTIONS
    )
    # Each decoded entry is a 3-tuple; the second field is used as the
    # label and the third as the score (the first field is unused).
    return {f'{name}': float(score) for _, name, score in decoded[0]}


trans = pipeline(
    "automatic-speech-recognition",
    model="facebook/wav2vec2-large-xlsr-53-spanish",
)


def audio2text(audio):
    """Transcribe audio with the speech-recognition pipeline.

    Args:
        audio: Input forwarded unchanged to the pipeline. The audio widget
            below is configured with ``type='filepath'``.

    Returns:
        The ``"text"`` field of the pipeline result.
    """
    return trans(audio)["text"]


classificator = pipeline(
    "text-classification",
    model="pysentimiento/robertuito-sentiment-analysis",
)


def text2sentiment(text):
    """Return the label of the first text-classification result for *text*."""
    return classificator(text)[0]['label']


demo = gr.Blocks()

with demo:
    gr.Markdown("Este es un demo con Blocks ")
    with gr.Tabs():
        # NOTE(review): `source=` here and `shape=` further down look like
        # Gradio 3.x keyword arguments -- confirm against the pinned version.
        with gr.TabItem("Transcribe Audio en español"):
            with gr.Row():
                audio = gr.Audio(source='microphone', type='filepath')
                transcript = gr.Textbox()
            b1 = gr.Button("Transcribe")

        with gr.TabItem("Analisis de sentimientos"):
            with gr.Row():
                texto = gr.Textbox()
                label = gr.Label()
            b2 = gr.Button("Sentimientos")

        # Wire the first two tabs before `label` is rebound below, so the
        # sentiment handler keeps pointing at the sentiment tab's Label.
        b1.click(audio2text, inputs=audio, outputs=transcript)
        b2.click(text2sentiment, inputs=texto, outputs=label)

        with gr.TabItem("Clasificador de imagenes"):
            with gr.Row():
                image = gr.Image(shape=(224, 224))
                # Rebinds `label`; safe because b2.click was wired above.
                label = gr.Label(num_top_classes=3)
            bimage = gr.Button("Clasificar")
            bimage.click(classify_imagen, inputs=image, outputs=label)

demo.launch()