"""Gradio Blocks demo with three tabs.

1. Spanish speech-to-text (wav2vec2 ASR pipeline).
2. Sentiment classification of text (robertuito pipeline).
3. Image classification (Keras MobileNetV2).

All models are loaded once at import time and the app is launched at the
bottom of the module.
"""

import gradio as gr
import tensorflow as tf
from transformers import pipeline

# Side length of the square image requested from the gr.Image component and
# fed to the network.
_IMAGE_SIZE = 224
# Width of the prediction vector that decode_predictions receives.
_NUM_CLASSES = 1000
# How many (label, score) pairs are handed to the Label component; the
# component itself only displays its own `num_top_classes`.
_TOP_K = 100

# NOTE: despite the historical name, this is a MobileNetV2 model, not an
# Inception network. The name is kept so existing references keep working.
inception_net = tf.keras.applications.MobileNetV2()


def classify_image(inp):
    """Classify an image with MobileNetV2.

    Args:
        inp: Image array supplied by the ``gr.Image`` component (presumably a
            224x224 RGB numpy array -- it must be reshapeable to
            ``(-1, 224, 224, 3)``), or ``None`` when no image was provided.

    Returns:
        A dict mapping class description to its score as a ``float`` for the
        top ``_TOP_K`` predictions, or ``None`` when ``inp`` is ``None``.
    """
    # Gradio passes None when the button is clicked without an image; bail
    # out instead of failing on `None.reshape`.
    if inp is None:
        return None

    batch = inp.reshape((-1, _IMAGE_SIZE, _IMAGE_SIZE, 3))
    batch = tf.keras.applications.mobilenet_v2.preprocess_input(batch)
    prediction = inception_net.predict(batch).reshape(1, _NUM_CLASSES)
    decoded = tf.keras.applications.mobilenet_v2.decode_predictions(
        prediction, top=_TOP_K
    )
    # Each decoded entry is (class_id, class_description, score); only the
    # first (and only) row of the batch is used.
    return {label: float(score) for _, label, score in decoded[0]}


trans = pipeline(
    "automatic-speech-recognition",
    model="facebook/wav2vec2-large-xlsr-53-spanish",
)


def audio_to_text(audio):
    """Transcribe a recorded audio file with the ASR pipeline.

    Args:
        audio: Path of the audio file (the ``gr.Audio`` component is
            configured with ``type="filepath"``), or ``None`` when nothing
            was recorded.

    Returns:
        The ``"text"`` field of the pipeline result, or ``None`` when
        ``audio`` is ``None``.
    """
    # Nothing recorded yet: skip the pipeline call, which cannot handle None.
    if audio is None:
        return None
    return trans(audio)["text"]


classify = pipeline(
    "text-classification",
    model="pysentimiento/robertuito-sentiment-analysis",
)


def text_to_sentiment(text):
    """Return the label of the first result of the sentiment pipeline for ``text``."""
    return classify(text)[0]["label"]


demo = gr.Blocks()

with demo:
    gr.Markdown("Second Demo with Blocks")
    with gr.Tabs():
        with gr.TabItem("Transcript audio in spanish"):
            with gr.Row():
                audio = gr.Audio(source="microphone", type="filepath")
                transcription = gr.Textbox()
            button1 = gr.Button("Please transcript")

        with gr.TabItem("Sentiment analisys"):
            with gr.Row():
                text = gr.Textbox()
                label = gr.Label()
            button2 = gr.Button("Please sentiment")

        # Event wiring is kept in the original registration order.
        button1.click(audio_to_text, inputs=audio, outputs=transcription)
        button2.click(text_to_sentiment, inputs=text, outputs=label)

        with gr.TabItem("Image classify"):
            with gr.Row():
                image = gr.Image(shape=(_IMAGE_SIZE, _IMAGE_SIZE))
                labelImage = gr.Label(num_top_classes=3)
            button3 = gr.Button("Please classify Image")

        button3.click(classify_image, inputs=image, outputs=labelImage)

demo.launch()