import gradio as gr
import requests
import tensorflow as tf
from transformers import pipeline, AutoModelForCTC, AutoTokenizer

# Speech recognition pipeline for Spanish audio
model_name = "facebook/wav2vec2-large-xlsr-53-spanish"
model = AutoModelForCTC.from_pretrained(model_name, from_pt=True)
tokenizer = AutoTokenizer.from_pretrained(model_name)
trans = pipeline("automatic-speech-recognition", model=model, tokenizer=tokenizer)

def audio2text(audio):
    text = trans(audio)["text"]
    return text

# Sentiment analysis pipeline for Spanish text
classifier = pipeline("text-classification", model="pysentimiento/robertuito-sentiment-analysis")

def text2sentiment(text):
    return classifier(text)[0]["label"]

# Image classification with MobileNetV2 and the ImageNet labels
inception_net = tf.keras.applications.MobileNetV2()
answer = requests.get("https://git.io/JJkYN")
labels = answer.text.split("\n")

def image_classification(inp):
    inp = inp.reshape((-1, 224, 224, 3))
    inp = tf.keras.applications.mobilenet_v2.preprocess_input(inp)
    prediction = inception_net.predict(inp).flatten()
    confidences = {labels[i]: float(prediction[i]) for i in range(1000)}
    return confidences

# Build the Blocks demo with one tab per task
demo = gr.Blocks()

with demo:
    gr.Markdown("This is the second demo with Blocks")
    with gr.Tabs():
        with gr.TabItem("Transcribe audio in Spanish"):
            with gr.Row():
                audio = gr.Audio(source="microphone", type="filepath")
                transcription = gr.Textbox()
            b1 = gr.Button("Transcribe")
        with gr.TabItem("Sentiment analysis in Spanish"):
            with gr.Row():
                text = gr.Textbox()
                label_sentiment = gr.Label()
            b2 = gr.Button("Sentiment")
        with gr.TabItem("Image classification"):
            with gr.Row():
                image = gr.Image(shape=(224, 224))
                label_image = gr.Label(num_top_classes=3)
            b3 = gr.Button("Classify")

    # Wire each button to its function
    b1.click(audio2text, inputs=audio, outputs=transcription)
    b2.click(text2sentiment, inputs=text, outputs=label_sentiment)
    b3.click(image_classification, inputs=image, outputs=label_image)

demo.launch()
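
# Optional usage note (a minimal sketch, not part of the original demo):
# launch() also accepts share=True, which serves the same three-tab interface
# through a temporary public Gradio link so it can be tried from another device.
# demo.launch(share=True)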