from transformers import ViTFeatureExtractor, ViTForImageClassification
import gradio as gr
from datasets import load_dataset
import torch

# CIFAR-100 is loaded only for reference; the demo itself classifies
# user-uploaded images and the example files listed below.
dataset = load_dataset("cifar100")
sample_image = dataset["train"][0]["img"]  # one sample image, not used by the app

# Load the ViT feature extractor and classifier once at startup rather than
# on every request.
feature_extractor = ViTFeatureExtractor.from_pretrained("google/vit-base-patch16-224")
model = ViTForImageClassification.from_pretrained("google/vit-base-patch16-224")


def classify(image):
    # Preprocess the image and run it through the classifier.
    inputs = feature_extractor(images=image, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
    logits = outputs.logits
    # The model predicts one of the 1,000 ImageNet classes.
    predicted_class_idx = logits.argmax(-1).item()
    return model.config.id2label[predicted_class_idx]


def image2speech(image):
    # Classify the image, then synthesize the predicted label as speech.
    txt = classify(image)
    return fastspeech(txt), txt


# Text-to-speech model loaded from the Hugging Face Hub; it is defined before
# the interface is launched, so it is available when image2speech runs.
fastspeech = gr.Interface.load("huggingface/facebook/fastspeech2-en-ljspeech")

app = gr.Interface(
    fn=image2speech,
    inputs="image",
    outputs=["audio", "text"],
    title="Image to speech",
    description="Classifies an image and tells you what it is.",
    examples=["remotecontrol.jpg", "calculator.jpg", "cellphone.jpg"],
    allow_flagging="never",
)
app.launch()
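
# A minimal sketch of how classify() could be smoke-tested on its own, e.g.
# from a notebook or a separate script (assumes Pillow is installed and that
# one of the bundled example images is present in the working directory):
#
#     from PIL import Image
#     print(classify(Image.open("remotecontrol.jpg")))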