BuildTools committed on
Commit
d6912cc
1 Parent(s): a4f2fe4

Added app and requirements

Browse files
Files changed (2) hide show
  1. app.py +22 -0
  2. requirements.txt +2 -0
app.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import ViTFeatureExtractor, ViTForImageClassification
2
+ import gradio as gr
3
+
4
# Checkpoint shared by the feature extractor and the classifier.
_VIT_CHECKPOINT = 'google/vit-base-patch16-224'

# Lazily populated (feature_extractor, model) pair; None until the first call.
_vit_components = None


def _load_vit():
    """Return the (feature_extractor, model) pair, loading it on first use.

    Loading a pretrained checkpoint is expensive, so the pair is created
    once and reused by every later call instead of being rebuilt per request.
    """
    global _vit_components
    if _vit_components is None:
        feature_extractor = ViTFeatureExtractor.from_pretrained(_VIT_CHECKPOINT)
        model = ViTForImageClassification.from_pretrained(_VIT_CHECKPOINT)
        # Publish both objects in a single assignment so a concurrent caller
        # never observes a half-initialised pair.
        _vit_components = (feature_extractor, model)
    return _vit_components


def classify(image):
    """Classify an image with the ViT checkpoint and return its label.

    Args:
        image: The image to classify, forwarded unchanged to the feature
            extractor (presumably whatever the Gradio "image" input
            delivers; confirm against the caller).

    Returns:
        The entry of ``model.config.id2label`` for the highest-scoring class.
    """
    # Imported locally: the tensors requested below ("pt") already require
    # PyTorch, but this file does not import it at module level.
    import torch

    feature_extractor, model = _load_vit()
    inputs = feature_extractor(images=image, return_tensors="pt")
    # Inference only, so skip building the autograd graph.
    with torch.no_grad():
        logits = model(**inputs).logits
    # model predicts one of the 1000 ImageNet classes
    predicted_class_idx = logits.argmax(-1).item()
    return model.config.id2label[predicted_class_idx]
13
+
14
def image2speech(image):
    """Turn an image into speech describing its predicted class.

    Args:
        image: The image to describe; passed straight to ``classify``.

    Returns:
        A ``(speech, label)`` pair: the result of calling ``fastspeech`` on
        the predicted label, followed by the label text itself.
    """
    label = classify(image)
    speech = fastspeech(label)
    return speech, label
17
+
18
# Text-to-speech model loaded through Gradio; image2speech calls it with the
# predicted label text.
fastspeech = gr.Interface.load("huggingface/facebook/fastspeech2-en-ljspeech")

# The UI takes one image and shows two outputs: the audio and the label text.
app = gr.Interface(
    fn=image2speech,
    inputs="image",
    outputs=["audio", "text"],
)

app.launch()
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ transformers
2
+ gradio