rrquizon1 committed
Commit 4d1d4a2
1 parent: 608770e

Create app.py

Files changed (1)
  1. app.py +56 -0
app.py ADDED
@@ -0,0 +1,56 @@
+from transformers import MarianTokenizer, MarianMTModel
+from transformers import AutoFeatureExtractor, DeiTForImageClassificationWithTeacher
+from gtts import gTTS
+import gradio as gr
+
+
+def imgtrans(img):
+    # Classify the image with DeiT (both models are reloaded on every call;
+    # caching them at module level would make repeated calls faster).
+    feature_extractor = AutoFeatureExtractor.from_pretrained('facebook/deit-base-distilled-patch16-384')
+    model = DeiTForImageClassificationWithTeacher.from_pretrained('facebook/deit-base-distilled-patch16-384')
+    inputs = feature_extractor(images=img, return_tensors="pt")
+    outputs = model(**inputs)
+    logits = outputs.logits
+    # The model predicts one of the 1,000 ImageNet-1k classes.
+    predicted_class_idx = logits.argmax(-1).item()
+    english = model.config.id2label[predicted_class_idx]
+    # Keep only the first synonym of the label, e.g. "tabby, tabby cat" -> "tabby".
+    english = english.replace("_", " ")
+    english = english.split(',', 1)[0]
+
+    # Translate the English label to Filipino (Tagalog) with MarianMT.
+    src = "en"  # source language
+    trg = "tl"  # target language
+    model_name = f"Helsinki-NLP/opus-mt-{src}-{trg}"
+    model = MarianMTModel.from_pretrained(model_name)
+    tokenizer = MarianTokenizer.from_pretrained(model_name)
+
+    sample_text = english.lower()
+    batch = tokenizer([sample_text], return_tensors="pt")
+    generated_ids = model.generate(**batch)
+    fil = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
+
+    # Synthesize pronunciations for both labels with gTTS.
+    tts = gTTS(text=fil, lang='tl')
+    tts.save('filtrans.wav')
+    fil_sound = 'filtrans.wav'
+
+    english = english.lower()
+    tts = gTTS(text=english, lang='en')
+    tts.save('engtrans.wav')
+    eng_sound = 'engtrans.wav'
+
+    return fil_sound, fil, eng_sound, english
+
+
+interface = gr.Interface(fn=imgtrans,
+                         inputs=gr.inputs.Image(shape=(224, 224), label='Insert Image'),
+                         outputs=[gr.outputs.Audio(label='Filipino Pronunciation'),
+                                  gr.outputs.Textbox(label='Filipino Label'),
+                                  gr.outputs.Audio(label='English Pronunciation'),
+                                  gr.outputs.Textbox(label='English Label')],
+                         examples=['220px-Modern_British_LED_Traffic_Light.jpg', 'aki_dog.jpg', 'cat.jpg',
+                                   'dog.jpg', 'plasticbag.jpg', 'telephone.jpg',
+                                   'vpavic_211006_4796_0061.jpg', 'watch.jpg', 'wonder_cat.jpg', 'hammer.jpg'])
+interface.launch()
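A quick way to sanity-check the pipeline without launching the Gradio interface is to call imgtrans directly on a PIL image; test.jpg below is only a placeholder for any local image file:

    from PIL import Image

    # Run the classify -> translate -> text-to-speech pipeline once.
    fil_wav, fil_label, eng_wav, eng_label = imgtrans(Image.open('test.jpg'))
    print(eng_label, fil_label)   # English label and its Filipino translation
    print(eng_wav, fil_wav)       # paths of the saved gTTS audio files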