Spaces:
Runtime error
Runtime error
Commit
•
03e6f9c
1
Parent(s):
00ca6f9
clean code
Browse files
app.py
CHANGED
@@ -3,21 +3,7 @@ import re
|
|
3 |
import gradio as gr
|
4 |
from pathlib import Path
|
5 |
from transformers import AutoTokenizer, AutoFeatureExtractor, VisionEncoderDecoderModel
|
6 |
-
# Pattern to ignore all the text after 2 or more full stops
|
7 |
-
regex_pattern = "[.]{2,}"
|
8 |
-
#sample = val_dataset[800]
|
9 |
-
#model = model.cuda()
|
10 |
-
#print(tokenizer.decode(model.generate(sample['pixel_values'].unsqueeze(0).cuda())[0]).replace('<|endoftext|>', '').split('\n')[0],'\n\n\n')
|
11 |
|
12 |
-
|
13 |
-
def post_process(text):
|
14 |
-
try:
|
15 |
-
text = text.strip()
|
16 |
-
text = re.split(regex_pattern, text)[0]
|
17 |
-
except Exception as e:
|
18 |
-
print(e)
|
19 |
-
pass
|
20 |
-
return text
|
21 |
def predict(image, max_length=64, num_beams=4):
|
22 |
image = image.convert('RGB')
|
23 |
pixel_values = feature_extractor(images=image, return_tensors="pt").pixel_values
|
@@ -25,19 +11,6 @@ def predict(image, max_length=64, num_beams=4):
|
|
25 |
with torch.no_grad():
|
26 |
text = tokenizer.decode(model.generate(pixel_values.cpu())[0])
|
27 |
text = text.replace('<|endoftext|>', '').split('\n')
|
28 |
-
#[0],'\n\n\n'
|
29 |
-
#text[0]
|
30 |
-
#text = model.generate(pixel_values.cpu())
|
31 |
-
#text = tokenizer.decode(text.replace('<|endoftext|>', '').split('\n')[0],'\n\n\n')
|
32 |
-
# output_ids = model.generate(
|
33 |
-
# pixel_values,
|
34 |
-
# max_length=max_length,
|
35 |
-
# num_beams=num_beams,
|
36 |
-
# return_dict_in_generate=True,
|
37 |
-
#).sequences
|
38 |
-
|
39 |
-
#preds = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
|
40 |
-
#pred = post_process(preds[0])
|
41 |
return text[0]
|
42 |
|
43 |
model_path = "team-indain-image-caption/hindi-image-captioning"
|
@@ -49,8 +22,6 @@ print("Loaded model")
|
|
49 |
feature_extractor = AutoFeatureExtractor.from_pretrained("google/vit-base-patch16-224-in21k")
|
50 |
print("Loaded feature_extractor")
|
51 |
tokenizer = AutoTokenizer.from_pretrained(model_path)
|
52 |
-
#if model.decoder.name_or_path == "gpt2":
|
53 |
-
# tokenizer.pad_token = tokenizer.bos_token
|
54 |
print("Loaded tokenizer")
|
55 |
title = "Hindi Image Captioning"
|
56 |
description = ""
|
@@ -65,6 +36,7 @@ interface = gr.Interface(
|
|
65 |
description=description,
|
66 |
#examples=example_images,
|
67 |
live=True,
|
|
|
68 |
|
69 |
)
|
70 |
interface.launch()
|
|
|
3 |
import gradio as gr
|
4 |
from pathlib import Path
|
5 |
from transformers import AutoTokenizer, AutoFeatureExtractor, VisionEncoderDecoderModel
|
|
|
|
|
|
|
|
|
|
|
6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
def predict(image, max_length=64, num_beams=4):
|
8 |
image = image.convert('RGB')
|
9 |
pixel_values = feature_extractor(images=image, return_tensors="pt").pixel_values
|
|
|
11 |
with torch.no_grad():
|
12 |
text = tokenizer.decode(model.generate(pixel_values.cpu())[0])
|
13 |
text = text.replace('<|endoftext|>', '').split('\n')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
return text[0]
|
15 |
|
16 |
model_path = "team-indain-image-caption/hindi-image-captioning"
|
|
|
22 |
feature_extractor = AutoFeatureExtractor.from_pretrained("google/vit-base-patch16-224-in21k")
|
23 |
print("Loaded feature_extractor")
|
24 |
tokenizer = AutoTokenizer.from_pretrained(model_path)
|
|
|
|
|
25 |
print("Loaded tokenizer")
|
26 |
title = "Hindi Image Captioning"
|
27 |
description = ""
|
|
|
36 |
description=description,
|
37 |
#examples=example_images,
|
38 |
live=True,
|
39 |
+
theme="darkpeach"
|
40 |
|
41 |
)
|
42 |
interface.launch()
|