Spaces:

team-indain-image-caption
/

Hindi-image-captioning

Runtime error

seanbenhur committed on Nov 23, 2021

Commit

4045aa3

1 Parent(s): dc7d2f7

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -5,6 +5,11 @@ from pathlib import Path
 from transformers import AutoTokenizer, AutoFeatureExtractor, VisionEncoderDecoderModel
 # Pattern to ignore all the text after 2 or more full stops
 regex_pattern = "[.]{2,}"
 def post_process(text):
     try:
         text = text.strip()
@@ -17,15 +22,18 @@ def predict(image, max_length=64, num_beams=4):
     pixel_values = feature_extractor(images=image, return_tensors="pt").pixel_values
     pixel_values = pixel_values.to(device)
     with torch.no_grad():
-        output_ids = model.generate(
-            pixel_values,
-            max_length=max_length,
-            num_beams=num_beams,
-            return_dict_in_generate=True,
-        ).sequences
-    preds = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
-    pred = post_process(preds[0])
-    return pred
 model_path = "team-indain-image-caption/hindi-image-captioning"
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

 from transformers import AutoTokenizer, AutoFeatureExtractor, VisionEncoderDecoderModel
 # Pattern to ignore all the text after 2 or more full stops
 regex_pattern = "[.]{2,}"
+#sample = val_dataset[800]
+#model = model.cuda()
+#print(tokenizer.decode(model.generate(sample['pixel_values'].unsqueeze(0).cuda())[0]).replace('<|endoftext|>', '').split('\n')[0],'\n\n\n')
 def post_process(text):
     try:
         text = text.strip()
     pixel_values = feature_extractor(images=image, return_tensors="pt").pixel_values
     pixel_values = pixel_values.to(device)
     with torch.no_grad():
+        text = model.generate(pixel_values.unsqueeze(0).cuda())
+        text = tokenizer.decode(text.replace('<|endoftext|>', '').split('\n')[0],'\n\n\n')
+       # output_ids = model.generate(
+        #    pixel_values,
+        #    max_length=max_length,
+         #   num_beams=num_beams,
+         #   return_dict_in_generate=True,
+        #).sequences
+    #preds = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
+    #pred = post_process(preds[0])
+    return text
 model_path = "team-indain-image-caption/hindi-image-captioning"
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")