iakarshu committed on
Commit
e1c1946
·
1 Parent(s): 97e6b00

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -0
app.py CHANGED
@@ -73,6 +73,9 @@ answer = gr.outputs.Textbox(label="Predicted answer")
73
  examples = [["remote.jpg", "what number is the button near the top left?"]]
74
 
75
 
 
 
 
76
  def answer_question(image, question):
77
  image.save('sample_img.jpg')
78
 
@@ -111,6 +114,7 @@ def answer_question(image, question):
111
  tokenized_words = tokenized_words.unsqueeze(0)
112
  question = question.unsqueeze(0)
113
 
 
114
  encoding = {'img': img, 'boxes': boxes, 'tokenized_words': tokenized_words, 'question': question}
115
 
116
  with torch.no_grad():
 
73
  examples = [["remote.jpg", "what number is the button near the top left?"]]
74
 
75
 
76
+ from transformers import ViTFeatureExtractor, ViTModel
77
+ vit_feat_extract = ViTFeatureExtractor("google/vit-base-patch16-224-in21k")
78
+
79
  def answer_question(image, question):
80
  image.save('sample_img.jpg')
81
 
 
114
  tokenized_words = tokenized_words.unsqueeze(0)
115
  question = question.unsqueeze(0)
116
 
117
+ img = vit_feat_extract(img, return_tensors = 'pt')['pixel_values']
118
  encoding = {'img': img, 'boxes': boxes, 'tokenized_words': tokenized_words, 'question': question}
119
 
120
  with torch.no_grad():