SahilJ2 committed
Commit 20ceca0
Parent: 1072512

Eighth commit

Files changed (1): app.py (+4, -5)
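In short: the commit drops the hard-coded .to("cuda") placements in m1, m5, and m6, and removes a local device assignment in m3, whose model3.to(device) now relies on a device name defined elsewhere in app.py.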
app.py CHANGED

@@ -78,9 +78,9 @@ processor = ViTImageProcessor.from_pretrained('microsoft/swin-tiny-patch4-window
 
 def m1(que, image):
     processor3 = BlipProcessor.from_pretrained("Salesforce/blip-vqa-capfilt-large")
-    model3 = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-capfilt-large").to("cuda")
+    model3 = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-capfilt-large")
 
-    inputs = processor3(image, que, return_tensors="pt").to("cuda")
+    inputs = processor3(image, que, return_tensors="pt")
 
     out = model3.generate(**inputs)
     return processor3.decode(out[0], skip_special_tokens=True)
@@ -102,7 +102,6 @@ def m3(que, image):
     processor3 = DonutProcessor.from_pretrained("naver-clova-ix/donut-base-finetuned-docvqa")
     model3 = VisionEncoderDecoderModel.from_pretrained("naver-clova-ix/donut-base-finetuned-docvqa")
 
-    device = "cuda" if torch.cuda.is_available() else "cpu"
     model3.to(device)
 
     prompt = "<s_docvqa><s_question>{que}</s_question><s_answer>"
@@ -139,7 +138,7 @@ def m5(que, image):
     processor3 = AutoProcessor.from_pretrained("google/pix2struct-ocrvqa-large")
     model3 = AutoModelForSeq2SeqLM.from_pretrained("google/pix2struct-ocrvqa-large")
 
-    inputs = processor3(images=image, text=que, return_tensors="pt").to("cuda")
+    inputs = processor3(images=image, text=que, return_tensors="pt")
 
     predictions = model3.generate(**inputs)
     return processor3.decode(predictions[0], skip_special_tokens=True)
@@ -148,7 +147,7 @@ def m6(que, image):
     processor3 = AutoProcessor.from_pretrained("google/pix2struct-infographics-vqa-large")
     model3 = AutoModelForSeq2SeqLM.from_pretrained("google/pix2struct-infographics-vqa-large")
 
-    inputs = processor3(images=image, text=que, return_tensors="pt").to("cuda")
+    inputs = processor3(images=image, text=que, return_tensors="pt")
 
     predictions = model3.generate(**inputs)
     return processor3.decode(predictions[0], skip_special_tokens=True)
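
For comparison, a minimal device-agnostic sketch of m1 (an illustration, not part of this commit): instead of deleting the GPU placement outright, it keeps CUDA when available and falls back to CPU, using the same torch.cuda.is_available() check the commit removes from m3. The module-level device name here is an assumption, mirroring the name m3 still references after this change.

import torch
from transformers import BlipProcessor, BlipForQuestionAnswering

# Assumed module-level device selection; m3 still calls model3.to(device)
# after this commit, so a name like this presumably exists at module scope.
device = "cuda" if torch.cuda.is_available() else "cpu"

def m1(que, image):
    processor3 = BlipProcessor.from_pretrained("Salesforce/blip-vqa-capfilt-large")
    # Place the model on whatever device is actually available.
    model3 = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-capfilt-large").to(device)

    # BatchEncoding.to() moves the input tensors to the model's device.
    inputs = processor3(image, que, return_tensors="pt").to(device)

    out = model3.generate(**inputs)
    return processor3.decode(out[0], skip_special_tokens=True)

Unrelated to this commit, note that the Donut prompt in m3 never interpolates que: as written, the literal text {que} is sent to the model. An f-string (prompt = f"<s_docvqa><s_question>{que}</s_question><s_answer>") would be needed for the question to reach the model.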