xxx1 committed on
Commit
5438377
1 Parent(s): 8359a8e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -4
app.py CHANGED
@@ -20,15 +20,15 @@ model_vqa = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-capfil
20
 
21
  from transformers import BlipProcessor, BlipForConditionalGeneration
22
 
23
- cap_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
24
- cap_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
25
 
26
 
27
 
28
  def caption(input_image):
29
  inputs = cap_processor(input_image, return_tensors="pt")
30
- inputs["num_beams"] = 1
31
- inputs['num_return_sequences'] =1
32
  out = cap_model.generate(**inputs)
33
  return "\n".join(cap_processor.batch_decode(out, skip_special_tokens=True))
34
  import openai
@@ -111,6 +111,7 @@ def vle(input_image,input_text):
111
  return [vqa['answer'] for vqa in vqa_answers],[vqa['score'] for vqa in vqa_answers]
112
  def inference_chat(input_image,input_text):
113
  cap=caption(input_image)
 
114
  # inputs = processor(images=input_image, text=input_text,return_tensors="pt")
115
  # inputs["max_length"] = 10
116
  # inputs["num_beams"] = 5
 
20
 
21
  from transformers import BlipProcessor, BlipForConditionalGeneration
22
 
23
+ cap_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
24
+ cap_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")
25
 
26
 
27
 
28
  def caption(input_image):
29
  inputs = cap_processor(input_image, return_tensors="pt")
30
+ # inputs["num_beams"] = 1
31
+ # inputs['num_return_sequences'] =1
32
  out = cap_model.generate(**inputs)
33
  return "\n".join(cap_processor.batch_decode(out, skip_special_tokens=True))
34
  import openai
 
111
  return [vqa['answer'] for vqa in vqa_answers],[vqa['score'] for vqa in vqa_answers]
112
  def inference_chat(input_image,input_text):
113
  cap=caption(input_image)
114
+ print(cap)
115
  # inputs = processor(images=input_image, text=input_text,return_tensors="pt")
116
  # inputs["max_length"] = 10
117
  # inputs["num_beams"] = 5