aekpic877 committed
Commit b2ed79b
1 Parent(s): 8dd40fa

Update app.py

Files changed (1):
  1. app.py +12 -20
app.py CHANGED
@@ -1,18 +1,16 @@
 import torch
 from PIL import Image
-from transformers import AutoModel, AutoTokenizer
+from transformers import BlipProcessor, BlipForConditionalGeneration
 import gradio as gr
 
-# Load a smaller model and tokenizer
-model_name = 'google/vit-base-patch16-224'  # Example of a smaller model, adjust as needed
-
+# Load the BLIP model and processor
 try:
-    model = AutoModel.from_pretrained(model_name, torch_dtype=torch.float16)
+    processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
+    model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
     model = model.to(device='cuda' if torch.cuda.is_available() else 'cpu')
     model.eval()
 except Exception as e:
-    print(f"Error loading model or tokenizer: {e}")
+    print(f"Error loading model or processor: {e}")
     exit()
 
 def process_image(image, question):
@@ -21,29 +19,23 @@ def process_image(image, question):
     # Convert Gradio image to PIL Image
     image = Image.fromarray(image).convert('RGB')
 
-    # Create message list
-    msgs = [{'role': 'user', 'content': question}]
+    # Preprocess the image and question; model.device keeps the tensors on the model's device
+    inputs = processor(image, question, return_tensors="pt").to(model.device)
 
     # Perform inference
     try:
         with torch.no_grad():
-            res = model.chat(
-                image=image,
-                msgs=msgs,
-                tokenizer=tokenizer,
-                sampling=True,  # if sampling=False, beam_search will be used by default
-                temperature=0.7,
-                stream=False  # Set to False for non-streaming output
-            )
-        return res
+            outputs = model.generate(**inputs)
+            answer = processor.decode(outputs[0], skip_special_tokens=True)
+            return answer
     except Exception as e:
         return f"Error during model inference: {e}"
 
 # Define the Gradio interface
 interface = gr.Interface(
     fn=process_image,
-    inputs=[gr.inputs.Image(type='numpy'), gr.inputs.Textbox(label="Question")],
-    outputs="text",
+    inputs=[gr.Image(type='numpy'), gr.Textbox(label="Question")],
+    outputs=gr.Textbox(),
     title="Image Question Answering",
     description="Upload an image and ask a question about it. The model will provide an answer."
 )
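
Neither side of the diff shows a call to interface.launch(), and the second hunk runs through the last numbered line shown, so if app.py really ends at the closing parenthesis the interface is built but never served. A minimal sketch of the missing final line, assuming interface is the app's only entry point:

    interface.launch()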
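
One caveat on the model choice: Salesforce/blip-image-captioning-base is a captioning checkpoint, so the question is consumed as a caption prefix and "answered" by completing it, rather than by visual question answering. BLIP ships a VQA-tuned sibling, Salesforce/blip-vqa-base, loaded through BlipForQuestionAnswering. A sketch of the same pipeline on that checkpoint (a suggested substitution, not part of this commit):

    import torch
    from PIL import Image
    from transformers import BlipProcessor, BlipForQuestionAnswering

    # VQA-tuned BLIP checkpoint: generates an answer to the question
    # instead of completing the question as a caption.
    processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
    model = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-base")
    model = model.to('cuda' if torch.cuda.is_available() else 'cpu')
    model.eval()

    def answer_question(image: Image.Image, question: str) -> str:
        # Keep the inputs on the model's device, as in the committed code.
        inputs = processor(image, question, return_tensors="pt").to(model.device)
        with torch.no_grad():
            outputs = model.generate(**inputs)
        return processor.decode(outputs[0], skip_special_tokens=True)

Only the inference body of process_image would change; the Gradio interface definition stays as committed.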