amberborici committed
Commit 15d32eb · 1 Parent(s): 4170066
Files changed (2)
  1. app.py +48 -71
  2. package.txt +0 -5
app.py CHANGED
@@ -1,81 +1,58 @@
  import gradio as gr
- from transformers import AutoProcessor, AutoModelForCausalLM
- import torch
- from PIL import Image
- import io

- # Load the model and processor
- def load_model():
-     """Load the Qwen2-VL model"""
-     model_id = "Qwen/Qwen2-VL-7B-Instruct"
-     processor = AutoProcessor.from_pretrained(model_id)
-     model = AutoModelForCausalLM.from_pretrained(
-         model_id,
-         torch_dtype=torch.float16,
-         device_map="auto"
-     )
-     return model, processor
-
- # Initialize model and processor
- print("Loading Qwen2-VL model...")
- model, processor = load_model()
- print("Model loaded successfully!")
-
- def process_single_image(image, prompt):
-     """Process a single image with the model"""
-     if image is None:
-         return "Please upload an image."
-
-     try:
-         # Convert Gradio image to PIL Image
-         if hasattr(image, 'name'): # Gradio file object
-             pil_image = Image.open(image.name)
-         else: # Numpy array
-             pil_image = Image.fromarray(image)
-
-         # Prepare the prompt
-         text = f"<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"
-
-         # Process the image and text
-         inputs = processor(
-             text=text,
-             images=pil_image,
-             return_tensors="pt"
-         )
-
-         # Generate response
-         with torch.no_grad():
-             generated_ids = model.generate(
-                 **inputs,
-                 max_new_tokens=512,
-                 do_sample=True,
-                 temperature=0.7,
-                 top_p=0.9
-             )
-
-         # Decode the response
-         generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
-
-         # Extract only the assistant's response
-         response = generated_text.split("<|im_start|>assistant\n")[-1].split("<|im_end|>")[0].strip()
-
-         return response
-
-     except Exception as e:
-         return f"Error processing image: {str(e)}"
+ # Load the Qwen2-VL model using gr.load()
+ qwen_model = gr.load("models/Qwen/Qwen2-VL-7B-Instruct")

- def process_multiple_images(images, prompt):
-     """Process multiple images with the same prompt"""
+ def process_images(images, prompt):
+     """
+     Process multiple images with the Qwen2-VL model
+     """
      if not images:
          return "Please upload at least one image."

      results = []
+
      for i, image in enumerate(images):
-         if image is not None:
-             result = process_single_image(image, prompt)
-             results.append(f"Image {i+1}: {result}")
-         else:
-             results.append(f"Image {i+1}: No image provided")
+         if image is None:
+             continue
+
+         try:
+             # Prepare the message for the model
+             message = [
+                 {
+                     "role": "user",
+                     "content": [
+                         {
+                             "type": "text",
+                             "text": prompt
+                         },
+                         {
+                             "type": "image_url",
+                             "image_url": {
+                                 "url": image
+                             }
+                         }
+                     ]
+                 }
+             ]
+
+             # Generate response using the loaded model
+             response = qwen_model.chat.completions.create(
+                 model="Qwen/Qwen2-VL-7B-Instruct",
+                 messages=message,
+                 max_tokens=512,
+                 temperature=0.7
+             )
+
+             # Extract the response text
+             description = response.choices[0].message.content
+             results.append(f"Image {i+1}: {description}")
+
+         except Exception as e:
+             results.append(f"Image {i+1}: ❌ Error - {str(e)}")
+
+     if not results:
+         return "No valid images processed."

      return "\n\n".join(results)

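A side note on the code removed above, not part of the commit: the old local-inference path loaded the checkpoint with AutoModelForCausalLM, but the Qwen2-VL model card documents a dedicated vision-language class. A minimal sketch of a local loader, assuming a transformers release that ships Qwen2-VL support (the class name and the dtype/device settings mirror the model card; everything else is illustrative):

    import torch
    from transformers import AutoProcessor, Qwen2VLForConditionalGeneration

    model_id = "Qwen/Qwen2-VL-7B-Instruct"

    # The processor bundles the tokenizer/chat template and the image preprocessor.
    processor = AutoProcessor.from_pretrained(model_id)

    # Qwen2-VL is a vision-language architecture; AutoModelForCausalLM (used in the
    # removed code) generally does not resolve this config, so the dedicated class
    # from the model card is used here instead.
    model = Qwen2VLForConditionalGeneration.from_pretrained(
        model_id,
        torch_dtype=torch.float16,
        device_map="auto",
    )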
 
 
@@ -170,7 +147,7 @@ with gr.Blocks(

      # Connect the process button
      process_btn.click(
-         fn=process_multiple_images,
+         fn=process_images,
          inputs=[images_input, prompt_input],
          outputs=[results_output]
      )
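One caveat on the new inference path: gr.load("models/...") returns a Gradio Blocks/Interface backed by the Hugging Face Inference API, not an OpenAI-style client, so the qwen_model.chat.completions.create(...) call in the diff may not behave as written. If an OpenAI-style chat-completion request against the hosted model is the intent, a hedged sketch using huggingface_hub's InferenceClient (an assumption, not what the commit ships; serverless availability of this model is not guaranteed):

    from huggingface_hub import InferenceClient

    # Assumes the model is reachable through the chat-completion endpoint.
    client = InferenceClient(model="Qwen/Qwen2-VL-7B-Instruct")

    def describe_image(image_url: str, prompt: str) -> str:
        """Send one image plus a text prompt and return the model's reply."""
        response = client.chat_completion(
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        # The endpoint expects an HTTP(S) URL or a base64 data URI here,
                        # not a raw Gradio image object as passed in the committed code.
                        {"type": "image_url", "image_url": {"url": image_url}},
                    ],
                }
            ],
            max_tokens=512,
            temperature=0.7,
        )
        return response.choices[0].message.content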
package.txt DELETED
@@ -1,5 +0,0 @@
- gradio>=4.0.0
- transformers>=4.36.0
- torch>=2.0.0
- Pillow>=10.0.0
- accelerate>=0.20.0
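Dropping the local-inference pins is consistent with the switch to gr.load, which offloads inference to the Hugging Face backend. Note that Spaces read Python dependencies from requirements.txt rather than package.txt; if explicit pins are still wanted for the slimmed-down app, a hypothetical minimal file could be:

    # requirements.txt (hypothetical) - a Gradio Space already provides gradio via its
    # SDK setting, so even this single pin may be unnecessary.
    gradio>=4.0.0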