Spaces:

amberborici
/

Qwen-Qwen2-VL-7B-Instruct

Sleeping

App Files Community

amberborici committed on Aug 8

Commit

15d32eb

1 Parent(s): 4170066

asdsadsa

Browse files

Files changed (2) hide show

app.py +48 -71
package.txt +0 -5

app.py CHANGED Viewed

@@ -1,81 +1,58 @@
 import gradio as gr
-from transformers import AutoProcessor, AutoModelForCausalLM
-import torch
-from PIL import Image
-import io
-# Load the model and processor
-def load_model():
-    """Load the Qwen2-VL model"""
-    model_id = "Qwen/Qwen2-VL-7B-Instruct"
-    processor = AutoProcessor.from_pretrained(model_id)
-    model = AutoModelForCausalLM.from_pretrained(
-        model_id,
-        torch_dtype=torch.float16,
-        device_map="auto"
-    )
-    return model, processor
-# Initialize model and processor
-print("Loading Qwen2-VL model...")
-model, processor = load_model()
-print("Model loaded successfully!")
-def process_single_image(image, prompt):
-    """Process a single image with the model"""
-    if image is None:
-        return "Please upload an image."
-    try:
-        # Convert Gradio image to PIL Image
-        if hasattr(image, 'name'):  # Gradio file object
-            pil_image = Image.open(image.name)
-        else:  # Numpy array
-            pil_image = Image.fromarray(image)
-        # Prepare the prompt
-        text = f"<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"
-        # Process the image and text
-        inputs = processor(
-            text=text,
-            images=pil_image,
-            return_tensors="pt"
-        )
-        # Generate response
-        with torch.no_grad():
-            generated_ids = model.generate(
-                **inputs,
-                max_new_tokens=512,
-                do_sample=True,
-                temperature=0.7,
-                top_p=0.9
-            )
-        # Decode the response
-        generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
-        # Extract only the assistant's response
-        response = generated_text.split("<|im_start|>assistant\n")[-1].split("<|im_end|>")[0].strip()
-        return response
-    except Exception as e:
-        return f"Error processing image: {str(e)}"
-def process_multiple_images(images, prompt):
-    """Process multiple images with the same prompt"""
     if not images:
         return "Please upload at least one image."
     results = []
     for i, image in enumerate(images):
-        if image is not None:
-            result = process_single_image(image, prompt)
-            results.append(f"Image {i+1}: {result}")
-        else:
-            results.append(f"Image {i+1}: No image provided")
     return "\n\n".join(results)
@@ -170,7 +147,7 @@ with gr.Blocks(
     # Connect the process button
     process_btn.click(
-        fn=process_multiple_images,
         inputs=[images_input, prompt_input],
         outputs=[results_output]
     )

 import gradio as gr
+# Load the Qwen2-VL model using gr.load()
+qwen_model = gr.load("models/Qwen/Qwen2-VL-7B-Instruct")
+def process_images(images, prompt):
+    """
+    Process multiple images with the Qwen2-VL model
+    """
     if not images:
         return "Please upload at least one image."
     results = []
     for i, image in enumerate(images):
+        if image is None:
+            continue
+        try:
+            # Prepare the message for the model
+            message = [
+                {
+                    "role": "user",
+                    "content": [
+                        {
+                            "type": "text",
+                            "text": prompt
+                        },
+                        {
+                            "type": "image_url",
+                            "image_url": {
+                                "url": image
+                            }
+                        }
+                    ]
+                }
+            ]
+            # Generate response using the loaded model
+            response = qwen_model.chat.completions.create(
+                model="Qwen/Qwen2-VL-7B-Instruct",
+                messages=message,
+                max_tokens=512,
+                temperature=0.7
+            )
+            # Extract the response text
+            description = response.choices[0].message.content
+            results.append(f"Image {i+1}: {description}")
+        except Exception as e:
+            results.append(f"Image {i+1}: ❌ Error - {str(e)}")
+    if not results:
+        return "No valid images processed."
     return "\n\n".join(results)
     # Connect the process button
     process_btn.click(
+        fn=process_images,
         inputs=[images_input, prompt_input],
         outputs=[results_output]
     )

package.txt DELETED Viewed

@@ -1,5 +0,0 @@
-gradio>=4.0.0
-transformers>=4.36.0
-torch>=2.0.0
-Pillow>=10.0.0
-accelerate>=0.20.0