amberborici committed on
Commit 9c9f973
·
1 Parent(s): f03a25b
Files changed (1)
  1. app.py +155 -188
app.py CHANGED
@@ -1,211 +1,178 @@
  import gradio as gr
- import requests
- import base64
- import io
  from PIL import Image
- import os
 
- def encode_image_to_base64(image):
-     """Convert PIL image to base64 string"""
-     buffered = io.BytesIO()
-     image.save(buffered, format="JPEG")
-     img_str = base64.b64encode(buffered.getvalue()).decode()
-     return f"data:image/jpeg;base64,{img_str}"
 
- def process_images_with_api(images, prompt, api_key):
-     """
-     Process multiple images using Hugging Face Inference API
-
-     Args:
-         images: List of uploaded images
-         prompt: User-provided prompt
-         api_key: Hugging Face API key
-
-     Returns:
-         Generated descriptions
-     """
      if not images:
          return "Please upload at least one image."
 
-     if not api_key:
-         return "Please provide your Hugging Face API key."
-
-     # API endpoint for Qwen2-VL model
-     api_url = "https://api-inference.huggingface.co/models/Qwen/Qwen2-VL-7B-Instruct"
-
-     headers = {
-         "Authorization": f"Bearer {api_key}",
-         "Content-Type": "application/json"
-     }
-
      results = []
-
      for i, image in enumerate(images):
-         if image is None:
-             continue
-
-         try:
-             # Convert numpy array to PIL Image
-             pil_image = Image.fromarray(image)
-
-             # Encode image to base64
-             base64_image = encode_image_to_base64(pil_image)
-
-             # Prepare the request payload
-             payload = {
-                 "inputs": [
-                     {
-                         "role": "user",
-                         "content": [
-                             {
-                                 "type": "text",
-                                 "text": prompt
-                             },
-                             {
-                                 "type": "image_url",
-                                 "image_url": {
-                                     "url": base64_image
-                                 }
-                             }
-                         ]
-                     }
-                 ]
-             }
-
-             # Make API request
-             response = requests.post(api_url, headers=headers, json=payload, timeout=60)
-
-             if response.status_code == 200:
-                 result = response.json()
-                 if "choices" in result and len(result["choices"]) > 0:
-                     description = result["choices"][0]["message"]["content"]
-                     results.append(f"Image {i+1}: {description}")
-                 else:
-                     results.append(f"Image {i+1}: ❌ No response from API")
-             else:
-                 error_msg = f"API Error (Status {response.status_code}): {response.text}"
-                 results.append(f"Image {i+1}: ❌ {error_msg}")
-
-         except Exception as e:
-             results.append(f"Image {i+1}: ❌ Error - {str(e)}")
-
-     if not results:
-         return "No valid images processed."
 
      return "\n\n".join(results)
 
- def create_gradio_interface():
-     """Create the Gradio interface for Hugging Face Spaces"""
-
-     with gr.Blocks(
-         title="Multi-Image AI Processor",
-         theme=gr.themes.Soft(),
-         fill_height=True
-     ) as demo:
-
-         gr.Markdown("# 🖼️ Multi-Image AI Processor")
-         gr.Markdown("Upload multiple images and get AI-generated descriptions using the Qwen2-VL model via Hugging Face Inference API.")
-
-         with gr.Row():
-             with gr.Column(scale=2):
-                 # Image upload area
-                 images_input = gr.File(
-                     file_count="multiple",
-                     file_types=["image"],
-                     label="Upload Images",
-                     height=300
-                 )
-
-                 # Prompt input
-                 prompt_input = gr.Textbox(
-                     label="Prompt",
-                     placeholder="Describe this image in detail...",
-                     value="Describe this image in detail.",
-                     lines=3
-                 )
-
-                 # API key input (required)
-                 api_key_input = gr.Textbox(
-                     label="Hugging Face API Key",
-                     placeholder="hf_...",
-                     type="password",
-                     info="Required: Get your API key from https://huggingface.co/settings/tokens"
-                 )
-
-                 # Process button
-                 process_btn = gr.Button(
-                     "🚀 Process Images",
-                     variant="primary",
-                     size="lg"
-                 )
 
-             with gr.Column(scale=2):
-                 # Results area
-                 results_output = gr.Textbox(
-                     label="Results",
-                     lines=15,
-                     max_lines=25,
-                     interactive=False
-                 )
-
-         # Examples
-         with gr.Accordion("Example Prompts", open=False):
-             gr.Examples(
-                 examples=[
-                     [
-                         "Describe the architectural style and features of this building.",
-                         "Upload images of buildings to analyze their architectural style."
-                     ],
-                     [
-                         "What are the key features and amenities shown in this property?",
-                         "Upload property images to get detailed descriptions of features and amenities."
-                     ],
-                     [
-                         "Describe the interior design and layout of this space.",
-                         "Upload interior photos to get detailed descriptions of design and layout."
-                     ],
-                     [
-                         "What type of property is this and what are its main characteristics?",
-                         "Upload property images to identify type and characteristics."
-                     ],
-                     [
-                         "Describe the condition and quality of this property.",
-                         "Upload property images to assess condition and quality."
-                     ]
-                 ],
-                 inputs=[prompt_input],
-                 outputs=[results_output],
-                 label="Example Prompts"
              )
 
-         # Footer
-         gr.Markdown("---")
-         gr.Markdown("""
-         **How to use:**
-         1. Get your Hugging Face API key from https://huggingface.co/settings/tokens
-         2. Upload one or more images
-         3. Enter a prompt describing what you want to know about the images
-         4. Paste your API key
-         5. Click "Process Images" to get AI-generated descriptions
-
-         **Tips:**
-         - Use specific prompts for better results
-         - The model works best with clear, high-quality images
-         - You can process multiple images at once
-         - Each image is processed individually with the same prompt
-         """)
-
-         # Connect the process button
-         process_btn.click(
-             fn=process_images_with_api,
-             inputs=[images_input, prompt_input, api_key_input],
-             outputs=[results_output]
          )
 
-     return demo
-
- # Create and launch the interface
- demo = create_gradio_interface()
 
  if __name__ == "__main__":
      demo.launch()
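The removed version above called Hugging Face's hosted Inference API over raw HTTP; the added version below loads the model locally with transformers instead. Note that the removed code POSTed an OpenAI-style messages array (wrapped in an "inputs" key) to api-inference.huggingface.co and then parsed a "choices" field from the response; "choices" belongs to the chat-completion response schema, not the classic Inference API one, so that parse would most likely never succeed, which may be why the code was replaced. For reference, a minimal sketch of the same remote call through huggingface_hub's InferenceClient, which does speak the chat-completion schema; this assumes a provider actually serving Qwen/Qwen2-VL-7B-Instruct, and the token and image URL are placeholders:

# Sketch only: the removed remote call, redone with huggingface_hub's
# InferenceClient, which returns the `choices` structure the removed
# code tried to parse. Token and image URL below are placeholders.
from huggingface_hub import InferenceClient

client = InferenceClient(token="hf_...")  # placeholder token

response = client.chat_completion(
    model="Qwen/Qwen2-VL-7B-Instruct",
    messages=[{
        "role": "user",
        "content": [
            {"type": "text", "text": "Describe this image in detail."},
            {"type": "image_url", "image_url": {"url": "https://example.com/photo.jpg"}},
        ],
    }],
    max_tokens=512,
)
print(response.choices[0].message.content)

Either way, the trade-off is the usual one: the remote call needs no GPU in the Space, while the local path below avoids per-request network latency and API-schema drift.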
 
  import gradio as gr
+ from transformers import AutoProcessor, AutoModelForCausalLM
+ import torch
  from PIL import Image
+ import io
+
+ # Load the model and processor
+ def load_model():
+     """Load the Qwen2-VL model"""
+     model_id = "Qwen/Qwen2-VL-7B-Instruct"
+     processor = AutoProcessor.from_pretrained(model_id)
+     model = AutoModelForCausalLM.from_pretrained(
+         model_id,
+         torch_dtype=torch.float16,
+         device_map="auto"
+     )
+     return model, processor
 
+ # Initialize model and processor
+ model, processor = load_model()
 
+ def process_single_image(image, prompt):
+     """Process a single image with the model"""
+     if image is None:
+         return "Please upload an image."
 
+     try:
+         # Convert Gradio image to PIL Image
+         if hasattr(image, 'name'):  # Gradio file object
+             pil_image = Image.open(image.name)
+         else:  # Numpy array
+             pil_image = Image.fromarray(image)
+
+         # Prepare the prompt
+         text = f"<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"
+
+         # Process the image and text
+         inputs = processor(
+             text=text,
+             images=pil_image,
+             return_tensors="pt"
+         )
+
+         # Generate response
+         with torch.no_grad():
+             generated_ids = model.generate(
+                 **inputs,
+                 max_new_tokens=512,
+                 do_sample=True,
+                 temperature=0.7,
+                 top_p=0.9
+             )
+
+         # Decode the response
+         generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+
+         # Extract only the assistant's response
+         response = generated_text.split("<|im_start|>assistant\n")[-1].split("<|im_end|>")[0].strip()
+
+         return response
 
+     except Exception as e:
+         return f"Error processing image: {str(e)}"
+
+ def process_multiple_images(images, prompt):
+     """Process multiple images with the same prompt"""
      if not images:
          return "Please upload at least one image."
 
      results = []
      for i, image in enumerate(images):
+         if image is not None:
+             result = process_single_image(image, prompt)
+             results.append(f"Image {i+1}: {result}")
+         else:
+             results.append(f"Image {i+1}: No image provided")
 
      return "\n\n".join(results)
 
+ # Create the Gradio interface
+ with gr.Blocks(
+     title="Multi-Image AI Processor",
+     theme=gr.themes.Soft(),
+     fill_height=True
+ ) as demo:
 
+     gr.Markdown("# 🖼️ Multi-Image AI Processor")
+     gr.Markdown("Upload multiple images and get AI-generated descriptions using the Qwen2-VL model.")
+
+     with gr.Row():
+         with gr.Column(scale=2):
+             # Image upload area
+             images_input = gr.File(
+                 file_count="multiple",
+                 file_types=["image"],
+                 label="Upload Images",
+                 height=300
+             )
 
+             # Prompt input
+             prompt_input = gr.Textbox(
+                 label="Prompt",
+                 placeholder="Describe this image in detail...",
+                 value="Describe this image in detail.",
+                 lines=3
+             )
+
+             # Process button
+             process_btn = gr.Button(
+                 "🚀 Process Images",
+                 variant="primary",
+                 size="lg"
              )
 
+         with gr.Column(scale=2):
+             # Results area
+             results_output = gr.Textbox(
+                 label="Results",
+                 lines=15,
+                 max_lines=25,
+                 interactive=False
+             )
+
+     # Examples
+     with gr.Accordion("Example Prompts", open=False):
+         gr.Examples(
+             examples=[
+                 [
+                     "Describe the architectural style and features of this building.",
+                     "Upload images of buildings to analyze their architectural style."
+                 ],
+                 [
+                     "What are the key features and amenities shown in this property?",
+                     "Upload property images to get detailed descriptions of features and amenities."
+                 ],
+                 [
+                     "Describe the interior design and layout of this space.",
+                     "Upload interior photos to get detailed descriptions of design and layout."
+                 ],
+                 [
+                     "What type of property is this and what are its main characteristics?",
+                     "Upload property images to identify type and characteristics."
+                 ],
+                 [
+                     "Describe the condition and quality of this property.",
+                     "Upload property images to assess condition and quality."
+                 ]
+             ],
+             inputs=[prompt_input],
+             outputs=[results_output],
+             label="Example Prompts"
          )
 
+     # Footer
+     gr.Markdown("---")
+     gr.Markdown("""
+     **How to use:**
+     1. Upload one or more images
+     2. Enter a prompt describing what you want to know about the images
+     3. Click "Process Images" to get AI-generated descriptions
+
+     **Tips:**
+     - Use specific prompts for better results
+     - The model works best with clear, high-quality images
+     - You can process multiple images at once
+     - Each image is processed individually with the same prompt
+     """)
+
+     # Connect the process button
+     process_btn.click(
+         fn=process_multiple_images,
+         inputs=[images_input, prompt_input],
+         outputs=[results_output]
+     )
 
+ # Launch the app
  if __name__ == "__main__":
      demo.launch()
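A caveat on the added version: it loads Qwen/Qwen2-VL-7B-Instruct through AutoModelForCausalLM and hand-writes the ChatML prompt, but transformers exposes this model through the dedicated Qwen2VLForConditionalGeneration class, and the hand-written string leaves out the vision placeholder tokens that Qwen2-VL's processor expects, so it has nowhere to splice in the image features. The later split on "<|im_start|>assistant\n" also seems unable to match, since batch_decode was called with skip_special_tokens=True, which strips those markers. A minimal sketch of the local path as it would more plausibly work, assuming transformers 4.45+ (describe is an illustrative helper, not part of the commit):

# Sketch only: local Qwen2-VL inference via the dedicated model class
# and the processor's chat template. Assumes transformers 4.45+.
import torch
from PIL import Image
from transformers import AutoProcessor, Qwen2VLForConditionalGeneration

MODEL_ID = "Qwen/Qwen2-VL-7B-Instruct"

processor = AutoProcessor.from_pretrained(MODEL_ID)
model = Qwen2VLForConditionalGeneration.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float16,
    device_map="auto",
)

def describe(pil_image: Image.Image, prompt: str) -> str:
    # The chat template inserts the vision placeholder tokens that the
    # hand-written ChatML string in the commit leaves out.
    messages = [{
        "role": "user",
        "content": [{"type": "image"}, {"type": "text", "text": prompt}],
    }]
    text = processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = processor(text=[text], images=[pil_image], return_tensors="pt").to(model.device)
    with torch.no_grad():
        generated_ids = model.generate(**inputs, max_new_tokens=512)
    # Drop the prompt tokens so only the new completion is decoded.
    trimmed = generated_ids[:, inputs["input_ids"].shape[1]:]
    return processor.batch_decode(trimmed, skip_special_tokens=True)[0].strip()

Slicing off the first inputs["input_ids"].shape[1] tokens before decoding isolates the completion directly, so no string splitting on chat markers is needed at all.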