aiqtech committed on
Commit
bf957c3
1 Parent(s): 36163a8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -39
app.py CHANGED
@@ -5,16 +5,27 @@ import torch
5
  import gradio as gr
6
  from threading import Thread
7
  from PIL import Image
8
- from transformers import Qwen2VLForConditionalGeneration, AutoTokenizer, AutoProcessor
9
- from qwen_vl_utils import process_vision_info
10
 
11
  # Model and processor initialization
12
- model = Qwen2VLForConditionalGeneration.from_pretrained(
13
- "Qwen/QVQ-72B-Preview",
14
- torch_dtype="auto",
15
- device_map="auto"
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  )
17
- processor = AutoProcessor.from_pretrained("Qwen/QVQ-72B-Preview")
18
 
19
  # Footer
20
  footer = """
@@ -38,47 +49,21 @@ def process_image(image, text_input=None):
38
  messages = [
39
  {
40
  "role": "system",
41
- "content": [
42
- {"type": "text", "text": "You are a helpful and harmless assistant. You are Qwen developed by Alibaba. You should think step-by-step."}
43
- ],
44
  },
45
  {
46
  "role": "user",
47
  "content": [
48
- {"type": "image", "image": image},
49
- {"type": "text", "text": text_input}
50
- ],
51
  }
52
  ]
53
 
54
  # Process inputs
55
- text = processor.apply_chat_template(
56
- messages,
57
- tokenize=False,
58
- add_generation_prompt=True
59
- )
60
- image_inputs, video_inputs = process_vision_info(messages)
61
- inputs = processor(
62
- text=[text],
63
- images=image_inputs,
64
- videos=video_inputs,
65
- padding=True,
66
- return_tensors="pt",
67
- )
68
- inputs = inputs.to("cuda")
69
-
70
- # Generate response
71
- generated_ids = model.generate(**inputs, max_new_tokens=8192)
72
- generated_ids_trimmed = [
73
- out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
74
- ]
75
- output_text = processor.batch_decode(
76
- generated_ids_trimmed,
77
- skip_special_tokens=True,
78
- clean_up_tokenization_spaces=False
79
- )[0]
80
 
81
- return output_text
82
  except Exception as e:
83
  return f"Error processing image: {str(e)}"
84
 
 
5
  import gradio as gr
6
  from threading import Thread
7
  from PIL import Image
8
+ from transformers import AutoModelForCausalLM, AutoTokenizer, AutoProcessor
 
9
 
10
  # Model and processor initialization
11
+ model_name = "Qwen/QVQ-72B-Preview"
12
+
13
+ model = AutoModelForCausalLM.from_pretrained(
14
+ model_name,
15
+ trust_remote_code=True,
16
+ device_map="auto",
17
+ torch_dtype=torch.float16
18
+ )
19
+
20
+ tokenizer = AutoTokenizer.from_pretrained(
21
+ model_name,
22
+ trust_remote_code=True
23
+ )
24
+
25
+ processor = AutoProcessor.from_pretrained(
26
+ model_name,
27
+ trust_remote_code=True
28
  )
 
29
 
30
  # Footer
31
  footer = """
 
49
  messages = [
50
  {
51
  "role": "system",
52
+ "content": "You are a helpful and harmless assistant."
 
 
53
  },
54
  {
55
  "role": "user",
56
  "content": [
57
+ {"image": image},
58
+ {"text": text_input}
59
+ ]
60
  }
61
  ]
62
 
63
  # Process inputs
64
+ response = model.chat(tokenizer, messages)
65
+ return response
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
 
 
67
  except Exception as e:
68
  return f"Error processing image: {str(e)}"
69