SmolVLM-trl-sft-ChartQA

Running on Zero

MaziyarPanahi committed on Aug 29, 2024

Commit

02558d9

verified ·

1 Parent(s): 7890490

Update app.py (#14)

- Update app.py (222ed92e2def4cc06102dfb624f32446cb93ebfb)

Files changed (1) hide show

app.py CHANGED Viewed

@@ -5,12 +5,30 @@ from qwen_vl_utils import process_vision_info
 import torch
 from PIL import Image
 import subprocess
 # subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
 # models = {
 #     "Qwen/Qwen2-VL-2B-Instruct": AutoModelForCausalLM.from_pretrained("Qwen/Qwen2-VL-2B-Instruct", trust_remote_code=True, torch_dtype="auto", _attn_implementation="flash_attention_2").cuda().eval()
 # }
 models = {
     "Qwen/Qwen2-VL-2B-Instruct": Qwen2VLForConditionalGeneration.from_pretrained("Qwen/Qwen2-VL-2B-Instruct", trust_remote_code=True, torch_dtype="auto").cuda().eval()
@@ -31,7 +49,9 @@ prompt_suffix = "<|end|>\n"
 @spaces.GPU
 def run_example(image, text_input=None, model_id="Qwen/Qwen2-VL-2B-Instruct"):
-    print(image)
     model = models[model_id]
     processor = processors[model_id]
@@ -43,7 +63,7 @@ def run_example(image, text_input=None, model_id="Qwen/Qwen2-VL-2B-Instruct"):
             "content": [
                 {
                     "type": "image",
-                    "image": image[0],
                 },
                 {"type": "text", "text": text_input},
             ],

 import torch
 from PIL import Image
 import subprocess
+from datetime import datetime
 # subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
 # models = {
 #     "Qwen/Qwen2-VL-2B-Instruct": AutoModelForCausalLM.from_pretrained("Qwen/Qwen2-VL-2B-Instruct", trust_remote_code=True, torch_dtype="auto", _attn_implementation="flash_attention_2").cuda().eval()
 # }
+# Save an in-memory image array to a PNG file and return that file's absolute path.
+# NOTE(review): `np` and `os` are used below, but no import for either is visible in this diff — confirm app.py imports them.
+def array_to_image_path(image_array):
+    # Cast the array to 8-bit unsigned values and wrap it in a PIL Image
+    img = Image.fromarray(np.uint8(image_array))
+    # Build the filename from the current local time.
+    # NOTE(review): the format has one-second resolution, so two calls within the same second
+    # produce the same name and the later save overwrites the earlier file.
+    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+    filename = f"image_{timestamp}.png"
+    # Save the image under a relative name, i.e. into the current working directory
+    img.save(filename)
+    # Resolve the relative filename to an absolute path for the caller;
+    # nothing in this function removes the file afterwards
+    full_path = os.path.abspath(filename)
+    return full_path
 models = {
     "Qwen/Qwen2-VL-2B-Instruct": Qwen2VLForConditionalGeneration.from_pretrained("Qwen/Qwen2-VL-2B-Instruct", trust_remote_code=True, torch_dtype="auto").cuda().eval()
 @spaces.GPU
 def run_example(image, text_input=None, model_id="Qwen/Qwen2-VL-2B-Instruct"):
+    image_path = array_to_image_path(image)
+    print(image_path)
     model = models[model_id]
     processor = processors[model_id]
             "content": [
                 {
                     "type": "image",
+                    "image": image_path,
                 },
                 {"type": "text", "text": text_input},
             ],