Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -48,7 +48,7 @@ from utils.lua_converter import LuaConverter
 from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
 from qwen_vl_utils import process_vision_info
 import torch
-from
+from lua2lrt import lua_to_lrtemplate
 from huggingface_hub import snapshot_download
 import spaces

@@ -670,8 +670,6 @@ def get_llm_response_with_custom_prompt_stream(image_path, user_prompt, max_new_
     except Exception as e:
         yield f"❌ Error during inference: {str(e)}"

-def process_upload(file):
-    return file

 def compact_text(text):
     """
@@ -697,10 +695,20 @@ def get_box_coordinates(annotated_image_dict, prompt_original):
     and format the bounding box coordinates.
     """
     global local_dict
-    if annotated_image_dict and annotated_image_dict["boxes"]:
+    if annotated_image_dict and annotated_image_dict.get("boxes") and len(annotated_image_dict["boxes"]) > 0:
         # Get the last drawn box
         input_image = annotated_image_dict["image"]
-
+
+        # Handle both PIL Image and file path cases
+        if isinstance(input_image, str):
+            # If it's a file path
+            pil_image = Image.open(input_image)
+            image_key = input_image
+        else:
+            # If it's a PIL Image object
+            pil_image = input_image
+            image_key = str(input_image)  # Use string representation as key
+
         last_box = annotated_image_dict["boxes"][-1]
         width, height = pil_image.width, pil_image.height

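Note: the value dict this handler receives from the annotator component has the shape the hunk above reads, an "image" entry (either a file path string or a PIL image) plus a "boxes" list in pixel coordinates. A minimal illustrative sketch with made-up values, not part of the commit:

from PIL import Image

# Illustrative only: the shape of annotated_image_dict as get_box_coordinates reads it.
annotated_image_dict = {
    "image": Image.new("RGB", (640, 480)),   # may also be a path string such as "photo.png"
    "boxes": [
        {"xmin": 64, "ymin": 48, "xmax": 320, "ymax": 240, "label": "region"},
    ],
}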
@@ -708,13 +716,46 @@ def get_box_coordinates(annotated_image_dict, prompt_original):
         ymin = last_box["ymin"] / height
         xmax = last_box["xmax"] / width
         ymax = last_box["ymax"] / height
-
-        local_dict[
+
+        local_dict[image_key] = [xmin, ymin, xmax, ymax]
         # Format the coordinates into a string

-        return str([xmin, ymin, xmax, ymax]), "
+        return str([xmin, ymin, xmax, ymax]), f"In the region <box>{str([xmin, ymin, xmax, ymax])}</box>, {prompt_original}"
     return "No box drawn", prompt_original

+def get_box_coordinates_simple(annotated_image_dict):
+    """
+    Simplified version that matches test1.py pattern - only returns coordinates
+    """
+    global local_dict
+    if annotated_image_dict and annotated_image_dict.get("boxes") and len(annotated_image_dict["boxes"]) > 0:
+        # Get the last drawn box
+        input_image = annotated_image_dict["image"]
+
+        # Handle both PIL Image and file path cases
+        if isinstance(input_image, str):
+            # If it's a file path
+            pil_image = Image.open(input_image)
+            image_key = input_image
+        else:
+            # If it's a PIL Image object
+            pil_image = input_image
+            image_key = str(input_image)  # Use string representation as key
+
+        last_box = annotated_image_dict["boxes"][-1]
+        width, height = pil_image.width, pil_image.height
+
+        xmin = last_box["xmin"] / width
+        ymin = last_box["ymin"] / height
+        xmax = last_box["xmax"] / width
+        ymax = last_box["ymax"] / height
+
+        local_dict[image_key] = [xmin, ymin, xmax, ymax]
+        # Format the coordinates into a string
+
+        return str([xmin, ymin, xmax, ymax])
+    return "No bounding box drawn yet."
+
 @spaces.GPU
 def process_analysis_pipeline_stream(image_dict, user_prompt, max_new_tokens, top_k, top_p, temperature):
     """
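Note: a stand-alone sketch of the normalization both helpers perform (pixel box divided by image width and height) and the cache entry they leave in local_dict; the helper name and values are illustrative, not part of the commit:

from PIL import Image

local_dict = {}  # stand-in for the module-level cache in app.py

def normalize_last_box(value):
    # Same arithmetic as get_box_coordinates_simple: divide pixel coordinates by image size.
    pil_image = value["image"]
    box = value["boxes"][-1]
    coords = [box["xmin"] / pil_image.width, box["ymin"] / pil_image.height,
              box["xmax"] / pil_image.width, box["ymax"] / pil_image.height]
    local_dict[str(pil_image)] = coords
    return coords

value = {"image": Image.new("RGB", (640, 480)),
         "boxes": [{"xmin": 64, "ymin": 48, "xmax": 320, "ymax": 240}]}
print(normalize_last_box(value))  # [0.1, 0.1, 0.5, 0.5]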
@@ -731,16 +772,29 @@ def process_analysis_pipeline_stream(image_dict, user_prompt, max_new_tokens, to
     Yields:
         list: Updated chat_history for Gradio UI updates (messages format)
     """
-    if image_dict is None:
+    if image_dict is None or image_dict.get('image') is None:
         yield [
             {"role": "user", "content": "Please upload an image first! 📸"},
             {"role": "assistant", "content": "I need an image to analyze before I can provide editing recommendations."}
-        ]
+        ], None
         return
+
+    # Extract image from the image_dict
     image = image_dict['image']
+
+    # Handle the case where image is a PIL Image object - need to save it temporarily
+    if not isinstance(image, str):
+        import tempfile
+        import os
+        # Save PIL image to temporary file
+        temp_dir = tempfile.gettempdir()
+        temp_path = os.path.join(temp_dir, f"temp_image_{hash(str(image))}.png")
+        image.save(temp_path)
+        image = temp_path
+
     if not user_prompt.strip():
         user_prompt = default_user_prompt
-    elif len(local_dict) > 0 and local_dict[image][0] != local_dict[image][2]:
+    elif len(local_dict) > 0 and image in local_dict and local_dict[image][0] != local_dict[image][2]:
         user_prompt = user_prompt.replace('<box></box>', f'<box>{str(local_dict[image])}</box>')


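Note: a small sketch of the <box></box> substitution guarded by the new `image in local_dict` check; the path and cached coordinates below are hypothetical:

# Hypothetical cache entry and prompt; mirrors the replace() call in the hunk above.
local_dict = {"/tmp/temp_image_123.png": [0.1, 0.1, 0.5, 0.5]}
image = "/tmp/temp_image_123.png"
user_prompt = "Brighten the area in <box></box> and keep the rest untouched."

if len(local_dict) > 0 and image in local_dict and local_dict[image][0] != local_dict[image][2]:
    user_prompt = user_prompt.replace('<box></box>', f'<box>{str(local_dict[image])}</box>')

print(user_prompt)
# Brighten the area in <box>[0.1, 0.1, 0.5, 0.5]</box> and keep the rest untouched.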
@@ -1121,11 +1175,8 @@ def create_interface():
         # Input image upload component
         input_image = image_annotator(
             label="📸 Upload Your Image & Draw Bounding Box",
-
-
-            single_box=True,
-            show_label=True,
-            height=400
+            label_list=["region"],  # add a label list
+            use_default_label=True  # automatically use the first label as the default
         )

         # Prompt input
@@ -1280,17 +1331,12 @@ def create_interface():
             outputs=user_prompt
         )

-        # Event binding
+        # Event binding - simplified to match test1.py working pattern

         input_image.change(
-            fn=
-            inputs=
-            outputs=
-        )
-        input_image.upload(
-            fn=process_upload,
-            inputs=[input_image],
-            outputs=[input_image]
+            fn=get_box_coordinates_simple,
+            inputs=input_image,
+            outputs=coordinates_output
         )
         # Main processing button - streaming output, pass all parameters
         process_btn.click(
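Note: a minimal stand-alone Blocks demo of the annotator-to-textbox wiring that input_image.change() now uses; the component parameters come from the hunks above, the handler is simplified, and the demo is illustrative rather than part of app.py:

import gradio as gr
from gradio_image_annotation import image_annotator

def read_last_box(value):
    # value is the annotator dict: {"image": ..., "boxes": [...]}
    if value and value.get("boxes"):
        box = value["boxes"][-1]
        return str([box["xmin"], box["ymin"], box["xmax"], box["ymax"]])
    return "No bounding box drawn yet."

with gr.Blocks() as demo:
    annotator = image_annotator(
        label="📸 Upload Your Image & Draw Bounding Box",
        label_list=["region"],
        use_default_label=True,
    )
    coordinates_output = gr.Textbox(label="Box coordinates")
    annotator.change(fn=read_last_box, inputs=annotator, outputs=coordinates_output)

if __name__ == "__main__":
    demo.launch()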