LYL1015 committed
Commit 3910317 · verified · 1 Parent(s): 06d6661

Update app.py

Files changed (1)
app.py +71 -25
app.py CHANGED
@@ -48,7 +48,7 @@ from utils.lua_converter import LuaConverter
 from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
 from qwen_vl_utils import process_vision_info
 import torch
-from utils.lua2lrt import lua_to_lrtemplate
+from lua2lrt import lua_to_lrtemplate
 from huggingface_hub import snapshot_download
 import spaces
 
@@ -670,8 +670,6 @@ def get_llm_response_with_custom_prompt_stream(image_path, user_prompt, max_new_
     except Exception as e:
         yield f"❌ Error during inference: {str(e)}"
 
-def process_upload(file):
-    return file
 
 def compact_text(text):
     """
@@ -697,10 +695,20 @@ def get_box_coordinates(annotated_image_dict, prompt_original):
     and format the bounding box coordinates.
     """
     global local_dict
-    if annotated_image_dict and annotated_image_dict["boxes"]:
+    if annotated_image_dict and annotated_image_dict.get("boxes") and len(annotated_image_dict["boxes"]) > 0:
         # Get the last drawn box
         input_image = annotated_image_dict["image"]
-        pil_image = Image.open(input_image)
+
+        # Handle both PIL Image and file path cases
+        if isinstance(input_image, str):
+            # If it's a file path
+            pil_image = Image.open(input_image)
+            image_key = input_image
+        else:
+            # If it's a PIL Image object
+            pil_image = input_image
+            image_key = str(input_image)  # Use string representation as key
+
         last_box = annotated_image_dict["boxes"][-1]
         width, height = pil_image.width, pil_image.height
 
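For reference, the dict that image_annotator hands to this callback carries the image (either a file path or a PIL object, which is what the new branch above dispatches on) plus a list of pixel-space boxes. A minimal mock of both cases, with made-up box values, assuming only Pillow:

from PIL import Image

# Hypothetical payloads mirroring the two shapes handled above.
payload_path = {"image": "/tmp/sample.jpg",
                "boxes": [{"xmin": 64, "ymin": 32, "xmax": 512, "ymax": 256}]}
payload_pil = {"image": Image.new("RGB", (1024, 768)),
               "boxes": [{"xmin": 64, "ymin": 32, "xmax": 512, "ymax": 256}]}

for payload in (payload_path, payload_pil):
    img = payload["image"]
    if isinstance(img, str):
        image_key = img       # the file path itself is a stable key
    else:
        image_key = str(img)  # PIL repr, e.g. '<PIL.Image.Image image mode=RGB size=1024x768 at 0x...>'
    print(type(img).__name__, "->", image_key)

Note that str() on a PIL image embeds the object's memory address, so that key is only stable for the lifetime of the object.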
@@ -708,13 +716,46 @@
         ymin = last_box["ymin"] / height
         xmax = last_box["xmax"] / width
         ymax = last_box["ymax"] / height
-
-        local_dict[input_image] = [xmin, ymin, xmax, ymax]
+
+        local_dict[image_key] = [xmin, ymin, xmax, ymax]
         # Format the coordinates into a string
 
-        return str([xmin, ymin, xmax, ymax]), " In the region <box></box>, xxx"
+        return str([xmin, ymin, xmax, ymax]), f"In the region <box>{str([xmin, ymin, xmax, ymax])}</box>, {prompt_original}"
     return "No box drawn", prompt_original
 
+def get_box_coordinates_simple(annotated_image_dict):
+    """
+    Simplified version that matches test1.py pattern - only returns coordinates
+    """
+    global local_dict
+    if annotated_image_dict and annotated_image_dict.get("boxes") and len(annotated_image_dict["boxes"]) > 0:
+        # Get the last drawn box
+        input_image = annotated_image_dict["image"]
+
+        # Handle both PIL Image and file path cases
+        if isinstance(input_image, str):
+            # If it's a file path
+            pil_image = Image.open(input_image)
+            image_key = input_image
+        else:
+            # If it's a PIL Image object
+            pil_image = input_image
+            image_key = str(input_image)  # Use string representation as key
+
+        last_box = annotated_image_dict["boxes"][-1]
+        width, height = pil_image.width, pil_image.height
+
+        xmin = last_box["xmin"] / width
+        ymin = last_box["ymin"] / height
+        xmax = last_box["xmax"] / width
+        ymax = last_box["ymax"] / height
+
+        local_dict[image_key] = [xmin, ymin, xmax, ymax]
+        # Format the coordinates into a string
+
+        return str([xmin, ymin, xmax, ymax])
+    return "No bounding box drawn yet."
+
 @spaces.GPU
 def process_analysis_pipeline_stream(image_dict, user_prompt, max_new_tokens, top_k, top_p, temperature):
     """
@@ -731,16 +772,29 @@ def process_analysis_pipeline_stream(image_dict, user_prompt, max_new_tokens, to
     Yields:
         list: Updated chat_history for Gradio UI updates (messages format)
     """
-    if image_dict is None:
+    if image_dict is None or image_dict.get('image') is None:
         yield [
             {"role": "user", "content": "Please upload an image first! 📸"},
             {"role": "assistant", "content": "I need an image to analyze before I can provide editing recommendations."}
-        ]
+        ], None
         return
+
+    # Extract image from the image_dict
     image = image_dict['image']
+
+    # Handle the case where image is a PIL Image object - need to save it temporarily
+    if not isinstance(image, str):
+        import tempfile
+        import os
+        # Save PIL image to temporary file
+        temp_dir = tempfile.gettempdir()
+        temp_path = os.path.join(temp_dir, f"temp_image_{hash(str(image))}.png")
+        image.save(temp_path)
+        image = temp_path
+
     if not user_prompt.strip():
         user_prompt = default_user_prompt
-    elif len(local_dict) > 0 and local_dict[image][0] != local_dict[image][2]:
+    elif len(local_dict) > 0 and image in local_dict and local_dict[image][0] != local_dict[image][2]:
         user_prompt = user_prompt.replace('<box></box>', f'<box>{str(local_dict[image])}</box>')
 
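The new PIL-to-filepath round trip above can be exercised on its own. A self-contained sketch assuming only Pillow and the standard library (the gray stand-in image is made up):

import os
import tempfile
from PIL import Image

image = Image.new("RGB", (64, 64), "gray")  # stand-in for an uploaded PIL image

if not isinstance(image, str):
    # Derive a temp path from the object's string form, as the commit does.
    temp_dir = tempfile.gettempdir()
    temp_path = os.path.join(temp_dir, f"temp_image_{hash(str(image))}.png")
    image.save(temp_path)
    image = temp_path

print(image)                  # e.g. /tmp/temp_image_-1234567890.png
print(os.path.exists(image))  # True

Because str(image) embeds the object's address, hash(str(image)) yields a fresh file per upload rather than a content-addressed one.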
@@ -1121,11 +1175,8 @@ def create_interface():
         # Input image upload component
         input_image = image_annotator(
             label="📸 Upload Your Image & Draw Bounding Box",
-            disable_edit_boxes=True,
-            image_type="filepath",
-            single_box=True,
-            show_label=True,
-            height=400
+            label_list=["region"],  # Add a label list
+            use_default_label=True  # Automatically use the first label as the default label
         )
 
         # Prompt input
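A stripped-down Blocks app wiring the annotator the way this commit does; this sketch assumes the gradio_image_annotation package that provides image_annotator, and uses only keyword arguments that appear in this diff:

import gradio as gr
from gradio_image_annotation import image_annotator

def show_coords(annotated):
    # Same payload shape as above: {"image": ..., "boxes": [...]}
    if annotated and annotated.get("boxes"):
        return str(annotated["boxes"][-1])
    return "No bounding box drawn yet."

with gr.Blocks() as demo:
    annotator = image_annotator(
        label="Upload & draw a box",
        label_list=["region"],
        use_default_label=True,
    )
    coords = gr.Textbox(label="Coordinates")
    annotator.change(fn=show_coords, inputs=annotator, outputs=coords)

demo.launch()

Dropping image_type="filepath" from the old call is presumably why the component can now return a PIL object, and hence why the isinstance dispatch earlier in this commit became necessary.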
@@ -1280,17 +1331,12 @@ def create_interface():
             outputs=user_prompt
         )
 
-        # Event binding
+        # Event binding - simplified to match test1.py working pattern
 
         input_image.change(
-            fn=get_box_coordinates,
-            inputs=[input_image, user_prompt],
-            outputs=[coordinates_output, user_prompt]
-        )
-        input_image.upload(
-            fn=process_upload,
-            inputs=[input_image],
-            outputs=[input_image]
+            fn=get_box_coordinates_simple,
+            inputs=input_image,
+            outputs=coordinates_output
         )
         # Main processing button - streaming output, pass all parameters
         process_btn.click(
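End to end, the change event now only refreshes the coordinate display; the <box></box> placeholder in the prompt is filled in later, inside process_analysis_pipeline_stream, from the coordinates cached in local_dict. That substitution step in isolation (the path, coordinates, and prompt text are illustrative):

local_dict = {"/tmp/temp_image_123.png": [0.15, 0.1, 0.45, 0.5]}
image = "/tmp/temp_image_123.png"
user_prompt = "In the region <box></box>, brighten the shadows."

if image in local_dict and local_dict[image][0] != local_dict[image][2]:
    user_prompt = user_prompt.replace('<box></box>', f'<box>{str(local_dict[image])}</box>')

print(user_prompt)
# In the region <box>[0.15, 0.1, 0.45, 0.5]</box>, brighten the shadows.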