LYL1015 committed
Commit 9d03193 · verified · 1 Parent(s): cf37e59

Update app.py

Files changed (1): app.py +13 -525
app.py CHANGED
@@ -2,33 +2,6 @@ import os
 import subprocess
 import os
 from pathlib import Path
-BASE_DIR = Path("/home/user/app")
-commands = [
-    ("python -V", BASE_DIR),
-    ("pip install -r my_requirements.txt", BASE_DIR)
-]
-
-def run_command(cmd, cwd=None):
-    try:
-        result = subprocess.run(
-            cmd,  # Note: shlex.split() is no longer used here
-            cwd=str(cwd) if cwd else None,
-            shell=True,  # shell=True is needed to support operators like &&
-            check=True,
-            stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE,
-            text=True
-        )
-        print(f"[SUCCESS] {cmd}")
-        if result.stdout: print(result.stdout)
-        return True
-    except subprocess.CalledProcessError as e:
-        print(f"[FAILED] {cmd}")
-        print(f"Error: {e.stderr}")
-        return False
-
-for cmd, cwd in commands:
-    run_command(cmd, cwd)
 
 import re
 import gradio as gr
@@ -45,12 +18,7 @@ from gradio_image_annotation import image_annotator
 from werkzeug.utils import secure_filename  # Add this import
 from utils.system_prompt import SHORT_SYSTEM_PROMPT_WITH_THINKING
 from utils.lua_converter import LuaConverter
-from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
-from qwen_vl_utils import process_vision_info
-import torch
 from utils.lua2lrt import lua_to_lrtemplate
-from huggingface_hub import snapshot_download
-import spaces
 
 
 def extract_json_from_answer(answer):
@@ -178,40 +146,6 @@ def json_to_lua(json_data, save_folder, filename="config.lua"):
     except Exception as e:
         return None, f"Error in json_to_lua: {str(e)}"
 
-# Model downloader
-def download_tools_ckpts(target_dir, url):
-    from huggingface_hub import snapshot_download
-    import os
-    import shutil
-
-    tmp_dir = "hf_temp_download"
-    os.makedirs(tmp_dir, exist_ok=True)
-
-    snapshot_download(
-        repo_id="JarvisArt/JarvisArt-Preview",
-        repo_type="model",
-        local_dir=tmp_dir,
-        allow_patterns=os.path.join(url, "**"),
-        local_dir_use_symlinks=False,
-    )
-
-    src_dir = os.path.join(tmp_dir, url)
-
-
-    shutil.copytree(src_dir, target_dir)
-
-    shutil.rmtree(tmp_dir)
-
-def download_model(model_path):
-    """
-    Download model from HuggingFace if not exists locally
-
-    Args:
-        model_path (str): Path to save the model
-    """
-    if not os.path.exists(model_path):
-        download_tools_ckpts(model_path, "pretrained/preview")
-    else:
-        print(f"✅ Model already exists at {model_path}")
 
 # Local model client class
 class LocalModelClient:
@@ -226,42 +160,13 @@ class LocalModelClient:
         self.model = None
         self.processor = None
         self.model_loaded = False
-
-        # Download model if needed
-        download_model(model_path)
+
 
         # Load model
         self._load_model()
 
     def _load_model(self):
-        """
-        Load the model and processor
-        """
-        try:
-            print(f"🔄 Loading model from {self.model_path}...")
-
-            # Model configuration
-            min_pixels = 256 * 28 * 28
-            max_pixels = 1280 * 28 * 28
-
-            # Load model
-            self.model = Qwen2VLForConditionalGeneration.from_pretrained(
-                self.model_path,
-                torch_dtype="auto",
-                device_map="auto",
-                min_pixels=min_pixels,
-                max_pixels=max_pixels
-            )
-
-            # Load processor
-            self.processor = AutoProcessor.from_pretrained(self.model_path)
-
-            print(f"✅ Model loaded successfully from {self.model_path}")
-            self.model_loaded = True
-
-        except Exception as e:
-            print(f"❌ Model loading failed: {str(e)}")
-            self.model_loaded = False
+        print(f"🔍 Loading model from {self.model_path}...")
 
     def chat(self, messages, system=None, images=None, **kwargs):
         """
@@ -328,31 +233,8 @@ class LocalModelClient:
             formatted_messages, tokenize=False, add_generation_prompt=True
         )
 
-        # Process vision info
-        image_inputs, video_inputs = process_vision_info(formatted_messages)
-
-        # Prepare inputs
-        inputs = self.processor(
-            text=[text],
-            images=image_inputs,
-            videos=video_inputs,
-            padding=True,
-            return_tensors="pt",
-        )
-
-        # Move inputs to device
-        device = next(self.model.parameters()).device
-        inputs = inputs.to(device)
-
-        # Generate response
-        generated_ids = self.model.generate(**inputs, max_new_tokens=10240)
-        generated_ids_trimmed = [
-            out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
-        ]
-        output_text = self.processor.batch_decode(
-            generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
-        )
 
+        output_text = "hello world"  # Placeholder for actual model output
         # Create Response object for compatibility
         class Response:
             def __init__(self, text):
@@ -484,13 +366,13 @@ def parse_args():
     parser.add_argument(
         "--server_port",
         type=int,
-        default=7860,  # Change to standard Gradio port
+        default=7861,  # Change to standard Gradio port
         help="Port for the Gradio server"
     )
     parser.add_argument(
         "--server_name",
        type=str,
-        default="0.0.0.0",
+        default="127.0.0.1",
         help="Server name/IP for the Gradio server"
     )
     parser.add_argument(
@@ -710,7 +592,7 @@ def get_box_coordinates(annotated_image_dict, prompt_original):
         image_key = str(input_image)  # Use string representation as key
 
         last_box = annotated_image_dict["boxes"][-1]
-        width, height = pil_image.width, pil_image.height
+        width, height = pil_image.shape[:2]
 
         xmin = last_box["xmin"] / width
         ymin = last_box["ymin"] / height
@@ -719,7 +601,7 @@ def get_box_coordinates(annotated_image_dict, prompt_original):
 
         local_dict[image_key] = [xmin, ymin, xmax, ymax]
         # Format the coordinates into a string
-
+        print(str([xmin, ymin, xmax, ymax]))
         return str([xmin, ymin, xmax, ymax]), f"In the region <box>{str([xmin, ymin, xmax, ymax])}</box>, {prompt_original}"
     return "No box drawn", prompt_original
 
@@ -743,7 +625,7 @@ def get_box_coordinates_simple(annotated_image_dict):
         image_key = str(input_image)  # Use string representation as key
 
         last_box = annotated_image_dict["boxes"][-1]
-        width, height = pil_image.width, pil_image.height
+        height, width = pil_image.shape[:2]
 
         xmin = last_box["xmin"] / width
         ymin = last_box["ymin"] / height
@@ -752,406 +634,12 @@ def get_box_coordinates_simple(annotated_image_dict):
 
         local_dict[image_key] = [xmin, ymin, xmax, ymax]
         # Format the coordinates into a string
-
+        print(str([xmin, ymin, xmax, ymax]))
         return str([xmin, ymin, xmax, ymax])
     return "No bounding box drawn yet."
 
-@spaces.GPU
 def process_analysis_pipeline_stream(image_dict, user_prompt, max_new_tokens, top_k, top_p, temperature):
+    print("🔍 Processing analysis pipeline with streaming output...")
-    """
-    Main analysis pipeline with streaming output, modern chat interface style, and image display support
-
-    Args:
-        image (str): Path to the input image
-        user_prompt (str): User-defined prompt for analysis
-        max_new_tokens (int): Maximum number of new tokens to generate
-        top_k (int): Top-k sampling parameter
-        top_p (float): Top-p (nucleus) sampling parameter
-        temperature (float): Temperature for sampling
-
-    Yields:
-        list: Updated chat_history for Gradio UI updates (messages format)
-    """
-    if image_dict is None or image_dict.get('image') is None:
-        yield [
-            {"role": "user", "content": "Please upload an image first! 📸"},
-            {"role": "assistant", "content": "I need an image to analyze before I can provide editing recommendations."}
-        ], None
-        return
-
-    # Extract image from the image_dict
-    image = image_dict['image']
-
-    # Handle the case where image is a PIL Image object - need to save it temporarily
-    if not isinstance(image, str):
-        import tempfile
-        import os
-        # Save PIL image to temporary file
-        temp_dir = tempfile.gettempdir()
-        temp_path = os.path.join(temp_dir, f"temp_image_{hash(str(image))}.png")
-        image.save(temp_path)
-        image = temp_path
-
-    if not user_prompt.strip():
-        user_prompt = default_user_prompt
-    elif len(local_dict) > 0 and image in local_dict and local_dict[image][0] != local_dict[image][2]:
-        user_prompt = user_prompt.replace('<box></box>', f'<box>{str(local_dict[image])}</box>')
-
-
-    try:
-        # Initialize chat history with user message including image
-        chat_history = []
-
-        # Create user message with image and instructions - using messages format
-        user_message_text = f"**Instructions:** {user_prompt}".replace('<box>', f'(').replace('</box>', f')')
-
-        # Add user message with image
-        if image_dict:
-            # For messages format, we need to handle images differently
-            # First add the image
-            chat_history.append({
-                "role": "user",
-                "content": {
-                    "path": image,
-                    "mime_type": "image/jpeg"
-                }
-            })
-            # Then add text message
-            chat_history.append({
-                "role": "user",
-                "content": user_message_text
-            })
-        else:
-            chat_history.append({
-                "role": "user",
-                "content": user_message_text
-            })
-        yield chat_history, None
-
-        # JarvisArt starts responding
-        chat_history.append({
-            "role": "assistant",
-            "content": "<div style='margin:0;padding:0'>🎨 <strong style='margin:0;padding:0'>JarvisArt is analyzing your image...</strong><br/><em>Please wait while I examine the details and understand your vision.</em></div>"
-        })
-        ai_message_index = len(chat_history) - 1  # Record AI message index position
-        recommendations_index = None  # Initialize recommendations message index
-        yield chat_history, None
-
-        # Get streaming response
-        full_response = ""
-        token_count = 0
-        update_frequency = 8  # Reduce update frequency for smoother experience
-
-        # Stage marker
-        stage = "starting"  # starting, thinking, answer, completed
-        answer_completed = False  # Flag to track if answer is completed
-
-        for new_token in get_llm_response_with_custom_prompt_stream(
-            image, user_prompt, max_new_tokens, top_k, top_p, temperature
-        ):
-            full_response += new_token
-            token_count += 1
-
-            # Detect thinking stage
-            if "<think>" in full_response and stage == "starting":
-                stage = "thinking"
-                chat_history[ai_message_index] = {
-                    "role": "assistant",
-                    "content": "💭 **Thinking Process**\n*Analyzing image characteristics and understanding your creative vision...*"
-                }
-                yield chat_history, None
-                continue
-
-            # Thinking completed
-            if "</think>" in full_response and stage == "thinking":
-                stage = "between"
-                think_match = re.search(r'<think>(.*?)</think>', full_response, re.DOTALL)
-                if think_match:
-                    thinking_content = think_match.group(1).strip()
-                    # Use the compact_text function to process text
-                    thinking_content = compact_text(thinking_content).replace('<box>', f'(').replace('</box>', f')')
-                    # Use special formatting to force eliminate spacing
-                    formatted_thinking = f"<div style='margin:0;padding:0'>💭 <strong style='margin:0;padding:0'>Thinking</strong><div style='margin:0;padding:0'>{thinking_content}</div></div>"
-                    chat_history[ai_message_index] = {
-                        "role": "assistant",
-                        "content": formatted_thinking
-                    }
-                yield chat_history, None
-                continue
-
-            # Detect answer stage
-            if "<answer>" in full_response and stage in ["between", "thinking"]:
-                stage = "answer"
-                # Use special formatting to force eliminate spacing
-                initial_recommendations = "<div style='margin:0;padding:0;margin-top:-30px'>✨ <strong style='margin:0;padding:0'>Professional Editing Recommendations</strong><div style='margin:0;padding:0'>*Generating personalized editing suggestions...*</div></div>"
-                chat_history.append({
-                    "role": "assistant",
-                    "content": initial_recommendations
-                })
-                recommendations_index = len(chat_history) - 1  # Record recommendations message index
-                yield chat_history, None
-                continue
-
-            # Answer completed
-            if "</answer>" in full_response and stage == "answer" and not answer_completed:
-                stage = "completed"
-                answer_completed = True
-                answer_match = re.search(r'<answer>(.*?)</answer>', full_response, re.DOTALL)
-                if answer_match:
-                    answer_content = answer_match.group(1).strip()
-                    # Use the compact_text function to process text
-                    answer_content = compact_text(answer_content)
-
-                    # Use special formatting to force eliminate spacing
-                    formatted_answer = f"<div style='margin:0;padding:0;margin-top:-30px'>✨ <strong style='margin:0;padding:0'>Professional Editing Recommendations</strong><div style='margin:0;padding:0'>{answer_content}</div></div>"
-
-                    chat_history[recommendations_index] = {
-                        "role": "assistant",
-                        "content": formatted_answer
-                    }
-                    yield chat_history
-                # Don't break here - continue to Final completion for JSON extraction
-
-            # Real-time content updates (reduced frequency) - only if answer not completed
-            if token_count % update_frequency == 0 and not answer_completed:
-                if stage == "thinking":
-                    current_thinking = full_response.split("<think>")[-1].replace("</think>", "").strip()
-                    if current_thinking and len(current_thinking) > 20:  # Avoid displaying too short content
-                        # Use the compact_text function to process text
-                        current_thinking = compact_text(current_thinking)
-                        # Use special formatting to force eliminate spacing
-                        formatted_thinking = f"<div style='margin:0;padding:0'>💭 <strong style='margin:0;padding:0'>Thinking</strong><div style='margin:0;padding:0'>{current_thinking}...<br/><em>Still analyzing...</em></div></div>"
-                        chat_history[ai_message_index] = {
-                            "role": "assistant",
-                            "content": formatted_thinking
-                        }
-                        yield chat_history
-
-                elif stage == "answer":
-                    current_answer = full_response.split("<answer>")[-1].replace("</answer>", "").strip()
-                    if current_answer and len(current_answer) > 30:  # Avoid displaying too short content
-                        # Use the compact_text function to process text
-                        current_answer = compact_text(current_answer)
-                        # Use special formatting to force eliminate spacing
-                        formatted_answer = f"<div style='margin:0;padding:0;margin-top:-30px'>✨ <strong style='margin:0;padding:0'>JarvisArt Recommendations</strong><div style='margin:0;padding:0'>{current_answer}...<br/><em>Generating more suggestions...</em></div></div>"
-                        if recommendations_index is not None:
-                            chat_history[recommendations_index] = {
-                                "role": "assistant",
-                                "content": formatted_answer
-                            }
-                        else:
-                            chat_history.append({
-                                "role": "assistant",
-                                "content": formatted_answer
-                            })
-                            recommendations_index = len(chat_history) - 1
-                        yield chat_history, None
-
-        # Final completion
-        if stage == "completed":
-            # Analysis is complete, now process and save lua files
-            print(f"🔍 Debug: Final completion stage reached")
-            answer_match = re.search(r'<answer>(.*?)</answer>', full_response, re.DOTALL)
-            if answer_match:
-                answer_content = answer_match.group(1).strip()
-                print(f"🔍 Debug: Extracted answer content (first 200 chars): {answer_content[:200]}...")
-
-                # Extract JSON objects from the answer
-                json_objects = extract_json_from_answer(answer_content)
-                print(f"🔍 Debug: Found {len(json_objects)} JSON objects")
-
-                # Save JSON objects as lua files
-                if json_objects:
-                    print(f"🔍 Debug: Processing {len(json_objects)} JSON objects for conversion")
-                    conversion_index = None
-                    chat_history.append({
-                        "role": "assistant",
-                        "content": "<div style='margin:0;padding:0;margin-top:-20px'>⚙️ <strong style='margin:0;padding:0'>Lightroom Configuration Converting...</strong><br/><em>Converting editing parameters to Lightroom-compatible format...</em></div>"
-                    })
-                    conversion_index = len(chat_history) - 1
-                    yield chat_history
-
-                    # Create lua_results folder in the same directory as this script
-                    script_dir = os.path.dirname(os.path.abspath(__file__))
-                    results_dir = os.path.join(script_dir, "results")
-                    os.makedirs(results_dir, exist_ok=True)
-
-                    # Generate timestamp for unique session folder name
-                    timestamp = int(time.time())
-                    session_folder_name = f"example_{timestamp}"
-                    session_dir = os.path.join(results_dir, session_folder_name)
-                    os.makedirs(session_dir, exist_ok=True)
-
-                    # Copy the uploaded image to the session folder
-                    import shutil
-                    # Use secure_filename and hash to generate unique original image filename, avoiding conflicts with processed images
-                    original_filename = secure_filename(os.path.basename(image))
-                    file_hash = hashlib.md5(f"{original_filename}_{time.time()}".encode()).hexdigest()
-
-                    # Keep original extension
-                    file_ext = os.path.splitext(original_filename)[1] or '.jpg'
-                    unique_original_filename = f"original_{file_hash}{file_ext}"
-
-                    image_dest_path = os.path.join(session_dir, unique_original_filename)
-                    shutil.copy2(image, image_dest_path)
-
-                    # Save the full model response to a text file
-                    response_file_path = os.path.join(session_dir, "full_response.txt")
-                    with open(response_file_path, "w", encoding="utf-8") as f:
-                        f.write(full_response)
-
-                    # Save user prompt to a text file
-                    prompt_file_path = os.path.join(session_dir, "user_prompt.txt")
-                    with open(prompt_file_path, "w", encoding="utf-8") as f:
-                        f.write(user_prompt)
-
-                    saved_files = []
-                    lrtemplate_files = []
-                    for i, json_obj in enumerate(json_objects):
-                        filename = f"config_{i+1}.lua"
-                        lua_path, error = json_to_lua(json_obj, session_dir, filename)
-
-                        if lua_path:
-                            saved_files.append(lua_path)
-                            print(f"✅ Saved Lua config: {lua_path}")
-
-                            # Convert lua to lrtemplate
-                            try:
-                                lrtemplate_path = lua_to_lrtemplate(lua_path)
-                                lrtemplate_files.append(lrtemplate_path)
-                                print(f"✅ Converted to Lightroom preset: {lrtemplate_path}")
-                            except Exception as e:
-                                print(f"⚠️ Failed to convert {lua_path} to lrtemplate: {str(e)}")
-                        else:
-                            print(f"❌ Failed to save Lua config {i+1}: {error}")
-
-
-                    # Update file save notification
-                    if saved_files:
-                        save_notification = "<div style='margin:0;padding:0;margin-top:-20px'>"
-                        save_notification += "✅ <strong style='margin:0;padding:0'>Files saved successfully!</strong><br/>"
-                        save_notification += "📁 <strong>Save location:</strong> <code>results/" + session_folder_name + "/</code><br/>"
-                        save_notification += "📄 <strong>Generated files:</strong><br/>"
-                        save_notification += " • Original image: <code>" + unique_original_filename + "</code><br/>"
-                        save_notification += " • Full response: <code>full_response.txt</code><br/>"
-                        save_notification += " • User prompt: <code>user_prompt.txt</code><br/>"
-                        save_notification += " • Config files: " + str(len(saved_files)) + " files"
-                        save_notification += "<br/> • Lightroom presets: " + str(len(lrtemplate_files)) + " files"
-
-                        save_notification += "<br/><strong>Config files:</strong>"
-                        for i, file_path in enumerate(saved_files):
-                            filename = os.path.basename(file_path)
-                            save_notification += "<br/> • <code>" + filename + "</code>"
-
-                        save_notification += "<br/><strong>Lightroom Presets:</strong>"
-                        for i, file_path in enumerate(lrtemplate_files):
-                            filename = os.path.basename(file_path)
-                            save_notification += "<br/> • <code>" + filename + "</code>"
-
-                        save_notification += "<br/><br/>🎨 <strong>How to use Lightroom Presets:</strong>"
-                        save_notification += "<br/>1. Open Adobe Lightroom"
-                        save_notification += "<br/>2. Go to the <strong>Presets</strong> panel"
-                        save_notification += "<br/>3. Click on the <strong>+</strong> icon"
-                        save_notification += "<br/>4. Select <strong>Import Presets</strong>"
-                        save_notification += "<br/>5. Choose the <code>.lrtemplate</code> file(s) and click <strong>Import</strong>"
-                        save_notification += "<br/><br/>The imported presets will now be available in your Presets panel for use on your photos."
-                        save_notification += "</div>"
-
-                        # Use the compact_text function to process text
-                        save_notification = compact_text(save_notification)
-
-                        # Update conversion message
-                        if conversion_index is not None:
-                            chat_history[conversion_index] = {
-                                "role": "assistant",
-                                "content": save_notification
-                            }
-                    else:
-                        # Show conversion failed message
-                        if conversion_index is not None:
-                            chat_history[conversion_index] = {
-                                "role": "assistant",
-                                "content": "<div style='margin:0;padding:0;margin-top:-20px'>❌ <strong style='margin:0;padding:0'>Lightroom config conversion failed</strong><br/><em>No valid configuration data found in recommendations.</em></div>"
-                            }
-                else:
-                    print(f"🔍 Debug: No JSON objects found, adding debug message to chat")
-                    # Add debug message to show what was found
-                    debug_msg = "<div style='margin:0;padding:0;margin-top:-20px'>"
-                    debug_msg += "🔍 <strong style='margin:0;padding:0'>Debug Information</strong><br/>"
-                    debug_msg += "<strong>Answer Content Preview:</strong><br/><pre style='margin:0;padding:4px'>" + answer_content[:500] + "...</pre><br/>"
-                    debug_msg += "<strong>Extraction Attempted:</strong> No valid JSON objects found in the recommendations."
-                    debug_msg += "</div>"
-
-                    # Use the compact_text function to process text
-                    debug_msg = compact_text(debug_msg)
-
-                    chat_history.append({
-                        "role": "assistant",
-                        "content": debug_msg
-                    })
-            else:
-                print(f"🔍 Debug: No answer match found in full_response")
-        else:
-            # If not ended normally, try to parse and format final response
-            print(f"🔍 Debug: Non-normal completion, stage: {stage}")
-            think_match = re.search(r'<think>(.*?)</think>', full_response, re.DOTALL)
-            answer_match = re.search(r'<answer>(.*?)</answer>', full_response, re.DOTALL)
-
-            if think_match:
-                thinking_content = think_match.group(1).strip()
-                formatted_thinking = f"💭 **Thinking**\n{thinking_content}"
-                chat_history[ai_message_index] = {
-                    "role": "assistant",
-                    "content": formatted_thinking
-                }
-
-            if answer_match:
-                answer_content = answer_match.group(1).strip()
-                formatted_answer = f"✨ **Professional Editing Recommendations**\n{answer_content}"
-                if recommendations_index is not None:
-                    chat_history[recommendations_index] = {
-                        "role": "assistant",
-                        "content": formatted_answer
-                    }
-                else:
-                    chat_history.append({
-                        "role": "assistant",
-                        "content": formatted_answer
-                    })
-
-                # Extract and save JSON objects from answer even if not completed normally
-                json_objects = extract_json_from_answer(answer_content)
-                print(f"🔍 Debug: Non-normal completion found {len(json_objects)} JSON objects")
-
-                if json_objects:
-                    # Show Lightroom configuration conversion in progress
-                    conversion_index = None
-                    chat_history.append({
-                        "role": "assistant",
-                        "content": "<div style='margin:0;padding:0;margin-top:-20px'>⚙️ <strong style='margin:0;padding:0'>Lightroom Configuration Converting...</strong><br/><em>Converting editing parameters to Lightroom-compatible format...</em></div>"
-                    })
-                    conversion_index = len(chat_history) - 1
-                    yield chat_history, None
-
-                    # Same processing logic... (omitting repetitive code here for brevity)
-                    # [Continue processing logic, format as above]
-
-        # Prepare download files if available
-        download_file_list = []
-        if latest_session_dir and os.path.exists(latest_session_dir):
-            for file in os.listdir(latest_session_dir):
-                if file.endswith('.lrtemplate'):
-                    download_file_list.append(os.path.join(latest_session_dir, file))
-
-        yield chat_history, download_file_list if download_file_list else None
-
-    except Exception as e:
-        error_msg = f"❌ **Oops! Something went wrong**\n\n```\nError: {str(e)}\n```\n\n💡 **Try again with:**\n- A different image format\n- A simpler description\n- Refreshing the page"
-        chat_history = [
-            {"role": "user", "content": "Image analysis request"},
-            {"role": "assistant", "content": error_msg}
-        ]
-        yield chat_history, None
 
 # Create Gradio interface
 def create_interface():
@@ -1334,9 +822,9 @@ def create_interface():
     # Event binding - simplified to match test1.py working pattern
 
     input_image.change(
-        fn=get_box_coordinates_simple,
-        inputs=input_image,
-        outputs=coordinates_output
+        fn=get_box_coordinates,
+        inputs=[input_image, user_prompt],
+        outputs=[coordinates_output, user_prompt]
     )
     # Main processing button - streaming output, pass all parameters
     process_btn.click(
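
For context on the coordinate handlers touched above: the annotator reports boxes in pixel space, and both handlers divide by the image dimensions to get normalized 0–1 coordinates. A minimal standalone sketch of that normalization, assuming the image arrives as a NumPy array; note that `shape[:2]` unpacks as `(height, width)`, and the function and variable names here are illustrative rather than taken from app.py:

```python
import numpy as np

def normalize_box(image: np.ndarray, box: dict) -> list:
    """Convert a pixel-space box dict to [xmin, ymin, xmax, ymax] in the 0-1 range."""
    height, width = image.shape[:2]  # NumPy image arrays expose shape as (height, width, channels)
    return [
        box["xmin"] / width,
        box["ymin"] / height,
        box["xmax"] / width,
        box["ymax"] / height,
    ]

# Example: a 200x100 (HxW) image with a box covering its right half
image = np.zeros((200, 100, 3), dtype=np.uint8)
print(normalize_box(image, {"xmin": 50, "ymin": 0, "xmax": 100, "ymax": 200}))
# -> [0.5, 0.0, 1.0, 1.0]
```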
 
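The last hunk rewires `input_image.change` from `get_box_coordinates_simple` to the prompt-aware `get_box_coordinates`, reading two components and updating two components in a single event. A minimal, self-contained sketch of that multi-output event wiring in Gradio, using a plain `gr.Image` as a stand-in for the project's `image_annotator` component (the component layout and handler body are illustrative, not the app's real code):

```python
import gradio as gr

def on_image_change(image, prompt):
    # Stand-in for get_box_coordinates: return coordinates text plus an updated prompt.
    if image is None:
        return "No box drawn", prompt
    coords = "[0.1, 0.1, 0.9, 0.9]"  # placeholder; the real handler reads annotator boxes
    return coords, f"In the region <box>{coords}</box>, {prompt}"

with gr.Blocks() as demo:
    input_image = gr.Image(label="Image")  # the app uses image_annotator here
    user_prompt = gr.Textbox(label="Prompt", value="enhance the colors")
    coordinates_output = gr.Textbox(label="Box coordinates")

    # A single event can read several components and update several others
    # by listing them in inputs= and outputs=.
    input_image.change(
        fn=on_image_change,
        inputs=[input_image, user_prompt],
        outputs=[coordinates_output, user_prompt],
    )

demo.launch()
```

The handler's return values are matched positionally to `outputs`, which is how the commit's version can refresh the coordinates box and rewrite the prompt in one step.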