LYL1015 committed
Commit 9d03193 · verified · 1 Parent(s): cf37e59

Update app.py

Files changed (1): app.py +13 -525
app.py CHANGED
@@ -2,33 +2,6 @@ import os
 import subprocess
 import os
 from pathlib import Path
-BASE_DIR = Path("/home/user/app")
-commands = [
-    ("python -V", BASE_DIR),
-    ("pip install -r my_requirements.txt", BASE_DIR)
-]
-
-def run_command(cmd, cwd=None):
-    try:
-        result = subprocess.run(
-            cmd,  # Note: shlex.split() is no longer used here
-            cwd=str(cwd) if cwd else None,
-            shell=True,  # shell=True is needed to support operators like &&
-            check=True,
-            stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE,
-            text=True
-        )
-        print(f"[SUCCESS] {cmd}")
-        if result.stdout: print(result.stdout)
-        return True
-    except subprocess.CalledProcessError as e:
-        print(f"[FAILED] {cmd}")
-        print(f"Error: {e.stderr}")
-        return False
-
-for cmd, cwd in commands:
-    run_command(cmd, cwd)
 
 import re
 import gradio as gr
@@ -45,12 +18,7 @@ from gradio_image_annotation import image_annotator
 from werkzeug.utils import secure_filename  # Add this import
 from utils.system_prompt import SHORT_SYSTEM_PROMPT_WITH_THINKING
 from utils.lua_converter import LuaConverter
-from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
-from qwen_vl_utils import process_vision_info
-import torch
 from utils.lua2lrt import lua_to_lrtemplate
-from huggingface_hub import snapshot_download
-import spaces
 
 
 def extract_json_from_answer(answer):
@@ -178,40 +146,6 @@ def json_to_lua(json_data, save_folder, filename="config.lua"):
     except Exception as e:
         return None, f"Error in json_to_lua: {str(e)}"
 
-# Model downloader
-def download_tools_ckpts(target_dir, url):
-    from huggingface_hub import snapshot_download
-    import os
-    import shutil
-
-    tmp_dir = "hf_temp_download"
-    os.makedirs(tmp_dir, exist_ok=True)
-
-    snapshot_download(
-        repo_id="JarvisArt/JarvisArt-Preview",
-        repo_type="model",
-        local_dir=tmp_dir,
-        allow_patterns=os.path.join(url, "**"),
-        local_dir_use_symlinks=False,
-    )
-
-    src_dir = os.path.join(tmp_dir, url)
-
-
-    shutil.copytree(src_dir, target_dir)
-
-    shutil.rmtree(tmp_dir)
-
-def download_model(model_path):
-    """
-    Download model from HuggingFace if not exists locally
-
-    Args:
-        model_path (str): Path to save the model
-    """
-    if not os.path.exists(model_path):
-        download_tools_ckpts(model_path, "pretrained/preview")
-    else:
-        print(f"✅ Model already exists at {model_path}")
 
 # Local model client class
 class LocalModelClient:
@@ -226,42 +160,13 @@ class LocalModelClient:
         self.model = None
         self.processor = None
         self.model_loaded = False
-
-        # Download model if needed
-        download_model(model_path)
+
 
         # Load model
         self._load_model()
 
     def _load_model(self):
-        """
-        Load the model and processor
-        """
-        try:
-            print(f"🔄 Loading model from {self.model_path}...")
-
-            # Model configuration
-            min_pixels = 256 * 28 * 28
-            max_pixels = 1280 * 28 * 28
-
-            # Load model
-            self.model = Qwen2VLForConditionalGeneration.from_pretrained(
-                self.model_path,
-                torch_dtype="auto",
-                device_map="auto",
-                min_pixels=min_pixels,
-                max_pixels=max_pixels
-            )
-
-            # Load processor
-            self.processor = AutoProcessor.from_pretrained(self.model_path)
-
-            print(f"✅ Model loaded successfully from {self.model_path}")
-            self.model_loaded = True
-
-        except Exception as e:
-            print(f"❌ Model loading failed: {str(e)}")
-            self.model_loaded = False
+        print(f"🔍 Loading model from {self.model_path}...")
 
     def chat(self, messages, system=None, images=None, **kwargs):
         """
@@ -328,31 +233,8 @@ class LocalModelClient:
             formatted_messages, tokenize=False, add_generation_prompt=True
         )
 
-        # Process vision info
-        image_inputs, video_inputs = process_vision_info(formatted_messages)
-
-        # Prepare inputs
-        inputs = self.processor(
-            text=[text],
-            images=image_inputs,
-            videos=video_inputs,
-            padding=True,
-            return_tensors="pt",
-        )
-
-        # Move inputs to device
-        device = next(self.model.parameters()).device
-        inputs = inputs.to(device)
-
-        # Generate response
-        generated_ids = self.model.generate(**inputs, max_new_tokens=10240)
-        generated_ids_trimmed = [
-            out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
-        ]
-        output_text = self.processor.batch_decode(
-            generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
-        )
 
+        output_text = "hello world"  # Placeholder for actual model output
         # Create Response object for compatibility
         class Response:
             def __init__(self, text):
@@ -484,13 +366,13 @@ def parse_args():
     parser.add_argument(
         "--server_port",
         type=int,
-        default=7860,  # Change to standard Gradio port
+        default=7861,  # Change to standard Gradio port
         help="Port for the Gradio server"
     )
     parser.add_argument(
         "--server_name",
        type=str,
-        default="0.0.0.0",
+        default="127.0.0.1",
         help="Server name/IP for the Gradio server"
     )
     parser.add_argument(
@@ -710,7 +592,7 @@ def get_box_coordinates(annotated_image_dict, prompt_original):
         image_key = str(input_image)  # Use string representation as key
 
         last_box = annotated_image_dict["boxes"][-1]
-        width, height = pil_image.width, pil_image.height
+        width, height = pil_image.shape[:2]
 
         xmin = last_box["xmin"] / width
         ymin = last_box["ymin"] / height
@@ -719,7 +601,7 @@ def get_box_coordinates(annotated_image_dict, prompt_original):
 
         local_dict[image_key] = [xmin, ymin, xmax, ymax]
         # Format the coordinates into a string
-
+        print(str([xmin, ymin, xmax, ymax]))
         return str([xmin, ymin, xmax, ymax]), f"In the region <box>{str([xmin, ymin, xmax, ymax])}</box>, {prompt_original}"
     return "No box drawn", prompt_original
 
@@ -743,7 +625,7 @@ def get_box_coordinates_simple(annotated_image_dict):
         image_key = str(input_image)  # Use string representation as key
 
         last_box = annotated_image_dict["boxes"][-1]
-        width, height = pil_image.width, pil_image.height
+        height, width = pil_image.shape[:2]
 
         xmin = last_box["xmin"] / width
         ymin = last_box["ymin"] / height
@@ -752,406 +634,12 @@ def get_box_coordinates_simple(annotated_image_dict):
 
         local_dict[image_key] = [xmin, ymin, xmax, ymax]
         # Format the coordinates into a string
-
+        print(str([xmin, ymin, xmax, ymax]))
         return str([xmin, ymin, xmax, ymax])
     return "No bounding box drawn yet."
 
-@spaces.GPU
 def process_analysis_pipeline_stream(image_dict, user_prompt, max_new_tokens, top_k, top_p, temperature):
+    print("🔍 Processing analysis pipeline with streaming output...")
-    """
-    Main analysis pipeline with streaming output, modern chat interface style, and image display support
-
-    Args:
-        image (str): Path to the input image
-        user_prompt (str): User-defined prompt for analysis
-        max_new_tokens (int): Maximum number of new tokens to generate
-        top_k (int): Top-k sampling parameter
-        top_p (float): Top-p (nucleus) sampling parameter
-        temperature (float): Temperature for sampling
-
-    Yields:
-        list: Updated chat_history for Gradio UI updates (messages format)
-    """
-    if image_dict is None or image_dict.get('image') is None:
-        yield [
-            {"role": "user", "content": "Please upload an image first! 📸"},
-            {"role": "assistant", "content": "I need an image to analyze before I can provide editing recommendations."}
-        ], None
-        return
-
-    # Extract image from the image_dict
-    image = image_dict['image']
-
-    # Handle the case where image is a PIL Image object - need to save it temporarily
-    if not isinstance(image, str):
-        import tempfile
-        import os
-        # Save PIL image to temporary file
-        temp_dir = tempfile.gettempdir()
-        temp_path = os.path.join(temp_dir, f"temp_image_{hash(str(image))}.png")
-        image.save(temp_path)
-        image = temp_path
-
-    if not user_prompt.strip():
-        user_prompt = default_user_prompt
-    elif len(local_dict) > 0 and image in local_dict and local_dict[image][0] != local_dict[image][2]:
-        user_prompt = user_prompt.replace('<box></box>', f'<box>{str(local_dict[image])}</box>')
-
-
-    try:
-        # Initialize chat history with user message including image
-        chat_history = []
-
-        # Create user message with image and instructions - using messages format
-        user_message_text = f"**Instructions:** {user_prompt}".replace('<box>', f'(').replace('</box>', f')')
-
-        # Add user message with image
-        if image_dict:
-            # For messages format, we need to handle images differently
-            # First add the image
-            chat_history.append({
-                "role": "user",
-                "content": {
-                    "path": image,
-                    "mime_type": "image/jpeg"
-                }
-            })
-            # Then add text message
-            chat_history.append({
-                "role": "user",
-                "content": user_message_text
-            })
-        else:
-            chat_history.append({
-                "role": "user",
-                "content": user_message_text
-            })
-        yield chat_history, None
-
-        # JarvisArt starts responding
-        chat_history.append({
-            "role": "assistant",
-            "content": "<div style='margin:0;padding:0'>🎨 <strong style='margin:0;padding:0'>JarvisArt is analyzing your image...</strong><br/><em>Please wait while I examine the details and understand your vision.</em></div>"
-        })
-        ai_message_index = len(chat_history) - 1  # Record AI message index position
-        recommendations_index = None  # Initialize recommendations message index
-        yield chat_history, None
-
-        # Get streaming response
-        full_response = ""
-        token_count = 0
-        update_frequency = 8  # Reduce update frequency for smoother experience
-
-        # Stage marker
-        stage = "starting"  # starting, thinking, answer, completed
-        answer_completed = False  # Flag to track if answer is completed
-
-        for new_token in get_llm_response_with_custom_prompt_stream(
-            image, user_prompt, max_new_tokens, top_k, top_p, temperature
-        ):
-            full_response += new_token
-            token_count += 1
-
-            # Detect thinking stage
-            if "<think>" in full_response and stage == "starting":
-                stage = "thinking"
-                chat_history[ai_message_index] = {
-                    "role": "assistant",
-                    "content": "💭 **Thinking Process**\n*Analyzing image characteristics and understanding your creative vision...*"
-                }
-                yield chat_history, None
-                continue
-
-            # Thinking completed
-            if "</think>" in full_response and stage == "thinking":
-                stage = "between"
-                think_match = re.search(r'<think>(.*?)</think>', full_response, re.DOTALL)
-                if think_match:
-                    thinking_content = think_match.group(1).strip()
-                    # Use the compact_text function to process text
-                    thinking_content = compact_text(thinking_content).replace('<box>', f'(').replace('</box>', f')')
-                    # Use special formatting to force eliminate spacing
-                    formatted_thinking = f"<div style='margin:0;padding:0'>💭 <strong style='margin:0;padding:0'>Thinking</strong><div style='margin:0;padding:0'>{thinking_content}</div></div>"
-                    chat_history[ai_message_index] = {
-                        "role": "assistant",
-                        "content": formatted_thinking
-                    }
-                yield chat_history, None
-                continue
-
-            # Detect answer stage
-            if "<answer>" in full_response and stage in ["between", "thinking"]:
-                stage = "answer"
-                # Use special formatting to force eliminate spacing
-                initial_recommendations = "<div style='margin:0;padding:0;margin-top:-30px'>✨ <strong style='margin:0;padding:0'>Professional Editing Recommendations</strong><div style='margin:0;padding:0'>*Generating personalized editing suggestions...*</div></div>"
-                chat_history.append({
-                    "role": "assistant",
-                    "content": initial_recommendations
-                })
-                recommendations_index = len(chat_history) - 1  # Record recommendations message index
-                yield chat_history, None
-                continue
-
-            # Answer completed
-            if "</answer>" in full_response and stage == "answer" and not answer_completed:
-                stage = "completed"
-                answer_completed = True
-                answer_match = re.search(r'<answer>(.*?)</answer>', full_response, re.DOTALL)
-                if answer_match:
-                    answer_content = answer_match.group(1).strip()
-                    # Use the compact_text function to process text
-                    answer_content = compact_text(answer_content)
-
-                    # Use special formatting to force eliminate spacing
-                    formatted_answer = f"<div style='margin:0;padding:0;margin-top:-30px'>✨ <strong style='margin:0;padding:0'>Professional Editing Recommendations</strong><div style='margin:0;padding:0'>{answer_content}</div></div>"
-
-                    chat_history[recommendations_index] = {
-                        "role": "assistant",
-                        "content": formatted_answer
-                    }
-                    yield chat_history
-                # Don't break here - continue to Final completion for JSON extraction
-
-            # Real-time content updates (reduced frequency) - only if answer not completed
-            if token_count % update_frequency == 0 and not answer_completed:
-                if stage == "thinking":
-                    current_thinking = full_response.split("<think>")[-1].replace("</think>", "").strip()
-                    if current_thinking and len(current_thinking) > 20:  # Avoid displaying too short content
-                        # Use the compact_text function to process text
-                        current_thinking = compact_text(current_thinking)
-                        # Use special formatting to force eliminate spacing
-                        formatted_thinking = f"<div style='margin:0;padding:0'>💭 <strong style='margin:0;padding:0'>Thinking</strong><div style='margin:0;padding:0'>{current_thinking}...<br/><em>Still analyzing...</em></div></div>"
-                        chat_history[ai_message_index] = {
-                            "role": "assistant",
-                            "content": formatted_thinking
-                        }
-                        yield chat_history
-
-                elif stage == "answer":
-                    current_answer = full_response.split("<answer>")[-1].replace("</answer>", "").strip()
-                    if current_answer and len(current_answer) > 30:  # Avoid displaying too short content
-                        # Use the compact_text function to process text
-                        current_answer = compact_text(current_answer)
-                        # Use special formatting to force eliminate spacing
-                        formatted_answer = f"<div style='margin:0;padding:0;margin-top:-30px'>✨ <strong style='margin:0;padding:0'>JarvisArt Recommendations</strong><div style='margin:0;padding:0'>{current_answer}...<br/><em>Generating more suggestions...</em></div></div>"
-                        if recommendations_index is not None:
-                            chat_history[recommendations_index] = {
-                                "role": "assistant",
-                                "content": formatted_answer
-                            }
-                        else:
-                            chat_history.append({
-                                "role": "assistant",
-                                "content": formatted_answer
-                            })
-                            recommendations_index = len(chat_history) - 1
-                        yield chat_history, None
-
-        # Final completion
-        if stage == "completed":
-            # Analysis is complete, now process and save lua files
-            print(f"🔍 Debug: Final completion stage reached")
-            answer_match = re.search(r'<answer>(.*?)</answer>', full_response, re.DOTALL)
-            if answer_match:
-                answer_content = answer_match.group(1).strip()
-                print(f"🔍 Debug: Extracted answer content (first 200 chars): {answer_content[:200]}...")
-
-                # Extract JSON objects from the answer
-                json_objects = extract_json_from_answer(answer_content)
-                print(f"🔍 Debug: Found {len(json_objects)} JSON objects")
-
-                # Save JSON objects as lua files
-                if json_objects:
-                    print(f"🔍 Debug: Processing {len(json_objects)} JSON objects for conversion")
-                    conversion_index = None
-                    chat_history.append({
-                        "role": "assistant",
-                        "content": "<div style='margin:0;padding:0;margin-top:-20px'>⚙️ <strong style='margin:0;padding:0'>Lightroom Configuration Converting...</strong><br/><em>Converting editing parameters to Lightroom-compatible format...</em></div>"
-                    })
-                    conversion_index = len(chat_history) - 1
-                    yield chat_history
-
-                    # Create lua_results folder in the same directory as this script
-                    script_dir = os.path.dirname(os.path.abspath(__file__))
-                    results_dir = os.path.join(script_dir, "results")
-                    os.makedirs(results_dir, exist_ok=True)
-
-                    # Generate timestamp for unique session folder name
-                    timestamp = int(time.time())
-                    session_folder_name = f"example_{timestamp}"
-                    session_dir = os.path.join(results_dir, session_folder_name)
-                    os.makedirs(session_dir, exist_ok=True)
-
-                    # Copy the uploaded image to the session folder
-                    import shutil
-                    # Use secure_filename and hash to generate unique original image filename, avoiding conflicts with processed images
-                    original_filename = secure_filename(os.path.basename(image))
-                    file_hash = hashlib.md5(f"{original_filename}_{time.time()}".encode()).hexdigest()
-
-                    # Keep original extension
-                    file_ext = os.path.splitext(original_filename)[1] or '.jpg'
-                    unique_original_filename = f"original_{file_hash}{file_ext}"
-
-                    image_dest_path = os.path.join(session_dir, unique_original_filename)
-                    shutil.copy2(image, image_dest_path)
-
-                    # Save the full model response to a text file
-                    response_file_path = os.path.join(session_dir, "full_response.txt")
-                    with open(response_file_path, "w", encoding="utf-8") as f:
-                        f.write(full_response)
-
-                    # Save user prompt to a text file
-                    prompt_file_path = os.path.join(session_dir, "user_prompt.txt")
-                    with open(prompt_file_path, "w", encoding="utf-8") as f:
-                        f.write(user_prompt)
-
-                    saved_files = []
-                    lrtemplate_files = []
-                    for i, json_obj in enumerate(json_objects):
-                        filename = f"config_{i+1}.lua"
-                        lua_path, error = json_to_lua(json_obj, session_dir, filename)
-
-                        if lua_path:
-                            saved_files.append(lua_path)
-                            print(f"✅ Saved Lua config: {lua_path}")
-
-                            # Convert lua to lrtemplate
-                            try:
-                                lrtemplate_path = lua_to_lrtemplate(lua_path)
-                                lrtemplate_files.append(lrtemplate_path)
-                                print(f"✅ Converted to Lightroom preset: {lrtemplate_path}")
-                            except Exception as e:
-                                print(f"⚠️ Failed to convert {lua_path} to lrtemplate: {str(e)}")
-                        else:
-                            print(f"❌ Failed to save Lua config {i+1}: {error}")
-
-
-                    # Update file save notification
-                    if saved_files:
-                        save_notification = "<div style='margin:0;padding:0;margin-top:-20px'>"
-                        save_notification += "✅ <strong style='margin:0;padding:0'>Files saved successfully!</strong><br/>"
-                        save_notification += "📁 <strong>Save location:</strong> <code>results/" + session_folder_name + "/</code><br/>"
-                        save_notification += "📄 <strong>Generated files:</strong><br/>"
-                        save_notification += " • Original image: <code>" + unique_original_filename + "</code><br/>"
-                        save_notification += " • Full response: <code>full_response.txt</code><br/>"
-                        save_notification += " • User prompt: <code>user_prompt.txt</code><br/>"
-                        save_notification += " • Config files: " + str(len(saved_files)) + " files"
-                        save_notification += "<br/> • Lightroom presets: " + str(len(lrtemplate_files)) + " files"
-
-                        save_notification += "<br/><strong>Config files:</strong>"
-                        for i, file_path in enumerate(saved_files):
-                            filename = os.path.basename(file_path)
-                            save_notification += "<br/> • <code>" + filename + "</code>"
-
-                        save_notification += "<br/><strong>Lightroom Presets:</strong>"
-                        for i, file_path in enumerate(lrtemplate_files):
-                            filename = os.path.basename(file_path)
-                            save_notification += "<br/> • <code>" + filename + "</code>"
-
-                        save_notification += "<br/><br/>🎨 <strong>How to use Lightroom Presets:</strong>"
-                        save_notification += "<br/>1. Open Adobe Lightroom"
-                        save_notification += "<br/>2. Go to the <strong>Presets</strong> panel"
-                        save_notification += "<br/>3. Click on the <strong>+</strong> icon"
-                        save_notification += "<br/>4. Select <strong>Import Presets</strong>"
-                        save_notification += "<br/>5. Choose the <code>.lrtemplate</code> file(s) and click <strong>Import</strong>"
-                        save_notification += "<br/><br/>The imported presets will now be available in your Presets panel for use on your photos."
-                        save_notification += "</div>"
-
-                        # Use the compact_text function to process text
-                        save_notification = compact_text(save_notification)
-
-                        # Update conversion message
-                        if conversion_index is not None:
-                            chat_history[conversion_index] = {
-                                "role": "assistant",
-                                "content": save_notification
-                            }
-                    else:
-                        # Show conversion failed message
-                        if conversion_index is not None:
-                            chat_history[conversion_index] = {
-                                "role": "assistant",
-                                "content": "<div style='margin:0;padding:0;margin-top:-20px'>❌ <strong style='margin:0;padding:0'>Lightroom config conversion failed</strong><br/><em>No valid configuration data found in recommendations.</em></div>"
-                            }
-                else:
-                    print(f"🔍 Debug: No JSON objects found, adding debug message to chat")
-                    # Add debug message to show what was found
-                    debug_msg = "<div style='margin:0;padding:0;margin-top:-20px'>"
-                    debug_msg += "🔍 <strong style='margin:0;padding:0'>Debug Information</strong><br/>"
-                    debug_msg += "<strong>Answer Content Preview:</strong><br/><pre style='margin:0;padding:4px'>" + answer_content[:500] + "...</pre><br/>"
-                    debug_msg += "<strong>Extraction Attempted:</strong> No valid JSON objects found in the recommendations."
-                    debug_msg += "</div>"
-
-                    # Use the compact_text function to process text
-                    debug_msg = compact_text(debug_msg)
-
-                    chat_history.append({
-                        "role": "assistant",
-                        "content": debug_msg
-                    })
-            else:
-                print(f"🔍 Debug: No answer match found in full_response")
-        else:
-            # If not ended normally, try to parse and format final response
-            print(f"🔍 Debug: Non-normal completion, stage: {stage}")
-            think_match = re.search(r'<think>(.*?)</think>', full_response, re.DOTALL)
-            answer_match = re.search(r'<answer>(.*?)</answer>', full_response, re.DOTALL)
-
-            if think_match:
-                thinking_content = think_match.group(1).strip()
-                formatted_thinking = f"💭 **Thinking**\n{thinking_content}"
-                chat_history[ai_message_index] = {
-                    "role": "assistant",
-                    "content": formatted_thinking
-                }
-
-            if answer_match:
-                answer_content = answer_match.group(1).strip()
-                formatted_answer = f"✨ **Professional Editing Recommendations**\n{answer_content}"
-                if recommendations_index is not None:
-                    chat_history[recommendations_index] = {
-                        "role": "assistant",
-                        "content": formatted_answer
-                    }
-                else:
-                    chat_history.append({
-                        "role": "assistant",
-                        "content": formatted_answer
-                    })
-
-                # Extract and save JSON objects from answer even if not completed normally
-                json_objects = extract_json_from_answer(answer_content)
-                print(f"🔍 Debug: Non-normal completion found {len(json_objects)} JSON objects")
-
-                if json_objects:
-                    # Show Lightroom configuration conversion in progress
-                    conversion_index = None
-                    chat_history.append({
-                        "role": "assistant",
-                        "content": "<div style='margin:0;padding:0;margin-top:-20px'>⚙️ <strong style='margin:0;padding:0'>Lightroom Configuration Converting...</strong><br/><em>Converting editing parameters to Lightroom-compatible format...</em></div>"
-                    })
-                    conversion_index = len(chat_history) - 1
-                    yield chat_history, None
-
-                    # Same processing logic... (omitting repetitive code here for brevity)
-                    # [Continue processing logic, format as above]
-
-        # Prepare download files if available
-        download_file_list = []
-        if latest_session_dir and os.path.exists(latest_session_dir):
-            for file in os.listdir(latest_session_dir):
-                if file.endswith('.lrtemplate'):
-                    download_file_list.append(os.path.join(latest_session_dir, file))
-
-        yield chat_history, download_file_list if download_file_list else None
-
-    except Exception as e:
-        error_msg = f"❌ **Oops! Something went wrong**\n\n```\nError: {str(e)}\n```\n\n💡 **Try again with:**\n- A different image format\n- A simpler description\n- Refreshing the page"
-        chat_history = [
-            {"role": "user", "content": "Image analysis request"},
-            {"role": "assistant", "content": error_msg}
-        ]
-        yield chat_history, None
 
 # Create Gradio interface
 def create_interface():
@@ -1334,9 +822,9 @@ def create_interface():
     # Event binding - simplified to match test1.py working pattern
 
     input_image.change(
-        fn=get_box_coordinates_simple,
-        inputs=input_image,
-        outputs=coordinates_output
+        fn=get_box_coordinates,
+        inputs=[input_image, user_prompt],
+        outputs=[coordinates_output, user_prompt]
     )
     # Main processing button - streaming output, pass all parameters
     process_btn.click(
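
For context on the coordinate handlers touched above: the annotator reports boxes in pixel space, and both handlers divide by the image dimensions to get normalized 0–1 coordinates. A minimal standalone sketch of that normalization, assuming the image arrives as a NumPy array; note that `shape[:2]` unpacks as `(height, width)`, and the function and variable names here are illustrative rather than taken from app.py:

```python
import numpy as np

def normalize_box(image: np.ndarray, box: dict) -> list:
    """Convert a pixel-space box dict to [xmin, ymin, xmax, ymax] in the 0-1 range."""
    height, width = image.shape[:2]  # NumPy image arrays expose shape as (height, width, channels)
    return [
        box["xmin"] / width,
        box["ymin"] / height,
        box["xmax"] / width,
        box["ymax"] / height,
    ]

# Example: a 200x100 (HxW) image with a box covering its right half
image = np.zeros((200, 100, 3), dtype=np.uint8)
print(normalize_box(image, {"xmin": 50, "ymin": 0, "xmax": 100, "ymax": 200}))
# -> [0.5, 0.0, 1.0, 1.0]
```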
 
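The last hunk rewires `input_image.change` from `get_box_coordinates_simple` to the prompt-aware `get_box_coordinates`, reading two components and updating two components in a single event. A minimal, self-contained sketch of that multi-output event wiring in Gradio, using a plain `gr.Image` as a stand-in for the project's `image_annotator` component (the component layout and handler body are illustrative, not the app's real code):

```python
import gradio as gr

def on_image_change(image, prompt):
    # Stand-in for get_box_coordinates: return coordinates text plus an updated prompt.
    if image is None:
        return "No box drawn", prompt
    coords = "[0.1, 0.1, 0.9, 0.9]"  # placeholder; the real handler reads annotator boxes
    return coords, f"In the region <box>{coords}</box>, {prompt}"

with gr.Blocks() as demo:
    input_image = gr.Image(label="Image")  # the app uses image_annotator here
    user_prompt = gr.Textbox(label="Prompt", value="enhance the colors")
    coordinates_output = gr.Textbox(label="Box coordinates")

    # A single event can read several components and update several others
    # by listing them in inputs= and outputs=.
    input_image.change(
        fn=on_image_change,
        inputs=[input_image, user_prompt],
        outputs=[coordinates_output, user_prompt],
    )

demo.launch()
```

The handler's return values are matched positionally to `outputs`, which is how the commit's version can refresh the coordinates box and rewrite the prompt in one step.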