"""Gradio front end for nanoVLM that runs the repository's ``generate.py`` script.

Each request saves the uploaded image to a temporary JPEG file, runs
``generate.py`` in a subprocess with the image path and the prompt, and parses
the first generation out of the script's standard output.
"""

import os
import shlex
import subprocess  # For calling generate.py
import sys
import tempfile  # For handling temporary image files
import traceback
from typing import List, Optional

import gradio as gr
from PIL import Image as PILImage

# Add the cloned nanoVLM directory to Python's system path
# (generate.py might need this too if it imports from 'models').
NANOVLM_REPO_PATH = "/app/nanoVLM"
if NANOVLM_REPO_PATH not in sys.path:
    print(f"DEBUG: Adding {NANOVLM_REPO_PATH} to sys.path")
    sys.path.insert(0, NANOVLM_REPO_PATH)
print(f"DEBUG: Python sys.path: {sys.path}")

# Path to the generate.py script within our Docker container
GENERATE_SCRIPT_PATH = "/app/nanoVLM/generate.py"
MODEL_REPO_ID = "lusxvr/nanoVLM-222M"  # Model ID for generate.py

# Upper bound, in seconds, on a single generate.py run.
# NOTE(review): the original inline comment said "2 minutes" but the value has
# always been 2400 s (40 minutes); the value is kept as-is -- confirm the
# intended limit.
GENERATION_TIMEOUT_SECONDS = 2400

# Markers used to locate the generated text in generate.py's stdout, which
# looks like:
#     Outputs:
#       >> Generation 1: Actual generated text here.
_OUTPUTS_MARKER = "Outputs:"
_FIRST_GENERATION_PREFIX = ">> Generation 1:"
_PARSE_FAILURE_MESSAGE = "Error: Could not parse output from generate.py script."

# Image modes Pillow's JPEG writer accepts directly; anything else (RGBA, P,
# LA, ...) is converted to RGB before saving.
_JPEG_WRITABLE_MODES = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")

print(f"DEBUG: Using generate.py script at: {GENERATE_SCRIPT_PATH}")
print(f"DEBUG: Using model repo ID: {MODEL_REPO_ID}")


def _build_generate_command(image_path: str, prompt_text: str) -> List[str]:
    """Return the argv list used to run generate.py for one image and prompt."""
    return [
        # Use the interpreter running this app so the subprocess sees the same
        # installed packages; fall back to PATH lookup if it is unknown.
        sys.executable or "python",
        "-u",
        GENERATE_SCRIPT_PATH,
        "--hf_model", MODEL_REPO_ID,
        "--image", image_path,
        "--prompt", prompt_text,
        "--generations", "1",  # Get one generation for the UI
        "--max_new_tokens", "70",  # Adjust as needed
        # --device is handled by generate.py internally
    ]


def _parse_generation_output(stdout: str) -> str:
    """Extract the text of "Generation 1" from generate.py's stdout.

    Only lines after the first line containing ``Outputs:`` are considered,
    and only the single line starting with ``>> Generation 1:`` is returned
    (text that continues on following lines is not captured). Returns a fixed
    error message when no such line is found.
    """
    seen_outputs_marker = False
    for line in stdout.splitlines():
        if not seen_outputs_marker:
            seen_outputs_marker = _OUTPUTS_MARKER in line
            continue
        stripped = line.strip()
        if stripped.startswith(_FIRST_GENERATION_PREFIX):
            # Slice off the prefix instead of splitting on "prefix + space" so
            # that empty generations or different spacing never leak the
            # prefix into the result.
            return stripped[len(_FIRST_GENERATION_PREFIX):].strip()
    return _PARSE_FAILURE_MESSAGE


def call_generate_script(image_path: str, prompt_text: str) -> str:
    """Call the generate.py script as a subprocess and return its output.

    Args:
        image_path: Path of the image file passed to ``--image``.
        prompt_text: Prompt passed to ``--prompt``.

    Returns:
        The parsed text of the first generation, or a human-readable error
        message if the script fails, times out, or its output cannot be
        parsed. This function does not raise; all failures are reported via
        the returned string and logged to stdout.
    """
    print(f"DEBUG (call_generate_script): Calling with image_path='{image_path}', prompt='{prompt_text}'")

    cmd_args = _build_generate_command(image_path, prompt_text)
    # shlex.join quotes arguments so the logged command can be copy-pasted.
    print(f"DEBUG (call_generate_script): Executing command: {shlex.join(cmd_args)}")

    try:
        process = subprocess.run(
            cmd_args,
            capture_output=True,
            text=True,
            errors="replace",  # Never fail on undecodable bytes in the output
            check=True,  # Raise an exception for non-zero exit codes
            timeout=GENERATION_TIMEOUT_SECONDS,
        )
    except subprocess.CalledProcessError as e:
        print(f"ERROR (call_generate_script): generate.py exited with error code {e.returncode}")
        print(f"ERROR (call_generate_script): STDOUT: {e.stdout}")
        print(f"ERROR (call_generate_script): STDERR: {e.stderr}")
        return f"Error executing generation script (Code {e.returncode}). Check logs."
    except subprocess.TimeoutExpired:
        print("ERROR (call_generate_script): generate.py timed out.")
        return "Error: Generation script timed out."
    except Exception as e:
        # Boundary handler: the UI should show a message rather than crash.
        print(f"ERROR (call_generate_script): An unexpected error occurred: {e}")
        traceback.print_exc()
        return f"An unexpected error occurred while calling generation script: {str(e)}"

    stdout = process.stdout
    stderr = process.stderr
    print(f"DEBUG (call_generate_script): generate.py STDOUT:\n{stdout}")
    if stderr:
        print(f"DEBUG (call_generate_script): generate.py STDERR:\n{stderr}")

    generated_text = _parse_generation_output(stdout)
    print(f"DEBUG (call_generate_script): Parsed generated text: '{generated_text}'")
    return generated_text


def gradio_interface_fn(image_input_pil: Optional[PILImage.Image], prompt_input_str: Optional[str]) -> str:
    """Gradio callback: run nanoVLM on the uploaded image and prompt.

    Args:
        image_input_pil: The uploaded image, or ``None`` if nothing was uploaded.
        prompt_input_str: The user's prompt; ``None``, empty, or whitespace-only
            values are rejected.

    Returns:
        The generated text, or a message describing what went wrong.
    """
    print(f"DEBUG (gradio_interface_fn): Received prompt: '{prompt_input_str}'")

    if image_input_pil is None:
        return "Please upload an image."
    if not prompt_input_str or not prompt_input_str.strip():
        return "Please provide a prompt."

    # The image is handed to generate.py by path, so it is written to a
    # temporary file. delete=False lets us close the file before the
    # subprocess opens it; the file is removed manually in ``finally``.
    tmp_image_path = None
    try:
        with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp_image_file:
            # Record the path before saving so a failed save is still cleaned up.
            tmp_image_path = tmp_image_file.name
            image_to_save = image_input_pil
            if image_to_save.mode not in _JPEG_WRITABLE_MODES:
                # JPEG cannot store alpha or palette images.
                image_to_save = image_to_save.convert("RGB")
            image_to_save.save(tmp_image_file, format="JPEG")
        print(f"DEBUG (gradio_interface_fn): Temporary image saved to: {tmp_image_path}")

        # Call the generate.py script with the path to the temporary image
        return call_generate_script(tmp_image_path, prompt_input_str)
    except Exception as e:
        # Boundary handler: report the failure in the UI instead of crashing.
        print(f"ERROR (gradio_interface_fn): Error processing image or calling script: {e}")
        traceback.print_exc()
        return f"An error occurred: {str(e)}"
    finally:
        # Clean up the temporary image file
        if tmp_image_path is not None and os.path.exists(tmp_image_path):
            try:
                os.remove(tmp_image_path)
                print(f"DEBUG (gradio_interface_fn): Temporary image {tmp_image_path} removed.")
            except OSError as e_remove:
                print(f"WARN (gradio_interface_fn): Could not remove temporary image {tmp_image_path}: {e_remove}")


# --- Gradio Interface Definition ---
description_md = """
## nanoVLM-222M Interactive Demo (via generate.py)
Upload an image and type a prompt. 
This interface calls the `generate.py` script from `huggingface/nanoVLM` under the hood to perform inference.
"""

print("DEBUG: Defining Gradio interface...")
iface = None
try:
    iface = gr.Interface(
        fn=gradio_interface_fn,
        inputs=[
            gr.Image(type="pil", label="Upload Image"),
            gr.Textbox(label="Your Prompt / Question", info="e.g., 'describe this image in detail'"),
        ],
        outputs=gr.Textbox(label="Generated Text", show_copy_button=True),
        title="nanoVLM-222M Demo (via Script)",
        description=description_md,
        allow_flagging="never",
    )
    print("DEBUG: Gradio interface defined successfully.")
except Exception as e:
    # Leave ``iface`` as None so the launcher below reports the failure.
    print(f"CRITICAL ERROR defining Gradio interface: {e}")
    traceback.print_exc()


# --- Launch Gradio App ---
if __name__ == "__main__":
    print("DEBUG: Entered __main__ block for Gradio launch.")

    if not os.path.exists(GENERATE_SCRIPT_PATH):
        print(f"CRITICAL ERROR: The script {GENERATE_SCRIPT_PATH} was not found. Cannot launch app.")
        iface = None  # Prevent launch

    if iface is not None:
        print("DEBUG: Attempting to launch Gradio interface...")
        try:
            iface.launch(server_name="0.0.0.0", server_port=7860)
            print("DEBUG: Gradio launch command issued.")
        except Exception as e:
            print(f"CRITICAL ERROR launching Gradio interface: {e}")
            traceback.print_exc()
    else:
        print("CRITICAL ERROR: Gradio interface (iface) is None or not defined. Cannot launch.")