Spaces:
Running
Running
import sys | |
import os | |
import subprocess # For calling generate.py | |
import tempfile # For handling temporary image files | |
from typing import Optional | |
from PIL import Image as PILImage | |
import gradio as gr | |
# Add the cloned nanoVLM directory to this process's import path.
# NOTE(review): generate.py is launched as a separate subprocess further down
# in this file, so this entry only affects imports made by this process itself
# -- confirm it is still required.
NANOVLM_REPO_PATH = "/app/nanoVLM"
if NANOVLM_REPO_PATH not in sys.path:
    print(f"DEBUG: Adding {NANOVLM_REPO_PATH} to sys.path")
    sys.path.insert(0, NANOVLM_REPO_PATH)
print(f"DEBUG: Python sys.path: {sys.path}")

# Path to the generate.py script within our Docker container. It is derived
# from NANOVLM_REPO_PATH so the two locations cannot drift apart.
GENERATE_SCRIPT_PATH = f"{NANOVLM_REPO_PATH}/generate.py"
MODEL_REPO_ID = "lusxvr/nanoVLM-222M"  # Model ID handed to generate.py via --hf_model
print(f"DEBUG: Using generate.py script at: {GENERATE_SCRIPT_PATH}")
print(f"DEBUG: Using model repo ID: {MODEL_REPO_ID}")
# Upper bound, in seconds, for one generate.py run (2400 s = 40 minutes).
_GENERATION_TIMEOUT_SECONDS = 2400

# Marker that introduces the first generation in generate.py's stdout.
_FIRST_GENERATION_PREFIX = ">> Generation 1:"


def _extract_first_generation(stdout: str) -> Optional[str]:
    """Return the text of the first generation found in *stdout*, or None.

    The expected layout is an "Outputs:" header line followed by a line that
    starts (after optional leading whitespace) with ">> Generation 1:".
    """
    header_seen = False
    for raw_line in stdout.splitlines():
        if not header_seen:
            # Everything before the header is ignored.
            header_seen = "Outputs:" in raw_line
            continue
        candidate = raw_line.strip()
        if candidate.startswith(_FIRST_GENERATION_PREFIX):
            # Slice the prefix off instead of splitting on "prefix + space":
            # this also copes with an empty generation and with generated
            # text that itself contains "Outputs:".
            return candidate[len(_FIRST_GENERATION_PREFIX):].strip()
    return None


def call_generate_script(image_path: str, prompt_text: str) -> str:
    """
    Calls the generate.py script as a subprocess and returns its output.

    Args:
        image_path: Path of the image file passed to generate.py via --image.
        prompt_text: Prompt passed to generate.py via --prompt.

    Returns:
        The text of the first generation parsed from the script's stdout, or
        a human-readable error message when the script fails, times out, or
        prints output that cannot be parsed. Failures of the subprocess call
        are reported through the return value rather than raised.
    """
    print(f"DEBUG (call_generate_script): Calling with image_path='{image_path}', prompt='{prompt_text}'")
    # Arguments for generate.py (ensure they match its expected format):
    # --hf_model, --image, --prompt, --generations, --max_new_tokens.
    # The child is started with the interpreter running this app, so it sees
    # the same installed packages; "python" is only a fallback for the rare
    # case where sys.executable is empty.
    # NOTE(review): prompt_text is user input. It is passed as its own argv
    # element (no shell involved), but a prompt that begins with '-' may be
    # mistaken for an option by generate.py's argument parser -- confirm.
    cmd_args = [
        sys.executable or "python", "-u", GENERATE_SCRIPT_PATH,
        "--hf_model", MODEL_REPO_ID,
        "--image", image_path,
        "--prompt", prompt_text,
        "--generations", "1",  # Get one generation for the UI
        "--max_new_tokens", "70",  # Adjust as needed
        # --device is handled by generate.py internally
    ]
    print(f"DEBUG (call_generate_script): Executing command: {' '.join(cmd_args)}")
    try:
        process = subprocess.run(
            cmd_args,
            capture_output=True,
            text=True,
            check=True,  # Raise CalledProcessError for non-zero exit codes
            timeout=_GENERATION_TIMEOUT_SECONDS,
        )
        stdout = process.stdout
        stderr = process.stderr
        print(f"DEBUG (call_generate_script): generate.py STDOUT:\n{stdout}")
        if stderr:
            print(f"DEBUG (call_generate_script): generate.py STDERR:\n{stderr}")

        # --- Parse the output from generate.py ---
        # The script prints:
        #   Outputs:
        #     >> Generation 1: Actual generated text here.
        # and only "Actual generated text here." is returned.
        generated_text = _extract_first_generation(stdout)
        if generated_text is None:
            generated_text = "Error: Could not parse output from generate.py script."
        print(f"DEBUG (call_generate_script): Parsed generated text: '{generated_text}'")
        return generated_text
    except subprocess.CalledProcessError as e:
        print(f"ERROR (call_generate_script): generate.py exited with error code {e.returncode}")
        print(f"ERROR (call_generate_script): STDOUT: {e.stdout}")
        print(f"ERROR (call_generate_script): STDERR: {e.stderr}")
        return f"Error executing generation script (Code {e.returncode}). Check logs."
    except subprocess.TimeoutExpired:
        print("ERROR (call_generate_script): generate.py timed out.")
        return "Error: Generation script timed out."
    except Exception as e:
        # Last-resort boundary: the UI callback should receive a message,
        # not an exception.
        print(f"ERROR (call_generate_script): An unexpected error occurred: {e}")
        import traceback
        traceback.print_exc()
        return f"An unexpected error occurred while calling generation script: {str(e)}"
def gradio_interface_fn(image_input_pil: Optional[PILImage.Image], prompt_input_str: Optional[str]) -> str:
    """
    Gradio callback: run generate.py on the uploaded image and prompt.

    The image is written to a temporary JPEG file, the file's path is handed
    to call_generate_script, and the file is removed again afterwards.

    Args:
        image_input_pil: The uploaded image, or None when nothing was uploaded.
        prompt_input_str: The user's prompt; None, empty, or whitespace-only
            values are rejected.

    Returns:
        The generated text, or a message describing what went wrong.
    """
    print(f"DEBUG (gradio_interface_fn): Received prompt: '{prompt_input_str}'")
    if image_input_pil is None:
        return "Please upload an image."
    if not prompt_input_str or not prompt_input_str.strip():
        return "Please provide a prompt."

    # Modes Pillow's JPEG writer accepts as-is; anything else (RGBA, P, LA, ...)
    # makes save() raise, so such images are converted to RGB first.
    jpeg_writable_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")

    # Initialised up front so the cleanup in `finally` can tell whether a
    # temporary file was ever created.
    tmp_image_path = None
    try:
        image_for_jpeg = image_input_pil
        if image_for_jpeg.mode not in jpeg_writable_modes:
            image_for_jpeg = image_for_jpeg.convert("RGB")

        # delete=False lets us close the file, pass its name to the
        # subprocess, and delete it manually afterwards. The .jpg suffix is
        # there for image libraries that are picky about extensions.
        with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp_image_file:
            # Record the path before writing: if save() fails, the file still
            # gets removed in `finally`.
            tmp_image_path = tmp_image_file.name
            image_for_jpeg.save(tmp_image_file, format="JPEG")
        print(f"DEBUG (gradio_interface_fn): Temporary image saved to: {tmp_image_path}")
        # Call the generate.py script with the path to the temporary image
        result_text = call_generate_script(tmp_image_path, prompt_input_str)
        return result_text
    except Exception as e:
        print(f"ERROR (gradio_interface_fn): Error processing image or calling script: {e}")
        import traceback
        traceback.print_exc()
        return f"An error occurred: {str(e)}"
    finally:
        # Clean up the temporary image file (best effort).
        if tmp_image_path is not None and os.path.exists(tmp_image_path):
            try:
                os.remove(tmp_image_path)
                print(f"DEBUG (gradio_interface_fn): Temporary image {tmp_image_path} removed.")
            except OSError as e_remove:
                print(f"WARN (gradio_interface_fn): Could not remove temporary image {tmp_image_path}: {e_remove}")
# --- Gradio Interface Definition ---
description_md = """
## nanoVLM-222M Interactive Demo (via generate.py)
Upload an image and type a prompt. This interface calls the `generate.py` script from
`huggingface/nanoVLM` under the hood to perform inference.
"""


def _build_interface():
    """Create the gr.Interface: image + prompt inputs, one text output."""
    image_input = gr.Image(type="pil", label="Upload Image")
    prompt_input = gr.Textbox(
        label="Your Prompt / Question",
        info="e.g., 'describe this image in detail'",
    )
    text_output = gr.Textbox(label="Generated Text", show_copy_button=True)
    return gr.Interface(
        fn=gradio_interface_fn,
        inputs=[image_input, prompt_input],
        outputs=text_output,
        title="nanoVLM-222M Demo (via Script)",
        description=description_md,
        allow_flagging="never",
    )


print("DEBUG: Defining Gradio interface...")
# Stays None when construction fails, so the launch code can detect that.
iface = None
try:
    iface = _build_interface()
    print("DEBUG: Gradio interface defined successfully.")
except Exception as e:
    print(f"CRITICAL ERROR defining Gradio interface: {e}")
    import traceback
    traceback.print_exc()
# --- Launch Gradio App ---
if __name__ == "__main__":
    print("DEBUG: Entered __main__ block for Gradio launch.")

    # Without generate.py nothing can be generated, so drop the interface
    # to keep the app from launching.
    if not os.path.exists(GENERATE_SCRIPT_PATH):
        print(f"CRITICAL ERROR: The script {GENERATE_SCRIPT_PATH} was not found. Cannot launch app.")
        iface = None  # Prevent launch

    if iface is None:
        print("CRITICAL ERROR: Gradio interface (iface) is None or not defined. Cannot launch.")
    else:
        print("DEBUG: Attempting to launch Gradio interface...")
        try:
            # Listen on all interfaces on port 7860.
            iface.launch(server_name="0.0.0.0", server_port=7860)
            print("DEBUG: Gradio launch command issued.")
        except Exception as e:
            print(f"CRITICAL ERROR launching Gradio interface: {e}")
            import traceback
            traceback.print_exc()