Spaces:

witcher23
/

nanoVLM-inference

Running

vidhanm

increased timeout

08a37b7 1 day ago

7.72 kB

	import sys
	import os
	import subprocess # For calling generate.py
	import tempfile # For handling temporary image files
	from typing import Optional
	from PIL import Image as PILImage
	import gradio as gr

	# Locations inside the Docker image and the model that generate.py is asked to load.
	NANOVLM_REPO_PATH = "/app/nanoVLM"
	GENERATE_SCRIPT_PATH = "/app/nanoVLM/generate.py"
	MODEL_REPO_ID = "lusxvr/nanoVLM-222M"

	# Put the cloned nanoVLM checkout at the front of the import path
	# (generate.py might need this too if it imports from 'models').
	if NANOVLM_REPO_PATH not in sys.path:
	    print(f"DEBUG: Adding {NANOVLM_REPO_PATH} to sys.path")
	    sys.path.insert(0, NANOVLM_REPO_PATH)

	# Startup diagnostics, emitted once at import time.
	print(f"DEBUG: Python sys.path: {sys.path}")
	print(f"DEBUG: Using generate.py script at: {GENERATE_SCRIPT_PATH}")
	print(f"DEBUG: Using model repo ID: {MODEL_REPO_ID}")


	# Marker that precedes the first generation in generate.py's stdout.
	_GENERATION_MARKER = ">> Generation 1:"
	# Returned when the expected "Outputs:" / ">> Generation 1:" layout is absent.
	_PARSE_FAILURE_MESSAGE = "Error: Could not parse output from generate.py script."


	def _extract_first_generation(stdout: str) -> str:
	    """Pull the first generation's text out of generate.py's stdout.

	    The expected layout is a line containing ``Outputs:`` followed, on a
	    later line, by ``>> Generation 1: <text>``. Everything after the marker
	    is returned, including continuation lines when the generated text itself
	    contains newlines, up to the next ``>> Generation`` line or end of output.

	    Args:
	        stdout: Captured standard output of the script.

	    Returns:
	        The stripped generated text, or the parse-failure message when the
	        expected layout is not found.
	    """
	    header_seen = False
	    text_lines = None
	    for line in stdout.splitlines():
	        stripped = line.strip()
	        if text_lines is not None:
	            # Already inside generation 1: stop at the next generation marker.
	            if stripped.startswith(">> Generation "):
	                break
	            text_lines.append(line)
	        elif not header_seen:
	            # Only the header itself is skipped; generated text that happens
	            # to contain "Outputs:" must not be discarded later on.
	            header_seen = "Outputs:" in line
	        elif stripped.startswith(_GENERATION_MARKER):
	            # Slice after the marker rather than splitting on "marker + space",
	            # so an empty or unpadded generation still parses.
	            text_lines = [stripped[len(_GENERATION_MARKER):]]

	    if text_lines is None:
	        return _PARSE_FAILURE_MESSAGE
	    return "\n".join(text_lines).strip()


	def call_generate_script(image_path: str, prompt_text: str, timeout_seconds: float = 2400) -> str:
	    """Run generate.py as a subprocess and return the text it generated.

	    Args:
	        image_path: Path of the image file passed via ``--image``.
	        prompt_text: Prompt passed via ``--prompt``.
	        timeout_seconds: Maximum time to wait for the script. The default of
	            2400 seconds is 40 minutes.

	    Returns:
	        The first generation's text on success. On any failure (non-zero exit
	        code, timeout, unparsable output, unexpected exception) a
	        human-readable error string is returned instead; this function does
	        not raise.
	    """
	    print(f"DEBUG (call_generate_script): Calling with image_path='{image_path}', prompt='{prompt_text}'")

	    # Arguments for generate.py (ensure they match its expected format):
	    # --hf_model, --image, --prompt, --generations, --max_new_tokens.
	    # --device is handled by generate.py internally.
	    cmd_args = [
	        # Re-use the interpreter running this app so the child sees the same
	        # installed packages; fall back to PATH lookup if it is unknown.
	        sys.executable or "python", "-u", GENERATE_SCRIPT_PATH,
	        "--hf_model", MODEL_REPO_ID,
	        "--image", image_path,
	        "--prompt", prompt_text,
	        "--generations", "1",  # Get one generation for the UI
	        "--max_new_tokens", "70",  # Adjust as needed
	    ]

	    print(f"DEBUG (call_generate_script): Executing command: {' '.join(cmd_args)}")

	    try:
	        process = subprocess.run(
	            cmd_args,
	            capture_output=True,
	            text=True,
	            check=True,  # Raise CalledProcessError for non-zero exit codes
	            timeout=timeout_seconds,
	        )

	        stdout = process.stdout
	        stderr = process.stderr

	        print(f"DEBUG (call_generate_script): generate.py STDOUT:\n{stdout}")
	        if stderr:
	            print(f"DEBUG (call_generate_script): generate.py STDERR:\n{stderr}")

	        generated_text = _extract_first_generation(stdout)

	        print(f"DEBUG (call_generate_script): Parsed generated text: '{generated_text}'")
	        return generated_text

	    except subprocess.CalledProcessError as e:
	        print(f"ERROR (call_generate_script): generate.py exited with error code {e.returncode}")
	        print(f"ERROR (call_generate_script): STDOUT: {e.stdout}")
	        print(f"ERROR (call_generate_script): STDERR: {e.stderr}")
	        return f"Error executing generation script (Code {e.returncode}). Check logs."
	    except subprocess.TimeoutExpired:
	        print("ERROR (call_generate_script): generate.py timed out.")
	        return "Error: Generation script timed out."
	    except Exception as e:
	        # Last-resort boundary: the UI should show a message, not a stack trace.
	        print(f"ERROR (call_generate_script): An unexpected error occurred: {e}")
	        import traceback
	        traceback.print_exc()
	        return f"An unexpected error occurred while calling generation script: {str(e)}"


	def gradio_interface_fn(image_input_pil: Optional["PILImage.Image"], prompt_input_str: Optional[str]) -> str:
	    """Gradio callback: run generate.py on the uploaded image and prompt.

	    The image is written to a temporary ``.jpg`` file, whose path is handed
	    to ``call_generate_script``; the file is removed again before returning.

	    Args:
	        image_input_pil: Uploaded image, or ``None`` when nothing was uploaded.
	        prompt_input_str: Prompt text; ``None``, empty or whitespace-only
	            values are rejected.

	    Returns:
	        The generated text, a validation message when an input is missing,
	        or an ``"An error occurred: ..."`` message if anything fails.
	    """
	    print(f"DEBUG (gradio_interface_fn): Received prompt: '{prompt_input_str}'")
	    if image_input_pil is None:
	        return "Please upload an image."
	    if not prompt_input_str or not prompt_input_str.strip():
	        return "Please provide a prompt."

	    # Initialised up front so the cleanup in `finally` can tell whether a
	    # temporary file was ever created.
	    tmp_image_path = None
	    try:
	        # JPEG cannot store alpha or palette data (e.g. RGBA / P / LA uploads
	        # from PNG or GIF), so normalise those to RGB before saving.
	        if image_input_pil.mode not in ("RGB", "L"):
	            image_input_pil = image_input_pil.convert("RGB")

	        # delete=False lets us close the file, pass its name to the
	        # subprocess, and delete it manually afterwards.
	        with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp_image_file:
	            # Record the path before saving so a failed save is still cleaned up.
	            tmp_image_path = tmp_image_file.name
	            image_input_pil.save(tmp_image_file, format="JPEG")

	        print(f"DEBUG (gradio_interface_fn): Temporary image saved to: {tmp_image_path}")

	        # Call the generate.py script with the path to the temporary image
	        return call_generate_script(tmp_image_path, prompt_input_str)

	    except Exception as e:
	        # UI boundary: report the failure as text instead of raising.
	        print(f"ERROR (gradio_interface_fn): Error processing image or calling script: {e}")
	        import traceback
	        traceback.print_exc()
	        return f"An error occurred: {str(e)}"
	    finally:
	        # Clean up the temporary image file (best effort).
	        if tmp_image_path is not None and os.path.exists(tmp_image_path):
	            try:
	                os.remove(tmp_image_path)
	                print(f"DEBUG (gradio_interface_fn): Temporary image {tmp_image_path} removed.")
	            except OSError as e_remove:
	                print(f"WARN (gradio_interface_fn): Could not remove temporary image {tmp_image_path}: {e_remove}")


	# --- Gradio Interface Definition ---
	description_md = """
	## nanoVLM-222M Interactive Demo (via generate.py)
	Upload an image and type a prompt. This interface calls the `generate.py` script from
	`huggingface/nanoVLM` under the hood to perform inference.
	"""


	def _build_interface():
	    """Construct the gr.Interface wired to gradio_interface_fn.

	    Inputs are an image (delivered to the callback as a PIL image) and a
	    prompt textbox; the output is a single textbox with a copy button.
	    """
	    image_component = gr.Image(type="pil", label="Upload Image")
	    prompt_component = gr.Textbox(
	        label="Your Prompt / Question",
	        info="e.g., 'describe this image in detail'",
	    )
	    output_component = gr.Textbox(label="Generated Text", show_copy_button=True)
	    return gr.Interface(
	        fn=gradio_interface_fn,
	        inputs=[image_component, prompt_component],
	        outputs=output_component,
	        title="nanoVLM-222M Demo (via Script)",
	        description=description_md,
	        allow_flagging="never",
	    )


	print("DEBUG: Defining Gradio interface...")
	# Stays None when construction fails, which the launch code checks for.
	iface = None
	try:
	    iface = _build_interface()
	    print("DEBUG: Gradio interface defined successfully.")
	except Exception as e:
	    print(f"CRITICAL ERROR defining Gradio interface: {e}")
	    import traceback
	    traceback.print_exc()

	# --- Launch Gradio App ---
	if __name__ == "__main__":
	    print("DEBUG: Entered __main__ block for Gradio launch.")

	    # Without generate.py every request would fail, so refuse to start.
	    if not os.path.exists(GENERATE_SCRIPT_PATH):
	        print(f"CRITICAL ERROR: The script {GENERATE_SCRIPT_PATH} was not found. Cannot launch app.")
	        iface = None  # Prevent launch

	    if iface is None:
	        print("CRITICAL ERROR: Gradio interface (iface) is None or not defined. Cannot launch.")
	    else:
	        print("DEBUG: Attempting to launch Gradio interface...")
	        try:
	            # Listen on all interfaces, port 7860.
	            iface.launch(server_name="0.0.0.0", server_port=7860)
	            print("DEBUG: Gradio launch command issued.")
	        except Exception as e:
	            print(f"CRITICAL ERROR launching Gradio interface: {e}")
	            import traceback
	            traceback.print_exc()