import sys
import os
import subprocess # For calling generate.py
import tempfile # For handling temporary image files
from typing import Optional
from PIL import Image as PILImage
import gradio as gr
# Add the cloned nanoVLM directory to Python's system path (generate.py might need this too if it imports from 'models')
NANOVLM_REPO_PATH = "/app/nanoVLM"
if NANOVLM_REPO_PATH not in sys.path:
print(f"DEBUG: Adding {NANOVLM_REPO_PATH} to sys.path")
sys.path.insert(0, NANOVLM_REPO_PATH)
print(f"DEBUG: Python sys.path: {sys.path}")
# Path to the generate.py script within our Docker container
GENERATE_SCRIPT_PATH = "/app/nanoVLM/generate.py"
MODEL_REPO_ID = "lusxvr/nanoVLM-222M" # Model ID for generate.py
print(f"DEBUG: Using generate.py script at: {GENERATE_SCRIPT_PATH}")
print(f"DEBUG: Using model repo ID: {MODEL_REPO_ID}")
def call_generate_script(image_path: str, prompt_text: str) -> str:
"""
Calls the generate.py script as a subprocess and returns its output.
"""
print(f"DEBUG (call_generate_script): Calling with image_path='{image_path}', prompt='{prompt_text}'")
# Arguments for generate.py (ensure they match its expected format)
# From previous success: --hf_model, --image, --prompt, --generations, --max_new_tokens
cmd_args = [
"python", "-u", GENERATE_SCRIPT_PATH,
"--hf_model", MODEL_REPO_ID,
"--image", image_path,
"--prompt", prompt_text,
"--generations", "1", # Get one generation for the UI
"--max_new_tokens", "70" # Adjust as needed
# --device is handled by generate.py internally
]
print(f"DEBUG (call_generate_script): Executing command: {' '.join(cmd_args)}")
    try:
        # Execute the command.
        # capture_output=True and text=True require Python 3.7+;
        # the Python 3.9 base image used in the Dockerfile supports them.
        process = subprocess.run(
            cmd_args,
            capture_output=True,
            text=True,
            check=True,   # Raise an exception for non-zero exit codes
            timeout=2400  # Generous timeout (2400 s = 40 minutes) for slow CPU inference
        )
        stdout = process.stdout
        stderr = process.stderr
        print(f"DEBUG (call_generate_script): generate.py STDOUT:\n{stdout}")
        if stderr:
            print(f"DEBUG (call_generate_script): generate.py STDERR:\n{stderr}")

        # --- Parse the output from generate.py ---
        # The generate.py script prints:
        #   Outputs:
        #   >> Generation 1: Actual generated text here.
        # We need to extract "Actual generated text here."
        output_lines = stdout.splitlines()
        generated_text = "Error: Could not parse output from generate.py script."  # Default
        parsing_output = False
        for line in output_lines:
            if "Outputs:" in line:
                parsing_output = True
                continue
            if parsing_output and line.strip().startswith(">> Generation 1:"):
                # Extract the text that follows ">> Generation 1: "
                generated_text = line.split(">> Generation 1: ", 1)[-1].strip()
                break  # Found the first generation
        print(f"DEBUG (call_generate_script): Parsed generated text: '{generated_text}'")
        return generated_text
    except subprocess.CalledProcessError as e:
        print(f"ERROR (call_generate_script): generate.py exited with error code {e.returncode}")
        print(f"ERROR (call_generate_script): STDOUT: {e.stdout}")
        print(f"ERROR (call_generate_script): STDERR: {e.stderr}")
        return f"Error executing generation script (Code {e.returncode}). Check logs."
    except subprocess.TimeoutExpired:
        print("ERROR (call_generate_script): generate.py timed out.")
        return "Error: Generation script timed out."
    except Exception as e:
        print(f"ERROR (call_generate_script): An unexpected error occurred: {e}")
        import traceback
        traceback.print_exc()
        return f"An unexpected error occurred while calling generation script: {str(e)}"
def gradio_interface_fn(image_input_pil: Optional[PILImage.Image], prompt_input_str: Optional[str]) -> str:
print(f"DEBUG (gradio_interface_fn): Received prompt: '{prompt_input_str}'")
if image_input_pil is None:
return "Please upload an image."
if not prompt_input_str:
return "Please provide a prompt."
# Save the uploaded PIL image to a temporary file
# tempfile.NamedTemporaryFile creates a file that is deleted when closed.
# We need to ensure it has a .jpg extension for some image libraries if they are picky.
# The 'delete=False' allows us to close it, pass its name, and then delete it manually.
try:
with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp_image_file:
image_input_pil.save(tmp_image_file, format="JPEG")
tmp_image_path = tmp_image_file.name
print(f"DEBUG (gradio_interface_fn): Temporary image saved to: {tmp_image_path}")
# Call the generate.py script with the path to the temporary image
result_text = call_generate_script(tmp_image_path, prompt_input_str)
return result_text
except Exception as e:
print(f"ERROR (gradio_interface_fn): Error processing image or calling script: {e}")
import traceback; traceback.print_exc()
return f"An error occurred: {str(e)}"
finally:
# Clean up the temporary image file
if 'tmp_image_path' in locals() and os.path.exists(tmp_image_path):
try:
os.remove(tmp_image_path)
print(f"DEBUG (gradio_interface_fn): Temporary image {tmp_image_path} removed.")
except Exception as e_remove:
print(f"WARN (gradio_interface_fn): Could not remove temporary image {tmp_image_path}: {e_remove}")
# --- Gradio Interface Definition ---
description_md = """
## nanoVLM-222M Interactive Demo (via generate.py)
Upload an image and type a prompt. This interface calls the `generate.py` script from
`huggingface/nanoVLM` under the hood to perform inference.
"""
print("DEBUG: Defining Gradio interface...")
iface = None
try:
    iface = gr.Interface(
        fn=gradio_interface_fn,
        inputs=[
            gr.Image(type="pil", label="Upload Image"),
            gr.Textbox(label="Your Prompt / Question", info="e.g., 'describe this image in detail'")
        ],
        outputs=gr.Textbox(label="Generated Text", show_copy_button=True),
        title="nanoVLM-222M Demo (via Script)",
        description=description_md,
        allow_flagging="never"
    )
    print("DEBUG: Gradio interface defined successfully.")
except Exception as e:
    print(f"CRITICAL ERROR defining Gradio interface: {e}")
    import traceback; traceback.print_exc()
# --- Launch Gradio App ---
if __name__ == "__main__":
print("DEBUG: Entered __main__ block for Gradio launch.")
if not os.path.exists(GENERATE_SCRIPT_PATH):
print(f"CRITICAL ERROR: The script {GENERATE_SCRIPT_PATH} was not found. Cannot launch app.")
iface = None # Prevent launch
if iface is not None:
print("DEBUG: Attempting to launch Gradio interface...")
try:
iface.launch(server_name="0.0.0.0", server_port=7860)
print("DEBUG: Gradio launch command issued.")
except Exception as e:
print(f"CRITICAL ERROR launching Gradio interface: {e}")
import traceback; traceback.print_exc()
else:
print("CRITICAL ERROR: Gradio interface (iface) is None or not defined. Cannot launch.") |