File size: 7,722 Bytes
ff119bb
 
a4ebbec
 
ff119bb
 
a4ebbec
ff119bb
a4ebbec
 
ff119bb
 
 
 
a4ebbec
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ff119bb
a4ebbec
 
 
 
 
 
 
 
08a37b7
a4ebbec
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ff119bb
a4ebbec
ff119bb
 
a4ebbec
ff119bb
 
a4ebbec
 
ff119bb
 
a4ebbec
ff119bb
 
a4ebbec
 
 
 
ff119bb
a4ebbec
 
 
ff119bb
a4ebbec
 
 
 
 
 
ff119bb
 
a4ebbec
 
 
 
 
 
 
 
 
 
 
 
 
 
ff119bb
a4ebbec
 
 
ff119bb
 
a4ebbec
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ff119bb
 
 
 
a4ebbec
 
 
 
ff119bb
 
 
 
 
 
 
 
 
a4ebbec
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
import sys
import os
import subprocess # For calling generate.py
import tempfile # For handling temporary image files
from typing import Optional
from PIL import Image as PILImage
import gradio as gr

# --- Container paths and model selection ---
# Root of the cloned nanoVLM checkout inside the Docker image.
NANOVLM_REPO_PATH = "/app/nanoVLM"
# The generate.py script inside that checkout; it is run as a subprocess.
GENERATE_SCRIPT_PATH = "/app/nanoVLM/generate.py"
# Hugging Face model ID handed to generate.py.
MODEL_REPO_ID = "lusxvr/nanoVLM-222M"

# Put the checkout at the front of the import path (only once) so that modules
# living in the repo, e.g. 'models', can be imported.
if NANOVLM_REPO_PATH not in sys.path:
    print(f"DEBUG: Adding {NANOVLM_REPO_PATH} to sys.path")
    sys.path.insert(0, NANOVLM_REPO_PATH)

# Startup diagnostics.
print(f"DEBUG: Python sys.path: {sys.path}")
print(f"DEBUG: Using generate.py script at: {GENERATE_SCRIPT_PATH}")
print(f"DEBUG: Using model repo ID: {MODEL_REPO_ID}")


def _extract_first_generation(stdout: str) -> str:
    """Return the text of the first generation found in generate.py's stdout.

    The script is expected to print an ``Outputs:`` header followed by a line
    of the form ``  >> Generation 1: <text>``. Only lines after the header are
    considered. The amount of whitespace after the colon is not significant,
    so the marker is removed regardless of how many spaces follow it.

    Args:
        stdout: Complete standard output captured from the script.

    Returns:
        The generated text with surrounding whitespace removed, or a fixed
        error message when no matching line is present.
    """
    marker = ">> Generation 1:"
    seen_outputs_header = False
    for line in stdout.splitlines():
        if "Outputs:" in line:
            seen_outputs_header = True
            continue
        candidate = line.strip()
        if seen_outputs_header and candidate.startswith(marker):
            # Slice off the marker itself instead of splitting on the marker
            # plus a fixed number of spaces; otherwise a single-space separator
            # would leave the ">> Generation 1:" prefix in the result.
            return candidate[len(marker):].strip()
    return "Error: Could not parse output from generate.py script."


def call_generate_script(image_path: str, prompt_text: str) -> str:
    """
    Calls the generate.py script as a subprocess and returns its output.

    Args:
        image_path: Path of the image file passed to the script via --image.
        prompt_text: Prompt passed to the script via --prompt.

    Returns:
        The text of the first generation parsed from the script's stdout. On
        failure (non-zero exit code, timeout, unparsable output, or any other
        exception while running the script) a human-readable error message is
        returned instead; details are printed to the log.
    """
    print(f"DEBUG (call_generate_script): Calling with image_path='{image_path}', prompt='{prompt_text}'")

    # 2400 seconds = 40 minutes.
    # NOTE(review): an earlier comment described this as "2 minutes"; the value
    # is kept as-is, confirm which limit is actually intended.
    timeout_seconds = 2400

    # Arguments for generate.py (ensure they match its expected format):
    # --hf_model, --image, --prompt, --generations, --max_new_tokens.
    # Run the script with the interpreter that is running this app so that it
    # sees the same installed packages; fall back to "python" from PATH only
    # if the interpreter path is unavailable.
    cmd_args = [
        sys.executable or "python", "-u", GENERATE_SCRIPT_PATH,
        "--hf_model", MODEL_REPO_ID,
        "--image", image_path,
        "--prompt", prompt_text,
        "--generations", "1",      # Get one generation for the UI
        "--max_new_tokens", "70"   # Adjust as needed
        # --device is handled by generate.py internally
    ]

    print(f"DEBUG (call_generate_script): Executing command: {' '.join(cmd_args)}")

    try:
        # The arguments are passed as a list (no shell), so the prompt text is
        # never interpreted by a shell.
        process = subprocess.run(
            cmd_args,
            capture_output=True,
            text=True,
            check=True,  # Raise an exception for non-zero exit codes
            timeout=timeout_seconds
        )

        stdout = process.stdout
        stderr = process.stderr

        print(f"DEBUG (call_generate_script): generate.py STDOUT:\n{stdout}")
        if stderr:
            print(f"DEBUG (call_generate_script): generate.py STDERR:\n{stderr}")

        generated_text = _extract_first_generation(stdout)

        print(f"DEBUG (call_generate_script): Parsed generated text: '{generated_text}'")
        return generated_text

    except subprocess.CalledProcessError as e:
        print(f"ERROR (call_generate_script): generate.py exited with error code {e.returncode}")
        print(f"ERROR (call_generate_script): STDOUT: {e.stdout}")
        print(f"ERROR (call_generate_script): STDERR: {e.stderr}")
        return f"Error executing generation script (Code {e.returncode}). Check logs."
    except subprocess.TimeoutExpired:
        print("ERROR (call_generate_script): generate.py timed out.")
        return "Error: Generation script timed out."
    except Exception as e:
        # Last-resort boundary: the UI should receive a message, not a crash.
        print(f"ERROR (call_generate_script): An unexpected error occurred: {e}")
        import traceback
        traceback.print_exc()
        return f"An unexpected error occurred while calling generation script: {str(e)}"


def gradio_interface_fn(image_input_pil: Optional[PILImage.Image], prompt_input_str: Optional[str]) -> str:
    """Gradio callback: run generate.py on the uploaded image and prompt.

    The image is written to a temporary JPEG file, the path is handed to
    ``call_generate_script`` together with the prompt, and the temporary file
    is removed again before returning.

    Args:
        image_input_pil: The uploaded image, or None when nothing was uploaded.
        prompt_input_str: The user's prompt; None, empty or whitespace-only
            values are rejected.

    Returns:
        The text returned by ``call_generate_script``, a short instruction
        when an input is missing, or an error message if saving the image or
        running the script raised an exception.
    """
    print(f"DEBUG (gradio_interface_fn): Received prompt: '{prompt_input_str}'")
    if image_input_pil is None:
        return "Please upload an image."
    if not prompt_input_str or not prompt_input_str.strip():
        return "Please provide a prompt."

    # Set as soon as the temporary file exists so the cleanup in 'finally'
    # also runs when saving the image fails.
    tmp_image_path = None
    try:
        # delete=False: the file must outlive the 'with' block so the
        # subprocess can open it by name; it is removed manually below.
        with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp_image_file:
            tmp_image_path = tmp_image_file.name
            jpeg_source = image_input_pil
            if jpeg_source.mode != "RGB":
                # JPEG cannot store alpha or palette modes (e.g. RGBA, P);
                # normalise to RGB before saving.
                jpeg_source = jpeg_source.convert("RGB")
            jpeg_source.save(tmp_image_file, format="JPEG")

        print(f"DEBUG (gradio_interface_fn): Temporary image saved to: {tmp_image_path}")

        # Call the generate.py script with the path to the temporary image
        return call_generate_script(tmp_image_path, prompt_input_str)

    except Exception as e:
        # UI boundary: report the problem as text instead of raising.
        print(f"ERROR (gradio_interface_fn): Error processing image or calling script: {e}")
        import traceback
        traceback.print_exc()
        return f"An error occurred: {str(e)}"
    finally:
        # Clean up the temporary image file
        if tmp_image_path is not None:
            try:
                os.remove(tmp_image_path)
                print(f"DEBUG (gradio_interface_fn): Temporary image {tmp_image_path} removed.")
            except FileNotFoundError:
                # Already gone; nothing to clean up.
                pass
            except OSError as e_remove:
                print(f"WARN (gradio_interface_fn): Could not remove temporary image {tmp_image_path}: {e_remove}")


# --- Gradio Interface Definition ---
# Markdown text passed to the interface as its description.
description_md = """
## nanoVLM-222M Interactive Demo (via generate.py)
Upload an image and type a prompt. This interface calls the `generate.py` script from
`huggingface/nanoVLM` under the hood to perform inference.
"""

print("DEBUG: Defining Gradio interface...")
# Stays None when construction fails; the launch code checks for that.
iface = None
try:
    iface = gr.Interface(
        fn=gradio_interface_fn,
        # Two inputs, in the order of gradio_interface_fn's parameters:
        # the image (delivered as a PIL image) and the prompt text.
        inputs=[
            gr.Image(type="pil", label="Upload Image"),
            gr.Textbox(
                label="Your Prompt / Question",
                info="e.g., 'describe this image in detail'",
            ),
        ],
        outputs=gr.Textbox(label="Generated Text", show_copy_button=True),
        title="nanoVLM-222M Demo (via Script)",
        description=description_md,
        allow_flagging="never",
    )
    print("DEBUG: Gradio interface defined successfully.")
except Exception as e:
    # Log the failure and keep going with iface left as None.
    print(f"CRITICAL ERROR defining Gradio interface: {e}")
    import traceback
    traceback.print_exc()

# --- Launch Gradio App ---
# Runs only when this file is executed as a script. Each failure path prints a
# single specific message and exits with a non-zero status so that whatever
# supervises the process can tell the app did not start.
if __name__ == "__main__":
    print("DEBUG: Entered __main__ block for Gradio launch.")

    # Without the script every request would fail, so refuse to start.
    if not os.path.isfile(GENERATE_SCRIPT_PATH):
        print(f"CRITICAL ERROR: The script {GENERATE_SCRIPT_PATH} was not found. Cannot launch app.")
        sys.exit(1)

    # iface is None when building the interface raised earlier in this module.
    if iface is None:
        print("CRITICAL ERROR: Gradio interface (iface) is None or not defined. Cannot launch.")
        sys.exit(1)

    print("DEBUG: Attempting to launch Gradio interface...")
    try:
        # Listen on all interfaces on port 7860.
        iface.launch(server_name="0.0.0.0", server_port=7860)
    except Exception as e:
        print(f"CRITICAL ERROR launching Gradio interface: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)
    print("DEBUG: Gradio launch command issued.")