File size: 9,800 Bytes
bc870ea
 
 
3416431
bc870ea
 
 
ed52ef5
db1e6d4
bc870ea
 
f43190d
bc870ea
 
 
 
 
 
 
668b8e4
bc870ea
 
 
 
 
 
 
 
668b8e4
 
bc870ea
 
668b8e4
f77dded
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bc870ea
e971244
 
 
 
bc870ea
e971244
bc870ea
 
 
 
 
668b8e4
 
bc870ea
 
 
 
 
 
668b8e4
 
 
 
 
 
bc870ea
 
 
 
 
 
d548d36
bc870ea
d548d36
 
e9fd080
 
d548d36
1f823d6
e9fd080
0bb39a0
d548d36
e9fd080
bc870ea
e9fd080
 
bc870ea
 
 
 
9e24507
 
 
 
 
bc870ea
ed52ef5
 
bc870ea
3dd110d
bc870ea
 
 
3dd110d
9e24507
 
8b41a8f
 
3dd110d
bc870ea
 
 
 
668b8e4
 
3dd110d
668b8e4
 
 
bc870ea
 
 
668b8e4
bc870ea
 
0bb39a0
f43190d
bc870ea
3dd110d
2f44119
 
 
 
 
 
 
 
 
b57663f
2f44119
 
 
 
 
 
 
 
 
b57663f
2f44119
 
 
 
6bb4fbf
 
 
 
 
 
 
 
b57663f
 
f1e6a3a
b57663f
97a7c26
 
 
 
 
 
 
 
b57663f
7054a43
6bb4fbf
 
b68b0af
6bb4fbf
 
ed52ef5
b57663f
3dd110d
0bb39a0
 
 
 
 
bc870ea
 
668b8e4
0bb39a0
bc870ea
 
 
3dd110d
75a8eb7
090a80f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
# -*- coding: utf-8 -*-
# ZenCtrl Inpainting Playground (Baseten backend)

#import spaces
import os, json, base64, requests
from io import BytesIO
from PIL import Image, ImageDraw
import gradio as gr
import replicate

# ────────── Secrets & endpoints ──────────
# All three values come from environment variables and are None when unset.
# URL that process_image_and_text POSTs the inference payload to.
BASETEN_MODEL_URL = os.getenv("BASETEN_MODEL_URL")
# Optional: when set, sent as an "Api-Key ..." Authorization header.
BASETEN_API_KEY = os.getenv("BASETEN_API_KEY")
# NOTE(review): not referenced anywhere else in this file; presumably the
# replicate client reads REPLICATE_API_TOKEN from the environment itself — confirm.
REPLICATE_TOKEN = os.getenv("REPLICATE_API_TOKEN")

# ────────── Globals ──────────
# Default side length (pixels) for the `size` parameter of process_image_and_text.
ADAPTER_SIZE = 1024
# Stylesheet handed to gr.Blocks; targets the element with id "col-container".
css = "#col-container {margin:0 auto; max-width:960px;}"

# Background generation via Replicate
def _gen_bg(prompt: str):
    """Generate a square background image from a text prompt via Replicate.

    Args:
        prompt: Description of the desired background. A falsy value
            (empty string / None) falls back to "cinematic background".

    Returns:
        The generated picture as an RGB ``PIL.Image.Image``.

    Raises:
        gr.Error: If Replicate returns no output at all.
        requests.HTTPError: If downloading the generated image answers with
            an HTTP error status.
    """
    output = replicate.run(
        "google/imagen-4-fast",
        input={"prompt": prompt or "cinematic background", "aspect_ratio": "1:1"},
    )
    # The result may be a single item or a list of items; only the first is used.
    if isinstance(output, list):
        if not output:
            raise gr.Error("Background generation returned no image.")
        output = output[0]
    if output is None:
        raise gr.Error("Background generation returned no image.")

    resp = requests.get(output, timeout=120)
    # Fail loudly on 4xx/5xx instead of handing an error page to PIL.
    resp.raise_for_status()
    return Image.open(BytesIO(resp.content)).convert("RGB")

# Main processing function
def process_image_and_text(subject_image, adapter_dict, prompt, _unused1, _unused2, size=ADAPTER_SIZE, rank=10.0):
    """Send the subject and background/mask images to the Baseten model.

    Args:
        subject_image: PIL image of the subject.
        adapter_dict: Either a PIL image, or a sketch-style dict with an
            "image" entry and an optional "mask" entry. When a mask is given,
            the bounding box of its non-zero pixels is painted solid green
            (#00FF00) onto a copy of the image before it is sent.
        prompt: Generation prompt, forwarded unchanged.
        _unused1: Ignored; kept so existing UI wiring keeps working.
        _unused2: Ignored; kept so existing UI wiring keeps working.
        size: Side length in pixels. Both images are resized to
            ``size x size`` and the same value is sent as height and width.
        rank: Forwarded unchanged as the "rank" field of the payload.

    Returns:
        A 2-tuple holding the same decoded RGB PIL image twice (one value per
        wired output).

    Raises:
        gr.Error: If an input image or the endpoint URL is missing, or the
            response carries no decodable image.
        requests.HTTPError: If the endpoint answers with an error status.
    """
    # Sampling settings are fixed for the playground.
    seed, guidance_scale, steps = 42, 2.5, 28

    if subject_image is None:
        raise gr.Error("Please upload a subject image.")

    # Sketch inputs arrive as {"image": ..., "mask": ...}; plain inputs as an image.
    if isinstance(adapter_dict, dict):
        adapter_image = adapter_dict.get("image")
        adapter_mask = adapter_dict.get("mask")
    else:
        adapter_image, adapter_mask = adapter_dict, None

    if adapter_image is None:
        raise gr.Error("Please upload a background / mask image.")
    if not BASETEN_MODEL_URL:
        raise gr.Error("BASETEN_MODEL_URL is not configured.")

    if adapter_mask is not None:
        # Binarise the mask, then replace it with its filled bounding rectangle
        # so the marked region becomes one solid green box.
        m = adapter_mask.convert("L").point(lambda p: 255 if p else 0)
        bbox = m.getbbox()
        if bbox:
            rect = Image.new("L", m.size, 0)
            ImageDraw.Draw(rect).rectangle(bbox, fill=255)
            m = rect
        green = Image.new("RGB", adapter_image.size, "#00FF00")
        adapter_image = Image.composite(green, adapter_image, m)

    def prep(img: Image.Image):
        # Plain resize (no centre-crop): non-square inputs are stretched.
        return img.resize((size, size), Image.LANCZOS)

    def b64(img):
        # PNG-encode the image and return it as base64 text.
        buf = BytesIO()
        img.save(buf, format="PNG")
        return base64.b64encode(buf.getvalue()).decode()

    payload = {
        "prompt": prompt,
        "subject_image": b64(prep(subject_image)),
        "adapter_image": b64(prep(adapter_image)),
        "height": size,
        "width": size,
        "steps": steps,
        "seed": seed,
        "guidance_scale": guidance_scale,
        "rank": rank,
    }

    headers = {"Content-Type": "application/json"}
    if BASETEN_API_KEY:
        headers["Authorization"] = f"Api-Key {BASETEN_API_KEY}"

    resp = requests.post(BASETEN_MODEL_URL, headers=headers, json=payload, timeout=180)
    resp.raise_for_status()
    data = resp.json()

    # The presence check and the decode must look at the same key. "raw_result"
    # is preferred (it is what was decoded before); "blended" is the fallback.
    result_key = None
    if isinstance(data, dict):
        result_key = next((k for k in ("raw_result", "blended") if k in data), None)
    if result_key is None:
        raise gr.Error("Baseten response missing 'raw_result'/'blended' image.")

    try:
        image_bytes = base64.b64decode(data[result_key])
        raw_img = Image.open(BytesIO(image_bytes)).convert("RGB")
    except Exception as err:
        # UI boundary: surface a readable message but keep the cause chained.
        raise gr.Error(f"Failed to decode '{result_key}' image from Baseten response.") from err
    return raw_img, raw_img



# ────────── Header HTML ──────────
# Static banner rendered through gr.HTML at the top of the page: a title plus
# three badge links (Discord invite, landing page, X profile).
header_html = """
<h1>ZenCtrl Inpainting Beta</h1>
<div align=\"center\" style=\"line-height: 1;\">
  <a href=\"https://discord.com/invite/b9RuYQ3F8k\" target=\"_blank\" style=\"margin: 10px;\" name=\"discord_link\"><img src=\"https://img.shields.io/badge/Discord-Join-7289da.svg?logo=discord\" alt=\"Discord\" style=\"display: inline-block; vertical-align: middle;\"></a>
  <a href=\"https://fotographer.ai/zen-control\" target=\"_blank\" style=\"margin: 10px;\" name=\"lp_link\"><img src=\"https://img.shields.io/badge/Website-Landing_Page-blue\" alt=\"LP\" style=\"display: inline-block; vertical-align: middle;\"></a>
  <a href=\"https://x.com/FotographerAI\" target=\"_blank\" style=\"margin: 10px;\" name=\"twitter_link\"><img src=\"https://img.shields.io/twitter/follow/FotographerAI?style=social\" alt=\"X\" style=\"display: inline-block; vertical-align: middle;\"></a>
</div>
"""

# ────────── Gradio UI ──────────
with gr.Blocks(css=css, title="ZenCtrl Inpainting") as demo:
    # Receives the second value returned by process_image_and_text.
    raw_state = gr.State()

    # Page banner followed by the usage notes.
    gr.HTML(header_html)
    gr.Markdown(
        "**Generate context-aware images of your subject with ZenCtrl’s inpainting playground.** Upload a subject + optional mask, write a prompt, and hit **Generate**.   \n"
        "Open *Advanced Settings* for an AI-generated background.  \n\n"
        "**Note:** The model was trained mainly on interior scenes and other *rigid* objects. Results on people or highly deformable items may contain visual distortions. \n"
        "In case of High traffic , your requests might be queued and processed one by one by our backend server"
    )

    with gr.Row():
        # Left column: inputs. Its elem_id matches the "#col-container" rule in `css`.
        with gr.Column(scale=2, elem_id="col-container"):
            subj_img = gr.Image(type="pil", label="Subject image")
            # Sketch-enabled input with a green brush; process_image_and_text
            # handles both a plain image and an {"image", "mask"} dict for it.
            ref_img = gr.Image(type="pil", label="Background / Mask image", tool="sketch", brush_color="#00FF00")
            # Hidden image component; not referenced elsewhere in this file.
            ref_img_ex = gr.Image(type="pil", visible=False)

            # Removed Florence-SAM
            promptbox = gr.Textbox(label="Generation prompt", value="furniture", lines=2)
            run_btn = gr.Button("Generate", variant="primary")

            # Collapsed by default: prompt + button for the generated background.
            with gr.Accordion("Advanced Settings", open=False):
                bgprompt = gr.Textbox(label="Background Prompt", value="Scandinavian living room …")
                bg_btn = gr.Button("Generate BG")

        # Right column: the model result and the generated background.
        with gr.Column(scale=2):
            output_img = gr.Image(label="Output Image")
            bg_img = gr.Image(label="Background", visible=True)

    # ---------- Example wrapper ---------------------------------
    # def _load_and_show(subj_path, bg_path, prompt_text):
    #     out_path = subj_path.replace(".png", "_out.png")
    #     return (
    #         Image.open(subj_path),                             # → gr.Image widget ok
    #         {"image": Image.open(bg_path), "mask": None},      # ← **dict for sketch!**
    #         prompt_text,                                       # → gr.Textbox
    #         Image.open(out_path)                               # → gr.Image output
    #     )

    def _load_and_show(subj_path, bg_path, prompt_text, out_path=None):
        """
        Turn the values of one Examples row into widget-ready objects.

        Args:
            subj_path: Path of the subject image.
            bg_path: Path of the background image.
            prompt_text: Prompt string, returned unchanged.
            out_path: Optional path of the pre-rendered result (the fourth
                column of an example row). When omitted it is derived from
                ``subj_path`` by replacing ".png" with "_out.png"; that guess
                does not hold for every row (e.g. "sofa1_1.png" is paired with
                "sofa1_out.png"), so passing the explicit path is preferred.

        Returns FOUR objects, one for every output widget:
          1. subject PIL image               -> subj_img
          2. dict for the sketch component   -> ref_img
          3. prompt string                   -> promptbox
          4. pre-rendered result PIL         -> output_img

        NOTE(review): assumes the arguments arrive as file paths, not as
        already-loaded images — confirm against how gr.Examples calls `fn`.
        """
        if out_path is None:
            # Legacy fallback kept for callers that pass three values only.
            out_path = subj_path.replace(".png", "_out.png")
        return (
            Image.open(subj_path),                                # subject
            {"image": Image.open(bg_path), "mask": None},         # sketch dict
            prompt_text,                                          # prompt
            Image.open(out_path),                                 # output image
        )

    def ex(subj, bg, prompt):
        """Open an example's subject and background files.

        Returns a three-item list: the subject image, a sketch-style dict
        holding the background image with no mask, and the prompt unchanged.
        """
        subject_picture = Image.open(subj)
        sketch_value = dict(image=Image.open(bg), mask=None)
        return [subject_picture, sketch_value, prompt]

    # ---------- Examples ----------------------------------------
    # Each row supplies one value per component listed in `inputs`, in order:
    # subject image path, background image path, prompt, pre-rendered output path.
    # NOTE(review): if examples are ever cached or run on click, verify that the
    # number of values gradio passes to `fn` matches _load_and_show's signature.
    gr.Examples(
        examples=[
            ["examples/sofa1_1.png", "examples/sofa1_bg.png", "add the sofa", "examples/sofa1_out.png"],
            ["examples/sofa2.png", "examples/sofa2_bg.png", "add this sofa", "examples/sofa2_out.png"],
            ["examples/chair1.png", "examples/chair1_bg.png", "add the chair", "examples/chair1_out.png"],
            ["examples/console_table.png", "examples/console_table_bg.png", "Scandinavian console table against a gallery-style wall filled with abstract framed art,", "examples/console_table_out.png"],
            ["examples/office_chair.png", "examples/office_chair_bg.png", "office chair", "examples/office_chair_out.png"],
            ["examples/office_chair1.png", "examples/office_chair1_bg.png", "Executive mesh chair in a modern home office, with matte black wall panels, built-in shelves, ", "examples/office_chair1_out.png"],
            ["examples/bed.png", "examples/bed_in.png", "Low platform bed in a Japandi-style bedroom, surrounded by floating nightstands", "examples/bed_out.png"],
            ["examples/car.png", "examples/car_bg.png", "car on the road", "examples/car_out.png"],
        ],
        # output_img is listed as an input too, so the fourth column of a row
        # has a component to be loaded into.
        inputs        = [subj_img, ref_img, promptbox, output_img],
        outputs       = [subj_img, ref_img, promptbox, output_img],
        fn            = _load_and_show,
        #preprocess    = False,     # data already widget-ready
        cache_examples=False
        )


    # ---------- Buttons & interactions --------------------------
    # run_btn.click(
    #     process_image_and_text,
    #     inputs=[subj_img, ref_img, promptbox, gr.State(False), gr.State("")],
    #     outputs=[gallery, raw_state]
    # )
    # "Generate": call process_image_and_text. The two inline gr.State values
    # fill its `_unused1` / `_unused2` parameters; the two returned images go
    # to the output widget and to raw_state.
    run_btn.click(
        process_image_and_text,
        inputs=[subj_img, ref_img, promptbox, gr.State(False), gr.State("")],
        outputs=[output_img, raw_state]
    )
    # "Generate BG": render a background from the prompt and show it in bg_img.
    bg_btn.click(_gen_bg, inputs=[bgprompt], outputs=[bg_img])

# ---------------- Launch ---------------------------------------
# Launch the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()