RayTsai-030 committed on
Commit 0e1ee20 · 1 Parent(s): 31cffe7
app.py CHANGED
@@ -35,84 +35,20 @@ intro = """
  </h1>
  <span>[<a target="_blank" href="https://ray-1026.github.io/lightsout/">Project page</a>]</span>
  </div>
+ <div style="text-align: center; margin-top: 15px; font-size: 1.2em;">
+     <strong>NOTICE</strong>: This demo is limited to CPU inference only. For a better experience, please run the code locally with a GPU.
+ </div>
  """
 
-
- # SYSTEM_PROMPT = """
- # # Edit Instruction Rewriter
- # You are a professional edit instruction rewriter. Your task is to generate a precise, concise, and visually achievable professional-level edit instruction based on the user-provided instruction and the image to be edited.
- # Please strictly follow the rewriting rules below:
- # ## 1. General Principles
- # - Keep the rewritten prompt **concise**. Avoid overly long sentences and reduce unnecessary descriptive language.
- # - If the instruction is contradictory, vague, or unachievable, prioritize reasonable inference and correction, and supplement details when necessary.
- # - Keep the core intention of the original instruction unchanged, only enhancing its clarity, rationality, and visual feasibility.
- # - All added objects or modifications must align with the logic and style of the edited input image’s overall scene.
- # ## 2. Task Type Handling Rules
- # ### 1. Add, Delete, Replace Tasks
- # - If the instruction is clear (already includes task type, target entity, position, quantity, attributes), preserve the original intent and only refine the grammar.
- # - If the description is vague, supplement with minimal but sufficient details (category, color, size, orientation, position, etc.). For example:
- #     > Original: "Add an animal"
- #     > Rewritten: "Add a light-gray cat in the bottom-right corner, sitting and facing the camera"
- # - Remove meaningless instructions: e.g., "Add 0 objects" should be ignored or flagged as invalid.
- # - For replacement tasks, specify "Replace Y with X" and briefly describe the key visual features of X.
- # ### 2. Text Editing Tasks
- # - All text content must be enclosed in English double quotes `" "`. Do not translate or alter the original language of the text, and do not change the capitalization.
- # - **For text replacement tasks, always use the fixed template:**
- #     - `Replace "xx" to "yy"`.
- #     - `Replace the xx bounding box to "yy"`.
- # - If the user does not specify text content, infer and add concise text based on the instruction and the input image’s context. For example:
- #     > Original: "Add a line of text" (poster)
- #     > Rewritten: "Add text \"LIMITED EDITION\" at the top center with slight shadow"
- # - Specify text position, color, and layout in a concise way.
- # ### 3. Human Editing Tasks
- # - Maintain the person’s core visual consistency (ethnicity, gender, age, hairstyle, expression, outfit, etc.).
- # - If modifying appearance (e.g., clothes, hairstyle), ensure the new element is consistent with the original style.
- # - **For expression changes, they must be natural and subtle, never exaggerated.**
- # - If deletion is not specifically emphasized, the most important subject in the original image (e.g., a person, an animal) should be preserved.
- # - For background change tasks, emphasize maintaining subject consistency at first.
- # - Example:
- #     > Original: "Change the person’s hat"
- #     > Rewritten: "Replace the man’s hat with a dark brown beret; keep smile, short hair, and gray jacket unchanged"
- # ### 4. Style Transformation or Enhancement Tasks
- # - If a style is specified, describe it concisely with key visual traits. For example:
- #     > Original: "Disco style"
- #     > Rewritten: "1970s disco: flashing lights, disco ball, mirrored walls, colorful tones"
- # - If the instruction says "use reference style" or "keep current style," analyze the input image, extract main features (color, composition, texture, lighting, art style), and integrate them concisely.
- # - **For coloring tasks, including restoring old photos, always use the fixed template:** "Restore old photograph, remove scratches, reduce noise, enhance details, high resolution, realistic, natural skin tones, clear facial features, no distortion, vintage photo restoration"
- # - If there are other changes, place the style description at the end.
- # ## 3. Rationality and Logic Checks
- # - Resolve contradictory instructions: e.g., "Remove all trees but keep all trees" should be logically corrected.
- # - Add missing key information: if position is unspecified, choose a reasonable area based on composition (near subject, empty space, center/edges).
- # # Output Format Example
- # ```json
- # {
- #     "Rewritten": "..."
- # }
- # """
-
-
- # def polish_prompt(prompt, img):
- #     prompt = f"{SYSTEM_PROMPT}\n\nUser Input: {prompt}\n\nRewritten Prompt:"
- #     success = False
- #     while not success:
- #         try:
- #             result = api(prompt, [img])
- #             # print(f"Result: {result}")
- #             # print(f"Polished Prompt: {polished_prompt}")
- #             if isinstance(result, str):
- #                 result = result.replace("```json", "")
- #                 result = result.replace("```", "")
- #                 result = json.loads(result)
- #             else:
- #                 result = json.loads(result)
-
- #             polished_prompt = result["Rewritten"]
- #             polished_prompt = polished_prompt.strip()
- #             polished_prompt = polished_prompt.replace("\n", " ")
- #             success = True
- #         except Exception as e:
- #             print(f"[Warning] Error during API call: {e}")
- #     return polished_prompt
+ css = """
+ #col-container {
+     margin: 0 auto;
+     max-width: 1024px;
+ }
+ #edit_text{
+     margin-top: -62px !important
+ }
+ """
 
 
  def encode_image(pil_image):
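Only the `return` line of `encode_image` appears in the hunk context below; for orientation, a minimal sketch of what the elided body presumably looks like (the `buffered` name comes from the diff itself, the PNG format is an assumption):

```python
import base64
from io import BytesIO

def encode_image(pil_image):
    # Serialize the PIL image into an in-memory buffer, then base64-encode it.
    # PNG is an assumption; the diff only shows the return statement.
    buffered = BytesIO()
    pil_image.save(buffered, format="PNG")
    return base64.b64encode(buffered.getvalue()).decode("utf-8")
```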
@@ -123,46 +59,13 @@ def encode_image(pil_image):
      return base64.b64encode(buffered.getvalue()).decode("utf-8")
 
 
- # def api(prompt, img_list, model="qwen-vl-max-latest", kwargs={}):
- #     import dashscope
-
- #     api_key = os.environ.get("DASH_API_KEY")
- #     if not api_key:
- #         raise EnvironmentError("DASH_API_KEY is not set")
- #     assert model in ["qwen-vl-max-latest"], f"Not implemented model {model}"
- #     sys_promot = (
- #         "you are a helpful assistant, you should provide useful answers to users."
- #     )
- #     messages = [
- #         {"role": "system", "content": sys_promot},
- #         {"role": "user", "content": []},
- #     ]
- #     for img in img_list:
- #         messages[1]["content"].append(
- #             {"image": f"data:image/png;base64,{encode_image(img)}"}
- #         )
- #     messages[1]["content"].append({"text": f"{prompt}"})
-
- #     response_format = kwargs.get("response_format", None)
-
- #     response = dashscope.MultiModalConversation.call(
- #         api_key=api_key,
- #         model=model,  # For example, use qwen-plus here. You can change the model name as needed. Model list: https://help.aliyun.com/zh/model-studio/getting-started/models
- #         messages=messages,
- #         result_format="message",
- #         response_format=response_format,
- #     )
-
- #     if response.status_code == 200:
- #         return response.output.choices[0].message.content[0]["text"]
- #     else:
- #         raise Exception(f"Failed to post: {response}")
+ # --- UI Constants and Helpers ---
+ MAX_SEED = np.iinfo(np.int32).max
 
 
  ## --- Model Loading --- ##
  device = "cuda" if torch.cuda.is_available() else "cpu"
  dtype = torch.bfloat16
- print(f"Using device: {device}")
 
  # controlnet
  controlnet = ControlNetModel.from_pretrained(
@@ -171,7 +74,9 @@ controlnet = ControlNetModel.from_pretrained(
 
  # outpainter
  pipe = ControlNetOutpaintPipeline.from_pretrained(
-     "stabilityai/stable-diffusion-2-inpainting", controlnet=controlnet, torch_dtype=dtype
+     "stabilityai/stable-diffusion-2-inpainting",
+     controlnet=controlnet,
+     torch_dtype=dtype,
  ).to(device)
  pipe.scheduler = CustomScheduler.from_config(pipe.scheduler.config)
  pipe.unet.load_attn_procs("./weights/light_outpaint_lora", use_safetensors=True)
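One note on the model-loading hunks: `dtype` stays fixed at `torch.bfloat16` for every device, while the new page notice says the Space runs CPU-only. A common defensive pattern, offered here as a suggestion rather than what this commit does, is to fall back to float32 on CPU:

```python
import torch

# bfloat16 kernels can be slow or unsupported on some CPUs; float32 is the
# safe default there. This sketch is an alternative, not the commit's code.
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.bfloat16 if device == "cuda" else torch.float32
```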
@@ -189,17 +94,20 @@ blip2 = blip2.to(device)
 
  # light regressor
  lsr_module = LightSourceRegressor()
- ckpt = torch.load("./weights/light_regress/model.pth", map_location="cpu" if device=="cpu" else None)
+ ckpt = torch.load(
+     "./weights/light_regress/model.pth", map_location="cpu" if device == "cpu" else None
+ )
  lsr_module.load_state_dict(ckpt["model"])
  lsr_module.to(device)
  lsr_module.eval()
 
  # SIFR model
  sifr_model = Uformer(img_size=512, img_ch=3, output_ch=6).to(device)
- sifr_model.load_state_dict(torch.load("./weights/net_g_last.pth", map_location="cpu" if device=="cpu" else None))
-
- # --- UI Constants and Helpers ---
- MAX_SEED = np.iinfo(np.int32).max
+ sifr_model.load_state_dict(
+     torch.load(
+         "./weights/net_g_last.pth", map_location="cpu" if device == "cpu" else None
+     )
+ )
 
 
  # --- Main Inference Function (with hardcoded negative prompt) ---
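The `map_location="cpu" if device == "cpu" else None` expression reflowed above can also be written by passing the target device directly. A minimal equivalent sketch (the behavior differs only in that CUDA storages are mapped to the current device rather than the one recorded in the checkpoint):

```python
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"

# map_location accepts a device string, so one call covers both the CPU and
# CUDA cases with no conditional.
ckpt = torch.load("./weights/light_regress/model.pth", map_location=device)
```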
@@ -220,7 +128,9 @@ def infer(
      Generates an image
      """
      # dataset
-     dataset = HFCustomImageLoader(image, left_outpaint, right_outpaint, up_outpaint, down_outpaint)
+     dataset = HFCustomImageLoader(
+         image, left_outpaint, right_outpaint, up_outpaint, down_outpaint
+     )
      data = dataset[0]
 
      # generator
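The hunk's trailing context introduces the generator setup, whose body the diff elides. The usual diffusers pattern combines the `MAX_SEED` constant moved by this commit with a seeded `torch.Generator`; a sketch, where the `make_generator` helper is hypothetical and not part of this repo:

```python
import numpy as np
import torch

MAX_SEED = np.iinfo(np.int32).max  # largest value usable as an int32 seed

def make_generator(seed: int, randomize: bool, device: str) -> torch.Generator:
    # Hypothetical helper: optionally draw a fresh seed, then build the
    # generator that diffusers pipelines accept for reproducible sampling.
    if randomize:
        seed = int(np.random.randint(0, MAX_SEED))
    return torch.Generator(device=device).manual_seed(seed)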
@@ -255,9 +165,7 @@
      pred_mask = pred_mask.cpu()
      pred_mask = pred_mask.numpy()
 
-     data["control_img"] = Image.fromarray(
-         (pred_mask[0, 0] * 255).astype(np.uint8)
-     )
+     data["control_img"] = Image.fromarray((pred_mask[0, 0] * 255).astype(np.uint8))
 
      print("Finish light source detection...")
 
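The reflowed one-liner above implies `pred_mask` is a (batch, channel, H, W) float array in [0, 1]; in isolation the conversion to an 8-bit PIL control image looks like this (the shapes are assumptions inferred from the `[0, 0]` indexing):

```python
import numpy as np
from PIL import Image

# Stand-in mask with the assumed layout: one batch item, one channel.
pred_mask = np.random.rand(1, 1, 512, 512)

# Scale [0, 1] floats to [0, 255] uint8 and wrap as a grayscale PIL image.
control_img = Image.fromarray((pred_mask[0, 0] * 255).astype(np.uint8))
```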
@@ -337,21 +245,12 @@
 
      print("Finish flare removal...")
 
-     return outpaint_result, deflare_result
+     return data["control_img"], outpaint_result, deflare_result
 
 
  # --- Examples and UI Layout ---
  examples = []
 
- css = """
- #col-container {
-     margin: 0 auto;
-     max-width: 1024px;
- }
- #edit_text{
-     margin-top: -62px !important
- }
- """
 
  with gr.Blocks(css=css) as demo:
      with gr.Column(elem_id="col-container"):
@@ -378,16 +277,28 @@ with gr.Blocks(css=css) as demo:
 
          with gr.Column():
              left_outpaint = gr.Slider(
-                 label="Left outpaint (px)", minimum=32, maximum=128, step=32, value=64
+                 label="Left outpaint (px)",
+                 minimum=32,
+                 maximum=128,
+                 step=32,
+                 value=64,
              )
              right_outpaint = gr.Slider(
-                 label="Right outpaint (px)", minimum=32, maximum=128, step=32, value=64
+                 label="Right outpaint (px)",
+                 minimum=32,
+                 maximum=128,
+                 step=32,
+                 value=64,
              )
              up_outpaint = gr.Slider(
                  label="Up outpaint (px)", minimum=32, maximum=128, step=32, value=64
              )
              down_outpaint = gr.Slider(
-                 label="Down outpaint (px)", minimum=32, maximum=128, step=32, value=64
+                 label="Down outpaint (px)",
+                 minimum=32,
+                 maximum=128,
+                 step=32,
+                 value=64,
              )
 
          # randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
@@ -418,6 +329,10 @@ with gr.Blocks(css=css) as demo:
          )
 
          with gr.Row():
+             with gr.Column():
+                 lightmask_result = gr.Image(
+                     label="Lightmask Result", show_label=True, type="pil"
+                 )
              with gr.Column():
                  outpainted_result = gr.Image(
                      label="Outpainted Result", show_label=True, type="pil"
@@ -458,7 +373,7 @@ with gr.Blocks(css=css) as demo:
              up_outpaint,
              down_outpaint,
          ],
-         outputs=[outpainted_result, flarefree_result],
+         outputs=[lightmask_result, outpainted_result, flarefree_result],
      )
 
  if __name__ == "__main__":
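Since `infer` now returns three values, the event wiring must list three output components in the same order. A self-contained toy sketch of that contract (the component names besides the sliders and result images, the `run` button, and the stand-in `infer` body are assumptions; the diff shows only the tails of the inputs and outputs lists):

```python
import gradio as gr

def infer(image, left, right, up, down):
    # Stand-in for the real infer(): (light mask, outpainted, flare-free).
    return image, image, image

with gr.Blocks() as demo:
    input_image = gr.Image(type="pil")  # assumed name of the source component
    sliders = [gr.Slider(32, 128, value=64, step=32, label=l)
               for l in ("Left", "Right", "Up", "Down")]
    lightmask_result = gr.Image(label="Lightmask Result", type="pil")
    outpainted_result = gr.Image(label="Outpainted Result", type="pil")
    flarefree_result = gr.Image(label="Flare-free Result", type="pil")
    run = gr.Button("Run")
    # Three return values map one-to-one onto three outputs, in order.
    run.click(fn=infer, inputs=[input_image, *sliders],
              outputs=[lightmask_result, outpainted_result, flarefree_result])
```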
 
src/pipelines/__pycache__/pipeline_stable_diffusion_outpaint.cpython-39.pyc CHANGED
Binary files a/src/pipelines/__pycache__/pipeline_stable_diffusion_outpaint.cpython-39.pyc and b/src/pipelines/__pycache__/pipeline_stable_diffusion_outpaint.cpython-39.pyc differ