Flux-Use-Compiled-Graph

Running on Zero

sayakpaul HF Staff committed on Sep 11

Commit

bb10560

1 Parent(s): ee4246b

up

Browse files

Files changed (5) hide show

README.md +1 -6
aoti.py +15 -0
app.py +33 -84
hub_utils.py +0 -35
optimization.py +0 -43

README.md CHANGED Viewed

@@ -1,5 +1,5 @@
 ---
-title: Flux.1-Dev Compiled Graph
 emoji: 🖼️
 colorFrom: yellow
 colorTo: green
@@ -7,11 +7,6 @@ sdk: gradio
 sdk_version: 5.39.0
 app_file: app.py
 pinned: false
-hf_oauth: true
-hf_oauth_scopes:
-  - read-repos
-  - write-repos
-  - manage-repos
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: Flux.1-Dev Use Compiled Graph
 emoji: 🖼️
 colorFrom: yellow
 colorTo: green
 sdk_version: 5.39.0
 app_file: app.py
 pinned: false
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

aoti.py ADDED Viewed

	@@ -0,0 +1,15 @@

+import torch
+from huggingface_hub import hf_hub_download
+from spaces.zero.torch.aoti import ZeroGPUCompiledModel
+from spaces.zero.torch.aoti import ZeroGPUWeights
+from spaces.zero.torch.aoti import drain_module_parameters
+def aoti_load_(module: torch.nn.Module, repo_id: str, filename: str):
+    compiled_graph_file = hf_hub_download(repo_id, filename)
+    state_dict = module.state_dict()
+    zerogpu_weights = ZeroGPUWeights({name: weight for name, weight in state_dict.items()})
+    compiled = ZeroGPUCompiledModel(compiled_graph_file, zerogpu_weights)
+    setattr(module, "forward", compiled)
+    drain_module_parameters(module)

app.py CHANGED Viewed

@@ -1,90 +1,39 @@
-import spaces
 import gradio as gr
 import torch
-from diffusers import DiffusionPipeline
-from optimization import compile_transformer
-from hub_utils import _push_compiled_graph_to_hub
-from huggingface_hub import whoami
-import time
 # --- Model Loading ---
 dtype = torch.bfloat16
 device = "cuda" if torch.cuda.is_available() else "cpu"
-# Load the model pipeline
-pipe = DiffusionPipeline.from_pretrained("black-forest-labs/Flux.1-Dev", torch_dtype=dtype).to(device)
-@spaces.GPU(duration=1200)
-def push_to_hub(repo_id, filename, oauth_token: gr.OAuthToken, progress=gr.Progress(track_tqdm=True)):
-    if not filename.endswith(".pt2"):
-        raise NotImplementedError("The filename must end with a `.pt2` extension.")
-    # this will throw if token is invalid
-    try:
-        _ = whoami(oauth_token.token)
-        # --- Ahead-of-time compilation ---
-        start = time.perf_counter()
-        compiled_transformer = compile_transformer(pipe, prompt="prompt")
-        if torch.cuda.is_available():
-            torch.cuda.synchronize()
-        end = time.perf_counter()
-        print(f"Compilation took: {start - end} seconds.")
-        token = oauth_token.token
-        out = _push_compiled_graph_to_hub(
-            compiled_transformer.archive_file, repo_id=repo_id, token=token, path_in_repo=filename
-        )
-        if not isinstance(out, str) and hasattr(out, "commit_url"):
-            commit_url = out.commit_url
-            return f"[{commit_url}]({commit_url})"
-        else:
-            return out
-    except Exception as e:
-        raise gr.Error(
-            f"""Oops, you forgot to login. Please use the loggin button on the top left to migrate your repo {e}"""
-        )
-# Page CSS: centre the main column and cap its width at 520px.
-css = """
-#col-container {
-    margin: 0 auto;
-    max-width: 520px;
-}
-"""
-# Main UI: explanatory text, two text inputs and a button whose click runs
-# `push_to_hub` and renders the result as Markdown.
-with gr.Blocks(css=css) as demo:
-    with gr.Column(elem_id="col-container"):
-        gr.Markdown(
-            "## Compile [Flux.1-Dev](https://hf.co/black-forest-labs/Flux.1-Dev) graph ahead of time & push to the Hub"
-        )
-        gr.Markdown(
-            "Enter a **repo_id** and **filename**. This repo automatically compiles the Flux.1-Dev model ahead of time. Read more about this in [this post](https://huggingface.co/blog/zerogpu-aoti)."
-        )
-        gr.Markdown("Depending on the model, it can take some time (2-10 mins) to compile.")
-        repo_id = gr.Textbox(label="repo_id", placeholder="e.g. sayakpaul/qwen-aot")
-        filename = gr.Textbox(label="filename", placeholder="e.g. compiled.pt2")
-        run = gr.Button("Push graph to Hub", variant="primary")
-        markdown_out = gr.Markdown()
-    # Only the two textboxes are wired as inputs.
-    # NOTE(review): `oauth_token` is presumably injected by Gradio from the handler's type annotation -- confirm.
-    run.click(push_to_hub, inputs=[repo_id, filename], outputs=[markdown_out])
-def swap_visibilty(profile: gr.OAuthProfile | None):
-    return gr.update(elem_classes=["main_ui_logged_in"]) if profile else gr.update(elem_classes=["main_ui_logged_out"])
-# Style for the logged-out state: dimmed and not clickable.
-css_login = """
-.main_ui_logged_out{opacity: 0.3; pointer-events: none; margin: 0 auto; max-width: 520px}
-"""
-# Outer app: a login button plus the main UI rendered inside a column that
-# starts with the logged-out CSS class.
-with gr.Blocks(css=css_login) as demo_login:
-    gr.LoginButton()
-    with gr.Column(elem_classes="main_ui_logged_out") as main_ui:
-        demo.render()
-    # On page load, update the column's CSS class via `swap_visibilty`.
-    demo_login.load(fn=swap_visibilty, outputs=main_ui)
-demo_login.queue()
-demo_login.launch()

+from datetime import datetime
 import gradio as gr
+import spaces
 import torch
+from diffusers import FluxPipeline
+from aoti import aoti_load
 # --- Model Loading ---
 dtype = torch.bfloat16
 device = "cuda" if torch.cuda.is_available() else "cpu"
+pipeline = FluxPipeline.from_pretrained(
+    "black-forest-labs/Flux.1-Dev", torch_dtype=torch.bfloat16
+).to(device)
+pipeline.transformer.fuse_qkv_projections()
+aoti_load_(pipeline.transformer, "sayakpaul/flux-dev-aot", "flux-dev-aot.pt2")
+@spaces.GPU
+def generate_image(prompt: str, progress=gr.Progress(track_tqdm=True)):
+    generator = torch.Generator(device='cuda').manual_seed(42)
+    t0 = datetime.now()
+    output = pipeline(
+        prompt=prompt,
+        num_inference_steps=28,
+        generator=generator,
+    )
+    return [(output.images[0], f'{(datetime.now() - t0).total_seconds():.2f}s')]
+# Build the UI (prompt text box in, gallery out) around `generate_image` and start
+# the server immediately; example outputs are not cached (`cache_examples=False`).
+gr.Interface(
+    fn=generate_image,
+    inputs=gr.Text(label="Prompt"),
+    outputs=gr.Gallery(),
+    examples=["A cat playing with a ball of yarn"],
+    cache_examples=False,
+).launch()

hub_utils.py DELETED Viewed

@@ -1,35 +0,0 @@
-from io import BytesIO
-from huggingface_hub import create_repo, upload_file
-import tempfile
-import os
-DEFAULT_ARCHIVE_FILENAME = "archived_graph.pt2"
-def _push_compiled_graph_to_hub(archive: BytesIO, repo_id, **kwargs):
-    if not isinstance(archive, BytesIO):
-        raise NotImplementedError("Incorrect type of `archive` provided.")
-    commit_message = kwargs.pop("commit_message", "Uploaded from spaces.")
-    private = kwargs.pop("private", False)
-    path_in_repo = kwargs.pop("path_in_repo", DEFAULT_ARCHIVE_FILENAME)
-    token = kwargs.pop("token")
-    repo_id = create_repo(repo_id, private=private, exist_ok=True, token=token).repo_id
-    with tempfile.TemporaryDirectory() as tmpdir:
-        output_path = os.path.join(tmpdir, os.path.basename(path_in_repo))
-        with open(output_path, "wb") as f:
-            f.write(archive.getvalue())
-        try:
-            info = upload_file(
-                repo_id=repo_id,
-                path_or_fileobj=output_path,
-                path_in_repo=os.path.basename(path_in_repo),
-                commit_message=commit_message,
-                token=token,
-            )
-            return info
-        except Exception as e:
-            print(f"File couldn't be pushed to the Hub with the following error: {e}.")
-            return e

optimization.py DELETED Viewed

@@ -1,43 +0,0 @@
-import spaces
-from typing import Any
-from typing import Callable
-from typing import ParamSpec
-import torch
-from torch.utils._pytree import tree_map
-# Parameter spec used to type the pass-through arguments of `compile_transformer`.
-P = ParamSpec("P")
-# Symbolic export dimension named "hidden", bounded to the range 4096..8212.
-TRANSFORMER_HIDDEN_DIM = torch.export.Dim("hidden", min=4096, max=8212)
-# Specific to Flux. More about this is available in
-# https://huggingface.co/blog/zerogpu-aoti
-# Dimension 1 of `hidden_states` and dimension 0 of `img_ids` share the symbolic dim above.
-TRANSFORMER_DYNAMIC_SHAPES = {
-    "hidden_states": {1: TRANSFORMER_HIDDEN_DIM},
-    "img_ids": {0: TRANSFORMER_HIDDEN_DIM},
-}
-# Options handed to `spaces.aoti_compile` together with the exported program.
-INDUCTOR_CONFIGS = {
-    "conv_1x1_as_mm": True,
-    "epilogue_fusion": False,
-    "coordinate_descent_tuning": True,
-    "coordinate_descent_check_all_directions": True,
-    "max_autotune": True,
-    "triton.cudagraphs": True,
-}
-def compile_transformer(pipeline: Callable[P, Any], *args: P.args, **kwargs: P.kwargs):
-    def f():
-        with spaces.aoti_capture(pipeline.transformer) as call:
-            pipeline(*args, **kwargs)
-        dynamic_shapes = tree_map(lambda v: None, call.kwargs)
-        dynamic_shapes |= TRANSFORMER_DYNAMIC_SHAPES
-        exported = torch.export.export(
-            mod=pipeline.transformer, args=call.args, kwargs=call.kwargs, dynamic_shapes=dynamic_shapes
-        )
-        return spaces.aoti_compile(exported, INDUCTOR_CONFIGS)
-    compiled_transformer = f()
-    return compiled_transformer