attempt compilation
Browse files
app.py
CHANGED
@@ -7,11 +7,24 @@ import spaces
|
|
7 |
|
8 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
pipe = AuraFlowPipeline.from_pretrained(
|
11 |
"AuraDiffusion/AuraFlow",
|
12 |
torch_dtype=torch.float16
|
13 |
).to("cuda")
|
14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
MAX_SEED = np.iinfo(np.int32).max
|
16 |
MAX_IMAGE_SIZE = 1024
|
17 |
|
|
|
7 |
|
8 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
9 |
|
10 |
+
torch.set_float32_matmul_precision("high")
|
11 |
+
|
12 |
+
torch._inductor.config.conv_1x1_as_mm = True
|
13 |
+
torch._inductor.config.coordinate_descent_tuning = True
|
14 |
+
torch._inductor.config.epilogue_fusion = False
|
15 |
+
torch._inductor.config.coordinate_descent_check_all_directions = True
|
16 |
+
|
17 |
pipe = AuraFlowPipeline.from_pretrained(
|
18 |
"AuraDiffusion/AuraFlow",
|
19 |
torch_dtype=torch.float16
|
20 |
).to("cuda")
|
21 |
|
22 |
+
pipe.transformer.to(memory_format=torch.channels_last)
|
23 |
+
pipe.vae.to(memory_format=torch.channels_last)
|
24 |
+
|
25 |
+
pipe.transformer = torch.compile(pipe.transformer, mode="max-autotune", fullgraph=True)
|
26 |
+
pipe.vae.decode = torch.compile(pipe.vae.decode, mode="max-autotune", fullgraph=True)
|
27 |
+
|
28 |
MAX_SEED = np.iinfo(np.int32).max
|
29 |
MAX_IMAGE_SIZE = 1024
|
30 |
|