Spaces:
Runtime error
Runtime error
SD-XL max tokens optimization (with compel)
Browse files
app.py
CHANGED
@@ -8,11 +8,23 @@ from gradio_client import Client
|
|
8 |
|
9 |
client = Client("https://fffiloni-test-llama-api.hf.space/", hf_token=hf_token)
|
10 |
|
|
|
11 |
from diffusers import DiffusionPipeline
|
12 |
import torch
|
13 |
|
14 |
-
pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0",
|
|
|
|
|
|
|
15 |
pipe.to("cuda")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
#pipe.enable_model_cpu_offload()
|
17 |
|
18 |
# if using torch < 2.0
|
@@ -79,9 +91,7 @@ def infer(audio_file):
|
|
79 |
I'll give you a music description, from which I want you to provide an illustrative image description that would fit well with the music.
|
80 |
Do not process each segment or song, but provide a summary for the whole instead.
|
81 |
Answer with only one image description. Never do lists. Maximum 77 tokens.
|
82 |
-
|
83 |
Here's the music description :
|
84 |
-
|
85 |
{cap_result}
|
86 |
|
87 |
"""
|
@@ -95,7 +105,11 @@ def infer(audio_file):
|
|
95 |
|
96 |
print(f"Llama2 result: {result}")
|
97 |
|
98 |
-
|
|
|
|
|
|
|
|
|
99 |
|
100 |
print("Finished")
|
101 |
|
|
|
8 |
|
9 |
client = Client("https://fffiloni-test-llama-api.hf.space/", hf_token=hf_token)
|
10 |
|
11 |
+
from compel import Compel, ReturnedEmbeddingsType
|
12 |
from diffusers import DiffusionPipeline
|
13 |
import torch
|
14 |
|
15 |
+
pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0",
|
16 |
+
torch_dtype=torch.float16,
|
17 |
+
use_safetensors=True,
|
18 |
+
variant="fp16")
|
19 |
pipe.to("cuda")
|
20 |
+
|
21 |
+
compel = Compel(
|
22 |
+
tokenizer=[pipe.tokenizer, pipe.tokenizer_2],
|
23 |
+
text_encoder=[pipe.text_encoder, pipe.text_encoder_2],
|
24 |
+
returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
|
25 |
+
requires_pooled=[False, True]
|
26 |
+
)
|
27 |
+
|
28 |
#pipe.enable_model_cpu_offload()
|
29 |
|
30 |
# if using torch < 2.0
|
|
|
91 |
I'll give you a music description, from which I want you to provide an illustrative image description that would fit well with the music.
|
92 |
Do not process each segment or song, but provide a summary for the whole instead.
|
93 |
Answer with only one image description. Never do lists. Maximum 77 tokens.
|
|
|
94 |
Here's the music description :
|
|
|
95 |
{cap_result}
|
96 |
|
97 |
"""
|
|
|
105 |
|
106 |
print(f"Llama2 result: {result}")
|
107 |
|
108 |
+
# ——— Encode the Llama2 description with Compel and generate the image from the embeddings ———
|
109 |
+
|
110 |
+
prompt = result
|
111 |
+
conditioning, pooled = compel(prompt)
|
112 |
+
images = pipe(prompt_embeds=conditioning, pooled_prompt_embeds=pooled).images[0]
|
113 |
|
114 |
print("Finished")
|
115 |
|