fantaxy committed on
Commit 833ef3a
1 Parent(s): c866b24

Update app.py

Files changed (1)
  1. app.py +198 -102
app.py CHANGED
@@ -1,114 +1,210 @@
- import torch
- import torchaudio
- from einops import rearrange
  import gradio as gr
  import spaces
- import os
- import uuid
- from transformers import pipeline
-
- # Importing the model-related functions
- from stable_audio_tools import get_pretrained_model
- from stable_audio_tools.inference.generation import generate_diffusion_cond
-
- # Load the model outside of the GPU-decorated function
- def load_model():
-     print("Loading model...")
-     model, model_config = get_pretrained_model("stabilityai/stable-audio-open-1.0")
-     print("Model loaded successfully.")
-     return model, model_config
-
- # Load the translation model
- translator = pipeline("translation", model="Helsinki-NLP/opus-mt-ko-en")
-
- # Function to set up, generate, and process the audio
- @spaces.GPU(duration=120)  # Allocate GPU only when this function is called
- def generate_audio(prompt, seconds_total=30, steps=100, cfg_scale=7):
-     print(f"Original Prompt: {prompt}")
-
-     # Translate the Korean prompt to English
-     translated_prompt = translator(prompt, max_length=512)[0]['translation_text']
-     print(f"Translated Prompt: {translated_prompt}")
-
-     device = "cuda" if torch.cuda.is_available() else "cpu"
-     print(f"Using device: {device}")
-
-     # Fetch the Hugging Face token from the environment variable
-     hf_token = os.getenv('HF_TOKEN')
-     print(f"Hugging Face token: {hf_token}")
-
-     # Use pre-loaded model and configuration
-     model, model_config = load_model()
-     sample_rate = model_config["sample_rate"]
-     sample_size = model_config["sample_size"]
-
-     print(f"Sample rate: {sample_rate}, Sample size: {sample_size}")
-
-     model = model.to(device)
-     print("Model moved to device.")
-
-     # Set up text and timing conditioning
-     conditioning = [{
-         "prompt": translated_prompt,
-         "seconds_start": 0,
-         "seconds_total": seconds_total
-     }]
-     print(f"Conditioning: {conditioning}")
-
-     # Generate stereo audio
-     print("Generating audio...")
-     output = generate_diffusion_cond(
-         model,
-         steps=steps,
-         cfg_scale=cfg_scale,
-         conditioning=conditioning,
-         sample_size=sample_size,
-         sigma_min=0.3,
-         sigma_max=500,
-         sampler_type="dpmpp-3m-sde",
-         device=device
-     )
-     print("Audio generated.")
-
-     # Rearrange audio batch to a single sequence
-     output = rearrange(output, "b d n -> d (b n)")
-     print("Audio rearranged.")
-
-     # Peak normalize, clip, convert to int16
-     output = output.to(torch.float32).div(torch.max(torch.abs(output))).clamp(-1, 1).mul(32767).to(torch.int16).cpu()
-     print("Audio normalized and converted.")
-
-     # Generate a unique filename for the output
-     unique_filename = f"output_{uuid.uuid4().hex}.wav"
-     print(f"Saving audio to file: {unique_filename}")
-
-     # Save to file
-     torchaudio.save(unique_filename, output, sample_rate)
-     print(f"Audio saved: {unique_filename}")
-
-     # Return the path to the generated audio file
-     return unique_filename
-
- css = """
- footer {
-     visibility: hidden;
- }
- """
-
- # Setting up the Gradio Interface
- interface = gr.Interface(theme="Nymbo/Nymbo_Theme", css=css,
-     fn=generate_audio,
-     inputs=[
-         gr.Textbox(label="프롬프트", placeholder="여기에 텍스트 프롬프트를 입력하세요"),
-         gr.Slider(0, 47, value=30, label="오디오 길이 (초)"),
-         gr.Slider(10, 150, value=100, step=10, label="디퓨전 단계 수"),
-         gr.Slider(1, 15, value=7, step=0.1, label="CFG 스케일")
-     ],
-     outputs=gr.Audio(type="filepath", label="생성된 오디오"),
  )
-
- # Pre-load the model to avoid multiprocessing issues
- model, model_config = load_model()
-
- # Launch the Interface
- interface.launch()
+ import logging
+ import random
+ import warnings
+ import os
  import gradio as gr
+ import numpy as np
  import spaces
+ import torch
+ from diffusers import FluxControlNetModel
+ from diffusers.pipelines import FluxControlNetPipeline
+ from gradio_imageslider import ImageSlider
+ from PIL import Image
+ from huggingface_hub import snapshot_download
+
+ css = """
+ #col-container {
+     margin: 0 auto;
+     max-width: 512px;
+ }
+ """
+
+ if torch.cuda.is_available():
+     power_device = "GPU"
+     device = "cuda"
+ else:
+     power_device = "CPU"
+     device = "cpu"
+
+ huggingface_token = os.getenv("HUGGINFACE_TOKEN")
+
+ model_path = snapshot_download(
+     repo_id="black-forest-labs/FLUX.1-dev",
+     repo_type="model",
+     ignore_patterns=["*.md", "*..gitattributes"],
+     local_dir="FLUX.1-dev",
+     token=huggingface_token,  # type a new token-id.
+ )
+
+ # Load pipeline
+ controlnet = FluxControlNetModel.from_pretrained(
+     "jasperai/Flux.1-dev-Controlnet-Upscaler", torch_dtype=torch.bfloat16
+ ).to(device)
+ pipe = FluxControlNetPipeline.from_pretrained(
+     model_path, controlnet=controlnet, torch_dtype=torch.bfloat16
  )
+ pipe.to(device)
+
+ MAX_SEED = 1000000
+ MAX_PIXEL_BUDGET = 1024 * 1024
+
+
+ def process_input(input_image, upscale_factor, **kwargs):
+     w, h = input_image.size
+     w_original, h_original = w, h
+     aspect_ratio = w / h
+
+     was_resized = False
+
+     if w * h * upscale_factor**2 > MAX_PIXEL_BUDGET:
+         warnings.warn(
+             f"Requested output image is too large ({w * upscale_factor}x{h * upscale_factor}). Resizing to ({int(aspect_ratio * MAX_PIXEL_BUDGET ** 0.5 // upscale_factor), int(MAX_PIXEL_BUDGET ** 0.5 // aspect_ratio // upscale_factor)}) pixels."
+         )
+         gr.Info(
+             f"Requested output image is too large ({w * upscale_factor}x{h * upscale_factor}). Resizing input to ({int(aspect_ratio * MAX_PIXEL_BUDGET ** 0.5 // upscale_factor), int(MAX_PIXEL_BUDGET ** 0.5 // aspect_ratio // upscale_factor)}) pixels budget."
+         )
+         input_image = input_image.resize(
+             (
+                 int(aspect_ratio * MAX_PIXEL_BUDGET**0.5 // upscale_factor),
+                 int(MAX_PIXEL_BUDGET**0.5 // aspect_ratio // upscale_factor),
+             )
+         )
+         was_resized = True
+
+     # resize to multiple of 8
+     w, h = input_image.size
+     w = w - w % 8
+     h = h - h % 8
+
+     return input_image.resize((w, h)), w_original, h_original, was_resized
+
+
+ @spaces.GPU#(duration=42)
+ def infer(
+     seed,
+     randomize_seed,
+     input_image,
+     num_inference_steps,
+     upscale_factor,
+     controlnet_conditioning_scale,
+     progress=gr.Progress(track_tqdm=True),
+ ):
+     if randomize_seed:
+         seed = random.randint(0, MAX_SEED)
+     true_input_image = input_image
+     input_image, w_original, h_original, was_resized = process_input(
+         input_image, upscale_factor
+     )
+
+     # rescale with upscale factor
+     w, h = input_image.size
+     control_image = input_image.resize((w * upscale_factor, h * upscale_factor))
+
+     generator = torch.Generator().manual_seed(seed)
+
+     gr.Info("Upscaling image...")
+     image = pipe(
+         prompt="",
+         control_image=control_image,
+         controlnet_conditioning_scale=controlnet_conditioning_scale,
+         num_inference_steps=num_inference_steps,
+         guidance_scale=3.5,
+         height=control_image.size[1],
+         width=control_image.size[0],
+         generator=generator,
+     ).images[0]
+
+     if was_resized:
+         gr.Info(
+             f"Resizing output image to targeted {w_original * upscale_factor}x{h_original * upscale_factor} size."
+         )
+
+     # resize to target desired size
+     image = image.resize((w_original * upscale_factor, h_original * upscale_factor))
+     image.save("output.jpg")
+     # convert to numpy
+     return [true_input_image, image, seed]
+
+
+ with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css) as demo:
+
+     with gr.Row():
+         run_button = gr.Button(value="Run")
+
+     with gr.Row():
+         with gr.Column(scale=4):
+             input_im = gr.Image(label="Input Image", type="pil")
+         with gr.Column(scale=1):
+             num_inference_steps = gr.Slider(
+                 label="Number of Inference Steps",
+                 minimum=8,
+                 maximum=50,
+                 step=1,
+                 value=28,
+             )
+             upscale_factor = gr.Slider(
+                 label="Upscale Factor",
+                 minimum=1,
+                 maximum=4,
+                 step=1,
+                 value=4,
+             )
+             controlnet_conditioning_scale = gr.Slider(
+                 label="Controlnet Conditioning Scale",
+                 minimum=0.1,
+                 maximum=1.5,
+                 step=0.1,
+                 value=0.6,
+             )
+             seed = gr.Slider(
+                 label="Seed",
+                 minimum=0,
+                 maximum=MAX_SEED,
+                 step=1,
+                 value=42,
+             )
+
+             randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
+
+     with gr.Row():
+         result = ImageSlider(label="Input / Output", type="pil", interactive=True)
+
+     examples = gr.Examples(
+         examples=[
+             [42, False, "z1.webp", 28, 4, 0.6],
+             [42, False, "z2.webp", 28, 4, 0.6],
+         ],
+         inputs=[
+             seed,
+             randomize_seed,
+             input_im,
+             num_inference_steps,
+             upscale_factor,
+             controlnet_conditioning_scale,
+         ],
+         fn=infer,
+         outputs=result,
+         cache_examples="lazy",
+     )
+
+     gr.on(
+         [run_button.click],
+         fn=infer,
+         inputs=[
+             seed,
+             randomize_seed,
+             input_im,
+             num_inference_steps,
+             upscale_factor,
+             controlnet_conditioning_scale,
+         ],
+         outputs=result,
+         show_api=False,
+         # show_progress="minimal",
+     )
+
+ demo.queue().launch(share=False)
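
For readers skimming the new process_input logic above, the following standalone sketch (not part of the commit) shows how the MAX_PIXEL_BUDGET clamp behaves. The 1024x768 input size and the printed numbers are purely illustrative assumptions; the resize expressions themselves mirror the ones in the diff.

# Illustration only: the pixel-budget clamp from process_input(), applied to a
# hypothetical 1024x768 input at the default 4x upscale factor.
MAX_PIXEL_BUDGET = 1024 * 1024  # same cap as in the new app.py

w, h = 1024, 768        # assumed input size, not taken from the commit
upscale_factor = 4      # default value of the "Upscale Factor" slider
aspect_ratio = w / h

if w * h * upscale_factor**2 > MAX_PIXEL_BUDGET:
    # Shrink the input so the upscaled output stays within the budget,
    # using the same expressions as the app.
    w = int(aspect_ratio * MAX_PIXEL_BUDGET**0.5 // upscale_factor)
    h = int(MAX_PIXEL_BUDGET**0.5 // aspect_ratio // upscale_factor)

# Round both sides down to a multiple of 8, as the app does before inference.
w, h = w - w % 8, h - h % 8
print(w, h)  # 336 192 -> the 4x output is 1344x768 (~1.03 MP, within budget)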