DRBAPH committed on
Commit
d4ca24e
1 Parent(s): 3d41f57

Upload gradio_app.py

Browse files
Files changed (1) hide show
  1. gradio_app.py +32 -16
gradio_app.py CHANGED
@@ -10,7 +10,7 @@ from datetime import datetime
10
  import gradio as gr
11
 
12
  # Define the function to generate audio based on a prompt
13
- def generate_audio(prompt, steps, cfg_scale, sigma_min, sigma_max, generation_time, seed, sampler_type):
14
  device = "cuda" if torch.cuda.is_available() else "cpu"
15
 
16
  # Download model
@@ -19,6 +19,16 @@ def generate_audio(prompt, steps, cfg_scale, sigma_min, sigma_max, generation_ti
19
  sample_size = model_config["sample_size"]
20
 
21
  model = model.to(device)
 
 
 
 
 
 
 
 
 
 
22
 
23
  # Set up text and timing conditioning
24
  conditioning = [{
@@ -41,11 +51,19 @@ def generate_audio(prompt, steps, cfg_scale, sigma_min, sigma_max, generation_ti
41
  seed=seed
42
  )
43
 
 
 
 
44
  # Rearrange audio batch to a single sequence
45
  output = rearrange(output, "b d n -> d (b n)")
46
 
47
- # Peak normalize, clip, convert to int16, and save to temporary file
48
- output = output.to(torch.float32).div(torch.max(torch.abs(output))).clamp(-1, 1).mul(32767).to(torch.int16).cpu()
 
 
 
 
 
49
  torchaudio.save("temp_output.wav", output, sample_rate)
50
 
51
  # Convert to MP3 format using pydub
@@ -74,7 +92,7 @@ def generate_audio(prompt, steps, cfg_scale, sigma_min, sigma_max, generation_ti
74
 
75
  return full_path
76
 
77
- def audio_generator(prompt, sampler_type, steps, cfg_scale, sigma_min, sigma_max, generation_time, seed):
78
  try:
79
  print("Generating audio with parameters:")
80
  print("Prompt:", prompt)
@@ -85,8 +103,9 @@ def audio_generator(prompt, sampler_type, steps, cfg_scale, sigma_min, sigma_max
85
  print("Sigma Max:", sigma_max)
86
  print("Generation Time:", generation_time)
87
  print("Seed:", seed)
 
88
 
89
- filename = generate_audio(prompt, steps, cfg_scale, sigma_min, sigma_max, generation_time, seed, sampler_type)
90
  return gr.Audio(filename), f"Generated: {filename}"
91
  except Exception as e:
92
  return str(e)
@@ -106,16 +125,13 @@ sampler_dropdown = gr.Dropdown(
106
  ],
107
  value="dpmpp-3m-sde"
108
  )
109
- steps_slider = gr.Slider(minimum=0, maximum=200, label="Steps", step=1)
110
- steps_slider.value = 100 # Set the default value here
111
- cfg_scale_slider = gr.Slider(minimum=0, maximum=15, label="CFG Scale", step=0.1)
112
- cfg_scale_slider.value = 7 # Set the default value here
113
  sigma_min_slider = gr.Slider(minimum=0, maximum=50, label="Sigma Min", step=0.1, value=0.3)
114
- sigma_max_slider = gr.Slider(minimum=0, maximum=1000, label="Sigma Max", step=1, value=500)
115
- generation_time_slider = gr.Slider(minimum=0, maximum=47, label="Generation Time (seconds)", step=1)
116
- generation_time_slider.value = 47 # Set the default value here
117
- seed_slider = gr.Slider(minimum=-1, maximum=999999, label="Seed", step=1)
118
- seed_slider.value = 77212 # Set the default value here
119
 
120
  output_textbox = gr.Textbox(label="Output")
121
 
@@ -124,8 +140,8 @@ description = "[Github Repository](https://github.com/Saganaki22/StableAudioWebU
124
 
125
  gr.Interface(
126
  audio_generator,
127
- [prompt_textbox, sampler_dropdown, steps_slider, cfg_scale_slider, sigma_min_slider, sigma_max_slider, generation_time_slider, seed_slider],
128
  [gr.Audio(), output_textbox],
129
  title=title,
130
  description=description
131
- ).launch()
 
10
  import gradio as gr
11
 
12
  # Define the function to generate audio based on a prompt
13
+ def generate_audio(prompt, steps, cfg_scale, sigma_min, sigma_max, generation_time, seed, sampler_type, model_half):
14
  device = "cuda" if torch.cuda.is_available() else "cpu"
15
 
16
  # Download model
 
19
  sample_size = model_config["sample_size"]
20
 
21
  model = model.to(device)
22
+
23
+ # Print model data type before conversion
24
+ print("Model data type before conversion:", next(model.parameters()).dtype)
25
+
26
+ # Convert model to float16 if model_half is True
27
+ if model_half:
28
+ model = model.to(torch.float16)
29
+
30
+ # Print model data type after conversion
31
+ print("Model data type after conversion:", next(model.parameters()).dtype)
32
 
33
  # Set up text and timing conditioning
34
  conditioning = [{
 
51
  seed=seed
52
  )
53
 
54
+ # Print output data type
55
+ print("Output data type:", output.dtype)
56
+
57
  # Rearrange audio batch to a single sequence
58
  output = rearrange(output, "b d n -> d (b n)")
59
 
60
+ # Peak normalize, clip, and convert to int16 directly if model_half is used
61
+ output = output.div(torch.max(torch.abs(output))).clamp(-1, 1).mul(32767)
62
+ if model_half:
63
+ output = output.to(torch.int16).cpu()
64
+ else:
65
+ output = output.to(torch.float32).to(torch.int16).cpu()
66
+
67
  torchaudio.save("temp_output.wav", output, sample_rate)
68
 
69
  # Convert to MP3 format using pydub
 
92
 
93
  return full_path
94
 
95
+ def audio_generator(prompt, sampler_type, steps, cfg_scale, sigma_min, sigma_max, generation_time, seed, model_half):
96
  try:
97
  print("Generating audio with parameters:")
98
  print("Prompt:", prompt)
 
103
  print("Sigma Max:", sigma_max)
104
  print("Generation Time:", generation_time)
105
  print("Seed:", seed)
106
+ print("Model Half Precision:", model_half)
107
 
108
+ filename = generate_audio(prompt, steps, cfg_scale, sigma_min, sigma_max, generation_time, seed, sampler_type, model_half)
109
  return gr.Audio(filename), f"Generated: {filename}"
110
  except Exception as e:
111
  return str(e)
 
125
  ],
126
  value="dpmpp-3m-sde"
127
  )
128
+ steps_slider = gr.Slider(minimum=0, maximum=200, label="Steps", step=1, value=100)
129
+ cfg_scale_slider = gr.Slider(minimum=0, maximum=15, label="CFG Scale", step=0.1, value=7)
 
 
130
  sigma_min_slider = gr.Slider(minimum=0, maximum=50, label="Sigma Min", step=0.1, value=0.3)
131
+ sigma_max_slider = gr.Slider(minimum=0, maximum=1000, label="Sigma Max", step=0.1, value=500)
132
+ generation_time_slider = gr.Slider(minimum=0, maximum=47, label="Generation Time (seconds)", step=1, value=47)
133
+ seed_slider = gr.Slider(minimum=-1, maximum=999999, label="Seed", step=1, value=123456)
134
+ model_half_checkbox = gr.Checkbox(label="Low VRAM (float16)", value=False)
 
135
 
136
  output_textbox = gr.Textbox(label="Output")
137
 
 
140
 
141
  gr.Interface(
142
  audio_generator,
143
+ [prompt_textbox, sampler_dropdown, steps_slider, cfg_scale_slider, sigma_min_slider, sigma_max_slider, generation_time_slider, seed_slider, model_half_checkbox],
144
  [gr.Audio(), output_textbox],
145
  title=title,
146
  description=description
147
+ ).launch()