Johann Diedrick committed
Commit 2de6199 • 1 Parent(s): 44831f3

init commit

Files changed (3):
  1. README.md +6 -5
  2. app.py +44 -0
  3. requirements.txt +3 -0
README.md CHANGED
@@ -1,12 +1,13 @@
  ---
- title: Audioldm2 Api
- emoji: 🐨
- colorFrom: purple
- colorTo: pink
+ title: AudioLDM2 API
+ emoji: 🚀
+ colorFrom: yellow
+ colorTo: green
  sdk: gradio
- sdk_version: 4.3.0
+ sdk_version: 3.41.2
  app_file: app.py
  pinned: false
+ license: openrail
  ---

  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,44 @@
+ import gradio as gr
+ import torch
+ from diffusers import AudioLDM2Pipeline
+
+ # make Space compatible with CPU duplicates
+ if torch.cuda.is_available():
+     device = "cuda"
+     torch_dtype = torch.float16
+ else:
+     device = "cpu"
+     torch_dtype = torch.float32
+
+ # load the diffusers pipeline
+ repo_id = "cvssp/audioldm2"
+ pipe = AudioLDM2Pipeline.from_pretrained(repo_id, torch_dtype=torch_dtype).to(device)
+ # pipe.unet = torch.compile(pipe.unet)
+
+ # set the generator for reproducibility
+ generator = torch.Generator(device)
+
+
+ def text2audio(text, negative_prompt, duration, guidance_scale, random_seed, n_candidates):
+     if text is None:
+         raise gr.Error("Please provide a text input.")
+
+     waveforms = pipe(
+         text,
+         audio_length_in_s=duration,
+         guidance_scale=guidance_scale,
+         num_inference_steps=200,
+         negative_prompt=negative_prompt,
+         num_waveforms_per_prompt=n_candidates if n_candidates else 1,
+         generator=generator.manual_seed(int(random_seed)),
+     )["audios"]
+
+     return gr.make_waveform((16000, waveforms[0]), bg_image="bg.png")
+
+ gradio_interface = gr.Interface(
+     fn = text2audio,
+     inputs = ["text", "text", "number", "number", "number", "number"],
+     outputs = "video",
+ )
+
+ gradio_interface.launch()
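
Since the Space is titled "AudioLDM2 API", a likely use is calling it remotely once deployed. Below is a minimal sketch using gradio_client; the Space path, prompt, and parameter values are illustrative assumptions, not part of this commit.

    from gradio_client import Client

    # Hypothetical Space path; replace with the actual "<user>/<space>" once the Space is live.
    client = Client("diedrick/audioldm2-api")

    # Positional arguments mirror text2audio(text, negative_prompt, duration,
    # guidance_scale, random_seed, n_candidates) from app.py above.
    result = client.predict(
        "a dog barking in a large hall",  # text
        "low quality",                    # negative_prompt
        5.0,                              # duration in seconds
        3.5,                              # guidance_scale
        42,                               # random_seed
        1,                                # n_candidates
        api_name="/predict",
    )
    print(result)  # local path to the rendered waveform video

Note that gradio_client is a separate package from gradio and has to be installed wherever the call is made.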
requirements.txt ADDED
@@ -0,0 +1,3 @@
+ diffusers
+ torch
+ gradio
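
A quick way to check that these three unpinned dependencies are enough to run the model outside the Space is a short local smoke test. This is only a sketch; the prompt, step count, and duration are arbitrary, and a GPU is advisable given that the app itself runs 200 inference steps.

    import torch
    from diffusers import AudioLDM2Pipeline

    device = "cuda" if torch.cuda.is_available() else "cpu"
    pipe = AudioLDM2Pipeline.from_pretrained("cvssp/audioldm2", torch_dtype=torch.float32).to(device)

    # A handful of steps is enough to confirm the pipeline loads and produces audio.
    audio = pipe("water drops in a cave", num_inference_steps=10, audio_length_in_s=5.0).audios[0]
    print(audio.shape)  # 1-D array of samples at 16 kHz, the rate assumed by the app's make_waveform call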