Serhiy Stetskovych
committed on
Commit
•
4b1870b
0
Parent(s):
initial commit
Browse files- Dockerfile +21 -0
- README.md +10 -0
- app.py +60 -0
- requirements.txt +8 -0
Dockerfile
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# CUDA 11.8 development image on Ubuntu 22.04.
FROM nvidia/cuda:11.8.0-devel-ubuntu22.04



WORKDIR /app

# Refresh the package index and install in the SAME layer: with two separate
# RUN steps Docker can reuse a cached, stale `apt-get update` layer when the
# install line changes. The apt lists are dropped afterwards to keep the
# layer small.
RUN apt-get update && \
    apt-get install -y python3-pip git && \
    rm -rf /var/lib/apt/lists/*

# Run as a non-root user with UID 1000; user-level pip installs place their
# scripts in ~/.local/bin, so that directory is put on PATH.
RUN useradd -m -u 1000 user
USER user
ENV PATH="/home/user/.local/bin:$PATH"

COPY --chown=user ./requirements.txt requirements.txt
RUN pip3 install --upgrade pip wheel
# numpy, torch and packaging are installed before flash-attn.
RUN pip install numpy==1.26.2 torch==2.3.0 packaging
RUN pip install -U flash-attn==2.5.8
RUN pip install --no-cache-dir --upgrade -r requirements.txt

COPY --chown=user app.py /app
CMD [ "python3", "app.py" ]
|
README.md
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
title: Valle2 Demo
|
3 |
+
emoji: π
|
4 |
+
colorFrom: yellow
|
5 |
+
colorTo: gray
|
6 |
+
sdk: docker
|
7 |
+
app_port: 7860
|
8 |
+
pinned: false
|
9 |
+
---
|
10 |
+
|
app.py
ADDED
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
|
3 |
+
import torch
|
4 |
+
import gradio as gr
|
5 |
+
|
6 |
+
|
7 |
+
|
8 |
+
# Use the GPU when torch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Vocoder, fetched through torch.hub.
vocoder = torch.hub.load(
    repo_or_dir="ex3ndr/supervoice-vocoder",
    model="bigvsan",
)
vocoder.to(device)
vocoder.eval()

# GPT model, fetched through torch.hub.
gpt = torch.hub.load(
    repo_or_dir="ex3ndr/supervoice-gpt",
    model="phonemizer",
)
gpt.to(device)
gpt.eval()

# Main model; it receives the two objects loaded above as keyword arguments.
model = torch.hub.load(
    repo_or_dir="ex3ndr/supervoice-voicebox",
    model="phonemizer",
    gpt=gpt,
    vocoder=vocoder,
)
model.to(device)
model.eval()
|
24 |
+
|
25 |
+
|
26 |
+
|
27 |
+
|
28 |
+
# Text passed as the `description` argument of the Gradio interface.
# A plain string literal: there are no placeholders, so no f-prefix is needed.
description = '''
Voicebox demo
'''
|
31 |
+
|
32 |
+
def synthesise(text, voice):
    """Synthesise speech for ``text`` using the selected ``voice``.

    Args:
        text: Text to speak; forwarded to ``model.synthesize``.
        voice: Voice identifier, forwarded as the ``voice`` keyword.

    Returns:
        A ``(sample_rate, samples)`` tuple: the fixed rate 24000 and the
        ``'wav'`` entry of the model output converted to a NumPy array.
    """
    # Inference only, so skip autograd bookkeeping.
    with torch.no_grad():
        output = model.synthesize(text, voice=voice, steps=8, alpha=0.1)
    waveform = output['wav']
    # Tensor.numpy() raises for CUDA tensors and for tensors that require
    # grad, and the model may live on a CUDA device. Detach and copy to host
    # memory first; both calls are no-ops for a plain CPU tensor.
    return (24000, waveform.detach().cpu().numpy())
|
36 |
+
|
37 |
+
if __name__ == "__main__":
    # Input widgets: free text plus a choice between the two voices.
    text_input = gr.Text(label='Text:', lines=5, max_lines=10)
    voice_input = gr.Dropdown(
        label="voice",
        choices=("voice_1", "voice_2"),
        value="voice_1",
    )

    # Output widget: audio delivered as a NumPy (sample_rate, samples) pair.
    audio_output = gr.Audio(
        label="Audio:",
        autoplay=False,
        streaming=False,
        type="numpy",
    )

    demo = gr.Interface(
        fn=synthesise,
        description=description,
        inputs=[text_input, voice_input],
        outputs=[audio_output],
        allow_flagging='never',
        cache_examples=True,
        title='Something',
        examples=[],
    )

    # Queue requests before launching, then listen on all interfaces.
    demo.queue(max_size=20, default_concurrency_limit=4)
    demo.launch(share=False, server_name="0.0.0.0")
|
requirements.txt
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
torch==2.3.0
gradio
torchaudio
vocos
encodec
sentencepiece
xformers
# Pinned to the version the Dockerfile pre-installs; unpinned, the
# `pip install --upgrade -r requirements.txt` step may replace it with the
# latest release.
flash-attn==2.5.8
|