darylalim committed on
Commit
6bd4009
1 Parent(s): 542dc70

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +95 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from parler_tts import ParlerTTSForConditionalGeneration
3
+ from transformers import AutoTokenizer, AutoFeatureExtractor
4
+ import gradio as gr
5
+ import spaces
6
+
7
# Checkpoint name shared by the model, tokenizer and feature extractor.
model_name = "parler-tts/parler_tts_mini_v0.1"

# Use the first CUDA device when one is available; otherwise fall back to CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

tokenizer = AutoTokenizer.from_pretrained(model_name)
feature_extractor = AutoFeatureExtractor.from_pretrained(model_name)

# Load the model once at import time and move it to the selected device.
model = ParlerTTSForConditionalGeneration.from_pretrained(model_name)
model = model.to(device)

# Sampling rate reported by the feature extractor.
sr = feature_extractor.sampling_rate

# Each example is a [text to speak, voice description] pair.
examples = [
    [
        "Hey, how are you doing today?",
        "A female speaker with a slightly high-pitched voice delivers her words quite expressively, in a very confined sounding environment with clear audio quality. She speaks very fast.",
    ],
    [
        "The life of the land is perpetuated in righteousness.",
        "A male speaker with a low-pitched voice delivers his words at a slightly slow pace and a dramatic tone, in a very spacious environment, accompanied by noticeable background noise.",
    ],
]
28
+
29
@spaces.GPU
def generate_speech(text, description):
    """
    Generate speech for ``text`` in the voice described by ``description``.

    Args:
        text: The words to speak. Tokenized and passed to the model as
            ``prompt_input_ids``.
        description: Free-form description of the voice. Tokenized and passed
            to the model as ``input_ids``.

    Returns:
        A ``(sampling_rate, waveform)`` tuple. ``waveform`` is the generated
        output moved to the CPU and converted to a NumPy array with
        singleton dimensions squeezed out.

    Raises:
        gr.Error: If ``text`` is empty or contains only whitespace.
    """
    # Reject empty input before spending a GPU call on nothing to speak.
    if not text or not text.strip():
        raise gr.Error("Please enter some text to generate speech for.")

    input_ids = tokenizer(description, return_tensors="pt").input_ids.to(device)
    prompt_input_ids = tokenizer(text, return_tensors="pt").input_ids.to(device)

    # Sampling is enabled, so repeated calls with the same inputs may differ.
    generation = model.generate(
        input_ids=input_ids,
        prompt_input_ids=prompt_input_ids,
        do_sample=True,
        temperature=1.0,
    )
    audio_arr = generation.cpu().numpy().squeeze()

    return sr, audio_arr
46
+
47
# Build the demo page: a title and tips, then a row with two columns
# (text inputs plus the run button, and the audio output).
with gr.Blocks() as demo:
    gr.Markdown("# Parler-TTS Mini")
    gr.Markdown(
        """
        Tips:
        - Include term "very clear audio" and/or "very noisy audio"
        - Use punctuation for prosody
        - Control gender, speaking rate, pitch, reverberation in prompt
        """
    )
    with gr.Row():
        with gr.Column():
            input_text = gr.Textbox(elem_id="input_text", label="Input Text", lines=2)
            description = gr.Textbox(
                elem_id="input_description", label="Description", lines=2
            )
            run_button = gr.Button("Generate Audio", variant="primary")
        with gr.Column():
            audio_out = gr.Audio(
                elem_id="audio_out", label="Parler-TTS generation", type="numpy"
            )

    # The same input/output components serve both the examples and the button.
    inputs = [input_text, description]
    outputs = [audio_out]

    gr.Examples(
        examples=examples,
        inputs=inputs,
        outputs=outputs,
        fn=generate_speech,
        cache_examples=True,
    )
    run_button.click(
        fn=generate_speech,
        inputs=inputs,
        outputs=outputs,
        queue=True,
    )

# Enable the request queue, then start serving the app.
demo.queue().launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ git+https://github.com/huggingface/parler-tts.git
2
+ gradio
3
+ spaces