TranGiaBao committed on
Commit
77852b1
0 Parent(s):

Duplicate from trangiabao17032000/final_tts

Browse files
Files changed (4) hide show
  1. .gitattributes +35 -0
  2. README.md +13 -0
  3. app.py +148 -0
  4. requirements.txt +6 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Final Tts
3
+ emoji: 📈
4
+ colorFrom: red
5
+ colorTo: red
6
+ sdk: gradio
7
+ sdk_version: 3.40.1
8
+ app_file: app.py
9
+ pinned: false
10
+ duplicated_from: trangiabao17032000/final_tts
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import soundfile as sf
3
+ import gradio as gr
4
+ from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech
5
+ from transformers import SpeechT5HifiGan
6
+ from datasets import load_dataset
7
+ from IPython.display import Audio
8
+ import numpy as np
9
# Hub repository that provides both the processor and the text-to-speech model.
model_name = "trangiabao17032000/final_tts"

# Processor; its tokenizer is also exposed under its own module-level name.
processor = SpeechT5Processor.from_pretrained(model_name)
tokenizer = processor.tokenizer

# Text-to-speech model: resize the token embeddings to the tokenizer length,
# then switch the module to evaluation mode.
model = SpeechT5ForTextToSpeech.from_pretrained(model_name)
_vocab_size = len(tokenizer)
model.resize_token_embeddings(_vocab_size)
model.eval()

# Vocoder handed to model.generate_speech() by text_to_speech().
vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
vocoder.eval()

# Speaker embedding: the "xvector" field of entry 7306 of the validation split,
# wrapped in a tensor with one extra leading dimension (unsqueeze(0)).
embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
_speaker_xvector = embeddings_dataset[7306]["xvector"]
speaker_embeddings = torch.tensor(_speaker_xvector).unsqueeze(0)
27
+
28
+ # text normalization helpers
29
def convert_string_to_numbers(input_str):
    """Convert a numeric string to an ``int`` or ``float``.

    The string is first read with a comma acting as the decimal separator
    ("3,5" -> 3.5). If that fails, commas are treated as thousands separators
    and removed ("1,000,000" -> 1000000). Whole values are returned as ``int``,
    everything else as ``float``.

    Args:
        input_str: Text to convert.

    Returns:
        The parsed number (``int`` when the value is whole, else ``float``).

    Raises:
        ValueError: If the text cannot be read as a number either way
            (including the empty string).
    """
    def as_number(text):
        # float() raises ValueError for anything that is not a number.
        value = float(text)
        return int(value) if value.is_integer() else value

    try:
        # Comma as decimal separator takes precedence, so "1,5" is 1.5.
        return as_number(input_str.replace(',', '.'))
    except ValueError:
        pass

    try:
        # Several commas cannot be a decimal separator; read them as grouping.
        return as_number(input_str.replace(',', ''))
    except ValueError:
        raise ValueError("Invalid input: couldn't convert to a number") from None
44
+
45
def number_to_vietnamese_words(number):
    """Spell out *number* in lower-case Vietnamese words.

    The integer part is read in groups of three digits (nghìn, triệu, tỷ).
    A fractional part is rounded to two decimal places and read digit by
    digit after "phẩy"; trailing zeros are dropped, leading zeros are kept
    (1.05 -> "một phẩy không năm"). Negative values are prefixed with "âm".

    Args:
        number: An ``int`` or ``float``.

    Returns:
        The spelled-out number, without leading or trailing spaces.

    Raises:
        ValueError: If the value is not finite or its integer part is
            10**12 or larger (no scale word is available beyond "tỷ").
    """
    digits = ['không', 'một', 'hai', 'ba', 'bốn', 'năm', 'sáu', 'bảy', 'tám', 'chín']
    thousands = ['', 'nghìn', 'triệu', 'tỷ']

    def words(n, pad_hundreds=False):
        # Read 1 <= n < 1000. pad_hundreds adds "không trăm" when the group
        # follows a higher group, so 1005 is not read like "một nghìn năm".
        hundreds_digit, remainder = divmod(n, 100)
        tens_digit, unit = divmod(remainder, 10)
        parts = []
        if hundreds_digit or pad_hundreds:
            parts.append(digits[hundreds_digit] + ' trăm')
        if tens_digit == 0:
            if unit:
                if parts:
                    # "lẻ" links the hundreds to a lone unit digit (105).
                    parts.append('lẻ')
                parts.append(digits[unit])
        elif tens_digit == 1:
            parts.append('mười')
            if unit:
                # 15 is "mười lăm"; "mười năm" would mean "ten years".
                parts.append('lăm' if unit == 5 else digits[unit])
        else:
            parts.append(digits[tens_digit] + ' mươi')
            if unit == 1:
                parts.append('mốt')
            elif unit == 5:
                parts.append('lăm')
            elif unit:
                parts.append(digits[unit])
        return ' '.join(parts)

    negative = number < 0
    magnitude = -number if negative else number

    if isinstance(magnitude, int):
        integer_part, fraction_digits = magnitude, ''
    else:
        # Formatting rounds to two decimals and carries into the integer part
        # when needed (1.999 -> "2.00"); int() rejects "nan" and "inf".
        integer_text, _, fraction_digits = f"{magnitude:.2f}".partition('.')
        integer_part = int(integer_text)
        fraction_digits = fraction_digits.rstrip('0')

    if integer_part >= 1000 ** len(thousands):
        raise ValueError("Number is too large to spell out")

    if integer_part == 0 and not fraction_digits:
        return 'không'

    groups = []
    index = 0
    remaining = integer_part
    while remaining > 0:
        remaining, group = divmod(remaining, 1000)
        if group:
            text = words(group, pad_hundreds=remaining > 0)
            if index:
                text += ' ' + thousands[index]
            groups.append(text)
        index += 1

    # A pure fraction such as 0.5 still needs its integer part read aloud.
    result = ' '.join(reversed(groups)) or 'không'
    if fraction_digits:
        result += ' phẩy ' + ' '.join(digits[int(d)] for d in fraction_digits)

    return 'âm ' + result if negative else result
85
+
86
def is_num(string):
    """Return True when *string* parses as a finite float.

    Words that ``float()`` accepts but that are not readable numbers
    ("nan", "inf", "infinity") are rejected, so ordinary text tokens with
    those spellings are not sent to the number converter.

    Args:
        string: Token to test.

    Returns:
        ``True`` for a finite numeric token, ``False`` otherwise.
    """
    import math

    try:
        value = float(string)
    except ValueError:
        return False
    return math.isfinite(value)
92
+
93
def normalize(input):
    """Lower-case *input* and spell out its numeric tokens in Vietnamese.

    The text is split on single spaces, so the original spacing is kept when
    the tokens are joined again. A token that looks numeric but cannot be
    spelled out is left unchanged instead of aborting the whole paragraph.

    Args:
        input: Text to normalize.

    Returns:
        The normalized text.
    """
    def spell_out(token):
        if not is_num(token):
            return token
        try:
            return number_to_vietnamese_words(convert_string_to_numbers(token))
        except (ValueError, OverflowError, IndexError):
            # The helpers reject values they cannot read (for example
            # non-finite or very large numbers); keep the token as typed.
            return token

    return ' '.join(spell_out(token) for token in input.lower().split(" "))
97
+
98
def split_paragraph_into_sentences(paragraph, max_chars = 300):
    """Split *paragraph* into chunks of whole words of at most *max_chars*.

    Words are separated on any whitespace and re-joined with single spaces.
    A word is added to the current chunk while the chunk stays within
    *max_chars*; otherwise a new chunk is started. A single word longer than
    *max_chars* is kept whole in its own chunk.

    Args:
        paragraph: Text to split.
        max_chars: Maximum length of a chunk, in characters.

    Returns:
        The list of chunks; empty when *paragraph* contains no words.
    """
    words = paragraph.split()
    if not words:
        # Empty or whitespace-only text: there is nothing to split.
        return []

    sentences = []
    current = words[0]
    for word in words[1:]:
        # +1 accounts for the joining space.
        if len(current) + len(word) + 1 <= max_chars:
            current += ' ' + word
        else:
            sentences.append(current)
            current = word

    sentences.append(current)
    return sentences
114
+
115
+ # speech generation
116
def text_to_speech(paragraph):
    """Synthesize *paragraph* and return the path of the resulting WAV file.

    The text is normalized (falling back to plain lower-casing if that
    fails), split into chunks, and each chunk is passed through the processor
    and ``model.generate_speech``. The pieces are concatenated and written at
    a sample rate of 16000.

    Each call writes to its own temporary file, so simultaneous requests do
    not overwrite each other's audio. The file is not deleted here.

    Args:
        paragraph: Text to speak.

    Returns:
        Path of the written ``.wav`` file.

    Raises:
        gr.Error: If *paragraph* is empty or contains only whitespace.
    """
    import tempfile

    if not paragraph or not paragraph.strip():
        raise gr.Error("Please enter some text to synthesize.")

    try:
        paragraph = normalize(paragraph)
    except Exception:
        # Best effort: keep going with the un-normalized, lower-cased text.
        paragraph = paragraph.lower()

    pieces = []
    for sentence in split_paragraph_into_sentences(paragraph):
        inputs = processor(text=sentence, return_tensors="pt")
        speech = model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=vocoder)
        pieces.append(speech.numpy())

    # Join once at the end instead of re-copying the growing array per chunk.
    final_speech = np.concatenate(pieces)

    # Reserve a unique file name, then let soundfile write to that path.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
        output_path = handle.name
    sf.write(output_path, final_speech, samplerate=16000)
    return output_path
132
+
133
# Sample inputs offered in the interface.
tts_examples = [
    "xin chào mọi người, đây là sản phẩm thử nghiệm cho tiếng việt.",
    "Mình sẽ tổ chức sinh nhật vào thứ 6 ngày 7 tháng này",
]

# Gradio interface: a text box as input, an audio component as output,
# with text_to_speech as the handler.
iface = gr.Interface(
    fn=text_to_speech,
    inputs=gr.Textbox(),
    outputs=gr.Audio(),
    title="Text-to-Speech",
    description="Give me something to say!",
    examples=tts_examples,
)

iface.launch()
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ torch
2
+ transformers
3
+ soundfile
4
+ datasets
5
+ IPython
6
+ sentencepiece