Spaces:

vietvoice
/

truyen-ngontinh

Running

tu committed 4 days ago

Commit

c17bcc7

•

1 Parent(s): 9a436eb

update model + change preprocess

Files changed (2) hide show

app.py CHANGED Viewed

@@ -10,11 +10,29 @@ from text import text_to_sequence
 from scipy.io.wavfile import write
 from preprocess import preprocess
 import onnxruntime
 def get_text(texts, hps):
     text_norm_list = []
-    for text in texts.split(","):
           text = preprocess(text)
           chunk_strings = []
           chunk_len = 30
           for i in range(0, len(text.split()), chunk_len):
@@ -29,8 +47,8 @@ def get_text(texts, hps):
 def tts(text):
     model_path = "model.onnx"
-    config_path = "config.json"
-    sid = 4
     output_wav_path = "output.wav"
     sess_options = onnxruntime.SessionOptions()
     model = onnxruntime.InferenceSession(str(model_path), sess_options=sess_options, providers=["CPUExecutionProvider"])
@@ -43,7 +61,7 @@ def tts(text):
     for stn_tst in stn_tst_list:
         text = np.expand_dims(np.array(stn_tst, dtype=np.int64), 0)
         text_lengths = np.array([text.shape[1]], dtype=np.int64)
-        scales = np.array([0.667, 1.1, 0.85], dtype=np.float32)
         sid = np.array([int(sid)]) if sid is not None else None
         audio = model.run(

 from scipy.io.wavfile import write
 from preprocess import preprocess
 import onnxruntime
+import re
 def get_text(texts, hps):
     text_norm_list = []
+    texts = texts.replace("“", "")
+    texts = texts.replace("”", "")
+    texts = texts.replace("…", ".")
+    texts = re.split('[!:;\.\n]', texts)
+    tmp = []
+    for t in texts:
+        t = t.rstrip().strip()
+        if len(t) > 0:
+            tmp.append(t)
+    texts = []
+    for t in tmp:
+        texts.append(t)
+        texts.append(".")
+        #texts.append(".")
+    #print(texts)
+    for text in texts:
           text = preprocess(text)
+          print(text)
           chunk_strings = []
           chunk_len = 30
           for i in range(0, len(text.split()), chunk_len):
 def tts(text):
     model_path = "model.onnx"
+    config_path = "configs/thu_hue.json"
+    sid = 9
     output_wav_path = "output.wav"
     sess_options = onnxruntime.SessionOptions()
     model = onnxruntime.InferenceSession(str(model_path), sess_options=sess_options, providers=["CPUExecutionProvider"])
     for stn_tst in stn_tst_list:
         text = np.expand_dims(np.array(stn_tst, dtype=np.int64), 0)
         text_lengths = np.array([text.shape[1]], dtype=np.int64)
+        scales = np.array([0.667, 1.08, 0.8], dtype=np.float32)
         sid = np.array([int(sid)]) if sid is not None else None
         audio = model.run(

model.onnx CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4a0c9c2e2ce10af648bf6b0718feac0d23cb713da66452168674d467c68f1c52
-size 123389245

 version https://git-lfs.github.com/spec/v1
+oid sha256:c781e3fdb725651e0074d4549e272fe22d4cc71872b098bbc62d6bb995b567c4
+size 130096954