TenzinGayche committed on
Commit
c95b8ef
•
1 Parent(s): 1c36eff

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -24
app.py CHANGED
@@ -5,33 +5,18 @@ import torch
5
  import pyewts
6
  import noisereduce as nr
7
  from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
8
- def remove_repeated_words(text):
9
- # Tokenize the input text into words
10
- words = text.split()
11
 
12
- # Create a dictionary to count word occurrences
13
- word_count = {}
14
-
15
- # Create a list to store the final words
16
- new_words = []
17
-
18
- for word in words:
19
- # Check if the word is in the dictionary
20
- if word in word_count:
21
- # If it has occurred once before, add it to the list with a count of 2
22
- if word_count[word] == 1:
23
- new_words.append(word)
24
- word_count[word] = 2
25
- else:
26
- # If it has not occurred before, add it to the dictionary with a count of 1
27
- word_count[word] = 1
28
- new_words.append(word)
29
-
30
- # Join the modified words back into a string
31
- result = ' '.join(new_words)
32
  return result
33
 
34
-
35
  converter = pyewts.pyewts()
36
  checkpoint = "TenzinGayche/TTS_run3_ep20_174k_b"
37
  processor = SpeechT5Processor.from_pretrained(checkpoint)
@@ -65,6 +50,7 @@ def predict(text, speaker):
65
  return (16000, np.zeros(0).astype(np.int16))
66
  text = converter.toWylie(text)
67
  text=cleanup_text(text)
 
68
  inputs = processor(text=text, return_tensors="pt")
69
  # limit input length
70
  input_ids = inputs["input_ids"]
 
5
  import pyewts
6
  import noisereduce as nr
7
  from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
8
+ from num2tib.core import convert
9
+ from num2tib.core import convert2text
10
+ import re
11
 
12
+ def replace_numbers_with_convert(sentence, wylie=True):
13
+ pattern = r'\d+'
14
+ def replace(match):
15
+ return convert(match.group(), wylie)
16
+ result = re.sub(pattern, replace, sentence)
17
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  return result
19
 
 
20
  converter = pyewts.pyewts()
21
  checkpoint = "TenzinGayche/TTS_run3_ep20_174k_b"
22
  processor = SpeechT5Processor.from_pretrained(checkpoint)
 
50
  return (16000, np.zeros(0).astype(np.int16))
51
  text = converter.toWylie(text)
52
  text=cleanup_text(text)
53
+ text=replace_numbers_with_convert(text)
54
  inputs = processor(text=text, return_tensors="pt")
55
  # limit input length
56
  input_ids = inputs["input_ids"]