Spaces:
Paused
Paused
Commit
•
3678d66
1
Parent(s):
29a572d
Update app.py
Browse files
app.py
CHANGED
@@ -88,6 +88,8 @@ api = HfApi(token=HF_TOKEN)
|
|
88 |
|
89 |
repo_id = "jbilcke-hf/zephyr-xtts"
|
90 |
|
|
|
|
|
91 |
default_system_message = f"""
|
92 |
You're the storyteller, crafting a short tale for young listeners. Please abide by these guidelines:
|
93 |
- Keep your sentences short, concise and easy to understand.
|
@@ -143,6 +145,26 @@ LLAMA_VERBOSE=False
|
|
143 |
print("Running LLM Zephyr")
|
144 |
llm_zephyr = Llama(model_path=zephyr_model_path,n_gpu_layers=GPU_LAYERS-10,max_new_tokens=256, context_window=4096, n_ctx=4096,n_batch=128,verbose=LLAMA_VERBOSE)
|
145 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
146 |
# <|system|>
|
147 |
# You are a friendly chatbot who always responds in the style of a pirate.</s>
|
148 |
# <|user|>
|
@@ -507,7 +529,6 @@ def generate_speech_for_sentence(history, chatbot_role, sentence, return_as_byte
|
|
507 |
print("Sentence for speech:", sentence)
|
508 |
|
509 |
try:
|
510 |
-
SENTENCE_SPLIT_LENGTH=350
|
511 |
if len(sentence)<SENTENCE_SPLIT_LENGTH:
|
512 |
# no problem continue on
|
513 |
sentence_list = [sentence]
|
@@ -515,7 +536,8 @@ def generate_speech_for_sentence(history, chatbot_role, sentence, return_as_byte
|
|
515 |
# Until now nltk likely split sentences properly but we need additional
|
516 |
# check for longer sentence and split at last possible position
|
517 |
# Do whatever is necessary: first break at hyphens, then spaces, and then even split very long words
|
518 |
-
sentence_list=textwrap.wrap(sentence,SENTENCE_SPLIT_LENGTH)
|
|
|
519 |
print("SPLITTED LONG SENTENCE:",sentence_list)
|
520 |
|
521 |
for sentence in sentence_list:
|
|
|
88 |
|
89 |
repo_id = "jbilcke-hf/zephyr-xtts"
|
90 |
|
91 |
+
SENTENCE_SPLIT_LENGTH=250
|
92 |
+
|
93 |
default_system_message = f"""
|
94 |
You're the storyteller, crafting a short tale for young listeners. Please abide by these guidelines:
|
95 |
- Keep your sentences short, concise and easy to understand.
|
|
|
145 |
print("Running LLM Zephyr")
|
146 |
llm_zephyr = Llama(model_path=zephyr_model_path,n_gpu_layers=GPU_LAYERS-10,max_new_tokens=256, context_window=4096, n_ctx=4096,n_batch=128,verbose=LLAMA_VERBOSE)
|
147 |
|
148 |
+
def split_sentences(text, max_len):
    """Split *text* into sentence-sized chunks of at most *max_len* characters.

    The text is first rewritten so that runs of doubled punctuation
    ('..' and '!!') are more likely to be treated as sentence breaks, then
    tokenized into sentences with NLTK. Any sentence longer than *max_len*
    is further wrapped with ``textwrap.wrap`` (long words may be broken).

    Args:
        text: The input string to split.
        max_len: Maximum length of each returned chunk; used as the wrap
            width for over-long sentences.

    Returns:
        A list of strings: sentences that fit within *max_len* unchanged,
        and wrapped pieces for those that do not.
    """
    # Each rule prepends one extra punctuation mark to a doubled mark
    # (keeping any whitespace that preceded it) and collapses the trailing
    # whitespace to a single space.
    punctuation_rules = (
        (r"(\s*\.{2})\s*", r".\1 "),  # for '..'
        (r"(\s*\!{2})\s*", r"!\1 "),  # for '!!'
    )
    for pattern, replacement in punctuation_rules:
        text = re.sub(pattern, replacement, text)

    chunks = []
    for sentence in nltk.sent_tokenize(text):
        if len(sentence) <= max_len:
            # Short enough: keep the sentence as NLTK produced it.
            chunks.append(sentence)
            continue
        # Too long: fall back to width-based wrapping, splitting words if needed.
        chunks.extend(textwrap.wrap(sentence, max_len, break_long_words=True))

    return chunks
|
166 |
+
|
167 |
+
|
168 |
# <|system|>
|
169 |
# You are a friendly chatbot who always responds in the style of a pirate.</s>
|
170 |
# <|user|>
|
|
|
529 |
print("Sentence for speech:", sentence)
|
530 |
|
531 |
try:
|
|
|
532 |
if len(sentence)<SENTENCE_SPLIT_LENGTH:
|
533 |
# no problem continue on
|
534 |
sentence_list = [sentence]
|
|
|
536 |
# Until now nltk likely split sentences properly but we need additional
|
537 |
# check for longer sentence and split at last possible position
|
538 |
# Do whatever is necessary: first break at hyphens, then spaces, and then even split very long words
|
539 |
+
# sentence_list=textwrap.wrap(sentence,SENTENCE_SPLIT_LENGTH)
|
540 |
+
sentence_list = split_sentences(sentence, SENTENCE_SPLIT_LENGTH)
|
541 |
print("SPLITTED LONG SENTENCE:",sentence_list)
|
542 |
|
543 |
for sentence in sentence_list:
|