# Hugging Face Spaces page header (status: Sleeping)
import functools
import re
import tempfile

import gradio as gr
import numpy as np
import soundfile as sf
import torch
from IPython.display import Audio
from transformers import SpeechT5ForTextToSpeech
from transformers import SpeechT5HifiGan
from transformers import SpeechT5Processor
# Lookup table for spelling numbers out in English. It holds every value from
# 0 through 19, each multiple of ten from 20 through 90, and the words for
# 100 and 1000.
number_words = dict(enumerate((
    "zero", "one", "two", "three", "four",
    "five", "six", "seven", "eight", "nine",
    "ten", "eleven", "twelve", "thirteen", "fourteen",
    "fifteen", "sixteen", "seventeen", "eighteen", "nineteen",
)))
number_words.update({
    20: "twenty",
    30: "thirty",
    40: "forty",
    50: "fifty",
    60: "sixty",
    70: "seventy",
    80: "eighty",
    90: "ninety",
    100: "hundred",
    1000: "thousand",
})
# Ordered (source, target) substitutions that cleanup_text applies one after
# another with str.replace.
#
# NOTE(review): four source keys had been corrupted into the same mojibake
# character, which made three of the entries unreachable. They are restored
# here as the typographic characters they most plausibly were, written as
# escapes so a re-encoding of this file cannot garble them again -- confirm
# against the original list.
#
# Symbol names are padded with spaces so they do not fuse with neighbouring
# words ("a&b" used to become "aandb"); normalize_text collapses the extra
# whitespace afterwards.
replacements = [
    ("\u201c", '"'),    # left double quotation mark
    ("\u201d", '"'),    # right double quotation mark
    ("\u201a", ","),    # single low-9 quotation mark (looks like a comma)
    ("_", " "),
    ("\xa0", " "),      # non-breaking space
    ("\n", " "),
    ("$", " dollar "),
    ("%", " percent "),
    ("&", " and "),
    ("*", " star "),
    ("+", " plus "),
    ("\u2013", "-"),    # en dash
]
def number_to_words(number):
    """Spell out an integer in English words.

    Args:
        number: The integer to convert.

    Returns:
        The spoken form, e.g. 42 -> "forty two", 100 -> "one hundred",
        2100 -> "two thousand one hundred", -7 -> "minus seven".
        Magnitudes of 10**12 and above are not spelled out; their decimal
        digits are returned instead (prefixed with "minus " when negative).
    """
    # Negative values used to raise KeyError in the table lookup below.
    if number < 0:
        return "minus " + number_to_words(-number)
    if number < 20:
        return number_words[number]
    if number < 100:
        tens, unit = divmod(number, 10)
        tens_word = number_words[tens * 10]
        return f"{tens_word} {number_words[unit]}" if unit else tens_word

    if number < 1000:
        hundreds, remainder = divmod(number, 100)
        # Always say the count: dropping "one" produced phrases such as
        # "two thousand hundred" when this branch was reached recursively.
        spoken = number_words[hundreds] + " hundred"
    else:
        for scale, scale_word in (
            (1000, "thousand"),
            (1000000, "million"),
            (1000000000, "billion"),
        ):
            if number < scale * 1000:
                count, remainder = divmod(number, scale)
                spoken = f"{number_to_words(count)} {scale_word}"
                break
        else:
            # Beyond the largest supported scale: fall back to the digits.
            return str(number)

    if remainder:
        spoken += " " + number_to_words(remainder)
    return spoken
def replace_numbers_with_words(text):
    """Return *text* with each standalone run of digits spelled out in words.

    A run only qualifies when it is delimited by word boundaries on both
    sides, so digits glued to letters or underscores (as in "5G" or "v2")
    are left as they are. Each qualifying run is parsed with int() and handed
    to number_to_words.
    """
    def spell_out(found):
        return number_to_words(int(found.group()))

    digit_run = re.compile(r'\b\d+\b')
    return digit_run.sub(spell_out, text)
def cleanup_text(text):
    """Apply every (source, target) pair from `replacements` to *text*.

    The pairs are applied in list order, each one to the result of the
    previous substitution, and the final string is returned.
    """
    cleaned = text
    for pair in replacements:
        cleaned = cleaned.replace(*pair)
    return cleaned
def normalize_text(text):
    """Lower-case *text*, strip punctuation and collapse whitespace.

    Word characters, whitespace and apostrophes survive; every other
    character is deleted. Runs of whitespace then shrink to a single space
    and leading/trailing whitespace is dropped.
    """
    lowered = text.lower()
    # Delete anything that is not a word character, whitespace or apostrophe.
    without_punctuation = re.sub(r'[^\w\s\']', '', lowered)
    words = without_punctuation.split()
    return ' '.join(words)
# The processor that turns text into model inputs comes from the base
# SpeechT5 checkpoint.
checkpoint = "microsoft/speecht5_tts"
processor = SpeechT5Processor.from_pretrained(checkpoint)

# The text-to-speech weights are loaded from the repository id below.
model = SpeechT5ForTextToSpeech.from_pretrained("Yassmen/speecht5_finetuned_english_tehnical")
@functools.lru_cache(maxsize=None)
def _load_vocoder():
    """Load the HiFi-GAN vocoder on first use and reuse it afterwards."""
    return SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")


@functools.lru_cache(maxsize=None)
def _load_speaker_embeddings():
    """Read the speaker embedding file on first use and reuse the tensor."""
    return torch.tensor(np.load('speaker_embedding.npy'))


def generate_wav_file(text):
    """Synthesize *text* to speech and return the path of the WAV file.

    The text is run through number spelling, symbol cleanup and
    normalization before it is tokenized and passed to the model.

    Args:
        text: The text to speak. ``None`` is treated as an empty string.

    Returns:
        Path of a newly written 16 kHz WAV file, or ``None`` when nothing is
        left to speak after normalization or when synthesis fails (the error
        is printed).
    """
    try:
        converted_text = replace_numbers_with_words(text or "")
        cleaned_text = cleanup_text(converted_text)
        final_text = normalize_text(cleaned_text)
        if not final_text:
            # Nothing speakable is left; skip the model call entirely.
            return None

        inputs = processor(text=final_text, return_tensors="pt")
        # The vocoder and speaker embedding do not change between requests,
        # so they are loaded once instead of on every call.
        speech = model.generate_speech(
            inputs["input_ids"],
            _load_speaker_embeddings(),
            vocoder=_load_vocoder(),
        )

        # Give each request its own file: a fixed "output.wav" would be
        # overwritten when two requests overlap.
        # NOTE(review): the file is not deleted here; the caller owns cleanup.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
            output_file = handle.name
        sf.write(output_file, speech.detach().cpu().numpy(), 16000)
        return output_file  # Return the file path for download
    except Exception as e:
        # Best-effort UI boundary: report the problem and show no audio.
        print(f"Error: {e}")
        return None
# UI widgets: a three-line text box for the input and an audio player that
# is handed a file path for the output.
_text_input = gr.Textbox(lines=3, label="Enter text to convert to speech")
_audio_output = gr.Audio(type="filepath", label="Generated Audio")

# Wire generate_wav_file between the two widgets.
iface = gr.Interface(
    fn=generate_wav_file,
    inputs=_text_input,
    outputs=_audio_output,
    title="Text-to-Speech Technical EN",
)

# Only start the web server when the file is run as a script.
if __name__ == "__main__":
    iface.launch()