Spaces:
Sleeping
Sleeping
File size: 3,346 Bytes
9f9087b b8b580d f5001e3 9f9087b b8b580d 9f9087b b8b580d 9f9087b b8b580d c578c49 b8b580d f5001e3 b8b580d c578c49 b8b580d 9f9087b c578c49 9f9087b f5001e3 b8b580d 9f9087b c578c49 b8b580d c578c49 b8b580d f5001e3 b8b580d f5001e3 9f9087b b8b580d 9f9087b f5001e3 9f9087b b8b580d 9f9087b f5001e3 b8b580d 9f9087b b8b580d f5001e3 9f9087b b8b580d 9f9087b b8b580d f5001e3 b8b580d 9f9087b f5001e3 9f9087b 8587500 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 |
import tempfile

import gradio as gr
import librosa
import numpy as np
import torch
from scipy.io.wavfile import write
from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
# Load the SpeechT5 processor, text-to-speech model and HiFi-GAN vocoder once,
# at import time, so every request reuses the same objects.
processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
# Display name -> short code; the keys populate the UI language dropdown.
LANGUAGES = {"English": "en", "French": "fr", "Spanish": "es"}
def generate_speaker_embedding(speaker_type):
    """Return a reproducible (1, 512) speaker embedding for a voice type.

    The embedding is seeded Gaussian noise scaled per voice type; it is not
    derived from a recording of a real speaker.

    Args:
        speaker_type: "Female" selects the female variant; any other value
            falls back to the male variant.

    Returns:
        A ``torch.Tensor`` of shape (1, 512). Repeated calls with the same
        ``speaker_type`` return equal tensors.
    """
    is_female = speaker_type == "Female"
    # A private fixed-seed generator keeps each voice identical from one
    # request to the next and leaves the global torch RNG state untouched.
    rng = torch.Generator().manual_seed(1 if is_female else 0)
    base_embedding = torch.randn(1, 512, generator=rng)
    return base_embedding * (0.8 if is_female else 1.2)
def adjust_speed(audio, speed):
    """Time-stretch ``audio`` by the factor ``speed`` using librosa.

    Args:
        audio: Audio samples in a form accepted by
            ``librosa.effects.time_stretch``.
        speed: Stretch rate forwarded to librosa as ``rate``.

    Returns:
        The stretched samples as returned by librosa.
    """
    # Pass `rate` by keyword: it is keyword-only in librosa >= 0.10, and the
    # keyword form is also accepted by older releases.
    return librosa.effects.time_stretch(audio, rate=speed)
def text_to_speech(text, language, speaker_type, speed):
    """Synthesize ``text`` with SpeechT5 and return the path to a WAV file.

    Args:
        text: Text to speak. Empty or whitespace-only text yields ``None``.
        language: Value of the UI language selector. It is accepted for
            interface compatibility but is not used by the synthesis code.
        speaker_type: Forwarded to ``generate_speaker_embedding`` to choose
            the speaker embedding.
        speed: Speed factor; values other than 1.0 are applied with
            ``adjust_speed``.

    Returns:
        Path to a 16-bit PCM WAV file, or ``None`` when the text is empty or
        synthesis fails (the error is printed).
    """
    # SpeechT5 with its HiFi-GAN vocoder produces 16 kHz audio; writing the
    # file with any other rate changes playback speed and pitch.
    sample_rate = 16000

    if not text or not text.strip():
        return None

    try:
        # Pick the speaker embedding for the requested voice type.
        speaker_embeddings = generate_speaker_embedding(speaker_type)
        # Tokenize the text.
        inputs = processor(text=text, return_tensors="pt")
        # Inference only, so gradient tracking is disabled.
        with torch.no_grad():
            generated_speech = model.generate_speech(
                inputs["input_ids"],
                speaker_embeddings,
                vocoder=vocoder,
            ).cpu().numpy()

        # Skip the stretch at the neutral setting: it would only add
        # processing artifacts without changing the tempo.
        if speed != 1.0:
            generated_speech = adjust_speed(generated_speech, speed)

        # Clip before scaling so out-of-range samples saturate instead of
        # wrapping around in the int16 conversion.
        pcm = (np.clip(generated_speech, -1.0, 1.0) * 32767).astype(np.int16)

        # A unique file per request prevents concurrent requests from
        # overwriting each other's output. The file is intentionally kept
        # (delete=False) because the caller receives its path.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            output_file = tmp.name
        write(output_file, sample_rate, pcm)
        return output_file
    except Exception as e:
        # UI boundary: report the failure and signal "no audio" to the caller.
        print(f"Error: {e}")
        return None
# Build the Gradio interface
def create_interface():
    """Assemble the text-to-speech Blocks UI and return it without launching.

    The left column holds the text box, language dropdown, speaker radio,
    speed slider and the generate button; the right column holds the audio
    player. Clicking the button calls ``text_to_speech`` with the four
    control values and routes the returned file path to the player.
    """
    with gr.Blocks() as app:
        gr.Markdown("# 🎙️ Multilingual Text-to-Speech")

        with gr.Row():
            with gr.Column():
                text_box = gr.Textbox(
                    label="Enter Text",
                    placeholder="Type your text here...",
                    lines=5,
                )
                language_menu = gr.Dropdown(
                    choices=list(LANGUAGES),
                    value="English",
                    label="Language",
                )
                speaker_choice = gr.Radio(
                    choices=["Male", "Female"],
                    value="Male",
                    label="Speaker Gender",
                )
                speed_slider = gr.Slider(
                    minimum=0.5,
                    maximum=2.0,
                    value=1.0,
                    step=0.1,
                    label="Speech Speed",
                )
                generate_button = gr.Button("Generate Speech")
            with gr.Column():
                player = gr.Audio(label="Generated Speech", type="filepath")

        # Order matches the positional parameters of text_to_speech.
        controls = [text_box, language_menu, speaker_choice, speed_slider]
        generate_button.click(fn=text_to_speech, inputs=controls, outputs=player)

        gr.Markdown("""
### Features:
- Multilingual support (English, French, Spanish)
- Male and Female voice options
- Adjustable speech speed
- High-quality, natural-sounding voices
""")
    return app
# Run the app: build the UI at import time and start the Gradio server.
# server_name="0.0.0.0" listens on all network interfaces — presumably so the
# app is reachable from outside its container; confirm against the deployment.
demo = create_interface()
demo.launch(server_name="0.0.0.0")
|