File size: 2,550 Bytes
b6e8173
 
 
 
 
 
 
 
 
 
b8e666a
b6e8173
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
756f5e3
b6e8173
 
 
 
 
 
 
 
 
 
 
 
 
 
 
272640c
b6e8173
 
 
 
 
 
 
 
 
 
 
 
 
 
2525fcd
 
b6e8173
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import os
import tempfile

import gradio as gr
import numpy as np
import outetts
from huggingface_hub import hf_hub_download, login
from outetts.models.config import GenerationConfig

# Authenticate with the Hugging Face Hub only when a token is configured.
# Calling login(token=None) falls back to an interactive prompt, which
# cannot be answered on a headless deployment and would stall startup.
hf_token = os.environ.get("HF_TOKEN")
if hf_token:
    login(token=hf_token)

# Fetch the quantized GGUF weights from the Hub; hf_hub_download returns the
# local path of the downloaded file.
model_path = hf_hub_download(
    repo_id="KandirResearch/DarijaTTS-v0.1-500M",
    filename="unsloth.Q8_0.gguf",
)
model_config = outetts.GGUFModelConfig_v2(
    model_path=model_path,
    tokenizer_path="Lyte/DarijaTTS",
    #verbose=False
)
# Single module-level interface object used by tts() for every request.
interface = outetts.InterfaceGGUF(model_version="0.3", cfg=model_config)

# TTS function
def tts(text, temperature, repetition_penalty):
    """Generate speech for *text* and return the path of the saved WAV file.

    Args:
        text: The text to synthesize.
        temperature: Sampling temperature passed to ``GenerationConfig``.
        repetition_penalty: Repetition penalty passed to ``GenerationConfig``.

    Returns:
        Path of a newly created ``.wav`` file containing the generated audio.

    Raises:
        gr.Error: If *text* is empty or blank, or if generation or saving
            fails (the original exception is chained as the cause).
    """
    # Reject blank input up front instead of sending it to the model.
    if not text or not text.strip():
        raise gr.Error("Please enter some text to synthesize.")

    try:
        gen_cfg = GenerationConfig(
            text=text,
            temperature=temperature,
            repetition_penalty=repetition_penalty,
            max_length=4096,
            speaker=None,
        )

        output = interface.generate(config=gen_cfg)

        # Use a unique file per request: a single shared "output.wav" would be
        # overwritten when two requests are handled at the same time.
        # delete=False keeps the file on disk after the handle is closed so it
        # can be written by output.save() and then read by the caller.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            output_path = tmp.name
        output.save(output_path)

        return output_path
    except Exception as e:
        # Surface the failure in the UI while keeping the original traceback.
        raise gr.Error(f"Error generating audio: {str(e)}") from e

# Prompts offered as one-click examples; each inner list maps to text_input.
example_prompts = [
    ["رمضان مبارك سعيد"],
    ["اشنو بان ليك فهادشي مزيان؟"],
    ["السلام كيداير لاباس عليك؟"],
    ["واش كاين شي جديد اليوم؟"],
]

# Build the page: header, text box, audio output + button, sampling sliders,
# examples, and finally the click handler that calls tts().
with gr.Blocks() as demo:
    gr.Markdown(value="# Darija TTS Demo (alpha)")
    gr.Markdown(
        value="### Note: this is an early alpha which still needs training. for now temperature 0.3 seems to work best, but feel free to play with it."
    )

    with gr.Row():
        text_input = gr.Textbox(
            value="السلام كيداير لاباس عليك؟",
            label="Enter text for TTS",
        )

    # type="filepath" matches tts(), which returns a path to a WAV file.
    output_audio = gr.Audio(type="filepath", label="Generated Audio")
    generate_btn = gr.Button(value="Generate Audio")

    with gr.Row():
        temperature = gr.Slider(
            label="Temperature",
            minimum=0.01,
            maximum=1.0,
            step=0.01,
            value=0.3,
        )

        repetition_penalty = gr.Slider(
            label="Repetition Penalty",
            minimum=0.8,
            maximum=1.5,
            step=0.05,
            value=1.1,
        )

    with gr.Row():
        gr.Markdown(value="### Examples:")

    with gr.Row():
        gr.Examples(examples=example_prompts, inputs=[text_input])

    # Wire the button to the synthesis function.
    generate_btn.click(
        fn=tts,
        inputs=[text_input, temperature, repetition_penalty],
        outputs=output_audio,
    )

demo.launch(debug=True)