Karayakar committed on
Commit
76c6e92
·
verified ·
1 Parent(s): 2e92c1b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -130
app.py CHANGED
@@ -1,43 +1,9 @@
1
- import spaces
2
- import outetts
3
- from outetts.models.config import GenerationConfig
4
- import torch
5
- import json
6
  import gradio as gr
7
- from datetime import datetime
8
- import os
9
- from pathlib import Path
10
- from scipy.io import wavfile
11
- # Check if CUDA is available
12
- device = "cuda" if torch.cuda.is_available() else "cpu"
13
- # Set Hugging Face cache directory to the 'models' folder
14
- os.environ["HF_HOME"] = os.path.join(os.getcwd(), "models", "huggingface")
15
- os.environ["TRANSFORMERS_CACHE"] = os.path.join(os.getcwd(), "models", "huggingface", "transformers")
16
- os.environ["HF_DATASETS_CACHE"] = os.path.join(os.getcwd(), "models", "huggingface", "datasets")
17
- TOKENIZERS_PARALLELISM=False
18
- class OuteTTSGUI:
19
- def __init__(self):
20
- self.interface = None
21
- self.current_speaker = None
22
- self.history = []
23
- self.models_dir = "models"
24
- self.custom_speakers_dir = "custom_speakers"
25
- os.makedirs(self.custom_speakers_dir, exist_ok=True)
26
- os.makedirs(os.path.join(self.models_dir, "whisper"), exist_ok=True) # Create whisper directory
27
- self.model=None;
28
- self.examples= [
29
- ["Bu sabah olanları hatırlayınca kendimi tutamıyorum."],
30
- ["Patron yanlış dosyayı gönderince <laugh> hepimiz şok olduk."],
31
- ["Sen ciddi misin? Gerçekten öyle mi dedi?"],
32
- ["Sahneye çıkmadan önce pantolonun ters olduğunu fark etti."],
33
- ["Mikrofona yanlışlıkla nefesini üfleyince"],
34
- ["Hayat bazen böyle zor işte, dedi."],
35
- ["Sınıfta sessizlik varken aniden gelen, dikkat dağıttı."],
36
- ["Özür dilerim, biraz rahatsızım bugün."] ]
37
-
38
 
39
- def loadModel(self):
40
- model_config = outetts.ModelConfig(
 
41
  model_path="Karayakar/Oute-TTS",
42
  tokenizer_path="Karayakar/Oute-TTS",
43
  interface_version=outetts.InterfaceVersion.V2,
@@ -47,100 +13,43 @@ class OuteTTSGUI:
47
  dtype=torch.bfloat16,
48
 
49
  )
50
- self.interface = outetts.Interface(config=model_config)
 
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
- def get_history(self):
53
- if not self.history:
54
- return "No generation history available."
55
-
56
- history_text = "Generation History:\n\n"
57
- for item in reversed(self.history):
58
- history_text += f"Time: {item['timestamp']}\n"
59
- history_text += f"Speaker: {item['speaker']}\n"
60
- history_text += f"Text: {item['text']}\n"
61
- history_text += f"File: {item['path']}\n"
62
- history_text += "-" * 50 + "\n"
63
-
64
- return history_text
65
 
 
 
 
 
 
66
 
67
- @spaces.GPU()
68
- def generate_speech(self,text, progress=gr.Progress()):
69
- progress(0.1, "Processing text...")
70
- gen_cfg = GenerationConfig( text=text, speaker= None )
71
- timestamp_str = str(int(datetime.now().timestamp()))
72
- file_name=f"{timestamp_str}.wav"
73
- output=self.interface.generate(config=gen_cfg)
74
- # Save the generated audio
75
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
76
- output_path = os.path.join("generated_audio", f"generated_{timestamp}.wav")
77
- output.save(output_path)
78
-
79
- # Add to history
80
- self.history.append({
81
- "timestamp": timestamp,
82
- "text": text,
83
- "speaker": "Default",
84
- "path": output_path
85
- })
86
- return output_path, "Generation successful!"
87
-
88
 
89
- def create_gui():
90
- gui = OuteTTSGUI()
91
- gui.loadModel()
92
- with gr.Blocks(title="Oute-TTS") as demo:
93
- gr.Markdown(f"""
94
- # 🎵 Oute-TTS Turkish (TTS) - [Karay Akar](https://www.linkedin.com/in/karayakar/)
95
- Aşağıya metninizi girin ve Oute TTS modeliyle doğal sesli konuşmaya dönüştürülmesini dinleyin.
96
- """)
97
- with gr.Row():
98
- with gr.Column(scale=3):
99
- text_input = gr.Textbox(
100
- label="Cevirilecek metin",
101
- placeholder="Metin girin...",
102
- lines=5
103
- )
104
-
105
- with gr.Row():
106
- submit_btn = gr.Button("Generate Speech", variant="primary")
107
- clear_btn = gr.Button("Clear")
108
-
109
- with gr.Column(scale=2):
110
- audio_output = gr.Audio(label="Generated Audio")
111
- #generation_output = gr.Textbox(label="Generation Status", lines=2)
112
-
113
- # Set up examples
114
- gr.Examples(
115
- examples=gui.examples,
116
- inputs=[text_input],
117
- outputs=audio_output,
118
- fn=gui.generate_speech,
119
- cache_examples=True,
120
- )
121
-
122
- # Set up event handlers
123
- submit_btn.click(
124
- fn=gui.generate_speech,
125
- inputs=[text_input],
126
- outputs=[audio_output]
127
- )
128
-
129
- clear_btn.click(
130
- fn=lambda: (None, None),
131
- inputs=[],
132
- outputs=[text_input, audio_output]
133
- )
134
- return demo
135
- # Launch the app
136
- if __name__ == "__main__":
137
- #demo.queue().launch(share=False, ssr_mode=False)
138
- # Try to import get_compatible_dtype outside the function for default value setting
139
- try:
140
- from outetts.models.config import get_compatible_dtype
141
- except ImportError:
142
- get_compatible_dtype = None # Define as None if import fails
143
 
144
- demo = create_gui()
145
- #demo.launch(share=False)
146
- demo.queue().launch(share=False, ssr_mode=False)
 
 
 
 
 
 
1
  import gradio as gr
2
+ from outetts.v2.interface import InterfaceHF
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
+ #interface = InterfaceHF("Karayakar/Oute-TTS")
5
+
6
+ model_config = outetts.ModelConfig(
7
  model_path="Karayakar/Oute-TTS",
8
  tokenizer_path="Karayakar/Oute-TTS",
9
  interface_version=outetts.InterfaceVersion.V2,
 
13
  dtype=torch.bfloat16,
14
 
15
  )
16
+ interface = outetts.Interface(config=model_config)
17
+
18
+ def generate_tts(text, temperature, repetition_penalty, reference_audio, reference_text):
19
+
20
+
21
+
22
+ output = interface.generate(
23
+ text=text,
24
+ #speaker=speaker,
25
+ #temperature=temperature,
26
+ #repetition_penalty=repetition_penalty
27
+ )
28
+ output.save("output.wav")
29
+ return "output.wav"
30
 
31
+ with gr.Blocks() as demo:
32
+ gr.Markdown("# OuteTTS-0.3-500M Text-to-Speech Demo")
 
 
 
 
 
 
 
 
 
 
 
33
 
34
+ with gr.Row():
35
+ with gr.Column():
36
+ text_input = gr.Textbox(label="Text to Synthesize", placeholder="Enter text here...")
37
+ temperature = gr.Slider(0.1, 1.0, value=0.1, label="Temperature")
38
+ repetition_penalty = gr.Slider(0.5, 2.0, value=1.1, label="Repetition Penalty")
39
 
40
+ gr.Markdown("""
41
+ **Note**: For voice cloning, both a reference audio file and its corresponding transcription must be provided.
42
+ If either the audio file or transcription is missing, the model will generate audio with random characteristics.""")
43
+ reference_audio = gr.Audio(label="Reference Audio (for voice cloning)", type="filepath")
44
+ reference_text = gr.Textbox(label="Reference Transcription Text (matching the audio)", placeholder="Enter reference text here if using voice cloning")
45
+ submit_button = gr.Button("Generate Speech")
46
+ with gr.Column():
47
+ audio_output = gr.Audio(label="Generated Audio", type="filepath")
 
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
+ submit_button.click(
50
+ fn=generate_tts,
51
+ inputs=[text_input, temperature, repetition_penalty, reference_audio, reference_text],
52
+ outputs=audio_output
53
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
 
55
+ demo.launch()