UpCoder committed on
Commit
0010293
·
verified ·
1 Parent(s): 01a6ec3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -73
app.py CHANGED
@@ -2,108 +2,86 @@ import gradio as gr
2
  import os
3
  import re
4
  import numpy as np
 
5
  from TTS.utils.synthesizer import Synthesizer
6
  from huggingface_hub import hf_hub_download
7
 
8
- # 1. Maxfiy kalitni olish
9
- hf_token = os.environ.get("HF_TOKEN")
 
10
 
11
- # 2. Modelni maxfiy ombordan yuklab olish
12
- repo_id = "UpCoder/behruz-vits-v3-private"
13
-
14
- try:
15
- print("Model fayllari yuklanmoqda...")
16
- model_path = hf_hub_download(repo_id=repo_id, filename="checkpoint_43000.pth", token=hf_token)
17
- config_path = hf_hub_download(repo_id=repo_id, filename="config.json", token=hf_token)
18
- except Exception as e:
19
- print(f"Fayllarni yuklashda xatolik: {e}")
20
-
21
- # 3. Sun'iy intellektni ishga tushirish
22
- print("Sun'iy intellekt ishga tushmoqda...")
23
- synthesizer = Synthesizer(
24
- tts_checkpoint=model_path,
25
- tts_config_path=config_path,
26
- use_cuda=False
27
- )
28
-
29
- # VITS uchun standart chastota (22050 Hz)
30
- SAMPLE_RATE = 22050
31
-
32
- def split_into_sentences(text):
33
- sentences = re.split(r'(?<=[.!?]) +', text.strip())
34
- return [s for s in sentences if s.strip()]
35
 
36
  def synthesize_full_audio(text):
37
- if not text.strip():
38
  return None
39
 
40
- # Xavfsizlik: Server qotib qolmasligi uchun matnni 2000 belgi bilan cheklaymiz
41
- if len(text) > 2000:
42
- text = text[:2000]
43
 
44
- sentences = split_into_sentences(text)
 
 
 
 
 
45
  all_wavs = []
46
-
47
- # Jumlalar orasida tabiiy nafas olish uchun 0.25 soniyalik sukut
48
  silence = np.zeros(int(SAMPLE_RATE * 0.25))
49
 
50
  for i, sentence in enumerate(sentences):
51
  try:
52
- wav = synthesizer.tts(sentence)
53
  all_wavs.append(np.array(wav))
54
-
55
- # Oxirgi jumladan tashqari hammadan keyin sukut qo'shamiz
56
  if i < len(sentences) - 1:
57
  all_wavs.append(silence)
58
-
59
  except Exception as e:
60
- print(f"Jumlani o'qishda xatolik: {sentence}. Xato: {e}")
61
  continue
62
 
63
  if not all_wavs:
64
  return None
65
 
66
- # Barcha audio parchalarni bitta butun faylga birlashtirish
67
  final_wav = np.concatenate(all_wavs)
 
68
  final_wav_int16 = (final_wav * 32767).astype(np.int16)
69
 
 
 
 
 
70
  return (SAMPLE_RATE, final_wav_int16)
71
 
72
- # 4. Professional va O'zbekcha Interfeys (UI) yaratish
73
- with gr.Blocks(theme=gr.themes.Soft(primary_hue="emerald", secondary_hue="teal")) as iface:
74
- gr.Markdown(
75
- """
76
- <div style="text-align: center;">
77
- <h1>🎙️ Behruzning Raqamli Ovozli Kloni (V3)</h1>
78
- <p><strong>Mening sun'iy intellekt ovoz generatorimga xush kelibsiz!</strong> Ushbu model o'zimning haqiqiy ovozim asosida neyrotarmoqlar yordamida o'qitildi.</p>
79
- </div>
80
-
81
- 💡 **Foydali maslahat:** Katta matnlarni (masalan, butun bir xatboshini) bemalol kiritishingiz mumkin! Dastur uni avtomat ravishda jumlalarga bo'lib, xatosiz o'qib beradi va bitta tayyor audio fayl qilib taqdim etadi.
82
- """
83
- )
84
 
85
  with gr.Row():
86
- with gr.Column(scale=2):
87
- text_input = gr.Textbox(
88
- label="O'zbekcha matnni bu yerga kiriting (Maksimum 2000 belgi)",
89
- lines=6,
90
- placeholder="Salom! Bugun havo juda ajoyib, shunday emasmi? Men internetda yashaydigan raqamli sun'iy intellektman..."
91
- )
92
- generate_btn = gr.Button("🚀 Ovozga Aylantirish", variant="primary")
93
-
94
- with gr.Column(scale=1):
95
- audio_output = gr.Audio(label="🎧 Tayyor Audio Fayl")
96
-
97
- gr.Examples(
98
- examples=[
99
- "Salom, men Behruzning raqamli egizagiman va men endi internetda yashayman!",
100
- "Axborot texnologiyalari sohasida qanday yangiliklar bor, kuzatib boryapsizmi?",
101
- "Voh, bu natijani umuman kutmagan edim! Qoyilmaqom ish bo'libdi."
102
- ],
103
- inputs=text_input,
104
- label="Namuna jumlalar (birini tanlang)"
105
- )
106
-
107
- generate_btn.click(fn=synthesize_full_audio, inputs=text_input, outputs=audio_output)
108
 
109
  iface.launch()
 
2
  import os
3
  import re
4
  import numpy as np
5
+ import torch
6
  from TTS.utils.synthesizer import Synthesizer
7
  from huggingface_hub import hf_hub_download
8
 
9
+ # --- GLOBAL SETTINGS ---
10
+ SAMPLE_RATE = 22050
11
+ _synthesizer = None # We keep this empty until needed
12
 
13
+ def get_synthesizer():
14
+ """Only loads the model when it's actually called. This prevents the '137' boot crash."""
15
+ global _synthesizer
16
+ if _synthesizer is None:
17
+ hf_token = os.environ.get("HF_TOKEN")
18
+ repo_id = "UpCoder/behruz-vits-v3-private"
19
+
20
+ print("--- Loading Behruz AI Voice Model ---")
21
+ try:
22
+ model_path = hf_hub_download(repo_id=repo_id, filename="checkpoint_43000.pth", token=hf_token)
23
+ config_path = hf_hub_download(repo_id=repo_id, filename="config.json", token=hf_token)
24
+
25
+ _synthesizer = Synthesizer(
26
+ tts_checkpoint=model_path,
27
+ tts_config_path=config_path,
28
+ use_cuda=False # CPU is safer for free tier RAM limits
29
+ )
30
+ print("--- Model Loaded Successfully ---")
31
+ except Exception as e:
32
+ print(f"Loading failed: {e}")
33
+ return None
34
+ return _synthesizer
 
 
35
 
36
  def synthesize_full_audio(text):
37
+ if not text or not text.strip():
38
  return None
39
 
40
+ # Safety limit to prevent RAM spikes
41
+ if len(text) > 1200:
42
+ text = text[:1200]
43
 
44
+ synth = get_synthesizer()
45
+ if synth is None:
46
+ return None
47
+
48
+ # Sentence splitting logic
49
+ sentences = re.split(r'(?<=[.!?]) +', text.strip())
50
  all_wavs = []
 
 
51
  silence = np.zeros(int(SAMPLE_RATE * 0.25))
52
 
53
  for i, sentence in enumerate(sentences):
54
  try:
55
+ wav = synth.tts(sentence)
56
  all_wavs.append(np.array(wav))
 
 
57
  if i < len(sentences) - 1:
58
  all_wavs.append(silence)
 
59
  except Exception as e:
60
+ print(f"Skip error: {e}")
61
  continue
62
 
63
  if not all_wavs:
64
  return None
65
 
 
66
  final_wav = np.concatenate(all_wavs)
67
+ # Convert to 16-bit PCM for browser compatibility
68
  final_wav_int16 = (final_wav * 32767).astype(np.int16)
69
 
70
+ # Force clear some memory after processing
71
+ if torch.cuda.is_available():
72
+ torch.cuda.empty_cache()
73
+
74
  return (SAMPLE_RATE, final_wav_int16)
75
 
76
+ # --- GRADIO UI ---
77
+ with gr.Blocks(theme=gr.themes.Soft(primary_hue="emerald")) as iface:
78
+ gr.Markdown("# 🎙️ Behruz Voice Clone V3 (Optimized)")
 
 
 
 
 
 
 
 
 
79
 
80
  with gr.Row():
81
+ text_input = gr.Textbox(label="O'zbekcha matn", lines=4)
82
+ audio_output = gr.Audio(label="Ovoz")
83
+
84
+ btn = gr.Button("🚀 Generatsiya", variant="primary")
85
+ btn.click(fn=synthesize_full_audio, inputs=text_input, outputs=audio_output)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
 
87
  iface.launch()