Behruz-Voice-AI / app.py
UpCoder's picture
Update app.py
01a6ec3 verified
import gradio as gr
import os
import re
import numpy as np
from TTS.utils.synthesizer import Synthesizer
from huggingface_hub import hf_hub_download
# 1. Read the secret Hugging Face access token from the environment.
hf_token = os.environ.get("HF_TOKEN")

# 2. Download the model checkpoint and its config from the private repository.
repo_id = "UpCoder/behruz-vits-v3-private"
try:
    print("Model fayllari yuklanmoqda...")
    model_path = hf_hub_download(repo_id=repo_id, filename="checkpoint_43000.pth", token=hf_token)
    config_path = hf_hub_download(repo_id=repo_id, filename="config.json", token=hf_token)
except Exception as e:
    print(f"Fayllarni yuklashda xatolik: {e}")
    # Nothing below can work without these files. Re-raise so startup fails
    # with the real download error instead of a NameError on `model_path`.
    raise

# 3. Load the TTS model on CPU.
print("Sun'iy intellekt ishga tushmoqda...")
synthesizer = Synthesizer(
    tts_checkpoint=model_path,
    tts_config_path=config_path,
    use_cuda=False,
)

# Output sample rate in Hz. Prefer the rate reported by the loaded model so the
# audio is never played back at the wrong speed; fall back to 22050 Hz (the
# value this app previously hard-coded) when the attribute is missing or unset.
SAMPLE_RATE = int(getattr(synthesizer, "output_sample_rate", None) or 22050)
# Sentence boundary: any run of whitespace (spaces, tabs, newlines) that
# directly follows '.', '!' or '?'. The punctuation stays with its sentence.
_SENTENCE_BOUNDARY = re.compile(r'(?<=[.!?])\s+')


def split_into_sentences(text):
    """Split *text* into sentences on whitespace following '.', '!' or '?'.

    Args:
        text: The input string. Leading and trailing whitespace is ignored.

    Returns:
        A list of non-empty sentence strings, each keeping its terminating
        punctuation. Text without any sentence punctuation is returned as a
        single element; blank input yields an empty list.
    """
    parts = _SENTENCE_BOUNDARY.split(text.strip())
    # Drop empty or whitespace-only fragments (e.g. when the input was blank).
    return [part for part in parts if part.strip()]
def synthesize_full_audio(text):
    """Synthesize *text* sentence by sentence and join the result into one clip.

    The text is capped at 2000 characters, split with
    ``split_into_sentences``, and each sentence is passed to
    ``synthesizer.tts``. A sentence that fails to synthesize is logged and
    skipped. A 0.25 s pause is inserted between consecutive successfully
    synthesized sentences.

    Args:
        text: The text to speak. ``None``, empty, or whitespace-only input
            produces no audio.

    Returns:
        ``(SAMPLE_RATE, samples)`` where ``samples`` is a 1-D ``numpy.int16``
        array, or ``None`` when there was nothing to synthesize or every
        sentence failed.
    """
    if not text or not text.strip():
        return None

    # Safety: cap the input at 2000 characters so a single request cannot
    # stall the server.
    text = text[:2000]

    # 0.25 s of silence, used as a natural breathing pause between sentences.
    silence = np.zeros(int(SAMPLE_RATE * 0.25))

    chunks = []
    for sentence in split_into_sentences(text):
        try:
            wav = np.asarray(synthesizer.tts(sentence))
        except Exception as e:
            # Best effort: one bad sentence must not discard the whole clip.
            print(f"Jumlani o'qishda xatolik: {sentence}. Xato: {e}")
            continue
        # Add the pause only between two successful chunks, so a failed
        # sentence never leaves leading, trailing, or doubled silence.
        if chunks:
            chunks.append(silence)
        chunks.append(wav)

    if not chunks:
        return None

    # Merge all pieces into one waveform.
    final_wav = np.concatenate(chunks)
    # Presumably the model emits float samples nominally in [-1, 1]; clip
    # before scaling so any overshoot saturates instead of wrapping around
    # when cast to int16.
    final_wav_int16 = (np.clip(final_wav, -1.0, 1.0) * 32767).astype(np.int16)
    return (SAMPLE_RATE, final_wav_int16)
# 4. Build the Uzbek-language user interface.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="emerald", secondary_hue="teal")) as iface:
    # Page header and usage tip. The blank line after </div> ends the HTML
    # block so the tip below it is parsed as Markdown (bold text renders).
    gr.Markdown(
        """
        <div style="text-align: center;">
        <h1>🎙️ Behruzning Raqamli Ovozli Kloni (V3)</h1>
        <p><strong>Mening sun'iy intellekt ovoz generatorimga xush kelibsiz!</strong> Ushbu model o'zimning haqiqiy ovozim asosida neyrotarmoqlar yordamida o'qitildi.</p>
        </div>

        💡 **Foydali maslahat:** Katta matnlarni (masalan, butun bir xatboshini) bemalol kiritishingiz mumkin! Dastur uni avtomat ravishda jumlalarga bo'lib, xatosiz o'qib beradi va bitta tayyor audio fayl qilib taqdim etadi.
        """
    )

    with gr.Row():
        # Left column: text entry and the trigger button.
        with gr.Column(scale=2):
            text_input = gr.Textbox(
                label="O'zbekcha matnni bu yerga kiriting (Maksimum 2000 belgi)",
                lines=6,
                placeholder="Salom! Bugun havo juda ajoyib, shunday emasmi? Men internetda yashaydigan raqamli sun'iy intellektman...",
            )
            generate_btn = gr.Button("🚀 Ovozga Aylantirish", variant="primary")
        # Right column: player for the generated clip.
        with gr.Column(scale=1):
            audio_output = gr.Audio(label="🎧 Tayyor Audio Fayl")

    # Clickable sample sentences that fill the text box.
    gr.Examples(
        examples=[
            "Salom, men Behruzning raqamli egizagiman va men endi internetda yashayman!",
            "Axborot texnologiyalari sohasida qanday yangiliklar bor, kuzatib boryapsizmi?",
            "Voh, bu natijani umuman kutmagan edim! Qoyilmaqom ish bo'libdi.",
        ],
        inputs=text_input,
        label="Namuna jumlalar (birini tanlang)",
    )

    # Wire the button to the synthesis function.
    generate_btn.click(fn=synthesize_full_audio, inputs=text_input, outputs=audio_output)

iface.launch()