File size: 1,774 Bytes
7bc996c
 
 
05c9154
7bc996c
05c9154
 
7bc996c
59feb39
7bc996c
 
 
 
 
 
 
 
 
 
 
05c9154
7bc996c
 
05c9154
7bc996c
 
 
 
 
 
05c9154
7bc996c
 
 
 
 
 
 
05c9154
7bc996c
 
 
05c9154
7bc996c
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# -*- coding: utf-8 -*-
import gradio as gr
from faster_whisper import WhisperModel
from israwave import IsrawaveTTS
import tempfile
import os
import zipfile
from huggingface_hub import hf_hub_download

# Pull the three model assets from the YoniAfek/israwaveTTS Hugging Face repo.
# Each name below holds whatever hf_hub_download returns for that file and is
# used further down as a filesystem path.
# NOTE(review): no repo_type is passed, so the hub client uses its default repo
# type; if this repo is really a dataset, repo_type="dataset" may be required
# -- confirm against the repo.
_ASSET_REPO = "YoniAfek/israwaveTTS"
espeak_zip_path, israwave_path, nakdimon_path = [
    hf_hub_download(repo_id=_ASSET_REPO, filename=asset_name)
    for asset_name in ("espeak-ng-data.zip", "israwave.onnx", "nakdimon.onnx")
]

# Unpack the espeak-ng data archive into <system temp dir>/espeak-ng-data,
# creating that folder first when it does not exist yet.
# NOTE(review): if the archive already contains a top-level espeak-ng-data/
# folder, the data lands one level deeper than espeak_dir -- confirm zip layout.
_temp_root = tempfile.gettempdir()
espeak_dir = os.path.join(_temp_root, "espeak-ng-data")
os.makedirs(espeak_dir, exist_ok=True)
with zipfile.ZipFile(espeak_zip_path, mode="r") as archive:
    archive.extractall(path=espeak_dir)

# Speech-to-text model, built once at import time and reused for every request.
_WHISPER_CHECKPOINT = "ivrit-ai/whisper-large-v3-turbo-ct2"
whisper_model = WhisperModel(_WHISPER_CHECKPOINT)

# Text-to-speech engine, wired to the downloaded ONNX files and the extracted
# espeak-ng data folder.
# NOTE(review): nakdimon.onnx is handed over as speaker_model_path -- confirm
# this matches the IsrawaveTTS constructor's expectations.
_tts_settings = {
    "model_path": israwave_path,
    "speaker_model_path": nakdimon_path,
    "espeak_data_path": espeak_dir,
}
tts = IsrawaveTTS(**_tts_settings)

# Transcribe + TTS
def process_audio(audio_path):
    """Transcribe a recording as Hebrew and speak the transcript back.

    Args:
        audio_path: Filesystem path of the recorded audio. May be ``None`` or
            empty when the form is submitted without a recording.

    Returns:
        A ``(text, tts_path)`` tuple: the transcript, and the path of a newly
        created ``.wav`` temp file that the TTS engine was asked to write.
        When there is no input audio the result is ``("", None)`` and neither
        model is invoked.
    """
    # Nothing recorded: return empty outputs instead of failing inside the
    # transcriber on a missing path.
    if not audio_path:
        return "", None

    segments, _ = whisper_model.transcribe(audio_path, language="he")
    # Segment texts may carry their own leading/trailing whitespace; strip each
    # one and drop empties so the joined transcript has single spaces only.
    pieces = (seg.text.strip() for seg in segments)
    text = " ".join(piece for piece in pieces if piece)

    # Reserve a unique .wav path. The handle is closed on leaving the `with`
    # block, before the TTS engine writes to that path; delete=False keeps the
    # file on disk so it can be returned to the caller.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
        tts_path = tmp.name
    tts.tts_to_file(text, tts_path)
    return text, tts_path

# Gradio interface
# All user-facing strings are Hebrew, stored as real UTF-8 text; an English
# gloss is given in the comment above each one.
demo = gr.Interface(
    fn=process_audio,
    # Input label: "Record yourself". type="filepath" matches process_audio,
    # which takes a path argument.
    inputs=gr.Audio(type="filepath", label="🎙️ הקלט את עצמך"),
    outputs=[
        # "Transcription"
        gr.Text(label="תמלול"),
        # "Playback in a Hebrew voice"
        gr.Audio(label="חזרה בקול עברי"),
    ],
    # "Transcription and speech with Israwave"
    title="תמלול ודיבור עם Israwave",
    # "The system transcribes what was said and plays it back in a Hebrew
    # voice. The files are downloaded from Hugging Face Datasets"
    description="המערכת מתמללת את מה שנאמר ומשמיעה אותו חזרה בקול עברי. הקבצים יורדים מ-Hugging Face Datasets",
)

demo.launch()