"""Gradio demo: pass user text through a pretrained model and play the result."""

import gradio as gr
import torch
from transformers import Wav2Vec2ForCTC, Wav2Vec2Tokenizer

# Initialize the model and tokenizer.
# NOTE(review): `from_pretrained` is normally given a model directory or hub id
# (config + weights); a bare `.pth` file path may not load -- confirm.
# NOTE(review): Wav2Vec2ForCTC is documented as a speech-recognition
# (audio -> text) model, while the UI below advertises VITS text-to-speech.
# Presumably the wrong model class is being loaded -- confirm against the
# checkpoint that was actually trained.
model_path = 'saved_model/model.pth'
model = Wav2Vec2ForCTC.from_pretrained(model_path)
tokenizer = Wav2Vec2Tokenizer.from_pretrained(model_path)


def text_to_speech(text: str):
    """Encode *text*, run it through the model, and return the model output.

    Args:
        text: Sentence to synthesize, as typed into the Gradio textbox.

    Returns:
        The value produced by ``model.get_feature_vector``; it is passed to
        the Gradio ``Audio`` output unchanged.
    """
    # NOTE(review): `.squeeze(0)` drops the batch dimension that
    # `return_tensors="pt"` adds; `generate` usually expects batched input --
    # confirm this is intended.
    input_ids = tokenizer.encode(text, return_tensors="pt").squeeze(0)

    # Inference only: disable autograd so no gradient state is kept per request.
    with torch.no_grad():
        logits = model.generate(input_ids)
        # NOTE(review): `get_feature_vector` does not appear in the documented
        # Wav2Vec2ForCTC API -- verify it exists on the loaded model.
        audio = model.get_feature_vector(logits)
    return audio


def tts(text: str):
    """Gradio callback: synthesize *text* via :func:`text_to_speech`."""
    return text_to_speech(text)


# `gr.inputs` / `gr.outputs` were removed in Gradio 4; the top-level component
# classes are the supported replacements.
inputs = gr.Textbox(label="Masukkan teks (Bahasa Jepang)")
outputs = gr.Audio(label="Audio")

title = "Text-to-Speech dengan Model VITS-SVC Gmodel"
description = "Demo TTS menggunakan model VITS-SVC Gmodel untuk bahasa Jepang."

demo = gr.Interface(
    fn=tts,
    inputs=inputs,
    outputs=outputs,
    title=title,
    description=description,
)

# Start the web server only when executed as a script, so importing this
# module (e.g. for reuse of `tts`) does not block on a running server.
if __name__ == "__main__":
    demo.launch()