Spaces:
Runtime error
Runtime error
| import subprocess | |
| import sys | |
| import gradio as gr | |
| from inference import Mars5TTS, InferenceConfig | |
| import librosa | |
| import torch | |
| import numpy as np | |
def _install_requirements():
    """Run ``pip install -r requirements.txt`` with the current interpreter.

    A failing pip run is reported on stdout and otherwise ignored, so startup
    continues either way.
    """
    pip_command = [sys.executable, "-m", "pip", "install", "-r", "requirements.txt"]
    try:
        subprocess.check_call(pip_command)
    except subprocess.CalledProcessError as err:
        print(f"Failed to install requirements.txt: {err}")
    else:
        print("Successfully installed requirements.txt")


def _load_model():
    """Load the MARS5 TTS model and build the default inference config.

    Returns:
        A ``(model, inference_config)`` pair.

    Raises:
        Exception: Any loading failure is printed and then re-raised, so the
            app does not start without a model.
    """
    try:
        model = Mars5TTS.from_pretrained("CAMB-AI/MARS5-TTS")
        inference_config = InferenceConfig(temperature=0.7)
    except Exception as err:
        print(f"Model loading error: {str(err)}")
        raise
    return model, inference_config


# Make sure the packages listed in requirements.txt are installed.
_install_requirements()

# Release cached GPU memory before the model is loaded.
if torch.cuda.is_available():
    torch.cuda.empty_cache()

# Load the MARS5 TTS model; `mars5` and `config` are used by the request handler.
mars5, config = _load_model()
def clone_with_prosody(text, ref_audio, enhance_prosody=True):
    """Synthesize ``text`` in the voice of ``ref_audio`` and return the output path.

    Args:
        text: Text to convert to speech.
        ref_audio: Reference voice sample. A string is treated as a file path
            and loaded with librosa at 16 kHz; any other value is forwarded to
            the model unchanged.
        enhance_prosody: When true, the module-level ``config`` is passed to
            the model; otherwise ``None`` is passed.

    Returns:
        The path of the generated file, ``"output_cloned_audio.wav"``.

    Raises:
        gr.Error: If the text is empty, no reference audio was supplied, or
            synthesis fails. The function feeds a ``gr.Audio`` output, so a
            returned error string would be treated as an audio file path;
            raising lets Gradio show the message to the user instead.
    """
    # Validate outside the try block so these messages are not re-wrapped below.
    if not text or not str(text).strip():
        raise gr.Error("Please enter text to convert to speech.")
    if ref_audio is None:
        raise gr.Error("Please upload a reference audio clip.")

    try:
        if isinstance(ref_audio, str):
            # File path: decode and resample to the 16 kHz rate declared below.
            audio_data, _ = librosa.load(ref_audio, sr=16000)
        else:
            # NOTE(review): non-path input is passed through untouched while
            # ref_sr is still reported as 16000 — confirm callers only send
            # 16 kHz data on this path.
            audio_data = ref_audio

        # NOTE(review): the keyword names used here and the `.save()` call on
        # the result cannot be verified from this file — confirm against the
        # `inference.Mars5TTS.tts` signature and return type.
        output_audio = mars5.tts(
            text=text,
            ref_audio=audio_data,
            ref_sr=16000,
            config=config if enhance_prosody else None,
            language="ko",
        )
        output_path = "output_cloned_audio.wav"
        output_audio.save(output_path)
        return output_path
    except Exception as e:
        # Surface the failure in the UI rather than returning a bogus file path.
        raise gr.Error(f"Error: {str(e)}") from e
# Input widgets, in the positional order expected by clone_with_prosody.
# NOTE(review): `source=` on gr.Audio and `allow_flagging=` on gr.Interface are
# spelled differently in newer Gradio releases (`sources=[...]`,
# `flagging_mode=`) — confirm against the Gradio version pinned for this app.
_text_input = gr.Textbox(
    label="Text to Convert",
    placeholder="Enter text to convert to speech",
)
_reference_input = gr.Audio(
    label="Reference Audio (Your Voice)",
    type="filepath",
    source="upload",
)
_prosody_toggle = gr.Checkbox(
    label="Enhance Prosody (Intonation/Rhythm)",
    value=True,
)

# Output widget that plays the file path returned by clone_with_prosody.
_cloned_output = gr.Audio(label="Cloned Voice Output")

# Assemble the single-function web UI.
interface = gr.Interface(
    fn=clone_with_prosody,
    inputs=[_text_input, _reference_input, _prosody_toggle],
    outputs=_cloned_output,
    title="MARS5 Voice Cloner with Prosody",
    description="Upload a 3-5 second audio of your voice and enter text to clone your voice with prosody (intonation, rhythm, emotion).",
    allow_flagging="never",
)

# Start the Gradio server.
interface.launch()