| import sys |
| import asyncio |
| from io import BytesIO |
|
|
| from fairseq import checkpoint_utils |
|
|
| import torch |
|
|
| import edge_tts |
| import librosa |
|
|
|
|
| |
def has_mps() -> bool:
    """Report whether torch can actually use the Apple ``mps`` device.

    Returns:
        True only when running on macOS, the installed torch build reports
        MPS support, and a one-element tensor can be moved to the ``mps``
        device. False in every other case.
    """
    if sys.platform != "darwin":
        return False

    # ``torch.has_mps`` is deprecated in recent torch releases in favour of
    # ``torch.backends.mps.is_built()``. Prefer the supported API and only
    # fall back to the legacy attribute on older builds that lack it.
    mps_backend = getattr(torch.backends, "mps", None)
    is_built = getattr(mps_backend, "is_built", None)
    if callable(is_built):
        built = is_built()
    else:
        built = getattr(torch, "has_mps", False)
    if not built:
        return False

    # A build with MPS support does not guarantee a working device, so probe
    # it with a tiny tensor; any failure is treated as "not available".
    try:
        torch.zeros(1).to(torch.device("mps"))
    except Exception:
        return False
    return True
|
|
|
|
def is_half(device: str) -> bool:
    """Decide whether half precision should be used on ``device``.

    Args:
        device: Device string such as ``'cpu'``, ``'cuda'`` or ``'cuda:0'``.

    Returns:
        False for any non-CUDA device and for CUDA devices whose reported
        name matches the exclusion heuristic below; True otherwise.
    """
    if not device.startswith('cuda'):
        return False

    # A bare 'cuda' (no ':<index>') is a valid device string; parsing it with
    # int() would raise ValueError, so only convert an index that is present
    # and otherwise let torch pick the current CUDA device (device=None).
    _, separator, index_text = device.rpartition(':')
    device_index = int(index_text) if separator else None
    gpu_name = torch.cuda.get_device_name(device_index).upper()

    # Names containing '16' are excluded unless they are V100 cards.
    # NOTE(review): presumably this targets GTX 16xx cards while sparing
    # names like "V100-...-16GB" - confirm against the upstream heuristic.
    if '16' in gpu_name and 'V100' not in gpu_name:
        return False

    # Remaining models that are explicitly excluded from half precision.
    excluded_models = ('P40', '1060', '1070', '1080')
    return not any(model in gpu_name for model in excluded_models)
|
|
|
|
def load_hubert_model(device: str, model_path: str = 'hubert_base.pt'):
    """Load a checkpoint with fairseq and place it on ``device``.

    Args:
        device: Device string the model is moved to; also passed to
            ``is_half`` to choose the precision.
        model_path: Path of the checkpoint file to load.

    Returns:
        The first model of the loaded ensemble, converted to half precision
        when ``is_half(device)`` is true and to float precision otherwise.
    """
    loaded = checkpoint_utils.load_model_ensemble_and_task([model_path])
    # loaded[0] is indexed as the model list; only its first entry is used.
    hubert = loaded[0][0].to(device)

    return hubert.half() if is_half(device) else hubert.float()
|
|
|
|
async def call_edge_tts(speaker_name: str, text: str):
    """Synthesize ``text`` with edge-tts and decode the audio with librosa.

    The audio chunks streamed by edge-tts are fed to ``ffmpeg`` as MP3 on
    stdin, converted to WAV on stdout, and the WAV bytes are handed to
    ``librosa.load``.

    Args:
        speaker_name: Voice identifier passed to ``edge_tts.Communicate``.
        text: Text to synthesize.

    Returns:
        The result of ``librosa.load`` (called with its default arguments)
        on the converted WAV data.

    Raises:
        RuntimeError: If ffmpeg produced no WAV output, e.g. because the
            TTS stream was empty or the conversion failed.
    """
    tts_com = edge_tts.Communicate(text, speaker_name)

    # Collect the chunks and join once instead of repeatedly re-allocating
    # a growing bytes object inside the loop.
    audio_chunks = []
    async for chunk in tts_com.stream():
        if chunk['type'] == 'audio':
            audio_chunks.append(chunk['data'])
    tts_raw = b''.join(audio_chunks)

    # Convert the MP3 stream to WAV entirely through pipes.
    ffmpeg_proc = await asyncio.create_subprocess_exec(
        'ffmpeg',
        '-f', 'mp3',
        '-i', '-',
        '-f', 'wav',
        '-',
        stdin=asyncio.subprocess.PIPE,
        stdout=asyncio.subprocess.PIPE
    )
    tts_wav, _ = await ffmpeg_proc.communicate(tts_raw)

    # Fail with a clear message instead of letting the decoder choke on
    # empty input further down.
    if not tts_wav:
        raise RuntimeError(
            f'ffmpeg produced no WAV output (exit code {ffmpeg_proc.returncode})'
        )

    return librosa.load(BytesIO(tts_wav))
|
|