File size: 1,340 Bytes
3bbf2c7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import gc
import os
from contextlib import contextmanager
from time import time
from typing import Optional

import streamlit as st

from tortoise.api import TextToSpeech
from tortoise.utils.audio import load_voices


@contextmanager
def timeit(desc=""):
    """Context manager that prints how long the wrapped block took.

    Args:
        desc: Label placed at the start of the printed message.

    The elapsed wall-clock time (from ``time.time``) is printed to stdout as
    ``"<desc> took <seconds> seconds"`` when the block exits. The report is
    emitted even if the block raises; the exception then propagates unchanged.
    """
    start = time()
    try:
        yield
    finally:
        # Report in ``finally`` so a failing block still shows how long it ran.
        print(f"{desc} took {time() - start:.2f} seconds")


@st.cache_resource(max_entries=1)
def load_model(
    model_dir,
    high_vram,
    kv_cache,
    ar_checkpoint,
    diff_checkpoint,
):
    """Construct a ``TextToSpeech`` instance from the given settings.

    The result is cached through ``st.cache_resource`` with ``max_entries=1``.

    Args:
        model_dir: Forwarded to ``TextToSpeech`` as ``models_dir``.
        high_vram: Forwarded as ``high_vram``.
        kv_cache: Forwarded as ``kv_cache``.
        ar_checkpoint: Forwarded as ``ar_checkpoint``.
        diff_checkpoint: Forwarded as ``diff_checkpoint``.

    Returns:
        The newly created ``TextToSpeech`` object.
    """
    # Run a garbage-collection pass before building the new model
    # (presumably to release a previously cached model first -- confirm).
    gc.collect()
    tts_options = {
        "models_dir": model_dir,
        "high_vram": high_vram,
        "kv_cache": kv_cache,
        "ar_checkpoint": ar_checkpoint,
        "diff_checkpoint": diff_checkpoint,
    }
    return TextToSpeech(**tts_options)


@st.cache_data
def list_voices(extra_voices_dir: Optional[str]):
    """Collect the selectable voice names.

    Args:
        extra_voices_dir: Optional path to an additional voices directory.
            It is only used when it is non-empty and an existing directory.

    Returns:
        A ``(voices, extra_voices_ls)`` tuple. ``voices`` starts with
        ``"random"``, followed by the entries of ``extra_voices_dir`` (when
        used) and then the entries of ``tortoise/voices`` except
        ``"cond_latent_example"``. ``extra_voices_ls`` is
        ``[extra_voices_dir]`` when that directory is used, otherwise ``[]``.
    """
    use_extra = bool(extra_voices_dir) and os.path.isdir(extra_voices_dir)
    extra_names = os.listdir(extra_voices_dir) if use_extra else []
    builtin_names = [
        name
        for name in os.listdir("tortoise/voices")
        if name != "cond_latent_example"
    ]
    voices = ["random", *extra_names, *builtin_names]
    extra_voices_ls = [extra_voices_dir] if use_extra else []
    return voices, extra_voices_ls


@st.cache_resource(max_entries=1)
def load_voice_conditionings(voice, extra_voices_ls):
    """Load samples and conditioning latents for a voice via ``load_voices``.

    The result is cached through ``st.cache_resource`` with ``max_entries=1``.

    Args:
        voice: Passed as the first argument to ``load_voices``.
        extra_voices_ls: Passed as the second argument to ``load_voices``.

    Returns:
        A ``(voice_samples, conditioning_latents)`` tuple holding the two
        values returned by ``load_voices``.
    """
    samples, latents = load_voices(voice, extra_voices_ls)
    return (samples, latents)