Update README.md
Browse files
README.md
CHANGED
@@ -12,11 +12,8 @@ datasets:
|
|
12 |
- covost2
|
13 |
- europarl_st
|
14 |
- voxpopuli
|
15 |
-
widget:
|
16 |
-
- example_title: Common Voice sample 1
|
17 |
-
src: https://huggingface.co/facebook/xm_transformer_600m-es_en-multi_domain/resolve/main/common_voice_es_19966634.flac
|
18 |
---
|
19 |
-
##
|
20 |
|
21 |
Speech-to-speech translation model from fairseq S2UT ([paper](https://arxiv.org/abs/2204.02967)/[code](https://github.com/facebookresearch/fairseq/blob/main/examples/speech_to_speech/docs/enhanced_direct_s2st_discrete_units.md)):
|
22 |
- Spanish-English
|
@@ -41,29 +38,13 @@ import torchaudio
|
|
41 |
|
42 |
cache_dir = os.getenv("HUGGINGFACE_HUB_CACHE")
|
43 |
|
44 |
-
#models, cfg, task = load_model_ensemble_and_task_from_hf_hub(
|
45 |
-
# "facebook/xm_transformer_s2ut_800m-es-en-st-asr-bt_h1_2022",
|
46 |
-
# arg_overrides={"config_yaml": "config.yaml", "task": "speech_to_text"},
|
47 |
-
# cache_dir=cache_dir,
|
48 |
-
# )
|
49 |
-
# model = models[0].cpu()
|
50 |
-
# cfg["task"].cpu = True
|
51 |
-
# generator = task.build_generator([model], cfg)
|
52 |
-
|
53 |
-
|
54 |
-
# # requires 16000Hz mono channel audio
|
55 |
-
# audio, _ = torchaudio.load("/Users/lpw/git/api-inference-community/docker_images/fairseq/tests/samples/sample2.flac")
|
56 |
-
|
57 |
-
# sample = S2THubInterface.get_model_input(task, audio)
|
58 |
-
# unit = S2THubInterface.get_prediction(task, model, generator, sample)
|
59 |
-
|
60 |
# speech synthesis
|
61 |
library_name = "fairseq"
|
62 |
cache_dir = (
|
63 |
cache_dir or (Path.home() / ".cache" / library_name).as_posix()
|
64 |
)
|
65 |
cache_dir = snapshot_download(
|
66 |
-
f"facebook/
|
67 |
)
|
68 |
|
69 |
x = hub_utils.from_pretrained(
|
|
|
12 |
- covost2
|
13 |
- europarl_st
|
14 |
- voxpopuli
|
|
|
|
|
|
|
15 |
---
|
16 |
+
## unit_hifigan_HK_layer12.km2500_frame_TAT-TTS
|
17 |
|
18 |
Speech-to-speech translation model from fairseq S2UT ([paper](https://arxiv.org/abs/2204.02967)/[code](https://github.com/facebookresearch/fairseq/blob/main/examples/speech_to_speech/docs/enhanced_direct_s2st_discrete_units.md)):
|
19 |
- Spanish-English
|
|
|
38 |
|
39 |
cache_dir = os.getenv("HUGGINGFACE_HUB_CACHE")
|
40 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
# speech synthesis
|
42 |
library_name = "fairseq"
|
43 |
cache_dir = (
|
44 |
cache_dir or (Path.home() / ".cache" / library_name).as_posix()
|
45 |
)
|
46 |
cache_dir = snapshot_download(
|
47 |
+
f"facebook/unit_hifigan_HK_layer12.km2500_frame_TAT-TTS", cache_dir=cache_dir, library_name=library_name
|
48 |
)
|
49 |
|
50 |
x = hub_utils.from_pretrained(
|