Spaces:
Runtime error
Cache only specific files from SeamlessM4T
Browse files
- Dockerfile +3 -2
- cache.sh +6 -0
- translator.py +28 -2
Dockerfile
CHANGED
@@ -36,9 +36,10 @@ ENV HOME=/home/user \
|
|
36 |
WORKDIR ${HOME}/app
|
37 |
|
38 |
COPY --chown=1000 . ${HOME}/app
|
|
|
|
|
39 |
RUN pip install -r ${HOME}/app/requirements.txt && \
|
40 |
pip install fairseq2 --pre --extra-index-url https://fair.pkg.atmeta.com/fairseq2/pt2.1.0/cu121 && \
|
41 |
pip install ${HOME}/app/whl/seamless_communication-1.0.0-py3-none-any.whl
|
42 |
-
|
43 |
-
RUN python -u translator.py
|
44 |
CMD ["python", "main.py"]
|
|
|
36 |
WORKDIR ${HOME}/app
|
37 |
|
38 |
COPY --chown=1000 . ${HOME}/app
|
39 |
+
# This will cache the model into the docker image
|
40 |
+
RUN ./cache.sh
|
41 |
RUN pip install -r ${HOME}/app/requirements.txt && \
|
42 |
pip install fairseq2 --pre --extra-index-url https://fair.pkg.atmeta.com/fairseq2/pt2.1.0/cu121 && \
|
43 |
pip install ${HOME}/app/whl/seamless_communication-1.0.0-py3-none-any.whl
|
44 |
+
|
|
|
45 |
CMD ["python", "main.py"]
|
cache.sh
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/bin/bash
|
2 |
+
mkdir -p /home/user/app/models
|
3 |
+
wget https://huggingface.co/facebook/seamless-m4t-v2-large/resolve/main/spm_char_lang38_tc.model -P /home/user/app/models
|
4 |
+
wget https://huggingface.co/facebook/seamless-m4t-v2-large/resolve/main/seamlessM4T_v2_large.pt -P /home/user/app/models
|
5 |
+
wget https://huggingface.co/facebook/seamless-m4t-large/resolve/main/tokenizer.model -P /home/user/app/models
|
6 |
+
wget https://dl.fbaipublicfiles.com/seamless/models/vocoder_v2.pt -P /home/user/app/models
|
translator.py
CHANGED
@@ -2,20 +2,33 @@ import os
|
|
2 |
import pathlib
|
3 |
import torch
|
4 |
from fairseq2.assets import InProcAssetMetadataProvider, asset_store
|
5 |
-
from huggingface_hub import snapshot_download
|
6 |
from seamless_communication.inference import Translator
|
7 |
|
8 |
CHECKPOINTS_PATH = pathlib.Path(os.getenv("CHECKPOINTS_PATH", "/home/user/app/models"))
|
9 |
if not CHECKPOINTS_PATH.exists():
|
10 |
-
|
|
|
|
|
|
|
11 |
asset_store.env_resolvers.clear()
|
12 |
asset_store.env_resolvers.append(lambda: "demo")
|
13 |
demo_metadata = [
|
|
|
|
|
|
|
14 |
{
|
15 |
"name": "seamlessM4T_v2_large@demo",
|
16 |
"checkpoint": f"file://{CHECKPOINTS_PATH}/seamlessM4T_v2_large.pt",
|
17 |
"char_tokenizer": f"file://{CHECKPOINTS_PATH}/spm_char_lang38_tc.model",
|
18 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
{
|
20 |
"name": "vocoder_v2@demo",
|
21 |
"checkpoint": f"file://{CHECKPOINTS_PATH}/vocoder_v2.pt",
|
@@ -37,3 +50,16 @@ translator = Translator(
|
|
37 |
dtype=dtype,
|
38 |
apply_mintox=True,
|
39 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
import pathlib
|
3 |
import torch
|
4 |
from fairseq2.assets import InProcAssetMetadataProvider, asset_store
|
|
|
5 |
from seamless_communication.inference import Translator
|
6 |
|
7 |
CHECKPOINTS_PATH = pathlib.Path(os.getenv("CHECKPOINTS_PATH", "/home/user/app/models"))
|
8 |
if not CHECKPOINTS_PATH.exists():
|
9 |
+
# from huggingface_hub import snapshot_download
|
10 |
+
# snapshot_download(repo_id="facebook/seamless-m4t-v2-large", repo_type="model", local_dir=CHECKPOINTS_PATH)
|
11 |
+
raise FileNotFoundError(f"Checkpoint path {CHECKPOINTS_PATH} does not exist")
|
12 |
+
|
13 |
asset_store.env_resolvers.clear()
|
14 |
asset_store.env_resolvers.append(lambda: "demo")
|
15 |
demo_metadata = [
|
16 |
+
# https://github.com/facebookresearch/seamless_communication/blob/dd67e71317d66752ef16cf21bd842ca3273244c9/src/seamless_communication/cards/seamlessM4T_v2_large.yaml#L10
|
17 |
+
# char_tokenizer: "https://huggingface.co/facebook/seamless-m4t-v2-large/resolve/main/spm_char_lang38_tc.model"
|
18 |
+
# checkpoint: "https://huggingface.co/facebook/seamless-m4t-v2-large/resolve/main/seamlessM4T_v2_large.pt"
|
19 |
{
|
20 |
"name": "seamlessM4T_v2_large@demo",
|
21 |
"checkpoint": f"file://{CHECKPOINTS_PATH}/seamlessM4T_v2_large.pt",
|
22 |
"char_tokenizer": f"file://{CHECKPOINTS_PATH}/spm_char_lang38_tc.model",
|
23 |
},
|
24 |
+
# https://github.com/facebookresearch/seamless_communication/blob/dd67e71317d66752ef16cf21bd842ca3273244c9/src/seamless_communication/cards/unity_nllb-100.yaml#L9C1-L9C93
|
25 |
+
# tokenizer: "https://huggingface.co/facebook/seamless-m4t-large/resolve/main/tokenizer.model"
|
26 |
+
{
|
27 |
+
"name": "unity_nllb-100@demo",
|
28 |
+
"tokenizer": f"file://{CHECKPOINTS_PATH}/tokenizer.model",
|
29 |
+
},
|
30 |
+
# https://github.com/facebookresearch/seamless_communication/blob/dd67e71317d66752ef16cf21bd842ca3273244c9/src/seamless_communication/cards/vocoder_v2.yaml#L10
|
31 |
+
# checkpoint: "https://dl.fbaipublicfiles.com/seamless/models/vocoder_v2.pt"
|
32 |
{
|
33 |
"name": "vocoder_v2@demo",
|
34 |
"checkpoint": f"file://{CHECKPOINTS_PATH}/vocoder_v2.pt",
|
|
|
50 |
dtype=dtype,
|
51 |
apply_mintox=True,
|
52 |
)
|
53 |
+
|
54 |
+
if __name__ == '__main__':
|
55 |
+
input_text = "Hello, how are you today?"
|
56 |
+
source_language_code = "eng"
|
57 |
+
target_language_code = "zsm"
|
58 |
+
|
59 |
+
result = translator.predict(
|
60 |
+
input=input_text,
|
61 |
+
task_str="T2TT",
|
62 |
+
src_lang=source_language_code,
|
63 |
+
tgt_lang=target_language_code,
|
64 |
+
)
|
65 |
+
print(str(result[0]))
|