hysts (HF staff) committed
Commit: ecc5344
Parent(s): c111ea2
.gitattributes CHANGED
@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 *.mp3 filter=lfs diff=lfs merge=lfs -text
+*.whl filter=lfs diff=lfs merge=lfs -text
.pre-commit-config.yaml ADDED
@@ -0,0 +1,55 @@
+repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.5.0
+    hooks:
+      - id: check-executables-have-shebangs
+      - id: check-json
+      - id: check-merge-conflict
+      - id: check-shebang-scripts-are-executable
+      - id: check-toml
+      - id: check-yaml
+      - id: end-of-file-fixer
+      - id: mixed-line-ending
+        args: ["--fix=lf"]
+      - id: requirements-txt-fixer
+      - id: trailing-whitespace
+  - repo: https://github.com/myint/docformatter
+    rev: v1.7.5
+    hooks:
+      - id: docformatter
+        args: ["--in-place"]
+  - repo: https://github.com/pycqa/isort
+    rev: 5.12.0
+    hooks:
+      - id: isort
+        args: ["--profile", "black"]
+  - repo: https://github.com/pre-commit/mirrors-mypy
+    rev: v1.7.0
+    hooks:
+      - id: mypy
+        args: ["--ignore-missing-imports"]
+        additional_dependencies:
+          ["types-python-slugify", "types-requests", "types-PyYAML"]
+  - repo: https://github.com/psf/black
+    rev: 23.11.0
+    hooks:
+      - id: black
+        language_version: python3.10
+        args: ["--line-length", "119"]
+  - repo: https://github.com/kynan/nbstripout
+    rev: 0.6.1
+    hooks:
+      - id: nbstripout
+        args:
+          [
+            "--extra-keys",
+            "metadata.interpreter metadata.kernelspec cell.metadata.pycharm",
+          ]
+  - repo: https://github.com/nbQA-dev/nbQA
+    rev: 1.7.0
+    hooks:
+      - id: nbqa-black
+      - id: nbqa-pyupgrade
+        args: ["--py37-plus"]
+      - id: nbqa-isort
+        args: ["--float-to-top"]
.vscode/settings.json ADDED
@@ -0,0 +1,21 @@
+{
+  "[python]": {
+    "editor.defaultFormatter": "ms-python.black-formatter",
+    "editor.formatOnType": true,
+    "editor.codeActionsOnSave": {
+      "source.organizeImports": true
+    }
+  },
+  "black-formatter.args": [
+    "--line-length=119"
+  ],
+  "isort.args": ["--profile", "black"],
+  "flake8.args": [
+    "--max-line-length=119"
+  ],
+  "ruff.args": [
+    "--line-length=119"
+  ],
+  "editor.formatOnSave": true,
+  "files.insertFinalNewline": true
+}
Dockerfile CHANGED
@@ -1,4 +1,4 @@
-FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu22.04
+FROM nvidia/cuda:12.1.1-cudnn8-devel-ubuntu22.04
 ENV DEBIAN_FRONTEND=noninteractive
 RUN apt-get update && \
     apt-get upgrade -y && \
@@ -36,16 +36,18 @@ WORKDIR ${HOME}/app
 
 RUN curl https://pyenv.run | bash
 ENV PATH=${HOME}/.pyenv/shims:${HOME}/.pyenv/bin:${PATH}
-ARG PYTHON_VERSION=3.10.12
+ARG PYTHON_VERSION=3.10.13
 RUN pyenv install ${PYTHON_VERSION} && \
     pyenv global ${PYTHON_VERSION} && \
     pyenv rehash && \
     pip install --no-cache-dir -U pip setuptools wheel
 
-COPY --chown=1000 ./requirements.txt /tmp/requirements.txt
-RUN pip install --no-cache-dir --upgrade -r /tmp/requirements.txt
-
 COPY --chown=1000 . ${HOME}/app
+RUN pip install -r ${HOME}/app/requirements.txt && \
+    pip install ${HOME}/app/wheels/fairseq2n-0.2.0.dev0-cp310-cp310-linux_x86_64.whl && \
+    pip install ${HOME}/app/wheels/fairseq2-0.2.0.dev0-py3-none-any.whl && \
+    pip install ${HOME}/app/wheels/seamless_communication-1.0.0-py3-none-any.whl
+
 ENV PYTHONPATH=${HOME}/app \
     PYTHONUNBUFFERED=1 \
     GRADIO_ALLOW_FLAGGING=never \
README.md CHANGED
@@ -1,11 +1,14 @@
 ---
-title: Seamless M4T
+title: Seamless M4T v2
 emoji: 📞
 colorFrom: blue
 colorTo: yellow
 sdk: docker
 pinned: false
 suggested_hardware: t4-medium
+models:
+- facebook/seamless-m4t-large
+- facebook/SONAR
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -6,7 +6,8 @@ import gradio as gr
 import numpy as np
 import torch
 import torchaudio
-from seamless_communication.models.inference.translator import Translator
+from huggingface_hub import snapshot_download
+from seamless_communication.inference import Translator
 
 from lang_list import (
     LANGUAGE_NAME_TO_CODE,
@@ -16,6 +17,8 @@ from lang_list import (
     TEXT_SOURCE_LANGUAGE_NAMES,
 )
 
+snapshot_download(repo_id="meta-private/M4Tv2", repo_type="model", local_dir="models")
+
 DESCRIPTION = """# SeamlessM4T
 
 [SeamlessM4T](https://github.com/facebookresearch/seamless_communication) is designed to provide high-quality
@@ -25,7 +28,7 @@ This unified model enables multiple tasks like Speech-to-Speech (S2ST), Speech-t
 translation and more, without relying on multiple separate models.
 """
 
-CACHE_EXAMPLES = os.getenv("CACHE_EXAMPLES") == "1"
+CACHE_EXAMPLES = os.getenv("CACHE_EXAMPLES") == "1" and torch.cuda.is_available()
 
 TASK_NAMES = [
     "S2ST (Speech to Speech translation)",
@@ -38,11 +41,17 @@ AUDIO_SAMPLE_RATE = 16000.0
 MAX_INPUT_AUDIO_LENGTH = 60  # in seconds
 DEFAULT_TARGET_LANGUAGE = "French"
 
-device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+if torch.cuda.is_available():
+    device = torch.device("cuda:0")
+    dtype = torch.float16
+else:
+    device = torch.device("cpu")
+    dtype = torch.float32
 translator = Translator(
-    model_name_or_card="seamlessM4T_large",
-    vocoder_name_or_card="vocoder_36langs",
+    model_name_or_card="seamlessM4T_v2_large",
+    vocoder_name_or_card="vocoder_v2",
     device=device,
+    dtype=dtype,
 )
 
 
@@ -74,17 +83,19 @@ def predict(
         torchaudio.save(input_data, new_arr, sample_rate=int(AUDIO_SAMPLE_RATE))
     else:
         input_data = input_text
-    text_out, wav, sr = translator.predict(
+    out_texts, out_audios = translator.predict(
         input=input_data,
         task_str=task_name,
         tgt_lang=target_language_code,
         src_lang=source_language_code,
-        ngram_filtering=True,
     )
+    out_text = str(out_texts[0])
+
     if task_name in ["S2ST", "T2ST"]:
-        return (sr, wav.cpu().detach().numpy()), text_out
+        out_wav = out_audios.audio_wavs[0]
+        return (int(AUDIO_SAMPLE_RATE), out_wav.cpu().detach().numpy()), out_text
     else:
-        return None, text_out
+        return None, out_text
 
 
 def process_s2st_example(input_audio_file: str, target_language: str) -> tuple[tuple[int, np.ndarray] | None, str]:
@@ -426,8 +437,6 @@ with gr.Blocks(css="style.css") as demo:
         outputs=[output_audio, output_text],
         api_name="run",
     )
-demo.queue(max_size=50).launch()
 
-# Linking models to the space
-# 'facebook/seamless-m4t-large'
-# 'facebook/SONAR'
+if __name__ == "__main__":
+    demo.queue(max_size=50).launch()
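For reference, a minimal standalone sketch of the updated `Translator` API as this commit uses it. The device/dtype selection, model/vocoder cards, and the `(texts, audios)` return shape come straight from the diff; the input string and the `"eng"`/`"fra"` language codes are illustrative assumptions, not part of the commit:

```python
import torch
from seamless_communication.inference import Translator  # new import path in this commit

# fp16 on GPU, fp32 on CPU, mirroring app.py above.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
dtype = torch.float16 if device.type == "cuda" else torch.float32

translator = Translator(
    model_name_or_card="seamlessM4T_v2_large",
    vocoder_name_or_card="vocoder_v2",
    device=device,
    dtype=dtype,
)

# predict() now returns (texts, audios) rather than the old (text, wav, sr) triple.
out_texts, out_audios = translator.predict(
    input="Hello, world!",  # illustrative input
    task_str="T2ST",
    tgt_lang="fra",  # assumed three-letter language codes
    src_lang="eng",
)
print(str(out_texts[0]))
out_wav = out_audios.audio_wavs[0]  # waveform tensor at the 16 kHz model rate
```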
 
requirements.txt CHANGED
@@ -1,6 +1,4 @@
-fairseq2==0.1.0
-git+https://github.com/facebookresearch/seamless_communication
-gradio==3.40.1
-huggingface_hub==0.16.4
-torch==2.0.1
-torchaudio==2.0.2
+gradio==3.50.2
+omegaconf==2.3.0
+torch==2.1.0
+torchaudio==2.1.0
wheels/fairseq2-0.2.0.dev0-py3-none-any.whl ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0ef5a6eef679333aeb08d24bee6a054120bd7d29d67a60c4a73ca4aaa5bcc261
+size 187508
wheels/fairseq2n-0.2.0.dev0-cp310-cp310-linux_x86_64.whl ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4865d75ce2a48aaf7b7416c7b109dec123fdc577186b9cabe2d83b74cb27d3fa
+size 2213239
wheels/seamless_communication-1.0.0-py3-none-any.whl ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3a6641670c404c2eac0c2893f498fc350ca22c5d4c9c67afcec18a578563cb3f
+size 121560
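The three new `.whl` files are stored as Git LFS pointers (matching the `*.whl` rule added to `.gitattributes`), so only their `oid`/`size` metadata lives in the repository. A minimal sketch for checking that a locally materialized wheel matches its pointer; the expected values are copied from the pointer above, and the relative path assumes you run it from the repo root:

```python
import hashlib
from pathlib import Path

# Pointer metadata for wheels/seamless_communication-1.0.0-py3-none-any.whl (from above).
EXPECTED_SHA256 = "3a6641670c404c2eac0c2893f498fc350ca22c5d4c9c67afcec18a578563cb3f"
EXPECTED_SIZE = 121560

data = Path("wheels/seamless_communication-1.0.0-py3-none-any.whl").read_bytes()
assert len(data) == EXPECTED_SIZE, f"size mismatch: {len(data)}"
assert hashlib.sha256(data).hexdigest() == EXPECTED_SHA256, "sha256 mismatch"
print("wheel matches its LFS pointer")
```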