hysts (HF staff) committed
Commit: ecc5344
Parent(s): c111ea2
.gitattributes CHANGED
@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 *.mp3 filter=lfs diff=lfs merge=lfs -text
+*.whl filter=lfs diff=lfs merge=lfs -text
.pre-commit-config.yaml ADDED
@@ -0,0 +1,55 @@
+repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.5.0
+    hooks:
+      - id: check-executables-have-shebangs
+      - id: check-json
+      - id: check-merge-conflict
+      - id: check-shebang-scripts-are-executable
+      - id: check-toml
+      - id: check-yaml
+      - id: end-of-file-fixer
+      - id: mixed-line-ending
+        args: ["--fix=lf"]
+      - id: requirements-txt-fixer
+      - id: trailing-whitespace
+  - repo: https://github.com/myint/docformatter
+    rev: v1.7.5
+    hooks:
+      - id: docformatter
+        args: ["--in-place"]
+  - repo: https://github.com/pycqa/isort
+    rev: 5.12.0
+    hooks:
+      - id: isort
+        args: ["--profile", "black"]
+  - repo: https://github.com/pre-commit/mirrors-mypy
+    rev: v1.7.0
+    hooks:
+      - id: mypy
+        args: ["--ignore-missing-imports"]
+        additional_dependencies:
+          ["types-python-slugify", "types-requests", "types-PyYAML"]
+  - repo: https://github.com/psf/black
+    rev: 23.11.0
+    hooks:
+      - id: black
+        language_version: python3.10
+        args: ["--line-length", "119"]
+  - repo: https://github.com/kynan/nbstripout
+    rev: 0.6.1
+    hooks:
+      - id: nbstripout
+        args:
+          [
+            "--extra-keys",
+            "metadata.interpreter metadata.kernelspec cell.metadata.pycharm",
+          ]
+  - repo: https://github.com/nbQA-dev/nbQA
+    rev: 1.7.0
+    hooks:
+      - id: nbqa-black
+      - id: nbqa-pyupgrade
+        args: ["--py37-plus"]
+      - id: nbqa-isort
+        args: ["--float-to-top"]
.vscode/settings.json ADDED
@@ -0,0 +1,21 @@
+{
+  "[python]": {
+    "editor.defaultFormatter": "ms-python.black-formatter",
+    "editor.formatOnType": true,
+    "editor.codeActionsOnSave": {
+      "source.organizeImports": true
+    }
+  },
+  "black-formatter.args": [
+    "--line-length=119"
+  ],
+  "isort.args": ["--profile", "black"],
+  "flake8.args": [
+    "--max-line-length=119"
+  ],
+  "ruff.args": [
+    "--line-length=119"
+  ],
+  "editor.formatOnSave": true,
+  "files.insertFinalNewline": true
+}
Dockerfile CHANGED
@@ -1,4 +1,4 @@
-FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu22.04
+FROM nvidia/cuda:12.1.1-cudnn8-devel-ubuntu22.04
 ENV DEBIAN_FRONTEND=noninteractive
 RUN apt-get update && \
     apt-get upgrade -y && \
@@ -36,16 +36,18 @@ WORKDIR ${HOME}/app
 
 RUN curl https://pyenv.run | bash
 ENV PATH=${HOME}/.pyenv/shims:${HOME}/.pyenv/bin:${PATH}
-ARG PYTHON_VERSION=3.10.12
+ARG PYTHON_VERSION=3.10.13
 RUN pyenv install ${PYTHON_VERSION} && \
     pyenv global ${PYTHON_VERSION} && \
     pyenv rehash && \
     pip install --no-cache-dir -U pip setuptools wheel
 
-COPY --chown=1000 ./requirements.txt /tmp/requirements.txt
-RUN pip install --no-cache-dir --upgrade -r /tmp/requirements.txt
-
 COPY --chown=1000 . ${HOME}/app
+RUN pip install -r ${HOME}/app/requirements.txt && \
+    pip install ${HOME}/app/wheels/fairseq2n-0.2.0.dev0-cp310-cp310-linux_x86_64.whl && \
+    pip install ${HOME}/app/wheels/fairseq2-0.2.0.dev0-py3-none-any.whl && \
+    pip install ${HOME}/app/wheels/seamless_communication-1.0.0-py3-none-any.whl
+
 ENV PYTHONPATH=${HOME}/app \
     PYTHONUNBUFFERED=1 \
     GRADIO_ALLOW_FLAGGING=never \
README.md CHANGED
@@ -1,11 +1,14 @@
 ---
-title: Seamless M4T
+title: Seamless M4T v2
 emoji: 📞
 colorFrom: blue
 colorTo: yellow
 sdk: docker
 pinned: false
 suggested_hardware: t4-medium
+models:
+- facebook/seamless-m4t-large
+- facebook/SONAR
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -6,7 +6,8 @@ import gradio as gr
 import numpy as np
 import torch
 import torchaudio
-from seamless_communication.models.inference.translator import Translator
+from huggingface_hub import snapshot_download
+from seamless_communication.inference import Translator
 
 from lang_list import (
     LANGUAGE_NAME_TO_CODE,
@@ -16,6 +17,8 @@ from lang_list import (
     TEXT_SOURCE_LANGUAGE_NAMES,
 )
 
+snapshot_download(repo_id="meta-private/M4Tv2", repo_type="model", local_dir="models")
+
 DESCRIPTION = """# SeamlessM4T
 
 [SeamlessM4T](https://github.com/facebookresearch/seamless_communication) is designed to provide high-quality
@@ -25,7 +28,7 @@ This unified model enables multiple tasks like Speech-to-Speech (S2ST), Speech-t
 translation and more, without relying on multiple separate models.
 """
 
-CACHE_EXAMPLES = os.getenv("CACHE_EXAMPLES") == "1"
+CACHE_EXAMPLES = os.getenv("CACHE_EXAMPLES") == "1" and torch.cuda.is_available()
 
 TASK_NAMES = [
     "S2ST (Speech to Speech translation)",
@@ -38,11 +41,17 @@ AUDIO_SAMPLE_RATE = 16000.0
 MAX_INPUT_AUDIO_LENGTH = 60  # in seconds
 DEFAULT_TARGET_LANGUAGE = "French"
 
-device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+if torch.cuda.is_available():
+    device = torch.device("cuda:0")
+    dtype = torch.float16
+else:
+    device = torch.device("cpu")
+    dtype = torch.float32
 translator = Translator(
-    model_name_or_card="seamlessM4T_large",
-    vocoder_name_or_card="vocoder_36langs",
+    model_name_or_card="seamlessM4T_v2_large",
+    vocoder_name_or_card="vocoder_v2",
     device=device,
+    dtype=dtype,
 )
 
 
@@ -74,17 +83,19 @@ def predict(
         torchaudio.save(input_data, new_arr, sample_rate=int(AUDIO_SAMPLE_RATE))
     else:
         input_data = input_text
-    text_out, wav, sr = translator.predict(
+    out_texts, out_audios = translator.predict(
         input=input_data,
         task_str=task_name,
         tgt_lang=target_language_code,
         src_lang=source_language_code,
-        ngram_filtering=True,
     )
+    out_text = str(out_texts[0])
+
     if task_name in ["S2ST", "T2ST"]:
-        return (sr, wav.cpu().detach().numpy()), text_out
+        out_wav = out_audios.audio_wavs[0]
+        return (int(AUDIO_SAMPLE_RATE), out_wav.cpu().detach().numpy()), out_text
     else:
-        return None, text_out
+        return None, out_text
 
 
 def process_s2st_example(input_audio_file: str, target_language: str) -> tuple[tuple[int, np.ndarray] | None, str]:
@@ -426,8 +437,6 @@ with gr.Blocks(css="style.css") as demo:
         outputs=[output_audio, output_text],
         api_name="run",
     )
-demo.queue(max_size=50).launch()
 
-# Linking models to the space
-# 'facebook/seamless-m4t-large'
-# 'facebook/SONAR'
+if __name__ == "__main__":
+    demo.queue(max_size=50).launch()
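For reference, a minimal standalone sketch of the updated `Translator` API as this commit uses it. The device/dtype selection, model/vocoder cards, and the `(texts, audios)` return shape come straight from the diff; the input string and the `"eng"`/`"fra"` language codes are illustrative assumptions, not part of the commit:

```python
import torch
from seamless_communication.inference import Translator  # new import path in this commit

# fp16 on GPU, fp32 on CPU, mirroring app.py above.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
dtype = torch.float16 if device.type == "cuda" else torch.float32

translator = Translator(
    model_name_or_card="seamlessM4T_v2_large",
    vocoder_name_or_card="vocoder_v2",
    device=device,
    dtype=dtype,
)

# predict() now returns (texts, audios) rather than the old (text, wav, sr) triple.
out_texts, out_audios = translator.predict(
    input="Hello, world!",  # illustrative input
    task_str="T2ST",
    tgt_lang="fra",  # assumed three-letter language codes
    src_lang="eng",
)
print(str(out_texts[0]))
out_wav = out_audios.audio_wavs[0]  # waveform tensor at the 16 kHz model rate
```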
 
requirements.txt CHANGED
@@ -1,6 +1,4 @@
-fairseq2==0.1.0
-git+https://github.com/facebookresearch/seamless_communication
-gradio==3.40.1
-huggingface_hub==0.16.4
-torch==2.0.1
-torchaudio==2.0.2
+gradio==3.50.2
+omegaconf==2.3.0
+torch==2.1.0
+torchaudio==2.1.0
wheels/fairseq2-0.2.0.dev0-py3-none-any.whl ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0ef5a6eef679333aeb08d24bee6a054120bd7d29d67a60c4a73ca4aaa5bcc261
+size 187508
wheels/fairseq2n-0.2.0.dev0-cp310-cp310-linux_x86_64.whl ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4865d75ce2a48aaf7b7416c7b109dec123fdc577186b9cabe2d83b74cb27d3fa
+size 2213239
wheels/seamless_communication-1.0.0-py3-none-any.whl ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3a6641670c404c2eac0c2893f498fc350ca22c5d4c9c67afcec18a578563cb3f
+size 121560
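The three new `.whl` files are stored as Git LFS pointers (matching the `*.whl` rule added to `.gitattributes`), so only their `oid`/`size` metadata lives in the repository. A minimal sketch for checking that a locally materialized wheel matches its pointer; the expected values are copied from the pointer above, and the relative path assumes you run it from the repo root:

```python
import hashlib
from pathlib import Path

# Pointer metadata for wheels/seamless_communication-1.0.0-py3-none-any.whl (from above).
EXPECTED_SHA256 = "3a6641670c404c2eac0c2893f498fc350ca22c5d4c9c67afcec18a578563cb3f"
EXPECTED_SIZE = 121560

data = Path("wheels/seamless_communication-1.0.0-py3-none-any.whl").read_bytes()
assert len(data) == EXPECTED_SIZE, f"size mismatch: {len(data)}"
assert hashlib.sha256(data).hexdigest() == EXPECTED_SHA256, "sha256 mismatch"
print("wheel matches its LFS pointer")
```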