seamless-m4t-v2-large

Runtime error

App Files Community

mintox

by mortimerp9 - opened Nov 28, 2023

base: refs/heads/main

←

from: refs/pr/1

Discussion Files changed

+10

-12

Files changed (5) hide show

Dockerfile +1 -1
README.md +1 -1
app.py +5 -7
requirements.txt +1 -1
whl/seamless_communication-1.0.0-py3-none-any.whl +2 -2

Dockerfile CHANGED Viewed

@@ -45,7 +45,7 @@ RUN pyenv install ${PYTHON_VERSION} && \
 COPY --chown=1000 . ${HOME}/app
 RUN pip install -r ${HOME}/app/requirements.txt && \
-    pip install fairseq2 --pre --extra-index-url https://fair.pkg.atmeta.com/fairseq2/pt2.1.0/cu121 && \
     pip install ${HOME}/app/whl/seamless_communication-1.0.0-py3-none-any.whl
 ENV PYTHONPATH=${HOME}/app \

 COPY --chown=1000 . ${HOME}/app
 RUN pip install -r ${HOME}/app/requirements.txt && \
+    pip install fairseq2 --pre --extra-index-url https://fair.pkg.atmeta.com/fairseq2/whl/nightly/pt2.1.0/cu121 && \
     pip install ${HOME}/app/whl/seamless_communication-1.0.0-py3-none-any.whl
 ENV PYTHONPATH=${HOME}/app \

README.md CHANGED Viewed

@@ -7,7 +7,7 @@ sdk: docker
 pinned: false
 suggested_hardware: t4-medium
 models:
-  - facebook/seamless-m4t-v2-large
   - facebook/SONAR
 ---

 pinned: false
 suggested_hardware: t4-medium
 models:
+  - facebook/seamless-m4t-large
   - facebook/SONAR
 ---

app.py CHANGED Viewed

@@ -23,7 +23,7 @@ from lang_list import (
 CHECKPOINTS_PATH = pathlib.Path(os.getenv("CHECKPOINTS_PATH", "/home/user/app/models"))
 if not CHECKPOINTS_PATH.exists():
-    snapshot_download(repo_id="facebook/seamless-m4t-v2-large", repo_type="model", local_dir=CHECKPOINTS_PATH)
 asset_store.env_resolvers.clear()
 asset_store.env_resolvers.append(lambda: "demo")
 demo_metadata = [
@@ -45,8 +45,7 @@ DESCRIPTION = """\
 [SeamlessM4T](https://github.com/facebookresearch/seamless_communication) is designed to provide high-quality
 translation, allowing people from different linguistic communities to communicate effortlessly through speech and text.
 This unified model enables multiple tasks like Speech-to-Speech (S2ST), Speech-to-Text (S2TT), Text-to-Speech (T2ST)
-translation and more, without relying on multiple separate models. The model is also in use on the
-[SeamlessM4T demo website](https://seamless.metademolab.com/m4t?utm_source=huggingface&utm_medium=web&utm_campaign=seamless&utm_content=m4tspace).
 """
 CACHE_EXAMPLES = os.getenv("CACHE_EXAMPLES") == "1" and torch.cuda.is_available()
@@ -105,8 +104,8 @@ def run_s2tt(input_audio: str, source_language: str, target_language: str) -> st
     out_texts, _ = translator.predict(
         input=input_audio,
         task_str="S2TT",
-        src_lang=source_language_code,
         tgt_lang=target_language_code,
     )
     return str(out_texts[0])
@@ -117,8 +116,8 @@ def run_t2st(input_text: str, source_language: str, target_language: str) -> tup
     out_texts, out_audios = translator.predict(
         input=input_text,
         task_str="T2ST",
-        src_lang=source_language_code,
         tgt_lang=target_language_code,
     )
     out_text = str(out_texts[0])
     out_wav = out_audios.audio_wavs[0].cpu().detach().numpy()
@@ -131,8 +130,8 @@ def run_t2tt(input_text: str, source_language: str, target_language: str) -> str
     out_texts, _ = translator.predict(
         input=input_text,
         task_str="T2TT",
-        src_lang=source_language_code,
         tgt_lang=target_language_code,
     )
     return str(out_texts[0])
@@ -143,7 +142,6 @@ def run_asr(input_audio: str, target_language: str) -> str:
     out_texts, _ = translator.predict(
         input=input_audio,
         task_str="ASR",
-        src_lang=target_language_code,
         tgt_lang=target_language_code,
     )
     return str(out_texts[0])

 CHECKPOINTS_PATH = pathlib.Path(os.getenv("CHECKPOINTS_PATH", "/home/user/app/models"))
 if not CHECKPOINTS_PATH.exists():
+    snapshot_download(repo_id="meta-private/M4Tv2", repo_type="model", local_dir=CHECKPOINTS_PATH)
 asset_store.env_resolvers.clear()
 asset_store.env_resolvers.append(lambda: "demo")
 demo_metadata = [
 [SeamlessM4T](https://github.com/facebookresearch/seamless_communication) is designed to provide high-quality
 translation, allowing people from different linguistic communities to communicate effortlessly through speech and text.
 This unified model enables multiple tasks like Speech-to-Speech (S2ST), Speech-to-Text (S2TT), Text-to-Speech (T2ST)
+translation and more, without relying on multiple separate models.
 """
 CACHE_EXAMPLES = os.getenv("CACHE_EXAMPLES") == "1" and torch.cuda.is_available()
     out_texts, _ = translator.predict(
         input=input_audio,
         task_str="S2TT",
         tgt_lang=target_language_code,
+        src_lang=source_language_code,
     )
     return str(out_texts[0])
     out_texts, out_audios = translator.predict(
         input=input_text,
         task_str="T2ST",
         tgt_lang=target_language_code,
+        src_lang=source_language_code,
     )
     out_text = str(out_texts[0])
     out_wav = out_audios.audio_wavs[0].cpu().detach().numpy()
     out_texts, _ = translator.predict(
         input=input_text,
         task_str="T2TT",
         tgt_lang=target_language_code,
+        src_lang=source_language_code,
     )
     return str(out_texts[0])
     out_texts, _ = translator.predict(
         input=input_audio,
         task_str="ASR",
         tgt_lang=target_language_code,
     )
     return str(out_texts[0])

requirements.txt CHANGED Viewed

@@ -1,4 +1,4 @@
-gradio==4.9.0
 omegaconf==2.3.0
 torch==2.1.0
 torchaudio==2.1.0

+gradio==4.5.0
 omegaconf==2.3.0
 torch==2.1.0
 torchaudio==2.1.0

whl/seamless_communication-1.0.0-py3-none-any.whl CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1df10e0c85ee0ffbc9f2e1bf8896850a52c551383df0332a94d26d9d39770c85
-size 201552

 version https://git-lfs.github.com/spec/v1
+oid sha256:c3380dc7d6613c4dc9ef4d78fd3dcf1a50f7c6a659b8ba0b37ad5237533d002e
+size 234811