Dockerfile CHANGED
@@ -45,7 +45,7 @@ RUN pyenv install ${PYTHON_VERSION} && \
45
 
46
  COPY --chown=1000 . ${HOME}/app
47
  RUN pip install -r ${HOME}/app/requirements.txt && \
48
- pip install fairseq2 --pre --extra-index-url https://fair.pkg.atmeta.com/fairseq2/pt2.1.0/cu121 && \
49
  pip install ${HOME}/app/whl/seamless_communication-1.0.0-py3-none-any.whl
50
 
51
  ENV PYTHONPATH=${HOME}/app \
 
45
 
46
  COPY --chown=1000 . ${HOME}/app
47
  RUN pip install -r ${HOME}/app/requirements.txt && \
48
+ pip install fairseq2 --pre --extra-index-url https://fair.pkg.atmeta.com/fairseq2/whl/nightly/pt2.1.0/cu121 && \
49
  pip install ${HOME}/app/whl/seamless_communication-1.0.0-py3-none-any.whl
50
 
51
  ENV PYTHONPATH=${HOME}/app \
README.md CHANGED
@@ -7,7 +7,7 @@ sdk: docker
7
  pinned: false
8
  suggested_hardware: t4-medium
9
  models:
10
- - facebook/seamless-m4t-v2-large
11
  - facebook/SONAR
12
  ---
13
 
 
7
  pinned: false
8
  suggested_hardware: t4-medium
9
  models:
10
+ - facebook/seamless-m4t-large
11
  - facebook/SONAR
12
  ---
13
 
app.py CHANGED
@@ -23,7 +23,7 @@ from lang_list import (
23
 
24
  CHECKPOINTS_PATH = pathlib.Path(os.getenv("CHECKPOINTS_PATH", "/home/user/app/models"))
25
  if not CHECKPOINTS_PATH.exists():
26
- snapshot_download(repo_id="facebook/seamless-m4t-v2-large", repo_type="model", local_dir=CHECKPOINTS_PATH)
27
  asset_store.env_resolvers.clear()
28
  asset_store.env_resolvers.append(lambda: "demo")
29
  demo_metadata = [
@@ -45,8 +45,7 @@ DESCRIPTION = """\
45
  [SeamlessM4T](https://github.com/facebookresearch/seamless_communication) is designed to provide high-quality
46
  translation, allowing people from different linguistic communities to communicate effortlessly through speech and text.
47
  This unified model enables multiple tasks like Speech-to-Speech (S2ST), Speech-to-Text (S2TT), Text-to-Speech (T2ST)
48
- translation and more, without relying on multiple separate models. The model is also in use on the
49
- [SeamlessM4T demo website](https://seamless.metademolab.com/m4t?utm_source=huggingface&utm_medium=web&utm_campaign=seamless&utm_content=m4tspace).
50
  """
51
 
52
  CACHE_EXAMPLES = os.getenv("CACHE_EXAMPLES") == "1" and torch.cuda.is_available()
@@ -105,8 +104,8 @@ def run_s2tt(input_audio: str, source_language: str, target_language: str) -> st
105
  out_texts, _ = translator.predict(
106
  input=input_audio,
107
  task_str="S2TT",
108
- src_lang=source_language_code,
109
  tgt_lang=target_language_code,
 
110
  )
111
  return str(out_texts[0])
112
 
@@ -117,8 +116,8 @@ def run_t2st(input_text: str, source_language: str, target_language: str) -> tup
117
  out_texts, out_audios = translator.predict(
118
  input=input_text,
119
  task_str="T2ST",
120
- src_lang=source_language_code,
121
  tgt_lang=target_language_code,
 
122
  )
123
  out_text = str(out_texts[0])
124
  out_wav = out_audios.audio_wavs[0].cpu().detach().numpy()
@@ -131,8 +130,8 @@ def run_t2tt(input_text: str, source_language: str, target_language: str) -> str
131
  out_texts, _ = translator.predict(
132
  input=input_text,
133
  task_str="T2TT",
134
- src_lang=source_language_code,
135
  tgt_lang=target_language_code,
 
136
  )
137
  return str(out_texts[0])
138
 
@@ -143,7 +142,6 @@ def run_asr(input_audio: str, target_language: str) -> str:
143
  out_texts, _ = translator.predict(
144
  input=input_audio,
145
  task_str="ASR",
146
- src_lang=target_language_code,
147
  tgt_lang=target_language_code,
148
  )
149
  return str(out_texts[0])
 
23
 
24
  CHECKPOINTS_PATH = pathlib.Path(os.getenv("CHECKPOINTS_PATH", "/home/user/app/models"))
25
  if not CHECKPOINTS_PATH.exists():
26
+ snapshot_download(repo_id="meta-private/M4Tv2", repo_type="model", local_dir=CHECKPOINTS_PATH)
27
  asset_store.env_resolvers.clear()
28
  asset_store.env_resolvers.append(lambda: "demo")
29
  demo_metadata = [
 
45
  [SeamlessM4T](https://github.com/facebookresearch/seamless_communication) is designed to provide high-quality
46
  translation, allowing people from different linguistic communities to communicate effortlessly through speech and text.
47
  This unified model enables multiple tasks like Speech-to-Speech (S2ST), Speech-to-Text (S2TT), Text-to-Speech (T2ST)
48
+ translation and more, without relying on multiple separate models.
 
49
  """
50
 
51
  CACHE_EXAMPLES = os.getenv("CACHE_EXAMPLES") == "1" and torch.cuda.is_available()
 
104
  out_texts, _ = translator.predict(
105
  input=input_audio,
106
  task_str="S2TT",
 
107
  tgt_lang=target_language_code,
108
+ src_lang=source_language_code,
109
  )
110
  return str(out_texts[0])
111
 
 
116
  out_texts, out_audios = translator.predict(
117
  input=input_text,
118
  task_str="T2ST",
 
119
  tgt_lang=target_language_code,
120
+ src_lang=source_language_code,
121
  )
122
  out_text = str(out_texts[0])
123
  out_wav = out_audios.audio_wavs[0].cpu().detach().numpy()
 
130
  out_texts, _ = translator.predict(
131
  input=input_text,
132
  task_str="T2TT",
 
133
  tgt_lang=target_language_code,
134
+ src_lang=source_language_code,
135
  )
136
  return str(out_texts[0])
137
 
 
142
  out_texts, _ = translator.predict(
143
  input=input_audio,
144
  task_str="ASR",
 
145
  tgt_lang=target_language_code,
146
  )
147
  return str(out_texts[0])
requirements.txt CHANGED
@@ -1,4 +1,4 @@
1
- gradio==4.9.0
2
  omegaconf==2.3.0
3
  torch==2.1.0
4
  torchaudio==2.1.0
 
1
+ gradio==4.5.0
2
  omegaconf==2.3.0
3
  torch==2.1.0
4
  torchaudio==2.1.0
whl/seamless_communication-1.0.0-py3-none-any.whl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1df10e0c85ee0ffbc9f2e1bf8896850a52c551383df0332a94d26d9d39770c85
3
- size 201552
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3380dc7d6613c4dc9ef4d78fd3dcf1a50f7c6a659b8ba0b37ad5237533d002e
3
+ size 234811