ReneeYe committed on
Commit
17b21b3
•
1 Parent(s): 715d968
app.py CHANGED
@@ -42,8 +42,6 @@ os.system("git clone https://github.com/ReneeYe/ConST")
42
  os.system("mv ConST ConST_git")
43
  os.system('mv -n ConST_git/* ./')
44
  os.system("rm -rf ConST_git")
45
- # os.system("python3 setup.py install")
46
- # os.system("python3 setup.py build_ext --inplace")
47
  os.system("pip3 install --editable ./")
48
  os.system("mkdir -p data checkpoint")
49
 
@@ -144,12 +142,16 @@ iface = gr.Interface(
144
  fn=run,
145
  inputs=inputs,
146
  outputs=[gr.outputs.Textbox(label="The translation")],
147
- examples=[['case1.wav', "German"],['case2.wav', "German"], ['case3.wav', "German"]],
148
  title="ConST: an end-to-end speech translator",
149
- description="End-to-end Speech Translation Live Demo for English to eight European languages.",
150
- article="ConST is an end-to-end speech translation model (see paper at https://arxiv.org/abs/2205.02444 ). "
151
- "Its motivation is to use contrastive learning method to learn similar representations for semantically similar speech and text.",
152
- theme="seafoam",
153
- layout='vertical',
 
 
 
 
154
  )
155
  iface.launch()
42
  os.system("mv ConST ConST_git")
43
  os.system('mv -n ConST_git/* ./')
44
  os.system("rm -rf ConST_git")
 
 
45
  os.system("pip3 install --editable ./")
46
  os.system("mkdir -p data checkpoint")
47
 
142
  fn=run,
143
  inputs=inputs,
144
  outputs=[gr.outputs.Textbox(label="The translation")],
145
+ examples=[['short-case.wav', "German"], ['long-case.wav', "German"]],
146
  title="ConST: an end-to-end speech translator",
147
+ description='ConST is an end-to-end speech-to-text translation model, whose algorithm corresponds to the '
148
+ 'NAACL 2022 paper *"Cross-modal Contrastive Learning for Speech Translation"* (see the paper at https://arxiv.org/abs/2205.02444 for more details).'
149
+ 'This is a live demo for ConST, to translate English into eight European languages.',
150
+ article="- The motivation of the ConST model is to use the contrastive learning method to learn similar representations for semantically similar speech and text, " \
151
+ "thus leveraging MT to help improve ST performance. \n"
152
+ "- The models you are experiencing are trained based on the MuST-C dataset (https://ict.fbk.eu/must-c/), " \
153
+ "which only contains about 250k parallel data at each translation direction. \n"
154
+ "- If you want to know how to train the models, you may refer to https://github.com/ReneeYe/ConST.",
155
+ theme="peach",
156
  )
157
  iface.launch()
case3.wav → long-case.wav RENAMED
File without changes
case2.wav → short-case.wav RENAMED
File without changes