ReneeYe committed on
Commit
d7b2919
•
1 Parent(s): b124b4a

init commit

Browse files
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ ConST
.idea/ConST-speech2text-translator.iml ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <module type="PYTHON_MODULE" version="4">
3
+ <component name="NewModuleRootManager">
4
+ <content url="file://$MODULE_DIR$" />
5
+ <orderEntry type="inheritedJdk" />
6
+ <orderEntry type="sourceFolder" forTests="false" />
7
+ </component>
8
+ <component name="TestRunnerService">
9
+ <option name="PROJECT_TEST_RUNNER" value="Unittests" />
10
+ </component>
11
+ </module>
.idea/dictionaries/yerong.xml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ <component name="ProjectDictionaryState">
2
+ <dictionary name="yerong" />
3
+ </component>
.idea/misc.xml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="JavaScriptSettings">
4
+ <option name="languageLevel" value="ES6" />
5
+ </component>
6
+ <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.8" project-jdk-type="Python SDK" />
7
+ </project>
.idea/modules.xml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectModuleManager">
4
+ <modules>
5
+ <module fileurl="file://$PROJECT_DIR$/.idea/ConST-speech2text-translator.iml" filepath="$PROJECT_DIR$/.idea/ConST-speech2text-translator.iml" />
6
+ </modules>
7
+ </component>
8
+ </project>
.idea/vcs.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="VcsDirectoryMappings">
4
+ <mapping directory="$PROJECT_DIR$" vcs="Git" />
5
+ </component>
6
+ </project>
.idea/workspace.xml ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ChangeListManager">
4
+ <list default="true" id="71b24c87-38d2-4265-a415-d8cbcc4803e1" name="Default Changelist" comment="">
5
+ <change beforePath="$PROJECT_DIR$/app.py" beforeDir="false" afterPath="$PROJECT_DIR$/app.py" afterDir="false" />
6
+ </list>
7
+ <option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
8
+ <option name="SHOW_DIALOG" value="false" />
9
+ <option name="HIGHLIGHT_CONFLICTS" value="true" />
10
+ <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
11
+ <option name="LAST_RESOLUTION" value="IGNORE" />
12
+ </component>
13
+ <component name="FileEditorManager">
14
+ <leaf>
15
+ <file pinned="false" current-in-tab="false">
16
+ <entry file="file://$PROJECT_DIR$/README.md">
17
+ <provider selected="true" editor-type-id="split-provider[text-editor;markdown-preview-editor]">
18
+ <state split_layout="SPLIT">
19
+ <first_editor relative-caret-position="45">
20
+ <caret line="3" column="6" selection-start-line="3" selection-start-column="6" selection-end-line="3" selection-end-column="6" />
21
+ </first_editor>
22
+ <second_editor />
23
+ </state>
24
+ </provider>
25
+ </entry>
26
+ </file>
27
+ <file pinned="false" current-in-tab="true">
28
+ <entry file="file://$PROJECT_DIR$/app.py">
29
+ <provider selected="true" editor-type-id="text-editor">
30
+ <state relative-caret-position="331">
31
+ <caret line="29" column="35" selection-start-line="29" selection-start-column="35" selection-end-line="29" selection-end-column="35" />
32
+ <folding>
33
+ <element signature="e#124#130#0" expanded="true" />
34
+ </folding>
35
+ </state>
36
+ </provider>
37
+ </entry>
38
+ </file>
39
+ </leaf>
40
+ </component>
41
+ <component name="FileTemplateManagerImpl">
42
+ <option name="RECENT_TEMPLATES">
43
+ <list>
44
+ <option value="Python Script" />
45
+ </list>
46
+ </option>
47
+ </component>
48
+ <component name="Git.Settings">
49
+ <option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" />
50
+ </component>
51
+ <component name="IdeDocumentHistory">
52
+ <option name="CHANGED_PATHS">
53
+ <list>
54
+ <option value="$PROJECT_DIR$/README.md" />
55
+ <option value="$PROJECT_DIR$/app.py" />
56
+ </list>
57
+ </option>
58
+ </component>
59
+ <component name="ProjectConfigurationFiles">
60
+ <option name="files">
61
+ <list>
62
+ <option value="$PROJECT_DIR$/.idea/ConST-speech2text-translator.iml" />
63
+ <option value="$PROJECT_DIR$/.idea/misc.xml" />
64
+ <option value="$PROJECT_DIR$/.idea/vcs.xml" />
65
+ <option value="$PROJECT_DIR$/.idea/modules.xml" />
66
+ <option value="$PROJECT_DIR$/.idea/dictionaries/yerong.xml" />
67
+ </list>
68
+ </option>
69
+ </component>
70
+ <component name="ProjectFrameBounds">
71
+ <option name="y" value="25" />
72
+ <option name="width" value="1440" />
73
+ <option name="height" value="812" />
74
+ </component>
75
+ <component name="ProjectView">
76
+ <navigator proportions="" version="1">
77
+ <foldersAlwaysOnTop value="true" />
78
+ </navigator>
79
+ <panes>
80
+ <pane id="ProjectPane">
81
+ <subPane>
82
+ <expand>
83
+ <path>
84
+ <item name="ConST-speech2text-translator" type="b2602c69:ProjectViewProjectNode" />
85
+ <item name="ConST-speech2text-translator" type="462c0819:PsiDirectoryNode" />
86
+ </path>
87
+ <path>
88
+ <item name="ConST-speech2text-translator" type="b2602c69:ProjectViewProjectNode" />
89
+ <item name="ConST-speech2text-translator" type="462c0819:PsiDirectoryNode" />
90
+ <item name="results" type="462c0819:PsiDirectoryNode" />
91
+ </path>
92
+ </expand>
93
+ <select />
94
+ </subPane>
95
+ </pane>
96
+ <pane id="Scope" />
97
+ </panes>
98
+ </component>
99
+ <component name="PropertiesComponent">
100
+ <property name="WebServerToolWindowFactoryState" value="false" />
101
+ <property name="last_opened_file_path" value="$PROJECT_DIR$" />
102
+ <property name="node.js.detected.package.eslint" value="true" />
103
+ <property name="node.js.detected.package.tslint" value="true" />
104
+ <property name="node.js.path.for.package.eslint" value="project" />
105
+ <property name="node.js.path.for.package.tslint" value="project" />
106
+ <property name="node.js.selected.package.eslint" value="(autodetect)" />
107
+ <property name="node.js.selected.package.tslint" value="(autodetect)" />
108
+ <property name="nodejs_interpreter_path.stuck_in_default_project" value="undefined stuck path" />
109
+ <property name="nodejs_npm_path_reset_for_default_project" value="true" />
110
+ <property name="settings.editor.selected.configurable" value="preferences.keymap" />
111
+ </component>
112
+ <component name="RunDashboard">
113
+ <option name="ruleStates">
114
+ <list>
115
+ <RuleState>
116
+ <option name="name" value="ConfigurationTypeDashboardGroupingRule" />
117
+ </RuleState>
118
+ <RuleState>
119
+ <option name="name" value="StatusDashboardGroupingRule" />
120
+ </RuleState>
121
+ </list>
122
+ </option>
123
+ </component>
124
+ <component name="SvnConfiguration">
125
+ <configuration />
126
+ </component>
127
+ <component name="TaskManager">
128
+ <task active="true" id="Default" summary="Default task">
129
+ <changelist id="71b24c87-38d2-4265-a415-d8cbcc4803e1" name="Default Changelist" comment="" />
130
+ <created>1652505218797</created>
131
+ <option name="number" value="Default" />
132
+ <option name="presentableId" value="Default" />
133
+ <updated>1652505218797</updated>
134
+ <workItem from="1652505222853" duration="2924000" />
135
+ </task>
136
+ <servers />
137
+ </component>
138
+ <component name="TimeTrackingManager">
139
+ <option name="totallyTimeSpent" value="2924000" />
140
+ </component>
141
+ <component name="ToolWindowManager">
142
+ <frame x="0" y="25" width="1440" height="812" extended-state="0" />
143
+ <editor active="true" />
144
+ <layout>
145
+ <window_info id="Favorites" side_tool="true" />
146
+ <window_info content_ui="combo" id="Project" order="0" visible="true" weight="0.24964234" />
147
+ <window_info id="Structure" order="1" side_tool="true" weight="0.25" />
148
+ <window_info anchor="bottom" id="Docker" show_stripe_button="false" />
149
+ <window_info anchor="bottom" id="Database Changes" />
150
+ <window_info anchor="bottom" id="Version Control" weight="0.32913166" />
151
+ <window_info anchor="bottom" id="Python Console" />
152
+ <window_info active="true" anchor="bottom" id="Terminal" visible="true" weight="0.3557423" />
153
+ <window_info anchor="bottom" id="Event Log" side_tool="true" />
154
+ <window_info anchor="bottom" id="Message" order="0" />
155
+ <window_info anchor="bottom" id="Find" order="1" />
156
+ <window_info anchor="bottom" id="Run" order="2" />
157
+ <window_info anchor="bottom" id="Debug" order="3" weight="0.4" />
158
+ <window_info anchor="bottom" id="Cvs" order="4" weight="0.25" />
159
+ <window_info anchor="bottom" id="Inspection" order="5" weight="0.4" />
160
+ <window_info anchor="bottom" id="TODO" order="6" />
161
+ <window_info anchor="right" id="SciView" />
162
+ <window_info anchor="right" id="Database" />
163
+ <window_info anchor="right" id="Commander" internal_type="SLIDING" order="0" type="SLIDING" weight="0.4" />
164
+ <window_info anchor="right" id="Ant Build" order="1" weight="0.25" />
165
+ <window_info anchor="right" content_ui="combo" id="Hierarchy" order="2" weight="0.25" />
166
+ </layout>
167
+ </component>
168
+ <component name="TypeScriptGeneratedFilesManager">
169
+ <option name="version" value="1" />
170
+ </component>
171
+ <component name="editorHistoryManager">
172
+ <entry file="file://$PROJECT_DIR$/README.md">
173
+ <provider selected="true" editor-type-id="split-provider[text-editor;markdown-preview-editor]">
174
+ <state split_layout="SPLIT">
175
+ <first_editor relative-caret-position="45">
176
+ <caret line="3" column="6" selection-start-line="3" selection-start-column="6" selection-end-line="3" selection-end-column="6" />
177
+ </first_editor>
178
+ <second_editor />
179
+ </state>
180
+ </provider>
181
+ </entry>
182
+ <entry file="file://$PROJECT_DIR$/app.py">
183
+ <provider selected="true" editor-type-id="text-editor">
184
+ <state relative-caret-position="331">
185
+ <caret line="29" column="35" selection-start-line="29" selection-start-column="35" selection-end-line="29" selection-end-column="35" />
186
+ <folding>
187
+ <element signature="e#124#130#0" expanded="true" />
188
+ </folding>
189
+ </state>
190
+ </provider>
191
+ </entry>
192
+ </component>
193
+ </project>
app.py CHANGED
@@ -1,9 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
 
 
 
3
 
4
- def greet(name):
5
- return "Hello " + name + "!!"
6
 
 
 
 
 
7
 
8
- iface = gr.Interface(fn=greet, inputs="text", outputs="text")
9
- iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+
3
+ """
4
+ @Author : Rong Ye
5
+ @Time : May 2022
6
+ @Contact : yerong@bytedance
7
+ @Description:
8
+ """
9
+
10
+ import os
11
+ import shutil
12
+ import yaml
13
+ import torchaudio
14
  import gradio as gr
15
+ from huggingface_hub import snapshot_download
16
+
17
+
18
# Language name offered in the UI dropdown -> two-letter target-language code.
# Insertion order matters: the dropdown lists the keys in this order.
LANGUAGE_CODES = dict(
    (
        ("German", "de"),
        ("Spanish", "es"),
        ("French", "fr"),
        ("Italian", "it"),
        ("Netherlands", "nl"),
        ("Portuguese", "pt"),
        ("Romanian", "ro"),
        ("Russian", "ru"),
    )
)

# Per-target-language settings, keyed by language code. get_vocab_and_yaml
# reads this table as the starting point of the YAML config it writes.
# Every language uses beam 10 and lenpen 0.7, except Russian (lenpen 0.1).
# NOTE(review): "beam"/"lenpen" are presumably beam size and length penalty
# for decoding -- confirm against the consumer of config.yaml.
LANG_GEN_SETUPS = {
    code: {"beam": 10, "lenpen": 0.1 if code == "ru" else 0.7}
    for code in LANGUAGE_CODES.values()
}
39
+
40
# Import-time bootstrap: clone the ConST sources, move them into the working
# directory, install the requirements and the package, build the extensions
# in place, and create the data/ and checkpoint/ directories.
# The commands run strictly in this order and their exit statuses are ignored.
_SETUP_COMMANDS = (
    "git clone https://github.com/ReneeYe/ConST",
    'mv ConST/* ./',
    "pip3 install -r requirements.txt",
    "python3 setup.py install",
    "python3 ConST/setup.py build_ext --inplace",
    "mkdir -p data checkpoint",
)
for _setup_command in _SETUP_COMMANDS:
    os.system(_setup_command)


# Local directory of the model repository snapshot; the vocabulary files and
# checkpoints used below are resolved relative to it.
huggingface_model_dir = snapshot_download(repo_id="ReneeYe/ConST_en2x_models")
print(huggingface_model_dir)
50
+
51
def convert_audio_to_16k_wav(audio_input):
    """Copy the given audio file into ``data/`` and report its frame count.

    NOTE(review): despite the function name, no resampling or format
    conversion is performed here -- the file is copied unchanged.

    Args:
        audio_input: object whose ``name`` attribute is the path of the
            audio file on disk.

    Returns:
        A tuple ``(copied_path, num_frames)`` where ``copied_path`` is
        ``data/<file name>`` and ``num_frames`` is the frame count reported
        by ``torchaudio.info``.
    """
    source_path = audio_input.name
    frame_count = torchaudio.info(source_path).num_frames
    # Keep only the last "/"-separated component as the file name.
    target_path = "data/" + source_path.rpartition("/")[2]
    shutil.copy(source_path, target_path)
    return target_path, frame_count
56
+
57
+
58
def prepare_tsv(file_name, n_frame, language, task="ST"):
    """Write the one-sample manifest ``data/test_case.tsv``.

    The file consists of a header line and a single row with id ``sample``.
    The target and source text columns are fixed placeholder sentences.

    Args:
        file_name: value written to the ``audio`` column.
        n_frame: value written to the ``n_frames`` column.
        language: key of ``LANGUAGE_CODES``; its code fills ``tgt_lang``.
        task: currently unused.
    """
    target_code = LANGUAGE_CODES[language]
    header = "id\taudio\tn_frames\ttgt_text\tspeaker\tsrc_lang\ttgt_lang\tsrc_text\n"
    row = f"sample\t{file_name}\t{n_frame}\tThis is in {target_code}.\tspk.1\ten\t{target_code}\tThis is English.\n"
    with open("data/test_case.tsv", "w") as manifest:
        manifest.writelines((header, row))
63
+
64
+
65
def get_vocab_and_yaml(language):
    """Stage the vocabulary files for *language* and write ``data/config.yaml``.

    Copies ``spm_en<code>.model`` and ``spm_en<code>.txt`` from the
    downloaded snapshot's ``vocabulary/`` folder into ``./data``, then dumps
    a YAML config built from the language's entry in ``LANG_GEN_SETUPS``
    plus the audio and tokenizer settings.

    Args:
        language: key of ``LANGUAGE_CODES`` (e.g. ``"German"``).

    Raises:
        KeyError: if *language* is not a key of ``LANGUAGE_CODES``.
    """
    tgt_lang = LANGUAGE_CODES[language]

    # Copy the SentencePiece model and vocabulary into ./data (overwrites any
    # copy left by a previous request).
    for suffix in ("model", "txt"):
        shutil.copy(
            os.path.join(huggingface_model_dir, f"vocabulary/spm_en{tgt_lang}.{suffix}"),
            "./data",
        )

    # Absolute data directory; os.getcwd() avoids spawning a `pwd` subprocess.
    data_dir = os.path.join(os.getcwd(), "data")

    # Index by the language *code* (not the literal string "tgt_lang"), and
    # work on a copy so the module-level defaults are never mutated.
    yaml_dict = dict(LANG_GEN_SETUPS[tgt_lang])
    yaml_dict.update(
        {
            "input_channels": 1,
            "use_audio_input": True,
            "prepend_tgt_lang_tag": True,
            "prepend_src_lang_tag": True,
            "audio_root": data_dir,
            "vocab_filename": f"spm_en{tgt_lang}.txt",
            "bpe_tokenizer": {
                "bpe": "sentencepiece",
                "sentencepiece_model": os.path.join(data_dir, f"spm_en{tgt_lang}.model"),
            },
        }
    )
    with open("data/config.yaml", "w") as f:
        yaml.dump(yaml_dict, f)
85
+
86
+
87
def get_model(language):
    """Return the checkpoint path for *language* inside the model snapshot.

    Nothing is downloaded here; the path ``models/const_en<code>.pt`` is
    simply joined onto ``huggingface_model_dir``.
    """
    lang_code = LANGUAGE_CODES[language]
    checkpoint_name = f"models/const_en{lang_code}.pt"
    return os.path.join(huggingface_model_dir, checkpoint_name)
90
+
91
+
92
def generate(model_path):
    """Run ``fairseq-generate`` on the prepared test case and return its output text.

    The command reads the ``test_case`` subset and ``config.yaml`` from
    ``data/`` and its console output is also saved to ``temp.txt``. The
    result is then extracted from ``temp.txt`` with a grep/sort/cut pipeline.

    Args:
        model_path: path of the checkpoint passed to ``--path``.

    Returns:
        The extracted text with surrounding whitespace stripped (empty
        string when no matching line was produced).
    """
    import shlex  # local import: only needed to quote the checkpoint path

    # Quote the path so directories with spaces or shell metacharacters do
    # not break the command line.
    command = (
        "fairseq-generate data/ --gen-subset test_case --task speech_to_text --prefix-size 1 "
        "--max-tokens 4000000 --max-source-positions 4000000 "
        f"--config-yaml config.yaml --path {shlex.quote(str(model_path))} | tee temp.txt"
    )
    os.system(command)

    # Keep lines starting with "D", order them numerically by the second
    # "-"-separated field, and take the third tab-separated column.
    # NOTE(review): "D" lines are presumably fairseq's detokenized
    # hypotheses -- confirm against the fairseq-generate output format.
    # The context manager closes the pipe once the output has been read.
    with os.popen("grep ^D temp.txt | sort -n -k 2 -t '-' | cut -f 3") as output:
        return output.read().strip()
98
+
99
+
100
def remove_temp_files():
    """Delete the scratch files ``temp.txt`` and ``data/test_case.tsv``.

    Raises:
        FileNotFoundError: if either file does not exist.
    """
    for scratch_path in ("temp.txt", "data/test_case.tsv"):
        os.remove(scratch_path)
103
+
104
+
105
def run(audio_file, language):
    """Translate one recorded audio file into *language*; Gradio entry point.

    Steps: stage the audio under ``data/``, write the one-row manifest,
    stage the vocabulary and YAML config, run generation with the language's
    checkpoint, then delete the scratch files.

    Args:
        audio_file: object whose ``name`` attribute is the audio file path.
        language: key of ``LANGUAGE_CODES`` selected in the UI.

    Returns:
        The text returned by ``generate``.
    """
    staged_audio, frame_count = convert_audio_to_16k_wav(audio_file)
    prepare_tsv(staged_audio, frame_count, language)
    get_vocab_and_yaml(language)
    translation = generate(get_model(language))
    remove_temp_files()
    return translation
113
+
114
 
115
def greet(audio_file, language):
    """Print the audio file's path and return a greeting naming *language*.

    Placeholder callback; the interface below is wired to ``run`` instead.
    """
    print(audio_file.name)
    greeting = "Hello {}!!".format(language)
    return greeting
118
 
 
 
119
 
120
# Input widgets: a microphone recording handed to the callback as a file
# object, and the target-language selector populated from LANGUAGE_CODES.
inputs = [
    gr.inputs.Audio(source="microphone", type="file", label="Record something (in English)..."),
    gr.inputs.Dropdown(list(LANGUAGE_CODES.keys()), default="German", label="From English to Languages X..."),
]

# Demo page wired to `run`; the bundled case*.wav files serve as examples.
iface = gr.Interface(
    fn=run,
    inputs=inputs,
    outputs=[gr.outputs.Textbox(label="The translation")],
    examples=[['case1.wav', "German"], ['case2.wav', "German"], ['case3.wav', "German"]],
    title="ConST: an end-to-end speech translator",
    description="End-to-end Speech Translation Live Demo for English to eight European languages.",
    # HTML attributes are separated by whitespace only; the anchor previously
    # had a stray comma between href and target.
    article="ConST is an end-to-end speech translation model (see paper <a href='https://arxiv.org/abs/2205.02444' target='_blank'>here</a>). "
            "Its motivation is to use contrastive learning method to learn similar representations for semantically similar speech and text.",
    theme="seafoam",
    layout='vertical',
    # analytics_enabled=False,
    # flagging_dir='results/flagged/',
    # allow_flagging=True,
    # flagging_options=['Interesting!', 'Error: Claim Phrase Parsing', 'Error: Local Premise',
    #                   'Error: Require Commonsense', 'Error: Evidence Retrieval'],
    enable_queue=True
)
iface.launch(inline=False)
case1.wav ADDED
Binary file (108 kB). View file
 
case2.wav ADDED
Binary file (125 kB). View file
 
case3.wav ADDED
Binary file (235 kB). View file
 
gradio_queue.db ADDED
File without changes