cahya committed on
Commit
ec50559
1 Parent(s): 2f2d456

clean up the repo

Browse files
.gitignore CHANGED
@@ -1 +1,2 @@
1
- checkpoint-*/
 
 
1
+ checkpoint-*/
2
+ .idea/
.idea/.gitignore DELETED
File without changes
.idea/inspectionProfiles/profiles_settings.xml DELETED
@@ -1,6 +0,0 @@
1
- <component name="InspectionProjectProfileManager">
2
- <settings>
3
- <option name="USE_PROJECT_PROFILE" value="false" />
4
- <version value="1.0" />
5
- </settings>
6
- </component>
 
 
 
 
 
 
 
.idea/misc.xml DELETED
@@ -1,4 +0,0 @@
1
- <?xml version="1.0" encoding="UTF-8"?>
2
- <project version="4">
3
- <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.8" project-jdk-type="Python SDK" />
4
- </project>
 
 
 
 
 
.idea/modules.xml DELETED
@@ -1,8 +0,0 @@
1
- <?xml version="1.0" encoding="UTF-8"?>
2
- <project version="4">
3
- <component name="ProjectModuleManager">
4
- <modules>
5
- <module fileurl="file://$PROJECT_DIR$/.idea/wav2vec2-base-turkish.iml" filepath="$PROJECT_DIR$/.idea/wav2vec2-base-turkish.iml" />
6
- </modules>
7
- </component>
8
- </project>
 
 
 
 
 
 
 
 
 
.idea/vcs.xml DELETED
@@ -1,8 +0,0 @@
1
- <?xml version="1.0" encoding="UTF-8"?>
2
- <project version="4">
3
- <component name="VcsDirectoryMappings">
4
- <mapping directory="$PROJECT_DIR$" vcs="Git" />
5
- <mapping directory="$PROJECT_DIR$/output" vcs="Git" />
6
- <mapping directory="$PROJECT_DIR$/wav2vec2-base-turkish" vcs="Git" />
7
- </component>
8
- </project>
 
 
 
 
 
 
 
 
 
.idea/wav2vec2-base-turkish.iml DELETED
@@ -1,12 +0,0 @@
1
- <?xml version="1.0" encoding="UTF-8"?>
2
- <module type="PYTHON_MODULE" version="4">
3
- <component name="NewModuleRootManager">
4
- <content url="file://$MODULE_DIR$" />
5
- <orderEntry type="jdk" jdkName="Python 3.8" jdkType="Python SDK" />
6
- <orderEntry type="sourceFolder" forTests="false" />
7
- </component>
8
- <component name="PyDocumentationSettings">
9
- <option name="format" value="PLAIN" />
10
- <option name="myDocStringFormat" value="Plain" />
11
- </component>
12
- </module>
 
 
 
 
 
 
 
 
 
 
 
 
 
.idea/workspace.xml DELETED
@@ -1,234 +0,0 @@
1
- <?xml version="1.0" encoding="UTF-8"?>
2
- <project version="4">
3
- <component name="AutoImportSettings">
4
- <option name="autoReloadType" value="SELECTIVE" />
5
- </component>
6
- <component name="ChangeListManager">
7
- <list default="true" id="c9169370-1a11-41d7-9648-02694630edd2" name="Changes" comment="">
8
- <change beforePath="$PROJECT_DIR$/README.md" beforeDir="false" afterPath="$PROJECT_DIR$/README.md" afterDir="false" />
9
- <change beforePath="$PROJECT_DIR$/wav2vec2-base-turkish/preprocessor_config.json" beforeDir="false" afterPath="$PROJECT_DIR$/wav2vec2-base-turkish/preprocessor_config.json" afterDir="false" />
10
- <change beforePath="$PROJECT_DIR$/wav2vec2-base-turkish/special_tokens_map.json" beforeDir="false" afterPath="$PROJECT_DIR$/wav2vec2-base-turkish/special_tokens_map.json" afterDir="false" />
11
- <change beforePath="$PROJECT_DIR$/wav2vec2-base-turkish/tokenizer_config.json" beforeDir="false" afterPath="$PROJECT_DIR$/wav2vec2-base-turkish/tokenizer_config.json" afterDir="false" />
12
- </list>
13
- <option name="SHOW_DIALOG" value="false" />
14
- <option name="HIGHLIGHT_CONFLICTS" value="true" />
15
- <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
16
- <option name="LAST_RESOLUTION" value="IGNORE" />
17
- </component>
18
- <component name="FileTemplateManagerImpl">
19
- <option name="RECENT_TEMPLATES">
20
- <list>
21
- <option value="Python Script" />
22
- </list>
23
- </option>
24
- </component>
25
- <component name="Git.Settings">
26
- <option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" />
27
- </component>
28
- <component name="MarkdownSettingsMigration">
29
- <option name="stateVersion" value="1" />
30
- </component>
31
- <component name="ProjectId" id="24KJYqb7IjKGUSNqPXUOjBSp5gx" />
32
- <component name="ProjectLevelVcsManager" settingsEditedManually="true" />
33
- <component name="ProjectViewState">
34
- <option name="hideEmptyMiddlePackages" value="true" />
35
- <option name="showLibraryContents" value="true" />
36
- </component>
37
- <component name="PropertiesComponent">
38
- <property name="RunOnceActivity.OpenProjectViewOnStart" value="true" />
39
- <property name="RunOnceActivity.ShowReadmeOnStart" value="true" />
40
- <property name="WebServerToolWindowFactoryState" value="false" />
41
- <property name="last_opened_file_path" value="$PROJECT_DIR$/../wav2vec2-base-turkish-artificial-cv" />
42
- <property name="node.js.detected.package.eslint" value="true" />
43
- <property name="node.js.detected.package.tslint" value="true" />
44
- <property name="node.js.selected.package.eslint" value="(autodetect)" />
45
- <property name="node.js.selected.package.tslint" value="(autodetect)" />
46
- <property name="settings.editor.selected.configurable" value="com.jetbrains.python.configuration.PyActiveSdkModuleConfigurable" />
47
- </component>
48
- <component name="RecentsManager">
49
- <key name="CopyFile.RECENT_KEYS">
50
- <recent name="$PROJECT_DIR$/language_model" />
51
- </key>
52
- </component>
53
- <component name="RunManager" selected="Python.eval">
54
- <configuration name="eval" type="PythonConfigurationType" factoryName="Python" nameIsGenerated="true">
55
- <module name="wav2vec2-base-turkish" />
56
- <option name="INTERPRETER_OPTIONS" value="" />
57
- <option name="PARENT_ENVS" value="true" />
58
- <envs>
59
- <env name="PYTHONUNBUFFERED" value="1" />
60
- </envs>
61
- <option name="SDK_HOME" value="" />
62
- <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
63
- <option name="IS_MODULE_SDK" value="true" />
64
- <option name="ADD_CONTENT_ROOTS" value="true" />
65
- <option name="ADD_SOURCE_ROOTS" value="true" />
66
- <EXTENSION ID="PythonCoverageRunConfigurationExtension" runner="coverage.py" />
67
- <option name="SCRIPT_NAME" value="$PROJECT_DIR$/eval.py" />
68
- <option name="PARAMETERS" value="--model_id cahya/wav2vec2-base-turkish --dataset ./dataset/common_voice --config tr --data_dir /mnt/mldata/data/ASR/turkish/cv-corpus-6.1-2020-12-11 --split test --log_outputs" />
69
- <option name="SHOW_COMMAND_LINE" value="false" />
70
- <option name="EMULATE_TERMINAL" value="false" />
71
- <option name="MODULE_MODE" value="false" />
72
- <option name="REDIRECT_INPUT" value="false" />
73
- <option name="INPUT_FILE" value="" />
74
- <method v="2" />
75
- </configuration>
76
- <configuration name="ngram" type="PythonConfigurationType" factoryName="Python" temporary="true" nameIsGenerated="true">
77
- <module name="wav2vec2-base-turkish" />
78
- <option name="INTERPRETER_OPTIONS" value="" />
79
- <option name="PARENT_ENVS" value="true" />
80
- <envs>
81
- <env name="PYTHONUNBUFFERED" value="1" />
82
- </envs>
83
- <option name="SDK_HOME" value="" />
84
- <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
85
- <option name="IS_MODULE_SDK" value="true" />
86
- <option name="ADD_CONTENT_ROOTS" value="true" />
87
- <option name="ADD_SOURCE_ROOTS" value="true" />
88
- <EXTENSION ID="PythonCoverageRunConfigurationExtension" runner="coverage.py" />
89
- <option name="SCRIPT_NAME" value="$PROJECT_DIR$/ngram.py" />
90
- <option name="PARAMETERS" value="" />
91
- <option name="SHOW_COMMAND_LINE" value="false" />
92
- <option name="EMULATE_TERMINAL" value="false" />
93
- <option name="MODULE_MODE" value="false" />
94
- <option name="REDIRECT_INPUT" value="false" />
95
- <option name="INPUT_FILE" value="" />
96
- <method v="2" />
97
- </configuration>
98
- <configuration name="run_speech_recognition_ctc" type="PythonConfigurationType" factoryName="Python" nameIsGenerated="true">
99
- <module name="wav2vec2-base-turkish" />
100
- <option name="INTERPRETER_OPTIONS" value="" />
101
- <option name="PARENT_ENVS" value="true" />
102
- <envs>
103
- <env name="PYTHONUNBUFFERED" value="1" />
104
- </envs>
105
- <option name="SDK_HOME" value="" />
106
- <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
107
- <option name="IS_MODULE_SDK" value="true" />
108
- <option name="ADD_CONTENT_ROOTS" value="true" />
109
- <option name="ADD_SOURCE_ROOTS" value="true" />
110
- <EXTENSION ID="PythonCoverageRunConfigurationExtension" runner="coverage.py" />
111
- <option name="SCRIPT_NAME" value="$PROJECT_DIR$/run_speech_recognition_ctc.py" />
112
- <option name="PARAMETERS" value="--dataset_name=&quot;common_voice&quot; --model_name_or_path=&quot;cahya/wav2vec2-base-turkish-artificial-cv&quot; --dataset_config_name=&quot;tr&quot; --output_dir=&quot;./output&quot; --overwrite_output_dir --num_train_epochs=&quot;1&quot; --per_device_train_batch_size=&quot;2&quot; --per_device_eval_batch_size=&quot;2&quot; --gradient_accumulation_steps=&quot;4&quot; --learning_rate=&quot;7.5e-7&quot; --warmup_steps=&quot;2000&quot; --length_column_name=&quot;input_length&quot; --evaluation_strategy=&quot;steps&quot; --text_column_name=&quot;sentence&quot; --save_steps=&quot;500&quot; --eval_steps=&quot;500&quot; --logging_steps=&quot;100&quot; --layerdrop=&quot;0.0&quot; --activation_dropout=&quot;0.1&quot; --save_total_limit=&quot;3&quot; --freeze_feature_encoder --feat_proj_dropout=&quot;0.0&quot; --mask_time_prob=&quot;0.75&quot; --mask_time_length=&quot;10&quot; --mask_feature_prob=&quot;0.25&quot; --mask_feature_length=&quot;64&quot; --gradient_checkpointing --use_auth_token --fp16=false --group_by_length --do_train=true --do_eval=true --push_to_hub --chars_to_ignore , ? . ! \; \: \&quot;\&quot; \% \' \&quot; \' \' \` … \’ » « \‘ '“' '”' � é û" />
113
- <option name="SHOW_COMMAND_LINE" value="false" />
114
- <option name="EMULATE_TERMINAL" value="false" />
115
- <option name="MODULE_MODE" value="false" />
116
- <option name="REDIRECT_INPUT" value="false" />
117
- <option name="INPUT_FILE" value="" />
118
- <method v="2" />
119
- </configuration>
120
- <configuration name="test-vocab" type="PythonConfigurationType" factoryName="Python" temporary="true" nameIsGenerated="true">
121
- <module name="wav2vec2-base-turkish" />
122
- <option name="INTERPRETER_OPTIONS" value="" />
123
- <option name="PARENT_ENVS" value="true" />
124
- <envs>
125
- <env name="PYTHONUNBUFFERED" value="1" />
126
- </envs>
127
- <option name="SDK_HOME" value="" />
128
- <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
129
- <option name="IS_MODULE_SDK" value="true" />
130
- <option name="ADD_CONTENT_ROOTS" value="true" />
131
- <option name="ADD_SOURCE_ROOTS" value="true" />
132
- <EXTENSION ID="PythonCoverageRunConfigurationExtension" runner="coverage.py" />
133
- <option name="SCRIPT_NAME" value="$PROJECT_DIR$/test-vocab.py" />
134
- <option name="PARAMETERS" value="" />
135
- <option name="SHOW_COMMAND_LINE" value="false" />
136
- <option name="EMULATE_TERMINAL" value="false" />
137
- <option name="MODULE_MODE" value="false" />
138
- <option name="REDIRECT_INPUT" value="false" />
139
- <option name="INPUT_FILE" value="" />
140
- <method v="2" />
141
- </configuration>
142
- <recent_temporary>
143
- <list>
144
- <item itemvalue="Python.ngram" />
145
- <item itemvalue="Python.test-vocab" />
146
- </list>
147
- </recent_temporary>
148
- </component>
149
- <component name="SpellCheckerSettings" RuntimeDictionaries="0" Folders="0" CustomDictionaries="0" DefaultDictionary="application-level" UseSingleDictionary="true" transferred="true" />
150
- <component name="TaskManager">
151
- <task active="true" id="Default" summary="Default task">
152
- <changelist id="c9169370-1a11-41d7-9648-02694630edd2" name="Changes" comment="" />
153
- <created>1643373395175</created>
154
- <option name="number" value="Default" />
155
- <option name="presentableId" value="Default" />
156
- <updated>1643373395175</updated>
157
- <workItem from="1643373396354" duration="4135000" />
158
- <workItem from="1643465640987" duration="10417000" />
159
- <workItem from="1643646039461" duration="615000" />
160
- <workItem from="1643706833181" duration="7235000" />
161
- </task>
162
- <task id="LOCAL-00001" summary="update unigrams.txt">
163
- <created>1643492393885</created>
164
- <option name="number" value="00001" />
165
- <option name="presentableId" value="LOCAL-00001" />
166
- <option name="project" value="LOCAL" />
167
- <updated>1643492393885</updated>
168
- </task>
169
- <task id="LOCAL-00002" summary="Add check_sound.ipynb">
170
- <created>1643710330931</created>
171
- <option name="number" value="00002" />
172
- <option name="presentableId" value="LOCAL-00002" />
173
- <option name="project" value="LOCAL" />
174
- <updated>1643710330931</updated>
175
- </task>
176
- <option name="localTasksCounter" value="3" />
177
- <servers />
178
- </component>
179
- <component name="TypeScriptGeneratedFilesManager">
180
- <option name="version" value="3" />
181
- </component>
182
- <component name="VcsManagerConfiguration">
183
- <MESSAGE value="update unigrams.txt" />
184
- <MESSAGE value="Add check_sound.ipynb" />
185
- <option name="LAST_COMMIT_MESSAGE" value="Add check_sound.ipynb" />
186
- </component>
187
- <component name="XDebuggerManager">
188
- <breakpoint-manager>
189
- <breakpoints>
190
- <line-breakpoint enabled="true" suspend="THREAD" type="python-line">
191
- <url>file://$PROJECT_DIR$/run_speech_recognition_ctc.py</url>
192
- <line>747</line>
193
- <option name="timeStamp" value="1" />
194
- </line-breakpoint>
195
- <line-breakpoint enabled="true" suspend="THREAD" type="python-line">
196
- <url>file://$PROJECT_DIR$/ngram.py</url>
197
- <line>5</line>
198
- <option name="timeStamp" value="2" />
199
- </line-breakpoint>
200
- <line-breakpoint enabled="true" suspend="THREAD" type="python-line">
201
- <url>file://$PROJECT_DIR$/test-vocab.py</url>
202
- <line>5</line>
203
- <option name="timeStamp" value="3" />
204
- </line-breakpoint>
205
- <line-breakpoint enabled="true" suspend="THREAD" type="python-line">
206
- <url>file://$PROJECT_DIR$/eval.py</url>
207
- <line>67</line>
208
- <option name="timeStamp" value="4" />
209
- </line-breakpoint>
210
- <line-breakpoint enabled="true" suspend="THREAD" type="python-line">
211
- <url>file://$PROJECT_DIR$/eval.py</url>
212
- <line>86</line>
213
- <option name="timeStamp" value="5" />
214
- </line-breakpoint>
215
- <line-breakpoint enabled="true" suspend="THREAD" type="python-line">
216
- <url>file://$PROJECT_DIR$/../transformers/src/transformers/pipelines/base.py</url>
217
- <line>1026</line>
218
- <option name="timeStamp" value="6" />
219
- </line-breakpoint>
220
- <line-breakpoint enabled="true" suspend="THREAD" type="python-line">
221
- <url>file://$PROJECT_DIR$/../transformers/src/transformers/pipelines/automatic_speech_recognition.py</url>
222
- <line>312</line>
223
- <option name="timeStamp" value="7" />
224
- </line-breakpoint>
225
- </breakpoints>
226
- </breakpoint-manager>
227
- </component>
228
- <component name="com.intellij.coverage.CoverageDataManagerImpl">
229
- <SUITE FILE_PATH="coverage/wav2vec2_base_turkish$test_vocab.coverage" NAME="test-vocab Coverage Results" MODIFIED="1643475983769" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$" />
230
- <SUITE FILE_PATH="coverage/wav2vec2_base_turkish$eval.coverage" NAME="eval Coverage Results" MODIFIED="1643720743098" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$" />
231
- <SUITE FILE_PATH="coverage/wav2vec2_base_turkish$ngram.coverage" NAME="ngram Coverage Results" MODIFIED="1643492280791" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$" />
232
- <SUITE FILE_PATH="coverage/wav2vec2_base_turkish$run_speech_recognition_ctc.coverage" NAME="run_speech_recognition_ctc Coverage Results" MODIFIED="1643376049209" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$" />
233
- </component>
234
- </project>