davidkim205 committed on
Commit
577164e
·
verified ·
1 Parent(s): f3af1d6

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitignore +160 -0
  2. .idea/.gitignore +3 -0
  3. .idea/inspectionProfiles/profiles_settings.xml +6 -0
  4. .idea/misc.xml +7 -0
  5. .idea/modules.xml +8 -0
  6. .idea/translation.iml +8 -0
  7. .idea/vcs.xml +6 -0
  8. .idea/workspace.xml +217 -0
  9. README.md +336 -8
  10. assets/iris-icon.jpeg +0 -0
  11. assets/plot-bleu-by-sentence-length.png +0 -0
  12. assets/plot-bleu-by-src.png +0 -0
  13. assets/plot-bleu.png +0 -0
  14. create_table.py +124 -0
  15. data/komt-1810k-test.jsonl +0 -0
  16. data/komt-dataset-length.jsonl +0 -0
  17. evaluate.py +116 -0
  18. leaderboard.py +68 -0
  19. model.py +175 -0
  20. models/TowerInstruct.py +47 -0
  21. models/gemma.py +81 -0
  22. models/gugugo.py +74 -0
  23. models/iris_7b.py +81 -0
  24. models/madlad400.py +39 -0
  25. models/mbart50.py +45 -0
  26. models/nllb200.py +48 -0
  27. models/synatra.py +63 -0
  28. requirements.txt +19 -0
  29. results/result-iris_7b-.jsonl +0 -0
  30. results/result-iris_7b-checkpoint-105000.jsonl +0 -0
  31. results/result-iris_7b-checkpoint-110000.jsonl +0 -0
  32. results/result-iris_7b-checkpoint-115000.jsonl +0 -0
  33. results/result-iris_7b-checkpoint-120000.jsonl +0 -0
  34. results/result-iris_7b-checkpoint-125000.jsonl +0 -0
  35. results/result-iris_7b-iris_7b.jsonl +0 -0
  36. results/result_self-google.jsonl +0 -0
  37. results_bleu/result_bleu-Synatra-7B-v0.3-Translation.jsonl +0 -0
  38. results_bleu/result_bleu-TowerInstruct.jsonl +0 -0
  39. results_bleu/result_bleu-azure.jsonl +0 -0
  40. results_bleu/result_bleu-deepl.jsonl +0 -0
  41. results_bleu/result_bleu-google.jsonl +0 -0
  42. results_bleu/result_bleu-gugugo.jsonl +0 -0
  43. results_bleu/result_bleu-iris_7b.jsonl +0 -0
  44. results_bleu/result_bleu-madlad400.jsonl +0 -0
  45. results_bleu/result_bleu-nllb200.jsonl +0 -0
  46. results_bleu/result_bleu-papago.jsonl +0 -0
  47. results_length/Gugugo-koen-7B-V1.1-result.jsonl +0 -0
  48. results_length/Synatra-7B-v0.3-Translation-result.jsonl +0 -0
  49. results_length/TowerInstruct-7B-v0.1-result.jsonl +0 -0
  50. results_length/azure-result.jsonl +0 -0
.gitignore ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # poetry
98
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102
+ #poetry.lock
103
+
104
+ # pdm
105
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106
+ #pdm.lock
107
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108
+ # in version control.
109
+ # https://pdm.fming.dev/#use-with-ide
110
+ .pdm.toml
111
+
112
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113
+ __pypackages__/
114
+
115
+ # Celery stuff
116
+ celerybeat-schedule
117
+ celerybeat.pid
118
+
119
+ # SageMath parsed files
120
+ *.sage.py
121
+
122
+ # Environments
123
+ .env
124
+ .venv
125
+ env/
126
+ venv/
127
+ ENV/
128
+ env.bak/
129
+ venv.bak/
130
+
131
+ # Spyder project settings
132
+ .spyderproject
133
+ .spyproject
134
+
135
+ # Rope project settings
136
+ .ropeproject
137
+
138
+ # mkdocs documentation
139
+ /site
140
+
141
+ # mypy
142
+ .mypy_cache/
143
+ .dmypy.json
144
+ dmypy.json
145
+
146
+ # Pyre type checker
147
+ .pyre/
148
+
149
+ # pytype static type analyzer
150
+ .pytype/
151
+
152
+ # Cython debug symbols
153
+ cython_debug/
154
+
155
+ # PyCharm
156
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
159
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160
+ #.idea/
.idea/.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # Default ignored files
2
+ /shelf/
3
+ /workspace.xml
.idea/inspectionProfiles/profiles_settings.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <component name="InspectionProjectProfileManager">
2
+ <settings>
3
+ <option name="USE_PROJECT_PROFILE" value="false" />
4
+ <version value="1.0" />
5
+ </settings>
6
+ </component>
.idea/misc.xml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="Black">
4
+ <option name="sdkName" value="Promptify" />
5
+ </component>
6
+ <component name="ProjectRootManager" version="2" project-jdk-name="translation" project-jdk-type="Python SDK" />
7
+ </project>
.idea/modules.xml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectModuleManager">
4
+ <modules>
5
+ <module fileurl="file://$PROJECT_DIR$/.idea/translation.iml" filepath="$PROJECT_DIR$/.idea/translation.iml" />
6
+ </modules>
7
+ </component>
8
+ </project>
.idea/translation.iml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <module type="PYTHON_MODULE" version="4">
3
+ <component name="NewModuleRootManager">
4
+ <content url="file://$MODULE_DIR$" />
5
+ <orderEntry type="jdk" jdkName="translation" jdkType="Python SDK" />
6
+ <orderEntry type="sourceFolder" forTests="false" />
7
+ </component>
8
+ </module>
.idea/vcs.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="VcsDirectoryMappings">
4
+ <mapping directory="" vcs="Git" />
5
+ </component>
6
+ </project>
.idea/workspace.xml ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="AutoImportSettings">
4
+ <option name="autoReloadType" value="SELECTIVE" />
5
+ </component>
6
+ <component name="ChangeListManager">
7
+ <list default="true" id="3b99adef-9597-4d3b-ace1-b9d588ce4682" name="Changes" comment="">
8
+ <change beforePath="$PROJECT_DIR$/leaderboard.py" beforeDir="false" afterPath="$PROJECT_DIR$/leaderboard.py" afterDir="false" />
9
+ <change beforePath="$PROJECT_DIR$/model.py" beforeDir="false" afterPath="$PROJECT_DIR$/model.py" afterDir="false" />
10
+ <change beforePath="$PROJECT_DIR$/translation2.py" beforeDir="false" afterPath="$PROJECT_DIR$/translation2.py" afterDir="false" />
11
+ </list>
12
+ <option name="SHOW_DIALOG" value="false" />
13
+ <option name="HIGHLIGHT_CONFLICTS" value="true" />
14
+ <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
15
+ <option name="LAST_RESOLUTION" value="IGNORE" />
16
+ </component>
17
+ <component name="FileTemplateManagerImpl">
18
+ <option name="RECENT_TEMPLATES">
19
+ <list>
20
+ <option value="Python Script" />
21
+ </list>
22
+ </option>
23
+ </component>
24
+ <component name="Git.Settings">
25
+ <option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" />
26
+ </component>
27
+ <component name="MarkdownSettingsMigration">
28
+ <option name="stateVersion" value="1" />
29
+ </component>
30
+ <component name="ProjectColorInfo">{
31
+ &quot;associatedIndex&quot;: 6
32
+ }</component>
33
+ <component name="ProjectId" id="2ccs1q2ofZLAMdjdjZEwtumXnHp" />
34
+ <component name="ProjectViewState">
35
+ <option name="hideEmptyMiddlePackages" value="true" />
36
+ <option name="showLibraryContents" value="true" />
37
+ </component>
38
+ <component name="PropertiesComponent">{
39
+ &quot;keyToString&quot;: {
40
+ &quot;Python.TowerInstruct.executor&quot;: &quot;Debug&quot;,
41
+ &quot;Python.evaluate.executor&quot;: &quot;Debug&quot;,
42
+ &quot;Python.gugugo.executor&quot;: &quot;Debug&quot;,
43
+ &quot;Python.madlad400.executor&quot;: &quot;Debug&quot;,
44
+ &quot;Python.mbart-large-50.executor&quot;: &quot;Debug&quot;,
45
+ &quot;Python.nllb-200-distilled.executor&quot;: &quot;Debug&quot;,
46
+ &quot;Python.synatra.executor&quot;: &quot;Debug&quot;,
47
+ &quot;Python.tokenizer.executor&quot;: &quot;Debug&quot;,
48
+ &quot;Python.translation.executor&quot;: &quot;Debug&quot;,
49
+ &quot;Python.translation2.executor&quot;: &quot;Debug&quot;,
50
+ &quot;Python.translation_conv.executor&quot;: &quot;Debug&quot;,
51
+ &quot;RunOnceActivity.OpenProjectViewOnStart&quot;: &quot;true&quot;,
52
+ &quot;RunOnceActivity.ShowReadmeOnStart&quot;: &quot;true&quot;,
53
+ &quot;last_opened_file_path&quot;: &quot;/work/translation/models&quot;,
54
+ &quot;settings.editor.selected.configurable&quot;: &quot;com.jetbrains.python.configuration.PyActiveSdkModuleConfigurable&quot;
55
+ }
56
+ }</component>
57
+ <component name="RecentsManager">
58
+ <key name="CopyFile.RECENT_KEYS">
59
+ <recent name="$PROJECT_DIR$/models" />
60
+ <recent name="$PROJECT_DIR$/assets" />
61
+ <recent name="$PROJECT_DIR$" />
62
+ <recent name="$PROJECT_DIR$/results" />
63
+ <recent name="$PROJECT_DIR$/data" />
64
+ </key>
65
+ <key name="MoveFile.RECENT_KEYS">
66
+ <recent name="$PROJECT_DIR$" />
67
+ <recent name="$PROJECT_DIR$/models" />
68
+ </key>
69
+ </component>
70
+ <component name="RunManager" selected="Python.evaluate">
71
+ <configuration name="evaluate" type="PythonConfigurationType" factoryName="Python" temporary="true" nameIsGenerated="true">
72
+ <module name="translation" />
73
+ <option name="ENV_FILES" value="" />
74
+ <option name="INTERPRETER_OPTIONS" value="" />
75
+ <option name="PARENT_ENVS" value="true" />
76
+ <envs>
77
+ <env name="PYTHONUNBUFFERED" value="1" />
78
+ </envs>
79
+ <option name="SDK_HOME" value="" />
80
+ <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
81
+ <option name="IS_MODULE_SDK" value="true" />
82
+ <option name="ADD_CONTENT_ROOTS" value="true" />
83
+ <option name="ADD_SOURCE_ROOTS" value="true" />
84
+ <option name="SCRIPT_NAME" value="$PROJECT_DIR$/evaluate.py" />
85
+ <option name="PARAMETERS" value="results" />
86
+ <option name="SHOW_COMMAND_LINE" value="false" />
87
+ <option name="EMULATE_TERMINAL" value="false" />
88
+ <option name="MODULE_MODE" value="false" />
89
+ <option name="REDIRECT_INPUT" value="false" />
90
+ <option name="INPUT_FILE" value="" />
91
+ <method v="2" />
92
+ </configuration>
93
+ <configuration name="synatra" type="PythonConfigurationType" factoryName="Python" temporary="true" nameIsGenerated="true">
94
+ <module name="translation" />
95
+ <option name="ENV_FILES" value="" />
96
+ <option name="INTERPRETER_OPTIONS" value="" />
97
+ <option name="PARENT_ENVS" value="true" />
98
+ <envs>
99
+ <env name="PYTHONUNBUFFERED" value="1" />
100
+ </envs>
101
+ <option name="SDK_HOME" value="" />
102
+ <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$/models" />
103
+ <option name="IS_MODULE_SDK" value="true" />
104
+ <option name="ADD_CONTENT_ROOTS" value="true" />
105
+ <option name="ADD_SOURCE_ROOTS" value="true" />
106
+ <option name="SCRIPT_NAME" value="$PROJECT_DIR$/models/synatra.py" />
107
+ <option name="PARAMETERS" value="" />
108
+ <option name="SHOW_COMMAND_LINE" value="false" />
109
+ <option name="EMULATE_TERMINAL" value="false" />
110
+ <option name="MODULE_MODE" value="false" />
111
+ <option name="REDIRECT_INPUT" value="false" />
112
+ <option name="INPUT_FILE" value="" />
113
+ <method v="2" />
114
+ </configuration>
115
+ <configuration name="tokenizer" type="PythonConfigurationType" factoryName="Python" temporary="true" nameIsGenerated="true">
116
+ <module name="translation" />
117
+ <option name="ENV_FILES" value="" />
118
+ <option name="INTERPRETER_OPTIONS" value="" />
119
+ <option name="PARENT_ENVS" value="true" />
120
+ <envs>
121
+ <env name="PYTHONUNBUFFERED" value="1" />
122
+ </envs>
123
+ <option name="SDK_HOME" value="" />
124
+ <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$/utils" />
125
+ <option name="IS_MODULE_SDK" value="true" />
126
+ <option name="ADD_CONTENT_ROOTS" value="true" />
127
+ <option name="ADD_SOURCE_ROOTS" value="true" />
128
+ <option name="SCRIPT_NAME" value="$PROJECT_DIR$/utils/tokenizer.py" />
129
+ <option name="PARAMETERS" value="" />
130
+ <option name="SHOW_COMMAND_LINE" value="false" />
131
+ <option name="EMULATE_TERMINAL" value="false" />
132
+ <option name="MODULE_MODE" value="false" />
133
+ <option name="REDIRECT_INPUT" value="false" />
134
+ <option name="INPUT_FILE" value="" />
135
+ <method v="2" />
136
+ </configuration>
137
+ <configuration name="translation" type="PythonConfigurationType" factoryName="Python" temporary="true" nameIsGenerated="true">
138
+ <module name="translation" />
139
+ <option name="ENV_FILES" value="" />
140
+ <option name="INTERPRETER_OPTIONS" value="" />
141
+ <option name="PARENT_ENVS" value="true" />
142
+ <envs>
143
+ <env name="PYTHONUNBUFFERED" value="1" />
144
+ <env name="CUDA_VISIBLE_DEVICES" value="1" />
145
+ </envs>
146
+ <option name="SDK_HOME" value="" />
147
+ <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
148
+ <option name="IS_MODULE_SDK" value="true" />
149
+ <option name="ADD_CONTENT_ROOTS" value="true" />
150
+ <option name="ADD_SOURCE_ROOTS" value="true" />
151
+ <option name="SCRIPT_NAME" value="$PROJECT_DIR$/translation.py" />
152
+ <option name="PARAMETERS" value="" />
153
+ <option name="SHOW_COMMAND_LINE" value="false" />
154
+ <option name="EMULATE_TERMINAL" value="false" />
155
+ <option name="MODULE_MODE" value="false" />
156
+ <option name="REDIRECT_INPUT" value="false" />
157
+ <option name="INPUT_FILE" value="" />
158
+ <method v="2" />
159
+ </configuration>
160
+ <configuration name="translation2" type="PythonConfigurationType" factoryName="Python" temporary="true" nameIsGenerated="true">
161
+ <module name="translation" />
162
+ <option name="ENV_FILES" value="" />
163
+ <option name="INTERPRETER_OPTIONS" value="" />
164
+ <option name="PARENT_ENVS" value="true" />
165
+ <envs>
166
+ <env name="PYTHONUNBUFFERED" value="1" />
167
+ </envs>
168
+ <option name="SDK_HOME" value="" />
169
+ <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
170
+ <option name="IS_MODULE_SDK" value="true" />
171
+ <option name="ADD_CONTENT_ROOTS" value="true" />
172
+ <option name="ADD_SOURCE_ROOTS" value="true" />
173
+ <option name="SCRIPT_NAME" value="$PROJECT_DIR$/translation2.py" />
174
+ <option name="PARAMETERS" value="" />
175
+ <option name="SHOW_COMMAND_LINE" value="false" />
176
+ <option name="EMULATE_TERMINAL" value="false" />
177
+ <option name="MODULE_MODE" value="false" />
178
+ <option name="REDIRECT_INPUT" value="false" />
179
+ <option name="INPUT_FILE" value="" />
180
+ <method v="2" />
181
+ </configuration>
182
+ <list>
183
+ <item itemvalue="Python.evaluate" />
184
+ <item itemvalue="Python.tokenizer" />
185
+ <item itemvalue="Python.translation2" />
186
+ <item itemvalue="Python.synatra" />
187
+ <item itemvalue="Python.translation" />
188
+ </list>
189
+ <recent_temporary>
190
+ <list>
191
+ <item itemvalue="Python.evaluate" />
192
+ <item itemvalue="Python.translation2" />
193
+ <item itemvalue="Python.tokenizer" />
194
+ <item itemvalue="Python.translation" />
195
+ <item itemvalue="Python.synatra" />
196
+ </list>
197
+ </recent_temporary>
198
+ </component>
199
+ <component name="SharedIndexes">
200
+ <attachedChunks>
201
+ <set>
202
+ <option value="bundled-python-sdk-09665e90c3a7-d3b881c8e49f-com.jetbrains.pycharm.community.sharedIndexes.bundled-PC-233.15026.15" />
203
+ </set>
204
+ </attachedChunks>
205
+ </component>
206
+ <component name="SpellCheckerSettings" RuntimeDictionaries="0" Folders="0" CustomDictionaries="0" DefaultDictionary="application-level" UseSingleDictionary="true" transferred="true" />
207
+ <component name="TaskManager">
208
+ <task active="true" id="Default" summary="Default task">
209
+ <changelist id="3b99adef-9597-4d3b-ace1-b9d588ce4682" name="Changes" comment="" />
210
+ <created>1708421988537</created>
211
+ <option name="number" value="Default" />
212
+ <option name="presentableId" value="Default" />
213
+ <updated>1708421988537</updated>
214
+ </task>
215
+ <servers />
216
+ </component>
217
+ </project>
README.md CHANGED
@@ -1,12 +1,340 @@
1
  ---
2
- title: Ko Translation Leaderbaord
3
- emoji: 🔥
4
- colorFrom: blue
5
- colorTo: pink
6
  sdk: gradio
7
- sdk_version: 4.25.0
8
- app_file: app.py
9
- pinned: false
10
  ---
 
 
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: ko-translation-leaderboard
3
+ app_file: leaderboard.py
 
 
4
  sdk: gradio
5
+ sdk_version: 3.50.2
 
 
6
  ---
7
+ # Iris Translation
8
+ ![iris-icon.jpeg](assets/iris-icon.jpeg)
9
 
10
+ Welcome to Iris Translation, a project designed to evaluate Korean-to-English translation models. Our project provides a comprehensive framework for evaluating the Iris model that we have developed.
11
+
12
+
13
+
14
+ ## Models
15
+
16
+ 번역 품질을 비교하기 위해 사용한 모델입니다. 모두 실행 가능하며 결과를 확인할 수 있습니다.
17
+
18
+ - [davidkim205/iris-7b](https://huggingface.co/davidkim205/iris-7b)
19
+ - [squarelike/Gugugo-koen-7B-V1.1](https://huggingface.co/squarelike/Gugugo-koen-7B-V1.1)
20
+ - [maywell/Synatra-7B-v0.3-Translation](https://huggingface.co/maywell/Synatra-7B-v0.3-Translation)
21
+ - [Unbabel/TowerInstruct-7B-v0.1](https://huggingface.co/Unbabel/TowerInstruct-7B-v0.1)
22
+ - [jbochi/madlad400-10b-mt](https://huggingface.co/jbochi/madlad400-10b-mt)
23
+ - [facebook/mbart-large-50-many-to-many-mmt](https://huggingface.co/facebook/mbart-large-50-many-to-many-mmt)
24
+ - [facebook/nllb-200-distilled-1.3B](https://huggingface.co/facebook/nllb-200-distilled-1.3B)
25
+
26
+
27
+
28
+ ## Installation
29
+
30
+ ```
31
+ conda create -n translation python=3.10
32
+ conda activate translation
33
+
34
+ pip install -r requirements.txt
35
+ ```
36
+
37
+
38
+ ## Usage
39
+
40
+ 입력으로 주어지는 기본 파일은 `./data/komt-1810k-test.jsonl`입니다. 다음은 데이터의 JSON 스키마 예시입니다.
41
+
42
+ ```json
43
+ {
44
+ "conversations":[
45
+ {
46
+ "from":"human",
47
+ "value":"다음 문장을 한글로 번역하세요.\nLet's make a graph here showing different levels of interest in activities."
48
+ },
49
+ {
50
+ "from":"gpt",
51
+ "value":"활동에 대한 다양한 수준의 관심을 보여주는 그래프를 만들어 보겠습니다."
52
+ }
53
+ ],
54
+ "src":"aihub-MTPE"
55
+ }
56
+ ```
57
+
58
+ ### translate(Bleu)
59
+
60
+ 모델을 사용한 번역 결과와 실제 번역 결과를 비교하여 bleu score를 구합니다.
61
+
62
+ ```
63
+ python translation.py --model davidkim205/iris-7b
64
+ ```
65
+
66
+ 결과 파일의 경로는 `results_bleu/iris-7b-result.jsonl`입니다.
67
+
68
+ JSON 스키마 예시
69
+
70
+ - reference: 실제 정답 번역문
71
+ - generation: 모델이 생성한 번역문
72
+
73
+ ```json
74
+ {
75
+ "index":0,
76
+ "reference":"활동에 대한 다양한 수준의 관심을 보여주는 그래프를 만들어 보겠습니다.",
77
+ "generation":"여기서 활동에 대한 다양한 수준의 관심을 보여주는 그래프를 만들어 보겠습니다.",
78
+ "bleu":0.917,
79
+ "lang":"en",
80
+ "model":"davidkim205/iris-7b",
81
+ "src":"aihub-MTPE",
82
+ "conversations":[
83
+ {
84
+ "from":"human",
85
+ "value":"다음 문장을 한글로 번역하세요.\nLet's make a graph here showing different levels of interest in activities."
86
+ },
87
+ {
88
+ "from":"gpt",
89
+ "value":"활동에 대한 다양한 수준의 관심을 보여주는 그래프를 만들어 보겠습니다."
90
+ }
91
+ ]
92
+ }
93
+ ```
94
+
95
+ ### translate_self(SBleu)
96
+
97
+ 모델 번역 결과를 다시 번역하여 원문과의 bleu score를 비교합니다.
98
+
99
+ ```
100
+ python translation_self.py --model davidkim205/iris-7b
101
+ ```
102
+
103
+ 결과 파일의 경로는 `results_self/iris-7b-result.jsonl`입니다.
104
+
105
+ JSON 스키마 예시
106
+
107
+ - reference: 원문
108
+ - generation: 모델 재번역 결과
109
+ - generation1: 모델 번역문
110
+
111
+ ```json
112
+ {
113
+ "index":0,
114
+ "reference":"Let's make a graph here showing different levels of interest in activities.",
115
+ "generation":"let's create a graph that shows different levels of interest in activities here",
116
+ "generation1":"여기서 활동에 대한 다양한 수준의 관심을 보여주는 그래프를 만들어 보겠습니다.",
117
+ "bleu":0.49,
118
+ "lang":"en",
119
+ "model":"davidkim205/iris-7b",
120
+ "src":"aihub-MTPE",
121
+ "conversations":[
122
+ {
123
+ "from":"human",
124
+ "value":"다음 문장을 한글로 번역하세요.\nLet's make a graph here showing different levels of interest in activities."
125
+ },
126
+ {
127
+ "from":"gpt",
128
+ "value":"활동에 대한 다양한 수준의 관심을 보여주는 그래프를 만들어 보겠습니다."
129
+ }
130
+ ]
131
+ }
132
+ ```
133
+
134
+ ### translate2(Bleu and SBleu)
135
+ translate와 translate_self를 모두 수행하여 bleu 및 sbleu를 모두 비교할 수 있습니다.
136
+
137
+ ```
138
+ python translation2.py --model davidkim205/iris-7b
139
+ ```
140
+
141
+ - translate를 수행하여 `results_bleu/iris-7b-result.jsonl`에 저장
142
+ - translate_self를 수행하여 `results_self/iris-7b-result.jsonl`에 저장
143
+
144
+ 각 파일은 위에서 생성한 두 파일과 동일한 결과를 갖습니다.
145
+
146
+
147
+
148
+ ## Evaluation
149
+
150
+ 두 가지 방식으로 번역 결과를 검증합니다.
151
+
152
+ 1. 실제 번역과 모델 번역을 비교하여 평가
153
+
154
+ ```
155
+ python evaluate.py results_bleu/
156
+ ```
157
+
158
+ output
159
+
160
+ ```
161
+ bleu scores
162
+ result_bleu-nllb200.jsonl: 0.26, out_of_range_count=3, duplicate=1
163
+ result_bleu-madlad400.jsonl: 0.29, out_of_range_count=6, duplicate=3
164
+ result_bleu-TowerInstruct.jsonl: 0.32, out_of_range_count=9, duplicate=1
165
+ result_bleu-gugugo.jsonl: 0.32, out_of_range_count=3, duplicate=1
166
+ result_bleu-Synatra-7B-v0.3-Translation.jsonl: 0.35, out_of_range_count=2, duplicate=1
167
+ result_bleu-deepl.jsonl: 0.39, out_of_range_count=1, duplicate=0
168
+ result_bleu-azure.jsonl: 0.40, out_of_range_count=2, duplicate=0
169
+ result_bleu-google.jsonl: 0.40, out_of_range_count=3, duplicate=0
170
+ result_bleu-papago.jsonl: 0.43, out_of_range_count=3, duplicate=0
171
+ result_bleu-iris_7b.jsonl: 0.40, out_of_range_count=3, duplicate=0
172
+ ```
173
+
174
+ 2. 원문을 2번 번역(영->한->영)한 결과와 비교하여 평가
175
+
176
+ ```
177
+ python evaluate.py results_self/
178
+ ```
179
+
180
+ output
181
+
182
+ ```
183
+ bleu scores
184
+ result_self-nllb200.jsonl: 0.30, out_of_range_count=1, duplicate=1
185
+ result_self-gugugo.jsonl: 0.36, out_of_range_count=1, duplicate=1
186
+ result_self-madlad400.jsonl: 0.38, out_of_range_count=3, duplicate=2
187
+ result_self-TowerInstruct.jsonl: 0.39, out_of_range_count=3, duplicate=0
188
+ result_self-Synatra-7B-v0.3-Translation.jsonl: 0.41, out_of_range_count=2, duplicate=1
189
+ result_self-deepl.jsonl: 0.45, out_of_range_count=0, duplicate=0
190
+ result_self-papago.jsonl: 0.49, out_of_range_count=0, duplicate=0
191
+ result_self-azure.jsonl: 0.49, out_of_range_count=0, duplicate=1
192
+ result_self-google.jsonl: 0.49, out_of_range_count=0, duplicate=0
193
+ result_self-papago.jsonl: 0.51, out_of_range_count=0, duplicate=0
194
+ result_self-iris_7b.jsonl: 0.43, out_of_range_count=1, duplicate=0
195
+ ```
196
+
197
+ **평가 요소**
198
+
199
+ - BLEU: 실제 번역과 모델 번역의 bleu score 평균
200
+ - SBLEU: 원문과 재번역의 bleu score 평균
201
+ - Duplicate: 번역 시 중복된 텍스트를 생성하는 경우
202
+ - Length Exceeds: 모델 번역과 실제 번역 길이의 불일치(0.2 < length < 2 기준)
203
+
204
+ ### BLEU
205
+
206
+ 각 모델별로 평가한 결과입니다. iris-7b 모델의 평가는 아래와 같습니다.
207
+
208
+ - 모든 평가에서 기존 모델들보다 높은 번역 성능
209
+ - 평균적으로 클라우드 번역과 동일한 번역 성능
210
+ - 중복 문장 생성 및 길이 초과 문제는 클라우드 번역과 동일한 수준
211
+
212
+ ![plot-bleu.png](assets/plot-bleu.png)
213
+
214
+ Duplicate(중복 문장 생성)와 Length Exceeds(길이 초과)는 translation(bleu)의 지표입니다.
215
+
216
+ | TYPE | Model | BLEU | SBLEU | Duplicate | Length Exceeds |
217
+ | ----------- | :---------------------------------- | ---- | ----- | --------- | -------------- |
218
+ | HuggingFace | facebook/nllb-200-distilled-1.3B | 0.26 | 0.30 | 1 | 3 |
219
+ | HuggingFace | jbochi/madlad400-10b-mt | 0.29 | 0.38 | 3 | 6 |
220
+ | HuggingFace | Unbabel/TowerInstruct-7B-v0.1 | 0.32 | 0.39 | 1 | 9 |
221
+ | HuggingFace | squarelike/Gugugo-koen-7B-V1.1 | 0.32 | 0.36 | 1 | 3 |
222
+ | HuggingFace | maywell/Synatra-7B-v0.3-Translation | 0.35 | 0.41 | 1 | 2 |
223
+ | Cloud | deepl | 0.39 | 0.45 | 0 | 1 |
224
+ | Cloud | azure | 0.40 | 0.49 | 0 | 3 |
225
+ | Cloud | google | 0.40 | 0.49 | 0 | 2 |
226
+ | Cloud | papago | 0.43 | 0.51 | 0 | 3 |
227
+ | HuggingFace | davidkim205/iris-7b (**ours**) | 0.40 | 0.43 | 0 | 3 |
228
+
229
+ * SBLEU: Self-evaluation BLEU
230
+
231
+ ### BLEU by source
232
+
233
+ 분야별로 테스트 데이터셋 번역 품질을 평가한 결과입니다. iris-7b 모델의 평가는 아래와 같습니다.
234
+
235
+ - 모든 분야에서 기존 번역모델을 압도하는 성능
236
+ - 많은 분야에서 클라우드 번역과 비슷하거나, 더 나은 성능
237
+ - 과학 분야, 신조어 분야의 번역 품질이 매우 우수
238
+
239
+ ![plot-bleu-by-src.png](assets/plot-bleu-by-src.png)
240
+
241
+ | Type | Model | Average | MTPE | techsci2 | expertise | humanities | sharegpt-deepl-ko-translation | MT-new-corpus | socialsci | korean-parallel-corpora | parallel-translation | food | techsci | para_pat | speechtype-based-machine-translation | koopus100 | basicsci | broadcast-content | patent | colloquial |
242
+ | ----------- | :---------------------------------- | ------- | ---: | -------: | --------: | ---------: | ----------------------------: | ------------: | --------: | ----------------------: | -------------------: | ---: | ------: | -------: | -----------------------------------: | --------: | -------: | ----------------: | -----: | ---------: |
243
+ | HuggingFace | facebook/nllb-200-distilled-1.3B | 0.26 | 0.44 | 0.28 | 0.16 | 0.23 | 0.44 | 0.34 | 0.27 | 0.10 | 0.23 | 0.37 | 0.28 | 0.19 | 0.29 | 0.23 | 0.15 | 0.33 | 0.09 | 0.29 |
244
+ | HuggingFace | jbochi/madlad400-10b-mt | 0.29 | 0.45 | 0.29 | 0.20 | 0.29 | 0.40 | 0.36 | 0.39 | 0.12 | 0.22 | 0.46 | 0.30 | 0.23 | 0.48 | 0.23 | 0.19 | 0.36 | 0.01 | 0.33 |
245
+ | HuggingFace | Unbabel/TowerInstruct-7B-v0.1 | 0.32 | 0.46 | 0.33 | 0.28 | 0.27 | 0.30 | 0.39 | 0.37 | 0.14 | 0.35 | 0.47 | 0.39 | 0.29 | 0.41 | 0.21 | 0.22 | 0.36 | 0.15 | 0.33 |
246
+ | HuggingFace | squarelike/Gugugo-koen-7B-V1.1 | 0.32 | 0.46 | 0.27 | 0.28 | 0.22 | 0.66 | 0.33 | 0.36 | 0.10 | 0.29 | 0.45 | 0.34 | 0.24 | 0.42 | 0.22 | 0.23 | 0.42 | 0.20 | 0.26 |
247
+ | HuggingFace | maywell/Synatra-7B-v0.3-Translation | 0.35 | 0.43 | 0.36 | 0.27 | 0.23 | 0.70 | 0.37 | 0.31 | 0.13 | 0.34 | 0.52 | 0.35 | 0.29 | 0.44 | 0.21 | 0.24 | 0.46 | 0.28 | 0.37 |
248
+ | Cloud | deepl | 0.39 | 0.59 | 0.33 | 0.31 | 0.32 | 0.70 | 0.48 | 0.38 | 0.14 | 0.38 | 0.55 | 0.41 | 0.33 | 0.48 | 0.24 | 0.28 | 0.42 | 0.37 | 0.36 |
249
+ | Cloud | azure | 0.40 | 0.57 | 0.36 | 0.35 | 0.29 | 0.63 | 0.46 | 0.39 | 0.16 | 0.38 | 0.56 | 0.39 | 0.33 | 0.54 | 0.22 | 0.29 | 0.52 | 0.35 | 0.41 |
250
+ | Cloud | google | 0.40 | 0.62 | 0.39 | 0.32 | 0.32 | 0.60 | 0.45 | 0.45 | 0.14 | 0.38 | 0.59 | 0.43 | 0.34 | 0.45 | 0.22 | 0.28 | 0.47 | 0.39 | 0.36 |
251
+ | Cloud | papago | 0.43 | 0.56 | 0.43 | 0.41 | 0.30 | 0.55 | 0.58 | 0.56 | 0.16 | 0.37 | 0.67 | 0.52 | 0.35 | 0.53 | 0.21 | 0.35 | 0.45 | 0.37 | 0.46 |
252
+ | HuggingFace | davidkim205/iris-7b (**ours**) | 0.40 | 0.49 | 0.37 | 0.34 | 0.31 | 0.72 | 0.48 | 0.43 | 0.11 | 0.33 | 0.56 | 0.46 | 0.34 | 0.43 | 0.20 | 0.30 | 0.47 | 0.41 | 0.40 |
253
+
254
+ ### BLEU by sentence length
255
+
256
+ 텍스트의 길이에 따라 4구간으로 데이터를 50개씩 샘플링하여 번역한 평균 점수입니다. 평가에 사용된 데이터셋은 다음과 같습니다.
257
+
258
+ - `data/komt-dataset-100.jsonl`
259
+ - `data/komt-dataset-500.jsonl`
260
+ - `data/komt-dataset-1000.jsonl`
261
+ - `data/komt-dataset-1500.jsonl`
262
+
263
+ 번역 및 bleu score 결과는 `results_length/` 아래에 저장되어 있습니다.
264
+
265
+ 놀랍게도, iris-7b 모델은 모든 구간에서 대부분의 클라우드 번역보다 높은 성능을 보입니다.
266
+
267
+ - ~100: (0, 100]
268
+ - ~500: (100, 500]
269
+ - ~1000: (500, 1000]
270
+ - ~1500: (1000, 1500]
271
+
272
+ ![plot-bleu-by-sentence-length.png](assets/plot-bleu-by-sentence-length.png)
273
+
274
+ | Type | Model | Average | ~100(50) | ~500(50) | ~1000(50) | ~1500(50) |
275
+ | ----------- | :---------------------------------- | ------- | -------: | -------: | --------: | --------: |
276
+ | HuggingFace | facebook/nllb-200-distilled-1.3B | 0.24 | 0.31 | 0.31 | 0.22 | 0.13 |
277
+ | HuggingFace | jbochi/madlad400-10b-mt | 0.22 | 0.35 | 0.37 | 0.08 | 0.10 |
278
+ | HuggingFace | Unbabel/TowerInstruct-7B-v0.1 | 0.32 | 0.41 | 0.31 | 0.24 | 0.32 |
279
+ | HuggingFace | squarelike/Gugugo-koen-7B-V1.1 | 0.45 | 0.37 | 0.48 | 0.52 | 0.43 |
280
+ | HuggingFace | maywell/Synatra-7B-v0.3-Translation | 0.50 | 0.41 | 0.57 | 0.57 | 0.51 |
281
+ | Cloud | deepl | 0.53 | 0.44 | 0.56 | 0.64 | 0.50 |
282
+ | Cloud | azure | 0.47 | 0.46 | 0.47 | 0.52 | 0.44 |
283
+ | Cloud | google | 0.51 | 0.50 | 0.49 | 0.54 | 0.51 |
284
+ | Cloud | papago | 0.46 | 0.50 | 0.46 | 0.43 | 0.45 |
285
+ | HuggingFace | davidkim205/iris-7b (**ours**) | 0.56 | 0.51 | 0.58 | 0.62 | 0.54 |
286
+
287
+
288
+
289
+ ## Test dataset info
290
+
291
+ 테스트 데이터셋은 18가지 분야의 데이터 10개로, 총 180개로 이루어져 있습니다.
292
+
293
+ `koopus100` 데이터셋은 길이가 짧고 원문과 번역문이 일치하지 않는 데이터가 존재하여 품질이 낮습니다.
294
+
295
+ ```
296
+ text: All right
297
+ translation: 별로 그럴 기분 아니야 - I'm not in the mood.
298
+
299
+ text: Do you have a fever?
300
+ translation: 뭐라고 했어?
301
+ ```
302
+
303
+ `korean-parallel-corpora` 데이터셋은 번역문에 한영이 혼용되거나, 잘못 번역되어 품질이 낮습니다.
304
+
305
+ ```
306
+ text: S. Korea mulls missile defense system 한국, 자체적 미사일 방어체계 수립 검토     2007.03
307
+ translation: South Korea maintains a mandatory draft system under which all able-bodied men over 20 must serve in the military for 24 to 27 months.
308
+
309
+ text: A United States intelligence agency has been collecting data on the phone calls of tens of millions of Americans, a report in USA Today has alleged.
310
+ translation: NSA collects Americans’phone clall data미 국가안보국, 미국민 통화 내용 수집2006.07
311
+
312
+ text: I see the guy as more like John Wayne, which is to say I don't like his politics but he's endearing in a strange, goofy, awkward way, and he did capture the imagination of the country,\" he said.
313
+ translation: 베트남전에 참전했던 스톤 감독은 비판적으로 호평을 받고 정치적인 성향이 많은 영화를 제작한 것으로 유명하다.
314
+
315
+ text: The Sahara is advancing into Ghana and Nigeria at the rate of 3,510 square kilometers per year.
316
+ translation: 카자흐스탄 또한 사막화로 인해 1980년 이후 농경지의 50%가 사라졌으며 사하라 사막은 매년 3510㎢씩 커져가며 가나와 나이지리아를 위협하고 있다.
317
+ ```
318
+
319
+ 아래 표에는 각 src의 비율과 설명이 정리되어 있습니다.
320
+
321
+ | src | ratio | description |
322
+ | ------------------------------------------ | ----- | ------------------------------------------------------------ |
323
+ | aihub-MTPE | 5.56% | 기계번역 품질 사후검증 데이터셋 |
324
+ | aihub-techsci2 | 5.56% | ICT, 전기/전자 등 기술과학 분야 한영 번역 데이터셋 |
325
+ | aihub-expertise | 5.56% | 의료, 금융, 스포츠 등 전문분야 한영 번역 데이터셋 |
326
+ | aihub-humanities | 5.56% | 인문학 분야 한영 번역 데이터셋 |
327
+ | sharegpt-deepl-ko-translation | 5.56% | shareGPT 데이터셋을 질답 형식에서 한영 번역 형식으로 변환한 데이터셋 |
328
+ | aihub-MT-new-corpus | 5.56% | 기계 번역 앱 구축용 한영 번역 데이터셋 |
329
+ | aihub-socialsci | 5.56% | 법률, 교육, 경제 등 사회과학 분야 한영 번역 데이터셋 |
330
+ | korean-parallel-corpora | 5.56% | 한영 번역 병렬 데이터셋 |
331
+ | aihub-parallel-translation | 5.56% | 발화 유형 및 분야별 한영 번역 데이터셋 |
332
+ | aihub-food | 5.56% | 식품 분야 영한 번역 데이터셋 |
333
+ | aihub-techsci | 5.56% | ICT, 전기/전자 등 기술과학 분야 한영 번역 데이터셋 |
334
+ | para_pat | 5.56% | ParaPat 데이터셋의 영어-한국어 subset |
335
+ | aihub-speechtype-based-machine-translation | 5.56% | 발화 유형별 영한 번역 데이터셋 |
336
+ | koopus100 | 5.56% | OPUS-100 데이터셋의 영어-한국어 subset |
337
+ | aihub-basicsci | 5.56% | 수학, 물리학 등 기초과학 분야 한영 번역 데이터셋 |
338
+ | aihub-broadcast-content | 5.56% | 방송 콘텐츠 분야 한영 번역 데이터셋 |
339
+ | aihub-patent | 5.56% | 특허명세서 영한 번역 데이터셋 |
340
+ | aihub-colloquial | 5.56% | 신조어, 약어 등을 포함하는 구어체 한영 번역 데이터셋 |
assets/iris-icon.jpeg ADDED
assets/plot-bleu-by-sentence-length.png ADDED
assets/plot-bleu-by-src.png ADDED
assets/plot-bleu.png ADDED
create_table.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pandas as pd
3
+
4
+ from collections import defaultdict
5
+ from evaluate import is_duplicated, is_length_exceed, get_average
6
+ from utils.decorate import cloud_model, decorate_model_name
7
+ from utils.file_handler import load_json
8
+
9
+
10
+ # results_bleu/에서 집계
11
def aggregate_bleu(json_data, bleu_table, src_table, length_table):
    """Aggregate per-sentence BLEU records (results_bleu/) into the tables.

    Args:
        json_data: Iterable of records; the keys "model", "src", "bleu",
            "generation" and "reference" are read from each one.
        bleu_table: Per-model summary rows, updated in place with "Bleu",
            "Duplicate" and "Length Exceeds"; the model's mean BLEU is also
            appended to the row's "Average" list.
        src_table: Per-model BLEU scores keyed by source corpus (plus
            "Average"). Each row is replaced by a plain dict of means.
        length_table: Unused; accepted so every aggregator shares a signature.
    """
    duplicates = defaultdict(int)
    length_exceeds = defaultdict(int)

    for record in json_data:
        name = record["model"]
        src_table[name]["Average"].append(record["bleu"])
        src_table[name][record["src"]].append(record["bleu"])
        if is_duplicated(record["generation"]):
            duplicates[name] += 1
        if is_length_exceed(record["reference"], record["generation"]):
            length_exceeds[name] += 1

    # Collapse each list of scores into its mean. Only existing keys are
    # reassigned, so the mapping keeps its size while being iterated.
    for name, row in src_table.items():
        src_table[name] = dict(
            (column, get_average(scores)) for column, scores in row.items()
        )

    # Publish BLEU, duplicate and length-exceed figures on the summary table.
    for name, row in src_table.items():
        summary = bleu_table[name]
        summary["Average"].append(row["Average"])
        summary["Bleu"] = row["Average"]
        summary["Duplicate"] = duplicates[name]
        summary["Length Exceeds"] = length_exceeds[name]
31
+
32
+
33
+ # results_self/에서 집계
34
def aggregate_self(json_data, bleu_table, src_table, length_table):
    """Aggregate self-BLEU records (results_self/) into the summary table.

    Args:
        json_data: Iterable of records; "model" and "bleu" are read.
        bleu_table: Per-model summary rows; gains an "SBleu" mean, which is
            also appended to the row's "Average" list.
        src_table: Unused; kept for the shared aggregator signature.
        length_table: Unused; kept for the shared aggregator signature.
    """
    scores_by_model = defaultdict(list)
    for record in json_data:
        scores_by_model[record["model"]].append(record["bleu"])

    for name, scores in scores_by_model.items():
        summary = bleu_table[name]
        summary["SBleu"] = get_average(scores)
        summary["Average"].append(summary["SBleu"])
43
+
44
+
45
+ # results_length/에서 집계
46
def aggregate_length(json_data, bleu_table, src_table, length_table):
    """Aggregate BLEU-by-sentence-length records (results_length/).

    Args:
        json_data: Iterable of records; "model", "bleu" and "length" are read.
        bleu_table: Per-model summary rows; gains "Bleu-SL", and its "Average"
            list is collapsed into a single number here.
        src_table: Unused; kept for the shared aggregator signature.
        length_table: Per-model BLEU scores keyed by "~<length>" (plus
            "Average"). Each row is replaced by a plain dict of means.
    """
    for record in json_data:
        name = record["model"]
        length_table[name]["Average"].append(record["bleu"])
        length_table[name][f"~{record['length']}"].append(record["bleu"])

    # Only existing keys are reassigned, so iterating while assigning is safe.
    for name, row in length_table.items():
        length_table[name] = dict(
            (column, get_average(scores)) for column, scores in row.items()
        )

    for name, row in length_table.items():
        summary = bleu_table[name]
        summary["Bleu-SL"] = row["Average"]
        summary["Average"].append(summary["Bleu-SL"])
        # Final step: turn the accumulated list of metric means into one score.
        summary["Average"] = get_average(summary["Average"])
60
+
61
+
62
def create():
    """Build the three leaderboard tables from the result directories.

    Every ``.jsonl`` file under ``results_bleu/``, ``results_self/`` and
    ``results_length/`` is loaded with ``load_json`` and handed to the matching
    aggregator. Each aggregated table is then turned into a DataFrame with
    ``Rank``, ``Type`` and ``Model`` columns, sorted by rank.

    Returns:
        A dict view over three DataFrames, in this order: BLEU/SBLEU summary,
        BLEU by source corpus, BLEU by sentence length.
    """
    results_dirs = {
        "results_bleu/": aggregate_bleu,
        "results_self/": aggregate_self,
        "results_length/": aggregate_length,
    }

    bleu_table = defaultdict(lambda: defaultdict(list))
    src_table = defaultdict(lambda: defaultdict(list))
    length_table = defaultdict(lambda: defaultdict(list))
    tables = {
        "bleu_and_sbleu": bleu_table,
        "bleu_by_src": src_table,
        "bleu_by_length": length_table,
    }

    # Aggregate BLEU scores, one directory per aggregator.
    for results_dir, aggregate in results_dirs.items():
        json_data = []
        for filename in os.listdir(results_dir):
            if not filename.endswith(".jsonl"):
                continue
            json_data += load_json(os.path.join(results_dir, filename))
        aggregate(json_data, *tables.values())

    # aggregate_length collapses "Average" only for the models it sees. A model
    # without length results would keep a list there, which cannot be ranked.
    for row in bleu_table.values():
        average = row.get("Average")
        if isinstance(average, list) and average:
            row["Average"] = get_average(average)

    # Build one DataFrame per table.
    for table_name, table in tables.items():
        df = pd.DataFrame.from_dict(table, orient="index")
        if table_name == "bleu_and_sbleu":
            # pop + reassign moves these two columns to the far right.
            df["Duplicate"] = df.pop("Duplicate")
            df["Length Exceeds"] = df.pop("Length Exceeds")
        df.reset_index(inplace=True, names="Model")
        df["Model"] = list(map(decorate_model_name, df["Model"]))
        # NOTE(review): membership is tested on the *decorated* name, as in the
        # original code; confirm cloud_model holds decorated names.
        df.insert(
            0,
            "Type",
            ["Cloud" if name in cloud_model else "HuggingFace" for name in df["Model"]],
        )
        df.insert(
            0, "Rank", df["Average"].rank(method="min", ascending=False).astype(int)
        )
        df = df.sort_values(by="Rank")
        tables[table_name] = df

    return tables.values()
113
+
114
+
115
def main():
    """Print the BLEU/SBLEU summary table as a DataFrame and as Markdown."""
    summary = list(create())[0]
    print("# dataframe")
    print(summary, "\n\n")
    print("# markdown")
    print(summary.to_markdown(index=False))
121
+
122
+
123
+ if __name__ == "__main__":
124
+ main()
data/komt-1810k-test.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
data/komt-dataset-length.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
evaluate.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ from collections import defaultdict
4
+ from nltk.translate.bleu_score import corpus_bleu
5
+ import statistics
6
+ import argparse
7
+ import json
8
+ import os
9
+ import re
10
+ from collections import Counter
11
+
12
def is_duplicated(text, top_k=10, min_word_len=0):
    """Heuristically detect degenerate, repetitive generations.

    Words are counted and the ``top_k`` most frequent ones are inspected. The
    text is flagged when a single distinct word occurs more than five times,
    or when the frequent words average at least five occurrences and the
    frequency then drops by more than that average at rank three or later.

    Args:
        text: Generated text to inspect.
        top_k: How many of the most frequent words to inspect.
        min_word_len: When positive, words of this length or shorter are
            ignored.

    Returns:
        ``False`` when no repetition is detected, otherwise the list of
        ``(word, count)`` pairs that triggered the detection.
    """
    counts = Counter(re.findall(r'\b\w+\b', text))

    # Drop words that are too short to count.
    if min_word_len > 0:
        for short_word in [w for w in counts if len(w) <= min_word_len]:
            del counts[short_word]

    if not counts:
        return False

    if len(counts) == 1:
        only = counts.most_common(1)
        if only[0][1] > 5:
            return only

    ranked = counts.most_common(top_k)
    mean_count = sum(count for _, count in ranked) / len(ranked)
    if mean_count < 5:
        return False

    previous = 0
    for position, (_, count) in enumerate(ranked):
        if previous - count > mean_count:
            # A cliff after only one word is not treated as repetition.
            return ranked if position > 1 else False
        previous = count

    return False
49
+
50
def is_length_exceed(reference, generation, min_ratio=0.2, max_ratio=2):
    """Tell whether the generation length is out of proportion to the reference.

    Args:
        reference: Reference translation.
        generation: Generated translation.
        min_ratio: Smallest accepted ``len(generation) / len(reference)``.
        max_ratio: Largest accepted ``len(generation) / len(reference)``.

    Returns:
        ``True`` when the length ratio falls outside
        ``[min_ratio, max_ratio]``. With an empty reference the ratio is
        undefined: a non-empty generation counts as exceeding, and an empty
        one as matching.
    """
    if len(reference) == 0:
        # Avoid ZeroDivisionError: any output for an empty reference is too long.
        return len(generation) > 0
    return not min_ratio <= (len(generation) / len(reference)) <= max_ratio
52
+
53
def get_average(a):
    """Return the mean of a list rounded to two decimals.

    Non-list values are returned unchanged, so the function can be applied to
    values that were already averaged. An empty list yields ``0.0`` instead of
    raising ``ZeroDivisionError``.
    """
    if isinstance(a, list):
        if not a:
            return 0.0
        return round(sum(a) / len(a), 2)
    return a
57
+
58
+
59
def main():
    """Print per-file BLEU means with length-ratio and repetition diagnostics.

    Reads every ``.jsonl`` file in the directory given on the command line.
    Each line must be a JSON object with "bleu", "reference" and "generation".
    Files are reported in ascending order of mean BLEU. With ``--detail`` the
    offending length ratios and repetition findings are listed as well.
    """
    parser = argparse.ArgumentParser("argument")
    parser.add_argument(
        "directory",
        type=str,
        help="input_file",
    )
    parser.add_argument('--detail', action='store_true', help='detail')
    args = parser.parse_args()

    # Per-file accumulators, keyed by file name.
    file_bleu_scores = defaultdict(list)
    file_length_ratio = defaultdict(list)
    file_duplicated = defaultdict(list)
    file_duplicated_detail = defaultdict(list)

    for filename in os.listdir(args.directory):
        if not filename.endswith('.jsonl'):  # only JSONL files are processed
            continue
        file_path = os.path.join(args.directory, filename)
        with open(file_path, 'r', encoding='utf-8') as file:
            for index, line in enumerate(file):
                data = json.loads(line)
                file_bleu_scores[filename].append(data['bleu'])

                # Length check: ratio of generation to reference, one decimal.
                reference_length = len(data['reference'])
                generation_length = len(data['generation'])
                if reference_length:
                    ratio = round(generation_length / reference_length, 1)
                else:
                    # Undefined ratio: flag any output for an empty reference.
                    ratio = float('inf') if generation_length else 1.0
                file_length_ratio[filename].append(ratio)

                # Repetition check: is_duplicated returns False or a finding.
                word_count = is_duplicated(data['generation'])
                file_duplicated[filename].append(0 if word_count is False else 1)
                if word_count is not False:
                    file_duplicated_detail[filename].append(
                        {'index': index, 'count': word_count, 'generation': data['generation']}
                    )

    sorted_items = sorted(file_bleu_scores.items(), key=lambda item: statistics.mean(item[1]))
    print('bleu scores')
    for filename, bleu_scores in sorted_items:
        avg_bleu = sum(bleu_scores) / len(bleu_scores)
        out_of_range = [
            (index, ratio)
            for index, ratio in enumerate(file_length_ratio[filename])
            if ratio < 0.2 or ratio > 2.0
        ]
        print(f"{filename}: {avg_bleu:.2f}, out_of_range_count={len(out_of_range)}, duplicate={sum(file_duplicated[filename])}")
        if args.detail:
            print(f'\t error length:{out_of_range}')
            print("\t duplication")
            for info in file_duplicated_detail[filename]:
                print('\t\t', info)
114
+
115
+ if __name__ == "__main__":
116
+ main()
leaderboard.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+
4
+ from create_table import create
5
+
6
+
7
+ # 테이블 업데이트
8
def refresh():
    """Rebuild the three leaderboard tables for the Refresh button."""
    bleu_df, src_df, length_df = create()
    return bleu_df, src_df, length_df
11
+
12
+
13
# Leaderboard UI: a header, one tab per table (each with a collapsible chart)
# and a Refresh button that recomputes all three tables.
# NOTE(review): the nesting of the layout blocks below was reconstructed from
# a flattened view of the file; confirm it against the original layout.
with gr.Blocks() as demo:
    # Initialize the tables
    table1, table2, table3 = create()
    with gr.Row():
        gr.Markdown(
            """
# 🏆 Iris Translation Leaderboard
Iris Translation is a project designed to evaluate Korean-to-English translation models

## github
- https://github.com/davidkim205/translation

## How to add model
If you want to add a new model, please write the model name and template in the [github issue](https://github.com/davidkim205/translation/issues).

## evaluation criteria
- **Bleu**: average bleu score
- **SBleu**: Self-Bleu(double translation evaluation)
- **Bleu-SL**: bleu by sentence length
- **Duplicate**: count of repetitive sentence generation
- **Length Exceeds**: count of mismatches in translated sentence lengths exceeding the threshold
"""
        )
    with gr.Row():
        with gr.Tab("bleu and sbleu"):
            with gr.Group():
                # The name is rebound from the DataFrame to the component that
                # shows it, so the Refresh handler can target the component.
                table1 = gr.Dataframe(value=table1, datatype="html")
                with gr.Accordion("Show Chart", open=False):
                    gr.Image(
                        "assets/plot-bleu.png",
                        show_download_button=False,
                        container=False,
                    )
        with gr.Tab("bleu by src"):
            with gr.Group():
                table2 = gr.Dataframe(value=table2, datatype="html")
                with gr.Accordion("Show Chart", open=False):
                    gr.Image(
                        "assets/plot-bleu-by-src.png",
                        show_download_button=False,
                        container=False,
                    )
        with gr.Tab("bleu by sentence length"):
            with gr.Group():
                table3 = gr.Dataframe(value=table3, datatype="html")
                with gr.Accordion("Show Chart", open=False):
                    gr.Image(
                        "assets/plot-bleu-by-sentence-length.png",
                        show_download_button=False,
                        container=False,
                    )

    refresh_btn = gr.Button(value="Refresh")
    # refresh() returns the tables in the same order as the outputs list.
    refresh_btn.click(refresh, outputs=[table1, table2, table3])

# Runs at import time: listens on all interfaces and requests a share link.
demo.launch(server_name='0.0.0.0', share=True)
model.py ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoModelForCausalLM, AutoTokenizer, StoppingCriteria, StoppingCriteriaList
2
+ import torch
3
+ from utils.simple_bleu import simple_score
4
+ import torch
5
+
6
# Prompt templates keyed by template name or Hugging Face model id.
# Each entry provides:
#   stop_words    - strings that end generation when the decoded text ends with one
#   ko2en / en2ko - prompt formats; "{0}" is replaced by the text to translate
#   trim_keywords - markers at which the decoded output is cut off
# Fixes relative to the previous revision:
#   * qwen defined "ko2en" twice, so "en2ko" was missing and "ko2en" asked for
#     Korean output; the second entry is now "en2ko".
#   * gemma listed an empty string as stop word / trim keyword, which matches
#     every text; it is now Gemma's "<end_of_turn>" marker.
#   * the TowerInstruct instruction sentences contradicted the language labels
#     that follow them; each sentence now names the matching direction.
templates = {
    'gemma': {
        'stop_words': ['<eos>', '<end_of_turn>'],
        'ko2en': '<bos><start_of_turn>user\n다음 문장을 영어로 번역하세요.{0}<end_of_turn>\n<start_of_turn>model:',
        'en2ko': '<bos><start_of_turn>user\n다음 문장을 한글로 번역하세요.{0}<end_of_turn>\n<start_of_turn>model:',
        'trim_keywords': ['<eos>', '<end_of_turn>'],
    },
    'openchat': {
        'stop_words': ['<eos>', '<|end_of_turn|>'],
        'ko2en': '<s> GPT4 Correct User: 다음 문장을 영어로 번역하세요. {0}<|end_of_turn|> GPT4 Correct Assistant:',
        'en2ko': '<s> GPT4 Correct User: 다음 문장을 한글로 번역하세요. {0}<|end_of_turn|> GPT4 Correct Assistant:',
        'trim_keywords': ['<eos>', '<|end_of_turn|>'],
    },
    'qwen': {
        'stop_words': ['<eos>', '<|im_end|>'],
        'ko2en': '<|im_start|>system \n You are a helpful assistant<|im_end|>\n <|im_start|>다음 문장을 영어로 번역하세요. \n {0}<|im_end|>\n<|im_start|>assistant\n',
        'en2ko': '<|im_start|>system \n You are a helpful assistant<|im_end|>\n <|im_start|>다음 문장을 한글로 번역하세요. \n {0}<|im_end|>\n<|im_start|>assistant\n',
        'trim_keywords': ['<eos>', '<|im_end|>'],
    },
    'davidkim205/iris-7b': {
        'stop_words': ['</s>'],
        'ko2en': '[INST] 다음 문장을 영어로 번역하세요.{0} [/INST]',
        'en2ko': '[INST] 다음 문장을 한글로 번역하세요.{0} [/INST]',
        'trim_keywords': ['</s>'],
    },
    'squarelike/Gugugo-koen-7B-V1.1': {
        'stop_words': ['</s>', '</끝>'],
        'ko2en': '### 한국어: {0}</끝>\n### 영어:',
        'en2ko': "### 영어: {0}</끝>\n### 한국어:",
        'trim_keywords': ['</s>', '</끝>'],
    },
    'maywell/Synatra-7B-v0.3-Translation': {
        'stop_words': ['</s>', '</끝>', '<|im_end|>'],
        'ko2en': '<|im_start|>system\n주어진 문장을 영어로 번역해라.<|im_end|>\n<|im_start|>user\n{0}<|im_end|>\n<|im_start|>assistant',
        'en2ko': '<|im_start|>system\n주어진 문장을 한국어로 번역해라.<|im_end|>\n<|im_start|>user\n{0}<|im_end|>\n<|im_start|>assistant',
        'trim_keywords': ['<|im_end|>'],
    },
    'Unbabel/TowerInstruct-7B-v0.1': {
        'stop_words': ['</s>', '</끝>', '<|im_end|>'],
        'ko2en': '<|im_start|>user\nTranslate the following text from Korean into English.\nKorean: {0}\nEnglish:<|im_end|>\n<|im_start|>assistant',
        'en2ko': '<|im_start|>user\nTranslate the following text from English into Korean.\nEnglish: {0}\nKorean:<|im_end|>\n<|im_start|>assistant',
        'trim_keywords': ['<|im_end|>'],
    },
}
54
+
55
+ model_info = {'model': None, 'tokenizer': None, 'stopping_criteria': None}
56
+
57
+
58
class LocalStoppingCriteria(StoppingCriteria):
    """Stop generation once the output ends with one of the stop words.

    Only the first sequence of the batch (``input_ids[0]``) is examined. The
    active check decodes that sequence and compares text; a token-level
    comparison is kept as ``_compare_token``.

    Args:
        tokenizer: Tokenizer used to encode the stop words and decode output.
        stop_words: Strings that terminate generation. Empty strings are
            ignored because every text "ends with" the empty string.
    """

    def __init__(self, tokenizer, stop_words=None):
        super().__init__()

        stop_words = [word for word in (stop_words or []) if word]
        # reshape(-1) keeps single-token stop words 1-D, so the token-level
        # comparison can use them too.
        stops = [
            tokenizer(word, return_tensors='pt', add_special_tokens=False)['input_ids'].reshape(-1)
            for word in stop_words
        ]
        print('stop_words', stop_words)
        print('stop_words_ids', stops)
        self.stop_words = stop_words
        self.stops = [stop.cuda() if torch.cuda.is_available() else stop for stop in stops]
        self.tokenizer = tokenizer

    def _compare_token(self, input_ids):
        """Return True when the first sequence ends with a stop token pattern."""
        sequence = input_ids[0]
        for stop in self.stops:
            stop_len = len(stop)
            # A pattern longer than the sequence cannot match, and comparing
            # tensors of different lengths would raise.
            if stop_len == 0 or stop_len > len(sequence):
                continue
            if torch.all(stop.to(sequence.device) == sequence[-stop_len:]).item():
                return True

        return False

    def _compare_decode(self, input_ids):
        """Return True when the decoded first sequence ends with a stop word."""
        input_str = self.tokenizer.decode(input_ids[0])
        return input_str.endswith(tuple(self.stop_words))

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs):
        return self._compare_decode(input_ids)
90
+
91
+
92
def trim_sentence(sentence, keywords):
    """Cut the sentence at the first occurrence of each keyword.

    Keywords are applied in order. Whenever one is found, everything from it
    onward is dropped and the remainder is stripped of surrounding whitespace.
    A sentence containing no keyword is returned unchanged. Empty keywords are
    skipped: the empty string is found at index 0 of any text and would erase
    the whole sentence.

    Args:
        sentence: Decoded model output.
        keywords: Iterable of marker strings.

    Returns:
        The trimmed sentence.
    """
    for keyword in keywords:
        if not keyword:
            continue
        head, found, _ = sentence.partition(keyword)
        if found:
            sentence = head.strip()
    return sentence
100
+
101
+
102
def load_model(path, template_name=None):
    """Load a causal LM and tokenizer and register them in ``model_info``.

    Args:
        path: Model id or local path given to ``from_pretrained``.
        template_name: Key into ``templates``. Defaults to ``path``; unknown
            names fall back to the 'davidkim205/iris-7b' template.
    """
    print('load_model', path)
    chosen = path if template_name is None else template_name
    if templates.get(chosen) is None:
        chosen = 'davidkim205/iris-7b'
    template = templates[chosen]

    loaded_model = AutoModelForCausalLM.from_pretrained(path, torch_dtype=torch.bfloat16, device_map='auto')
    loaded_tokenizer = AutoTokenizer.from_pretrained(path)

    model_info['model'] = loaded_model
    model_info['tokenizer'] = loaded_tokenizer
    model_info['template'] = template

    criteria = LocalStoppingCriteria(tokenizer=loaded_tokenizer, stop_words=template['stop_words'])
    model_info['stopping_criteria'] = StoppingCriteriaList([criteria])
119
+
120
+
121
def generate(prompt):
    """Generate a completion for the prompt with the loaded model.

    Args:
        prompt: Fully formatted prompt string.

    Returns:
        The decoded continuation (prompt tokens removed), cut at the active
        template's trim keywords; an empty string when no model is loaded.
    """
    model = model_info['model']
    if model is None:
        print('model is null, load the model first.')
        return ''
    tokenizer = model_info['tokenizer']
    stopping_criteria = model_info['stopping_criteria']
    # Follow the model's own device instead of assuming "cuda".
    encoding = tokenizer(
        prompt,
        return_tensors='pt',
        return_token_type_ids=False
    ).to(model.device)
    gen_tokens = model.generate(
        **encoding,
        max_new_tokens=2048,
        temperature=1.0,
        num_beams=5,
        stopping_criteria=stopping_criteria
    )
    # Decode only what was generated after the prompt.
    prompt_end_size = encoding.input_ids.shape[1]
    result = tokenizer.decode(gen_tokens[0, prompt_end_size:])
    return trim_sentence(result, model_info['template']['trim_keywords'])
145
+
146
+
147
def translate_ko2en(text):
    """Translate Korean text to English using the active template."""
    ko2en_format = model_info['template']['ko2en']
    return generate(ko2en_format.format(text))
151
+
152
+
153
def translate_en2ko(text):
    """Translate English text to Korean using the active template."""
    en2ko_format = model_info['template']['en2ko']
    return generate(en2ko_format.format(text))
157
+
158
+
159
def main():
    """Interactive round-trip demo for the iris-7b model.

    Each input line is translated Korean -> English -> Korean, and both
    results are printed with ``simple_score`` of the input against the
    back-translation. Loops until the process is interrupted.
    """
    # Alternatives with a registered template:
    #   "squarelike/Gugugo-koen-7B-V1.1"
    #   "maywell/Synatra-7B-v0.3-Translation"
    #   "Unbabel/TowerInstruct-7B-v0.1"
    load_model("davidkim205/iris-7b")
    while True:
        source = input('>')
        english = translate_ko2en(source)
        back_translated = translate_en2ko(english)
        print('------------------')
        print('en_text', english)
        print('ko_text', back_translated)
        print('score', simple_score(source, back_translated))
172
+
173
+
174
+ if __name__ == "__main__":
175
+ main()
models/TowerInstruct.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from transformers import pipeline
3
+ from utils.simple_bleu import simple_score
4
+
5
+ pipe = pipeline("text-generation", model="Unbabel/TowerInstruct-v0.1", torch_dtype=torch.bfloat16, device_map="auto")
6
+
7
def translate_ko2en(text):
    """Translate Korean text to English with the TowerInstruct pipeline.

    The chat template is applied to a single user turn, and the text after
    the assistant marker is returned with leading newline/colon residue
    removed.
    """
    user_turn = {
        "role": "user",
        "content": f"Translate the following text from Korean into English.\n: Korean:{text}\nEnglish:",
    }
    prompt = pipe.tokenizer.apply_chat_template([user_turn], tokenize=False, add_generation_prompt=True)
    generated = pipe(prompt, max_new_tokens=2048, do_sample=False)[0]["generated_text"]
    # generated_text echoes the prompt; keep what follows the assistant marker.
    reply = generated.split('<|im_start|>assistant')[1]
    return reply.replace('\n:', '').lstrip('\n').lstrip(':')
19
+
20
+
21
+
22
def translate_en2ko(text):
    """Translate English text to Korean with the TowerInstruct pipeline.

    The chat template is applied to a single user turn, and the text after
    the assistant marker is returned with leading newline/colon residue
    removed.
    """
    user_turn = {
        "role": "user",
        "content": f"Translate the following text from English into Korean.\nEnglish: {text} \nKorean:",
    }
    prompt = pipe.tokenizer.apply_chat_template([user_turn], tokenize=False, add_generation_prompt=True)
    generated = pipe(prompt, max_new_tokens=2048, do_sample=False)[0]["generated_text"]
    # generated_text echoes the prompt; keep what follows the assistant marker.
    reply = generated.split('<|im_start|>assistant')[1]
    return reply.replace('\n:', '').lstrip('\n').lstrip(':')
35
+
36
def main():
    """Interactive Korean -> English -> Korean round-trip demo.

    Prints both translations and ``simple_score`` of the input against the
    back-translation. Loops until the process is interrupted.
    """
    while True:
        source = input('>')
        english = translate_ko2en(source)
        back_translated = translate_en2ko(english)
        print('en_text', english)
        print('ko_text', back_translated)
        print('score', simple_score(source, back_translated))
44
+
45
+
46
+ if __name__ == "__main__":
47
+ main()
models/gemma.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoModelForCausalLM, AutoTokenizer, StoppingCriteria, StoppingCriteriaList
2
+ import torch
3
+ from utils.simple_bleu import simple_score
4
+ import torch
5
+
6
+ repo = "davidkim205/iris-7b"
7
+ model = AutoModelForCausalLM.from_pretrained(repo, torch_dtype=torch.bfloat16, device_map='auto')
8
+ tokenizer = AutoTokenizer.from_pretrained(repo)
9
+ # model = None
10
+ # tokenizer = None
11
+
12
class StoppingCriteriaSub(StoppingCriteria):
    """Stop generation when the first sequence ends with a stop-token pattern.

    Args:
        stops: Iterable of 1-D token-id tensors (for example the rows of a
            2-D tensor); each one is a pattern that ends generation.
        encounters: Unused; kept so existing callers keep working.
    """

    def __init__(self, stops=None, encounters=1):
        super().__init__()
        self.stops = list(stops) if stops is not None else []

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs):
        # Only the first sequence of the batch is examined.
        sequence = input_ids[0]
        for stop in self.stops:
            # A pattern longer than the sequence cannot match, and comparing
            # tensors of different lengths would raise.
            if not 0 < len(stop) <= len(sequence):
                continue
            if torch.all((stop == sequence[-len(stop):])).item():
                return True

        return False
23
+
24
+
25
+ stop_words_ids = torch.tensor(
26
+ [[829, 45107, 29958], [1533, 45107, 29958], [829, 45107, 29958], [21106, 45107, 29958]]).to("cuda")
27
+ stopping_criteria = StoppingCriteriaList([StoppingCriteriaSub(stops=stop_words_ids)])
28
+
29
def load_model(path):
    """Replace the module-level model and tokenizer with those at ``path``."""
    global model, tokenizer
    print('load_model', path)
    loaded_model = AutoModelForCausalLM.from_pretrained(path, torch_dtype=torch.bfloat16, device_map='auto')
    model = loaded_model
    tokenizer = AutoTokenizer.from_pretrained(path)
34
+
35
+
36
def generate(prompt):
    """Generate a completion for the prompt and strip prompt/marker text.

    Args:
        prompt: Fully formatted prompt string.

    Returns:
        The decoded output with the echoed prompt, end markers and language
        headers removed.
    """
    # Follow the model's own device instead of assuming "cuda".
    inputs = tokenizer(
        prompt,
        return_tensors='pt',
        return_token_type_ids=False
    ).to(model.device)
    gened = model.generate(
        **inputs,
        max_new_tokens=2048,
        temperature=0.3,
        num_beams=5,
        stopping_criteria=stopping_criteria
    )
    # The first token is skipped before decoding, then the echoed prompt and
    # the known markers are removed by text replacement.
    result = tokenizer.decode(gened[0][1:]).replace(prompt + " ", "").replace("</끝>", "")
    result = result.replace('</s>', '')
    result = result.replace('### 한국어: ', '')
    result = result.replace('### 영어: ', '')
    return result
53
+
54
+
55
def translate_ko2en(text):
    """Translate Korean text to English using the ``[INST]`` prompt."""
    return generate(f"[INST] 다음 문장을 영어로 번역하세요.{text} [/INST]")
58
+
59
+
60
def translate_en2ko(text):
    """Translate English text to Korean using the ``[INST]`` prompt."""
    return generate(f"[INST] 다음 문장을 한글로 번역하세요.{text} [/INST]")
63
+
64
+
65
def main():
    """Interactive Korean -> English -> Korean round-trip demo.

    Prints both translations and ``simple_score`` of the input against the
    back-translation. Loops until the process is interrupted.
    """
    while True:
        source = input('>')
        english = translate_ko2en(source)
        back_translated = translate_en2ko(english)
        print('en_text', english)
        print('ko_text', back_translated)
        print('score', simple_score(source, back_translated))
73
+ """
74
+ >>? 3천만 개가 넘는 파일과 250억 개의 토큰이 있습니다. Phi1.5의 데이터 세트 구성에 접근하지만 오픈 소스 모델인 Mixtral 8x7B를 사용하고 Apache2.0 라이선스에 따라 라이선스가 부여됩니다.
75
+ en_text We have 30 million files and 2.5 billion tokens. We approach Phi1.5's dataset composition, but we use the open-source model, Mixtral 8x7B, and we are licensed according to the Apache2.0 license.
76
+ ko_text 3,000만 개의 파일과 250억 개의 토큰이 있습니다. Phi1.5의 데이터 집합에 접근하지만 오픈 소스 모델인 Mixtral 8x7B를 사용하고 Apache2.0 라이선스에 따라 라이선스를 받았습니다.
77
+ score 0.6154733407407874
78
+ """
79
+
80
+ if __name__ == "__main__":
81
+ main()
models/gugugo.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoModelForCausalLM, AutoTokenizer, StoppingCriteria, StoppingCriteriaList
2
+ import torch
3
+ from utils.simple_bleu import simple_score
4
+ import torch
5
+
6
+ repo = "squarelike/Gugugo-koen-7B-V1.1"
7
+ model = AutoModelForCausalLM.from_pretrained(repo, torch_dtype=torch.bfloat16, device_map='auto')
8
+ tokenizer = AutoTokenizer.from_pretrained(repo)
9
+
10
+
11
class StoppingCriteriaSub(StoppingCriteria):
    """Stop generation when the first sequence ends with a stop-token pattern.

    Args:
        stops: Iterable of 1-D token-id tensors (for example the rows of a
            2-D tensor); each one is a pattern that ends generation.
        encounters: Unused; kept so existing callers keep working.
    """

    def __init__(self, stops=None, encounters=1):
        super().__init__()
        self.stops = list(stops) if stops is not None else []

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs):
        # Only the first sequence of the batch is examined.
        sequence = input_ids[0]
        for stop in self.stops:
            # A pattern longer than the sequence cannot match, and comparing
            # tensors of different lengths would raise.
            if not 0 < len(stop) <= len(sequence):
                continue
            if torch.all((stop == sequence[-len(stop):])).item():
                return True

        return False
22
+
23
+
24
+ stop_words_ids = torch.tensor(
25
+ [[829, 45107, 29958], [1533, 45107, 29958], [829, 45107, 29958], [21106, 45107, 29958]]).to("cuda")
26
+ stopping_criteria = StoppingCriteriaList([StoppingCriteriaSub(stops=stop_words_ids)])
27
+
28
+
29
def generate(prompt):
    """Generate a completion for the prompt and strip prompt/marker text.

    Args:
        prompt: Fully formatted prompt string.

    Returns:
        The decoded output with the echoed prompt, end markers and language
        headers removed.
    """
    # Follow the model's own device instead of assuming "cuda".
    inputs = tokenizer(
        prompt,
        return_tensors='pt',
        return_token_type_ids=False
    ).to(model.device)
    gened = model.generate(
        **inputs,
        max_new_tokens=2048,
        temperature=0.1,
        num_beams=5,
        stopping_criteria=stopping_criteria
    )
    # The first token is skipped before decoding, then the echoed prompt and
    # the known markers are removed by text replacement.
    result = tokenizer.decode(gened[0][1:]).replace(prompt + " ", "").replace("</끝>", "")
    result = result.replace('</s>', '')
    result = result.replace('### 한국어: ', '')
    result = result.replace('### 영어: ', '')
    return result
46
+
47
+
48
def translate_ko2en(text):
    """Translate Korean text to English using the Gugugo prompt format."""
    return generate(f"### 한국어: {text}</끝>\n### 영어:")
51
+
52
+
53
def translate_en2ko(text):
    """Translate English text to Korean using the Gugugo prompt format."""
    return generate(f"### 영어: {text}</끝>\n### 한국어:")
56
+
57
+
58
def main():
    """Interactive Korean -> English -> Korean round-trip demo.

    Prints both translations and ``simple_score`` of the input against the
    back-translation. Loops until the process is interrupted.
    """
    while True:
        source = input('>')
        english = translate_ko2en(source)
        back_translated = translate_en2ko(english)
        print('en_text', english)
        print('ko_text', back_translated)
        print('score', simple_score(source, back_translated))
66
+ """
67
+ >>? 3천만 개가 넘는 파일과 250억 개의 토큰이 있습니다. Phi1.5의 데이터 세트 구성에 접근하지만 오픈 소스 모델인 Mixtral 8x7B를 사용하고 Apache2.0 라이선스에 따라 라이선스가 부여됩니다.
68
+ en_text We have 30 million files and 2.5 billion tokens. We approach Phi1.5's dataset composition, but we use the open-source model, Mixtral 8x7B, and we are licensed according to the Apache2.0 license.
69
+ ko_text 3,000만 개의 파일과 250억 개의 토큰이 있습니다. Phi1.5의 데이터 집합에 접근하지만 오픈 소스 모델인 Mixtral 8x7B를 사용하고 Apache2.0 라이선스에 따라 라이선스를 받았습니다.
70
+ score 0.6154733407407874
71
+ """
72
+
73
+ if __name__ == "__main__":
74
+ main()
models/iris_7b.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoModelForCausalLM, AutoTokenizer, StoppingCriteria, StoppingCriteriaList
2
+ import torch
3
+ from utils.simple_bleu import simple_score
4
+ import torch
5
+
6
+ repo = "davidkim205/iris-7b"
7
+ model = AutoModelForCausalLM.from_pretrained(repo, torch_dtype=torch.bfloat16, device_map='auto')
8
+ tokenizer = AutoTokenizer.from_pretrained(repo)
9
+ # model = None
10
+ # tokenizer = None
11
+
12
class StoppingCriteriaSub(StoppingCriteria):
    """Stop generation when the first sequence ends with a stop-token pattern.

    Args:
        stops: Iterable of 1-D token-id tensors (for example the rows of a
            2-D tensor); each one is a pattern that ends generation.
        encounters: Unused; kept so existing callers keep working.
    """

    def __init__(self, stops=None, encounters=1):
        super().__init__()
        self.stops = list(stops) if stops is not None else []

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs):
        # Only the first sequence of the batch is examined.
        sequence = input_ids[0]
        for stop in self.stops:
            # A pattern longer than the sequence cannot match, and comparing
            # tensors of different lengths would raise.
            if not 0 < len(stop) <= len(sequence):
                continue
            if torch.all((stop == sequence[-len(stop):])).item():
                return True

        return False
23
+
24
+
25
+ stop_words_ids = torch.tensor(
26
+ [[829, 45107, 29958], [1533, 45107, 29958], [829, 45107, 29958], [21106, 45107, 29958]]).to("cuda")
27
+ stopping_criteria = StoppingCriteriaList([StoppingCriteriaSub(stops=stop_words_ids)])
28
+
29
def load_model(path):
    """Replace the module-level model and tokenizer with those at ``path``."""
    global model, tokenizer
    print('load_model', path)
    loaded_model = AutoModelForCausalLM.from_pretrained(path, torch_dtype=torch.bfloat16, device_map='auto')
    model = loaded_model
    tokenizer = AutoTokenizer.from_pretrained(path)
34
+
35
+
36
def generate(prompt):
    """Generate a completion for the prompt and strip prompt/marker text.

    Args:
        prompt: Fully formatted prompt string.

    Returns:
        The decoded output with the echoed prompt, end markers and language
        headers removed.
    """
    # Follow the model's own device instead of assuming "cuda".
    inputs = tokenizer(
        prompt,
        return_tensors='pt',
        return_token_type_ids=False
    ).to(model.device)
    gened = model.generate(
        **inputs,
        max_new_tokens=2048,
        temperature=0.3,
        num_beams=5,
        stopping_criteria=stopping_criteria
    )
    # The first token is skipped before decoding, then the echoed prompt and
    # the known markers are removed by text replacement.
    result = tokenizer.decode(gened[0][1:]).replace(prompt + " ", "").replace("</끝>", "")
    result = result.replace('</s>', '')
    result = result.replace('### 한국어: ', '')
    result = result.replace('### 영어: ', '')
    return result
53
+
54
+
55
def translate_ko2en(text):
    """Translate Korean text to English using the iris ``[INST]`` prompt."""
    return generate(f"[INST] 다음 문장을 영어로 번역하세요.{text} [/INST]")
58
+
59
+
60
def translate_en2ko(text):
    """Translate English text to Korean using the iris ``[INST]`` prompt."""
    return generate(f"[INST] 다음 문장을 한글로 번역하세요.{text} [/INST]")
63
+
64
+
65
def main():
    """Interactive Korean -> English -> Korean round-trip demo.

    Prints both translations and ``simple_score`` of the input against the
    back-translation. Loops until the process is interrupted.
    """
    while True:
        source = input('>')
        english = translate_ko2en(source)
        back_translated = translate_en2ko(english)
        print('en_text', english)
        print('ko_text', back_translated)
        print('score', simple_score(source, back_translated))
73
+ """
74
+ >>? 3천만 개가 넘는 파일과 250억 개의 토큰이 있습니다. Phi1.5의 데이터 세트 구성에 접근하지만 오픈 소스 모델인 Mixtral 8x7B를 사용하고 Apache2.0 라이선스에 따라 라이선스가 부여됩니다.
75
+ en_text We have 30 million files and 2.5 billion tokens. We approach Phi1.5's dataset composition, but we use the open-source model, Mixtral 8x7B, and we are licensed according to the Apache2.0 license.
76
+ ko_text 3,000만 개의 파일과 250억 개의 토큰이 있습니다. Phi1.5의 데이터 집합에 접근하지만 오픈 소스 모델인 Mixtral 8x7B를 사용하고 Apache2.0 라이선스에 따라 라이선스를 받았습니다.
77
+ score 0.6154733407407874
78
+ """
79
+
80
+ if __name__ == "__main__":
81
+ main()
models/madlad400.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import T5ForConditionalGeneration, T5Tokenizer
2
+ from utils.simple_bleu import simple_score
3
+ import torch
4
+
5
+ model_name = 'jbochi/madlad400-10b-mt'
6
+ model = T5ForConditionalGeneration.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="auto")
7
+ tokenizer = T5Tokenizer.from_pretrained(model_name)
8
+
9
+
10
def translate_ko2en(text):
    """Translate text to English; the ``<2en>`` prefix selects the target."""
    tagged = f"<2en> {text}"
    encoded = tokenizer(tagged, return_tensors="pt").input_ids.to(model.device)
    generated = model.generate(input_ids=encoded, max_new_tokens=2048)
    return tokenizer.decode(generated[0], skip_special_tokens=True)
17
+
18
+
19
def translate_en2ko(text):
    """Translate text to Korean; the ``<2ko>`` prefix selects the target."""
    tagged = f"<2ko> {text}"
    encoded = tokenizer(tagged, return_tensors="pt").input_ids.to(model.device)
    generated = model.generate(input_ids=encoded, max_new_tokens=2048)
    return tokenizer.decode(generated[0], skip_special_tokens=True)
26
+
27
+
28
def main():
    """Interactive Korean -> English -> Korean round-trip demo.

    Prints both translations and ``simple_score`` of the input against the
    back-translation. Loops until the process is interrupted.
    """
    while True:
        source = input('>')
        english = translate_ko2en(source)
        back_translated = translate_en2ko(english)
        print('en_text', english)
        print('ko_text', back_translated)
        print('score', simple_score(source, back_translated))
36
+
37
+
38
+ if __name__ == "__main__":
39
+ main()
models/mbart50.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
2
+ import torch
3
+ from utils.simple_bleu import simple_score
4
+
5
+ model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-50-many-to-many-mmt", torch_dtype=torch.bfloat16, device_map="auto")
6
+ tokenizer = MBart50TokenizerFast.from_pretrained("facebook/mbart-large-50-many-to-many-mmt")
7
+
8
+
9
def translate_ko2en(text):
    """Translate Korean text to English with mBART-50.

    The tokenizer's source language is switched to ``ko_KR`` and English is
    forced as the first generated token. ``max_new_tokens`` matches
    ``translate_en2ko`` so both directions share the same output budget.
    """
    tokenizer.src_lang = "ko_KR"
    input_ids = tokenizer(text, return_tensors="pt").input_ids.to(model.device)
    outputs = model.generate(
        input_ids=input_ids,
        forced_bos_token_id=tokenizer.lang_code_to_id["en_XX"],
        max_new_tokens=2048,
    )

    return tokenizer.decode(outputs[0], skip_special_tokens=True)
17
+
18
+
19
def translate_en2ko(text):
    """Translate English text to Korean with mBART-50.

    The tokenizer's source language is switched to ``en_XX`` and Korean is
    forced as the first generated token.
    """
    tokenizer.src_lang = "en_XX"
    encoded = tokenizer(text, return_tensors="pt").input_ids.to(model.device)
    korean_bos = tokenizer.lang_code_to_id["ko_KR"]
    generated = model.generate(input_ids=encoded, forced_bos_token_id=korean_bos, max_new_tokens=2048)
    return tokenizer.decode(generated[0], skip_special_tokens=True)
27
+
28
+
29
def main():
    """Interactive Korean -> English -> Korean round-trip demo.

    Prints both translations and ``simple_score`` of the input against the
    back-translation. Loops until the process is interrupted.
    """
    while True:
        source = input('>')
        english = translate_ko2en(source)
        back_translated = translate_en2ko(english)
        print('en_text', english)
        print('ko_text', back_translated)
        print('score', simple_score(source, back_translated))
37
+ """
38
+ >>? 3천만 개가 넘는 파일과 250억 개의 토큰이 있습니다. Phi1.5의 데이터 세트 구성에 접근하지만 오픈 소스 모델인 Mixtral 8x7B를 사용하고 Apache2.0 라이선스에 따라 라이선스가 부여됩니다.
39
+ en_text It has over 30 million files and 2.5 billion tokens, accesses the data set configuration of Phi1.5, but uses an open-source model, Mixtral 8x7B, and is licensed under the Apache 2.0 license.
40
+ ko_text 30만개의 파일과 2.5억개의 토큰을 가지고 있고, Phi1.5의 데이터 세트 configuration에 접근하지만, 오픈소스 모델인 Mixtral 8x7B를 사용하고, Apache 2.0 라이센스 아래 licenc를 가지고 있습니다.
41
+ score 0.14724623770949022
42
+ """
43
+
44
+ if __name__ == "__main__":
45
+ main()
models/nllb200.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
2
+ from utils.simple_bleu import simple_score
3
+ import torch
4
+
5
+ model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-1.3B", torch_dtype=torch.bfloat16, device_map="auto")
6
+ tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-1.3B")
7
+
8
+
9
def translate_ko2en(text):
    """Translate Korean ``text`` into English with the module-level NLLB-200 model.

    Args:
        text: Korean source text; it is wrapped in a one-element batch.

    Returns:
        The decoded translation of that single batch entry, with special
        tokens skipped.
    """
    # The NLLB tokenizer tags the encoded input with ``tokenizer.src_lang``,
    # which defaults to English per the Transformers NLLB documentation.
    # Tag the input as Korean for this call only, then restore the previous
    # value so other users of the shared tokenizer are unaffected.
    previous_src_lang = tokenizer.src_lang
    tokenizer.src_lang = "kor_Hang"
    try:
        inputs = tokenizer([text], return_tensors="pt", padding=True)
    finally:
        tokenizer.src_lang = previous_src_lang

    # Language codes are ordinary vocabulary tokens; look the id up through
    # ``convert_tokens_to_ids`` because ``lang_code_to_id`` is deprecated on
    # recent NLLB tokenizers.
    translated_tokens = model.generate(
        **inputs.to(model.device),
        forced_bos_token_id=tokenizer.convert_tokens_to_ids("eng_Latn"),
    )
    return tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
18
+
19
+
20
def translate_en2ko(text):
    """Translate English ``text`` into Korean with the module-level NLLB-200 model.

    Args:
        text: English source text; it is wrapped in a one-element batch.

    Returns:
        The decoded translation of that single batch entry, with special
        tokens skipped.
    """
    # Pin the source tag to English for this call rather than relying on
    # whatever state the shared tokenizer currently holds, then restore it.
    previous_src_lang = tokenizer.src_lang
    tokenizer.src_lang = "eng_Latn"
    try:
        inputs = tokenizer([text], return_tensors="pt", padding=True)
    finally:
        tokenizer.src_lang = previous_src_lang

    # Language codes are ordinary vocabulary tokens; look the id up through
    # ``convert_tokens_to_ids`` because ``lang_code_to_id`` is deprecated on
    # recent NLLB tokenizers. Generation is capped at 2048 new tokens.
    translated_tokens = model.generate(
        **inputs.to(model.device),
        forced_bos_token_id=tokenizer.convert_tokens_to_ids("kor_Hang"),
        max_new_tokens=2048,
    )
    return tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
29
+
30
+
31
def main():
    """Run an interactive Korean -> English -> Korean round-trip loop.

    Each line read from stdin is translated to English, translated back to
    Korean, and both translations are printed together with the value that
    ``simple_score`` returns for the original line and the round-trip result.
    The loop ends cleanly on end-of-input (Ctrl-D) or Ctrl-C at the prompt.
    """
    while True:
        try:
            text = input('>')
        except (EOFError, KeyboardInterrupt):
            # Leaving the prompt is the normal way to stop; exit without a
            # traceback.
            print()
            return
        if not text.strip():
            # Nothing to translate; ask again instead of running the model
            # on an empty prompt.
            continue
        en_text = translate_ko2en(text)
        ko_text = translate_en2ko(en_text)
        print('en_text', en_text)
        print('ko_text', ko_text)
        print('score', simple_score(text, ko_text))
    # Recorded sample session, kept verbatim for reference (never executed).
    """
    >>? 3천만 개가 넘는 파일과 250억 개의 토큰이 있습니다. Phi1.5의 데이터 세트 구성에 접근하지만 오픈 소스 모델인 Mixtral 8x7B를 사용하고 Apache2.0 라이선스에 따라 라이선스가 부여됩니다.
    en_text There are over 30 million files and 250 billion tokens. Phi1.5's data set configuration is accessible but uses the open source model Mixtral 8x7B and is licensed under the Apache 2.0 license.
    ko_text 300만 개 이상의 파일과 25억 개의 토큰이 있습니다. Phi1.5의 데이터 세트 구성은 액세스 가능하지만 오픈 소스 모델 Mixtral 8x7B를 사용하고 Apache 2.0 라이선스에 따라 라이선스됩니다.
    score 0.3090015909429233
    """
45
+
46
+
47
+ if __name__ == "__main__":
48
+ main()
models/synatra.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from utils.simple_bleu import simple_score
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer
3
+ import torch
4
+
5
+ device = "cuda" # the device to load the model onto
6
+
7
+ model = AutoModelForCausalLM.from_pretrained("maywell/Synatra-7B-v0.3-Translation", torch_dtype=torch.bfloat16, device_map="auto")
8
+ tokenizer = AutoTokenizer.from_pretrained("maywell/Synatra-7B-v0.3-Translation")
9
+
10
+
11
def translate_ko2en(text):
    """Translate ``text`` into English with the module-level Synatra chat model.

    The request is a two-message chat (a Korean system instruction asking for
    an English translation, then ``text`` as the user turn) rendered through
    the tokenizer's chat template. Sampling is enabled, so repeated calls may
    return different translations.

    Args:
        text: Source text to translate.

    Returns:
        The part of the fully decoded sequence (prompt plus generation) that
        follows the last ``<|im_end|>`` marker, after one trailing marker has
        been removed.
        NOTE(review): depending on the chat template this may still start
        with an assistant role header - confirm against the caller.
    """
    messages = [
        {"role": "system", "content": "주어진 문장을 영어로 번역해라."},
        {"role": "user", "content": text},
    ]
    encodeds = tokenizer.apply_chat_template(messages, return_tensors="pt")

    # The model is loaded with device_map="auto", so send the inputs to
    # wherever it was placed instead of a hard-coded "cuda", and do not call
    # model.to() on the already-dispatched model for every request.
    model_inputs = encodeds.to(model.device)

    generated_ids = model.generate(model_inputs, max_new_tokens=2048, do_sample=True)
    output = tokenizer.batch_decode(generated_ids)[0]

    end_marker = "<|im_end|>"
    if output.endswith(end_marker):
        output = output[:-len(end_marker)]
    # Keep only what follows the last remaining end-of-turn marker.
    return output.split(end_marker)[-1]
28
+
29
+
30
+
31
def translate_en2ko(text):
    """Translate ``text`` into Korean with the module-level Synatra chat model.

    The request is a two-message chat (a Korean system instruction asking for
    a Korean translation, then ``text`` as the user turn) rendered through
    the tokenizer's chat template. Sampling is enabled, so repeated calls may
    return different translations.

    Args:
        text: Source text to translate.

    Returns:
        The part of the fully decoded sequence (prompt plus generation) that
        follows the last ``<|im_end|>`` marker, after one trailing marker has
        been removed.
        NOTE(review): depending on the chat template this may still start
        with an assistant role header - confirm against the caller.
    """
    messages = [
        {"role": "system", "content": "주어진 문장을 한국어로 번역해라."},
        {"role": "user", "content": text},
    ]
    encodeds = tokenizer.apply_chat_template(messages, return_tensors="pt")

    # The model is loaded with device_map="auto", so send the inputs to
    # wherever it was placed instead of a hard-coded "cuda", and do not call
    # model.to() on the already-dispatched model for every request.
    model_inputs = encodeds.to(model.device)

    generated_ids = model.generate(model_inputs, max_new_tokens=2048, do_sample=True)
    output = tokenizer.batch_decode(generated_ids)[0]

    end_marker = "<|im_end|>"
    if output.endswith(end_marker):
        output = output[:-len(end_marker)]
    # Keep only what follows the last remaining end-of-turn marker.
    return output.split(end_marker)[-1]
48
+
49
def main():
    """Run an interactive round-trip translation loop on stdin.

    Each line is translated to English, translated back to Korean, and both
    results are printed under banner lines, followed by the value that
    ``simple_score`` returns for the original line and the round-trip result.
    The loop ends cleanly on end-of-input (Ctrl-D) or Ctrl-C at the prompt.
    """
    while True:
        try:
            text = input('>')
        except (EOFError, KeyboardInterrupt):
            # Leaving the prompt is the normal way to stop; exit without a
            # traceback.
            print()
            return
        if not text.strip():
            # Nothing to translate; ask again instead of running the model
            # on an empty prompt.
            continue
        en_text = translate_ko2en(text)
        ko_text = translate_en2ko(en_text)
        print('------en_text--------')
        print(en_text)
        print('------ko_text--------')
        print(ko_text)

        print('score', simple_score(text, ko_text))
60
+
61
+
62
+ if __name__ == "__main__":
63
+ main()
requirements.txt ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ torch>=1.13.1
2
+ transformers>=4.36.2
3
+ datasets>=2.14.3
4
+ accelerate>=0.21.0
5
+ peft>=0.7.0
6
+ trl>=0.7.6
7
+ gradio>=3.38.0,<4.0.0
8
+ scipy
9
+ einops
10
+ sentencepiece
11
+ protobuf
12
+ jieba
13
+ rouge-chinese
14
+ nltk
15
+ uvicorn
16
+ pydantic
17
+ sse-starlette
18
+ matplotlib
19
+ kiwipiepy
results/result-iris_7b-.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
results/result-iris_7b-checkpoint-105000.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
results/result-iris_7b-checkpoint-110000.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
results/result-iris_7b-checkpoint-115000.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
results/result-iris_7b-checkpoint-120000.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
results/result-iris_7b-checkpoint-125000.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
results/result-iris_7b-iris_7b.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
results/result_self-google.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
results_bleu/result_bleu-Synatra-7B-v0.3-Translation.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
results_bleu/result_bleu-TowerInstruct.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
results_bleu/result_bleu-azure.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
results_bleu/result_bleu-deepl.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
results_bleu/result_bleu-google.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
results_bleu/result_bleu-gugugo.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
results_bleu/result_bleu-iris_7b.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
results_bleu/result_bleu-madlad400.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
results_bleu/result_bleu-nllb200.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
results_bleu/result_bleu-papago.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
results_length/Gugugo-koen-7B-V1.1-result.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
results_length/Synatra-7B-v0.3-Translation-result.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
results_length/TowerInstruct-7B-v0.1-result.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
results_length/azure-result.jsonl ADDED
The diff for this file is too large to render. See raw diff