davidkim205 committed
Commit
577164e
1 Parent(s): f3af1d6

Upload folder using huggingface_hub

This view is limited to 50 files because it contains too many changes.
Files changed (50)
  1. .gitignore +160 -0
  2. .idea/.gitignore +3 -0
  3. .idea/inspectionProfiles/profiles_settings.xml +6 -0
  4. .idea/misc.xml +7 -0
  5. .idea/modules.xml +8 -0
  6. .idea/translation.iml +8 -0
  7. .idea/vcs.xml +6 -0
  8. .idea/workspace.xml +217 -0
  9. README.md +336 -8
  10. assets/iris-icon.jpeg +0 -0
  11. assets/plot-bleu-by-sentence-length.png +0 -0
  12. assets/plot-bleu-by-src.png +0 -0
  13. assets/plot-bleu.png +0 -0
  14. create_table.py +124 -0
  15. data/komt-1810k-test.jsonl +0 -0
  16. data/komt-dataset-length.jsonl +0 -0
  17. evaluate.py +116 -0
  18. leaderboard.py +68 -0
  19. model.py +175 -0
  20. models/TowerInstruct.py +47 -0
  21. models/gemma.py +81 -0
  22. models/gugugo.py +74 -0
  23. models/iris_7b.py +81 -0
  24. models/madlad400.py +39 -0
  25. models/mbart50.py +45 -0
  26. models/nllb200.py +48 -0
  27. models/synatra.py +63 -0
  28. requirements.txt +19 -0
  29. results/result-iris_7b-.jsonl +0 -0
  30. results/result-iris_7b-checkpoint-105000.jsonl +0 -0
  31. results/result-iris_7b-checkpoint-110000.jsonl +0 -0
  32. results/result-iris_7b-checkpoint-115000.jsonl +0 -0
  33. results/result-iris_7b-checkpoint-120000.jsonl +0 -0
  34. results/result-iris_7b-checkpoint-125000.jsonl +0 -0
  35. results/result-iris_7b-iris_7b.jsonl +0 -0
  36. results/result_self-google.jsonl +0 -0
  37. results_bleu/result_bleu-Synatra-7B-v0.3-Translation.jsonl +0 -0
  38. results_bleu/result_bleu-TowerInstruct.jsonl +0 -0
  39. results_bleu/result_bleu-azure.jsonl +0 -0
  40. results_bleu/result_bleu-deepl.jsonl +0 -0
  41. results_bleu/result_bleu-google.jsonl +0 -0
  42. results_bleu/result_bleu-gugugo.jsonl +0 -0
  43. results_bleu/result_bleu-iris_7b.jsonl +0 -0
  44. results_bleu/result_bleu-madlad400.jsonl +0 -0
  45. results_bleu/result_bleu-nllb200.jsonl +0 -0
  46. results_bleu/result_bleu-papago.jsonl +0 -0
  47. results_length/Gugugo-koen-7B-V1.1-result.jsonl +0 -0
  48. results_length/Synatra-7B-v0.3-Translation-result.jsonl +0 -0
  49. results_length/TowerInstruct-7B-v0.1-result.jsonl +0 -0
  50. results_length/azure-result.jsonl +0 -0
.gitignore ADDED
@@ -0,0 +1,160 @@
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+ .pytest_cache/
+ cover/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ .pybuilder/
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default/
+ ipython_config.py
+
+ # pyenv
+ # For a library or package, you might want to ignore these files since the code is
+ # intended to run in multiple environments; otherwise, check them in:
+ # .python-version
+
+ # pipenv
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
+ # install all needed dependencies.
+ #Pipfile.lock
+
+ # poetry
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
+ # commonly ignored for libraries.
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+ #poetry.lock
+
+ # pdm
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+ #pdm.lock
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+ # in version control.
+ # https://pdm.fming.dev/#use-with-ide
+ .pdm.toml
+
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+ __pypackages__/
+
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .env
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+
+ # Pyre type checker
+ .pyre/
+
+ # pytype static type analyzer
+ .pytype/
+
+ # Cython debug symbols
+ cython_debug/
+
+ # PyCharm
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
+ #.idea/
.idea/.gitignore ADDED
@@ -0,0 +1,3 @@
+ # Default ignored files
+ /shelf/
+ /workspace.xml
.idea/inspectionProfiles/profiles_settings.xml ADDED
@@ -0,0 +1,6 @@
+ <component name="InspectionProjectProfileManager">
+   <settings>
+     <option name="USE_PROJECT_PROFILE" value="false" />
+     <version value="1.0" />
+   </settings>
+ </component>
.idea/misc.xml ADDED
@@ -0,0 +1,7 @@
+ <?xml version="1.0" encoding="UTF-8"?>
+ <project version="4">
+   <component name="Black">
+     <option name="sdkName" value="Promptify" />
+   </component>
+   <component name="ProjectRootManager" version="2" project-jdk-name="translation" project-jdk-type="Python SDK" />
+ </project>
.idea/modules.xml ADDED
@@ -0,0 +1,8 @@
+ <?xml version="1.0" encoding="UTF-8"?>
+ <project version="4">
+   <component name="ProjectModuleManager">
+     <modules>
+       <module fileurl="file://$PROJECT_DIR$/.idea/translation.iml" filepath="$PROJECT_DIR$/.idea/translation.iml" />
+     </modules>
+   </component>
+ </project>
.idea/translation.iml ADDED
@@ -0,0 +1,8 @@
+ <?xml version="1.0" encoding="UTF-8"?>
+ <module type="PYTHON_MODULE" version="4">
+   <component name="NewModuleRootManager">
+     <content url="file://$MODULE_DIR$" />
+     <orderEntry type="jdk" jdkName="translation" jdkType="Python SDK" />
+     <orderEntry type="sourceFolder" forTests="false" />
+   </component>
+ </module>
.idea/vcs.xml ADDED
@@ -0,0 +1,6 @@
+ <?xml version="1.0" encoding="UTF-8"?>
+ <project version="4">
+   <component name="VcsDirectoryMappings">
+     <mapping directory="" vcs="Git" />
+   </component>
+ </project>
.idea/workspace.xml ADDED
@@ -0,0 +1,217 @@
+ <?xml version="1.0" encoding="UTF-8"?>
+ <project version="4">
+   <component name="AutoImportSettings">
+     <option name="autoReloadType" value="SELECTIVE" />
+   </component>
+   <component name="ChangeListManager">
+     <list default="true" id="3b99adef-9597-4d3b-ace1-b9d588ce4682" name="Changes" comment="">
+       <change beforePath="$PROJECT_DIR$/leaderboard.py" beforeDir="false" afterPath="$PROJECT_DIR$/leaderboard.py" afterDir="false" />
+       <change beforePath="$PROJECT_DIR$/model.py" beforeDir="false" afterPath="$PROJECT_DIR$/model.py" afterDir="false" />
+       <change beforePath="$PROJECT_DIR$/translation2.py" beforeDir="false" afterPath="$PROJECT_DIR$/translation2.py" afterDir="false" />
+     </list>
+     <option name="SHOW_DIALOG" value="false" />
+     <option name="HIGHLIGHT_CONFLICTS" value="true" />
+     <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
+     <option name="LAST_RESOLUTION" value="IGNORE" />
+   </component>
+   <component name="FileTemplateManagerImpl">
+     <option name="RECENT_TEMPLATES">
+       <list>
+         <option value="Python Script" />
+       </list>
+     </option>
+   </component>
+   <component name="Git.Settings">
+     <option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" />
+   </component>
+   <component name="MarkdownSettingsMigration">
+     <option name="stateVersion" value="1" />
+   </component>
+   <component name="ProjectColorInfo">{
+   &quot;associatedIndex&quot;: 6
+ }</component>
+   <component name="ProjectId" id="2ccs1q2ofZLAMdjdjZEwtumXnHp" />
+   <component name="ProjectViewState">
+     <option name="hideEmptyMiddlePackages" value="true" />
+     <option name="showLibraryContents" value="true" />
+   </component>
+   <component name="PropertiesComponent">{
+   &quot;keyToString&quot;: {
+     &quot;Python.TowerInstruct.executor&quot;: &quot;Debug&quot;,
+     &quot;Python.evaluate.executor&quot;: &quot;Debug&quot;,
+     &quot;Python.gugugo.executor&quot;: &quot;Debug&quot;,
+     &quot;Python.madlad400.executor&quot;: &quot;Debug&quot;,
+     &quot;Python.mbart-large-50.executor&quot;: &quot;Debug&quot;,
+     &quot;Python.nllb-200-distilled.executor&quot;: &quot;Debug&quot;,
+     &quot;Python.synatra.executor&quot;: &quot;Debug&quot;,
+     &quot;Python.tokenizer.executor&quot;: &quot;Debug&quot;,
+     &quot;Python.translation.executor&quot;: &quot;Debug&quot;,
+     &quot;Python.translation2.executor&quot;: &quot;Debug&quot;,
+     &quot;Python.translation_conv.executor&quot;: &quot;Debug&quot;,
+     &quot;RunOnceActivity.OpenProjectViewOnStart&quot;: &quot;true&quot;,
+     &quot;RunOnceActivity.ShowReadmeOnStart&quot;: &quot;true&quot;,
+     &quot;last_opened_file_path&quot;: &quot;/work/translation/models&quot;,
+     &quot;settings.editor.selected.configurable&quot;: &quot;com.jetbrains.python.configuration.PyActiveSdkModuleConfigurable&quot;
+   }
+ }</component>
+   <component name="RecentsManager">
+     <key name="CopyFile.RECENT_KEYS">
+       <recent name="$PROJECT_DIR$/models" />
+       <recent name="$PROJECT_DIR$/assets" />
+       <recent name="$PROJECT_DIR$" />
+       <recent name="$PROJECT_DIR$/results" />
+       <recent name="$PROJECT_DIR$/data" />
+     </key>
+     <key name="MoveFile.RECENT_KEYS">
+       <recent name="$PROJECT_DIR$" />
+       <recent name="$PROJECT_DIR$/models" />
+     </key>
+   </component>
+   <component name="RunManager" selected="Python.evaluate">
+     <configuration name="evaluate" type="PythonConfigurationType" factoryName="Python" temporary="true" nameIsGenerated="true">
+       <module name="translation" />
+       <option name="ENV_FILES" value="" />
+       <option name="INTERPRETER_OPTIONS" value="" />
+       <option name="PARENT_ENVS" value="true" />
+       <envs>
+         <env name="PYTHONUNBUFFERED" value="1" />
+       </envs>
+       <option name="SDK_HOME" value="" />
+       <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
+       <option name="IS_MODULE_SDK" value="true" />
+       <option name="ADD_CONTENT_ROOTS" value="true" />
+       <option name="ADD_SOURCE_ROOTS" value="true" />
+       <option name="SCRIPT_NAME" value="$PROJECT_DIR$/evaluate.py" />
+       <option name="PARAMETERS" value="results" />
+       <option name="SHOW_COMMAND_LINE" value="false" />
+       <option name="EMULATE_TERMINAL" value="false" />
+       <option name="MODULE_MODE" value="false" />
+       <option name="REDIRECT_INPUT" value="false" />
+       <option name="INPUT_FILE" value="" />
+       <method v="2" />
+     </configuration>
+     <configuration name="synatra" type="PythonConfigurationType" factoryName="Python" temporary="true" nameIsGenerated="true">
+       <module name="translation" />
+       <option name="ENV_FILES" value="" />
+       <option name="INTERPRETER_OPTIONS" value="" />
+       <option name="PARENT_ENVS" value="true" />
+       <envs>
+         <env name="PYTHONUNBUFFERED" value="1" />
+       </envs>
+       <option name="SDK_HOME" value="" />
+       <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$/models" />
+       <option name="IS_MODULE_SDK" value="true" />
+       <option name="ADD_CONTENT_ROOTS" value="true" />
+       <option name="ADD_SOURCE_ROOTS" value="true" />
+       <option name="SCRIPT_NAME" value="$PROJECT_DIR$/models/synatra.py" />
+       <option name="PARAMETERS" value="" />
+       <option name="SHOW_COMMAND_LINE" value="false" />
+       <option name="EMULATE_TERMINAL" value="false" />
+       <option name="MODULE_MODE" value="false" />
+       <option name="REDIRECT_INPUT" value="false" />
+       <option name="INPUT_FILE" value="" />
+       <method v="2" />
+     </configuration>
+     <configuration name="tokenizer" type="PythonConfigurationType" factoryName="Python" temporary="true" nameIsGenerated="true">
+       <module name="translation" />
+       <option name="ENV_FILES" value="" />
+       <option name="INTERPRETER_OPTIONS" value="" />
+       <option name="PARENT_ENVS" value="true" />
+       <envs>
+         <env name="PYTHONUNBUFFERED" value="1" />
+       </envs>
+       <option name="SDK_HOME" value="" />
+       <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$/utils" />
+       <option name="IS_MODULE_SDK" value="true" />
+       <option name="ADD_CONTENT_ROOTS" value="true" />
+       <option name="ADD_SOURCE_ROOTS" value="true" />
+       <option name="SCRIPT_NAME" value="$PROJECT_DIR$/utils/tokenizer.py" />
+       <option name="PARAMETERS" value="" />
+       <option name="SHOW_COMMAND_LINE" value="false" />
+       <option name="EMULATE_TERMINAL" value="false" />
+       <option name="MODULE_MODE" value="false" />
+       <option name="REDIRECT_INPUT" value="false" />
+       <option name="INPUT_FILE" value="" />
+       <method v="2" />
+     </configuration>
+     <configuration name="translation" type="PythonConfigurationType" factoryName="Python" temporary="true" nameIsGenerated="true">
+       <module name="translation" />
+       <option name="ENV_FILES" value="" />
+       <option name="INTERPRETER_OPTIONS" value="" />
+       <option name="PARENT_ENVS" value="true" />
+       <envs>
+         <env name="PYTHONUNBUFFERED" value="1" />
+         <env name="CUDA_VISIBLE_DEVICES" value="1" />
+       </envs>
+       <option name="SDK_HOME" value="" />
+       <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
+       <option name="IS_MODULE_SDK" value="true" />
+       <option name="ADD_CONTENT_ROOTS" value="true" />
+       <option name="ADD_SOURCE_ROOTS" value="true" />
+       <option name="SCRIPT_NAME" value="$PROJECT_DIR$/translation.py" />
+       <option name="PARAMETERS" value="" />
+       <option name="SHOW_COMMAND_LINE" value="false" />
+       <option name="EMULATE_TERMINAL" value="false" />
+       <option name="MODULE_MODE" value="false" />
+       <option name="REDIRECT_INPUT" value="false" />
+       <option name="INPUT_FILE" value="" />
+       <method v="2" />
+     </configuration>
+     <configuration name="translation2" type="PythonConfigurationType" factoryName="Python" temporary="true" nameIsGenerated="true">
+       <module name="translation" />
+       <option name="ENV_FILES" value="" />
+       <option name="INTERPRETER_OPTIONS" value="" />
+       <option name="PARENT_ENVS" value="true" />
+       <envs>
+         <env name="PYTHONUNBUFFERED" value="1" />
+       </envs>
+       <option name="SDK_HOME" value="" />
+       <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
+       <option name="IS_MODULE_SDK" value="true" />
+       <option name="ADD_CONTENT_ROOTS" value="true" />
+       <option name="ADD_SOURCE_ROOTS" value="true" />
+       <option name="SCRIPT_NAME" value="$PROJECT_DIR$/translation2.py" />
+       <option name="PARAMETERS" value="" />
+       <option name="SHOW_COMMAND_LINE" value="false" />
+       <option name="EMULATE_TERMINAL" value="false" />
+       <option name="MODULE_MODE" value="false" />
+       <option name="REDIRECT_INPUT" value="false" />
+       <option name="INPUT_FILE" value="" />
+       <method v="2" />
+     </configuration>
+     <list>
+       <item itemvalue="Python.evaluate" />
+       <item itemvalue="Python.tokenizer" />
+       <item itemvalue="Python.translation2" />
+       <item itemvalue="Python.synatra" />
+       <item itemvalue="Python.translation" />
+     </list>
+     <recent_temporary>
+       <list>
+         <item itemvalue="Python.evaluate" />
+         <item itemvalue="Python.translation2" />
+         <item itemvalue="Python.tokenizer" />
+         <item itemvalue="Python.translation" />
+         <item itemvalue="Python.synatra" />
+       </list>
+     </recent_temporary>
+   </component>
+   <component name="SharedIndexes">
+     <attachedChunks>
+       <set>
+         <option value="bundled-python-sdk-09665e90c3a7-d3b881c8e49f-com.jetbrains.pycharm.community.sharedIndexes.bundled-PC-233.15026.15" />
+       </set>
+     </attachedChunks>
+   </component>
+   <component name="SpellCheckerSettings" RuntimeDictionaries="0" Folders="0" CustomDictionaries="0" DefaultDictionary="application-level" UseSingleDictionary="true" transferred="true" />
+   <component name="TaskManager">
+     <task active="true" id="Default" summary="Default task">
+       <changelist id="3b99adef-9597-4d3b-ace1-b9d588ce4682" name="Changes" comment="" />
+       <created>1708421988537</created>
+       <option name="number" value="Default" />
+       <option name="presentableId" value="Default" />
+       <updated>1708421988537</updated>
+     </task>
+     <servers />
+   </component>
+ </project>
README.md CHANGED
@@ -1,12 +1,340 @@
---
- title: Ko Translation Leaderbaord
- emoji: 🔥
- colorFrom: blue
- colorTo: pink
+ title: ko-translation-leaderboard
+ app_file: leaderboard.py
sdk: gradio
- sdk_version: 4.25.0
- app_file: app.py
- pinned: false
+ sdk_version: 3.50.2
---
+ # Iris Translation
+ ![iris-icon.jpeg](assets%2Firis-icon.jpeg)

- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ Welcome to Iris Translation, a project designed to evaluate Korean-to-English translation models. Our project provides a comprehensive framework for evaluating the Iris model that we have developed.
+
+ ## Models
+
+ These are the models used to compare translation quality. All of them can be run and their results verified.
+
+ - [davidkim205/iris-7b](https://huggingface.co/davidkim205/iris-7b)
+ - [squarelike/Gugugo-koen-7B-V1.1](https://huggingface.co/squarelike/Gugugo-koen-7B-V1.1)
+ - [maywell/Synatra-7B-v0.3-Translation](https://huggingface.co/maywell/Synatra-7B-v0.3-Translation)
+ - [Unbabel/TowerInstruct-7B-v0.1](https://huggingface.co/Unbabel/TowerInstruct-7B-v0.1)
+ - [jbochi/madlad400-10b-mt](https://huggingface.co/jbochi/madlad400-10b-mt)
+ - [facebook/mbart-large-50-many-to-many-mmt](https://huggingface.co/facebook/mbart-large-50-many-to-many-mmt)
+ - [facebook/nllb-200-distilled-1.3B](https://huggingface.co/facebook/nllb-200-distilled-1.3B)
+
+ ## Installation
+
+ ```
+ conda create -n translation python=3.10
+ conda activate translation
+
+ pip install -r requirements.txt
+ ```
+
+ ## Usage
+
+ The default input file is `./data/komt-1810k-test.jsonl`. Below is an example of the data's JSON schema.
+
+ ```json
+ {
+     "conversations":[
+         {
+             "from":"human",
+             "value":"다음 문장을 한글로 번역하세요.\nLet's make a graph here showing different levels of interest in activities."
+         },
+         {
+             "from":"gpt",
+             "value":"활동에 대한 다양한 수준의 관심을 보여주는 그래프를 만들어 보겠습니다."
+         }
+     ],
+     "src":"aihub-MTPE"
+ }
+ ```
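+
+ As a quick sanity check, the test file can be read with plain Python. This is only an illustrative sketch (not part of the repository); it assumes the schema above, where the `human` turn holds the instruction and the source sentence separated by a newline, and the `gpt` turn holds the reference translation.
+
+ ```python
+ import json
+
+ with open('data/komt-1810k-test.jsonl', encoding='utf-8') as f:
+     for line in f:
+         record = json.loads(line)
+         # "<instruction>\n<source sentence>" -> keep only the sentence
+         source = record['conversations'][0]['value'].split('\n', 1)[1]
+         reference = record['conversations'][1]['value']
+         print(record['src'], '|', source, '|', reference)
+ ```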
+
+ ### translate(Bleu)
+
+ Translates with the model and computes a BLEU score against the reference translation.
+
+ ```
+ python translation.py --model davidkim205/iris-7b
+ ```
+
+ The result file is written to `results_bleu/iris-7b-result.jsonl`.
+
+ Example JSON schema:
+
+ - reference: the ground-truth translation
+ - generation: the translation produced by the model
+
+ ```json
+ {
+     "index":0,
+     "reference":"활동에 대한 다양한 수준의 관심을 보여주는 그래프를 만들어 보겠습니다.",
+     "generation":"여기서 활동에 대한 다양한 수준의 관심을 보여주는 그래프를 만들어 보겠습니다.",
+     "bleu":0.917,
+     "lang":"en",
+     "model":"davidkim205/iris-7b",
+     "src":"aihub-MTPE",
+     "conversations":[
+         {
+             "from":"human",
+             "value":"다음 문장을 한글로 번역하세요.\nLet's make a graph here showing different levels of interest in activities."
+         },
+         {
+             "from":"gpt",
+             "value":"활동에 대한 다양한 수준의 관심을 보여주는 그래프를 만들어 보겠습니다."
+         }
+     ]
+ }
+ ```
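+
+ The per-record `bleu` value can be recomputed from `reference` and `generation` with the repository helper `simple_score` from `utils/simple_bleu` (the same helper `model.py` and the `models/` scripts call). A hedged sketch, assuming `simple_score(reference, hypothesis)` returns a float as in those scripts:
+
+ ```python
+ import json
+ from utils.simple_bleu import simple_score  # repo helper used throughout models/
+
+ with open('results_bleu/result_bleu-iris_7b.jsonl', encoding='utf-8') as f:
+     records = [json.loads(line) for line in f]
+ scores = [simple_score(r['reference'], r['generation']) for r in records]
+ print(round(sum(scores) / len(scores), 2))  # average BLEU, as printed by evaluate.py
+ ```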
+
+ ### translate_self(SBleu)
+
+ Translates the model's own output back again and computes a BLEU score against the original text.
+
+ ```
+ python translation_self.py --model davidkim205/iris-7b
+ ```
+
+ The result file is written to `results_self/iris-7b-result.jsonl`.
+
+ Example JSON schema:
+
+ - reference: the original text
+ - generation: the model's back-translation
+ - generation1: the model's forward translation
+
+ ```json
+ {
+     "index":0,
+     "reference":"Let's make a graph here showing different levels of interest in activities.",
+     "generation":"let's create a graph that shows different levels of interest in activities here",
+     "generation1":"여기서 활동에 대한 다양한 수준의 관심을 보여주는 그래프를 만들어 보겠습니다.",
+     "bleu":0.49,
+     "lang":"en",
+     "model":"davidkim205/iris-7b",
+     "src":"aihub-MTPE",
+     "conversations":[
+         {
+             "from":"human",
+             "value":"다음 문장을 한글로 번역하세요.\nLet's make a graph here showing different levels of interest in activities."
+         },
+         {
+             "from":"gpt",
+             "value":"활동에 대한 다양한 수준의 관심을 보여주는 그래프를 만들어 보겠습니다."
+         }
+     ]
+ }
+ ```
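+
+ The round-trip logic is the same one `model.py` runs in its interactive `main()` loop; condensed here as a sketch, using the ko -> en -> ko direction of that loop (the English-side records above simply reverse the directions):
+
+ ```python
+ from model import load_model, translate_ko2en, translate_en2ko
+ from utils.simple_bleu import simple_score
+
+ load_model('davidkim205/iris-7b')
+ text = '활동에 대한 다양한 수준의 관심을 보여주는 그래프를 만들어 보겠습니다.'
+ en_text = translate_ko2en(text)     # forward translation (generation1)
+ ko_text = translate_en2ko(en_text)  # back-translation (generation)
+ print('sbleu', simple_score(text, ko_text))
+ ```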
+
+ ### translate2(Bleu and SBleu)
+ Runs both translate and translate_self, so BLEU and SBLEU can be compared in one pass.
+
+ ```
+ python translation2.py --model davidkim205/iris-7b
+ ```
+
+ - runs translate and saves the results to `results_bleu/iris-7b-result.jsonl`
+ - runs translate_self and saves the results to `results_self/iris-7b-result.jsonl`
+
+ Each file contains the same results as the two files generated above.
+
+ ## Evaluation
+
+ Translation results are validated in two ways.
+
+ 1. Evaluation comparing the reference translations with the model translations
+
+ ```
+ python evaluate.py results_bleu/
+ ```
+
+ output
+
+ ```
+ bleu scores
+ result_bleu-nllb200.jsonl: 0.26, out_of_range_count=3, duplicate=1
+ result_bleu-madlad400.jsonl: 0.29, out_of_range_count=6, duplicate=3
+ result_bleu-TowerInstruct.jsonl: 0.32, out_of_range_count=9, duplicate=1
+ result_bleu-gugugo.jsonl: 0.32, out_of_range_count=3, duplicate=1
+ result_bleu-Synatra-7B-v0.3-Translation.jsonl: 0.35, out_of_range_count=2, duplicate=1
+ result_bleu-deepl.jsonl: 0.39, out_of_range_count=1, duplicate=0
+ result_bleu-azure.jsonl: 0.40, out_of_range_count=2, duplicate=0
+ result_bleu-google.jsonl: 0.40, out_of_range_count=3, duplicate=0
+ result_bleu-papago.jsonl: 0.43, out_of_range_count=3, duplicate=0
+ result_bleu-iris_7b.jsonl: 0.40, out_of_range_count=3, duplicate=0
+ ```
+
+ 2. Evaluation comparing the original text with the result of translating it twice (en -> ko -> en)
+
+ ```
+ python evaluate.py results_self/
+ ```
+
+ output
+
+ ```
+ bleu scores
+ result_self-nllb200.jsonl: 0.30, out_of_range_count=1, duplicate=1
+ result_self-gugugo.jsonl: 0.36, out_of_range_count=1, duplicate=1
+ result_self-madlad400.jsonl: 0.38, out_of_range_count=3, duplicate=2
+ result_self-TowerInstruct.jsonl: 0.39, out_of_range_count=3, duplicate=0
+ result_self-Synatra-7B-v0.3-Translation.jsonl: 0.41, out_of_range_count=2, duplicate=1
+ result_self-deepl.jsonl: 0.45, out_of_range_count=0, duplicate=0
+ result_self-azure.jsonl: 0.49, out_of_range_count=0, duplicate=1
+ result_self-google.jsonl: 0.49, out_of_range_count=0, duplicate=0
+ result_self-papago.jsonl: 0.51, out_of_range_count=0, duplicate=0
+ result_self-iris_7b.jsonl: 0.43, out_of_range_count=1, duplicate=0
+ ```
+
+ **Evaluation criteria**
+
+ - BLEU: average BLEU score between the reference translations and the model translations
+ - SBLEU: average BLEU score between the original text and its back-translation
+ - Duplicate: number of translations that generate repeated text
+ - Length Exceeds: number of model translations whose length mismatches the reference (length ratio outside 0.2 < ratio < 2); the check is sketched below
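+
+ For reference, the Length Exceeds rule as implemented in `evaluate.py` (shown verbatim, with two toy calls added for illustration):
+
+ ```python
+ # a translation "exceeds length" when the character-length ratio between
+ # generation and reference falls outside (0.2, 2)
+ def is_length_exceed(reference, generation, min_ratio=0.2, max_ratio=2):
+     return not min_ratio <= (len(generation) / len(reference)) <= max_ratio
+
+ print(is_length_exceed('a' * 100, 'b' * 10))   # True  -> counted as Length Exceeds
+ print(is_length_exceed('a' * 100, 'b' * 150))  # False -> within the allowed range
+ ```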
+
+ ### BLEU
+
+ Evaluation results for each model. For the iris-7b model:
+
+ - higher translation performance than the other open models in every evaluation
+ - translation performance on par with cloud translation services on average
+ - duplicate-sentence generation and length-exceeds issues at the same level as cloud translation services
+
+ ![plot-bleu.png](assets%2Fplot-bleu.png)
+
+ Duplicate (repeated sentence generation) and Length Exceeds are metrics from translate(Bleu).
+
+ | TYPE | Model | BLEU | SBLEU | Duplicate | Length Exceeds |
+ | ----------- | :---------------------------------- | ---- | ----- | --------- | -------------- |
+ | HuggingFace | facebook/nllb-200-distilled-1.3B | 0.26 | 0.30 | 1 | 3 |
+ | HuggingFace | jbochi/madlad400-10b-mt | 0.29 | 0.38 | 3 | 6 |
+ | HuggingFace | Unbabel/TowerInstruct-7B-v0.1 | 0.32 | 0.39 | 1 | 9 |
+ | HuggingFace | squarelike/Gugugo-koen-7B-V1.1 | 0.32 | 0.36 | 1 | 3 |
+ | HuggingFace | maywell/Synatra-7B-v0.3-Translation | 0.35 | 0.41 | 1 | 2 |
+ | Cloud | deepl | 0.39 | 0.45 | 0 | 1 |
+ | Cloud | azure | 0.40 | 0.49 | 0 | 3 |
+ | Cloud | google | 0.40 | 0.49 | 0 | 2 |
+ | Cloud | papago | 0.43 | 0.51 | 0 | 3 |
+ | HuggingFace | davidkim205/iris-7b (**ours**) | 0.40 | 0.43 | 0 | 3 |
+
+ * SBLEU: Self-evaluation BLEU
+
+ ### BLEU by source
+
+ Translation quality of the test dataset evaluated by domain. For the iris-7b model:
+
+ - performance that surpasses the other open translation models in every domain
+ - performance comparable to, or better than, cloud translation in many domains
+ - especially strong translation quality in the science and neologism domains
+
+ ![plot-bleu-by-src.png](assets%2Fplot-bleu-by-src.png)
+
+ | Type | Model | Average | MTPE | techsci2 | expertise | humanities | sharegpt-deepl-ko-translation | MT-new-corpus | socialsci | korean-parallel-corpora | parallel-translation | food | techsci | para_pat | speechtype-based-machine-translation | koopus100 | basicsci | broadcast-content | patent | colloquial |
+ | ----------- | :---------------------------------- | ------- | ---: | -------: | --------: | ---------: | ----------------------------: | ------------: | --------: | ----------------------: | -------------------: | ---: | ------: | -------: | -----------------------------------: | --------: | -------: | ----------------: | -----: | ---------: |
+ | HuggingFace | facebook/nllb-200-distilled-1.3B | 0.26 | 0.44 | 0.28 | 0.16 | 0.23 | 0.44 | 0.34 | 0.27 | 0.10 | 0.23 | 0.37 | 0.28 | 0.19 | 0.29 | 0.23 | 0.15 | 0.33 | 0.09 | 0.29 |
+ | HuggingFace | jbochi/madlad400-10b-mt | 0.29 | 0.45 | 0.29 | 0.20 | 0.29 | 0.40 | 0.36 | 0.39 | 0.12 | 0.22 | 0.46 | 0.30 | 0.23 | 0.48 | 0.23 | 0.19 | 0.36 | 0.01 | 0.33 |
+ | HuggingFace | Unbabel/TowerInstruct-7B-v0.1 | 0.32 | 0.46 | 0.33 | 0.28 | 0.27 | 0.30 | 0.39 | 0.37 | 0.14 | 0.35 | 0.47 | 0.39 | 0.29 | 0.41 | 0.21 | 0.22 | 0.36 | 0.15 | 0.33 |
+ | HuggingFace | squarelike/Gugugo-koen-7B-V1.1 | 0.32 | 0.46 | 0.27 | 0.28 | 0.22 | 0.66 | 0.33 | 0.36 | 0.10 | 0.29 | 0.45 | 0.34 | 0.24 | 0.42 | 0.22 | 0.23 | 0.42 | 0.20 | 0.26 |
+ | HuggingFace | maywell/Synatra-7B-v0.3-Translation | 0.35 | 0.43 | 0.36 | 0.27 | 0.23 | 0.70 | 0.37 | 0.31 | 0.13 | 0.34 | 0.52 | 0.35 | 0.29 | 0.44 | 0.21 | 0.24 | 0.46 | 0.28 | 0.37 |
+ | Cloud | deepl | 0.39 | 0.59 | 0.33 | 0.31 | 0.32 | 0.70 | 0.48 | 0.38 | 0.14 | 0.38 | 0.55 | 0.41 | 0.33 | 0.48 | 0.24 | 0.28 | 0.42 | 0.37 | 0.36 |
+ | Cloud | azure | 0.40 | 0.57 | 0.36 | 0.35 | 0.29 | 0.63 | 0.46 | 0.39 | 0.16 | 0.38 | 0.56 | 0.39 | 0.33 | 0.54 | 0.22 | 0.29 | 0.52 | 0.35 | 0.41 |
+ | Cloud | google | 0.40 | 0.62 | 0.39 | 0.32 | 0.32 | 0.60 | 0.45 | 0.45 | 0.14 | 0.38 | 0.59 | 0.43 | 0.34 | 0.45 | 0.22 | 0.28 | 0.47 | 0.39 | 0.36 |
+ | Cloud | papago | 0.43 | 0.56 | 0.43 | 0.41 | 0.30 | 0.55 | 0.58 | 0.56 | 0.16 | 0.37 | 0.67 | 0.52 | 0.35 | 0.53 | 0.21 | 0.35 | 0.45 | 0.37 | 0.46 |
+ | HuggingFace | davidkim205/iris-7b (**ours**) | 0.40 | 0.49 | 0.37 | 0.34 | 0.31 | 0.72 | 0.48 | 0.43 | 0.11 | 0.33 | 0.56 | 0.46 | 0.34 | 0.43 | 0.20 | 0.30 | 0.47 | 0.41 | 0.40 |
+
+ ### BLEU by sentence length
+
+ Average scores from translating 50 samples in each of four text-length buckets. The datasets used for the evaluation are:
+
+ - `data/komt-dataset-100.jsonl`
+ - `data/komt-dataset-500.jsonl`
+ - `data/komt-dataset-1000.jsonl`
+ - `data/komt-dataset-1500.jsonl`
+
+ Translation and BLEU score results are stored under `results_length/`.
+
+ Remarkably, the iris-7b model outperforms most cloud translation services in every bucket. The buckets are character-length ranges:
+
+ - ~100: (0, 100]
+ - ~500: (100, 500]
+ - ~1000: (500, 1000]
+ - ~1500: (1000, 1500]
+
+ ![plot-bleu-by-sentence-length.png](assets%2Fplot-bleu-by-sentence-length.png)
+
+ | Type | Model | Average | ~100(50) | ~500(50) | ~1000(50) | ~1500(50) |
+ | ----------- | :---------------------------------- | ------- | -------: | -------: | --------: | --------: |
+ | HuggingFace | facebook/nllb-200-distilled-1.3B | 0.24 | 0.31 | 0.31 | 0.22 | 0.13 |
+ | HuggingFace | jbochi/madlad400-10b-mt | 0.22 | 0.35 | 0.37 | 0.08 | 0.10 |
+ | HuggingFace | Unbabel/TowerInstruct-7B-v0.1 | 0.32 | 0.41 | 0.31 | 0.24 | 0.32 |
+ | HuggingFace | squarelike/Gugugo-koen-7B-V1.1 | 0.45 | 0.37 | 0.48 | 0.52 | 0.43 |
+ | HuggingFace | maywell/Synatra-7B-v0.3-Translation | 0.50 | 0.41 | 0.57 | 0.57 | 0.51 |
+ | Cloud | deepl | 0.53 | 0.44 | 0.56 | 0.64 | 0.50 |
+ | Cloud | azure | 0.47 | 0.46 | 0.47 | 0.52 | 0.44 |
+ | Cloud | google | 0.51 | 0.50 | 0.49 | 0.54 | 0.51 |
+ | Cloud | papago | 0.46 | 0.50 | 0.46 | 0.43 | 0.45 |
+ | HuggingFace | davidkim205/iris-7b (**ours**) | 0.56 | 0.51 | 0.58 | 0.62 | 0.54 |
+
+ ## test dataset info
+
+ The test dataset consists of 10 samples from each of 18 domains, 180 samples in total.
+
+ The `koopus100` dataset is of low quality: its sentences are short, and some source-translation pairs do not match.
+
+ ```
+ text: All right
+ translation: 별로 그럴 기분 아니야 - I'm not in the mood.
+
+ text: Do you have a fever?
+ translation: 뭐라고 했어?
+ ```
+
+ The `korean-parallel-corpora` dataset is of low quality: translations mix Korean and English or are simply wrong.
+
+ ```
+ text: S. Korea mulls missile defense system 한국, 자체적 미사일 방어체계 수립 검토     2007.03
+ translation: South Korea maintains a mandatory draft system under which all able-bodied men over 20 must serve in the military for 24 to 27 months.
+
+ text: A United States intelligence agency has been collecting data on the phone calls of tens of millions of Americans, a report in USA Today has alleged.
+ translation: NSA collects Americans’phone clall data미 국가안보국, 미국민 통화 내용 수집2006.07
+
+ text: I see the guy as more like John Wayne, which is to say I don't like his politics but he's endearing in a strange, goofy, awkward way, and he did capture the imagination of the country,\" he said.
+ translation: 베트남전에 참전했던 스톤 감독은 비판적으로 호평을 받고 정치적인 성향이 많은 영화를 제작한 것으로 유명하다.
+
+ text: The Sahara is advancing into Ghana and Nigeria at the rate of 3,510 square kilometers per year.
+ translation: 카자흐스탄 또한 사막화로 인해 1980년 이후 농경지의 50%가 사라졌으며 사하라 사막은 매년 3510㎢씩 커져가며 가나와 나이지리아를 위협하고 있다.
+ ```
+
+ The table below summarizes the ratio and description of each src.
+
+ | src | ratio | description |
+ | ------------------------------------------ | ----- | ------------------------------------------------------------ |
+ | aihub-MTPE | 5.56% | post-editing dataset for verifying machine-translation quality |
+ | aihub-techsci2 | 5.56% | Korean-English translation dataset for technology/science fields such as ICT and electrical/electronics |
+ | aihub-expertise | 5.56% | Korean-English translation dataset for specialized fields such as medicine, finance, and sports |
+ | aihub-humanities | 5.56% | Korean-English translation dataset for the humanities |
+ | sharegpt-deepl-ko-translation | 5.56% | ShareGPT dataset converted from Q&A format into Korean-English translation format |
+ | aihub-MT-new-corpus | 5.56% | Korean-English translation dataset for building machine-translation apps |
+ | aihub-socialsci | 5.56% | Korean-English translation dataset for social-science fields such as law, education, and economics |
+ | korean-parallel-corpora | 5.56% | Korean-English parallel translation dataset |
+ | aihub-parallel-translation | 5.56% | Korean-English translation dataset by speech type and domain |
+ | aihub-food | 5.56% | English-Korean translation dataset for the food domain |
+ | aihub-techsci | 5.56% | Korean-English translation dataset for technology/science fields such as ICT and electrical/electronics |
+ | para_pat | 5.56% | English-Korean subset of the ParaPat dataset |
+ | aihub-speechtype-based-machine-translation | 5.56% | English-Korean translation dataset by speech type |
+ | koopus100 | 5.56% | English-Korean subset of the OPUS-100 dataset |
+ | aihub-basicsci | 5.56% | Korean-English translation dataset for basic sciences such as mathematics and physics |
+ | aihub-broadcast-content | 5.56% | Korean-English translation dataset for broadcast content |
+ | aihub-patent | 5.56% | English-Korean translation dataset of patent specifications |
+ | aihub-colloquial | 5.56% | colloquial Korean-English translation dataset including neologisms and abbreviations |
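+
+ The 5.56% ratios (10 of 180 per src) can be verified directly from the test file. A small sketch, assuming only the `src` field shown in the schema above (requires Python 3.10+ for `Counter.total()`):
+
+ ```python
+ import json
+ from collections import Counter
+
+ with open('data/komt-1810k-test.jsonl', encoding='utf-8') as f:
+     counts = Counter(json.loads(line)['src'] for line in f)
+ for src, n in counts.most_common():
+     print(f'{src}: {n} ({n / counts.total():.2%})')  # expected: 10 (5.56%) each
+ ```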
assets/iris-icon.jpeg ADDED
assets/plot-bleu-by-sentence-length.png ADDED
assets/plot-bleu-by-src.png ADDED
assets/plot-bleu.png ADDED
create_table.py ADDED
@@ -0,0 +1,124 @@
+ import os
+ import pandas as pd
+
+ from collections import defaultdict
+ from evaluate import is_duplicated, is_length_exceed, get_average
+ from utils.decorate import cloud_model, decorate_model_name
+ from utils.file_handler import load_json
+
+
+ # aggregate scores from results_bleu/
+ def aggregate_bleu(json_data, bleu_table, src_table, length_table):
+     duplicate_count = defaultdict(int)
+     length_exceeds_count = defaultdict(int)
+
+     for data in json_data:
+         src_table[data["model"]]["Average"].append(data["bleu"])
+         src_table[data["model"]][data["src"]].append(data["bleu"])
+         if is_duplicated(data["generation"]):
+             duplicate_count[data["model"]] += 1
+         if is_length_exceed(data["reference"], data["generation"]):
+             length_exceeds_count[data["model"]] += 1
+     for model, row in src_table.items():
+         src_table[model] = dict((attr, get_average(val)) for attr, val in row.items())
+
+     # add the bleu, duplicate and length-exceeds columns
+     for model in src_table:
+         bleu_table[model]["Average"].append(src_table[model]["Average"])
+         bleu_table[model]["Bleu"] = src_table[model]["Average"]
+         bleu_table[model]["Duplicate"] = duplicate_count[model]
+         bleu_table[model]["Length Exceeds"] = length_exceeds_count[model]
+
+
+ # aggregate scores from results_self/
+ def aggregate_self(json_data, bleu_table, src_table, length_table):
+     sbleu_score = defaultdict(list)
+     for data in json_data:
+         sbleu_score[data["model"]].append(data["bleu"])
+
+     # add the sbleu column
+     for model in sbleu_score:
+         bleu_table[model]["SBleu"] = get_average(sbleu_score[model])
+         bleu_table[model]["Average"].append(bleu_table[model]["SBleu"])
+
+
+ # aggregate scores from results_length/
+ def aggregate_length(json_data, bleu_table, src_table, length_table):
+     for data in json_data:
+         length_table[data["model"]]["Average"].append(data["bleu"])
+         length_table[data["model"]][f"~{data['length']}"].append(data["bleu"])
+     for model, row in length_table.items():
+         length_table[model] = dict(
+             (attr, get_average(val)) for attr, val in row.items()
+         )
+
+     # add the bleu-sl column
+     for model in length_table:
+         bleu_table[model]["Bleu-SL"] = length_table[model]["Average"]
+         bleu_table[model]["Average"].append(bleu_table[model]["Bleu-SL"])
+         bleu_table[model]["Average"] = get_average(bleu_table[model]["Average"])
+
+
+ def create():
+     results_dirs = {
+         "results_bleu/": aggregate_bleu,
+         "results_self/": aggregate_self,
+         "results_length/": aggregate_length,
+     }
+
+     bleu_table = defaultdict(lambda: defaultdict(list))
+     src_table = defaultdict(lambda: defaultdict(list))
+     length_table = defaultdict(lambda: defaultdict(list))
+     tables = {
+         "bleu_and_sbleu": bleu_table,
+         "bleu_by_src": src_table,
+         "bleu_by_length": length_table,
+     }
+
+     # aggregate the bleu scores
+     for dir, aggregate in results_dirs.items():
+         json_data = []
+         for filename in os.listdir(dir):
+             if not filename.endswith(".jsonl"):
+                 continue
+             file_path = os.path.join(dir, filename)
+             json_data += load_json(file_path)
+         aggregate(json_data, *tables.values())
+
+     # build the table dataframes
+     for table in tables:
+         df = pd.DataFrame.from_dict(tables[table], orient="index")
+         if table == "bleu_and_sbleu":
+             df["Duplicate"] = df.pop("Duplicate")
+             df["Length Exceeds"] = df.pop("Length Exceeds")
+         df.reset_index(inplace=True, names="Model")
+         df["Model"] = list(map(decorate_model_name, df["Model"]))
+         df.insert(
+             0,
+             "Type",
+             list(
+                 map(
+                     lambda x: "Cloud" if x in cloud_model else "HuggingFace",
+                     df["Model"],
+                 )
+             ),
+         )
+         df.insert(
+             0, "Rank", df["Average"].rank(method="min", ascending=False).astype(int)
+         )
+         df = df.sort_values(by="Rank")
+         tables[table] = df
+
+     return tables.values()
+
+
+ def main():
+     tables = list(create())
+     print("# dataframe")
+     print(tables[0], "\n\n")
+     print("# markdown")
+     print(tables[0].to_markdown(index=False))
+
+
+ if __name__ == "__main__":
+     main()
data/komt-1810k-test.jsonl ADDED
The diff for this file is too large to render.
data/komt-dataset-length.jsonl ADDED
The diff for this file is too large to render.
evaluate.py ADDED
@@ -0,0 +1,116 @@
+ import os
+ import json
+ import re
+ import argparse
+ import statistics
+ from collections import defaultdict, Counter
+ from nltk.translate.bleu_score import corpus_bleu
+
+
+ def is_duplicated(text, top_k=10, min_word_len=0):
+     words = re.findall(r'\b\w+\b', text)
+     word_freq = Counter(words)
+
+     # enforce a minimum word length
+     if min_word_len > 0:
+         for word, count in list(word_freq.items()):
+             if len(word) <= min_word_len:
+                 del word_freq[word]
+
+     if len(word_freq) == 0:
+         return False
+
+     if len(word_freq) == 1 and word_freq.most_common(1)[0][1] > 5:
+         return word_freq.most_common(1)
+
+     top_items = word_freq.most_common(top_k)
+     frequencies = [frequency for item, frequency in top_items]
+     mean_frequency = sum(frequencies) / len(frequencies)
+
+     prev_frequency = 0
+     index = 0
+
+     if mean_frequency < 5:
+         return False
+
+     for item, frequency in top_items:
+         if (prev_frequency - frequency) > mean_frequency:
+             if index <= 1:
+                 return False
+             # print(prev_frequency, frequency, mean_frequency, item)
+             return top_items
+
+         prev_frequency = frequency
+         index += 1
+
+     return False
+
+
+ def is_length_exceed(reference, generation, min_ratio=0.2, max_ratio=2):
+     return not min_ratio <= (len(generation) / len(reference)) <= max_ratio
+
+
+ def get_average(a):
+     if isinstance(a, list):
+         return round(sum(a) / len(a), 2)
+     return a
+
+
+ def main():
+     parser = argparse.ArgumentParser("argument")
+     parser.add_argument(
+         "directory",
+         type=str,
+         help="directory containing .jsonl result files",
+     )
+     parser.add_argument('--detail', action='store_true', help='detail')
+     args = parser.parse_args()
+
+     # per-file collections of bleu scores, length ratios, and duplication info
+     file_src_bleu_scores = defaultdict(list)
+     file_length_ratio = defaultdict(list)
+     file_duplicated = defaultdict(list)
+     file_duplicated_detail = defaultdict(list)
+     # iterate over every file in the directory
+     for filename in os.listdir(args.directory):
+         if filename.endswith('.jsonl'):  # process only JSONL files
+             file_path = os.path.join(args.directory, filename)
+             with open(file_path, 'r', encoding='utf-8') as file:
+                 for index, line in enumerate(file):
+                     data = json.loads(line)
+                     src = data['src']
+                     bleu_score = data['bleu']
+                     file_src_bleu_scores[filename].append(bleu_score)
+
+                     # check length
+                     reference_length = len(data['reference'])
+                     generation_length = len(data['generation'])
+                     file_length_ratio[filename].append(round(generation_length / reference_length, 1))
+
+                     # check duplication
+                     word_count = is_duplicated(data['generation'])
+                     file_duplicated[filename].append(0 if word_count is False else 1)
+                     if word_count is not False:
+                         file_duplicated_detail[filename].append({'index': index, 'count': word_count, 'generation': data['generation']})
+
+     sorted_items = sorted(file_src_bleu_scores.items(), key=lambda x: statistics.mean(x[1]))
+     # compute the average bleu per file
+     print('bleu scores')
+     for filename, src_bleu_scores in sorted_items:
+         avg_bleu = sum(src_bleu_scores) / len(src_bleu_scores)
+         length_ratio = []
+         cur_length_ratio = file_length_ratio[filename]
+         ratio_mean = round(statistics.mean(cur_length_ratio), 1)
+         for index, ratio in enumerate(cur_length_ratio):
+             if ratio < 0.2 or ratio > 2.0:
+                 length_ratio.append((index, ratio))
+         print(f"{filename}: {avg_bleu:.2f}, out_of_range_count={len(length_ratio)}, duplicate={sum(file_duplicated[filename])}")
+         if args.detail:
+             print(f'\t error length:{length_ratio}')
+         if args.detail:
+             print(f"\t duplication")
+             for info in file_duplicated_detail[filename]:
+                 print('\t\t', info)
+
+
+ if __name__ == "__main__":
+     main()
leaderboard.py ADDED
@@ -0,0 +1,68 @@
+ import gradio as gr
+ import pandas as pd
+
+ from create_table import create
+
+
+ # rebuild the tables for the refresh button
+ def refresh():
+     table1, table2, table3 = create()
+     return table1, table2, table3
+
+
+ with gr.Blocks() as demo:
+     # initialize the tables
+     table1, table2, table3 = create()
+     with gr.Row():
+         gr.Markdown(
+             """
+ # 🏆 Iris Translation Leaderboard
+ Iris Translation is a project designed to evaluate Korean-to-English translation models
+
+ ## github
+ - https://github.com/davidkim205/translation
+
+ ## How to add model
+ If you want to add a new model, please write the model name and template in the [github issue](https://github.com/davidkim205/translation/issues).
+
+ ## evaluation criteria
+ - **Bleu**: average bleu score
+ - **SBleu**: Self-Bleu (double translation evaluation)
+ - **Bleu-SL**: bleu by sentence length
+ - **Duplicate**: count of repetitive sentence generation
+ - **Length Exceeds**: count of mismatches in translated sentence lengths exceeding the threshold
+ """
+         )
+     with gr.Row():
+         with gr.Tab("bleu and sbleu"):
+             with gr.Group():
+                 table1 = gr.Dataframe(value=table1, datatype="html")
+             with gr.Accordion("Show Chart", open=False):
+                 gr.Image(
+                     "assets/plot-bleu.png",
+                     show_download_button=False,
+                     container=False,
+                 )
+         with gr.Tab("bleu by src"):
+             with gr.Group():
+                 table2 = gr.Dataframe(value=table2, datatype="html")
+             with gr.Accordion("Show Chart", open=False):
+                 gr.Image(
+                     "assets/plot-bleu-by-src.png",
+                     show_download_button=False,
+                     container=False,
+                 )
+         with gr.Tab("bleu by sentence length"):
+             with gr.Group():
+                 table3 = gr.Dataframe(value=table3, datatype="html")
+             with gr.Accordion("Show Chart", open=False):
+                 gr.Image(
+                     "assets/plot-bleu-by-sentence-length.png",
+                     show_download_button=False,
+                     container=False,
+                 )
+
+     refresh_btn = gr.Button(value="Refresh")
+     refresh_btn.click(refresh, outputs=[table1, table2, table3])
+
+ demo.launch(server_name='0.0.0.0', share=True)
model.py ADDED
@@ -0,0 +1,175 @@
+ import torch
+ from transformers import AutoModelForCausalLM, AutoTokenizer, StoppingCriteria, StoppingCriteriaList
+ from utils.simple_bleu import simple_score
+
+ # per-model prompt templates and stop/trim tokens
+ templates = {
+     'gemma': {
+         'stop_words': ['<eos>', ''],
+         'ko2en': '<bos><start_of_turn>user\n다음 문장을 영어로 번역하세요.{0}<end_of_turn>\n<start_of_turn>model:',
+         'en2ko': '<bos><start_of_turn>user\n다음 문장을 한글로 번역하세요.{0}<end_of_turn>\n<start_of_turn>model:',
+         'trim_keywords': ['<eos>', ''],
+     },
+     'openchat': {
+         'stop_words': ['<eos>', '<|end_of_turn|>'],
+         'ko2en': '<s> GPT4 Correct User: 다음 문장을 영어로 번역하세요. {0}<|end_of_turn|> GPT4 Correct Assistant:',
+         'en2ko': '<s> GPT4 Correct User: 다음 문장을 한글로 번역하세요. {0}<|end_of_turn|> GPT4 Correct Assistant:',
+         'trim_keywords': ['<eos>', '<|end_of_turn|>'],
+     },
+     'qwen': {
+         'stop_words': ['<eos>', '<|im_end|>'],
+         'ko2en': '<|im_start|>system \n You are a helpful assistant<|im_end|>\n <|im_start|>다음 문장을 영어로 번역하세요. \n {0}<|im_end|>\n<|im_start|>assistant\n',
+         'en2ko': '<|im_start|>system \n You are a helpful assistant<|im_end|>\n <|im_start|>다음 문장을 한글로 번역하세요. \n {0}<|im_end|>\n<|im_start|>assistant\n',
+         'trim_keywords': ['<eos>', '<|im_end|>'],
+     },
+     'davidkim205/iris-7b': {
+         'stop_words': ['</s>'],
+         'ko2en': '[INST] 다음 문장을 영어로 번역하세요.{0} [/INST]',
+         'en2ko': '[INST] 다음 문장을 한글로 번역하세요.{0} [/INST]',
+         'trim_keywords': ['</s>'],
+     },
+     'squarelike/Gugugo-koen-7B-V1.1': {
+         'stop_words': ['</s>', '</끝>'],
+         'ko2en': '### 한국어: {0}</끝>\n### 영어:',
+         'en2ko': "### 영어: {0}</끝>\n### 한국어:",
+         'trim_keywords': ['</s>', '</끝>'],
+     },
+     'maywell/Synatra-7B-v0.3-Translation': {
+         'stop_words': ['</s>', '</끝>', '<|im_end|>'],
+         'ko2en': '<|im_start|>system\n주어진 문장을 영어로 번역해라.<|im_end|>\n<|im_start|>user\n{0}<|im_end|>\n<|im_start|>assistant',
+         'en2ko': '<|im_start|>system\n주어진 문장을 한국어로 번역해라.<|im_end|>\n<|im_start|>user\n{0}<|im_end|>\n<|im_start|>assistant',
+         'trim_keywords': ['<|im_end|>'],
+     },
+     'Unbabel/TowerInstruct-7B-v0.1': {
+         'stop_words': ['</s>', '</끝>', '<|im_end|>'],
+         'ko2en': '<|im_start|>user\nTranslate the following text from Korean into English.\nKorean: {0}\nEnglish:<|im_end|>\n<|im_start|>assistant',
+         'en2ko': '<|im_start|>user\nTranslate the following text from English into Korean.\nEnglish: {0}\nKorean:<|im_end|>\n<|im_start|>assistant',
+         'trim_keywords': ['<|im_end|>'],
+     },
+ }
+
+ model_info = {'model': None, 'tokenizer': None, 'stopping_criteria': None}
+
+
+ class LocalStoppingCriteria(StoppingCriteria):
+
+     def __init__(self, tokenizer, stop_words=[]):
+         super().__init__()
+
+         stops = [tokenizer(stop_word, return_tensors='pt', add_special_tokens=False)['input_ids'].squeeze() for
+                  stop_word in stop_words]
+         print('stop_words', stop_words)
+         print('stop_words_ids', stops)
+         self.stop_words = stop_words
+         self.stops = [stop.cuda() for stop in stops]
+         self.tokenizer = tokenizer
+
+     def _compare_token(self, input_ids):
+         for stop in self.stops:
+             if len(stop.size()) != 1:
+                 continue
+             stop_len = len(stop)
+             if torch.all((stop == input_ids[0][-stop_len:])).item():
+                 return True
+
+         return False
+
+     def _compare_decode(self, input_ids):
+         input_str = self.tokenizer.decode(input_ids[0])
+         for stop_word in self.stop_words:
+             if input_str.endswith(stop_word):
+                 return True
+         return False
+
+     def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor):
+         return self._compare_decode(input_ids)
+
+
+ def trim_sentence(sentence, keywords):
+     for keyword in keywords:
+         if keyword in sentence:
+             # if the keyword is found, cut the sentence at its index
+             index = sentence.find(keyword)
+             trimmed_sentence = sentence[:index]
+             sentence = trimmed_sentence.strip()  # strip surrounding whitespace
+     return sentence
+
+
+ def load_model(path, template_name=None):
+     global model_info
+     print('load_model', path)
+     if template_name is None:
+         template_name = path
+     if templates.get(template_name) is None:
+         template_name = 'davidkim205/iris-7b'
+     model = AutoModelForCausalLM.from_pretrained(path, torch_dtype=torch.bfloat16, device_map='auto')
+     tokenizer = AutoTokenizer.from_pretrained(path)
+
+     model_info['model'] = model
+     model_info['tokenizer'] = tokenizer
+     model_info['template'] = templates[template_name]
+
+     stop_words = templates[template_name]['stop_words']
+     stopping_criteria = StoppingCriteriaList([LocalStoppingCriteria(tokenizer=tokenizer, stop_words=stop_words)])
+     model_info['stopping_criteria'] = stopping_criteria
+
+
+ def generate(prompt):
+     global model_info
+     if model_info['model'] is None:
+         print('model is null, load the model first.')
+         return ''
+     model = model_info['model']
+     tokenizer = model_info['tokenizer']
+     stopping_criteria = model_info['stopping_criteria']
+     encoding = tokenizer(
+         prompt,
+         return_tensors='pt',
+         return_token_type_ids=False
+     ).to("cuda")
+     gen_tokens = model.generate(
+         **encoding,
+         max_new_tokens=2048,
+         temperature=1.0,
+         num_beams=5,
+         stopping_criteria=stopping_criteria
+     )
+     prompt_end_size = encoding.input_ids.shape[1]
+     result = tokenizer.decode(gen_tokens[0, prompt_end_size:])
+     result = trim_sentence(result, model_info['template']['trim_keywords'])
+     return result
+
+
+ def translate_ko2en(text):
+     global model_info
+     prompt = model_info['template']['ko2en'].format(text)
+     return generate(prompt)
+
+
+ def translate_en2ko(text):
+     global model_info
+     prompt = model_info['template']['en2ko'].format(text)
+     return generate(prompt)
+
+
+ def main():
+     load_model("davidkim205/iris-7b")
+     # load_model("squarelike/Gugugo-koen-7B-V1.1")
+     # load_model("maywell/Synatra-7B-v0.3-Translation")
+     # load_model("Unbabel/TowerInstruct-7B-v0.1")
+     while True:
+         text = input('>')
+         en_text = translate_ko2en(text)
+         ko_text = translate_en2ko(en_text)
+         print('------------------')
+         print('en_text', en_text)
+         print('ko_text', ko_text)
+         print('score', simple_score(text, ko_text))
+
+
+ if __name__ == "__main__":
+     main()
models/TowerInstruct.py ADDED
@@ -0,0 +1,47 @@
+ import torch
+ from transformers import pipeline
+ from utils.simple_bleu import simple_score
+
+ pipe = pipeline("text-generation", model="Unbabel/TowerInstruct-7B-v0.1", torch_dtype=torch.bfloat16, device_map="auto")
+
+
+ def translate_ko2en(text):
+     messages = [
+         {"role": "user", "content": f"Translate the following text from Korean into English.\n: Korean:{text}\nEnglish:"},
+     ]
+     prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+     outputs = pipe(prompt, max_new_tokens=2048, do_sample=False)
+     result = outputs[0]["generated_text"]
+     result = result.split('<|im_start|>assistant')[1]
+     result = result.replace('\n:', '')
+     result = result.lstrip('\n')
+     result = result.lstrip(':')
+     return result
+
+
+ def translate_en2ko(text):
+     messages = [
+         {"role": "user",
+          "content": f"Translate the following text from English into Korean.\nEnglish: {text} \nKorean:"},
+     ]
+     prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+     outputs = pipe(prompt, max_new_tokens=2048, do_sample=False)
+     result = outputs[0]["generated_text"]
+     result = result.split('<|im_start|>assistant')[1]
+     result = result.replace('\n:', '')
+     result = result.lstrip('\n')
+     result = result.lstrip(':')
+     return result
+
+
+ def main():
+     while True:
+         text = input('>')
+         en_text = translate_ko2en(text)
+         ko_text = translate_en2ko(en_text)
+         print('en_text', en_text)
+         print('ko_text', ko_text)
+         print('score', simple_score(text, ko_text))
+
+
+ if __name__ == "__main__":
+     main()
models/gemma.py ADDED
@@ -0,0 +1,81 @@
from transformers import AutoModelForCausalLM, AutoTokenizer, StoppingCriteria, StoppingCriteriaList
import torch

from utils.simple_bleu import simple_score

# NOTE: this repo id appears to be carried over from models/iris_7b.py;
# point it at the Gemma checkpoint being evaluated before running this script.
repo = "davidkim205/iris-7b"
model = AutoModelForCausalLM.from_pretrained(repo, torch_dtype=torch.bfloat16, device_map='auto')
tokenizer = AutoTokenizer.from_pretrained(repo)


class StoppingCriteriaSub(StoppingCriteria):
    """Stop generation once the sequence ends with any of the stop-token patterns."""

    def __init__(self, stops=[], encounters=1):
        super().__init__()
        self.stops = list(stops)

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor):
        for stop in self.stops:
            if torch.all(stop == input_ids[0][-len(stop):]).item():
                return True
        return False


# Hard-coded token-id sequences for the '</끝>' end markers used as stop words.
stop_words_ids = torch.tensor(
    [[829, 45107, 29958], [1533, 45107, 29958], [829, 45107, 29958], [21106, 45107, 29958]]).to("cuda")
stopping_criteria = StoppingCriteriaList([StoppingCriteriaSub(stops=stop_words_ids)])


def load_model(path):
    """Reload model/tokenizer from a local path, e.g. an intermediate checkpoint."""
    global model, tokenizer
    print('load_model', path)
    model = AutoModelForCausalLM.from_pretrained(path, torch_dtype=torch.bfloat16, device_map='auto')
    tokenizer = AutoTokenizer.from_pretrained(path)


def generate(prompt):
    gened = model.generate(
        **tokenizer(
            prompt,
            return_tensors='pt',
            return_token_type_ids=False
        ).to("cuda"),
        max_new_tokens=2048,
        temperature=0.3,
        num_beams=5,
        stopping_criteria=stopping_criteria
    )
    # Drop the prompt echo and any end-of-text / prompt-template markers.
    result = tokenizer.decode(gened[0][1:]).replace(prompt + " ", "").replace("</끝>", "")
    result = result.replace('</s>', '')
    result = result.replace('### 한국어: ', '')
    result = result.replace('### 영어: ', '')
    return result


def translate_ko2en(text):
    # Prompt reads: "Translate the following sentence into English."
    prompt = f"[INST] 다음 문장을 영어로 번역하세요.{text} [/INST]"
    return generate(prompt)


def translate_en2ko(text):
    # Prompt reads: "Translate the following sentence into Korean."
    prompt = f"[INST] 다음 문장을 한글로 번역하세요.{text} [/INST]"
    return generate(prompt)


def main():
    while True:
        text = input('>')
        en_text = translate_ko2en(text)
        ko_text = translate_en2ko(en_text)
        print('en_text', en_text)
        print('ko_text', ko_text)
        print('score', simple_score(text, ko_text))
    """
    Sample session:
    >>? 3천만 개가 넘는 파일과 250억 개의 토큰이 있습니다. Phi1.5의 데이터 세트 구성에 접근하지만 오픈 소스 모델인 Mixtral 8x7B를 사용하고 Apache2.0 라이선스에 따라 라이선스가 부여됩니다.
    en_text We have 30 million files and 2.5 billion tokens. We approach Phi1.5's dataset composition, but we use the open-source model, Mixtral 8x7B, and we are licensed according to the Apache2.0 license.
    ko_text 3,000만 개의 파일과 250억 개의 토큰이 있습니다. Phi1.5의 데이터 집합에 접근하지만 오픈 소스 모델인 Mixtral 8x7B를 사용하고 Apache2.0 라이선스에 따라 라이선스를 받았습니다.
    score 0.6154733407407874
    """


if __name__ == "__main__":
    main()
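One decoding detail in the generate() shared by this file, gugugo.py, and iris_7b.py below: num_beams=5 is passed without do_sample=True, so transformers runs deterministic beam search and the temperature argument has no effect. If sampled decoding were actually intended, the flag would need to be set explicitly; a sketch of that variant, not the configuration behind the published scores:

    gened = model.generate(
        **tokenizer(prompt, return_tensors='pt', return_token_type_ids=False).to("cuda"),
        max_new_tokens=2048,
        do_sample=True,        # required for temperature to take effect
        temperature=0.3,
        stopping_criteria=stopping_criteria,
    )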
models/gugugo.py ADDED
@@ -0,0 +1,74 @@
from transformers import AutoModelForCausalLM, AutoTokenizer, StoppingCriteria, StoppingCriteriaList
import torch

from utils.simple_bleu import simple_score

repo = "squarelike/Gugugo-koen-7B-V1.1"
model = AutoModelForCausalLM.from_pretrained(repo, torch_dtype=torch.bfloat16, device_map='auto')
tokenizer = AutoTokenizer.from_pretrained(repo)


class StoppingCriteriaSub(StoppingCriteria):
    """Stop generation once the sequence ends with any of the stop-token patterns."""

    def __init__(self, stops=[], encounters=1):
        super().__init__()
        self.stops = list(stops)

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor):
        for stop in self.stops:
            if torch.all(stop == input_ids[0][-len(stop):]).item():
                return True
        return False


# Hard-coded token-id sequences for the '</끝>' end markers used as stop words.
stop_words_ids = torch.tensor(
    [[829, 45107, 29958], [1533, 45107, 29958], [829, 45107, 29958], [21106, 45107, 29958]]).to("cuda")
stopping_criteria = StoppingCriteriaList([StoppingCriteriaSub(stops=stop_words_ids)])


def generate(prompt):
    gened = model.generate(
        **tokenizer(
            prompt,
            return_tensors='pt',
            return_token_type_ids=False
        ).to("cuda"),
        max_new_tokens=2048,
        temperature=0.1,
        num_beams=5,
        stopping_criteria=stopping_criteria
    )
    # Drop the prompt echo and any end-of-text / prompt-template markers.
    result = tokenizer.decode(gened[0][1:]).replace(prompt + " ", "").replace("</끝>", "")
    result = result.replace('</s>', '')
    result = result.replace('### 한국어: ', '')
    result = result.replace('### 영어: ', '')
    return result


def translate_ko2en(text):
    prompt = f"### 한국어: {text}</끝>\n### 영어:"
    return generate(prompt)


def translate_en2ko(text):
    prompt = f"### 영어: {text}</끝>\n### 한국어:"
    return generate(prompt)


def main():
    while True:
        text = input('>')
        en_text = translate_ko2en(text)
        ko_text = translate_en2ko(en_text)
        print('en_text', en_text)
        print('ko_text', ko_text)
        print('score', simple_score(text, ko_text))
    """
    Sample session:
    >>? 3천만 개가 넘는 파일과 250억 개의 토큰이 있습니다. Phi1.5의 데이터 세트 구성에 접근하지만 오픈 소스 모델인 Mixtral 8x7B를 사용하고 Apache2.0 라이선스에 따라 라이선스가 부여됩니다.
    en_text We have 30 million files and 2.5 billion tokens. We approach Phi1.5's dataset composition, but we use the open-source model, Mixtral 8x7B, and we are licensed according to the Apache2.0 license.
    ko_text 3,000만 개의 파일과 250억 개의 토큰이 있습니다. Phi1.5의 데이터 집합에 접근하지만 오픈 소스 모델인 Mixtral 8x7B를 사용하고 Apache2.0 라이선스에 따라 라이선스를 받았습니다.
    score 0.6154733407407874
    """


if __name__ == "__main__":
    main()
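The stop_words_ids above are hard-coded token-id sequences for the '</끝>' end marker, including variants that arise from different surrounding contexts. A hedged alternative is to derive the ids from the tokenizer at run time so they track the actual vocabulary; a minimal sketch, assuming the marker tokenizes the same way it appears during generation:

    end_marker = '</끝>'
    stop_ids = tokenizer(end_marker, add_special_tokens=False, return_tensors='pt').input_ids[0].to('cuda')
    stopping_criteria = StoppingCriteriaList([StoppingCriteriaSub(stops=[stop_ids])])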
models/iris_7b.py ADDED
@@ -0,0 +1,81 @@
from transformers import AutoModelForCausalLM, AutoTokenizer, StoppingCriteria, StoppingCriteriaList
import torch

from utils.simple_bleu import simple_score

repo = "davidkim205/iris-7b"
model = AutoModelForCausalLM.from_pretrained(repo, torch_dtype=torch.bfloat16, device_map='auto')
tokenizer = AutoTokenizer.from_pretrained(repo)


class StoppingCriteriaSub(StoppingCriteria):
    """Stop generation once the sequence ends with any of the stop-token patterns."""

    def __init__(self, stops=[], encounters=1):
        super().__init__()
        self.stops = list(stops)

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor):
        for stop in self.stops:
            if torch.all(stop == input_ids[0][-len(stop):]).item():
                return True
        return False


# Hard-coded token-id sequences for the '</끝>' end markers used as stop words.
stop_words_ids = torch.tensor(
    [[829, 45107, 29958], [1533, 45107, 29958], [829, 45107, 29958], [21106, 45107, 29958]]).to("cuda")
stopping_criteria = StoppingCriteriaList([StoppingCriteriaSub(stops=stop_words_ids)])


def load_model(path):
    """Reload model/tokenizer from a local path, e.g. an intermediate checkpoint."""
    global model, tokenizer
    print('load_model', path)
    model = AutoModelForCausalLM.from_pretrained(path, torch_dtype=torch.bfloat16, device_map='auto')
    tokenizer = AutoTokenizer.from_pretrained(path)


def generate(prompt):
    gened = model.generate(
        **tokenizer(
            prompt,
            return_tensors='pt',
            return_token_type_ids=False
        ).to("cuda"),
        max_new_tokens=2048,
        temperature=0.3,
        num_beams=5,
        stopping_criteria=stopping_criteria
    )
    # Drop the prompt echo and any end-of-text / prompt-template markers.
    result = tokenizer.decode(gened[0][1:]).replace(prompt + " ", "").replace("</끝>", "")
    result = result.replace('</s>', '')
    result = result.replace('### 한국어: ', '')
    result = result.replace('### 영어: ', '')
    return result


def translate_ko2en(text):
    # Prompt reads: "Translate the following sentence into English."
    prompt = f"[INST] 다음 문장을 영어로 번역하세요.{text} [/INST]"
    return generate(prompt)


def translate_en2ko(text):
    # Prompt reads: "Translate the following sentence into Korean."
    prompt = f"[INST] 다음 문장을 한글로 번역하세요.{text} [/INST]"
    return generate(prompt)


def main():
    while True:
        text = input('>')
        en_text = translate_ko2en(text)
        ko_text = translate_en2ko(en_text)
        print('en_text', en_text)
        print('ko_text', ko_text)
        print('score', simple_score(text, ko_text))
    """
    Sample session:
    >>? 3천만 개가 넘는 파일과 250억 개의 토큰이 있습니다. Phi1.5의 데이터 세트 구성에 접근하지만 오픈 소스 모델인 Mixtral 8x7B를 사용하고 Apache2.0 라이선스에 따라 라이선스가 부여됩니다.
    en_text We have 30 million files and 2.5 billion tokens. We approach Phi1.5's dataset composition, but we use the open-source model, Mixtral 8x7B, and we are licensed according to the Apache2.0 license.
    ko_text 3,000만 개의 파일과 250억 개의 토큰이 있습니다. Phi1.5의 데이터 집합에 접근하지만 오픈 소스 모델인 Mixtral 8x7B를 사용하고 Apache2.0 라이선스에 따라 라이선스를 받았습니다.
    score 0.6154733407407874
    """


if __name__ == "__main__":
    main()
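load_model() above is what lets one module score a sweep of intermediate checkpoints; the results/ entries below for checkpoint-105000 through checkpoint-125000 come from such runs. A usage sketch with a hypothetical local checkpoint path (note that importing the module first loads the default iris-7b weights):

    from models.iris_7b import load_model, translate_ko2en

    load_model('output/checkpoint-125000')  # hypothetical checkpoint directory
    print(translate_ko2en('안녕하세요'))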
models/madlad400.py ADDED
@@ -0,0 +1,39 @@
from transformers import T5ForConditionalGeneration, T5Tokenizer
from utils.simple_bleu import simple_score
import torch

model_name = 'jbochi/madlad400-10b-mt'
model = T5ForConditionalGeneration.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="auto")
tokenizer = T5Tokenizer.from_pretrained(model_name)


def translate_ko2en(text):
    # MADLAD-400 selects its target language with a '<2xx>' prefix tag.
    text = f"<2en> {text}"
    input_ids = tokenizer(text, return_tensors="pt").input_ids.to(model.device)
    outputs = model.generate(input_ids=input_ids, max_new_tokens=2048)
    result = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return result


def translate_en2ko(text):
    text = f"<2ko> {text}"
    input_ids = tokenizer(text, return_tensors="pt").input_ids.to(model.device)
    outputs = model.generate(input_ids=input_ids, max_new_tokens=2048)
    result = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return result


def main():
    while True:
        text = input('>')
        en_text = translate_ko2en(text)
        ko_text = translate_en2ko(en_text)
        print('en_text', en_text)
        print('ko_text', ko_text)
        print('score', simple_score(text, ko_text))


if __name__ == "__main__":
    main()
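Because the two helpers above differ only in the target-language tag, the same pattern should extend to any language MADLAD-400 supports. A hypothetical extra helper, for illustration only:

    def translate_ko2ja(text):  # hypothetical: Japanese via the '<2ja>' tag
        input_ids = tokenizer(f"<2ja> {text}", return_tensors="pt").input_ids.to(model.device)
        outputs = model.generate(input_ids=input_ids, max_new_tokens=2048)
        return tokenizer.decode(outputs[0], skip_special_tokens=True)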
models/mbart50.py ADDED
@@ -0,0 +1,45 @@
from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
import torch
from utils.simple_bleu import simple_score

model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-50-many-to-many-mmt", torch_dtype=torch.bfloat16, device_map="auto")
tokenizer = MBart50TokenizerFast.from_pretrained("facebook/mbart-large-50-many-to-many-mmt")


def translate_ko2en(text):
    # src_lang must be set before tokenizing; the target is forced via its BOS language code.
    tokenizer.src_lang = "ko_KR"
    input_ids = tokenizer(text, return_tensors="pt").input_ids.to(model.device)
    outputs = model.generate(input_ids=input_ids, forced_bos_token_id=tokenizer.lang_code_to_id["en_XX"])
    outputs = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return outputs


def translate_en2ko(text):
    tokenizer.src_lang = "en_XX"
    input_ids = tokenizer(text, return_tensors="pt").input_ids.to(model.device)
    outputs = model.generate(input_ids=input_ids, forced_bos_token_id=tokenizer.lang_code_to_id["ko_KR"], max_new_tokens=2048)
    outputs = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return outputs


def main():
    while True:
        text = input('>')
        en_text = translate_ko2en(text)
        ko_text = translate_en2ko(en_text)
        print('en_text', en_text)
        print('ko_text', ko_text)
        print('score', simple_score(text, ko_text))
    """
    Sample session:
    >>? 3천만 개가 넘는 파일과 250억 개의 토큰이 있습니다. Phi1.5의 데이터 세트 구성에 접근하지만 오픈 소스 모델인 Mixtral 8x7B를 사용하고 Apache2.0 라이선스에 따라 라이선스가 부여됩니다.
    en_text It has over 30 million files and 2.5 billion tokens, accesses the data set configuration of Phi1.5, but uses an open-source model, Mixtral 8x7B, and is licensed under the Apache 2.0 license.
    ko_text 30만개의 파일과 2.5억개의 토큰을 가지고 있고, Phi1.5의 데이터 세트 configuration에 접근하지만, 오픈소스 모델인 Mixtral 8x7B를 사용하고, Apache 2.0 라이센스 아래 licenc를 가지고 있습니다.
    score 0.14724623770949022
    """


if __name__ == "__main__":
    main()
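Note the asymmetry above: translate_en2ko caps generation with max_new_tokens=2048, while translate_ko2en falls back to the checkpoint's default generation length. For long inputs it may be safer to pass the limit in both directions; a suggested tweak, not the configuration behind the published scores:

    outputs = model.generate(
        input_ids=input_ids,
        forced_bos_token_id=tokenizer.lang_code_to_id["en_XX"],
        max_new_tokens=2048,  # avoid silent truncation of long translations
    )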
models/nllb200.py ADDED
@@ -0,0 +1,48 @@
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from utils.simple_bleu import simple_score
import torch

model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-1.3B", torch_dtype=torch.bfloat16, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-1.3B")


def translate_ko2en(text):
    batched_input = [text]
    inputs = tokenizer(batched_input, return_tensors="pt", padding=True)
    # NLLB forces the target language via its FLORES-200 code as the BOS token.
    translated_tokens = model.generate(
        **inputs.to(model.device), forced_bos_token_id=tokenizer.lang_code_to_id["eng_Latn"]
    )
    result = tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
    return result


def translate_en2ko(text):
    batched_input = [text]
    inputs = tokenizer(batched_input, return_tensors="pt", padding=True)
    translated_tokens = model.generate(
        **inputs.to(model.device), forced_bos_token_id=tokenizer.lang_code_to_id["kor_Hang"], max_new_tokens=2048)
    result = tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
    return result


def main():
    while True:
        text = input('>')
        en_text = translate_ko2en(text)
        ko_text = translate_en2ko(en_text)
        print('en_text', en_text)
        print('ko_text', ko_text)
        print('score', simple_score(text, ko_text))
    """
    Sample session:
    >>? 3천만 개가 넘는 파일과 250억 개의 토큰이 있습니다. Phi1.5의 데이터 세트 구성에 접근하지만 오픈 소스 모델인 Mixtral 8x7B를 사용하고 Apache2.0 라이선스에 따라 라이선스가 부여됩니다.
    en_text There are over 30 million files and 250 billion tokens. Phi1.5's data set configuration is accessible but uses the open source model Mixtral 8x7B and is licensed under the Apache 2.0 license.
    ko_text 300만 개 이상의 파일과 25억 개의 토큰이 있습니다. Phi1.5의 데이터 세트 구성은 액세스 가능하지만 오픈 소스 모델 Mixtral 8x7B를 사용하고 Apache 2.0 라이선스에 따라 라이선스됩니다.
    score 0.3090015909429233
    """


if __name__ == "__main__":
    main()
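Both NLLB helpers already tokenize a list of size one, so the same path handles real batches, which is the natural way to speed up full evaluation runs. A hypothetical batched variant under that assumption:

    def translate_ko2en_batch(texts):  # hypothetical: list[str] -> list[str]
        inputs = tokenizer(texts, return_tensors="pt", padding=True)
        translated = model.generate(
            **inputs.to(model.device),
            forced_bos_token_id=tokenizer.lang_code_to_id["eng_Latn"],
            max_new_tokens=2048,
        )
        return tokenizer.batch_decode(translated, skip_special_tokens=True)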
models/synatra.py ADDED
@@ -0,0 +1,63 @@
from utils.simple_bleu import simple_score
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

device = "cuda"  # the device to load the model onto

model = AutoModelForCausalLM.from_pretrained("maywell/Synatra-7B-v0.3-Translation", torch_dtype=torch.bfloat16, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained("maywell/Synatra-7B-v0.3-Translation")


def translate_ko2en(text):
    messages = [
        {"role": "system", "content": "주어진 문장을 영어로 번역해라."},  # "Translate the given sentence into English."
        {"role": "user", "content": text},
    ]

    encodeds = tokenizer.apply_chat_template(messages, return_tensors="pt")

    model_inputs = encodeds.to(device)
    model.to(device)  # redundant with device_map="auto" on a single GPU; kept as in the original

    generated_ids = model.generate(model_inputs, max_new_tokens=2048, do_sample=True)
    output = tokenizer.batch_decode(generated_ids)[0]
    # Trim the trailing end marker, then keep only the final (assistant) turn.
    if output.endswith("<|im_end|>"):
        output = output[:-len("<|im_end|>")]
    output = output.split('<|im_end|>')[-1]
    return output


def translate_en2ko(text):
    messages = [
        {"role": "system", "content": "주어진 문장을 한국어로 번역해라."},  # "Translate the given sentence into Korean."
        {"role": "user", "content": text},
    ]

    encodeds = tokenizer.apply_chat_template(messages, return_tensors="pt")

    model_inputs = encodeds.to(device)
    model.to(device)

    generated_ids = model.generate(model_inputs, max_new_tokens=2048, do_sample=True)
    output = tokenizer.batch_decode(generated_ids)[0]
    if output.endswith("<|im_end|>"):
        output = output[:-len("<|im_end|>")]
    output = output.split('<|im_end|>')[-1]
    return output


def main():
    while True:
        text = input('>')
        en_text = translate_ko2en(text)
        ko_text = translate_en2ko(en_text)
        print('------en_text--------')
        print(en_text)
        print('------ko_text--------')
        print(ko_text)
        print('score', simple_score(text, ko_text))


if __name__ == "__main__":
    main()
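Unlike the beam-search scripts above, this one decodes with do_sample=True, so repeated runs yield different translations and different round-trip scores. If reproducibility matters for comparisons, one option (an addition, not part of the original script) is to fix the RNGs before generating:

    from transformers import set_seed

    set_seed(42)  # fixes the Python, NumPy, and torch seeds for repeatable sampling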
requirements.txt ADDED
@@ -0,0 +1,19 @@
torch>=1.13.1
transformers>=4.36.2
datasets>=2.14.3
accelerate>=0.21.0
peft>=0.7.0
trl>=0.7.6
gradio>=3.38.0,<4.0.0
scipy
einops
sentencepiece
protobuf
jieba
rouge-chinese
nltk
uvicorn
pydantic
sse-starlette
matplotlib
kiwipiepy
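The pinned set above installs in one step with `pip install -r requirements.txt`; nltk supplies the BLEU implementation and kiwipiepy presumably handles Korean tokenization for the scoring utilities.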
results/result-iris_7b-.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
results/result-iris_7b-checkpoint-105000.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
results/result-iris_7b-checkpoint-110000.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
results/result-iris_7b-checkpoint-115000.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
results/result-iris_7b-checkpoint-120000.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
results/result-iris_7b-checkpoint-125000.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
results/result-iris_7b-iris_7b.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
results/result_self-google.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
results_bleu/result_bleu-Synatra-7B-v0.3-Translation.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
results_bleu/result_bleu-TowerInstruct.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
results_bleu/result_bleu-azure.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
results_bleu/result_bleu-deepl.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
results_bleu/result_bleu-google.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
results_bleu/result_bleu-gugugo.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
results_bleu/result_bleu-iris_7b.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
results_bleu/result_bleu-madlad400.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
results_bleu/result_bleu-nllb200.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
results_bleu/result_bleu-papago.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
results_length/Gugugo-koen-7B-V1.1-result.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
results_length/Synatra-7B-v0.3-Translation-result.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
results_length/TowerInstruct-7B-v0.1-result.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
results_length/azure-result.jsonl ADDED
The diff for this file is too large to render. See raw diff