Ilker Kesen committed

Commit 500fbd7 · Parent(s): 74daf31

initialize the first version
Files changed:

- .gitignore +162 -0
- LICENSE +21 -0
- README.md +1 -14
- app.py +197 -0
- assets/kuis-ai-logo.png +0 -0
- data.py +121 -0
- data/datasets.json +185 -0
- environment.yaml +93 -0
- process_result.py +72 -0
- requirements.txt +8 -0
- results/zero-shot/aya-23-8b.json +161 -0
- results/zero-shot/aya-expanse-8b.json +159 -0
- results/zero-shot/aya101.json +172 -0
- results/zero-shot/commencis-7b.json +172 -0
- results/zero-shot/kanarya-2b.json +171 -0
- results/zero-shot/llama-3-8b-instruct.json +160 -0
- results/zero-shot/llama-3-8b.json +159 -0
- results/zero-shot/llama-3.1-8b-instruct.json +159 -0
- results/zero-shot/llama-3.1-8b.json +127 -0
- results/zero-shot/llama-3.2-1b.json +191 -0
- results/zero-shot/llama-3.2-3b-instruct.json +191 -0
- results/zero-shot/mistral-7b.json +165 -0
- results/zero-shot/trendyol-7b.json +172 -0
- results/zero-shot/turna.json +172 -0
- utils.py +28 -0
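
Note: only the first few diffs are reproduced below. app.py imports three helpers from utils.py, whose diff body is not shown on this page. The following is a hypothetical sketch inferred from the call sites in app.py, not the committed code:

# Hypothetical reconstruction of utils.py -- the committed file (+28 -0) is
# not shown in this page. Names and signatures come from app.py's import
# line; every implementation detail below is an assumption.
import glob
import json
import os


def preprocess_path(path):
    # app.py passes repo-relative paths like './data/datasets.json';
    # resolving to an absolute path is one plausible normalization.
    return os.path.abspath(os.path.expanduser(path))


def read_results(path):
    # app.py calls read_results('./results/zero-shot') and iterates entries
    # that each carry a 'model' spec dict and a 'results' list.
    entries = []
    for filename in sorted(glob.glob(os.path.join(preprocess_path(path), '*.json'))):
        with open(filename, 'r') as f:
            entries.append(json.load(f))
    return entries


def get_model_url(model_spec):
    # Rendered by a Streamlit LinkColumn; a Hugging Face model page built
    # from an assumed 'model' field is one possibility.
    return f"https://huggingface.co/{model_spec['model']}"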
 
    	
.gitignore ADDED
@@ -0,0 +1,162 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
+.pdm.toml
+.pdm-python
+.pdm-build/
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
    	
LICENSE ADDED
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2024 KUIS AI Center
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
    	
README.md CHANGED
@@ -1,14 +1 @@
-
-title: Pergel
-emoji: 📈
-colorFrom: blue
-colorTo: pink
-sdk: streamlit
-sdk_version: 1.40.2
-app_file: app.py
-pinned: false
-license: mit
-short_description: 'Pergel: A Unified Benchmark for Evaluating Turkish LLMs'
----
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+# Cetvel-leaderboard
    	
app.py ADDED
@@ -0,0 +1,197 @@
+import streamlit as st
+import pandas as pd
+import json
+from utils import read_results, preprocess_path, get_model_url
+from data import Tasks, Metrics, DATASET_TASK_DICT, TASK_METRIC_DICT, DATASET_GROUPS
+
+
+st.set_page_config(
+    page_title='Cetvel 📏',
+    layout='centered',
+)
+
+
+@st.cache_data
+def cache_results(path):
+    json_results = read_results(path)
+    results = list()
+    for entry in json_results:
+        row = {
+            'model': entry['model']['model'],
+            'num_parameters': entry['model']['num_parameters'],
+            'url': get_model_url(entry['model']),
+            'architecture': entry['model']['architecture'],
+            'type': entry['model']['type'],
+            'precision': entry['model']['dtype'],
+        }
+        for result in entry['results']:
+            task = result['task']
+            metric = TASK_METRIC_DICT.get(task)
+            score = result.get(metric)
+            score = 100 * score if metric != Metrics.WER and score is not None else score
+            row[result['name']] = score
+        results.append(row)
+    df = pd.DataFrame(results)
+    for group, metadata in DATASET_GROUPS.items():
+        df[group] = df[metadata['datasets']].mean(axis=1)
+    return df
+
+
+@st.cache_data
+def cache_datasets(path):
+    path = preprocess_path(path)
+    with open(path, 'r') as f:
+        datasets = json.load(f)
+    for key in datasets.keys():
+        datasets[key]['dataset'] = key
+    return datasets
+
+
+def create_column_configs(items):
+    column_configs = dict()
+    for key, metadata in items.items():
+        column_configs[key] = st.column_config.NumberColumn(
+            metadata.get('name', key),
+            help=metadata['description'],
+            min_value=0,
+            format="%2.2f"
+        )
+    return column_configs
+
+
+def insert_average(df, keys):
+    df = df.copy(deep=True)
+    df['average'] = df.loc[:, [x for x in df.columns if x in keys]].mean(axis=1)
+    df.insert(1, 'average', df.pop('average'))
+    df.index += 1
+    return df.sort_values(by=['average'], ascending=False)
+
+
+MODEL_SPEC_CONFIGS = {
+    'model': st.column_config.TextColumn(
+        'Model',
+        help='Large Language Model (LLM) used for the experiments.',
+        max_chars=120,
+    ),
+    'url': st.column_config.LinkColumn(
+        'URL',
+        help='Model URL.',
+        display_text='Click',
+    ),
+    'num_parameters': st.column_config.TextColumn(
+        '#params',
+        help='Approximate number of parameters.',
+    ),
+    'type': st.column_config.TextColumn(
+        'Type',
+        help='Model type based on training objective.',
+    ),
+    'average': st.column_config.NumberColumn(
+        'Avg.',
+        help='Average across task or dataset performances.',
+        format="%2.2f",
+    )
+}
+
+
+def filter_visible_model_specs():
+    specs = {
+        'URL': ('url', 1),
+        '#params': ('num_parameters', 2),
+        'Architecture': ('architecture', 3),
+        'Type': ('type', 4),
+        'Precision': ('precision', 5),
+    }
+    visible_specs = st.multiselect(
+        'Select model specs to be shown in the table.',
+        options=sorted(specs.keys(), key=lambda x: specs[x][1]),
+    )
+    # visible_specs = sorted(visible_specs, key=lambda x: specs[x][1])
+    return [specs[x][0] for x in visible_specs]
+
+
+def filter_by_model_spec():
+    pass
+
+
+def filter_visible_datasets(datasets):
+    col1, col2 = st.columns(2)
+    with col1:
+        dataset_grouping = st.selectbox(
+            'Dataset Grouping',
+            [
+                'Group Datasets',
+                'Show All Datasets',
+            ],
+        )
+
+    with col2:
+        filter_by_task = st.selectbox(
+            'Filter by Task',
+            [
+                'All',
+                'Understanding Tasks',
+                'Generation Tasks',
+                'Multiple Choice',
+                'Extractive Question Answering',
+                'Natural Language Inference',
+                'Text Classification',
+                'Summarization',
+            ],
+            disabled=dataset_grouping == "Group Datasets",
+        )
+
+    if dataset_grouping == 'Group Datasets':
+        return list(DATASET_GROUPS.keys())
+    elif dataset_grouping == 'Show All Datasets':
+        if filter_by_task == 'All':
+            return list(datasets.keys())
+        elif filter_by_task == 'Understanding Tasks':
+            this_datasets = [k for (k, v) in datasets.items() if not v['generative']]
+            return this_datasets
+        elif filter_by_task == 'Generation Tasks':
+            this_datasets = [k for (k, v) in datasets.items() if v['generative']]
+            return this_datasets
+        elif filter_by_task == 'Multiple Choice':
+            return DATASET_GROUPS['MCQA']['datasets']
+        elif filter_by_task == 'Extractive Question Answering':
+            return DATASET_GROUPS['QA']['datasets']
+        elif filter_by_task == 'Natural Language Inference':
+            return DATASET_GROUPS['NLI']['datasets']
+        elif filter_by_task == 'Text Classification':
+            return DATASET_GROUPS['TC']['datasets']
+        elif filter_by_task == 'Summarization':
+            return DATASET_GROUPS['SUM']['datasets']
+
+
+def introduction():
+    st.title(':blue[Cetvel :straight_ruler:]')
+    st.subheader('A Unified Benchmark for Evaluating Turkish LLMs', anchor=False)
+    st.markdown('''Cetvel is an extended version of the [lm-eval-harness](https://github.com/EleutherAI/lm-evaluation-harness) tool, which specifically includes tasks/datasets for benchmarking Turkish Large Language Models (LLMs). Cetvel includes a variety of tasks curated to assess different aspects of model performance in the Turkish language. Our primary goal is to objectively evaluate the capabilities of large language models in understanding and processing Turkish. For documentation and more details about the benchmark and the experiments, you can check the [GitHub repository](https://github.com/KUIS-AI/Cetvel).''')
+
+
+def main():
+    introduction()
+    results_df = cache_results('./results/zero-shot')
+    datasets = cache_datasets('./data/datasets.json')
+    dataset_column_configs = create_column_configs(datasets)
+    group_column_configs = create_column_configs(DATASET_GROUPS)
+    # score_columns = list(dataset_column_configs.keys()) + list(group_column_configs.keys())
+    column_configs = MODEL_SPEC_CONFIGS | group_column_configs | dataset_column_configs
+
+    visible_data_columns = sorted(filter_visible_datasets(datasets), key=str.casefold)
+    visible_model_columns = filter_visible_model_specs()
+    results_df = insert_average(results_df, visible_data_columns)
+
+    st.dataframe(
+        results_df,
+        use_container_width=True,
+        hide_index=True,
+        column_config=column_configs,
+        column_order=['model', 'average',] + visible_model_columns + visible_data_columns,
+    )
+    st.image('./assets/kuis-ai-logo.png', width=240)
+
+
+main()
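
For reference, cache_results above assumes each file under results/zero-shot deserializes into a structure like the following. The field names are taken from the code; the concrete values are illustrative only:

# Shape of one results/zero-shot/*.json entry as read by cache_results.
# Keys mirror the lookups in app.py; the values here are made up.
example_entry = {
    "model": {
        "model": "llama-3-8b",            # display name shown in the table
        "num_parameters": "8B",
        "architecture": "LlamaForCausalLM",
        "type": "pretrained",
        "dtype": "bfloat16",
    },
    "results": [
        # one dict per dataset; the score key matches TASK_METRIC_DICT[task],
        # and every metric except WER gets scaled by 100 for display
        {"name": "xquad_tr", "task": "extractive_question_answering", "exact_match": 0.31},
        {"name": "xlsum_tr", "task": "summarization", "rouge2": 0.12},
    ],
}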
    	
assets/kuis-ai-logo.png ADDED
(binary image file)
    	
data.py ADDED
@@ -0,0 +1,121 @@
| 1 | 
         
            +
            from enum import StrEnum, auto
         
     | 
| 2 | 
         
            +
             
     | 
| 3 | 
         
            +
             
     | 
| 4 | 
         
            +
            class Tasks(StrEnum):
         
     | 
| 5 | 
         
            +
                EXTRACTIVE_QUESTION_ANSWERING = auto()
         
     | 
| 6 | 
         
            +
                MULTIPLE_CHOICE = auto()
         
     | 
| 7 | 
         
            +
                SUMMARIZATION = auto()
         
     | 
| 8 | 
         
            +
                NATURAL_LANGUAGE_INFERENCE = auto()
         
     | 
| 9 | 
         
            +
                TEXT_CLASSIFICATION = auto()
         
     | 
| 10 | 
         
            +
                MACHINE_TRANSLATION = auto()
         
     | 
| 11 | 
         
            +
                GRAMMATICAL_ERROR_CORRECTION = auto()
         
     | 
| 12 | 
         
            +
             
     | 
| 13 | 
         
            +
             
     | 
| 14 | 
         
            +
            class Metrics(StrEnum):
         
     | 
| 15 | 
         
            +
                F1 = "f1"
         
     | 
| 16 | 
         
            +
                EXACT_MATCH = "exact_match"
         
     | 
| 17 | 
         
            +
                ROGUE1 = "rouge1"
         
     | 
| 18 | 
         
            +
                ROUGE2 = "rouge2"
         
     | 
| 19 | 
         
            +
                ROUGEL = "rougeL"
         
     | 
| 20 | 
         
            +
                ACCURACY = "acc"
         
     | 
| 21 | 
         
            +
                WER = "wer"
         
     | 
| 22 | 
         
            +
                BLEU = "bleu"
         
     | 
| 23 | 
         
            +
             
     | 
| 24 | 
         
            +
             
     | 
| 25 | 
         
            +
            DATASET_TASK_DICT = {
         
     | 
| 26 | 
         
            +
                # extractive qa
         
     | 
| 27 | 
         
            +
                'xquad_tr': Tasks.EXTRACTIVE_QUESTION_ANSWERING,
         
     | 
| 28 | 
         
            +
                'tquad': Tasks.EXTRACTIVE_QUESTION_ANSWERING,
         
     | 
| 29 | 
         
            +
                'mkqa_tr': Tasks.EXTRACTIVE_QUESTION_ANSWERING,  # not exactly
         
     | 
| 30 | 
         
            +
             
     | 
| 31 | 
         
            +
                # summarization
         
     | 
| 32 | 
         
            +
                'xlsum_tr': Tasks.SUMMARIZATION,
         
     | 
| 33 | 
         
            +
                'mlsum_tr': Tasks.SUMMARIZATION,
         
     | 
| 34 | 
         
            +
                'wiki_lingua_tr': Tasks.SUMMARIZATION,
         
     | 
| 35 | 
         
            +
                'tr-wikihow-summ': Tasks.SUMMARIZATION,
         
     | 
| 36 | 
         
            +
             
     | 
| 37 | 
         
            +
                # NLI
         
     | 
| 38 | 
         
            +
                #'nli_tr': Tasks.NATURAL_LANGUAGE_INFERENCE,
         
     | 
| 39 | 
         
            +
                'mnli_tr': Tasks.NATURAL_LANGUAGE_INFERENCE,
         
     | 
| 40 | 
         
            +
                'snli_tr': Tasks.NATURAL_LANGUAGE_INFERENCE,
         
     | 
| 41 | 
         
            +
                'xnli_tr': Tasks.NATURAL_LANGUAGE_INFERENCE,
         
     | 
| 42 | 
         
            +
             
     | 
| 43 | 
         
            +
                # multiple-choice
         
     | 
| 44 | 
         
            +
                'xcopa_tr': Tasks.MULTIPLE_CHOICE,
         
     | 
| 45 | 
         
            +
                'exams_tr': Tasks.MULTIPLE_CHOICE,
         
     | 
| 46 | 
         
            +
                'belebele_tr': Tasks.MULTIPLE_CHOICE,
         
     | 
| 47 | 
         
            +
                'turkish_plu': Tasks.MULTIPLE_CHOICE,
         
     | 
| 48 | 
         
            +
                'turkish_plu_goal_inference': Tasks.MULTIPLE_CHOICE,
         
     | 
| 49 | 
         
            +
                'turkish_plu_next_event_prediction': Tasks.MULTIPLE_CHOICE,
         
     | 
| 50 | 
         
            +
                'turkish_plu_step_inference': Tasks.MULTIPLE_CHOICE,
         
     | 
| 51 | 
         
            +
                'turkish_plu_step_ordering': Tasks.MULTIPLE_CHOICE,
         
     | 
| 52 | 
         
            +
             
     | 
| 53 | 
         
            +
                # fact-checking, not sure whether these are multi-choice
         
     | 
| 54 | 
         
            +
                # 'trclaim19': Tasks.MULTIPLE_CHOICE,
         
     | 
| 55 | 
         
            +
                'check_worthiness': Tasks.MULTIPLE_CHOICE,
         
     | 
| 56 | 
         
            +
                'relevance_judgment': Tasks.MULTIPLE_CHOICE,
         
     | 
| 57 | 
         
            +
             
     | 
| 58 | 
         
            +
                # text classification
         
     | 
| 59 | 
         
            +
                'sts_tr': Tasks.TEXT_CLASSIFICATION,
         
     | 
| 60 | 
         
            +
                'offenseval_tr': Tasks.TEXT_CLASSIFICATION,
         
     | 
| 61 | 
         
            +
                'news_cat': Tasks.TEXT_CLASSIFICATION,
         
     | 
| 62 | 
         
            +
                'ironytr': Tasks.TEXT_CLASSIFICATION,
         
     | 
| 63 | 
         
            +
             
     | 
| 64 | 
         
            +
                # other generation
         
     | 
| 65 | 
         
            +
                'wmt-tr-en-prompt': Tasks.MACHINE_TRANSLATION,
         
     | 
| 66 | 
         
            +
                'gecturk_generation': Tasks.GRAMMATICAL_ERROR_CORRECTION,
         
     | 
| 67 | 
         
            +
            }
         
     | 
| 68 | 
         
            +
             
     | 
| 69 | 
         
            +
             
     | 
| 70 | 
         
            +
            TASK_METRIC_DICT = {
         
     | 
| 71 | 
         
            +
                Tasks.EXTRACTIVE_QUESTION_ANSWERING: Metrics.EXACT_MATCH,
         
     | 
| 72 | 
         
            +
                Tasks.MULTIPLE_CHOICE: Metrics.ACCURACY,
         
     | 
| 73 | 
         
            +
                Tasks.TEXT_CLASSIFICATION: Metrics.ACCURACY,
         
     | 
| 74 | 
         
            +
                Tasks.NATURAL_LANGUAGE_INFERENCE: Metrics.ACCURACY,
         
     | 
| 75 | 
         
            +
                Tasks.SUMMARIZATION: Metrics.ROUGE2,
         
     | 
| 76 | 
         
            +
                Tasks.MACHINE_TRANSLATION: Metrics.BLEU,
         
     | 
| 77 | 
         
            +
                Tasks.GRAMMATICAL_ERROR_CORRECTION: Metrics.EXACT_MATCH,
         
     | 
| 78 | 
         
            +
            }
         
     | 
| 79 | 
         
            +
             
     | 
| 80 | 
         
            +
             
     | 
| 81 | 
         
            +
GENERATIVE_TASKS = (
    Tasks.SUMMARIZATION,
    Tasks.MACHINE_TRANSLATION,
    Tasks.GRAMMATICAL_ERROR_CORRECTION,
)

DATASET_GROUPS = {
    'QA': {
        'datasets': ['xquad_tr', 'tquad', 'mkqa_tr'],
        'description': 'Turkish splits of the SQuAD-like datasets XQuAD, TQUAD and MKQA.',
    },
    'MCQA': {
        'datasets': ['xcopa_tr', 'exams_tr', 'belebele_tr'] + [x for x in DATASET_TASK_DICT.keys() if x.startswith('turkish_plu')],
        'description': 'Multiple Choice Question Answering datasets: XCOPA, Exams, Belebele and Turkish PLU.',
    },
    'TC': {
        'datasets': ['sts_tr', 'offenseval_tr', 'news_cat', 'ironytr'],
        'description': 'Text Classification datasets.',
    },
    'NLI': {
        'datasets': ['mnli_tr', 'snli_tr', 'xnli_tr'],
        'description': 'Natural Language Inference (NLI) datasets in Turkish: XNLI, SNLI and MNLI.',
    },
    'SUM': {
        'datasets': [name for name, task in DATASET_TASK_DICT.items() if task == Tasks.SUMMARIZATION],
        'description': 'Summarization datasets in Turkish (XLSum, MLSum, WikiLingua and TrWikiHowSumm).',
    },
    'GEC': {
        'datasets': ['gecturk_generation'],
        'description': 'Grammatical Error Correction task.',
    },
    'MT': {
        'datasets': ['wmt-tr-en-prompt'],
        'description': 'Machine Translation on WMT-16 dataset (English-to-Turkish).',
    },

    # 'TrClaim19': {
    #     'datasets': ['check_worthiness', 'relevance_judgment'],
    #     'description': 'TrClaim19 dataset for fact-checking.',
    # },
}
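Not part of the commit, but a minimal usage sketch of how these groups might be consumed downstream, assuming data.py above is importable as the data module:

# Hypothetical sketch: resolve a leaderboard group key into its member
# datasets; DATASET_GROUPS comes from the data.py shown above.
from data import DATASET_GROUPS

def resolve_group(key: str) -> list[str]:
    """Return the dataset names that belong to a leaderboard group."""
    group = DATASET_GROUPS[key]  # raises KeyError for unknown group keys
    return list(group['datasets'])

for key in ('QA', 'SUM', 'MT'):
    print(key, '->', resolve_group(key))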
    	
        data/datasets.json
    ADDED
    
@@ -0,0 +1,185 @@
{
    "tquad": {
        "name": "TQUAD",
        "task": "extractive_question_answering",
        "description": "Turkish question-answering dataset on Turkish and Islamic science history, prepared within the scope of the Teknofest 2018 Artificial Intelligence competition.",
        "url": "https://github.com/TQuad/turkish-nlp-qa-dataset",
        "hf_name": "mcemilg/tquad",
        "generative": false
    },
    "xquad_tr": {
        "name": "XQUAD",
        "task": "extractive_question_answering",
        "description": "XQuAD (Cross-lingual Question Answering Dataset) is a benchmark dataset for evaluating cross-lingual question answering performance. The dataset consists of a subset of 240 paragraphs and 1190 question-answer pairs from the development set of SQuAD v1.1 together with their professional translations into ten languages: Spanish, German, Greek, Russian, Turkish, Arabic, Vietnamese, Thai, Chinese, and Hindi.",
        "url": "https://github.com/google-deepmind/xquad",
        "hf_name": "google/xquad",
        "generative": false
    },
    "mkqa_tr": {
        "name": "MKQA",
        "task": "extractive_question_answering",
        "description": "MKQA: Multilingual Knowledge Questions & Answers. MKQA includes 10k open-domain question-answer pairs in 26 languages, resulting in 260k examples in total.",
        "url": "https://github.com/apple/ml-mkqa",
        "hf_name": "mcemilg/mkqa_tr",
        "generative": false
    },
    "xlsum_tr": {
        "name": "XLSum",
        "task": "summarization",
        "description": "Abstractive summarization dataset for 44 languages.",
        "url": "https://github.com/csebuetnlp/xl-sum",
        "hf_name": "csebuetnlp/xlsum",
        "generative": true
    },
    "mlsum_tr": {
        "name": "MLSum",
        "task": "summarization",
        "description": "A multilingual summarization dataset collected from newspapers' websites. MLSum contains 1.5M examples in 5 languages including Turkish.",
        "url": "https://huggingface.co/datasets/reciTAL/mlsum",
        "hf_name": "reciTAL/mlsum",
        "generative": true
    },
    "wiki_lingua_tr": {
        "name": "WikiLingua",
        "task": "summarization",
        "description": "A multilingual abstractive summarization dataset covering 17 languages.",
        "url": "https://github.com/esdurmus/Wikilingua",
        "hf_name": "GEM/wiki_lingua",
        "generative": true
    },
    "tr-wikihow-summ": {
        "name": "WikiHowSumm",
        "task": "summarization",
        "description": "A summarization dataset obtained from the WikiHow website.",
        "url": "https://huggingface.co/datasets/ardauzunoglu/tr-wikihow-summ",
        "hf_name": "ardauzunoglu/tr-wikihow-summ",
        "generative": true
    },
    "mnli_tr": {
        "name": "MNLI",
        "task": "natural_language_inference",
        "description": "Multi-Genre NLI (MNLI) dataset.",
        "url": "https://cims.nyu.edu/~sbowman/multinli/",
        "hf_name": "boun-tabi/nli_tr",
        "generative": false
    },
    "snli_tr": {
        "name": "SNLI",
        "task": "natural_language_inference",
        "description": "The Stanford NLI (SNLI) dataset.",
        "url": "https://nlp.stanford.edu/projects/snli/",
        "hf_name": "boun-tabi/nli_tr",
        "generative": false
    },
    "xnli_tr": {
        "name": "XNLI",
        "task": "natural_language_inference",
        "description": "The Cross-Lingual NLI (XNLI) dataset.",
        "url": "https://github.com/facebookresearch/XNLI",
        "hf_name": "boun-tabi/nli_tr",
        "generative": false
    },
    "xcopa_tr": {
        "name": "XCOPA",
        "task": "multiple_choice",
        "description": "A multilingual dataset for evaluating the causal commonsense reasoning capabilities of language models.",
        "url": "https://github.com/cambridgeltl/xcopa",
        "hf_name": "cambridgeltl/xcopa",
        "generative": false
    },
    "exams_tr": {
        "name": "Exams",
        "task": "multiple_choice",
        "description": "A question answering dataset covering high school exams.",
        "url": "https://huggingface.co/datasets/exams",
        "hf_name": "exams",
        "generative": false
    },
    "belebele_tr": {
        "name": "Belebele",
        "task": "multiple_choice",
        "description": "A multiple-choice question answering dataset to evaluate machine reading comprehension.",
        "url": "https://github.com/facebookresearch/belebele",
        "generative": false
    },
    "turkish_plu_goal_inference": {
        "name": "PLU-GI",
        "task": "multiple_choice",
        "description": "TurkishPLU - Goal Inference task.",
        "url": "https://github.com/GGLAB-KU/turkish-plu",
        "hf_name": "mcemilg/turkish-plu-goal-inference",
        "generative": false
    },
    "turkish_plu_next_event_prediction": {
        "name": "PLU-NE",
        "task": "multiple_choice",
        "description": "TurkishPLU - Next Event Prediction task.",
        "url": "https://github.com/GGLAB-KU/turkish-plu",
        "hf_name": "mcemilg/turkish-plu-next-event-prediction",
        "generative": false
    },
    "turkish_plu_step_inference": {
        "name": "PLU-SI",
        "task": "multiple_choice",
        "description": "TurkishPLU - Step Inference task.",
        "url": "https://github.com/GGLAB-KU/turkish-plu",
        "hf_name": "mcemilg/turkish-plu-step-inference",
        "generative": false
    },
    "turkish_plu_step_ordering": {
        "name": "PLU-SO",
        "task": "multiple_choice",
        "description": "TurkishPLU - Step Ordering task.",
        "url": "https://github.com/GGLAB-KU/turkish-plu",
        "hf_name": "mcemilg/turkish-plu-step-ordering",
        "generative": false
    },
    "sts_tr": {
        "name": "STS",
        "task": "text_classification",
        "description": "The machine-translated Semantic Textual Similarity dataset in Turkish.",
        "url": "https://github.com/emrecncelik/sts-benchmark-tr",
        "hf_name": "emrecan/stsb-mt-turkish",
        "generative": false
    },
    "offenseval_tr": {
        "name": "OffensEval",
        "task": "text_classification",
        "description": "A dataset for offensive speech recognition in Turkish.",
        "url": "https://sites.google.com/site/offensevalsharedtask/offenseval-2020",
        "hf_name": "coltekin/offenseval2020_tr",
        "generative": false
    },
    "news_cat": {
        "name": "NewsCat",
        "task": "text_classification",
        "description": "News classification dataset collected from Turkish newspapers' websites.",
        "url": "http://www.kemik.yildiz.edu.tr/veri_kumelerimiz.html",
        "hf_name": "mcemilg/news-cat",
        "generative": false
    },
    "ironytr": {
        "name": "IronyTR",
        "task": "text_classification",
        "description": "Irony detection dataset in Turkish.",
        "url": "https://github.com/teghub/IronyTR",
        "hf_name": "mcemilg/IronyTR",
        "generative": false
    },
    "wmt-tr-en-prompt": {
        "name": "WMT",
        "task": "machine_translation",
        "description": "English-to-Turkish machine translation dataset.",
        "url": "http://www.aclweb.org/anthology/W/W16/W16-2301",
        "hf_name": "wmt/wmt16",
        "generative": true
    },
    "gecturk_generation": {
        "name": "GECTurk",
        "task": "grammatical_error_correction",
        "description": "A dataset for grammatical error correction.",
        "url": "https://github.com/GGLAB-KU/gecturk",
        "hf_name": "mcemilg/GECTurk-generation",
        "generative": true
    }
}
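A small sketch (hypothetical, not part of the commit) of reading this registry, assuming it is loaded from data/datasets.json as in the repo layout; for example, selecting the generative datasets:

# Hypothetical usage sketch: load the dataset registry and list the
# datasets whose outputs are free-form (generative) text.
import json

with open('data/datasets.json') as f:  # path assumed from the repo layout
    registry = json.load(f)

generative = sorted(key for key, meta in registry.items() if meta['generative'])
print(generative)  # e.g. gecturk_generation, mlsum_tr, tr-wikihow-summ, ...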
    	
        environment.yaml
    ADDED
    
@@ -0,0 +1,93 @@
name: Cetvel-leaderboard
channels:
  - defaults
dependencies:
  - _libgcc_mutex=0.1=main
  - _openmp_mutex=5.1=1_gnu
  - bzip2=1.0.8=h5eee18b_6
  - ca-certificates=2024.7.2=h06a4308_0
  - expat=2.6.2=h6a678d5_0
  - ld_impl_linux-64=2.38=h1181459_1
  - libffi=3.4.4=h6a678d5_1
  - libgcc-ng=11.2.0=h1234567_1
  - libgomp=11.2.0=h1234567_1
  - libstdcxx-ng=11.2.0=h1234567_1
  - libuuid=1.41.5=h5eee18b_0
  - ncurses=6.4=h6a678d5_0
  - openssl=3.0.14=h5eee18b_0
  - python=3.12.4=h5148396_1
  - readline=8.2=h5eee18b_0
  - sqlite=3.45.3=h5eee18b_0
  - tk=8.6.14=h39e8969_0
  - wheel=0.43.0=py312h06a4308_0
  - xz=5.4.6=h5eee18b_1
  - zlib=1.2.13=h5eee18b_1
  - pip:
      - altair==5.3.0
      - asttokens==2.4.1
      - attrs==23.2.0
      - blinker==1.8.2
      - cachetools==5.3.3
      - certifi==2024.7.4
      - charset-normalizer==3.3.2
      - click==8.1.7
      - contourpy==1.2.1
      - cycler==0.12.1
      - decorator==5.1.1
      - executing==2.0.1
      - fonttools==4.53.1
      - gitdb==4.0.11
      - gitpython==3.1.43
      - idna==3.7
      - ipdb==0.13.13
      - ipython==8.26.0
      - jedi==0.19.1
      - jinja2==3.1.4
      - jsonschema==4.23.0
      - jsonschema-specifications==2023.12.1
      - kiwisolver==1.4.5
      - markdown-it-py==3.0.0
      - markupsafe==2.1.5
      - matplotlib==3.9.1
      - matplotlib-inline==0.1.7
      - mdurl==0.1.2
      - numpy==2.0.0
      - packaging==24.1
      - pandas==2.2.2
      - parso==0.8.4
      - pexpect==4.9.0
      - pillow==10.4.0
      - pip==24.1.2
      - prompt-toolkit==3.0.47
      - protobuf==5.27.2
      - ptyprocess==0.7.0
      - pure-eval==0.2.2
      - pyarrow==16.1.0
      - pydeck==0.9.1
      - pygments==2.18.0
      - pyparsing==3.1.2
      - python-dateutil==2.9.0.post0
      - pytz==2024.1
      - redis==5.0.7
      - referencing==0.35.1
      - requests==2.32.3
      - rich==13.7.1
      - rpds-py==0.19.0
      - semantic-version==2.10.0
      - setuptools==70.3.0
      - setuptools-rust==1.9.0
      - six==1.16.0
      - smmap==5.0.1
      - stack-data==0.6.3
      - streamlit==1.36.0
      - tenacity==8.5.0
      - toml==0.10.2
      - toolz==0.12.1
      - tornado==6.4.1
      - traitlets==5.14.3
      - typing-extensions==4.12.2
      - tzdata==2024.1
      - urllib3==2.2.2
      - watchdog==4.0.1
      - wcwidth==0.2.13
prefix: /home/ilker/miniconda3/envs/streamlit-tutor
    	
        process_result.py
    ADDED
    
@@ -0,0 +1,72 @@
import argparse
import json
from data import Tasks, DATASET_TASK_DICT
from utils import preprocess_path


def process_result(entry, name, task):
    """Convert a raw lm-evaluation-harness result entry into the leaderboard schema."""
    processed = {
        'name': name,
        'task': str(task),
    }

    if task == Tasks.EXTRACTIVE_QUESTION_ANSWERING:
        # mkqa_tr reports 'em' already in [0, 1]; the other QA tasks report
        # 'exact' as a percentage, so those are scaled down by 0.01.
        key = 'em,none' if name == 'mkqa_tr' else 'exact,none'
        scale = 1 if name == 'mkqa_tr' else 0.01
        processed['exact_match'] = scale * entry[key]
        processed['f1'] = scale * entry['f1,none']
    elif task == Tasks.SUMMARIZATION:
        processed['rouge1'] = entry['rouge1,none']
        processed['rouge2'] = entry['rouge2,none']
        processed['rougeL'] = entry['rougeL,none']
    elif task in (
        Tasks.MULTIPLE_CHOICE,
        Tasks.NATURAL_LANGUAGE_INFERENCE,
        Tasks.TEXT_CLASSIFICATION,
    ):
        processed['acc'] = entry['acc,none']
        # fall back to plain accuracy when no normalized accuracy is reported
        processed['acc_norm'] = entry.get('acc_norm,none', processed['acc'])
    elif task == Tasks.MACHINE_TRANSLATION:
        processed['wer'] = entry['wer,none']
        processed['bleu'] = entry['bleu,none']
    elif task == Tasks.GRAMMATICAL_ERROR_CORRECTION:
        processed['exact_match'] = entry['exact_match,none']

    return processed


def main():
    parser = argparse.ArgumentParser(description='Results file formatter.')
    parser.add_argument('-i', '--input-file', type=str, help='Input JSON file for the results.')
    parser.add_argument('-o', '--output-file', type=str, help='Output JSON file for the formatted results.')
    args = parser.parse_args()

    with open(preprocess_path(args.input_file)) as f:
        raw_data = json.load(f)

    # first, get model args (split on the first '=' only, so values may contain '=')
    model_args = raw_data['config']['model_args'].split(',')
    model_args = dict(pair.split('=', 1) for pair in model_args)
    processed = dict()
    model_args['model'] = model_args.pop('pretrained')
    processed['model'] = model_args
    processed['model']['api'] = raw_data['config']['model']

    # then, process results
    results = raw_data['results']
    processed['results'] = list()
    for dataset, entry in results.items():
        if dataset not in DATASET_TASK_DICT.keys():
            continue
        task = DATASET_TASK_DICT[dataset]
        processed['results'].append(process_result(entry, dataset, task))

    with open(preprocess_path(args.output_file), 'w') as f:
        json.dump(processed, f, indent=4)

    print('done')


if __name__ == '__main__':
    main()
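For illustration, a made-up minimal entry in the shape process_result expects for an extractive QA task (the field names mirror the keys read above; the numbers are invented):

# Hypothetical example, not from the repo: lm-eval reports 'exact' and
# 'f1' as percentages for xquad_tr, so process_result scales them by 0.01.
from data import Tasks
from process_result import process_result

entry = {'exact,none': 24.7, 'f1,none': 44.2}
print(process_result(entry, 'xquad_tr', Tasks.EXTRACTIVE_QUESTION_ANSWERING))
# {'name': 'xquad_tr', 'task': '...', 'exact_match': 0.247, 'f1': 0.442}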
    	
        requirements.txt
    ADDED
    
@@ -0,0 +1,8 @@
altair==5.3.0
click==8.1.7
matplotlib==3.9.1
numpy==2.0.0
pandas==2.2.2
pillow==10.4.0
streamlit==1.36.0
tornado==6.4.1
    	
        results/zero-shot/aya-23-8b.json
    ADDED
    
@@ -0,0 +1,161 @@
| 1 | 
         
            +
            {
         
     | 
| 2 | 
         
            +
                "model": {
         
     | 
| 3 | 
         
            +
                    "load_in_8bit": "True",
         
     | 
| 4 | 
         
            +
                    "trust_remote_code": "True",
         
     | 
| 5 | 
         
            +
                    "model": "CohereForAI/aya-23-8B",
         
     | 
| 6 | 
         
            +
                    "api": "hf",
         
     | 
| 7 | 
         
            +
                    "architecture": "CohereForCausalLM",
         
     | 
| 8 | 
         
            +
                    "dtype": "float16",
         
     | 
| 9 | 
         
            +
                    "max_length": 8192,
         
     | 
| 10 | 
         
            +
                    "type": "instruction-tuned",
         
     | 
| 11 | 
         
            +
                    "num_parameters": "8b"
         
     | 
| 12 | 
         
            +
                },
         
     | 
| 13 | 
         
            +
                "results": [
         
     | 
| 14 | 
         
            +
                    {
         
     | 
| 15 | 
         
            +
                        "name": "belebele_tr",
         
     | 
| 16 | 
         
            +
                        "task": "multiple_choice",
         
     | 
| 17 | 
         
            +
                        "acc": 0.6067,
         
     | 
| 18 | 
         
            +
                        "acc_norm": 0.6067
         
     | 
| 19 | 
         
            +
                    },
         
     | 
| 20 | 
         
            +
                    {
         
     | 
| 21 | 
         
            +
                        "name": "exams_tr",
         
     | 
| 22 | 
         
            +
                        "task": "multiple_choice",
         
     | 
| 23 | 
         
            +
                        "acc": 0.2697,
         
     | 
| 24 | 
         
            +
                        "acc_norm": 0.2901
         
     | 
| 25 | 
         
            +
                    },
         
     | 
| 26 | 
         
            +
                    {
         
     | 
| 27 | 
         
            +
                        "name": "check_worthiness",
         
     | 
| 28 | 
         
            +
                        "task": "multiple_choice",
         
     | 
| 29 | 
         
            +
                        "acc": 0.38345521023765994,
         
     | 
| 30 | 
         
            +
                        "acc_norm": 0.49177330895795246
         
     | 
| 31 | 
         
            +
                    },
         
     | 
| 32 | 
         
            +
                    {
         
     | 
| 33 | 
         
            +
                        "name": "ironytr",
         
     | 
| 34 | 
         
            +
                        "task": "text_classification",
         
     | 
| 35 | 
         
            +
                        "acc": 0.5166666666666667,
         
     | 
| 36 | 
         
            +
                        "acc_norm": 0.5016666666666667
         
     | 
| 37 | 
         
            +
                    },
         
     | 
| 38 | 
         
            +
                    {
         
     | 
| 39 | 
         
            +
                        "name": "mkqa_tr",
         
     | 
| 40 | 
         
            +
                        "task": "extractive_question_answering",
         
     | 
| 41 | 
         
            +
                        "exact_match": 0.10017756732761172,
         
     | 
| 42 | 
         
            +
                        "f1": 0.16569513329103133
         
     | 
| 43 | 
         
            +
                    },
         
     | 
| 44 | 
         
            +
                    {
         
     | 
| 45 | 
         
            +
                        "name": "mnli_tr",
         
     | 
| 46 | 
         
            +
                        "task": "natural_language_inference",
         
     | 
| 47 | 
         
            +
                        "acc": 0.3436,
         
     | 
| 48 | 
         
            +
                        "acc_norm": 0.3477
         
     | 
| 49 | 
         
            +
                    },
         
     | 
| 50 | 
         
            +
                    {
         
     | 
| 51 | 
         
            +
                        "name": "news_cat",
         
     | 
| 52 | 
         
            +
                        "task": "text_classification",
         
     | 
| 53 | 
         
            +
                        "acc": 0.724,
         
     | 
| 54 | 
         
            +
                        "acc_norm": 0.632
         
     | 
| 55 | 
         
            +
                    },
         
     | 
| 56 | 
         
            +
                    {
         
     | 
| 57 | 
         
            +
                        "name": "offenseval_tr",
         
     | 
| 58 | 
         
            +
                        "task": "text_classification",
         
     | 
| 59 | 
         
            +
                        "acc": 0.3424036281179138,
         
     | 
| 60 | 
         
            +
                        "acc_norm": 0.7865646258503401
         
     | 
| 61 | 
         
            +
                    },
         
     | 
| 62 | 
         
            +
                    {
         
     | 
| 63 | 
         
            +
                        "name": "relevance_judgment",
         
     | 
| 64 | 
         
            +
                        "task": "multiple_choice",
         
     | 
| 65 | 
         
            +
                        "acc": 0.42550274223034734,
         
     | 
| 66 | 
         
            +
                        "acc_norm": 0.4273308957952468
         
     | 
| 67 | 
         
            +
                    },
         
     | 
| 68 | 
         
            +
                    {
         
     | 
| 69 | 
         
            +
                        "name": "snli_tr",
         
     | 
| 70 | 
         
            +
                        "task": "natural_language_inference",
         
     | 
| 71 | 
         
            +
                        "acc": 0.3249,
         
     | 
| 72 | 
         
            +
                        "acc_norm": 0.3367
         
     | 
| 73 | 
         
            +
                    },
         
     | 
| 74 | 
         
            +
                    {
         
     | 
| 75 | 
         
            +
                        "name": "sts_tr",
         
     | 
| 76 | 
         
            +
                        "task": "text_classification",
         
     | 
| 77 | 
         
            +
                        "acc": 0.22987672226250908,
         
     | 
| 78 | 
         
            +
                        "acc_norm": 0.19434372733865118
         
     | 
| 79 | 
         
            +
                    },
         
     | 
| 80 | 
         
            +
                    {
         
     | 
| 81 | 
         
            +
                        "name": "tquad",
         
     | 
| 82 | 
         
            +
                        "task": "extractive_question_answering",
         
     | 
| 83 | 
         
            +
                        "exact_match": 0.2062780269058296,
         
     | 
| 84 | 
         
            +
                        "f1": 0.4653972244152745
         
     | 
| 85 | 
         
            +
                    },
         
     | 
| 86 | 
         
            +
                    {
         
     | 
| 87 | 
         
            +
                        "name": "turkish_plu_goal_inference",
         
     | 
| 88 | 
         
            +
                        "task": "multiple_choice",
         
     | 
| 89 | 
         
            +
                        "acc": 0.3918757467144564,
         
     | 
| 90 | 
         
            +
                        "acc_norm": 0.3859020310633214
         
     | 
| 91 | 
         
            +
                    },
         
     | 
| 92 | 
         
            +
                    {
         
     | 
| 93 | 
         
            +
                        "name": "turkish_plu_next_event_prediction",
         
     | 
| 94 | 
         
            +
                        "task": "multiple_choice",
         
     | 
| 95 | 
         
            +
                        "acc": 0.4687022900763359,
         
     | 
| 96 | 
         
            +
                        "acc_norm": 0.5374045801526718
         
        },
        {
            "name": "turkish_plu_step_inference",
            "task": "multiple_choice",
            "acc": 0.33986928104575165,
            "acc_norm": 0.45098039215686275
        },
        {
            "name": "turkish_plu_step_ordering",
            "task": "multiple_choice",
            "acc": 0.6180215475024485,
            "acc_norm": 0.6180215475024485
        },
        {
            "name": "xcopa_tr",
            "task": "multiple_choice",
            "acc": 0.596,
            "acc_norm": 0.596
        },
        {
            "name": "xnli_tr",
            "task": "natural_language_inference",
            "acc": 0.4771084337349398,
            "acc_norm": 0.4771084337349398
        },
        {
            "name": "xquad_tr",
            "task": "extractive_question_answering",
            "exact_match": 0.24705882352941178,
            "f1": 0.44192474929656556
        },
        {
            "name": "gecturk_generation",
            "task": "grammatical_error_correction",
            "exact_match": 0.008281573498964804
        },
        {
            "name": "mlsum_tr",
            "task": "summarization",
            "rouge1": 0.37037019926313125,
            "rouge2": 0.24005923597941317,
            "rougeL": 0.31098002776173184
        },
        {
            "name": "wiki_lingua_tr",
            "task": "summarization",
            "rouge1": 0.2645070959726481,
            "rouge2": 0.11354354716145479,
            "rougeL": 0.21357621995467704
        },
        {
            "name": "wmt-tr-en-prompt",
            "task": "machine_translation",
            "wer": 0.7464128097803795,
            "bleu": 0.16878189334002527
        },
        {
            "name": "xlsum_tr",
            "task": "summarization",
            "rouge1": 0.2855728817569547,
            "rouge2": 0.14081555638864124,
            "rougeL": 0.23467303626936886
        }
    ]
}
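Each result file in this commit follows the same schema: a "model" block with metadata, and a "results" list of per-dataset entries whose metric keys depend on the task ("acc"/"acc_norm", "exact_match"/"f1", ROUGE scores, or "wer"/"bleu"). As a minimal sketch, assuming only the Python standard library, one of these files can be loaded and summarized like this; the path below is illustrative, any file under results/zero-shot/ has the same shape:

```python
import json

# Illustrative path; every file under results/zero-shot/ shares this schema.
with open("results/zero-shot/aya-expanse-8b.json") as f:
    data = json.load(f)

print(data["model"]["model"], "-", data["model"]["num_parameters"])
for entry in data["results"]:
    # Everything besides "name" and "task" is a metric for that dataset.
    metrics = {k: v for k, v in entry.items() if k not in ("name", "task")}
    print(f'{entry["name"]:<40} {entry["task"]:<35} {metrics}')
```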
    	
results/zero-shot/aya-expanse-8b.json
ADDED
@@ -0,0 +1,159 @@
{
    "model": {
        "model": "CohereForAI/aya-expanse-8b",
        "api": "hf",
        "architecture": "CohereForCausalLM",
        "max_length": 8192,
        "dtype": "float16",
        "type": "instruction-tuned",
        "num_parameters": "8b"
    },
    "results": [
        {
            "name": "belebele_tr",
            "task": "multiple_choice",
            "acc": 0.7355555555555555,
            "acc_norm": 0.7355555555555555
        },
        {
            "name": "exams_tr",
            "task": "multiple_choice",
            "acc": 0.3155216284987277,
            "acc_norm": 0.3460559796437659
        },
        {
            "name": "check_worthiness",
            "task": "multiple_choice",
            "acc": 0.4026508226691042,
            "acc_norm": 0.6224862888482633
        },
        {
            "name": "gecturk_generation",
            "task": "grammatical_error_correction",
            "exact_match": 0.0018296499590736194
        },
        {
            "name": "ironytr",
            "task": "text_classification",
            "acc": 0.505,
            "acc_norm": 0.49833333333333335
        },
        {
            "name": "mkqa_tr",
            "task": "extractive_question_answering",
            "exact_match": 0.06954720331459012,
            "f1": 0.13476533908972033
        },
        {
            "name": "mlsum_tr",
            "task": "summarization",
            "rouge1": 0.363610486561065,
            "rouge2": 0.21362825588593481,
            "rougeL": 0.29773476508614094
        },
        {
            "name": "mnli_tr",
            "task": "natural_language_inference",
            "acc": 0.3078,
            "acc_norm": 0.35
        },
        {
            "name": "news_cat",
            "task": "text_classification",
            "acc": 0.76,
            "acc_norm": 0.58
        },
        {
            "name": "offenseval_tr",
            "task": "text_classification",
            "acc": 0.2675736961451247,
            "acc_norm": 0.7956349206349206
        },
        {
            "name": "relevance_judgment",
            "task": "multiple_choice",
            "acc": 0.5877513711151737,
            "acc_norm": 0.579981718464351
        },
        {
            "name": "snli_tr",
            "task": "natural_language_inference",
            "acc": 0.344,
            "acc_norm": 0.3435
        },
        {
            "name": "sts_tr",
            "task": "text_classification",
            "acc": 0.2095721537345903,
            "acc_norm": 0.21029731689630166
        },
        {
            "name": "tquad",
            "task": "extractive_question_answering",
            "exact_match": 0.13452914798206278,
            "f1": 0.435087842533856
        },
        {
            "name": "turkish_plu_goal_inference",
            "task": "multiple_choice",
            "acc": 0.4062126642771804,
            "acc_norm": 0.3930704898446834
        },
        {
            "name": "turkish_plu_next_event_prediction",
            "task": "multiple_choice",
            "acc": 0.4900763358778626,
            "acc_norm": 0.5465648854961832
        },
        {
            "name": "turkish_plu_step_inference",
            "task": "multiple_choice",
            "acc": 0.3464052287581699,
            "acc_norm": 0.4395424836601307
        },
        {
            "name": "turkish_plu_step_ordering",
            "task": "multiple_choice",
            "acc": 0.5935357492654261,
            "acc_norm": 0.5935357492654261
        },
        {
            "name": "wiki_lingua_tr",
            "task": "summarization",
            "rouge1": 0.3064320242538614,
            "rouge2": 0.1340385267540697,
            "rougeL": 0.24764232131755232
        },
        {
            "name": "wmt-tr-en-prompt",
            "task": "machine_translation",
            "wer": 0.7822550373875778,
            "bleu": 0.17034711245148307
        },
        {
            "name": "xcopa_tr",
            "task": "multiple_choice",
            "acc": 0.578,
            "acc_norm": 0.578
        },
        {
            "name": "xlsum_tr",
            "task": "summarization",
            "rouge1": 0.26621653203927675,
            "rouge2": 0.133428873146516,
            "rougeL": 0.2083669711429916
        },
        {
            "name": "xnli_tr",
            "task": "natural_language_inference",
            "acc": 0.4919678714859438,
            "acc_norm": 0.4919678714859438
        },
        {
            "name": "xquad_tr",
            "task": "extractive_question_answering",
            "exact_match": 0.2495798319327731,
            "f1": 0.4735125568867167
        }
    ]
}
    	
results/zero-shot/aya101.json
ADDED
@@ -0,0 +1,172 @@
{
    "model": {
        "dtype": "bfloat16",
        "max_length": 4096,
        "model": "CohereForAI/aya-101",
        "api": "hf",
        "architecture": "T5ForConditionalGeneration",
        "type": "instruction-tuned",
        "num_parameters": "13b"
    },
    "results": [
        {
            "name": "xquad_tr",
            "task": "extractive_question_answering",
            "exact_match": 0.07563025210084033,
            "f1": 0.16462359535888943
        },
        {
            "name": "xlsum_tr",
            "task": "summarization",
            "rouge1": 0.02416422194769531,
            "rouge2": 0.00149839274458772,
            "rougeL": 0.02416422194769531
        },
        {
            "name": "xcopa_tr",
            "task": "multiple_choice",
            "acc": 0.596,
            "acc_norm": 0.596
        },
        {
            "name": "wmt-tr-en-prompt",
            "task": "machine_translation",
            "wer": 0.9853633715998092,
            "bleu": 0.0
        },
        {
            "name": "wiki_lingua_tr",
            "task": "summarization",
            "rouge1": 0.029006633700390562,
            "rouge2": 0.0004998910319276452,
            "rougeL": 0.028967197984657227
        },
        {
            "name": "turkish_plu",
            "task": "multiple_choice",
            "acc": 0.41344,
            "acc_norm": 0.42816
        },
        {
            "name": "turkish_plu_goal_inference",
            "task": "multiple_choice",
            "acc": 0.3739545997610514,
            "acc_norm": 0.33811230585424135
        },
        {
            "name": "turkish_plu_next_event_prediction",
            "task": "multiple_choice",
            "acc": 0.34961832061068704,
            "acc_norm": 0.38625954198473283
        },
        {
            "name": "turkish_plu_step_inference",
            "task": "multiple_choice",
            "acc": 0.272875816993464,
            "acc_norm": 0.35784313725490197
        },
        {
            "name": "turkish_plu_step_ordering",
            "task": "multiple_choice",
            "acc": 0.5710088148873653,
            "acc_norm": 0.5710088148873653
        },
        {
            "name": "check_worthiness",
            "task": "multiple_choice",
            "acc": 0.553473491773309,
            "acc_norm": 0.6238574040219378
        },
        {
            "name": "relevance_judgment",
            "task": "multiple_choice",
            "acc": 0.6709323583180987,
            "acc_norm": 0.5781535648994516
        },
        {
            "name": "tr-wikihow-summ",
            "task": "summarization",
            "rouge1": 0.02053796966151103,
            "rouge2": 0.00029270301029826366,
            "rougeL": 0.020495031370814234
        },
        {
            "name": "tquad",
            "task": "extractive_question_answering",
            "exact_match": 0.053811659192825115,
            "f1": 0.09199690627084456
        },
        {
            "name": "sts_tr",
            "task": "text_classification",
            "acc": 0.1696881798404641,
            "acc_norm": 0.18781725888324874
        },
        {
            "name": "offenseval_tr",
            "task": "text_classification",
            "acc": 0.7993197278911565,
            "acc_norm": 0.7970521541950113
        },
        {
            "name": "mnli_tr",
            "task": "natural_language_inference",
            "acc": 0.279,
            "acc_norm": 0.3386
        },
        {
            "name": "snli_tr",
            "task": "natural_language_inference",
            "acc": 0.2558,
            "acc_norm": 0.3279
        },
        {
            "name": "xnli_tr",
            "task": "natural_language_inference",
            "acc": 0.2998003992015968,
            "acc_norm": 0.34291417165668664
        },
        {
            "name": "news_cat",
            "task": "text_classification",
            "acc": 0.2,
            "acc_norm": 0.2
        },
        {
            "name": "mlsum_tr",
            "task": "summarization",
            "rouge1": 0.021746360547255133,
            "rouge2": 0.003113110667892852,
            "rougeL": 0.021727065059735186
        },
        {
            "name": "mkqa_tr",
            "task": "extractive_question_answering",
            "exact_match": 0.025451316957679788,
            "f1": 0.05324060372891391
        },
        {
            "name": "ironytr",
            "task": "text_classification",
            "acc": 0.5216666666666666,
            "acc_norm": 0.5
        },
        {
            "name": "gecturk_generation",
            "task": "grammatical_error_correction",
            "exact_match": 0.0
        },
        {
            "name": "exams_tr",
            "task": "multiple_choice",
            "acc": 0.22900763358778625,
            "acc_norm": 0.2366412213740458
        },
        {
            "name": "belebele_tr",
            "task": "multiple_choice",
            "acc": 0.2288888888888889,
            "acc_norm": 0.2288888888888889
        }
    ]
}
    	
results/zero-shot/commencis-7b.json
ADDED
@@ -0,0 +1,172 @@
| 1 | 
         
            +
            {
         
     | 
| 2 | 
         
            +
                "model": {
         
     | 
| 3 | 
         
            +
                    "dtype": "bfloat16",
         
     | 
| 4 | 
         
            +
                    "max_length": "4096",
         
     | 
| 5 | 
         
            +
                    "model": "Commencis/Commencis-LLM",
         
     | 
| 6 | 
         
            +
                    "api": "hf",
         
     | 
| 7 | 
         
            +
                    "architecture": "MistralForCausalLM",
         
     | 
| 8 | 
         
            +
                    "type": "instruction-tuned",
         
     | 
| 9 | 
         
            +
                    "num_parameters": "7b"
         
     | 
| 10 | 
         
            +
                },
         
     | 
| 11 | 
         
            +
                "results": [
         
     | 
| 12 | 
         
            +
                    {
         
     | 
| 13 | 
         
            +
                        "name": "xquad_tr",
         
     | 
| 14 | 
         
            +
                        "task": "extractive_question_answering",
         
     | 
| 15 | 
         
            +
                        "exact_match": 0.06638655462184874,
         
     | 
| 16 | 
         
            +
                        "f1": 0.22895337255761397
         
     | 
| 17 | 
         
            +
                    },
         
     | 
| 18 | 
         
            +
                    {
         
     | 
| 19 | 
         
            +
                        "name": "xlsum_tr",
         
     | 
| 20 | 
         
            +
                        "task": "summarization",
         
     | 
| 21 | 
         
            +
                        "rouge1": 0.23661435034483103,
         
     | 
| 22 | 
         
            +
                        "rouge2": 0.09475637339836376,
         
     | 
| 23 | 
         
            +
                        "rougeL": 0.17114647899378693
         
     | 
| 24 | 
         
            +
                    },
         
     | 
| 25 | 
         
            +
                    {
         
     | 
| 26 | 
         
            +
                        "name": "xcopa_tr",
         
     | 
| 27 | 
         
            +
                        "task": "multiple_choice",
         
     | 
| 28 | 
         
            +
                        "acc": 0.58,
         
     | 
| 29 | 
         
            +
                        "acc_norm": 0.58
         
     | 
| 30 | 
         
            +
                    },
         
     | 
| 31 | 
         
            +
                    {
         
     | 
| 32 | 
         
            +
                        "name": "wmt-tr-en-prompt",
         
     | 
| 33 | 
         
            +
                        "task": "machine_translation",
         
     | 
| 34 | 
         
            +
                        "wer": 1.292660190832963,
         
     | 
| 35 | 
         
            +
                        "bleu": 0.046829706960566486
         
     | 
| 36 | 
         
            +
                    },
         
     | 
| 37 | 
         
            +
                    {
         
     | 
| 38 | 
         
            +
                        "name": "wiki_lingua_tr",
         
     | 
| 39 | 
         
            +
                        "task": "summarization",
         
     | 
| 40 | 
         
            +
                        "rouge1": 0.20899244459581318,
         
     | 
| 41 | 
         
            +
                        "rouge2": 0.06262304805792501,
         
     | 
| 42 | 
         
            +
                        "rougeL": 0.15190187433999106
         
     | 
| 43 | 
         
            +
                    },
         
     | 
| 44 | 
         
            +
                    {
         
     | 
| 45 | 
         
            +
                        "name": "turkish_plu",
         
     | 
| 46 | 
         
            +
                        "task": "multiple_choice",
         
     | 
| 47 | 
         
            +
                        "acc": 0.4128,
         
     | 
| 48 | 
         
            +
                        "acc_norm": 0.46176
         
     | 
| 49 | 
         
            +
                    },
         
     | 
| 50 | 
         
            +
                    {
         
     | 
| 51 | 
         
            +
                        "name": "turkish_plu_goal_inference",
         
     | 
| 52 | 
         
            +
                        "task": "multiple_choice",
         
     | 
| 53 | 
         
            +
                        "acc": 0.34767025089605735,
         
     | 
| 54 | 
         
            +
                        "acc_norm": 0.38948626045400236
         
     | 
| 55 | 
         
            +
                    },
         
     | 
| 56 | 
         
            +
                    {
         
     | 
| 57 | 
         
            +
                        "name": "turkish_plu_next_event_prediction",
         
     | 
| 58 | 
         
            +
                        "task": "multiple_choice",
         
     | 
| 59 | 
         
            +
                        "acc": 0.38625954198473283,
         
     | 
| 60 | 
         
            +
                        "acc_norm": 0.46259541984732827
         
     | 
| 61 | 
         
            +
                    },
         
     | 
| 62 | 
         
            +
                    {
         
     | 
| 63 | 
         
            +
                        "name": "turkish_plu_step_inference",
         
     | 
| 64 | 
         
            +
                        "task": "multiple_choice",
         
     | 
| 65 | 
         
            +
                        "acc": 0.2761437908496732,
         
     | 
| 66 | 
         
            +
                        "acc_norm": 0.3872549019607843
         
     | 
| 67 | 
         
            +
                    },
         
     | 
| 68 | 
         
            +
                    {
         
     | 
| 69 | 
         
            +
                        "name": "turkish_plu_step_ordering",
         
     | 
| 70 | 
         
            +
                        "task": "multiple_choice",
         
     | 
| 71 | 
         
            +
                        "acc": 0.56513222331048,
         
     | 
| 72 | 
         
            +
                        "acc_norm": 0.56513222331048
         
     | 
| 73 | 
         
            +
                    },
         
     | 
| 74 | 
         
            +
                    {
         
     | 
| 75 | 
         
            +
                        "name": "check_worthiness",
         
     | 
| 76 | 
         
            +
                        "task": "multiple_choice",
         
     | 
| 77 | 
         
            +
                        "acc": 0.3903107861060329,
         
     | 
| 78 | 
         
            +
                        "acc_norm": 0.4835466179159049
         
     | 
| 79 | 
         
            +
                    },
         
     | 
| 80 | 
         
            +
                    {
         
     | 
| 81 | 
         
            +
                        "name": "relevance_judgment",
         
     | 
| 82 | 
         
            +
                        "task": "multiple_choice",
         
     | 
| 83 | 
         
            +
                        "acc": 0.5077696526508226,
         
     | 
| 84 | 
         
            +
                        "acc_norm": 0.526508226691042
         
     | 
| 85 | 
         
            +
                    },
         
     | 
| 86 | 
         
            +
                    {
         
     | 
| 87 | 
         
            +
                        "name": "tr-wikihow-summ",
         
     | 
| 88 | 
         
            +
                        "task": "summarization",
         
     | 
| 89 | 
         
            +
                        "rouge1": 0.23101542478965895,
         
     | 
| 90 | 
         
            +
                        "rouge2": 0.0718775262261334,
         
     | 
| 91 | 
         
            +
                        "rougeL": 0.16318786708633073
         
     | 
| 92 | 
         
            +
                    },
         
     | 
| 93 | 
         
            +
                    {
         
     | 
| 94 | 
         
            +
                        "name": "tquad",
         
     | 
| 95 | 
         
            +
                        "task": "extractive_question_answering",
         
     | 
| 96 | 
         
            +
                        "exact_match": 0.053811659192825115,
         
     | 
| 97 | 
         
            +
                        "f1": 0.3110458108565287
         
     | 
| 98 | 
         
            +
                    },
         
     | 
| 99 | 
         
            +
                    {
         
     | 
| 100 | 
         
            +
                        "name": "sts_tr",
         
     | 
| 101 | 
         
            +
                        "task": "text_classification",
         
     | 
| 102 | 
         
            +
                        "acc": 0.14865844815083393,
         
     | 
| 103 | 
         
            +
                        "acc_norm": 0.2226250906453952
         
     | 
| 104 | 
         
            +
                    },
         
     | 
| 105 | 
         
            +
                    {
         
     | 
| 106 | 
         
            +
                        "name": "offenseval_tr",
         
     | 
| 107 | 
         
            +
                        "task": "text_classification",
         
     | 
| 108 | 
         
            +
                        "acc": 0.24263038548752835,
         
     | 
| 109 | 
         
            +
                        "acc_norm": 0.29365079365079366
         
     | 
| 110 | 
         
            +
                    },
         
     | 
| 111 | 
         
            +
                    {
         
     | 
| 112 | 
         
            +
                        "name": "mnli_tr",
         
     | 
| 113 | 
         
            +
                        "task": "natural_language_inference",
         
     | 
| 114 | 
         
            +
                        "acc": 0.3058,
         
     | 
| 115 | 
         
            +
                        "acc_norm": 0.3103
         
     | 
| 116 | 
         
            +
                    },
         
     | 
| 117 | 
         
            +
                    {
         
     | 
| 118 | 
         
            +
                        "name": "snli_tr",
         
     | 
| 119 | 
         
            +
                        "task": "natural_language_inference",
         
     | 
| 120 | 
         
            +
                        "acc": 0.2972,
         
     | 
| 121 | 
         
            +
                        "acc_norm": 0.32
         
     | 
| 122 | 
         
            +
                    },
         
     | 
| 123 | 
         
            +
                    {
         
     | 
| 124 | 
         
            +
                        "name": "xnli_tr",
         
     | 
| 125 | 
         
            +
                        "task": "natural_language_inference",
         
     | 
| 126 | 
         
            +
                        "acc": 0.3141716566866267,
         
     | 
+            "acc_norm": 0.3281437125748503
+        },
+        {
+            "name": "news_cat",
+            "task": "text_classification",
+            "acc": 0.624,
+            "acc_norm": 0.368
+        },
+        {
+            "name": "mlsum_tr",
+            "task": "summarization",
+            "rouge1": 0.30963778437323686,
+            "rouge2": 0.16100694114326877,
+            "rougeL": 0.23447680384800107
+        },
+        {
+            "name": "mkqa_tr",
+            "task": "extractive_question_answering",
+            "exact_match": 0.0324060372891388,
+            "f1": 0.07231572678508513
+        },
+        {
+            "name": "ironytr",
+            "task": "text_classification",
+            "acc": 0.56,
+            "acc_norm": 0.54
+        },
+        {
+            "name": "gecturk_generation",
+            "task": "grammatical_error_correction",
+            "exact_match": 0.1701574461938466
+        },
+        {
+            "name": "exams_tr",
+            "task": "multiple_choice",
+            "acc": 0.24681933842239187,
+            "acc_norm": 0.29770992366412213
+        },
+        {
+            "name": "belebele_tr",
+            "task": "multiple_choice",
+            "acc": 0.3233333333333333,
+            "acc_norm": 0.3233333333333333
+        }
+    ]
+}
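Every file under results/zero-shot/ added in this commit follows the same schema: a "model" block with loading metadata and a "results" list of per-dataset entries whose metric keys depend on the "task". As a minimal sketch of how such a file can be consumed (assuming only the schema visible in these diffs; the repo's own process_result.py and data.py may do this differently), the Python snippet below picks one headline metric per task type and flattens each file into a {dataset: score} mapping:

import json
from pathlib import Path

# Hypothetical helper, not part of this commit: one headline metric per
# task type, using the metric keys that appear in these result files.
HEADLINE_METRIC = {
    "multiple_choice": "acc",
    "text_classification": "acc",
    "natural_language_inference": "acc",
    "extractive_question_answering": "exact_match",
    "summarization": "rouge1",
    "machine_translation": "bleu",
    "grammatical_error_correction": "exact_match",
}

def load_results(path):
    """Return (model_id, {dataset_name: headline_score}) for one result file."""
    data = json.loads(Path(path).read_text())
    scores = {}
    for entry in data["results"]:
        # entry.get(...) keeps null metrics (e.g. tr-wikihow-summ in
        # kanarya-2b.json below) as None instead of raising.
        scores[entry["name"]] = entry.get(HEADLINE_METRIC[entry["task"]])
    return data["model"]["model"], scores

if __name__ == "__main__":
    for path in sorted(Path("results/zero-shot").glob("*.json")):
        model_id, scores = load_results(path)
        print(f"{model_id}: {len(scores)} datasets")

Run from the repository root, this would print one line per result file; a leaderboard app can then pivot the per-model mappings into a table.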
    	
results/zero-shot/kanarya-2b.json ADDED
@@ -0,0 +1,171 @@
+{
+    "model": {
+        "dtype": "float16",
+        "model": "asafaya/kanarya-2b",
+        "api": "hf",
+        "architecture": "GPTJForCausalLM",
+        "type": "pretrained",
+        "num_parameters": "3b"
+    },
+    "results": [
+        {
+            "name": "belebele_tr",
+            "task": "multiple_choice",
+            "acc": 0.2811111111111111,
+            "acc_norm": 0.2811111111111111
+        },
+        {
+            "name": "exams_tr",
+            "task": "multiple_choice",
+            "acc": 0.30025445292620867,
+            "acc_norm": 0.3256997455470738
+        },
+        {
+            "name": "gecturk_generation",
+            "task": "grammatical_error_correction",
+            "exact_match": 9.62973662670326e-05
+        },
+        {
+            "name": "ironytr",
+            "task": "text_classification",
+            "acc": 0.5,
+            "acc_norm": 0.5016666666666667
+        },
+        {
+            "name": "mkqa_tr",
+            "task": "extractive_question_answering",
+            "exact_match": 0.005770938147380882,
+            "f1": 0.0157485308417537
+        },
+        {
+            "name": "mlsum_tr",
+            "task": "summarization",
+            "rouge1": 0.380182975983147,
+            "rouge2": 0.2469518162622865,
+            "rougeL": 0.30607429328228153
+        },
+        {
+            "name": "news_cat",
+            "task": "text_classification",
+            "acc": 0.668,
+            "acc_norm": 0.556
+        },
+        {
+            "name": "mnli_tr",
+            "task": "natural_language_inference",
+            "acc": 0.3278,
+            "acc_norm": 0.3463
+        },
+        {
+            "name": "snli_tr",
+            "task": "natural_language_inference",
+            "acc": 0.3088,
+            "acc_norm": 0.3109
+        },
+        {
+            "name": "xnli_tr",
+            "task": "natural_language_inference",
+            "acc": 0.3273453093812375,
+            "acc_norm": 0.3341317365269461
+        },
+        {
+            "name": "offenseval_tr",
+            "task": "text_classification",
+            "acc": 0.6159297052154195,
+            "acc_norm": 0.796485260770975
+        },
+        {
+            "name": "sts_tr",
+            "task": "text_classification",
+            "acc": 0.12907904278462654,
+            "acc_norm": 0.12037708484408992
+        },
+        {
+            "name": "tquad",
+            "task": "extractive_question_answering",
+            "exact_match": 0.016816143497757848,
+            "f1": 0.046325790025566756
+        },
+        {
+            "name": "check_worthiness",
+            "task": "multiple_choice",
+            "acc": 0.623400365630713,
+            "acc_norm": 0.6238574040219378
+        },
+        {
+            "name": "relevance_judgment",
+            "task": "multiple_choice",
+            "acc": 0.5068555758683729,
+            "acc_norm": 0.5758683729433273
+        },
+        {
+            "name": "turkish_plu",
+            "task": "multiple_choice",
+            "acc": 0.4928,
+            "acc_norm": 0.536
+        },
+        {
+            "name": "turkish_plu_goal_inference",
+            "task": "multiple_choice",
+            "acc": 0.45878136200716846,
+            "acc_norm": 0.46714456391875747
+        },
+        {
+            "name": "turkish_plu_next_event_prediction",
+            "task": "multiple_choice",
+            "acc": 0.45648854961832064,
+            "acc_norm": 0.5190839694656488
+        },
+        {
+            "name": "turkish_plu_step_inference",
+            "task": "multiple_choice",
+            "acc": 0.35784313725490197,
+            "acc_norm": 0.5
+        },
+        {
+            "name": "turkish_plu_step_ordering",
+            "task": "multiple_choice",
+            "acc": 0.6248775710088149,
+            "acc_norm": 0.6248775710088149
+        },
+        {
+            "name": "wiki_lingua_tr",
+            "task": "summarization",
+            "rouge1": 0.14941800836498376,
+            "rouge2": 0.04469826846423095,
+            "rougeL": 0.11118162846926655
+        },
+        {
+            "name": "wmt-tr-en-prompt",
+            "task": "machine_translation",
+            "wer": 2.833755212322392,
+            "bleu": 0.030496946295093332
+        },
+        {
+            "name": "xcopa_tr",
+            "task": "multiple_choice",
+            "acc": 0.642,
+            "acc_norm": 0.642
+        },
+        {
+            "name": "xlsum_tr",
+            "task": "summarization",
+            "rouge1": 0.2462743722502333,
+            "rouge2": 0.09312295140534987,
+            "rougeL": 0.1685445897911506
+        },
+        {
+            "name": "tr-wikihow-summ",
+            "task": "summarization",
+            "rouge1": null,
+            "rouge2": null,
+            "rougeL": null
+        },
+        {
+            "name": "xquad_tr",
+            "task": "extractive_question_answering",
+            "exact_match": 0.008403361344537815,
+            "f1": 0.027799180278171867
+        }
+    ]
+}
    	
results/zero-shot/llama-3-8b-instruct.json ADDED
@@ -0,0 +1,160 @@
+{
+    "model": {
+        "trust_remote_code": "True",
+        "model": "meta-llama/Meta-Llama-3-8B-Instruct",
+        "api": "hf",
+        "architecture": "LlamaForCausalLM",
+        "max_length": 8192,
+        "type": "instruction-tuned",
+        "dtype": "bfloat16",
+        "num_parameters": "8b"
+    },
+    "results": [
+        {
+            "name": "belebele_tr",
+            "task": "multiple_choice",
+            "acc": 0.6633333333333333,
+            "acc_norm": 0.6633333333333333
+        },
+        {
+            "name": "exams_tr",
+            "task": "multiple_choice",
+            "acc": 0.2697201017811705,
+            "acc_norm": 0.3104325699745547
+        },
+        {
+            "name": "check_worthiness",
+            "task": "multiple_choice",
+            "acc": 0.4218464351005484,
+            "acc_norm": 0.5644424131627057
+        },
+        {
+            "name": "ironytr",
+            "task": "text_classification",
+            "acc": 0.545,
+            "acc_norm": 0.6466666666666666
+        },
+        {
+            "name": "mkqa_tr",
+            "task": "extractive_question_answering",
+            "exact_match": 0.0424681858538029,
+            "f1": 0.11050423163975964
+        },
+        {
+            "name": "mnli_tr",
+            "task": "natural_language_inference",
+            "acc": 0.3201,
+            "acc_norm": 0.3653
+        },
+        {
+            "name": "news_cat",
+            "task": "text_classification",
+            "acc": 0.628,
+            "acc_norm": 0.588
+        },
+        {
+            "name": "offenseval_tr",
+            "task": "text_classification",
+            "acc": 0.3081065759637188,
+            "acc_norm": 0.7304421768707483
+        },
+        {
+            "name": "relevance_judgment",
+            "task": "multiple_choice",
+            "acc": 0.603290676416819,
+            "acc_norm": 0.5790676416819013
+        },
+        {
+            "name": "snli_tr",
+            "task": "natural_language_inference",
+            "acc": 0.3283,
+            "acc_norm": 0.353
+        },
+        {
+            "name": "sts_tr",
+            "task": "text_classification",
+            "acc": 0.14213197969543148,
+            "acc_norm": 0.21537345902828137
+        },
+        {
+            "name": "tquad",
+            "task": "extractive_question_answering",
+            "exact_match": 0.1289237668161435,
+            "f1": 0.4134057883004977
+        },
+        {
+            "name": "turkish_plu_goal_inference",
+            "task": "multiple_choice",
+            "acc": 0.38829151732377537,
+            "acc_norm": 0.43130227001194743
+        },
+        {
+            "name": "turkish_plu_next_event_prediction",
+            "task": "multiple_choice",
+            "acc": 0.4549618320610687,
+            "acc_norm": 0.517557251908397
+        },
+        {
+            "name": "turkish_plu_step_inference",
+            "task": "multiple_choice",
+            "acc": 0.3137254901960784,
+            "acc_norm": 0.44281045751633985
+        },
+        {
+            "name": "turkish_plu_step_ordering",
+            "task": "multiple_choice",
+            "acc": 0.6160626836434868,
+            "acc_norm": 0.6160626836434868
+        },
+        {
+            "name": "xcopa_tr",
+            "task": "multiple_choice",
+            "acc": 0.586,
+            "acc_norm": 0.586
+        },
+        {
+            "name": "xnli_tr",
+            "task": "natural_language_inference",
+            "acc": 0.4389558232931727,
+            "acc_norm": 0.4389558232931727
+        },
+        {
+            "name": "xquad_tr",
+            "task": "extractive_question_answering",
+            "exact_match": 0.09747899159663864,
+            "f1": 0.24450355256139333
+        },
+        {
+            "name": "gecturk_generation",
+            "task": "grammatical_error_correction",
+            "exact_match": 0.005007463045885695
+        },
+        {
+            "name": "mlsum_tr",
+            "task": "summarization",
+            "rouge1": 0.40612528796779146,
+            "rouge2": 0.25769550481564407,
+            "rougeL": 0.3281187592669974
+        },
+        {
+            "name": "wiki_lingua_tr",
+            "task": "summarization",
+            "rouge1": 0.23621778991663983,
+            "rouge2": 0.08052321922363763,
+            "rougeL": 0.1710165526266978
+        },
+        {
+            "name": "wmt-tr-en-prompt",
+            "task": "machine_translation",
+            "wer": 0.823814082821166,
+            "bleu": 0.13572050882587958
+        },
+        {
+            "name": "xlsum_tr",
+            "task": "summarization",
+            "rouge1": 0.29619456321037296,
+            "rouge2": 0.13520487191226377,
+            "rougeL": 0.220446635816053
+        }
+    ]
+}
    	
results/zero-shot/llama-3-8b.json ADDED
@@ -0,0 +1,159 @@
+{
+    "model": {
+        "model": "meta-llama/Meta-Llama-3-8B",
+        "api": "hf",
+        "architecture": "LlamaForCausalLM",
+        "max_length": 8192,
+        "type": "pretrained",
+        "dtype": "bfloat16",
+        "num_parameters": "8b"
+    },
+    "results": [
+        {
+            "name": "belebele_tr",
+            "task": "multiple_choice",
+            "acc": 0.5144,
+            "acc_norm": 0.5144
+        },
+        {
+            "name": "exams_tr",
+            "task": "multiple_choice",
+            "acc": 0.3028,
+            "acc_norm": 0.3537
+        },
+        {
+            "name": "check_worthiness",
+            "task": "multiple_choice",
+            "acc": 0.37614259597806216,
+            "acc_norm": 0.38391224862888484
+        },
+        {
+            "name": "ironytr",
+            "task": "text_classification",
+            "acc": 0.515,
+            "acc_norm": 0.525
+        },
+        {
+            "name": "mkqa_tr",
+            "task": "extractive_question_answering",
+            "exact_match": 0.13465522343888725,
+            "f1": 0.19144550324599957
+        },
+        {
+            "name": "mnli_tr",
+            "task": "natural_language_inference",
+            "acc": 0.3206,
+            "acc_norm": 0.3329
+        },
+        {
+            "name": "news_cat",
+            "task": "text_classification",
+            "acc": 0.724,
+            "acc_norm": 0.656
+        },
+        {
+            "name": "offenseval_tr",
+            "task": "text_classification",
+            "acc": 0.2193877551020408,
+            "acc_norm": 0.48214285714285715
+        },
+        {
+            "name": "relevance_judgment",
+            "task": "multiple_choice",
+            "acc": 0.42550274223034734,
+            "acc_norm": 0.5173674588665448
+        },
+        {
+            "name": "snli_tr",
+            "task": "natural_language_inference",
+            "acc": 0.325,
+            "acc_norm": 0.3766
+        },
+        {
+            "name": "sts_tr",
+            "task": "text_classification",
+            "acc": 0.16388687454677303,
+            "acc_norm": 0.19216823785351705
+        },
+        {
+            "name": "tquad",
+            "task": "extractive_question_answering",
+            "exact_match": 0.28475336322869954,
+            "f1": 0.5013148868557868
+        },
+        {
+            "name": "turkish_plu_goal_inference",
+            "task": "multiple_choice",
+            "acc": 0.38948626045400236,
+            "acc_norm": 0.4169653524492234
+        },
+        {
+            "name": "turkish_plu_next_event_prediction",
+            "task": "multiple_choice",
+            "acc": 0.4488549618320611,
+            "acc_norm": 0.5328244274809161
+        },
+        {
+            "name": "turkish_plu_step_inference",
+            "task": "multiple_choice",
+            "acc": 0.32189542483660133,
+            "acc_norm": 0.47058823529411764
+        },
+        {
+            "name": "turkish_plu_step_ordering",
+            "task": "multiple_choice",
+            "acc": 0.6278158667972575,
+            "acc_norm": 0.6278158667972575
+        },
+        {
+            "name": "xcopa_tr",
+            "task": "multiple_choice",
+            "acc": 0.618,
+            "acc_norm": 0.618
+        },
+        {
+            "name": "xnli_tr",
+            "task": "natural_language_inference",
+            "acc": 0.4839357429718876,
+            "acc_norm": 0.4839357429718876
+        },
+        {
+            "name": "xquad_tr",
+            "task": "extractive_question_answering",
+            "exact_match": 0.20840336134453782,
+            "f1": 0.33796418555415153
+        },
+        {
+            "name": "gecturk_generation",
+            "task": "grammatical_error_correction",
+            "exact_match": 0.006692666955558766
+        },
+        {
+            "name": "mlsum_tr",
+            "task": "summarization",
+            "rouge1": 0.38446881575055203,
+            "rouge2": 0.2503978598237102,
+            "rougeL": 0.319713589198042
+        },
+        {
+            "name": "wiki_lingua_tr",
+            "task": "summarization",
+            "rouge1": 0.2069234464456151,
+            "rouge2": 0.06576422586110373,
+            "rougeL": 0.1516869929958613
+        },
+        {
+            "name": "wmt-tr-en-prompt",
+            "task": "machine_translation",
+            "wer": 0.9262281724087097,
+            "bleu": 0.113320746345327
+        },
+        {
+            "name": "xlsum_tr",
+            "task": "summarization",
+            "rouge1": 0.2615001361521869,
+            "rouge2": 0.11093149007661907,
+            "rougeL": 0.20321693263972507
+        }
+    ]
+}
    	
results/zero-shot/llama-3.1-8b-instruct.json ADDED
@@ -0,0 +1,159 @@
+{
+    "model": {
+        "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+        "api": "hf",
+        "dtype": "bfloat16",
+        "max_length": 131072,
+        "architecture": "LlamaForCausalLM",
+        "type": "instruction-tuned",
+        "num_parameters": "8b"
+    },
+    "results": [
+        {
+            "name": "belebele_tr",
+            "task": "multiple_choice",
+            "acc": 0.7077777777777777,
+            "acc_norm": 0.7077777777777777
+        },
+        {
+            "name": "exams_tr",
+            "task": "multiple_choice",
+            "acc": 0.3231552162849873,
+            "acc_norm": 0.35877862595419846
+        },
+        {
+            "name": "check_worthiness",
+            "task": "multiple_choice",
+            "acc": 0.37614259597806216,
+            "acc_norm": 0.37614259597806216
+        },
+        {
+            "name": "ironytr",
+            "task": "text_classification",
+            "acc": 0.5133333333333333,
+            "acc_norm": 0.5666666666666667
+        },
+        {
+            "name": "mkqa_tr",
+            "task": "extractive_question_answering",
+            "exact_match": 0.09115122817401598,
+            "f1": 0.15627870028803578
+        },
+        {
+            "name": "mnli_tr",
+            "task": "natural_language_inference",
+            "acc": 0.3209,
+            "acc_norm": 0.3596
+        },
+        {
+            "name": "news_cat",
+            "task": "text_classification",
+            "acc": 0.66,
+            "acc_norm": 0.604
+        },
+        {
+            "name": "offenseval_tr",
+            "task": "text_classification",
+            "acc": 0.23582766439909297,
+            "acc_norm": 0.3687641723356009
+        },
+        {
+            "name": "relevance_judgment",
+            "task": "multiple_choice",
+            "acc": 0.4648080438756856,
+            "acc_norm": 0.5648994515539305
+        },
+        {
+            "name": "snli_tr",
+            "task": "natural_language_inference",
+            "acc": 0.3028,
+            "acc_norm": 0.3528
+        },
+        {
+            "name": "sts_tr",
+            "task": "text_classification",
+            "acc": 0.19579405366207397,
+            "acc_norm": 0.1551849166062364
+        },
+        {
+            "name": "tquad",
+            "task": "extractive_question_answering",
+            "exact_match": 0.23318385650224216,
+            "f1": 0.5062272078338648
+        },
+        {
+            "name": "turkish_plu_goal_inference",
+            "task": "multiple_choice",
+            "acc": 0.40860215053763443,
+            "acc_norm": 0.45997610513739545
+        },
+        {
+            "name": "turkish_plu_next_event_prediction",
+            "task": "multiple_choice",
+            "acc": 0.4442748091603053,
+            "acc_norm": 0.5419847328244275
+        },
+        {
+            "name": "turkish_plu_step_inference",
+            "task": "multiple_choice",
+            "acc": 0.33169934640522875,
+            "acc_norm": 0.4624183006535948
+        },
+        {
+            "name": "turkish_plu_step_ordering",
+            "task": "multiple_choice",
+            "acc": 0.633692458374143,
+            "acc_norm": 0.633692458374143
+        },
+        {
+            "name": "xcopa_tr",
+            "task": "multiple_choice",
+            "acc": 0.608,
+            "acc_norm": 0.608
+        },
+        {
+            "name": "xnli_tr",
+            "task": "natural_language_inference",
+            "acc": 0.4807228915662651,
+            "acc_norm": 0.4807228915662651
+        },
+        {
+            "name": "xquad_tr",
+            "task": "extractive_question_answering",
+            "exact_match": 0.21428571428571427,
+            "f1": 0.4170277103753468
+        },
+        {
+            "name": "gecturk_generation",
+            "task": "grammatical_error_correction",
+            "exact_match": 0.005007463045885695
+        },
+        {
+            "name": "mlsum_tr",
+            "task": "summarization",
+            "rouge1": 0.40612528796779146,
+            "rouge2": 0.25769550481564407,
+            "rougeL": 0.3281187592669974
+        },
+        {
+            "name": "wiki_lingua_tr",
+            "task": "summarization",
+            "rouge1": 0.23621778991663983,
+            "rouge2": 0.08052321922363763,
+            "rougeL": 0.1710165526266978
+        },
+        {
+            "name": "wmt-tr-en-prompt",
+            "task": "machine_translation",
+            "wer": 0.823814082821166,
+            "bleu": 0.13572050882587958
+        },
+        {
+            "name": "xlsum_tr",
+            "task": "summarization",
+            "rouge1": 0.29619456321037296,
+            "rouge2": 0.13520487191226377,
+            "rougeL": 0.220446635816053
+        }
+    ]
+}
    	
results/zero-shot/llama-3.1-8b.json ADDED
@@ -0,0 +1,127 @@
+{
+    "model": {
+        "model": "meta-llama/Meta-Llama-3.1-8B",
+        "api": "hf",
+        "dtype": "bfloat16",
+        "max_length": 131072,
+        "architecture": "LlamaForCausalLM",
+        "type": "pretrained",
+        "num_parameters": "8b"
+    },
+    "results": [
+        {
+            "name": "belebele_tr",
+            "task": "multiple_choice",
+            "acc": 0.6144,
+            "acc_norm": 0.6144
+        },
+        {
+            "name": "exams_tr",
+            "task": "multiple_choice",
+            "acc": 0.3130,
+            "acc_norm": 0.3537
+        },
+        {
+            "name": "check_worthiness",
+            "task": "multiple_choice",
+            "acc": 0.37614259597806216,
+            "acc_norm": 0.37751371115173676
+        },
+        {
+            "name": "ironytr",
+            "task": "text_classification",
+            "acc": 0.585,
+            "acc_norm": 0.5183333333333333
+        },
+        {
+            "name": "mkqa_tr",
+            "task": "extractive_question_answering",
+            "exact_match": 0.09248298313110388,
+            "f1": 0.15127108197296948
+        },
+        {
+            "name": "mnli_tr",
+            "task": "natural_language_inference",
+            "acc": 0.3495,
+            "acc_norm": 0.3481
+        },
+        {
+            "name": "news_cat",
+            "task": "text_classification",
+            "acc": 0.692,
+            "acc_norm": 0.588
+        },
+        {
+            "name": "offenseval_tr",
+            "task": "text_classification",
+            "acc": 0.3463718820861678,
+            "acc_norm": 0.7636054421768708
+        },
+        {
+            "name": "relevance_judgment",
+            "task": "multiple_choice",
+            "acc": 0.4227605118829982,
+            "acc_norm": 0.506398537477148
+        },
+        {
+            "name": "snli_tr",
+            "task": "natural_language_inference",
+            "acc": 0.3169,
+            "acc_norm": 0.3379
+        },
+        {
+            "name": "sts_tr",
+            "task": "text_classification",
+            "acc": 0.17041334300217548,
+            "acc_norm": 0.2001450326323423
+        },
+        {
+            "name": "tquad",
+            "task": "extractive_question_answering",
+            "exact_match": 0.2757847533632287,
+            "f1": 0.5178366277473359
+        },
+        {
+            "name": "turkish_plu_goal_inference",
+            "task": "multiple_choice",
+            "acc": 0.4145758661887694,
+            "acc_norm": 0.4324970131421744
+        },
+        {
+            "name": "turkish_plu_next_event_prediction",
+            "task": "multiple_choice",
+            "acc": 0.4488549618320611,
+            "acc_norm": 0.5358778625954198
+        },
+        {
+            "name": "turkish_plu_step_inference",
+            "task": "multiple_choice",
+            "acc": 0.3382352941176471,
+            "acc_norm": 0.4738562091503268
+        },
+        {
+            "name": "turkish_plu_step_ordering",
+            "task": "multiple_choice",
+            "acc": 0.6425073457394711,
+            "acc_norm": 0.6425073457394711
+        },
+        {
+            "name": "xcopa_tr",
+            "task": "multiple_choice",
+            "acc": 0.626,
+            "acc_norm": 0.626
+        },
+        {
+            "name": "xnli_tr",
+            "task": "natural_language_inference",
+            "acc": 0.4947791164658635,
+            "acc_norm": 0.4947791164658635
+        },
+        {
+            "name": "xquad_tr",
+            "task": "extractive_question_answering",
+            "exact_match": 0.2092436974789916,
+            "f1": 0.35674599908781446
+        }
+    ]
+}
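Every file under results/zero-shot/ follows the same schema: a "model" block with metadata and a "results" list whose entries carry task-specific metrics (acc/acc_norm for classification and multiple choice, exact_match/f1 for extractive QA, rouge1/rouge2/rougeL for summarization, wer/bleu for machine translation). As a minimal sketch of how one such file could be flattened into a single leaderboard row — the repo's own data.py and process_result.py are not shown here, so the helper below is illustrative, not the app's actual loader:

```python
import json
from pathlib import Path

def load_result_file(path):
    """Flatten one per-model result file into a single dict of scores.

    Metric keys are prefixed with the dataset name (e.g. "belebele_tr/acc")
    so every score fits in one flat row alongside the model metadata.
    """
    data = json.loads(Path(path).read_text())
    row = {"model": data["model"]["model"], "type": data["model"]["type"]}
    for entry in data["results"]:
        for key, value in entry.items():
            if key not in ("name", "task"):
                row[f"{entry['name']}/{key}"] = value
    return row

# Illustrative usage: one row per result file under results/zero-shot/.
rows = [load_result_file(p) for p in sorted(Path("results/zero-shot").glob("*.json"))]
```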
    	
results/zero-shot/llama-3.2-1b.json ADDED
@@ -0,0 +1,191 @@
+{
+    "model": {
+        "model": "meta-llama/Llama-3.2-1B",
+        "api": "hf",
+        "dtype": "bfloat16",
+        "max_length": 131072,
+        "architecture": "LlamaForCausalLM",
+        "type": "pretrained",
+        "num_parameters": "1b"
+    },
+    "results": [
+        {
+            "name": "belebele_tr",
+            "task": "multiple_choice",
+            "acc": 0.29555555555555557,
+            "acc_norm": 0.29555555555555557
+        },
+        {
+            "name": "exams_tr",
+            "task": "multiple_choice",
+            "acc": 0.28498727735368956,
+            "acc_norm": 0.3053435114503817
+        },
+        {
+            "name": "check_worthiness",
+            "task": "multiple_choice",
+            "acc": 0.3880255941499086,
+            "acc_norm": 0.623400365630713
+        },
+        {
+            "name": "gecturk_generation",
+            "task": "grammatical_error_correction",
+            "exact_match": 0.00741489720256151
+        },
+        {
+            "name": "ironytr",
+            "task": "text_classification",
+            "acc": 0.5283333333333333,
+            "acc_norm": 0.5033333333333333
+        },
+        {
+            "name": "mkqa_tr",
+            "task": "extractive_question_answering",
+            "exact_match": 0.007694584196507843,
+            "f1": 0.03304091036050505
+        },
+        {
+            "name": "mlsum_tr",
+            "task": "summarization",
+            "rouge1": 0.23283491254211872,
+            "rouge2": 0.13426790568610214,
+            "rougeL": 0.18915548037371513
+        },
+        {
+            "name": "mnli_tr",
+            "task": "natural_language_inference",
+            "acc": 0.3232,
+            "acc_norm": 0.334
+        },
+        {
+            "name": "news_cat",
+            "task": "text_classification",
+            "acc": 0.58,
+            "acc_norm": 0.532
+        },
+        {
+            "name": "offenseval_tr",
+            "task": "text_classification",
+            "acc": 0.4671201814058957,
+            "acc_norm": 0.7820294784580499
+        },
+        {
+            "name": "relevance_judgment",
+            "task": "multiple_choice",
+            "acc": 0.56672760511883,
+            "acc_norm": 0.5781535648994516
+        },
+        {
+            "name": "snli_tr",
+            "task": "natural_language_inference",
+            "acc": 0.3239,
+            "acc_norm": 0.3105
+        },
+        {
+            "name": "sts_tr",
+            "task": "text_classification",
+            "acc": 0.17113850616388687,
+            "acc_norm": 0.22552574329224076
+        },
+        {
+            "name": "tquad",
+            "task": "extractive_question_answering",
+            "exact_match": 0.06278026905829596,
+            "f1": 0.21486130318406463
+        },
+        {
+            "name": "turkish_plu_goal_inference",
+            "task": "multiple_choice",
+            "acc": 0.35842293906810035,
+            "acc_norm": 0.4026284348864994
+        },
+        {
+            "name": "turkish_plu_next_event_prediction",
+            "task": "multiple_choice",
+            "acc": 0.3709923664122137,
+            "acc_norm": 0.467175572519084
+        },
+        {
+            "name": "turkish_plu_step_inference",
+            "task": "multiple_choice",
+            "acc": 0.27941176470588236,
+            "acc_norm": 0.41830065359477125
+        },
+        {
+            "name": "turkish_plu_step_ordering",
+            "task": "multiple_choice",
+            "acc": 0.5759059745347699,
+            "acc_norm": 0.5759059745347699
+        },
+        {
+            "name": "wiki_lingua_tr",
+            "task": "summarization",
+            "rouge1": 0.10861529436199803,
+            "rouge2": 0.034862923521078545,
+            "rougeL": 0.08692160533533941
+        },
+        {
+            "name": "wmt-tr-en-prompt",
+            "task": "machine_translation",
+            "wer": 3.910683208136067,
+            "bleu": 0.012043288243775466
+        },
+        {
+            "name": "xcopa_tr",
+            "task": "multiple_choice",
+            "acc": 0.556,
+            "acc_norm": 0.556
+        },
+        {
+            "name": "xlsum_tr",
+            "task": "summarization",
+            "rouge1": 0.16924699150407269,
+            "rouge2": 0.07190935921365724,
+            "rougeL": 0.13255123335488528
+        },
+        {
+            "name": "xnli_tr",
+            "task": "natural_language_inference",
+            "acc": 0.4389558232931727,
+            "acc_norm": 0.4389558232931727
+        },
+        {
+            "name": "xquad_tr",
+            "task": "extractive_question_answering",
+            "exact_match": 0.04873949579831932,
+            "f1": 0.11156636293859905
+        },
+        {
+            "name": "gecturk_generation",
+            "task": "grammatical_error_correction",
+            "exact_match": 0.0073185998362944775
+        },
+        {
+            "name": "mlsum_tr",
+            "task": "summarization",
+            "rouge1": 0.35440052022111407,
+            "rouge2": 0.2215476501673455,
+            "rougeL": 0.2911311598176804
+        },
+        {
+            "name": "wiki_lingua_tr",
+            "task": "summarization",
+            "rouge1": 0.18510384577665046,
+            "rouge2": 0.056181066004903614,
+            "rougeL": 0.1392211003290612
+        },
+        {
+            "name": "wmt-tr-en-prompt",
+            "task": "machine_translation",
+            "wer": 1.311990023748812,
+            "bleu": 0.02624044942774961
+        },
+        {
+            "name": "xlsum_tr",
+            "task": "summarization",
+            "rouge1": 0.2429304790539497,
+            "rouge2": 0.09668008744707143,
+            "rougeL": 0.18327092913535944
+        }
+    ]
+}
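Note that llama-3.2-1b.json above lists five datasets twice (gecturk_generation, mlsum_tr, wiki_lingua_tr, wmt-tr-en-prompt, xlsum_tr) with different scores. Whether the later entries are meant to supersede the earlier ones is up to the repo's processing code, which is not shown here; a hedged sketch for flagging such duplicates before aggregation (the helper name is hypothetical, not part of this repo):

```python
import json
from collections import Counter
from pathlib import Path

def find_duplicate_entries(path):
    """Return dataset names that appear more than once in a result file."""
    data = json.loads(Path(path).read_text())
    counts = Counter(entry["name"] for entry in data["results"])
    return sorted(name for name, n in counts.items() if n > 1)

# For the file above this reports: gecturk_generation, mlsum_tr,
# wiki_lingua_tr, wmt-tr-en-prompt, xlsum_tr.
print(find_duplicate_entries("results/zero-shot/llama-3.2-1b.json"))
```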
    	
results/zero-shot/llama-3.2-3b-instruct.json ADDED
@@ -0,0 +1,191 @@
+{
+    "model": {
+        "model": "meta-llama/Llama-3.2-3B-Instruct",
+        "api": "hf",
+        "dtype": "bfloat16",
+        "max_length": 131072,
+        "architecture": "LlamaForCausalLM",
+        "type": "instruction-tuned",
+        "num_parameters": "3b"
+    },
+    "results": [
+        {
+            "name": "belebele_tr",
+            "task": "multiple_choice",
+            "acc": 0.5577777777777778,
+            "acc_norm": 0.5577777777777778
+        },
+        {
+            "name": "exams_tr",
+            "task": "multiple_choice",
+            "acc": 0.26208651399491095,
+            "acc_norm": 0.3053435114503817
+        },
+        {
+            "name": "check_worthiness",
+            "task": "multiple_choice",
+            "acc": 0.37614259597806216,
+            "acc_norm": 0.3807129798903108
+        },
+        {
+            "name": "gecturk_generation",
+            "task": "grammatical_error_correction",
+            "exact_match": 0.007222302470027445
+        },
+        {
+            "name": "ironytr",
+            "task": "text_classification",
+            "acc": 0.5016666666666667,
+            "acc_norm": 0.5083333333333333
+        },
+        {
+            "name": "mkqa_tr",
+            "task": "extractive_question_answering",
+            "exact_match": 0.04675939627108612,
+            "f1": 0.08114473798410345
+        },
+        {
+            "name": "mlsum_tr",
+            "task": "summarization",
+            "rouge1": 0.2669056212126977,
+            "rouge2": 0.1480446780314802,
+            "rougeL": 0.2106440565987865
+        },
+        {
+            "name": "mnli_tr",
+            "task": "natural_language_inference",
+            "acc": 0.32,
+            "acc_norm": 0.3141
+        },
+        {
+            "name": "news_cat",
+            "task": "text_classification",
+            "acc": 0.64,
+            "acc_norm": 0.552
+        },
+        {
+            "name": "offenseval_tr",
+            "task": "text_classification",
+            "acc": 0.20634920634920634,
+            "acc_norm": 0.35600907029478457
+        },
+        {
+            "name": "relevance_judgment",
+            "task": "multiple_choice",
+            "acc": 0.4227605118829982,
+            "acc_norm": 0.42413162705667273
+        },
+        {
+            "name": "snli_tr",
+            "task": "natural_language_inference",
+            "acc": 0.319,
+            "acc_norm": 0.2923
+        },
            +
                    {
         
     | 
| 85 | 
         
            +
                        "name": "sts_tr",
         
     | 
| 86 | 
         
            +
                        "task": "text_classification",
         
     | 
| 87 | 
         
            +
                        "acc": 0.12907904278462654,
         
     | 
| 88 | 
         
            +
                        "acc_norm": 0.16896301667875271
         
     | 
| 89 | 
         
            +
                    },
         
     | 
| 90 | 
         
            +
                    {
         
     | 
| 91 | 
         
            +
                        "name": "tquad",
         
     | 
| 92 | 
         
            +
                        "task": "extractive_question_answering",
         
     | 
| 93 | 
         
            +
                        "exact_match": 0.18721973094170405,
         
     | 
| 94 | 
         
            +
                        "f1": 0.5109898180473623
         
     | 
| 95 | 
         
            +
                    },
         
     | 
| 96 | 
         
            +
                    {
         
     | 
| 97 | 
         
            +
                        "name": "turkish_plu_goal_inference",
         
     | 
| 98 | 
         
            +
                        "task": "multiple_choice",
         
     | 
| 99 | 
         
            +
                        "acc": 0.3321385902031063,
         
     | 
| 100 | 
         
            +
                        "acc_norm": 0.3548387096774194
         
     | 
| 101 | 
         
            +
                    },
         
     | 
| 102 | 
         
            +
                    {
         
     | 
| 103 | 
         
            +
                        "name": "turkish_plu_next_event_prediction",
         
     | 
| 104 | 
         
            +
                        "task": "multiple_choice",
         
     | 
| 105 | 
         
            +
                        "acc": 0.3648854961832061,
         
     | 
| 106 | 
         
            +
                        "acc_norm": 0.4488549618320611
         
     | 
| 107 | 
         
            +
                    },
         
     | 
| 108 | 
         
            +
                    {
         
     | 
| 109 | 
         
            +
                        "name": "turkish_plu_step_inference",
         
     | 
| 110 | 
         
            +
                        "task": "multiple_choice",
         
     | 
| 111 | 
         
            +
                        "acc": 0.24183006535947713,
         
     | 
| 112 | 
         
            +
                        "acc_norm": 0.3758169934640523
         
     | 
| 113 | 
         
            +
                    },
         
     | 
| 114 | 
         
            +
                    {
         
     | 
| 115 | 
         
            +
                        "name": "turkish_plu_step_ordering",
         
     | 
| 116 | 
         
            +
                        "task": "multiple_choice",
         
     | 
| 117 | 
         
            +
                        "acc": 0.5710088148873653,
         
     | 
| 118 | 
         
            +
                        "acc_norm": 0.5710088148873653
         
     | 
| 119 | 
         
            +
                    },
         
     | 
| 120 | 
         
            +
                    {
         
     | 
| 121 | 
         
            +
                        "name": "wiki_lingua_tr",
         
     | 
| 122 | 
         
            +
                        "task": "summarization",
         
     | 
| 123 | 
         
            +
                        "rouge1": 0.1342879173103036,
         
     | 
| 124 | 
         
            +
                        "rouge2": 0.041489300068460175,
         
     | 
| 125 | 
         
            +
                        "rougeL": 0.10482785510181569
         
     | 
| 126 | 
         
            +
                    },
         
     | 
| 127 | 
         
            +
                    {
         
     | 
| 128 | 
         
            +
                        "name": "wmt-tr-en-prompt",
         
     | 
| 129 | 
         
            +
                        "task": "machine_translation",
         
     | 
| 130 | 
         
            +
                        "wer": 1.7706536060519733,
         
     | 
| 131 | 
         
            +
                        "bleu": 0.048843165627950165
         
     | 
| 132 | 
         
            +
                    },
         
     | 
| 133 | 
         
            +
                    {
         
     | 
| 134 | 
         
            +
                        "name": "xcopa_tr",
         
     | 
| 135 | 
         
            +
                        "task": "multiple_choice",
         
     | 
| 136 | 
         
            +
                        "acc": 0.546,
         
     | 
| 137 | 
         
            +
                        "acc_norm": 0.546
         
     | 
| 138 | 
         
            +
                    },
         
     | 
| 139 | 
         
            +
                    {
         
     | 
| 140 | 
         
            +
                        "name": "xlsum_tr",
         
     | 
| 141 | 
         
            +
                        "task": "summarization",
         
     | 
| 142 | 
         
            +
                        "rouge1": 0.17224405229987672,
         
     | 
| 143 | 
         
            +
                        "rouge2": 0.06736413357191079,
         
     | 
| 144 | 
         
            +
                        "rougeL": 0.12750762702828333
         
     | 
| 145 | 
         
            +
                    },
         
     | 
| 146 | 
         
            +
                    {
         
     | 
| 147 | 
         
            +
                        "name": "xnli_tr",
         
     | 
| 148 | 
         
            +
                        "task": "natural_language_inference",
         
     | 
| 149 | 
         
            +
                        "acc": 0.42811244979919677,
         
     | 
| 150 | 
         
            +
                        "acc_norm": 0.42811244979919677
         
     | 
| 151 | 
         
            +
                    },
         
     | 
| 152 | 
         
            +
                    {
         
     | 
| 153 | 
         
            +
                        "name": "xquad_tr",
         
     | 
| 154 | 
         
            +
                        "task": "extractive_question_answering",
         
     | 
| 155 | 
         
            +
                        "exact_match": 0.23025210084033615,
         
     | 
| 156 | 
         
            +
                        "f1": 0.4335914561273987
         
     | 
| 157 | 
         
            +
                    },
         
     | 
| 158 | 
         
            +
                    {
         
     | 
| 159 | 
         
            +
                        "name": "gecturk_generation",
         
     | 
| 160 | 
         
            +
                        "task": "grammatical_error_correction",
         
     | 
| 161 | 
         
            +
                        "exact_match": 0.009726033992970293
         
     | 
| 162 | 
         
            +
                    },
         
     | 
| 163 | 
         
            +
                    {
         
     | 
| 164 | 
         
            +
                        "name": "mlsum_tr",
         
     | 
| 165 | 
         
            +
                        "task": "summarization",
         
     | 
| 166 | 
         
            +
                        "rouge1": 0.36482642805140486,
         
     | 
| 167 | 
         
            +
                        "rouge2": 0.2215366481025873,
         
     | 
| 168 | 
         
            +
                        "rougeL": 0.2964001074060548
         
     | 
| 169 | 
         
            +
                    },
         
     | 
| 170 | 
         
            +
                    {
         
     | 
| 171 | 
         
            +
                        "name": "wiki_lingua_tr",
         
     | 
| 172 | 
         
            +
                        "task": "summarization",
         
     | 
| 173 | 
         
            +
                        "rouge1": 0.21420020104688736,
         
     | 
| 174 | 
         
            +
                        "rouge2": 0.06939715371402275,
         
     | 
| 175 | 
         
            +
                        "rougeL": 0.1623531918550368
         
     | 
| 176 | 
         
            +
                    },
         
     | 
| 177 | 
         
            +
                    {
         
     | 
| 178 | 
         
            +
                        "name": "wmt-tr-en-prompt",
         
     | 
| 179 | 
         
            +
                        "task": "machine_translation",
         
     | 
| 180 | 
         
            +
                        "wer": 0.9910280580654681,
         
     | 
| 181 | 
         
            +
                        "bleu": 0.08179536823012563
         
     | 
| 182 | 
         
            +
                    },
         
     | 
| 183 | 
         
            +
                    {
         
     | 
| 184 | 
         
            +
                        "name": "xlsum_tr",
         
     | 
| 185 | 
         
            +
                        "task": "summarization",
         
     | 
| 186 | 
         
            +
                        "rouge1": 0.2616423061938248,
         
     | 
| 187 | 
         
            +
                        "rouge2": 0.11064039063859936,
         
     | 
| 188 | 
         
            +
                        "rougeL": 0.19686955120787036
         
     | 
| 189 | 
         
            +
                    }
         
     | 
| 190 | 
         
            +
                ]
         
     | 
| 191 | 
         
            +
            }
         
     | 
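Every result file in this commit follows the same schema: a "model" metadata block (dtype, max_length, model id, api, architecture, type, num_parameters) and a "results" list whose entries pair a dataset "name" and "task" with task-appropriate metrics (acc/acc_norm for multiple choice and classification, exact_match/f1 for extractive QA, ROUGE for summarization, wer/bleu for machine translation). The sketch below shows one way such a file could be consumed. It is illustrative only: the metric-priority order and the keep-the-last-duplicate rule (note the repeated gecturk_generation, mlsum_tr, wiki_lingua_tr, wmt-tr-en-prompt, and xlsum_tr entries above, apparently re-runs) are assumptions, not code taken from process_result.py.

import json

# Order in which to pick a headline metric; an assumption for illustration.
METRIC_PRIORITY = ["acc_norm", "acc", "f1", "exact_match", "rougeL", "bleu"]

def headline_scores(path):
    with open(path) as f:
        report = json.load(f)
    scores = {}
    for entry in report["results"]:
        # Later duplicate entries overwrite earlier ones, keeping the newest run.
        for metric in METRIC_PRIORITY:
            if metric in entry:
                scores[entry["name"]] = (metric, entry[metric])
                break
    return report["model"]["model"], scores

model, scores = headline_scores("results/zero-shot/mistral-7b.json")
for name, (metric, value) in sorted(scores.items()):
    print(f"{model}  {name}: {metric}={value:.4f}")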
    	
results/zero-shot/mistral-7b.json
ADDED
@@ -0,0 +1,165 @@
+{
+    "model": {
+        "dtype": "bfloat16",
+        "max_length": "4096",
+        "model": "mistralai/Mistral-7B-v0.1",
+        "api": "hf",
+        "architecture": "MistralForCausalLM",
+        "type": "pretrained",
+        "num_parameters": "7b"
+    },
+    "results": [
+        {
+            "name": "xquad_tr",
+            "task": "extractive_question_answering",
+            "exact_match": 0.16722689075630254,
+            "f1": 0.32150094374615246
+        },
+        {
+            "name": "xcopa_tr",
+            "task": "multiple_choice",
+            "acc": 0.566,
+            "acc_norm": 0.566
+        },
+        {
+            "name": "turkish_plu",
+            "task": "multiple_choice",
+            "acc": 0.45152,
+            "acc_norm": 0.5136
+        },
+        {
+            "name": "turkish_plu_goal_inference",
+            "task": "multiple_choice",
+            "acc": 0.42771804062126645,
+            "acc_norm": 0.46714456391875747
+        },
+        {
+            "name": "turkish_plu_next_event_prediction",
+            "task": "multiple_choice",
+            "acc": 0.39541984732824426,
+            "acc_norm": 0.5022900763358779
+        },
+        {
+            "name": "turkish_plu_step_inference",
+            "task": "multiple_choice",
+            "acc": 0.29248366013071897,
+            "acc_norm": 0.4411764705882353
+        },
+        {
+            "name": "turkish_plu_step_ordering",
+            "task": "multiple_choice",
+            "acc": 0.6023506366307542,
+            "acc_norm": 0.6023506366307542
+        },
+        {
+            "name": "check_worthiness",
+            "task": "multiple_choice",
+            "acc": 0.37614259597806216,
+            "acc_norm": 0.42458866544789764
+        },
+        {
+            "name": "relevance_judgment",
+            "task": "multiple_choice",
+            "acc": 0.4218464351005484,
+            "acc_norm": 0.49588665447897623
+        },
+        {
+            "name": "tquad",
+            "task": "extractive_question_answering",
+            "exact_match": 0.2096412556053812,
+            "f1": 0.4767364701184728
+        },
+        {
+            "name": "sts_tr",
+            "task": "text_classification",
+            "acc": 0.135605511240029,
+            "acc_norm": 0.20522117476432197
+        },
+        {
+            "name": "offenseval_tr",
+            "task": "text_classification",
+            "acc": 0.2046485260770975,
+            "acc_norm": 0.3735827664399093
+        },
+        {
+            "name": "mnli_tr",
+            "task": "natural_language_inference",
+            "acc": 0.3194,
+            "acc_norm": 0.3267
+        },
+        {
+            "name": "snli_tr",
+            "task": "natural_language_inference",
+            "acc": 0.3196,
+            "acc_norm": 0.3201
+        },
+        {
+            "name": "xnli_tr",
+            "task": "natural_language_inference",
+            "acc": 0.331936127744511,
+            "acc_norm": 0.34910179640718564
+        },
+        {
+            "name": "news_cat",
+            "task": "text_classification",
+            "acc": 0.652,
+            "acc_norm": 0.44
+        },
+        {
+            "name": "mkqa_tr",
+            "task": "extractive_question_answering",
+            "exact_match": 0.12030186445693992,
+            "f1": 0.16163416207615164
+        },
+        {
+            "name": "ironytr",
+            "task": "text_classification",
+            "acc": 0.5016666666666667,
+            "acc_norm": 0.52
+        },
+        {
+            "name": "exams_tr",
+            "task": "multiple_choice",
+            "acc": 0.24173027989821882,
+            "acc_norm": 0.30279898218829515
+        },
+        {
+            "name": "belebele_tr",
+            "task": "multiple_choice",
+            "acc": 0.37444444444444447,
+            "acc_norm": 0.37444444444444447
+        },
+        {
+            "name": "gecturk_generation",
+            "task": "grammatical_error_correction",
+            "exact_match": 0.20660599932591844
+        },
+        {
+            "name": "mlsum_tr",
+            "task": "summarization",
+            "rouge1": 0.09403885616158554,
+            "rouge2": 0.06300721907752257,
+            "rougeL": 0.08169726458665999
+        },
+        {
+            "name": "wiki_lingua_tr",
+            "task": "summarization",
+            "rouge1": 0.1905392717787084,
+            "rouge2": 0.05957088325130176,
+            "rougeL": 0.1472985242082243
+        },
+        {
+            "name": "wmt-tr-en-prompt",
+            "task": "machine_translation",
+            "wer": 1.0876062644712858,
+            "bleu": 0.04973628734419603
+        },
+        {
+            "name": "xlsum_tr",
+            "task": "summarization",
+            "rouge1": 0.02720399421152351,
+            "rouge2": 0.012032606076011431,
+            "rougeL": 0.02311080687545987
+        }
+    ]
+}
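Because every file under results/zero-shot/ shares that schema, a leaderboard-style ranking can be derived by folding each model's per-task scores into one number. A hedged sketch follows; averaging acc_norm over the entries that report it is an illustrative aggregation choice, not necessarily what app.py computes.

import json
from pathlib import Path
from statistics import mean

# Rank models in results/zero-shot/ by mean acc_norm; aggregation rule assumed.
def leaderboard(results_dir="results/zero-shot"):
    rows = []
    for path in sorted(Path(results_dir).glob("*.json")):
        report = json.loads(path.read_text())
        accs = [r["acc_norm"] for r in report["results"] if "acc_norm" in r]
        if accs:
            rows.append((report["model"]["model"], mean(accs)))
    return sorted(rows, key=lambda row: row[1], reverse=True)

for model, score in leaderboard():
    print(f"{score:.4f}  {model}")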
    	
results/zero-shot/trendyol-7b.json
ADDED
@@ -0,0 +1,172 @@
+{
+    "model": {
+        "dtype": "bfloat16",
+        "max_length": "4096",
+        "model": "Trendyol/Trendyol-LLM-7b-base-v1.0",
+        "api": "hf",
+        "architecture": "MistralForCausalLM",
+        "type": "instruction-tuned",
+        "num_parameters": "7b"
+    },
+    "results": [
+        {
+            "name": "xquad_tr",
+            "task": "extractive_question_answering",
+            "exact_match": 0.0,
+            "f1": 0.15289561928390746
+        },
+        {
+            "name": "xlsum_tr",
+            "task": "summarization",
+            "rouge1": 0.12128827095936726,
+            "rouge2": 0.05041801264157676,
+            "rougeL": 0.09604301857137748
+        },
+        {
+            "name": "xcopa_tr",
+            "task": "multiple_choice",
+            "acc": 0.61,
+            "acc_norm": 0.61
+        },
+        {
+            "name": "wmt-tr-en-prompt",
+            "task": "machine_translation",
+            "wer": 13.038665635458035,
+            "bleu": 0.010261135899096054
+        },
+        {
+            "name": "wiki_lingua_tr",
+            "task": "summarization",
+            "rouge1": 0.09429776166714862,
+            "rouge2": 0.02873358785517343,
+            "rougeL": 0.07767336257524773
+        },
+        {
+            "name": "turkish_plu",
+            "task": "multiple_choice",
+            "acc": 0.46944,
+            "acc_norm": 0.49952
+        },
+        {
+            "name": "turkish_plu_goal_inference",
+            "task": "multiple_choice",
+            "acc": 0.4635603345280765,
+            "acc_norm": 0.44683393070489846
+        },
+        {
+            "name": "turkish_plu_next_event_prediction",
+            "task": "multiple_choice",
+            "acc": 0.43206106870229005,
+            "acc_norm": 0.48854961832061067
+        },
+        {
+            "name": "turkish_plu_step_inference",
+            "task": "multiple_choice",
+            "acc": 0.3235294117647059,
+            "acc_norm": 0.4395424836601307
+        },
+        {
+            "name": "turkish_plu_step_ordering",
+            "task": "multiple_choice",
+            "acc": 0.5857002938295789,
+            "acc_norm": 0.5857002938295789
+        },
+        {
+            "name": "check_worthiness",
+            "task": "multiple_choice",
+            "acc": 0.37614259597806216,
+            "acc_norm": 0.37614259597806216
+        },
+        {
+            "name": "relevance_judgment",
+            "task": "multiple_choice",
+            "acc": 0.4218464351005484,
+            "acc_norm": 0.4218464351005484
+        },
+        {
+            "name": "tr-wikihow-summ",
+            "task": "summarization",
+            "rouge1": 0.1602888221320987,
+            "rouge2": 0.04616347811027626,
+            "rougeL": 0.12482407983918105
+        },
+        {
+            "name": "tquad",
+            "task": "extractive_question_answering",
+            "exact_match": 0.007847533632286996,
+            "f1": 0.26089513093937805
+        },
+        {
+            "name": "sts_tr",
+            "task": "text_classification",
+            "acc": 0.1551849166062364,
+            "acc_norm": 0.22697606961566352
+        },
+        {
+            "name": "offenseval_tr",
+            "task": "text_classification",
+            "acc": 0.20294784580498867,
+            "acc_norm": 0.20294784580498867
+        },
+        {
+            "name": "mnli_tr",
+            "task": "natural_language_inference",
+            "acc": 0.3134,
+            "acc_norm": 0.2942
+        },
+        {
+            "name": "snli_tr",
+            "task": "natural_language_inference",
+            "acc": 0.3204,
+            "acc_norm": 0.2894
+        },
+        {
+            "name": "xnli_tr",
+            "task": "natural_language_inference",
+            "acc": 0.32974051896207585,
+            "acc_norm": 0.300998003992016
+        },
+        {
+            "name": "news_cat",
+            "task": "text_classification",
+            "acc": 0.812,
+            "acc_norm": 0.628
+        },
+        {
+            "name": "mlsum_tr",
+            "task": "summarization",
+            "rouge1": 0.15450187559493767,
+            "rouge2": 0.08797823051939649,
+            "rougeL": 0.1350441813405041
+        },
+        {
+            "name": "mkqa_tr",
+            "task": "extractive_question_answering",
+            "exact_match": 0.001479727730097662,
+            "f1": 0.037161672000373895
+        },
+        {
+            "name": "ironytr",
+            "task": "text_classification",
+            "acc": 0.5,
+            "acc_norm": 0.5
+        },
+        {
+            "name": "gecturk_generation",
+            "task": "grammatical_error_correction",
+            "exact_match": 0.00048148683133516297
+        },
+        {
+            "name": "exams_tr",
+            "task": "multiple_choice",
+            "acc": 0.28498727735368956,
+            "acc_norm": 0.3486005089058524
+        },
+        {
+            "name": "belebele_tr",
+            "task": "multiple_choice",
+            "acc": 0.3622222222222222,
+            "acc_norm": 0.3622222222222222
+        }
+    ]
+}
    	
results/zero-shot/turna.json
ADDED
@@ -0,0 +1,172 @@
+{
+    "model": {
+        "dtype": "auto",
+        "max_length": "1024",
+        "model": "boun-tabi-LMG/TURNA",
+        "api": "hf",
+        "architecture": "T5ForConditionalGeneration",
+        "type": "pretrained",
+        "num_parameters": "7b"
+    },
+    "results": [
+        {
+            "name": "xquad_tr",
+            "task": "extractive_question_answering",
+            "exact_match": 0.0,
+            "f1": 0.0
+        },
+        {
+            "name": "xlsum_tr",
+            "task": "summarization",
+            "rouge1": 0.1904384366601188,
+            "rouge2": 0.060686113611140166,
+            "rougeL": 0.1311090280660866
+        },
+        {
+            "name": "xcopa_tr",
+            "task": "multiple_choice",
+            "acc": 0.558,
+            "acc_norm": 0.558
+        },
+        {
+            "name": "wmt-tr-en-prompt",
+            "task": "machine_translation",
+            "wer": 3.9036796738046218,
+            "bleu": 0.0008286617236874524
+        },
+        {
+            "name": "wiki_lingua_tr",
+            "task": "summarization",
+            "rouge1": 0.18435291474691423,
+            "rouge2": 0.05584649726914134,
+            "rougeL": 0.13446021077350823
+        },
+        {
+            "name": "turkish_plu",
+            "task": "multiple_choice",
+            "acc": 0.40288,
+            "acc_norm": 0.44608
+        },
+        {
+            "name": "turkish_plu_goal_inference",
+            "task": "multiple_choice",
+            "acc": 0.37992831541218636,
+            "acc_norm": 0.35722819593787336
+        },
+        {
+            "name": "turkish_plu_next_event_prediction",
+            "task": "multiple_choice",
+            "acc": 0.383206106870229,
+            "acc_norm": 0.4488549618320611
+        },
+        {
+            "name": "turkish_plu_step_inference",
+            "task": "multiple_choice",
+            "acc": 0.272875816993464,
+            "acc_norm": 0.4542483660130719
+        },
+        {
+            "name": "turkish_plu_step_ordering",
+            "task": "multiple_choice",
+            "acc": 0.5122428991185113,
+            "acc_norm": 0.5122428991185113
+        },
+        {
+            "name": "check_worthiness",
+            "task": "multiple_choice",
+            "acc": 0.42230347349177333,
+            "acc_norm": 0.620201096892139
+        },
+        {
+            "name": "relevance_judgment",
+            "task": "multiple_choice",
+            "acc": 0.4904021937842779,
+            "acc_norm": 0.5781535648994516
+        },
+        {
+            "name": "tr-wikihow-summ",
+            "task": "summarization",
+            "rouge1": 0.20515501424269858,
+            "rouge2": 0.05693981251975118,
+            "rougeL": 0.1449313333992171
+        },
+        {
+            "name": "tquad",
+            "task": "extractive_question_answering",
+            "exact_match": 0.0,
+            "f1": 0.0003736920777279522
+        },
+        {
+            "name": "sts_tr",
+            "task": "text_classification",
+            "acc": 0.14213197969543148,
+            "acc_norm": 0.19506889050036258
+        },
+        {
+            "name": "offenseval_tr",
+            "task": "text_classification",
+            "acc": 0.5099206349206349,
+            "acc_norm": 0.7970521541950113
+        },
+        {
+            "name": "mnli_tr",
+            "task": "natural_language_inference",
+            "acc": 0.3203,
+            "acc_norm": 0.3159
+        },
+        {
+            "name": "snli_tr",
+            "task": "natural_language_inference",
+            "acc": 0.3223,
+            "acc_norm": 0.3278
+        },
+        {
+            "name": "xnli_tr",
+            "task": "natural_language_inference",
+            "acc": 0.32974051896207585,
+            "acc_norm": 0.3277445109780439
+        },
+        {
+            "name": "news_cat",
+            "task": "text_classification",
+            "acc": 0.328,
+            "acc_norm": 0.208
+        },
+        {
+            "name": "mlsum_tr",
+            "task": "summarization",
+            "rouge1": 0.20830277213555015,
+            "rouge2": 0.11040542892341527,
+            "rougeL": 0.16135585618616377
+        },
+        {
+            "name": "mkqa_tr",
+            "task": "extractive_question_answering",
+            "exact_match": 0.0011837821840781297,
+            "f1": 0.006720430107526878
+        },
+        {
+            "name": "ironytr",
+            "task": "text_classification",
+            "acc": 0.48333333333333334,
+            "acc_norm": 0.5033333333333333
+        },
+        {
+            "name": "gecturk_generation",
+            "task": "grammatical_error_correction",
+            "exact_match": 0.0
+        },
+        {
+            "name": "exams_tr",
+            "task": "multiple_choice",
+            "acc": 0.2366412213740458,
+            "acc_norm": 0.2748091603053435
+        },
+        {
+            "name": "belebele_tr",
+            "task": "multiple_choice",
+            "acc": 0.22555555555555556,
+            "acc_norm": 0.22555555555555556
+        }
+    ]
+}
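Each file under results/zero-shot/ follows the schema above: a "model" block of metadata plus a "results" list in which every entry names a dataset, its task, and that task's metrics (acc/acc_norm for multiple choice and classification, rouge1/rouge2/rougeL for summarization, exact_match/f1 for extractive QA, wer/bleu for machine translation). A minimal sketch of consuming one such file, assuming it is read straight from the repo's results tree; the helper name summarize_accuracy is illustrative, not part of this repo:

import json
from collections import defaultdict

def summarize_accuracy(path):
    # Load one per-model result file and average "acc" per task type.
    with open(path, 'r') as f:
        data = json.load(f)
    by_task = defaultdict(list)
    for entry in data['results']:
        if 'acc' in entry:  # summarization/QA/GEC entries carry other metrics
            by_task[entry['task']].append(entry['acc'])
    name = data['model']['model']
    return name, {task: sum(v) / len(v) for task, v in by_task.items()}

print(summarize_accuracy('results/zero-shot/turna.json'))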
    	
utils.py
ADDED
@@ -0,0 +1,28 @@
+import os
+import os.path as osp
+import json
+
+
+def preprocess_path(path):
+    # Expand a leading '~' and normalize to an absolute path.
+    path = osp.expanduser(path)
+    path = osp.abspath(path)
+    return path
+
+
+def get_model_url(entry):
+    # Models evaluated through the Hugging Face API link to their hub page;
+    # anything else falls back to an explicit 'url' field if present.
+    if entry['api'] == 'hf':
+        return f'https://huggingface.co/{entry["model"]}'
+    return entry.get('url', f'https://localhost/{entry["model"]}')
+
+
+def read_results(path):
+    # Read every per-model result JSON under `path` into a list of dicts.
+    path = preprocess_path(path)
+    file_list = sorted(os.listdir(path))
+    results = list()
+    for file_name in file_list:
+        file_path = osp.join(path, file_name)
+        with open(file_path, 'r') as f:
+            this = json.load(f)
+        results.append(this)
+    return results
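Taken together, these helpers turn the results directory into the list of records the app renders. A short usage sketch, assuming the repo layout shown in this commit (one JSON file per model under results/zero-shot/):

from utils import read_results, get_model_url

results = read_results('results/zero-shot')
for entry in results:
    model = entry['model']
    # e.g. boun-tabi-LMG/TURNA -> https://huggingface.co/boun-tabi-LMG/TURNA
    print(model['model'], get_model_url(model), len(entry['results']))

Note that read_results assumes every file in the directory parses as JSON; any stray file there would make json.load raise, so the directory should contain only result files.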