Spaces:
Running
Running
Roman Solomatin
commited on
Commit
•
d2bf885
1
Parent(s):
b688574
working table
Browse files- .gitignore +1 -0
- pdm.lock +248 -7
- pyproject.toml +7 -0
- src/__init__.py +0 -0
- src/encodechka/about.py +30 -59
- src/encodechka/app.py +89 -290
- src/encodechka/display/formatting.py +3 -1
- src/encodechka/display/utils.py +19 -117
- src/encodechka/envs.py +0 -25
- src/encodechka/leaderboard/__init__.py +0 -0
- src/encodechka/leaderboard/read_evals.py +0 -193
- src/encodechka/parser.py +29 -0
- src/encodechka/populate.py +9 -56
- src/encodechka/settings.py +26 -0
- src/encodechka/submission/__init__.py +0 -0
- src/encodechka/submission/check_validity.py +0 -131
- src/encodechka/submission/submit.py +0 -125
- tests/cassettes/test_parser.yaml +343 -0
- tests/test_parser.py +10 -0
.gitignore
CHANGED
@@ -12,3 +12,4 @@ eval-queue-bk/
|
|
12 |
eval-results-bk/
|
13 |
logs/
|
14 |
/.pdm-python
|
|
|
|
12 |
eval-results-bk/
|
13 |
logs/
|
14 |
/.pdm-python
|
15 |
+
leaderboard.csv
|
pdm.lock
CHANGED
@@ -2,10 +2,10 @@
|
|
2 |
# It is not intended for manual editing.
|
3 |
|
4 |
[metadata]
|
5 |
-
groups = ["default", "lint"]
|
6 |
strategy = ["cross_platform", "inherit_metadata"]
|
7 |
lock_version = "4.4.1"
|
8 |
-
content_hash = "sha256:
|
9 |
|
10 |
[[package]]
|
11 |
name = "aiofiles"
|
@@ -149,7 +149,7 @@ name = "colorama"
|
|
149 |
version = "0.4.6"
|
150 |
requires_python = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
|
151 |
summary = "Cross-platform colored terminal text."
|
152 |
-
groups = ["default"]
|
153 |
marker = "platform_system == \"Windows\" or sys_platform == \"win32\""
|
154 |
files = [
|
155 |
{file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
|
@@ -224,7 +224,7 @@ name = "exceptiongroup"
|
|
224 |
version = "1.2.1"
|
225 |
requires_python = ">=3.7"
|
226 |
summary = "Backport of PEP 654 (exception groups)"
|
227 |
-
groups = ["default"]
|
228 |
marker = "python_version < \"3.11\""
|
229 |
files = [
|
230 |
{file = "exceptiongroup-1.2.1-py3-none-any.whl", hash = "sha256:5258b9ed329c5bbdd31a309f53cbfb0b155341807f6ff7606a1e801a891b29ad"},
|
@@ -464,7 +464,7 @@ name = "idna"
|
|
464 |
version = "3.7"
|
465 |
requires_python = ">=3.5"
|
466 |
summary = "Internationalized Domain Names in Applications (IDNA)"
|
467 |
-
groups = ["default"]
|
468 |
files = [
|
469 |
{file = "idna-3.7-py3-none-any.whl", hash = "sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0"},
|
470 |
{file = "idna-3.7.tar.gz", hash = "sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc"},
|
@@ -481,6 +481,17 @@ files = [
|
|
481 |
{file = "importlib_resources-6.4.0.tar.gz", hash = "sha256:cdb2b453b8046ca4e3798eb1d84f3cce1446a0e8e7b5ef4efb600f19fc398145"},
|
482 |
]
|
483 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
484 |
[[package]]
|
485 |
name = "jinja2"
|
486 |
version = "3.1.4"
|
@@ -566,6 +577,72 @@ files = [
|
|
566 |
{file = "kiwisolver-1.4.5.tar.gz", hash = "sha256:e57e563a57fb22a142da34f38acc2fc1a5c864bc29ca1517a88abc963e60d6ec"},
|
567 |
]
|
568 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
569 |
[[package]]
|
570 |
name = "markdown-it-py"
|
571 |
version = "3.0.0"
|
@@ -642,6 +719,32 @@ files = [
|
|
642 |
{file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"},
|
643 |
]
|
644 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
645 |
[[package]]
|
646 |
name = "numpy"
|
647 |
version = "1.26.4"
|
@@ -687,7 +790,7 @@ name = "packaging"
|
|
687 |
version = "24.1"
|
688 |
requires_python = ">=3.8"
|
689 |
summary = "Core utilities for Python packages"
|
690 |
-
groups = ["default"]
|
691 |
files = [
|
692 |
{file = "packaging-24.1-py3-none-any.whl", hash = "sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124"},
|
693 |
{file = "packaging-24.1.tar.gz", hash = "sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002"},
|
@@ -751,6 +854,17 @@ files = [
|
|
751 |
{file = "pillow-10.3.0.tar.gz", hash = "sha256:9d2455fbf44c914840c793e89aa82d0e1763a14253a000743719ae5946814b2d"},
|
752 |
]
|
753 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
754 |
[[package]]
|
755 |
name = "pyarrow"
|
756 |
version = "16.1.0"
|
@@ -828,6 +942,21 @@ files = [
|
|
828 |
{file = "pydantic_core-2.18.4.tar.gz", hash = "sha256:ec3beeada09ff865c344ff3bc2f427f5e6c26401cc6113d77e372c3fdac73864"},
|
829 |
]
|
830 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
831 |
[[package]]
|
832 |
name = "pydub"
|
833 |
version = "0.25.1"
|
@@ -860,6 +989,39 @@ files = [
|
|
860 |
{file = "pyparsing-3.1.2.tar.gz", hash = "sha256:a1bac0ce561155ecc3ed78ca94d3c9378656ad4c94c1270de543f621420f94ad"},
|
861 |
]
|
862 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
863 |
[[package]]
|
864 |
name = "python-dateutil"
|
865 |
version = "2.9.0.post0"
|
@@ -911,7 +1073,7 @@ name = "pyyaml"
|
|
911 |
version = "6.0.1"
|
912 |
requires_python = ">=3.6"
|
913 |
summary = "YAML parser and emitter for Python"
|
914 |
-
groups = ["default"]
|
915 |
files = [
|
916 |
{file = "PyYAML-6.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a"},
|
917 |
{file = "PyYAML-6.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f"},
|
@@ -1111,6 +1273,18 @@ files = [
|
|
1111 |
{file = "starlette-0.37.2.tar.gz", hash = "sha256:9af890290133b79fc3db55474ade20f6220a364a0402e0b556e7cd5e1e093823"},
|
1112 |
]
|
1113 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1114 |
[[package]]
|
1115 |
name = "tomlkit"
|
1116 |
version = "0.12.0"
|
@@ -1303,6 +1477,22 @@ files = [
|
|
1303 |
{file = "uvloop-0.19.0.tar.gz", hash = "sha256:0246f4fd1bf2bf702e06b0d45ee91677ee5c31242f39aab4ea6fe0c51aedd0fd"},
|
1304 |
]
|
1305 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1306 |
[[package]]
|
1307 |
name = "watchfiles"
|
1308 |
version = "0.22.0"
|
@@ -1376,3 +1566,54 @@ files = [
|
|
1376 |
{file = "websockets-11.0.3-py3-none-any.whl", hash = "sha256:6681ba9e7f8f3b19440921e99efbb40fc89f26cd71bf539e45d8c8a25c976dc6"},
|
1377 |
{file = "websockets-11.0.3.tar.gz", hash = "sha256:88fc51d9a26b10fc331be344f1781224a375b78488fc343620184e95a4b27016"},
|
1378 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
# It is not intended for manual editing.
|
3 |
|
4 |
[metadata]
|
5 |
+
groups = ["default", "lint", "test"]
|
6 |
strategy = ["cross_platform", "inherit_metadata"]
|
7 |
lock_version = "4.4.1"
|
8 |
+
content_hash = "sha256:91e6a31abcfb1868e17cc401d5b46de2963d35ef0693a5664fe8968e49f341e7"
|
9 |
|
10 |
[[package]]
|
11 |
name = "aiofiles"
|
|
|
149 |
version = "0.4.6"
|
150 |
requires_python = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
|
151 |
summary = "Cross-platform colored terminal text."
|
152 |
+
groups = ["default", "test"]
|
153 |
marker = "platform_system == \"Windows\" or sys_platform == \"win32\""
|
154 |
files = [
|
155 |
{file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
|
|
|
224 |
version = "1.2.1"
|
225 |
requires_python = ">=3.7"
|
226 |
summary = "Backport of PEP 654 (exception groups)"
|
227 |
+
groups = ["default", "test"]
|
228 |
marker = "python_version < \"3.11\""
|
229 |
files = [
|
230 |
{file = "exceptiongroup-1.2.1-py3-none-any.whl", hash = "sha256:5258b9ed329c5bbdd31a309f53cbfb0b155341807f6ff7606a1e801a891b29ad"},
|
|
|
464 |
version = "3.7"
|
465 |
requires_python = ">=3.5"
|
466 |
summary = "Internationalized Domain Names in Applications (IDNA)"
|
467 |
+
groups = ["default", "test"]
|
468 |
files = [
|
469 |
{file = "idna-3.7-py3-none-any.whl", hash = "sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0"},
|
470 |
{file = "idna-3.7.tar.gz", hash = "sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc"},
|
|
|
481 |
{file = "importlib_resources-6.4.0.tar.gz", hash = "sha256:cdb2b453b8046ca4e3798eb1d84f3cce1446a0e8e7b5ef4efb600f19fc398145"},
|
482 |
]
|
483 |
|
484 |
+
[[package]]
|
485 |
+
name = "iniconfig"
|
486 |
+
version = "2.0.0"
|
487 |
+
requires_python = ">=3.7"
|
488 |
+
summary = "brain-dead simple config-ini parsing"
|
489 |
+
groups = ["test"]
|
490 |
+
files = [
|
491 |
+
{file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"},
|
492 |
+
{file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"},
|
493 |
+
]
|
494 |
+
|
495 |
[[package]]
|
496 |
name = "jinja2"
|
497 |
version = "3.1.4"
|
|
|
577 |
{file = "kiwisolver-1.4.5.tar.gz", hash = "sha256:e57e563a57fb22a142da34f38acc2fc1a5c864bc29ca1517a88abc963e60d6ec"},
|
578 |
]
|
579 |
|
580 |
+
[[package]]
|
581 |
+
name = "lxml"
|
582 |
+
version = "5.2.2"
|
583 |
+
requires_python = ">=3.6"
|
584 |
+
summary = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API."
|
585 |
+
groups = ["default"]
|
586 |
+
files = [
|
587 |
+
{file = "lxml-5.2.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:364d03207f3e603922d0d3932ef363d55bbf48e3647395765f9bfcbdf6d23632"},
|
588 |
+
{file = "lxml-5.2.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:50127c186f191b8917ea2fb8b206fbebe87fd414a6084d15568c27d0a21d60db"},
|
589 |
+
{file = "lxml-5.2.2-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:74e4f025ef3db1c6da4460dd27c118d8cd136d0391da4e387a15e48e5c975147"},
|
590 |
+
{file = "lxml-5.2.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:981a06a3076997adf7c743dcd0d7a0415582661e2517c7d961493572e909aa1d"},
|
591 |
+
{file = "lxml-5.2.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:aef5474d913d3b05e613906ba4090433c515e13ea49c837aca18bde190853dff"},
|
592 |
+
{file = "lxml-5.2.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1e275ea572389e41e8b039ac076a46cb87ee6b8542df3fff26f5baab43713bca"},
|
593 |
+
{file = "lxml-5.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f5b65529bb2f21ac7861a0e94fdbf5dc0daab41497d18223b46ee8515e5ad297"},
|
594 |
+
{file = "lxml-5.2.2-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:bcc98f911f10278d1daf14b87d65325851a1d29153caaf146877ec37031d5f36"},
|
595 |
+
{file = "lxml-5.2.2-cp310-cp310-manylinux_2_28_ppc64le.whl", hash = "sha256:b47633251727c8fe279f34025844b3b3a3e40cd1b198356d003aa146258d13a2"},
|
596 |
+
{file = "lxml-5.2.2-cp310-cp310-manylinux_2_28_s390x.whl", hash = "sha256:fbc9d316552f9ef7bba39f4edfad4a734d3d6f93341232a9dddadec4f15d425f"},
|
597 |
+
{file = "lxml-5.2.2-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:13e69be35391ce72712184f69000cda04fc89689429179bc4c0ae5f0b7a8c21b"},
|
598 |
+
{file = "lxml-5.2.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:3b6a30a9ab040b3f545b697cb3adbf3696c05a3a68aad172e3fd7ca73ab3c835"},
|
599 |
+
{file = "lxml-5.2.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:a233bb68625a85126ac9f1fc66d24337d6e8a0f9207b688eec2e7c880f012ec0"},
|
600 |
+
{file = "lxml-5.2.2-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:dfa7c241073d8f2b8e8dbc7803c434f57dbb83ae2a3d7892dd068d99e96efe2c"},
|
601 |
+
{file = "lxml-5.2.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:1a7aca7964ac4bb07680d5c9d63b9d7028cace3e2d43175cb50bba8c5ad33316"},
|
602 |
+
{file = "lxml-5.2.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:ae4073a60ab98529ab8a72ebf429f2a8cc612619a8c04e08bed27450d52103c0"},
|
603 |
+
{file = "lxml-5.2.2-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:ffb2be176fed4457e445fe540617f0252a72a8bc56208fd65a690fdb1f57660b"},
|
604 |
+
{file = "lxml-5.2.2-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:e290d79a4107d7d794634ce3e985b9ae4f920380a813717adf61804904dc4393"},
|
605 |
+
{file = "lxml-5.2.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:96e85aa09274955bb6bd483eaf5b12abadade01010478154b0ec70284c1b1526"},
|
606 |
+
{file = "lxml-5.2.2-cp310-cp310-win32.whl", hash = "sha256:f956196ef61369f1685d14dad80611488d8dc1ef00be57c0c5a03064005b0f30"},
|
607 |
+
{file = "lxml-5.2.2-cp310-cp310-win_amd64.whl", hash = "sha256:875a3f90d7eb5c5d77e529080d95140eacb3c6d13ad5b616ee8095447b1d22e7"},
|
608 |
+
{file = "lxml-5.2.2-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:b537bd04d7ccd7c6350cdaaaad911f6312cbd61e6e6045542f781c7f8b2e99d2"},
|
609 |
+
{file = "lxml-5.2.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4820c02195d6dfb7b8508ff276752f6b2ff8b64ae5d13ebe02e7667e035000b9"},
|
610 |
+
{file = "lxml-5.2.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2a09f6184f17a80897172863a655467da2b11151ec98ba8d7af89f17bf63dae"},
|
611 |
+
{file = "lxml-5.2.2-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:76acba4c66c47d27c8365e7c10b3d8016a7da83d3191d053a58382311a8bf4e1"},
|
612 |
+
{file = "lxml-5.2.2-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:b128092c927eaf485928cec0c28f6b8bead277e28acf56800e972aa2c2abd7a2"},
|
613 |
+
{file = "lxml-5.2.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:ae791f6bd43305aade8c0e22f816b34f3b72b6c820477aab4d18473a37e8090b"},
|
614 |
+
{file = "lxml-5.2.2-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:a2f6a1bc2460e643785a2cde17293bd7a8f990884b822f7bca47bee0a82fc66b"},
|
615 |
+
{file = "lxml-5.2.2-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e8d351ff44c1638cb6e980623d517abd9f580d2e53bfcd18d8941c052a5a009"},
|
616 |
+
{file = "lxml-5.2.2-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bec4bd9133420c5c52d562469c754f27c5c9e36ee06abc169612c959bd7dbb07"},
|
617 |
+
{file = "lxml-5.2.2-pp37-pypy37_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:55ce6b6d803890bd3cc89975fca9de1dff39729b43b73cb15ddd933b8bc20484"},
|
618 |
+
{file = "lxml-5.2.2-pp37-pypy37_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:8ab6a358d1286498d80fe67bd3d69fcbc7d1359b45b41e74c4a26964ca99c3f8"},
|
619 |
+
{file = "lxml-5.2.2-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:06668e39e1f3c065349c51ac27ae430719d7806c026fec462e5693b08b95696b"},
|
620 |
+
{file = "lxml-5.2.2-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:9cd5323344d8ebb9fb5e96da5de5ad4ebab993bbf51674259dbe9d7a18049525"},
|
621 |
+
{file = "lxml-5.2.2-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:89feb82ca055af0fe797a2323ec9043b26bc371365847dbe83c7fd2e2f181c34"},
|
622 |
+
{file = "lxml-5.2.2-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e481bba1e11ba585fb06db666bfc23dbe181dbafc7b25776156120bf12e0d5a6"},
|
623 |
+
{file = "lxml-5.2.2-pp38-pypy38_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:9d6c6ea6a11ca0ff9cd0390b885984ed31157c168565702959c25e2191674a14"},
|
624 |
+
{file = "lxml-5.2.2-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:3d98de734abee23e61f6b8c2e08a88453ada7d6486dc7cdc82922a03968928db"},
|
625 |
+
{file = "lxml-5.2.2-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:69ab77a1373f1e7563e0fb5a29a8440367dec051da6c7405333699d07444f511"},
|
626 |
+
{file = "lxml-5.2.2-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:34e17913c431f5ae01d8658dbf792fdc457073dcdfbb31dc0cc6ab256e664a8d"},
|
627 |
+
{file = "lxml-5.2.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:05f8757b03208c3f50097761be2dea0aba02e94f0dc7023ed73a7bb14ff11eb0"},
|
628 |
+
{file = "lxml-5.2.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6a520b4f9974b0a0a6ed73c2154de57cdfd0c8800f4f15ab2b73238ffed0b36e"},
|
629 |
+
{file = "lxml-5.2.2-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:5e097646944b66207023bc3c634827de858aebc226d5d4d6d16f0b77566ea182"},
|
630 |
+
{file = "lxml-5.2.2-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:b5e4ef22ff25bfd4ede5f8fb30f7b24446345f3e79d9b7455aef2836437bc38a"},
|
631 |
+
{file = "lxml-5.2.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:ff69a9a0b4b17d78170c73abe2ab12084bdf1691550c5629ad1fe7849433f324"},
|
632 |
+
{file = "lxml-5.2.2.tar.gz", hash = "sha256:bb2dc4898180bea79863d5487e5f9c7c34297414bad54bcd0f0852aee9cfdb87"},
|
633 |
+
]
|
634 |
+
|
635 |
+
[[package]]
|
636 |
+
name = "markdown"
|
637 |
+
version = "3.6"
|
638 |
+
requires_python = ">=3.8"
|
639 |
+
summary = "Python implementation of John Gruber's Markdown."
|
640 |
+
groups = ["default"]
|
641 |
+
files = [
|
642 |
+
{file = "Markdown-3.6-py3-none-any.whl", hash = "sha256:48f276f4d8cfb8ce6527c8f79e2ee29708508bf4d40aa410fbc3b4ee832c850f"},
|
643 |
+
{file = "Markdown-3.6.tar.gz", hash = "sha256:ed4f41f6daecbeeb96e576ce414c41d2d876daa9a16cb35fa8ed8c2ddfad0224"},
|
644 |
+
]
|
645 |
+
|
646 |
[[package]]
|
647 |
name = "markdown-it-py"
|
648 |
version = "3.0.0"
|
|
|
719 |
{file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"},
|
720 |
]
|
721 |
|
722 |
+
[[package]]
|
723 |
+
name = "multidict"
|
724 |
+
version = "6.0.5"
|
725 |
+
requires_python = ">=3.7"
|
726 |
+
summary = "multidict implementation"
|
727 |
+
groups = ["test"]
|
728 |
+
files = [
|
729 |
+
{file = "multidict-6.0.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:228b644ae063c10e7f324ab1ab6b548bdf6f8b47f3ec234fef1093bc2735e5f9"},
|
730 |
+
{file = "multidict-6.0.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:896ebdcf62683551312c30e20614305f53125750803b614e9e6ce74a96232604"},
|
731 |
+
{file = "multidict-6.0.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:411bf8515f3be9813d06004cac41ccf7d1cd46dfe233705933dd163b60e37600"},
|
732 |
+
{file = "multidict-6.0.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1d147090048129ce3c453f0292e7697d333db95e52616b3793922945804a433c"},
|
733 |
+
{file = "multidict-6.0.5-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:215ed703caf15f578dca76ee6f6b21b7603791ae090fbf1ef9d865571039ade5"},
|
734 |
+
{file = "multidict-6.0.5-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7c6390cf87ff6234643428991b7359b5f59cc15155695deb4eda5c777d2b880f"},
|
735 |
+
{file = "multidict-6.0.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21fd81c4ebdb4f214161be351eb5bcf385426bf023041da2fd9e60681f3cebae"},
|
736 |
+
{file = "multidict-6.0.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3cc2ad10255f903656017363cd59436f2111443a76f996584d1077e43ee51182"},
|
737 |
+
{file = "multidict-6.0.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:6939c95381e003f54cd4c5516740faba40cf5ad3eeff460c3ad1d3e0ea2549bf"},
|
738 |
+
{file = "multidict-6.0.5-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:220dd781e3f7af2c2c1053da9fa96d9cf3072ca58f057f4c5adaaa1cab8fc442"},
|
739 |
+
{file = "multidict-6.0.5-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:766c8f7511df26d9f11cd3a8be623e59cca73d44643abab3f8c8c07620524e4a"},
|
740 |
+
{file = "multidict-6.0.5-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:fe5d7785250541f7f5019ab9cba2c71169dc7d74d0f45253f8313f436458a4ef"},
|
741 |
+
{file = "multidict-6.0.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c1c1496e73051918fcd4f58ff2e0f2f3066d1c76a0c6aeffd9b45d53243702cc"},
|
742 |
+
{file = "multidict-6.0.5-cp310-cp310-win32.whl", hash = "sha256:7afcdd1fc07befad18ec4523a782cde4e93e0a2bf71239894b8d61ee578c1319"},
|
743 |
+
{file = "multidict-6.0.5-cp310-cp310-win_amd64.whl", hash = "sha256:99f60d34c048c5c2fabc766108c103612344c46e35d4ed9ae0673d33c8fb26e8"},
|
744 |
+
{file = "multidict-6.0.5-py3-none-any.whl", hash = "sha256:0d63c74e3d7ab26de115c49bffc92cc77ed23395303d496eae515d4204a625e7"},
|
745 |
+
{file = "multidict-6.0.5.tar.gz", hash = "sha256:f7e301075edaf50500f0b341543c41194d8df3ae5caf4702f2095f3ca73dd8da"},
|
746 |
+
]
|
747 |
+
|
748 |
[[package]]
|
749 |
name = "numpy"
|
750 |
version = "1.26.4"
|
|
|
790 |
version = "24.1"
|
791 |
requires_python = ">=3.8"
|
792 |
summary = "Core utilities for Python packages"
|
793 |
+
groups = ["default", "test"]
|
794 |
files = [
|
795 |
{file = "packaging-24.1-py3-none-any.whl", hash = "sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124"},
|
796 |
{file = "packaging-24.1.tar.gz", hash = "sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002"},
|
|
|
854 |
{file = "pillow-10.3.0.tar.gz", hash = "sha256:9d2455fbf44c914840c793e89aa82d0e1763a14253a000743719ae5946814b2d"},
|
855 |
]
|
856 |
|
857 |
+
[[package]]
|
858 |
+
name = "pluggy"
|
859 |
+
version = "1.5.0"
|
860 |
+
requires_python = ">=3.8"
|
861 |
+
summary = "plugin and hook calling mechanisms for python"
|
862 |
+
groups = ["test"]
|
863 |
+
files = [
|
864 |
+
{file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"},
|
865 |
+
{file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"},
|
866 |
+
]
|
867 |
+
|
868 |
[[package]]
|
869 |
name = "pyarrow"
|
870 |
version = "16.1.0"
|
|
|
942 |
{file = "pydantic_core-2.18.4.tar.gz", hash = "sha256:ec3beeada09ff865c344ff3bc2f427f5e6c26401cc6113d77e372c3fdac73864"},
|
943 |
]
|
944 |
|
945 |
+
[[package]]
|
946 |
+
name = "pydantic-settings"
|
947 |
+
version = "2.3.3"
|
948 |
+
requires_python = ">=3.8"
|
949 |
+
summary = "Settings management using Pydantic"
|
950 |
+
groups = ["default"]
|
951 |
+
dependencies = [
|
952 |
+
"pydantic>=2.7.0",
|
953 |
+
"python-dotenv>=0.21.0",
|
954 |
+
]
|
955 |
+
files = [
|
956 |
+
{file = "pydantic_settings-2.3.3-py3-none-any.whl", hash = "sha256:e4ed62ad851670975ec11285141db888fd24947f9440bd4380d7d8788d4965de"},
|
957 |
+
{file = "pydantic_settings-2.3.3.tar.gz", hash = "sha256:87fda838b64b5039b970cd47c3e8a1ee460ce136278ff672980af21516f6e6ce"},
|
958 |
+
]
|
959 |
+
|
960 |
[[package]]
|
961 |
name = "pydub"
|
962 |
version = "0.25.1"
|
|
|
989 |
{file = "pyparsing-3.1.2.tar.gz", hash = "sha256:a1bac0ce561155ecc3ed78ca94d3c9378656ad4c94c1270de543f621420f94ad"},
|
990 |
]
|
991 |
|
992 |
+
[[package]]
|
993 |
+
name = "pytest"
|
994 |
+
version = "8.2.2"
|
995 |
+
requires_python = ">=3.8"
|
996 |
+
summary = "pytest: simple powerful testing with Python"
|
997 |
+
groups = ["test"]
|
998 |
+
dependencies = [
|
999 |
+
"colorama; sys_platform == \"win32\"",
|
1000 |
+
"exceptiongroup>=1.0.0rc8; python_version < \"3.11\"",
|
1001 |
+
"iniconfig",
|
1002 |
+
"packaging",
|
1003 |
+
"pluggy<2.0,>=1.5",
|
1004 |
+
"tomli>=1; python_version < \"3.11\"",
|
1005 |
+
]
|
1006 |
+
files = [
|
1007 |
+
{file = "pytest-8.2.2-py3-none-any.whl", hash = "sha256:c434598117762e2bd304e526244f67bf66bbd7b5d6cf22138be51ff661980343"},
|
1008 |
+
{file = "pytest-8.2.2.tar.gz", hash = "sha256:de4bb8104e201939ccdc688b27a89a7be2079b22e2bd2b07f806b6ba71117977"},
|
1009 |
+
]
|
1010 |
+
|
1011 |
+
[[package]]
|
1012 |
+
name = "pytest-vcr"
|
1013 |
+
version = "1.0.2"
|
1014 |
+
summary = "Plugin for managing VCR.py cassettes"
|
1015 |
+
groups = ["test"]
|
1016 |
+
dependencies = [
|
1017 |
+
"pytest>=3.6.0",
|
1018 |
+
"vcrpy",
|
1019 |
+
]
|
1020 |
+
files = [
|
1021 |
+
{file = "pytest-vcr-1.0.2.tar.gz", hash = "sha256:23ee51b75abbcc43d926272773aae4f39f93aceb75ed56852d0bf618f92e1896"},
|
1022 |
+
{file = "pytest_vcr-1.0.2-py2.py3-none-any.whl", hash = "sha256:2f316e0539399bea0296e8b8401145c62b6f85e9066af7e57b6151481b0d6d9c"},
|
1023 |
+
]
|
1024 |
+
|
1025 |
[[package]]
|
1026 |
name = "python-dateutil"
|
1027 |
version = "2.9.0.post0"
|
|
|
1073 |
version = "6.0.1"
|
1074 |
requires_python = ">=3.6"
|
1075 |
summary = "YAML parser and emitter for Python"
|
1076 |
+
groups = ["default", "test"]
|
1077 |
files = [
|
1078 |
{file = "PyYAML-6.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a"},
|
1079 |
{file = "PyYAML-6.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f"},
|
|
|
1273 |
{file = "starlette-0.37.2.tar.gz", hash = "sha256:9af890290133b79fc3db55474ade20f6220a364a0402e0b556e7cd5e1e093823"},
|
1274 |
]
|
1275 |
|
1276 |
+
[[package]]
|
1277 |
+
name = "tomli"
|
1278 |
+
version = "2.0.1"
|
1279 |
+
requires_python = ">=3.7"
|
1280 |
+
summary = "A lil' TOML parser"
|
1281 |
+
groups = ["test"]
|
1282 |
+
marker = "python_version < \"3.11\""
|
1283 |
+
files = [
|
1284 |
+
{file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"},
|
1285 |
+
{file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"},
|
1286 |
+
]
|
1287 |
+
|
1288 |
[[package]]
|
1289 |
name = "tomlkit"
|
1290 |
version = "0.12.0"
|
|
|
1477 |
{file = "uvloop-0.19.0.tar.gz", hash = "sha256:0246f4fd1bf2bf702e06b0d45ee91677ee5c31242f39aab4ea6fe0c51aedd0fd"},
|
1478 |
]
|
1479 |
|
1480 |
+
[[package]]
|
1481 |
+
name = "vcrpy"
|
1482 |
+
version = "5.1.0"
|
1483 |
+
requires_python = ">=3.8"
|
1484 |
+
summary = "Automatically mock your HTTP interactions to simplify and speed up testing"
|
1485 |
+
groups = ["test"]
|
1486 |
+
dependencies = [
|
1487 |
+
"PyYAML",
|
1488 |
+
"wrapt",
|
1489 |
+
"yarl",
|
1490 |
+
]
|
1491 |
+
files = [
|
1492 |
+
{file = "vcrpy-5.1.0-py2.py3-none-any.whl", hash = "sha256:605e7b7a63dcd940db1df3ab2697ca7faf0e835c0852882142bafb19649d599e"},
|
1493 |
+
{file = "vcrpy-5.1.0.tar.gz", hash = "sha256:bbf1532f2618a04f11bce2a99af3a9647a32c880957293ff91e0a5f187b6b3d2"},
|
1494 |
+
]
|
1495 |
+
|
1496 |
[[package]]
|
1497 |
name = "watchfiles"
|
1498 |
version = "0.22.0"
|
|
|
1566 |
{file = "websockets-11.0.3-py3-none-any.whl", hash = "sha256:6681ba9e7f8f3b19440921e99efbb40fc89f26cd71bf539e45d8c8a25c976dc6"},
|
1567 |
{file = "websockets-11.0.3.tar.gz", hash = "sha256:88fc51d9a26b10fc331be344f1781224a375b78488fc343620184e95a4b27016"},
|
1568 |
]
|
1569 |
+
|
1570 |
+
[[package]]
|
1571 |
+
name = "wrapt"
|
1572 |
+
version = "1.16.0"
|
1573 |
+
requires_python = ">=3.6"
|
1574 |
+
summary = "Module for decorators, wrappers and monkey patching."
|
1575 |
+
groups = ["test"]
|
1576 |
+
files = [
|
1577 |
+
{file = "wrapt-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ffa565331890b90056c01db69c0fe634a776f8019c143a5ae265f9c6bc4bd6d4"},
|
1578 |
+
{file = "wrapt-1.16.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e4fdb9275308292e880dcbeb12546df7f3e0f96c6b41197e0cf37d2826359020"},
|
1579 |
+
{file = "wrapt-1.16.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb2dee3874a500de01c93d5c71415fcaef1d858370d405824783e7a8ef5db440"},
|
1580 |
+
{file = "wrapt-1.16.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2a88e6010048489cda82b1326889ec075a8c856c2e6a256072b28eaee3ccf487"},
|
1581 |
+
{file = "wrapt-1.16.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ac83a914ebaf589b69f7d0a1277602ff494e21f4c2f743313414378f8f50a4cf"},
|
1582 |
+
{file = "wrapt-1.16.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:73aa7d98215d39b8455f103de64391cb79dfcad601701a3aa0dddacf74911d72"},
|
1583 |
+
{file = "wrapt-1.16.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:807cc8543a477ab7422f1120a217054f958a66ef7314f76dd9e77d3f02cdccd0"},
|
1584 |
+
{file = "wrapt-1.16.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:bf5703fdeb350e36885f2875d853ce13172ae281c56e509f4e6eca049bdfb136"},
|
1585 |
+
{file = "wrapt-1.16.0-cp310-cp310-win32.whl", hash = "sha256:f6b2d0c6703c988d334f297aa5df18c45e97b0af3679bb75059e0e0bd8b1069d"},
|
1586 |
+
{file = "wrapt-1.16.0-cp310-cp310-win_amd64.whl", hash = "sha256:decbfa2f618fa8ed81c95ee18a387ff973143c656ef800c9f24fb7e9c16054e2"},
|
1587 |
+
{file = "wrapt-1.16.0-py3-none-any.whl", hash = "sha256:6906c4100a8fcbf2fa735f6059214bb13b97f75b1a61777fcf6432121ef12ef1"},
|
1588 |
+
{file = "wrapt-1.16.0.tar.gz", hash = "sha256:5f370f952971e7d17c7d1ead40e49f32345a7f7a5373571ef44d800d06b1899d"},
|
1589 |
+
]
|
1590 |
+
|
1591 |
+
[[package]]
|
1592 |
+
name = "yarl"
|
1593 |
+
version = "1.9.4"
|
1594 |
+
requires_python = ">=3.7"
|
1595 |
+
summary = "Yet another URL library"
|
1596 |
+
groups = ["test"]
|
1597 |
+
dependencies = [
|
1598 |
+
"idna>=2.0",
|
1599 |
+
"multidict>=4.0",
|
1600 |
+
]
|
1601 |
+
files = [
|
1602 |
+
{file = "yarl-1.9.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a8c1df72eb746f4136fe9a2e72b0c9dc1da1cbd23b5372f94b5820ff8ae30e0e"},
|
1603 |
+
{file = "yarl-1.9.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a3a6ed1d525bfb91b3fc9b690c5a21bb52de28c018530ad85093cc488bee2dd2"},
|
1604 |
+
{file = "yarl-1.9.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c38c9ddb6103ceae4e4498f9c08fac9b590c5c71b0370f98714768e22ac6fa66"},
|
1605 |
+
{file = "yarl-1.9.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d9e09c9d74f4566e905a0b8fa668c58109f7624db96a2171f21747abc7524234"},
|
1606 |
+
{file = "yarl-1.9.4-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b8477c1ee4bd47c57d49621a062121c3023609f7a13b8a46953eb6c9716ca392"},
|
1607 |
+
{file = "yarl-1.9.4-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d5ff2c858f5f6a42c2a8e751100f237c5e869cbde669a724f2062d4c4ef93551"},
|
1608 |
+
{file = "yarl-1.9.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:357495293086c5b6d34ca9616a43d329317feab7917518bc97a08f9e55648455"},
|
1609 |
+
{file = "yarl-1.9.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:54525ae423d7b7a8ee81ba189f131054defdb122cde31ff17477951464c1691c"},
|
1610 |
+
{file = "yarl-1.9.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:801e9264d19643548651b9db361ce3287176671fb0117f96b5ac0ee1c3530d53"},
|
1611 |
+
{file = "yarl-1.9.4-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:e516dc8baf7b380e6c1c26792610230f37147bb754d6426462ab115a02944385"},
|
1612 |
+
{file = "yarl-1.9.4-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:7d5aaac37d19b2904bb9dfe12cdb08c8443e7ba7d2852894ad448d4b8f442863"},
|
1613 |
+
{file = "yarl-1.9.4-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:54beabb809ffcacbd9d28ac57b0db46e42a6e341a030293fb3185c409e626b8b"},
|
1614 |
+
{file = "yarl-1.9.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:bac8d525a8dbc2a1507ec731d2867025d11ceadcb4dd421423a5d42c56818541"},
|
1615 |
+
{file = "yarl-1.9.4-cp310-cp310-win32.whl", hash = "sha256:7855426dfbddac81896b6e533ebefc0af2f132d4a47340cee6d22cac7190022d"},
|
1616 |
+
{file = "yarl-1.9.4-cp310-cp310-win_amd64.whl", hash = "sha256:848cd2a1df56ddbffeb375535fb62c9d1645dde33ca4d51341378b3f5954429b"},
|
1617 |
+
{file = "yarl-1.9.4-py3-none-any.whl", hash = "sha256:928cecb0ef9d5a7946eb6ff58417ad2fe9375762382f1bf5c55e61645f2c43ad"},
|
1618 |
+
{file = "yarl-1.9.4.tar.gz", hash = "sha256:566db86717cf8080b99b58b083b773a908ae40f06681e87e589a976faf8246bf"},
|
1619 |
+
]
|
pyproject.toml
CHANGED
@@ -25,6 +25,9 @@ dependencies = [
|
|
25 |
# "accelerate",
|
26 |
# "sentencepiece",
|
27 |
"pyarrow>=16.1.0",
|
|
|
|
|
|
|
28 |
]
|
29 |
requires-python = "==3.10.*"
|
30 |
readme = "README.md"
|
@@ -37,6 +40,10 @@ distribution = false
|
|
37 |
lint = [
|
38 |
"ruff>=0.4.8",
|
39 |
]
|
|
|
|
|
|
|
|
|
40 |
|
41 |
|
42 |
[tool.ruff]
|
|
|
25 |
# "accelerate",
|
26 |
# "sentencepiece",
|
27 |
"pyarrow>=16.1.0",
|
28 |
+
"pydantic-settings>=2.3.3",
|
29 |
+
"markdown>=3.6",
|
30 |
+
"lxml>=5.2.2",
|
31 |
]
|
32 |
requires-python = "==3.10.*"
|
33 |
readme = "README.md"
|
|
|
40 |
lint = [
|
41 |
"ruff>=0.4.8",
|
42 |
]
|
43 |
+
test = [
|
44 |
+
"pytest>=8.2.2",
|
45 |
+
"pytest-vcr>=1.0.2",
|
46 |
+
]
|
47 |
|
48 |
|
49 |
[tool.ruff]
|
src/__init__.py
DELETED
File without changes
|
src/encodechka/about.py
CHANGED
@@ -9,69 +9,40 @@ class Task:
|
|
9 |
col_name: str
|
10 |
|
11 |
|
12 |
-
# Select your tasks here
|
13 |
-
# ---------------------------------------------------
|
14 |
class Tasks(Enum):
|
15 |
-
|
16 |
-
|
17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
|
20 |
-
|
21 |
-
# ---------------------------------------------------
|
22 |
|
23 |
-
|
24 |
-
# Your leaderboard name
|
25 |
-
TITLE = """<h1 align="center" id="space-title">Demo leaderboard</h1>"""
|
26 |
-
|
27 |
-
# What does your leaderboard evaluate?
|
28 |
INTRODUCTION_TEXT = """
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
"""
|
40 |
|
41 |
-
EVALUATION_QUEUE_TEXT = """
|
42 |
-
## Some good practices before submitting a model
|
43 |
-
|
44 |
-
### 1) Make sure you can load your model and tokenizer using AutoClasses:
|
45 |
-
```python
|
46 |
-
from transformers import AutoConfig, AutoModel, AutoTokenizer
|
47 |
-
config = AutoConfig.from_pretrained("your model name", revision=revision)
|
48 |
-
model = AutoModel.from_pretrained("your model name", revision=revision)
|
49 |
-
tokenizer = AutoTokenizer.from_pretrained("your model name", revision=revision)
|
50 |
-
```
|
51 |
-
If this step fails, follow the error messages to debug your model before submitting it. It's likely your model has been
|
52 |
-
improperly uploaded.
|
53 |
-
|
54 |
-
Note: make sure your model is public!
|
55 |
-
Note: if your model needs `use_remote_code=True`, we do not support this option yet but we are working on adding it,
|
56 |
-
stay posted!
|
57 |
-
|
58 |
-
### 2) Convert your model weights to [safetensors](https://huggingface.co/docs/safetensors/index)
|
59 |
-
It's a new format for storing weights which is safer and faster to load and use. It will also allow us to add the number
|
60 |
-
of parameters of your model to the `Extended Viewer`!
|
61 |
-
|
62 |
-
### 3) Make sure your model has an open license!
|
63 |
-
This is a leaderboard for Open LLMs, and we'd love for as many people as possible to know they can use your model 🤗
|
64 |
-
|
65 |
-
### 4) Fill up your model card
|
66 |
-
When we add extra information about models to the leaderboard, it will be automatically taken from the model card
|
67 |
-
|
68 |
-
## In case of model failure
|
69 |
-
If your model is displayed in the `FAILED` category, its execution stopped.
|
70 |
-
Make sure you have followed the above steps first.
|
71 |
-
If everything is done, check you can launch the EleutherAIHarness on your model locally, using the above command without
|
72 |
-
modifications (you can add `--limit` to limit the number of examples per task).
|
73 |
-
"""
|
74 |
-
|
75 |
-
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
|
76 |
-
CITATION_BUTTON_TEXT = r"""
|
77 |
-
"""
|
|
|
9 |
col_name: str
|
10 |
|
11 |
|
|
|
|
|
12 |
class Tasks(Enum):
|
13 |
+
STS = Task("STS", "STS", "STS")
|
14 |
+
PI = Task("PI", "PI", "PI")
|
15 |
+
NLI = Task("NLI", "NLI", "NLI")
|
16 |
+
SA = Task("SA", "SA", "SA")
|
17 |
+
TI = Task("TI", "TI", "TI")
|
18 |
+
II = Task("II", "II", "II")
|
19 |
+
IC = Task("IC", "IC", "IC")
|
20 |
+
ICX = Task("ICX", "ICX", "ICX")
|
21 |
+
NE1 = Task("NE1", "NE1", "NE1")
|
22 |
+
NE2 = Task("NE2", "NE2", "NE2")
|
23 |
|
24 |
|
25 |
+
TITLE = """<h1 align="center" id="space-title">Encodechka</h1>"""
|
|
|
26 |
|
|
|
|
|
|
|
|
|
|
|
27 |
INTRODUCTION_TEXT = """
|
28 |
+
<a href="https://github.com/avidale/encodechka">Оригинальный репозиторий GitHub</a>
|
29 |
+
|
30 |
+
Задачи
|
31 |
+
- Semantic text similarity (**STS**) на основе переведённого датасета
|
32 |
+
[STS-B](https://huggingface.co/datasets/stsb_multi_mt);
|
33 |
+
- Paraphrase identification (**PI**) на основе датасета paraphraser.ru;
|
34 |
+
- Natural language inference (**NLI**) на датасете [XNLI](https://github.com/facebookresearch/XNLI);
|
35 |
+
- Sentiment analysis (**SA**) на данных [SentiRuEval2016](http://www.dialog-21.ru/evaluation/2016/sentiment/).
|
36 |
+
- Toxicity identification (**TI**) на датасете токсичных комментариев из
|
37 |
+
[OKMLCup](https://cups.mail.ru/ru/contests/okmlcup2020);
|
38 |
+
- Inappropriateness identification (**II**) на
|
39 |
+
[датасете Сколтеха](https://github.com/skoltech-nlp/inappropriate-sensitive-topics);
|
40 |
+
- Intent classification (**IC**) и её кросс-язычная версия **ICX** на датасете
|
41 |
+
[NLU-evaluation-data](https://github.com/xliuhw/NLU-Evaluation-Data), который я автоматически перевёл на русский.
|
42 |
+
В IC классификатор обучается на русских данных, а в ICX – на английских, а тестируется в обоих случаях на русских.
|
43 |
+
- Распознавание именованных сущностей на датасетах
|
44 |
+
[factRuEval-2016](https://github.com/dialogue-evaluation/factRuEval-2016) (**NE1**) и
|
45 |
+
[RuDReC](https://github.com/cimm-kzn/RuDReC) (**NE2**). Эти две задачи требуют получать эмбеддинги отдельных токенов,
|
46 |
+
а не целых предложений; поэтому там участвуют не все модели.
|
47 |
"""
|
48 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/encodechka/app.py
CHANGED
@@ -2,87 +2,34 @@ import gradio as gr
|
|
2 |
import pandas as pd
|
3 |
from about import (
|
4 |
INTRODUCTION_TEXT,
|
5 |
-
LLM_BENCHMARKS_TEXT,
|
6 |
TITLE,
|
7 |
)
|
8 |
from apscheduler.schedulers.background import BackgroundScheduler
|
9 |
from display.css_html_js import custom_css
|
10 |
from display.utils import (
|
11 |
-
BENCHMARK_COLS,
|
12 |
COLS,
|
13 |
-
EVAL_COLS,
|
14 |
-
NUMERIC_INTERVALS,
|
15 |
TYPES,
|
16 |
AutoEvalColumn,
|
17 |
-
ModelType,
|
18 |
-
Precision,
|
19 |
fields,
|
20 |
)
|
21 |
-
from envs import (
|
22 |
-
API,
|
23 |
-
EVAL_REQUESTS_PATH,
|
24 |
-
EVAL_RESULTS_PATH,
|
25 |
-
QUEUE_REPO,
|
26 |
-
REPO_ID,
|
27 |
-
RESULTS_REPO,
|
28 |
-
TOKEN,
|
29 |
-
)
|
30 |
-
from huggingface_hub import snapshot_download
|
31 |
-
from populate import get_evaluation_queue_df, get_leaderboard_df
|
32 |
-
|
33 |
-
# from submission.submit import add_new_eval
|
34 |
-
|
35 |
-
|
36 |
-
def restart_space():
|
37 |
-
API.restart_space(repo_id=REPO_ID)
|
38 |
|
|
|
|
|
|
|
|
|
|
|
39 |
|
40 |
-
|
41 |
-
print(EVAL_REQUESTS_PATH)
|
42 |
-
snapshot_download(
|
43 |
-
repo_id=QUEUE_REPO,
|
44 |
-
local_dir=EVAL_REQUESTS_PATH,
|
45 |
-
repo_type="dataset",
|
46 |
-
tqdm_class=None,
|
47 |
-
etag_timeout=30,
|
48 |
-
token=TOKEN,
|
49 |
-
)
|
50 |
-
except Exception:
|
51 |
-
restart_space()
|
52 |
-
try:
|
53 |
-
print(EVAL_RESULTS_PATH)
|
54 |
-
snapshot_download(
|
55 |
-
repo_id=RESULTS_REPO,
|
56 |
-
local_dir=EVAL_RESULTS_PATH,
|
57 |
-
repo_type="dataset",
|
58 |
-
tqdm_class=None,
|
59 |
-
etag_timeout=30,
|
60 |
-
token=TOKEN,
|
61 |
-
)
|
62 |
-
except Exception:
|
63 |
-
restart_space()
|
64 |
-
|
65 |
-
raw_data, original_df = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
|
66 |
-
leaderboard_df = original_df.copy()
|
67 |
|
68 |
-
(
|
69 |
-
finished_eval_queue_df,
|
70 |
-
running_eval_queue_df,
|
71 |
-
pending_eval_queue_df,
|
72 |
-
) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
|
73 |
|
74 |
|
75 |
-
|
76 |
-
def update_table(
|
77 |
hidden_df: pd.DataFrame,
|
78 |
columns: list,
|
79 |
-
type_query: list,
|
80 |
-
precision_query: str,
|
81 |
-
size_query: list,
|
82 |
show_deleted: bool,
|
83 |
query: str,
|
84 |
-
):
|
85 |
-
filtered_df = filter_models(hidden_df,
|
86 |
filtered_df = filter_queries(query, filtered_df)
|
87 |
df = select_columns(filtered_df, columns)
|
88 |
return df
|
@@ -94,11 +41,10 @@ def search_table(df: pd.DataFrame, query: str) -> pd.DataFrame:
|
|
94 |
|
95 |
def select_columns(df: pd.DataFrame, columns: list) -> pd.DataFrame:
|
96 |
always_here_cols = [
|
97 |
-
AutoEvalColumn.
|
98 |
-
AutoEvalColumn.model.name,
|
99 |
]
|
100 |
-
|
101 |
-
filtered_df = df[always_here_cols +
|
102 |
return filtered_df
|
103 |
|
104 |
|
@@ -117,256 +63,109 @@ def filter_queries(query: str, filtered_df: pd.DataFrame) -> pd.DataFrame:
|
|
117 |
filtered_df = filtered_df.drop_duplicates(
|
118 |
subset=[
|
119 |
AutoEvalColumn.model.name,
|
120 |
-
AutoEvalColumn.precision.name,
|
121 |
-
AutoEvalColumn.revision.name,
|
122 |
]
|
123 |
)
|
124 |
-
|
125 |
return filtered_df
|
126 |
|
127 |
|
128 |
def filter_models(
|
129 |
df: pd.DataFrame,
|
130 |
-
type_query: list,
|
131 |
-
size_query: list,
|
132 |
-
precision_query: list,
|
133 |
show_deleted: bool,
|
134 |
) -> pd.DataFrame:
|
135 |
-
# Show all models
|
136 |
if show_deleted:
|
137 |
filtered_df = df
|
138 |
-
else:
|
139 |
-
filtered_df = df[df[AutoEvalColumn.
|
140 |
-
|
141 |
-
type_emoji = [t[0] for t in type_query]
|
142 |
-
filtered_df = filtered_df.loc[df[AutoEvalColumn.model_type_symbol.name].isin(type_emoji)]
|
143 |
-
filtered_df = filtered_df.loc[df[AutoEvalColumn.precision.name].isin([*precision_query, "None"])]
|
144 |
-
|
145 |
-
numeric_interval = pd.IntervalIndex(sorted([NUMERIC_INTERVALS[s] for s in size_query]))
|
146 |
-
params_column = pd.to_numeric(df[AutoEvalColumn.params.name], errors="coerce")
|
147 |
-
mask = params_column.apply(lambda x: any(numeric_interval.contains(x)))
|
148 |
-
filtered_df = filtered_df.loc[mask]
|
149 |
|
150 |
return filtered_df
|
151 |
|
152 |
|
153 |
-
def
|
154 |
-
with gr.
|
155 |
-
gr.
|
156 |
-
|
157 |
-
|
158 |
-
with gr.Tabs(elem_classes="tab-buttons"):
|
159 |
-
with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
|
160 |
with gr.Row():
|
161 |
-
|
162 |
-
with
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
for c in fields(AutoEvalColumn)
|
175 |
-
if c.displayed_by_default and not c.hidden and not c.never_hidden
|
176 |
-
],
|
177 |
-
label="Select columns to show",
|
178 |
-
elem_id="column-select",
|
179 |
-
interactive=True,
|
180 |
-
)
|
181 |
-
with gr.Row():
|
182 |
-
deleted_models_visibility = gr.Checkbox(
|
183 |
-
value=False,
|
184 |
-
label="Show gated/private/deleted models",
|
185 |
-
interactive=True,
|
186 |
-
)
|
187 |
-
with gr.Column(min_width=320):
|
188 |
-
# with gr.Box(elem_id="box-filter"):
|
189 |
-
filter_columns_type = gr.CheckboxGroup(
|
190 |
-
label="Model types",
|
191 |
-
choices=[t.to_str() for t in ModelType],
|
192 |
-
value=[t.to_str() for t in ModelType],
|
193 |
-
interactive=True,
|
194 |
-
elem_id="filter-columns-type",
|
195 |
-
)
|
196 |
-
filter_columns_precision = gr.CheckboxGroup(
|
197 |
-
label="Precision",
|
198 |
-
choices=[i.value.name for i in Precision],
|
199 |
-
value=[i.value.name for i in Precision],
|
200 |
-
interactive=True,
|
201 |
-
elem_id="filter-columns-precision",
|
202 |
-
)
|
203 |
-
filter_columns_size = gr.CheckboxGroup(
|
204 |
-
label="Model sizes (in billions of parameters)",
|
205 |
-
choices=list(NUMERIC_INTERVALS.keys()),
|
206 |
-
value=list(NUMERIC_INTERVALS.keys()),
|
207 |
-
interactive=True,
|
208 |
-
elem_id="filter-columns-size",
|
209 |
-
)
|
210 |
-
|
211 |
-
leaderboard_table = gr.components.Dataframe(
|
212 |
-
value=leaderboard_df[
|
213 |
-
[c.name for c in fields(AutoEvalColumn) if c.never_hidden] + shown_columns.value
|
214 |
-
],
|
215 |
-
headers=[c.name for c in fields(AutoEvalColumn) if c.never_hidden] + shown_columns.value,
|
216 |
-
datatype=TYPES,
|
217 |
-
elem_id="leaderboard-table",
|
218 |
-
interactive=False,
|
219 |
-
visible=True,
|
220 |
-
)
|
221 |
-
|
222 |
-
# Dummy leaderboard for handling the case when the user uses backspace key
|
223 |
-
hidden_leaderboard_table_for_search = gr.components.Dataframe(
|
224 |
-
value=original_df[COLS],
|
225 |
-
headers=COLS,
|
226 |
-
datatype=TYPES,
|
227 |
-
visible=False,
|
228 |
-
)
|
229 |
-
search_bar.submit(
|
230 |
-
update_table,
|
231 |
-
[
|
232 |
-
hidden_leaderboard_table_for_search,
|
233 |
-
shown_columns,
|
234 |
-
filter_columns_type,
|
235 |
-
filter_columns_precision,
|
236 |
-
filter_columns_size,
|
237 |
-
deleted_models_visibility,
|
238 |
-
search_bar,
|
239 |
-
],
|
240 |
-
leaderboard_table,
|
241 |
-
)
|
242 |
-
for selector in [
|
243 |
-
shown_columns,
|
244 |
-
filter_columns_type,
|
245 |
-
filter_columns_precision,
|
246 |
-
filter_columns_size,
|
247 |
-
deleted_models_visibility,
|
248 |
-
]:
|
249 |
-
selector.change(
|
250 |
-
update_table,
|
251 |
-
[
|
252 |
-
hidden_leaderboard_table_for_search,
|
253 |
-
shown_columns,
|
254 |
-
filter_columns_type,
|
255 |
-
filter_columns_precision,
|
256 |
-
filter_columns_size,
|
257 |
-
deleted_models_visibility,
|
258 |
-
search_bar,
|
259 |
],
|
260 |
-
|
261 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
262 |
)
|
263 |
|
264 |
-
|
265 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
266 |
|
267 |
-
|
268 |
-
|
269 |
-
|
270 |
-
|
271 |
-
|
272 |
-
|
273 |
-
# with gr.Accordion(
|
274 |
-
# f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
|
275 |
-
# open=False,
|
276 |
-
# ):
|
277 |
-
# with gr.Row():
|
278 |
-
# finished_eval_table = gr.components.Dataframe(
|
279 |
-
# value=finished_eval_queue_df,
|
280 |
-
# headers=EVAL_COLS,
|
281 |
-
# datatype=EVAL_TYPES,
|
282 |
-
# row_count=5,
|
283 |
-
# )
|
284 |
-
# with gr.Accordion(
|
285 |
-
# f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
|
286 |
-
# open=False,
|
287 |
-
# ):
|
288 |
-
# with gr.Row():
|
289 |
-
# running_eval_table = gr.components.Dataframe(
|
290 |
-
# value=running_eval_queue_df,
|
291 |
-
# headers=EVAL_COLS,
|
292 |
-
# datatype=EVAL_TYPES,
|
293 |
-
# row_count=5,
|
294 |
-
# )
|
295 |
-
#
|
296 |
-
# with gr.Accordion(
|
297 |
-
# f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
|
298 |
-
# open=False,
|
299 |
-
# ):
|
300 |
-
# with gr.Row():
|
301 |
-
# pending_eval_table = gr.components.Dataframe(
|
302 |
-
# value=pending_eval_queue_df,
|
303 |
-
# headers=EVAL_COLS,
|
304 |
-
# datatype=EVAL_TYPES,
|
305 |
-
# row_count=5,
|
306 |
-
# )
|
307 |
-
# with gr.Row():
|
308 |
-
# gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
|
309 |
-
#
|
310 |
-
# with gr.Row():
|
311 |
-
# with gr.Column():
|
312 |
-
# model_name_textbox = gr.Textbox(label="Model name")
|
313 |
-
# revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
|
314 |
-
# model_type = gr.Dropdown(
|
315 |
-
# choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
|
316 |
-
# label="Model type",
|
317 |
-
# multiselect=False,
|
318 |
-
# value=None,
|
319 |
-
# interactive=True,
|
320 |
-
# )
|
321 |
-
#
|
322 |
-
# with gr.Column():
|
323 |
-
# precision = gr.Dropdown(
|
324 |
-
# choices=[i.value.name for i in Precision if i != Precision.Unknown],
|
325 |
-
# label="Precision",
|
326 |
-
# multiselect=False,
|
327 |
-
# value="float16",
|
328 |
-
# interactive=True,
|
329 |
-
# )
|
330 |
-
# weight_type = gr.Dropdown(
|
331 |
-
# choices=[i.value.name for i in WeightType],
|
332 |
-
# label="Weights type",
|
333 |
-
# multiselect=False,
|
334 |
-
# value="Original",
|
335 |
-
# interactive=True,
|
336 |
-
# )
|
337 |
-
# base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
|
338 |
-
#
|
339 |
-
# submit_button = gr.Button("Submit Eval")
|
340 |
-
# submission_result = gr.Markdown()
|
341 |
-
# submit_button.click(
|
342 |
-
# add_new_eval,
|
343 |
-
# [
|
344 |
-
# model_name_textbox,
|
345 |
-
# base_model_name_textbox,
|
346 |
-
# revision_name_textbox,
|
347 |
-
# precision,
|
348 |
-
# weight_type,
|
349 |
-
# model_type,
|
350 |
-
# ],
|
351 |
-
# submission_result,
|
352 |
-
# )
|
353 |
-
#
|
354 |
-
# with gr.Row():
|
355 |
-
# with gr.Accordion("📙 Citation", open=False):
|
356 |
-
# citation_button = gr.Textbox(
|
357 |
-
# value=CITATION_BUTTON_TEXT,
|
358 |
-
# label=CITATION_BUTTON_LABEL,
|
359 |
-
# lines=20,
|
360 |
-
# elem_id="citation-button",
|
361 |
-
# show_copy_button=True,
|
362 |
-
# )
|
363 |
return app
|
364 |
|
365 |
|
366 |
def main():
|
|
|
367 |
app = build_app()
|
368 |
scheduler = BackgroundScheduler()
|
369 |
-
scheduler.add_job(
|
370 |
scheduler.start()
|
371 |
app.queue(default_concurrency_limit=40).launch()
|
372 |
|
|
|
2 |
import pandas as pd
|
3 |
from about import (
|
4 |
INTRODUCTION_TEXT,
|
|
|
5 |
TITLE,
|
6 |
)
|
7 |
from apscheduler.schedulers.background import BackgroundScheduler
|
8 |
from display.css_html_js import custom_css
|
9 |
from display.utils import (
|
|
|
10 |
COLS,
|
|
|
|
|
11 |
TYPES,
|
12 |
AutoEvalColumn,
|
|
|
|
|
13 |
fields,
|
14 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
|
16 |
+
from parser import update_leaderboard_table
|
17 |
+
from populate import get_leaderboard_df
|
18 |
+
from settings import (
|
19 |
+
get_settings,
|
20 |
+
)
|
21 |
|
22 |
+
settings = get_settings()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
|
|
|
|
|
|
|
|
|
|
|
24 |
|
25 |
|
26 |
+
def filter_table(
|
|
|
27 |
hidden_df: pd.DataFrame,
|
28 |
columns: list,
|
|
|
|
|
|
|
29 |
show_deleted: bool,
|
30 |
query: str,
|
31 |
+
) -> pd.DataFrame:
|
32 |
+
filtered_df = filter_models(hidden_df, show_deleted)
|
33 |
filtered_df = filter_queries(query, filtered_df)
|
34 |
df = select_columns(filtered_df, columns)
|
35 |
return df
|
|
|
41 |
|
42 |
def select_columns(df: pd.DataFrame, columns: list) -> pd.DataFrame:
|
43 |
always_here_cols = [
|
44 |
+
AutoEvalColumn.model.name.lower(),
|
|
|
45 |
]
|
46 |
+
s = [c for c in COLS if c in df.columns and c in columns]
|
47 |
+
filtered_df = df[always_here_cols + s]
|
48 |
return filtered_df
|
49 |
|
50 |
|
|
|
63 |
filtered_df = filtered_df.drop_duplicates(
|
64 |
subset=[
|
65 |
AutoEvalColumn.model.name,
|
|
|
|
|
66 |
]
|
67 |
)
|
|
|
68 |
return filtered_df
|
69 |
|
70 |
|
71 |
def filter_models(
|
72 |
df: pd.DataFrame,
|
|
|
|
|
|
|
73 |
show_deleted: bool,
|
74 |
) -> pd.DataFrame:
|
|
|
75 |
if show_deleted:
|
76 |
filtered_df = df
|
77 |
+
else:
|
78 |
+
filtered_df = df[df[AutoEvalColumn.is_private.name]]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
79 |
|
80 |
return filtered_df
|
81 |
|
82 |
|
83 |
+
def get_leaderboard() -> gr.TabItem:
|
84 |
+
with gr.TabItem("🏅 Encodechka", elem_id="llm-benchmark-tab-table", id=0) as leaderboard_tab:
|
85 |
+
with gr.Row():
|
86 |
+
with gr.Column():
|
|
|
|
|
|
|
87 |
with gr.Row():
|
88 |
+
search_bar = gr.Textbox(
|
89 |
+
placeholder=" 🔍 Search for your model (separate multiple queries with `;`) "
|
90 |
+
"and press ENTER...",
|
91 |
+
show_label=False,
|
92 |
+
elem_id="search-bar",
|
93 |
+
)
|
94 |
+
with gr.Row():
|
95 |
+
shown_columns = gr.CheckboxGroup(
|
96 |
+
choices=[c.name for c in fields(AutoEvalColumn) if not c.hidden and not c.never_hidden],
|
97 |
+
value=[
|
98 |
+
c.name
|
99 |
+
for c in fields(AutoEvalColumn)
|
100 |
+
if c.displayed_by_default and not c.hidden and not c.never_hidden
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
101 |
],
|
102 |
+
label="Select columns to show",
|
103 |
+
elem_id="column-select",
|
104 |
+
interactive=True,
|
105 |
+
)
|
106 |
+
with gr.Row():
|
107 |
+
private_models_visibility = gr.Checkbox(
|
108 |
+
value=True,
|
109 |
+
label="Show private models",
|
110 |
+
interactive=True,
|
111 |
)
|
112 |
|
113 |
+
leaderboard_table = gr.Dataframe(
|
114 |
+
value=get_leaderboard_df(),
|
115 |
+
headers=[c.name for c in fields(AutoEvalColumn) if c.never_hidden] + shown_columns.value,
|
116 |
+
datatype=TYPES,
|
117 |
+
elem_id="leaderboard-table",
|
118 |
+
interactive=False,
|
119 |
+
visible=True,
|
120 |
+
)
|
121 |
+
|
122 |
+
hidden_leaderboard_table_for_search = gr.Dataframe(
|
123 |
+
value=get_leaderboard_df(),
|
124 |
+
headers=COLS,
|
125 |
+
datatype=TYPES,
|
126 |
+
visible=False,
|
127 |
+
)
|
128 |
+
search_bar.submit(
|
129 |
+
filter_table,
|
130 |
+
[
|
131 |
+
hidden_leaderboard_table_for_search,
|
132 |
+
shown_columns,
|
133 |
+
private_models_visibility,
|
134 |
+
search_bar,
|
135 |
+
],
|
136 |
+
leaderboard_table,
|
137 |
+
)
|
138 |
+
for selector in [
|
139 |
+
shown_columns,
|
140 |
+
private_models_visibility,
|
141 |
+
]:
|
142 |
+
selector.change(
|
143 |
+
filter_table,
|
144 |
+
[
|
145 |
+
hidden_leaderboard_table_for_search,
|
146 |
+
shown_columns,
|
147 |
+
private_models_visibility,
|
148 |
+
search_bar,
|
149 |
+
],
|
150 |
+
leaderboard_table,
|
151 |
+
queue=True,
|
152 |
+
)
|
153 |
+
return leaderboard_tab
|
154 |
|
155 |
+
|
156 |
+
def build_app() -> gr.Blocks:
|
157 |
+
with gr.Blocks(css=custom_css) as app:
|
158 |
+
gr.HTML(TITLE)
|
159 |
+
gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
|
160 |
+
get_leaderboard()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
161 |
return app
|
162 |
|
163 |
|
164 |
def main():
|
165 |
+
update_leaderboard_table()
|
166 |
app = build_app()
|
167 |
scheduler = BackgroundScheduler()
|
168 |
+
scheduler.add_job(update_leaderboard_table, "interval", days=1)
|
169 |
scheduler.start()
|
170 |
app.queue(default_concurrency_limit=40).launch()
|
171 |
|
src/encodechka/display/formatting.py
CHANGED
@@ -5,7 +5,9 @@ def model_hyperlink(link, model_name):
|
|
5 |
)
|
6 |
|
7 |
|
8 |
-
def make_clickable_model(model_name):
|
|
|
|
|
9 |
link = f"https://huggingface.co/{model_name}"
|
10 |
return model_hyperlink(link, model_name)
|
11 |
|
|
|
5 |
)
|
6 |
|
7 |
|
8 |
+
def make_clickable_model(model_name: str) -> str:
|
9 |
+
if model_name.find("/") == -1:
|
10 |
+
return model_name
|
11 |
link = f"https://huggingface.co/{model_name}"
|
12 |
return model_hyperlink(link, model_name)
|
13 |
|
src/encodechka/display/utils.py
CHANGED
@@ -21,135 +21,37 @@ class ColumnContent:
|
|
21 |
never_hidden: bool = False
|
22 |
|
23 |
|
24 |
-
## Leaderboard columns
|
25 |
auto_eval_column_dict = [
|
26 |
-
(
|
27 |
-
"model_type_symbol",
|
28 |
-
ColumnContent,
|
29 |
-
ColumnContent("T", "str", True, never_hidden=True),
|
30 |
-
),
|
31 |
(
|
32 |
"model",
|
33 |
ColumnContent,
|
34 |
-
ColumnContent("
|
35 |
),
|
36 |
-
]
|
37 |
-
# Scores
|
38 |
-
auto_eval_column_dict.append(("average", ColumnContent, ColumnContent("Average ⬆️", "number", True)))
|
39 |
-
for task in Tasks:
|
40 |
-
auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
|
41 |
-
# Model information
|
42 |
-
auto_eval_column_dict.append(("model_type", ColumnContent, ColumnContent("Type", "str", False)))
|
43 |
-
auto_eval_column_dict.append(("architecture", ColumnContent, ColumnContent("Architecture", "str", False)))
|
44 |
-
auto_eval_column_dict.append(("weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)))
|
45 |
-
auto_eval_column_dict.append(("precision", ColumnContent, ColumnContent("Precision", "str", False)))
|
46 |
-
auto_eval_column_dict.append(("license", ColumnContent, ColumnContent("Hub License", "str", False)))
|
47 |
-
auto_eval_column_dict.append(("params", ColumnContent, ColumnContent("#Params (B)", "number", False)))
|
48 |
-
auto_eval_column_dict.append(("likes", ColumnContent, ColumnContent("Hub ❤️", "number", False)))
|
49 |
-
auto_eval_column_dict.append(
|
50 |
(
|
51 |
-
"
|
52 |
-
|
53 |
-
|
54 |
-
)
|
55 |
-
)
|
56 |
-
|
57 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
# We use make dataclass to dynamically fill the scores from Tasks
|
59 |
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
|
60 |
|
61 |
-
|
62 |
-
## For the queue columns in the submission tab
|
63 |
-
@dataclass(frozen=True)
|
64 |
-
class EvalQueueColumn: # Queue column
|
65 |
-
model = ColumnContent("model", "markdown", True)
|
66 |
-
revision = ColumnContent("revision", "str", True)
|
67 |
-
private = ColumnContent("private", "bool", True)
|
68 |
-
precision = ColumnContent("precision", "str", True)
|
69 |
-
weight_type = ColumnContent("weight_type", "str", "Original")
|
70 |
-
status = ColumnContent("status", "str", True)
|
71 |
-
|
72 |
-
|
73 |
-
## All the model information that we might need
|
74 |
-
@dataclass
|
75 |
-
class ModelDetails:
|
76 |
-
name: str
|
77 |
-
display_name: str = ""
|
78 |
-
symbol: str = "" # emoji
|
79 |
-
|
80 |
-
|
81 |
-
class ModelType(Enum):
|
82 |
-
PT = ModelDetails(name="pretrained", symbol="🟢")
|
83 |
-
FT = ModelDetails(name="fine-tuned", symbol="🔶")
|
84 |
-
IFT = ModelDetails(name="instruction-tuned", symbol="⭕")
|
85 |
-
RL = ModelDetails(name="RL-tuned", symbol="🟦")
|
86 |
-
Unknown = ModelDetails(name="", symbol="?")
|
87 |
-
|
88 |
-
def to_str(self, separator=" "):
|
89 |
-
return f"{self.value.symbol}{separator}{self.value.name}"
|
90 |
-
|
91 |
-
@staticmethod
|
92 |
-
def from_str(type):
|
93 |
-
if "fine-tuned" in type or "🔶" in type:
|
94 |
-
return ModelType.FT
|
95 |
-
if "pretrained" in type or "🟢" in type:
|
96 |
-
return ModelType.PT
|
97 |
-
if "RL-tuned" in type or "🟦" in type:
|
98 |
-
return ModelType.RL
|
99 |
-
if "instruction-tuned" in type or "⭕" in type:
|
100 |
-
return ModelType.IFT
|
101 |
-
return ModelType.Unknown
|
102 |
-
|
103 |
-
|
104 |
-
class WeightType(Enum):
|
105 |
-
Adapter = ModelDetails("Adapter")
|
106 |
-
Original = ModelDetails("Original")
|
107 |
-
Delta = ModelDetails("Delta")
|
108 |
-
|
109 |
-
|
110 |
-
class Precision(Enum):
|
111 |
-
float16 = ModelDetails("float16")
|
112 |
-
bfloat16 = ModelDetails("bfloat16")
|
113 |
-
float32 = ModelDetails("float32")
|
114 |
-
# qt_8bit = ModelDetails("8bit")
|
115 |
-
# qt_4bit = ModelDetails("4bit")
|
116 |
-
# qt_GPTQ = ModelDetails("GPTQ")
|
117 |
-
Unknown = ModelDetails("?")
|
118 |
-
|
119 |
-
def from_str(precision):
|
120 |
-
if precision in ["torch.float16", "float16"]:
|
121 |
-
return Precision.float16
|
122 |
-
if precision in ["torch.bfloat16", "bfloat16"]:
|
123 |
-
return Precision.bfloat16
|
124 |
-
if precision in ["float32"]:
|
125 |
-
return Precision.float32
|
126 |
-
# if precision in ["8bit"]:
|
127 |
-
# return Precision.qt_8bit
|
128 |
-
# if precision in ["4bit"]:
|
129 |
-
# return Precision.qt_4bit
|
130 |
-
# if precision in ["GPTQ", "None"]:
|
131 |
-
# return Precision.qt_GPTQ
|
132 |
-
return Precision.Unknown
|
133 |
-
|
134 |
-
|
135 |
-
# Column selection
|
136 |
COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
|
137 |
TYPES = [c.type for c in fields(AutoEvalColumn) if not c.hidden]
|
138 |
COLS_LITE = [c.name for c in fields(AutoEvalColumn) if c.displayed_by_default and not c.hidden]
|
139 |
TYPES_LITE = [c.type for c in fields(AutoEvalColumn) if c.displayed_by_default and not c.hidden]
|
140 |
|
141 |
-
EVAL_COLS = [c.name for c in fields(EvalQueueColumn)]
|
142 |
-
EVAL_TYPES = [c.type for c in fields(EvalQueueColumn)]
|
143 |
-
|
144 |
BENCHMARK_COLS = [t.value.col_name for t in Tasks]
|
145 |
-
|
146 |
-
NUMERIC_INTERVALS = {
|
147 |
-
"?": pd.Interval(-1, 0, closed="right"),
|
148 |
-
"~1.5": pd.Interval(0, 2, closed="right"),
|
149 |
-
"~3": pd.Interval(2, 4, closed="right"),
|
150 |
-
"~7": pd.Interval(4, 9, closed="right"),
|
151 |
-
"~13": pd.Interval(9, 20, closed="right"),
|
152 |
-
"~35": pd.Interval(20, 45, closed="right"),
|
153 |
-
"~60": pd.Interval(45, 70, closed="right"),
|
154 |
-
"70+": pd.Interval(70, 10000, closed="right"),
|
155 |
-
}
|
|
|
21 |
never_hidden: bool = False
|
22 |
|
23 |
|
|
|
24 |
auto_eval_column_dict = [
|
|
|
|
|
|
|
|
|
|
|
25 |
(
|
26 |
"model",
|
27 |
ColumnContent,
|
28 |
+
ColumnContent("model", "markdown", True, never_hidden=True),
|
29 |
),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
(
|
31 |
+
"CPU", ColumnContent, ColumnContent("CPU", "number", True)
|
32 |
+
), ("GPU", ColumnContent, ColumnContent("GPU", "number", True)),
|
33 |
+
("size", ColumnContent, ColumnContent("size", "number", True)),
|
34 |
+
("MeanS", ColumnContent, ColumnContent("Mean S", "number", True)),
|
35 |
+
("MeanSW", ColumnContent, ColumnContent("Mean S+W", "number", True)),
|
36 |
+
("dim", ColumnContent, ColumnContent("dim", "number", True)),
|
37 |
+
("STS", ColumnContent, ColumnContent("STS", "number", True)),
|
38 |
+
("PI", ColumnContent, ColumnContent("PI", "number", True)),
|
39 |
+
("NLI", ColumnContent, ColumnContent("NLI", "number", True)),
|
40 |
+
("SA", ColumnContent, ColumnContent("SA", "number", True)),
|
41 |
+
("TI", ColumnContent, ColumnContent("TI", "number", True)),
|
42 |
+
("II", ColumnContent, ColumnContent("II", "number", True)),
|
43 |
+
("IC", ColumnContent, ColumnContent("IC", "number", True)),
|
44 |
+
("ICX", ColumnContent, ColumnContent("ICX", "number", True)),
|
45 |
+
("NE1", ColumnContent, ColumnContent("NE1", "number", True)),
|
46 |
+
("NE2", ColumnContent, ColumnContent("NE2", "number", True)),
|
47 |
+
("is_private", ColumnContent, ColumnContent("is_private", "boolean", True, hidden=True)),
|
48 |
+
]
|
49 |
# We use make dataclass to dynamically fill the scores from Tasks
|
50 |
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
|
51 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
|
53 |
TYPES = [c.type for c in fields(AutoEvalColumn) if not c.hidden]
|
54 |
COLS_LITE = [c.name for c in fields(AutoEvalColumn) if c.displayed_by_default and not c.hidden]
|
55 |
TYPES_LITE = [c.type for c in fields(AutoEvalColumn) if c.displayed_by_default and not c.hidden]
|
56 |
|
|
|
|
|
|
|
57 |
BENCHMARK_COLS = [t.value.col_name for t in Tasks]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/encodechka/envs.py
DELETED
@@ -1,25 +0,0 @@
|
|
1 |
-
import os
|
2 |
-
|
3 |
-
from huggingface_hub import HfApi
|
4 |
-
|
5 |
-
# Info to change for your repository
|
6 |
-
# ----------------------------------
|
7 |
-
TOKEN = os.environ.get("TOKEN") # A read/write token for your org
|
8 |
-
|
9 |
-
OWNER = "demo-leaderboard-backend"
|
10 |
-
# ----------------------------------
|
11 |
-
|
12 |
-
REPO_ID = f"{OWNER}/leaderboard"
|
13 |
-
QUEUE_REPO = f"{OWNER}/requests"
|
14 |
-
RESULTS_REPO = f"{OWNER}/results"
|
15 |
-
|
16 |
-
# If you setup a cache later, just change HF_HOME
|
17 |
-
CACHE_PATH = os.getenv("HF_HOME", ".")
|
18 |
-
|
19 |
-
# Local caches
|
20 |
-
EVAL_REQUESTS_PATH = os.path.join(CACHE_PATH, "eval-queue")
|
21 |
-
EVAL_RESULTS_PATH = os.path.join(CACHE_PATH, "eval-results")
|
22 |
-
EVAL_REQUESTS_PATH_BACKEND = os.path.join(CACHE_PATH, "eval-queue-bk")
|
23 |
-
EVAL_RESULTS_PATH_BACKEND = os.path.join(CACHE_PATH, "eval-results-bk")
|
24 |
-
|
25 |
-
API = HfApi(token=TOKEN)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/encodechka/leaderboard/__init__.py
DELETED
File without changes
|
src/encodechka/leaderboard/read_evals.py
DELETED
@@ -1,193 +0,0 @@
|
|
1 |
-
import glob
|
2 |
-
import json
|
3 |
-
import os
|
4 |
-
from dataclasses import dataclass
|
5 |
-
|
6 |
-
import dateutil
|
7 |
-
import numpy as np
|
8 |
-
from display.formatting import make_clickable_model
|
9 |
-
from display.utils import AutoEvalColumn, ModelType, Precision, Tasks, WeightType
|
10 |
-
|
11 |
-
|
12 |
-
@dataclass
|
13 |
-
class EvalResult:
|
14 |
-
"""Represents one full evaluation. Built from a combination of the result and request file for a given run."""
|
15 |
-
|
16 |
-
eval_name: str # org_model_precision (uid)
|
17 |
-
full_model: str # org/model (path on hub)
|
18 |
-
org: str
|
19 |
-
model: str
|
20 |
-
revision: str # commit hash, "" if main
|
21 |
-
results: dict
|
22 |
-
precision: Precision = Precision.Unknown
|
23 |
-
model_type: ModelType = ModelType.Unknown # Pretrained, fine tuned, ...
|
24 |
-
weight_type: WeightType = WeightType.Original # Original or Adapter
|
25 |
-
architecture: str = "Unknown"
|
26 |
-
license: str = "?"
|
27 |
-
likes: int = 0
|
28 |
-
num_params: int = 0
|
29 |
-
date: str = "" # submission date of request file
|
30 |
-
still_on_hub: bool = False
|
31 |
-
|
32 |
-
@classmethod
|
33 |
-
def init_from_json_file(self, json_filepath):
|
34 |
-
"""Inits the result from the specific model result file"""
|
35 |
-
with open(json_filepath) as fp:
|
36 |
-
data = json.load(fp)
|
37 |
-
|
38 |
-
config = data.get("config")
|
39 |
-
|
40 |
-
# Precision
|
41 |
-
precision = Precision.from_str(config.get("model_dtype"))
|
42 |
-
|
43 |
-
# Get model and org
|
44 |
-
org_and_model = config.get("model_name", config.get("model_args", None))
|
45 |
-
org_and_model = org_and_model.split("/", 1)
|
46 |
-
|
47 |
-
if len(org_and_model) == 1:
|
48 |
-
org = None
|
49 |
-
model = org_and_model[0]
|
50 |
-
result_key = f"{model}_{precision.value.name}"
|
51 |
-
else:
|
52 |
-
org = org_and_model[0]
|
53 |
-
model = org_and_model[1]
|
54 |
-
result_key = f"{org}_{model}_{precision.value.name}"
|
55 |
-
full_model = "/".join(org_and_model)
|
56 |
-
|
57 |
-
# still_on_hub, _, model_config = is_model_on_hub(
|
58 |
-
# full_model,
|
59 |
-
# config.get("model_sha", "main"),
|
60 |
-
# trust_remote_code=True,
|
61 |
-
# test_tokenizer=False,
|
62 |
-
# )
|
63 |
-
# architecture = "?"
|
64 |
-
# if model_config is not None:
|
65 |
-
# architectures = getattr(model_config, "architectures", None)
|
66 |
-
# if architectures:
|
67 |
-
# architecture = ";".join(architectures)
|
68 |
-
|
69 |
-
# Extract results available in this file (some results are split in several files)
|
70 |
-
results = {}
|
71 |
-
for task in Tasks:
|
72 |
-
task = task.value
|
73 |
-
|
74 |
-
# We average all scores of a given metric (not all metrics are present in all files)
|
75 |
-
accs = np.array([v.get(task.metric, None) for k, v in data["results"].items() if task.benchmark == k])
|
76 |
-
if accs.size == 0 or any([acc is None for acc in accs]):
|
77 |
-
continue
|
78 |
-
|
79 |
-
mean_acc = np.mean(accs) * 100.0
|
80 |
-
results[task.benchmark] = mean_acc
|
81 |
-
|
82 |
-
return self(
|
83 |
-
eval_name=result_key,
|
84 |
-
full_model=full_model,
|
85 |
-
org=org,
|
86 |
-
model=model,
|
87 |
-
results=results,
|
88 |
-
precision=precision,
|
89 |
-
revision=config.get("model_sha", ""),
|
90 |
-
# still_on_hub=still_on_hub,
|
91 |
-
# architecture=architecture,
|
92 |
-
)
|
93 |
-
|
94 |
-
def update_with_request_file(self, requests_path):
|
95 |
-
"""Finds the relevant request file for the current model and updates info with it"""
|
96 |
-
request_file = get_request_file_for_model(requests_path, self.full_model, self.precision.value.name)
|
97 |
-
|
98 |
-
try:
|
99 |
-
with open(request_file) as f:
|
100 |
-
request = json.load(f)
|
101 |
-
self.model_type = ModelType.from_str(request.get("model_type", ""))
|
102 |
-
self.weight_type = WeightType[request.get("weight_type", "Original")]
|
103 |
-
self.license = request.get("license", "?")
|
104 |
-
self.likes = request.get("likes", 0)
|
105 |
-
self.num_params = request.get("params", 0)
|
106 |
-
self.date = request.get("submitted_time", "")
|
107 |
-
except Exception:
|
108 |
-
print(f"Could not find request file for {self.org}/{self.model} with precision {self.precision.value.name}")
|
109 |
-
|
110 |
-
def to_dict(self):
|
111 |
-
"""Converts the Eval Result to a dict compatible with our dataframe display"""
|
112 |
-
average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
|
113 |
-
data_dict = {
|
114 |
-
"eval_name": self.eval_name, # not a column, just a save name,
|
115 |
-
AutoEvalColumn.precision.name: self.precision.value.name,
|
116 |
-
AutoEvalColumn.model_type.name: self.model_type.value.name,
|
117 |
-
AutoEvalColumn.model_type_symbol.name: self.model_type.value.symbol,
|
118 |
-
AutoEvalColumn.weight_type.name: self.weight_type.value.name,
|
119 |
-
AutoEvalColumn.architecture.name: self.architecture,
|
120 |
-
AutoEvalColumn.model.name: make_clickable_model(self.full_model),
|
121 |
-
AutoEvalColumn.revision.name: self.revision,
|
122 |
-
AutoEvalColumn.average.name: average,
|
123 |
-
AutoEvalColumn.license.name: self.license,
|
124 |
-
AutoEvalColumn.likes.name: self.likes,
|
125 |
-
AutoEvalColumn.params.name: self.num_params,
|
126 |
-
AutoEvalColumn.still_on_hub.name: self.still_on_hub,
|
127 |
-
}
|
128 |
-
|
129 |
-
for task in Tasks:
|
130 |
-
data_dict[task.value.col_name] = self.results[task.value.benchmark]
|
131 |
-
|
132 |
-
return data_dict
|
133 |
-
|
134 |
-
|
135 |
-
def get_request_file_for_model(requests_path, model_name, precision):
|
136 |
-
"""Selects the correct request file for a given model. Only keeps runs tagged as FINISHED"""
|
137 |
-
request_files = os.path.join(
|
138 |
-
requests_path,
|
139 |
-
f"{model_name}_eval_request_*.json",
|
140 |
-
)
|
141 |
-
request_files = glob.glob(request_files)
|
142 |
-
|
143 |
-
# Select correct request file (precision)
|
144 |
-
request_file = ""
|
145 |
-
request_files = sorted(request_files, reverse=True)
|
146 |
-
for tmp_request_file in request_files:
|
147 |
-
with open(tmp_request_file) as f:
|
148 |
-
req_content = json.load(f)
|
149 |
-
if req_content["status"] in ["FINISHED"] and req_content["precision"] == precision.split(".")[-1]:
|
150 |
-
request_file = tmp_request_file
|
151 |
-
return request_file
|
152 |
-
|
153 |
-
|
154 |
-
def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
|
155 |
-
"""From the path of the results folder root, extract all needed info for results"""
|
156 |
-
model_result_filepaths = []
|
157 |
-
|
158 |
-
for root, _, files in os.walk(results_path):
|
159 |
-
# We should only have json files in model results
|
160 |
-
if len(files) == 0 or any([not f.endswith(".json") for f in files]):
|
161 |
-
continue
|
162 |
-
|
163 |
-
# Sort the files by date
|
164 |
-
try:
|
165 |
-
files.sort(key=lambda x: x.removesuffix(".json").removeprefix("results_")[:-7])
|
166 |
-
except dateutil.parser._parser.ParserError:
|
167 |
-
files = [files[-1]]
|
168 |
-
|
169 |
-
for file in files:
|
170 |
-
model_result_filepaths.append(os.path.join(root, file))
|
171 |
-
|
172 |
-
eval_results = {}
|
173 |
-
for model_result_filepath in model_result_filepaths:
|
174 |
-
# Creation of result
|
175 |
-
eval_result = EvalResult.init_from_json_file(model_result_filepath)
|
176 |
-
eval_result.update_with_request_file(requests_path)
|
177 |
-
|
178 |
-
# Store results of same eval together
|
179 |
-
eval_name = eval_result.eval_name
|
180 |
-
if eval_name in eval_results.keys():
|
181 |
-
eval_results[eval_name].results.update({k: v for k, v in eval_result.results.items() if v is not None})
|
182 |
-
else:
|
183 |
-
eval_results[eval_name] = eval_result
|
184 |
-
|
185 |
-
results = []
|
186 |
-
for v in eval_results.values():
|
187 |
-
try:
|
188 |
-
v.to_dict() # we test if the dict version is complete
|
189 |
-
results.append(v)
|
190 |
-
except KeyError: # not all eval values present
|
191 |
-
continue
|
192 |
-
|
193 |
-
return results
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/encodechka/parser.py
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from io import StringIO
|
2 |
+
|
3 |
+
import pandas as pd
|
4 |
+
import markdown
|
5 |
+
import requests
|
6 |
+
from settings import get_settings
|
7 |
+
|
8 |
+
|
9 |
+
def get_readme() -> str:
|
10 |
+
url = "https://raw.githubusercontent.com/avidale/encodechka/master/README.md"
|
11 |
+
response = requests.get(url)
|
12 |
+
return response.text
|
13 |
+
|
14 |
+
|
15 |
+
def get_readme_html() -> str:
|
16 |
+
return markdown.markdown(get_readme(), extensions=['tables'])
|
17 |
+
|
18 |
+
|
19 |
+
def get_readme_df() -> pd.DataFrame:
|
20 |
+
performance, leaderboard = pd.read_html(StringIO(get_readme_html()))
|
21 |
+
performance = performance.set_index("model")
|
22 |
+
leaderboard = leaderboard.set_index("model")
|
23 |
+
df = pd.concat([performance, leaderboard], axis=1)
|
24 |
+
return df
|
25 |
+
|
26 |
+
|
27 |
+
def update_leaderboard_table() -> None:
|
28 |
+
df = get_readme_df()
|
29 |
+
df.to_csv(get_settings().LEADERBOARD_FILE_PATH)
|
src/encodechka/populate.py
CHANGED
@@ -1,60 +1,13 @@
|
|
1 |
-
import json
|
2 |
-
import os
|
3 |
-
from typing import Any
|
4 |
-
|
5 |
import pandas as pd
|
6 |
-
from display.formatting import has_no_nan_values, make_clickable_model
|
7 |
-
from display.utils import AutoEvalColumn, EvalQueueColumn
|
8 |
-
from leaderboard.read_evals import EvalResult, get_raw_eval_results
|
9 |
-
|
10 |
-
|
11 |
-
def get_leaderboard_df(
|
12 |
-
results_path: str, requests_path: str, cols: list, benchmark_cols: list
|
13 |
-
) -> tuple[list[EvalResult], Any]:
|
14 |
-
"""Creates a dataframe from all the individual experiment results"""
|
15 |
-
raw_data = get_raw_eval_results(results_path, requests_path)
|
16 |
-
all_data_json = [v.to_dict() for v in raw_data]
|
17 |
-
|
18 |
-
df = pd.DataFrame.from_records(all_data_json)
|
19 |
-
df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
|
20 |
-
df = df[cols].round(decimals=2)
|
21 |
-
|
22 |
-
# filter out if any of the benchmarks have not been produced
|
23 |
-
df = df[has_no_nan_values(df, benchmark_cols)]
|
24 |
-
return raw_data, df
|
25 |
|
|
|
|
|
|
|
26 |
|
27 |
-
def get_evaluation_queue_df(save_path: str, cols: list) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
|
28 |
-
"""Creates the different dataframes for the evaluation queues requestes"""
|
29 |
-
entries = [entry for entry in os.listdir(save_path) if not entry.startswith(".")]
|
30 |
-
all_evals = []
|
31 |
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
|
39 |
-
data[EvalQueueColumn.revision.name] = data.get("revision", "main")
|
40 |
-
|
41 |
-
all_evals.append(data)
|
42 |
-
elif ".md" not in entry:
|
43 |
-
# this is a folder
|
44 |
-
sub_entries = [e for e in os.listdir(f"{save_path}/{entry}") if not e.startswith(".")]
|
45 |
-
for sub_entry in sub_entries:
|
46 |
-
file_path = os.path.join(save_path, entry, sub_entry)
|
47 |
-
with open(file_path) as fp:
|
48 |
-
data = json.load(fp)
|
49 |
-
|
50 |
-
data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
|
51 |
-
data[EvalQueueColumn.revision.name] = data.get("revision", "main")
|
52 |
-
all_evals.append(data)
|
53 |
-
|
54 |
-
pending_list = [e for e in all_evals if e["status"] in ["PENDING", "RERUN"]]
|
55 |
-
running_list = [e for e in all_evals if e["status"] == "RUNNING"]
|
56 |
-
finished_list = [e for e in all_evals if e["status"].startswith("FINISHED") or e["status"] == "PENDING_NEW_EVAL"]
|
57 |
-
df_pending = pd.DataFrame.from_records(pending_list, columns=cols)
|
58 |
-
df_running = pd.DataFrame.from_records(running_list, columns=cols)
|
59 |
-
df_finished = pd.DataFrame.from_records(finished_list, columns=cols)
|
60 |
-
return df_finished[cols], df_running[cols], df_pending[cols]
|
|
|
|
|
|
|
|
|
|
|
1 |
import pandas as pd
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
|
3 |
+
from display.formatting import make_clickable_model
|
4 |
+
from display.utils import AutoEvalColumn
|
5 |
+
from settings import Settings
|
6 |
|
|
|
|
|
|
|
|
|
7 |
|
8 |
+
def get_leaderboard_df() -> pd.DataFrame:
|
9 |
+
"""Creates a dataframe from all the individual experiment results"""
|
10 |
+
df = pd.read_csv(Settings().LEADERBOARD_FILE_PATH).sort_values(by="STS", ascending=False)
|
11 |
+
df[AutoEvalColumn.is_private.name] = df["model"].apply(lambda x: "/" in x)
|
12 |
+
df["model"] = df["model"].apply(make_clickable_model)
|
13 |
+
return df
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/encodechka/settings.py
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
|
3 |
+
from huggingface_hub import HfApi
|
4 |
+
from pydantic_settings import BaseSettings
|
5 |
+
|
6 |
+
|
7 |
+
class Settings(BaseSettings):
|
8 |
+
# TOKEN: str
|
9 |
+
OWNER: str = "Samoed"
|
10 |
+
REPO_ID: str = f"{OWNER}/Encodechka"
|
11 |
+
QUEUE_REPO: str = f"{OWNER}/requests"
|
12 |
+
RESULTS_REPO: str = f"{OWNER}/results"
|
13 |
+
CACHE_PATH: str = "."
|
14 |
+
EVAL_REQUESTS_PATH: str = os.path.join(CACHE_PATH, "eval-queue")
|
15 |
+
EVAL_RESULTS_PATH: str = os.path.join(CACHE_PATH, "eval-results")
|
16 |
+
EVAL_REQUESTS_PATH_BACKEND: str = os.path.join(CACHE_PATH, "eval-queue-bk")
|
17 |
+
EVAL_RESULTS_PATH_BACKEND: str = os.path.join(CACHE_PATH, "eval-results-bk")
|
18 |
+
ENCODECHKA_URL: str = "https://raw.githubusercontent.com/avidale/encodechka/master/README.md"
|
19 |
+
LEADERBOARD_FILE_PATH: str = os.path.join(CACHE_PATH, "leaderboard.csv")
|
20 |
+
|
21 |
+
|
22 |
+
def get_settings():
|
23 |
+
return Settings()
|
24 |
+
|
25 |
+
|
26 |
+
# API = HfApi(token=get_settings().TOKEN)
|
src/encodechka/submission/__init__.py
DELETED
File without changes
|
src/encodechka/submission/check_validity.py
DELETED
@@ -1,131 +0,0 @@
|
|
1 |
-
# import json
|
2 |
-
# import os
|
3 |
-
# from collections import defaultdict
|
4 |
-
#
|
5 |
-
# import huggingface_hub
|
6 |
-
# from huggingface_hub import ModelCard
|
7 |
-
# from huggingface_hub.hf_api import ModelInfo
|
8 |
-
# from transformers import AutoConfig
|
9 |
-
# from transformers.models.auto.tokenization_auto import AutoTokenizer
|
10 |
-
#
|
11 |
-
#
|
12 |
-
# def check_model_card(repo_id: str) -> tuple[bool, str]:
|
13 |
-
# """Checks if the model card and license exist and have been filled"""
|
14 |
-
# try:
|
15 |
-
# card = ModelCard.load(repo_id)
|
16 |
-
# except huggingface_hub.utils.EntryNotFoundError:
|
17 |
-
# return (
|
18 |
-
# False,
|
19 |
-
# "Please add a model card to your model to explain how you trained/fine-tuned it.",
|
20 |
-
# )
|
21 |
-
#
|
22 |
-
# # Enforce license metadata
|
23 |
-
# if card.data.license is None:
|
24 |
-
# if not ("license_name" in card.data and "license_link" in card.data):
|
25 |
-
# return False, (
|
26 |
-
# "License not found. Please add a license to your model card using the `license` metadata or a"
|
27 |
-
# " `license_name`/`license_link` pair."
|
28 |
-
# )
|
29 |
-
#
|
30 |
-
# # Enforce card content
|
31 |
-
# if len(card.text) < 200:
|
32 |
-
# return False, "Please add a description to your model card, it is too short."
|
33 |
-
#
|
34 |
-
# return True, ""
|
35 |
-
#
|
36 |
-
#
|
37 |
-
def is_model_on_hub(
|
38 |
-
model_name: str,
|
39 |
-
revision: str,
|
40 |
-
token: str | None = None,
|
41 |
-
trust_remote_code=False,
|
42 |
-
test_tokenizer=False,
|
43 |
-
) -> tuple[bool, str]:
|
44 |
-
"""Checks if the model model_name is on the hub,
|
45 |
-
and whether it (and its tokenizer) can be loaded with AutoClasses."""
|
46 |
-
raise NotImplementedError("Replace with huggingface_hub API")
|
47 |
-
# try:
|
48 |
-
# config = AutoConfig.from_pretrained(
|
49 |
-
# model_name,
|
50 |
-
# revision=revision,
|
51 |
-
# trust_remote_code=trust_remote_code,
|
52 |
-
# token=token,
|
53 |
-
# )
|
54 |
-
# if test_tokenizer:
|
55 |
-
# try:
|
56 |
-
# tk = AutoTokenizer.from_pretrained(
|
57 |
-
# model_name,
|
58 |
-
# revision=revision,
|
59 |
-
# trust_remote_code=trust_remote_code,
|
60 |
-
# token=token,
|
61 |
-
# )
|
62 |
-
# except ValueError as e:
|
63 |
-
# return (
|
64 |
-
# False,
|
65 |
-
# f"uses a tokenizer which is not in a transformers release: {e}",
|
66 |
-
# None,
|
67 |
-
# )
|
68 |
-
# except Exception:
|
69 |
-
# return (
|
70 |
-
# False,
|
71 |
-
# "'s tokenizer cannot be loaded. Is your tokenizer class in a
|
72 |
-
# stable transformers release, and correctly configured?",
|
73 |
-
# None,
|
74 |
-
# )
|
75 |
-
# return True, None, config
|
76 |
-
#
|
77 |
-
# except ValueError:
|
78 |
-
# return (
|
79 |
-
# False,
|
80 |
-
# "needs to be launched with `trust_remote_code=True`. For safety reason, we do not allow
|
81 |
-
# these models to be automatically submitted to the leaderboard.",
|
82 |
-
# None,
|
83 |
-
# )
|
84 |
-
#
|
85 |
-
# except Exception:
|
86 |
-
# return False, "was not found on hub!", None
|
87 |
-
|
88 |
-
|
89 |
-
#
|
90 |
-
#
|
91 |
-
# def get_model_size(model_info: ModelInfo, precision: str):
|
92 |
-
# """Gets the model size from the configuration, or the model name if the
|
93 |
-
# configuration does not contain the information."""
|
94 |
-
# try:
|
95 |
-
# model_size = round(model_info.safetensors["total"] / 1e9, 3)
|
96 |
-
# except (AttributeError, TypeError):
|
97 |
-
# return 0 # Unknown model sizes are indicated as 0, see NUMERIC_INTERVALS in app.py
|
98 |
-
#
|
99 |
-
# size_factor = 8 if (precision == "GPTQ" or "gptq" in model_info.modelId.lower()) else 1
|
100 |
-
# model_size = size_factor * model_size
|
101 |
-
# return model_size
|
102 |
-
#
|
103 |
-
#
|
104 |
-
# def get_model_arch(model_info: ModelInfo):
|
105 |
-
# """Gets the model architecture from the configuration"""
|
106 |
-
# return model_info.config.get("architectures", "Unknown")
|
107 |
-
#
|
108 |
-
#
|
109 |
-
# def already_submitted_models(requested_models_dir: str) -> set[str]:
|
110 |
-
# """Gather a list of already submitted models to avoid duplicates"""
|
111 |
-
# depth = 1
|
112 |
-
# file_names = []
|
113 |
-
# users_to_submission_dates = defaultdict(list)
|
114 |
-
#
|
115 |
-
# for root, _, files in os.walk(requested_models_dir):
|
116 |
-
# current_depth = root.count(os.sep) - requested_models_dir.count(os.sep)
|
117 |
-
# if current_depth == depth:
|
118 |
-
# for file in files:
|
119 |
-
# if not file.endswith(".json"):
|
120 |
-
# continue
|
121 |
-
# with open(os.path.join(root, file)) as f:
|
122 |
-
# info = json.load(f)
|
123 |
-
# file_names.append(f"{info['model']}_{info['revision']}_{info['precision']}")
|
124 |
-
#
|
125 |
-
# # Select organisation
|
126 |
-
# if info["model"].count("/") == 0 or "submitted_time" not in info:
|
127 |
-
# continue
|
128 |
-
# organisation, _ = info["model"].split("/")
|
129 |
-
# users_to_submission_dates[organisation].append(info["submitted_time"])
|
130 |
-
#
|
131 |
-
# return set(file_names), users_to_submission_dates
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/encodechka/submission/submit.py
DELETED
@@ -1,125 +0,0 @@
|
|
1 |
-
# import json
|
2 |
-
# import os
|
3 |
-
# from datetime import datetime, timezone
|
4 |
-
#
|
5 |
-
# from ..display.formatting import styled_error, styled_message, styled_warning
|
6 |
-
# from ..envs import API, EVAL_REQUESTS_PATH, QUEUE_REPO, TOKEN
|
7 |
-
# from .check_validity import (
|
8 |
-
# already_submitted_models,
|
9 |
-
# check_model_card,
|
10 |
-
# get_model_size,
|
11 |
-
# is_model_on_hub,
|
12 |
-
# )
|
13 |
-
#
|
14 |
-
# REQUESTED_MODELS = None
|
15 |
-
# USERS_TO_SUBMISSION_DATES = None
|
16 |
-
#
|
17 |
-
#
|
18 |
-
# def add_new_eval(
|
19 |
-
# model: str,
|
20 |
-
# base_model: str,
|
21 |
-
# revision: str,
|
22 |
-
# precision: str,
|
23 |
-
# weight_type: str,
|
24 |
-
# model_type: str,
|
25 |
-
# ):
|
26 |
-
# global REQUESTED_MODELS
|
27 |
-
# global USERS_TO_SUBMISSION_DATES
|
28 |
-
# if not REQUESTED_MODELS:
|
29 |
-
# REQUESTED_MODELS, USERS_TO_SUBMISSION_DATES = already_submitted_models(EVAL_REQUESTS_PATH)
|
30 |
-
#
|
31 |
-
# user_name = ""
|
32 |
-
# model_path = model
|
33 |
-
# if "/" in model:
|
34 |
-
# user_name = model.split("/")[0]
|
35 |
-
# model_path = model.split("/")[1]
|
36 |
-
#
|
37 |
-
# precision = precision.split(" ")[0]
|
38 |
-
# current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
|
39 |
-
#
|
40 |
-
# if model_type is None or model_type == "":
|
41 |
-
# return styled_error("Please select a model type.")
|
42 |
-
#
|
43 |
-
# # Does the model actually exist?
|
44 |
-
# if revision == "":
|
45 |
-
# revision = "main"
|
46 |
-
#
|
47 |
-
# # Is the model on the hub?
|
48 |
-
# if weight_type in ["Delta", "Adapter"]:
|
49 |
-
# base_model_on_hub, error, _ = is_model_on_hub(
|
50 |
-
# model_name=base_model, revision=revision, token=TOKEN, test_tokenizer=True
|
51 |
-
# )
|
52 |
-
# if not base_model_on_hub:
|
53 |
-
# return styled_error(f'Base model "{base_model}" {error}')
|
54 |
-
#
|
55 |
-
# if not weight_type == "Adapter":
|
56 |
-
# model_on_hub, error, _ = is_model_on_hub(
|
57 |
-
# model_name=model, revision=revision, token=TOKEN, test_tokenizer=True
|
58 |
-
# )
|
59 |
-
# if not model_on_hub:
|
60 |
-
# return styled_error(f'Model "{model}" {error}')
|
61 |
-
#
|
62 |
-
# # Is the model info correctly filled?
|
63 |
-
# try:
|
64 |
-
# model_info = API.model_info(repo_id=model, revision=revision)
|
65 |
-
# except Exception:
|
66 |
-
# return styled_error("Could not get your model information. Please fill it up properly.")
|
67 |
-
#
|
68 |
-
# model_size = get_model_size(model_info=model_info, precision=precision)
|
69 |
-
#
|
70 |
-
# # Were the model card and license filled?
|
71 |
-
# try:
|
72 |
-
# license = model_info.cardData["license"]
|
73 |
-
# except Exception:
|
74 |
-
# return styled_error("Please select a license for your model")
|
75 |
-
#
|
76 |
-
# modelcard_OK, error_msg = check_model_card(model)
|
77 |
-
# if not modelcard_OK:
|
78 |
-
# return styled_error(error_msg)
|
79 |
-
#
|
80 |
-
# # Seems good, creating the eval
|
81 |
-
# print("Adding new eval")
|
82 |
-
#
|
83 |
-
# eval_entry = {
|
84 |
-
# "model": model,
|
85 |
-
# "base_model": base_model,
|
86 |
-
# "revision": revision,
|
87 |
-
# "precision": precision,
|
88 |
-
# "weight_type": weight_type,
|
89 |
-
# "status": "PENDING",
|
90 |
-
# "submitted_time": current_time,
|
91 |
-
# "model_type": model_type,
|
92 |
-
# "likes": model_info.likes,
|
93 |
-
# "params": model_size,
|
94 |
-
# "license": license,
|
95 |
-
# "private": False,
|
96 |
-
# }
|
97 |
-
#
|
98 |
-
# # Check for duplicate submission
|
99 |
-
# if f"{model}_{revision}_{precision}" in REQUESTED_MODELS:
|
100 |
-
# return styled_warning("This model has been already submitted.")
|
101 |
-
#
|
102 |
-
# print("Creating eval file")
|
103 |
-
# OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
|
104 |
-
# os.makedirs(OUT_DIR, exist_ok=True)
|
105 |
-
# out_path = f"{OUT_DIR}/{model_path}_eval_request_False_{precision}_{weight_type}.json"
|
106 |
-
#
|
107 |
-
# with open(out_path, "w") as f:
|
108 |
-
# f.write(json.dumps(eval_entry))
|
109 |
-
#
|
110 |
-
# print("Uploading eval file")
|
111 |
-
# API.upload_file(
|
112 |
-
# path_or_fileobj=out_path,
|
113 |
-
# path_in_repo=out_path.split("eval-queue/")[1],
|
114 |
-
# repo_id=QUEUE_REPO,
|
115 |
-
# repo_type="dataset",
|
116 |
-
# commit_message=f"Add {model} to eval queue",
|
117 |
-
# )
|
118 |
-
#
|
119 |
-
# # Remove the local file
|
120 |
-
# os.remove(out_path)
|
121 |
-
#
|
122 |
-
# return styled_message(
|
123 |
-
# "Your request has been submitted to the evaluation queue!\n
|
124 |
-
# Please wait for up to an hour for the model to show in the PENDING list."
|
125 |
-
# )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tests/cassettes/test_parser.yaml
ADDED
@@ -0,0 +1,343 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
interactions:
|
2 |
+
- request:
|
3 |
+
body: null
|
4 |
+
headers:
|
5 |
+
Accept:
|
6 |
+
- '*/*'
|
7 |
+
Accept-Encoding:
|
8 |
+
- gzip, deflate
|
9 |
+
Connection:
|
10 |
+
- keep-alive
|
11 |
+
User-Agent:
|
12 |
+
- python-requests/2.32.3
|
13 |
+
method: GET
|
14 |
+
uri: https://raw.githubusercontent.com/avidale/encodechka/master/README.md
|
15 |
+
response:
|
16 |
+
body:
|
17 |
+
string: "# encodechka\n## encodechka-eval\n\n\u042D\u0442\u043E\u0442 \u0440\u0435\u043F\u043E\u0437\u0438\u0442\u043E\u0440\u0438\u0439
|
18 |
+
- \u0440\u0430\u0437\u0432\u0438\u0442\u0438\u0435 \u043F\u043E\u0434\u0445\u043E\u0434\u0430
|
19 |
+
\u043A \u043E\u0446\u0435\u043D\u043A\u0435 \u043C\u043E\u0434\u0435\u043B\u0435\u0439
|
20 |
+
\u0438\u0437 \u043F\u043E\u0441\u0442\u0430\n[\u041C\u0430\u043B\u0435\u043D\u044C\u043A\u0438\u0439
|
21 |
+
\u0438 \u0431\u044B\u0441\u0442\u0440\u044B\u0439 BERT \u0434\u043B\u044F
|
22 |
+
\u0440\u0443\u0441\u0441\u043A\u043E\u0433\u043E \u044F\u0437\u044B\u043A\u0430](https://habr.com/ru/post/562064),
|
23 |
+
\n\u044D\u0432\u043E\u043B\u044E\u0446\u0438\u043E\u043D\u0438\u0440\u043E\u0432\u0430\u0432\u0448\u0435\u0433\u043E
|
24 |
+
\u0432 [\u0420\u0435\u0439\u0442\u0438\u043D\u0433 \u0440\u0443\u0441\u0441\u043A\u043E\u044F\u0437\u044B\u0447\u043D\u044B\u0445
|
25 |
+
\u044D\u043D\u043A\u043E\u0434\u0435\u0440\u043E\u0432 \u043F\u0440\u0435\u0434\u043B\u043E\u0436\u0435\u043D\u0438\u0439](https://habr.com/ru/post/669674/).\n\u0418\u0434\u0435\u044F
|
26 |
+
\u0432 \u0442\u043E\u043C, \u0447\u0442\u043E\u0431\u044B \u043F\u043E\u043D\u044F\u0442\u044C,
|
27 |
+
\u043A\u0430\u043A \u0445\u043E\u0440\u043E\u0448\u043E \u0440\u0430\u0437\u043D\u044B\u0435
|
28 |
+
\u043C\u043E\u0434\u0435\u043B\u0438 \u043F\u0440\u0435\u0432\u0440\u0430\u0449\u0430\u044E\u0442
|
29 |
+
\u043A\u043E\u0440\u043E\u0442\u043A\u0438\u0435 \u0442\u0435\u043A\u0441\u0442\u044B\n\u0432
|
30 |
+
\u043E\u0441\u043C\u044B\u0441\u043B\u0435\u043D\u043D\u044B\u0435 \u0432\u0435\u043A\u0442\u043E\u0440\u044B.\n\n\u041F\u043E\u0445\u043E\u0436\u0438\u0435
|
31 |
+
\u043F\u0440\u043E\u0435\u043A\u0442\u044B:\n* [RussianSuperGLUE](https://russiansuperglue.com/):
|
32 |
+
\u0444\u043E\u043A\u0443\u0441 \u043D\u0430 \u0434\u043E\u043E\u0431\u0443\u0447\u0430\u0435\u043C\u044B\u0445
|
33 |
+
\u043C\u043E\u0434\u0435\u043B\u044F\u0445\n* [MOROCCO](https://github.com/RussianNLP/MOROCCO/):
|
34 |
+
RussianSuperGLUE + \u043E\u0446\u0435\u043D\u043A\u0430 \u043F\u0440\u043E\u0438\u0437\u0432\u043E\u0434\u0438\u0442\u0435\u043B\u044C\u043D\u043E\u0441\u0442\u0438,
|
35 |
+
\u0442\u0440\u0443\u0434\u043D\u043E\u0432\u043E\u0441\u043F\u0440\u043E\u0438\u0437\u0432\u043E\u0434\u0438\u043C\n*
|
36 |
+
[RuSentEval](https://github.com/RussianNLP/RuSentEval): \u0431\u043E\u043B\u0435\u0435
|
37 |
+
\u0430\u043A\u0430\u0434\u0435\u043C\u0438\u0447\u0435\u0441\u043A\u0438\u0435/\u043B\u0438\u043D\u0433\u0432\u0438\u0441\u0442\u0438\u0447\u0435\u0441\u043A\u0438\u0435
|
38 |
+
\u0437\u0430\u0434\u0430\u0447\u0438\n* \u0421\u0442\u0430\u0442\u044C\u044F
|
39 |
+
\u043E\u0442 \u0412\u044B\u0448\u043A\u0438 [Popov et al, 2019](https://arxiv.org/abs/1910.13291):
|
40 |
+
\u043F\u0435\u0440\u0432\u0430\u044F \u043D\u0430\u0443\u0447\u043D\u0430\u044F
|
41 |
+
\u0441\u0442\u0430\u0442\u044C\u044F \u043D\u0430 \u044D\u0442\u0443 \u0442\u0435\u043C\u0443,
|
42 |
+
\u043D\u043E \u043C\u0430\u043B\u043E\u0432\u0430\u0442\u043E \u043C\u043E\u0434\u0435\u043B\u0435\u0439
|
43 |
+
\u0438 \u0437\u0430\u0434\u0430\u0447\n* [SentEvalRu](https://github.com/comptechml/SentEvalRu)
|
44 |
+
\u0438 [deepPavlovEval](https://github.com/deepmipt/deepPavlovEval): \u0434\u0432\u0430
|
45 |
+
\u0445\u043E\u0440\u043E\u0448\u0438\u0445, \u043D\u043E \u0434\u0430\u0432\u043D\u043E
|
46 |
+
\u043D\u0435 \u043E\u0431\u043D\u043E\u0432\u043B\u044F\u0432\u0448\u0438\u0445\u0441\u044F
|
47 |
+
\u0431\u0435\u043D\u0447\u043C\u0430\u0440\u043A\u0430. \n\n\u041F\u0440\u0438\u043C\u0435\u0440
|
48 |
+
\u0437\u0430\u043F\u0443\u0441\u043A\u0430 \u043C\u0435\u0442\u0440\u0438\u043A
|
49 |
+
\u2013 \u0432 \u0431\u043B\u043E\u043A\u043D\u043E\u0442\u0435 [evaluation
|
50 |
+
example](https://github.com/avidale/encodechka/blob/master/evaluation%20example.ipynb).
|
51 |
+
\n\n\u0411\u043B\u043E\u043A\u043D\u043E\u0442 \u0434\u043B\u044F \u0432\u043E\u0441\u043F\u0440\u043E\u0438\u0437\u0432\u0435\u0434\u0435\u043D\u0438\u044F
|
52 |
+
\u043B\u0438\u0434\u0435\u0440\u0431\u043E\u0440\u0434\u0430: [v2021](https://colab.research.google.com/drive/1fu2i7A-Yr-85Ex_NvIyeCIO7lN2R7P-k?usp=sharing),
|
53 |
+
\n[v2023](https://colab.research.google.com/drive/1t956aJsp5qPnst3379vI8NNRqiqJUFMn?usp=sharing).\n\n###
|
54 |
+
\u041B\u0438\u0434\u0435\u0440\u0431\u043E\u0440\u0434\n\n\u0420\u0430\u043D\u0436\u0438\u0440\u043E\u0432\u0430\u043D\u0438\u0435
|
55 |
+
\u043C\u043E\u0434\u0435\u043B\u0435\u0439 \u0432 \u043F\u043E \u0441\u0440\u0435\u0434\u043D\u0435\u043C\u0443
|
56 |
+
\u043A\u0430\u0447\u0435\u0441\u0442\u0432\u0443 \u0438 \u043F\u0440\u043E\u0438\u0437\u0432\u043E\u0434\u0438\u0442\u0435\u043B\u044C\u043D\u043E\u0441\u0442\u0438.
|
57 |
+
\n\u041F\u043E\u0434\u0441\u0432\u0435\u0447\u0435\u043D\u044B \u041F\u0430\u0440\u0435\u0442\u043E-\u043E\u043F\u0442\u0438\u043C\u0430\u043B\u044C\u043D\u044B\u0435
|
58 |
+
\u043C\u043E\u0434\u0435\u043B\u0438 \u043F\u043E \u043A\u0430\u0436\u0434\u043E\u043C\u0443
|
59 |
+
\u0438\u0437 \u043A\u0440\u0438\u0442\u0435\u0440\u0438\u0435\u0432. \n\n|
|
60 |
+
model | CPU |
|
61 |
+
GPU | size | Mean S | Mean S+W | dim |\n|:------------------------------------------------------------|:----------|:---------|:--------------|---------:|:-----------|------:|\n|
|
62 |
+
BAAI/bge-m3 | 523.4 |
|
63 |
+
22.5 | **2166.0** | 0.787 | 0.696 | 1024 |\n| intfloat/multilingual-e5-large-instruct
|
64 |
+
\ | 501.5 | 25.71 | **2136.0** | 0.784 | 0.684
|
65 |
+
\ | 1024 |\n| intfloat/multilingual-e5-large |
|
66 |
+
**506.8** | **30.8** | **2135.9389** | 0.78 | 0.686 | 1024 |\n|
|
67 |
+
sentence-transformers/paraphrase-multilingual-mpnet-base-v2 | **20.5** |
|
68 |
+
**19.9** | **1081.8485** | 0.762 | | 768 |\n| intfloat/multilingual-e5-base
|
69 |
+
\ | 130.61 | 14.39 | **1061.0** | 0.761
|
70 |
+
| 0.669 | 768 |\n| intfloat/multilingual-e5-small |
|
71 |
+
40.86 | 12.09 | **449.0** | 0.742 | 0.645 | 384 |\n|
|
72 |
+
symanto/sn-xlm-roberta-base-snli-mnli-anli-xnli | **20.2** |
|
73 |
+
**16.5** | **1081.8474** | 0.739 | | 768 |\n| cointegrated/LaBSE-en-ru
|
74 |
+
\ | 133.4 | **15.3** | **489.6621**
|
75 |
+
\ | 0.739 | 0.668 | 768 |\n| sentence-transformers/LaBSE |
|
76 |
+
135.1 | **13.3** | 1796.5078 | 0.739 | 0.667 | 768 |\n|
|
77 |
+
MUSE-3 | 200.1 |
|
78 |
+
30.7 | **303.0** | 0.736 | | 512 |\n| text-embedding-ada-002
|
79 |
+
\ | ? | | ? |
|
80 |
+
\ 0.734 | | 1536 |\n| sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
|
81 |
+
| **18.2** | 14.9 | 479.2547 | 0.734 | | 384 |\n|
|
82 |
+
sentence-transformers/distiluse-base-multilingual-cased-v1 | **11.8** |
|
83 |
+
**8.0** | 517.7452 | 0.722 | | 512 |\n| SONAR |
|
84 |
+
? | ? | 3060.0 | 0.721 | | 1024 |\n|
|
85 |
+
facebook/nllb-200-distilled-600M | 252.3 |
|
86 |
+
15.9 | 1577.4828 | 0.709 | 0.64 | 1024 |\n| sentence-transformers/distiluse-base-multilingual-cased-v2
|
87 |
+
\ | **11.2** | 9.2 | 517.7453 | 0.708 | | 512 |\n|
|
88 |
+
cointegrated/rubert-tiny2 | **6.2** |
|
89 |
+
**4.6** | **111.3823** | 0.704 | 0.638 | 312 |\n| ai-forever/sbert_large_mt_nlu_ru
|
90 |
+
\ | 504.5 | 29.7 | 1628.6539 | 0.703
|
91 |
+
| 0.626 | 1024 |\n| laser |
|
92 |
+
192.5 | 13.5 | 200.0 | 0.699 | | 1024 |\n|
|
93 |
+
laser2 | 163.4 |
|
94 |
+
8.6 | 175.0 | 0.694 | | 1024 |\n| ai-forever/sbert_large_nlu_ru
|
95 |
+
\ | 497.7 | 29.9 | 1628.6539 | 0.688
|
96 |
+
| 0.626 | 1024 |\n| clips/mfaq |
|
97 |
+
18.1 | 18.2 | 1081.8576 | 0.687 | | 768 |\n|
|
98 |
+
cointegrated/rut5-base-paraphraser | 137.0 |
|
99 |
+
15.6 | 412.0015 | 0.685 | 0.634 | 768 |\n| DeepPavlov/rubert-base-cased-sentence
|
100 |
+
\ | 128.4 | 13.2 | 678.5215 | 0.678 |
|
101 |
+
0.612 | 768 |\n| DeepPavlov/distilrubert-base-cased-conversational
|
102 |
+
\ | 64.2 | 10.4 | 514.002 | 0.676 | 0.624 |
|
103 |
+
\ 768 |\n| DeepPavlov/distilrubert-tiny-cased-conversational |
|
104 |
+
21.2 | **3.3** | 405.8292 | 0.67 | 0.616 | 768 |\n|
|
105 |
+
cointegrated/rut5-base-multitask | 136.9 |
|
106 |
+
12.7 | 412.0015 | 0.668 | 0.623 | 768 |\n| ai-forever/ruRoberta-large
|
107 |
+
\ | 512.3 | 25.5 | 1355.7162 |
|
108 |
+
\ 0.666 | 0.609 | 1024 |\n| DeepPavlov/rubert-base-cased-conversational
|
109 |
+
\ | 127.5 | 16.3 | 678.5215 | 0.653 | 0.606
|
110 |
+
\ | 768 |\n| deepvk/deberta-v1-base |
|
111 |
+
128.6 | 19.0 | 473.2402 | 0.653 | 0.591 | 768 |\n|
|
112 |
+
cointegrated/rubert-tiny | 7.5 |
|
113 |
+
5.9 | **44.97** | 0.645 | 0.575 | 312 |\n| ai-forever/FRED-T5-large
|
114 |
+
\ | 479.4 | 23.3 | 1372.9988 |
|
115 |
+
\ 0.639 | 0.551 | 1024 |\n| inkoziev/sbert_synonymy |
|
116 |
+
6.9 | 4.2 | 111.3823 | 0.637 | 0.566 | 312 |\n|
|
117 |
+
numind/NuNER-multilingual-v0.1 | 186.9 |
|
118 |
+
10 | 678.0 | 0.633 | 0.572 | 768 |\n| cointegrated/rubert-tiny-toxicity
|
119 |
+
\ | 10 | 5.5 | 47.2 | 0.621
|
120 |
+
| 0.553 | 312 |\n| ft_geowac_full |
|
121 |
+
**0.3** | | 1910.0 | 0.617 | 0.55 | 300 |\n|
|
122 |
+
bert-base-multilingual-cased | 141.4 |
|
123 |
+
13.7 | 678.5215 | 0.614 | 0.565 | 768 |\n| ai-forever/ruT5-large
|
124 |
+
\ | 489.6 | 20.2 | 1277.7571
|
125 |
+
\ | 0.61 | 0.578 | 1024 |\n| cointegrated/rut5-small |
|
126 |
+
37.6 | 8.6 | 111.3162 | 0.602 | 0.564 | 512 |\n|
|
127 |
+
ft_geowac_21mb | 1.2 |
|
128 |
+
\ | **21.0** | 0.597 | 0.531 | 300 |\n| inkoziev/sbert_pq
|
129 |
+
\ | 7.4 | 4.2 | 111.3823
|
130 |
+
\ | 0.596 | 0.526 | 312 |\n| ai-forever/ruT5-base |
|
131 |
+
126.3 | 12.8 | 418.2325 | 0.571 | 0.544 | 768 |\n|
|
132 |
+
hashing_1000_char | 0.5 |
|
133 |
+
\ | **1.0** | 0.557 | 0.464 | 1000 |\n| cointegrated/rut5-base
|
134 |
+
\ | 127.8 | 15.5 | 412.0014 |
|
135 |
+
\ 0.554 | 0.53 | 768 |\n| hashing_300_char |
|
136 |
+
0.8 | | 1.0 | 0.529 | 0.433 | 300 |\n|
|
137 |
+
hashing_1000 | **0.2** |
|
138 |
+
\ | 1.0 | 0.513 | 0.416 | 1000 |\n| hashing_300
|
139 |
+
\ | 0.3 | |
|
140 |
+
1.0 | 0.491 | 0.397 | 300 |\n\n\u0420\u0430\u043D\u0436\u0438\u0440\u043E\u0432\u0430\u043D\u0438\u0435
|
141 |
+
\u043C\u043E\u0434\u0435\u043B\u0435\u0439 \u043F\u043E \u0437\u0430\u0434\u0430\u0447\u0430\u043C.\n\u041F\u043E\u0434\u0441\u0432\u0435\u0447\u0435\u043D\u044B
|
142 |
+
\u043D\u0430\u0438\u043B\u0443\u0447\u0448\u0438\u0435 \u043C\u043E\u0434\u0435\u043B\u0438
|
143 |
+
\u043F\u043E \u043A\u0430\u0436\u0434\u043E\u0439 \u0438\u0437 \u0437\u0430\u0434\u0430\u0447.
|
144 |
+
\n\n| model | STS |
|
145 |
+
PI | NLI | SA | TI | IA | IC | ICX |
|
146 |
+
NE1 | NE2 |\n|:------------------------------------------------------------|:---------|:---------|:---------|:---------|:---------|:---------|:---------|:---------|:---------|:---------|\n|
|
147 |
+
BAAI/bge-m3 | **0.86** | **0.75**
|
148 |
+
| 0.51 | **0.82** | 0.97 | 0.79 | 0.81 | **0.78** | 0.24 |
|
149 |
+
0.42 |\n| intfloat/multilingual-e5-large-instruct |
|
150 |
+
0.86 | 0.74 | 0.47 | 0.81 | 0.98 | 0.8 | **0.82**
|
151 |
+
| 0.77 | 0.21 | 0.35 |\n| intfloat/multilingual-e5-large |
|
152 |
+
0.86 | 0.73 | 0.47 | 0.81 | 0.98 | 0.8 | 0.82 |
|
153 |
+
0.77 | 0.24 | 0.37 |\n| sentence-transformers/paraphrase-multilingual-mpnet-base-v2
|
154 |
+
| 0.85 | 0.66 | 0.54 | 0.79 | 0.95 | 0.78 | 0.79 |
|
155 |
+
0.74 | | |\n| intfloat/multilingual-e5-base |
|
156 |
+
0.83 | 0.7 | 0.46 | 0.8 | 0.96 | 0.78 | 0.8 |
|
157 |
+
0.74 | 0.23 | 0.38 |\n| intfloat/multilingual-e5-small |
|
158 |
+
0.82 | 0.71 | 0.46 | 0.76 | 0.96 | 0.76 | 0.78 |
|
159 |
+
0.69 | 0.23 | 0.27 |\n| symanto/sn-xlm-roberta-base-snli-mnli-anli-xnli
|
160 |
+
\ | 0.76 | 0.6 | **0.86** | 0.76 | 0.91 | 0.72
|
161 |
+
\ | 0.71 | 0.6 | | |\n| cointegrated/LaBSE-en-ru
|
162 |
+
\ | 0.79 | 0.66 | 0.43 | 0.76
|
163 |
+
\ | 0.95 | 0.77 | 0.79 | 0.77 | 0.35 | 0.42 |\n|
|
164 |
+
sentence-transformers/LaBSE | 0.79 | 0.66
|
165 |
+
\ | 0.43 | 0.76 | 0.95 | 0.77 | 0.79 | 0.76 | 0.35
|
166 |
+
\ | 0.41 |\n| MUSE-3 |
|
167 |
+
0.81 | 0.61 | 0.42 | 0.77 | 0.96 | 0.79 | 0.77 |
|
168 |
+
0.75 | | |\n| text-embedding-ada-002 |
|
169 |
+
0.78 | 0.66 | 0.44 | 0.77 | 0.96 | 0.77 | 0.75 |
|
170 |
+
0.73 | | |\n| sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
|
171 |
+
| 0.84 | 0.62 | 0.5 | 0.76 | 0.92 | 0.74 | 0.77 |
|
172 |
+
0.72 | | |\n| sentence-transformers/distiluse-base-multilingual-cased-v1
|
173 |
+
\ | 0.8 | 0.6 | 0.43 | 0.75 | 0.94 | 0.76 | 0.76
|
174 |
+
\ | 0.74 | | |\n| SONAR |
|
175 |
+
0.71 | 0.58 | 0.41 | 0.77 | 0.98 | 0.79 | 0.78 |
|
176 |
+
0.74 | | |\n| facebook/nllb-200-distilled-600M |
|
177 |
+
0.71 | 0.54 | 0.41 | 0.76 | 0.95 | 0.76 | 0.8 |
|
178 |
+
0.75 | 0.31 | 0.42 |\n| sentence-transformers/distiluse-base-multilingual-cased-v2
|
179 |
+
\ | 0.79 | 0.55 | 0.42 | 0.75 | 0.91 | 0.75 | 0.76
|
180 |
+
\ | 0.73 | | |\n| cointegrated/rubert-tiny2 |
|
181 |
+
0.75 | 0.65 | 0.42 | 0.74 | 0.94 | 0.75 | 0.76 |
|
182 |
+
0.64 | 0.36 | 0.39 |\n| ai-forever/sbert_large_mt_nlu_ru |
|
183 |
+
0.78 | 0.65 | 0.4 | 0.8 | 0.98 | 0.8 | 0.76 |
|
184 |
+
0.45 | 0.3 | 0.34 |\n| laser |
|
185 |
+
0.75 | 0.6 | 0.41 | 0.73 | 0.96 | 0.72 | 0.72 |
|
186 |
+
0.7 | | |\n| laser2 |
|
187 |
+
0.74 | 0.6 | 0.41 | 0.73 | 0.95 | 0.72 | 0.72 |
|
188 |
+
0.69 | | |\n| ai-forever/sbert_large_nlu_ru |
|
189 |
+
0.68 | 0.62 | 0.39 | 0.78 | 0.98 | 0.8 | 0.78 |
|
190 |
+
0.48 | 0.36 | 0.4 |\n| clips/mfaq |
|
191 |
+
0.63 | 0.59 | 0.35 | 0.79 | 0.95 | 0.74 | 0.76 |
|
192 |
+
0.69 | | |\n| cointegrated/rut5-base-paraphraser |
|
193 |
+
0.65 | 0.53 | 0.4 | 0.78 | 0.95 | 0.75 | 0.75 |
|
194 |
+
0.67 | 0.45 | 0.41 |\n| DeepPavlov/rubert-base-cased-sentence
|
195 |
+
\ | 0.74 | 0.66 | 0.49 | 0.75 | 0.92
|
196 |
+
\ | 0.75 | 0.72 | 0.39 | 0.36 | 0.34 |\n| DeepPavlov/distilrubert-base-cased-conversational
|
197 |
+
\ | 0.7 | 0.56 | 0.39 | 0.76 | 0.98 | 0.78 |
|
198 |
+
0.76 | 0.48 | 0.4 | 0.43 |\n| DeepPavlov/distilrubert-tiny-cased-conversational
|
199 |
+
\ | 0.7 | 0.55 | 0.4 | 0.74 | 0.98 | 0.78 |
|
200 |
+
0.76 | 0.45 | 0.35 | 0.44 |\n| cointegrated/rut5-base-multitask
|
201 |
+
\ | 0.65 | 0.54 | 0.38 | 0.76 |
|
202 |
+
0.95 | 0.75 | 0.72 | 0.59 | 0.47 | 0.41 |\n| ai-forever/ruRoberta-large
|
203 |
+
\ | 0.7 | 0.6 | 0.35 | 0.78 |
|
204 |
+
0.98 | 0.8 | 0.78 | 0.32 | 0.3 | **0.46** |\n| DeepPavlov/rubert-base-cased-conversational
|
205 |
+
\ | 0.68 | 0.52 | 0.38 | 0.73 | 0.98 |
|
206 |
+
0.78 | 0.75 | 0.42 | 0.41 | 0.43 |\n| deepvk/deberta-v1-base
|
207 |
+
\ | 0.68 | 0.54 | 0.38 | 0.76
|
208 |
+
\ | 0.98 | 0.8 | 0.78 | 0.29 | 0.29 | 0.4 |\n|
|
209 |
+
cointegrated/rubert-tiny | 0.66 | 0.53
|
210 |
+
\ | 0.4 | 0.71 | 0.89 | 0.68 | 0.7 | 0.58 | 0.24
|
211 |
+
\ | 0.34 |\n| ai-forever/FRED-T5-large |
|
212 |
+
0.62 | 0.44 | 0.37 | 0.78 | 0.98 | **0.81** | 0.67 |
|
213 |
+
0.45 | 0.25 | 0.15 |\n| inkoziev/sbert_synonymy |
|
214 |
+
0.69 | 0.49 | 0.41 | 0.71 | 0.91 | 0.72 | 0.69 |
|
215 |
+
0.47 | 0.32 | 0.24 |\n| numind/NuNER-multilingual-v0.1 |
|
216 |
+
0.67 | 0.53 | 0.4 | 0.71 | 0.89 | 0.72 | 0.7 |
|
217 |
+
0.46 | 0.32 | 0.34 |\n| cointegrated/rubert-tiny-toxicity |
|
218 |
+
0.57 | 0.44 | 0.37 | 0.68 | **1.0** | 0.78 | 0.7 |
|
219 |
+
0.43 | 0.24 | 0.32 |\n| ft_geowac_full |
|
220 |
+
0.69 | 0.53 | 0.37 | 0.72 | 0.97 | 0.76 | 0.66 |
|
221 |
+
0.26 | 0.22 | 0.34 |\n| bert-base-multilingual-cased |
|
222 |
+
0.66 | 0.53 | 0.37 | 0.7 | 0.89 | 0.7 | 0.69 |
|
223 |
+
0.38 | 0.36 | 0.38 |\n| ai-forever/ruT5-large |
|
224 |
+
0.51 | 0.39 | 0.35 | 0.77 | 0.97 | 0.79 | 0.72 |
|
225 |
+
0.38 | 0.46 | 0.44 |\n| cointegrated/rut5-small |
|
226 |
+
0.61 | 0.53 | 0.34 | 0.73 | 0.92 | 0.71 | 0.7 |
|
227 |
+
0.27 | 0.44 | 0.38 |\n| ft_geowac_21mb |
|
228 |
+
0.68 | 0.52 | 0.36 | 0.72 | 0.96 | 0.74 | 0.65 |
|
229 |
+
0.15 | 0.21 | 0.32 |\n| inkoziev/sbert_pq |
|
230 |
+
0.57 | 0.41 | 0.38 | 0.7 | 0.92 | 0.69 | 0.68 |
|
231 |
+
0.43 | 0.26 | 0.24 |\n| ai-forever/ruT5-base |
|
232 |
+
0.5 | 0.28 | 0.34 | 0.73 | 0.97 | 0.76 | 0.7 |
|
233 |
+
0.29 | 0.45 | 0.41 |\n| hashing_1000_char |
|
234 |
+
0.7 | 0.53 | 0.4 | 0.7 | 0.84 | 0.59 | 0.63 |
|
235 |
+
0.05 | 0.05 | 0.14 |\n| cointegrated/rut5-base |
|
236 |
+
0.44 | 0.28 | 0.33 | 0.74 | 0.92 | 0.75 | 0.58 |
|
237 |
+
0.39 | **0.48** | 0.39 |\n| hashing_300_char |
|
238 |
+
0.69 | 0.51 | 0.39 | 0.67 | 0.75 | 0.57 | 0.61 |
|
239 |
+
0.04 | 0.03 | 0.08 |\n| hashing_1000 |
|
240 |
+
0.63 | 0.49 | 0.39 | 0.66 | 0.77 | 0.55 | 0.57 |
|
241 |
+
0.05 | 0.02 | 0.04 |\n| hashing_300 |
|
242 |
+
0.61 | 0.48 | 0.4 | 0.64 | 0.71 | 0.54 | 0.5 |
|
243 |
+
0.05 | 0.02 | 0.02 |\n\n#### \u0417\u0430\u0434\u0430\u0447\u0438\n-
|
244 |
+
Semantic text similarity (**STS**) \u043D\u0430 \u043E\u0441\u043D\u043E\u0432\u0435
|
245 |
+
\u043F\u0435\u0440\u0435\u0432\u0435\u0434\u0451\u043D\u043D\u043E\u0433\u043E
|
246 |
+
\u0434\u0430\u0442\u0430\u0441\u0435\u0442\u0430 [STS-B](https://huggingface.co/datasets/stsb_multi_mt);\n-
|
247 |
+
Paraphrase identification (**PI**) \u043D\u0430 \u043E\u0441\u043D\u043E\u0432\u0435
|
248 |
+
\u0434\u0430\u0442\u0430\u0441\u0435\u0442\u0430 paraphraser.ru;\n- Natural
|
249 |
+
language inference (**NLI**) \u043D\u0430 \u0434\u0430\u0442\u0430\u0441\u0435\u0442\u0435
|
250 |
+
[XNLI](https://github.com/facebookresearch/XNLI);\n- Sentiment analysis (**SA**)
|
251 |
+
\u043D\u0430 \u0434\u0430\u043D\u043D\u044B\u0445 [SentiRuEval2016](http://www.dialog-21.ru/evaluation/2016/sentiment/).\n-
|
252 |
+
Toxicity identification (**TI**) \u043D\u0430 \u0434\u0430\u0442\u0430\u0441\u0435\u0442\u0435
|
253 |
+
\u0442\u043E\u043A\u0441\u0438\u0447\u043D\u044B\u0445 \u043A\u043E\u043C\u043C\u0435\u043D\u0442\u0430\u0440\u0438\u0435\u0432
|
254 |
+
\u0438\u0437 [OKMLCup](https://cups.mail.ru/ru/contests/okmlcup2020);\n- Inappropriateness
|
255 |
+
identification (**II**) \u043D\u0430 [\u0434\u0430\u0442\u0430\u0441\u0435\u0442\u0435
|
256 |
+
\u0421\u043A\u043E\u043B\u0442\u0435\u0445\u0430](https://github.com/skoltech-nlp/inappropriate-sensitive-topics);\n-
|
257 |
+
Intent classification (**IC**) \u0438 \u0435\u0451 \u043A\u0440\u043E\u0441\u0441-\u044F\u0437\u044B\u0447\u043D\u0430\u044F
|
258 |
+
\u0432\u0435\u0440\u0441\u0438\u044F **ICX** \u043D\u0430 \u0434\u0430\u0442\u0430\u0441\u0435\u0442\u0435
|
259 |
+
[NLU-evaluation-data](https://github.com/xliuhw/NLU-Evaluation-Data), \u043A\u043E\u0442\u043E\u0440\u044B\u0439
|
260 |
+
\u044F \u0430\u0432\u0442\u043E\u043C\u0430\u0442\u0438\u0447\u0435\u0441\u043A\u0438
|
261 |
+
\u043F\u0435\u0440\u0435\u0432\u0451\u043B \u043D\u0430 \u0440\u0443\u0441\u0441\u043A\u0438\u0439.
|
262 |
+
\u0412 IC \u043A\u043B\u0430\u0441\u0441\u0438\u0444\u0438\u043A\u0430\u0442\u043E\u0440
|
263 |
+
\u043E\u0431\u0443\u0447\u0430\u0435\u0442\u0441\u044F \u043D\u0430 \u0440\u0443\u0441\u0441\u043A\u0438\u0445
|
264 |
+
\u0434\u0430\u043D\u043D\u044B\u0445, \u0430 \u0432 ICX \u2013 \u043D\u0430
|
265 |
+
\u0430\u043D\u0433\u043B\u0438\u0439\u0441\u043A\u0438\u0445, \u0430 \u0442\u0435\u0441\u0442\u0438\u0440\u0443\u0435\u0442\u0441\u044F
|
266 |
+
\u0432 \u043E\u0431\u043E\u0438\u0445 \u0441\u043B\u0443\u0447\u0430\u044F\u0445
|
267 |
+
\u043D\u0430 \u0440\u0443\u0441\u0441\u043A\u0438\u0445.\n- \u0420\u0430\u0441\u043F\u043E\u0437\u043D\u0430\u0432\u0430\u043D\u0438\u0435
|
268 |
+
\u0438\u043C\u0435\u043D\u043E\u0432\u0430\u043D\u043D\u044B\u0445 \u0441\u0443\u0449\u043D\u043E\u0441\u0442\u0435\u0439
|
269 |
+
\u043D\u0430 \u0434\u0430\u0442\u0430\u0441\u0435\u0442\u0430\u0445 [factRuEval-2016](https://github.com/dialogue-evaluation/factRuEval-2016)
|
270 |
+
(**NE1**) \u0438 [RuDReC](https://github.com/cimm-kzn/RuDReC) (**NE2**). \u042D\u0442\u0438
|
271 |
+
\u0434\u0432\u0435 \u0437\u0430\u0434\u0430\u0447\u0438 \u0442\u0440\u0435\u0431\u0443\u044E\u0442
|
272 |
+
\u043F\u043E\u043B\u0443\u0447\u0430\u0442\u044C \u044D\u043C\u0431\u0435\u0434\u0434\u0438\u043D\u0433\u0438
|
273 |
+
\u043E\u0442\u0434\u0435\u043B\u044C\u043D\u044B\u0445 \u0442\u043E\u043A\u0435\u043D\u043E\u0432,
|
274 |
+
\u0430 \u043D\u0435 \u0446\u0435\u043B\u044B\u0445 \u043F\u0440\u0435\u0434\u043B\u043E\u0436\u0435\u043D\u0438\u0439;
|
275 |
+
\u043F\u043E\u044D\u0442\u043E\u043C\u0443 \u0442\u0430\u043C \u0443\u0447\u0430\u0441\u0442\u0432\u0443\u044E\u0442
|
276 |
+
\u043D\u0435 \u0432\u0441\u0435 \u043C\u043E\u0434\u0435\u043B\u0438.\n\n###
|
277 |
+
Changelog\n* \u0410\u0432\u0433\u0443\u0441\u0442 2023 - \u043E\u0431\u043D\u043E\u0432\u0438\u043B
|
278 |
+
\u0440\u0435\u0439\u0442\u0438\u043D\u0433:\n * \u043F\u043E\u043F\u0440\u0430\u0432\u0438\u0432
|
279 |
+
\u043E\u0448\u0438\u0431\u043A\u0443 \u0432 \u0432\u044B\u0447\u0438\u0441\u043B\u0435\u043D\u0438\u0438
|
280 |
+
mean token embeddings\n * \u0434\u043E\u0431\u0430\u0432\u0438\u043B \u043D\u0435\u0441\u043A\u043E\u043B\u044C\u043A\u043E
|
281 |
+
\u043C\u043E\u0434\u0435\u043B\u0435\u0439, \u0432\u043A\u043B\u044E\u0447\u0430\u044F
|
282 |
+
\u043D\u043E\u0432\u043E\u0433\u043E \u043B\u0438\u0434\u0435\u0440\u0430
|
283 |
+
- `intfloat/multilingual-e5-large`\n * \u043F\u043E \u043F\u0440\u043E\u0441\u044C\u0431\u0430\u043C
|
284 |
+
\u0442\u0440\u0443\u0434\u044F\u0449\u0438\u0445\u0441\u044F, \u0434\u043E\u0431\u0430\u0432\u0438\u043B
|
285 |
+
`text-embedding-ada-002` (\u0440\u0430\u0437\u043C\u0435\u0440 \u0438 \u043F\u0440\u043E\u0438\u0437\u0432\u043E\u0434\u0438\u0442\u0435\u043B\u044C\u043D\u043E\u0441\u0442\u044C
|
286 |
+
\u0443\u043A\u0430\u0437\u0430\u043D\u044B \u043E\u0442 \u0431\u0430\u043B\u0434\u044B)\n*
|
287 |
+
\u041B\u0435\u0442\u043E 2022 - \u043E\u043F\u0443\u0431\u043B\u0438\u043A\u043E\u0432\u0430\u043B
|
288 |
+
\u043F\u0435\u0440\u0432\u044B\u0439 \u0440\u0435\u0439\u0442\u0438\u043D\u0433\n"
|
289 |
+
headers:
|
290 |
+
Accept-Ranges:
|
291 |
+
- bytes
|
292 |
+
Access-Control-Allow-Origin:
|
293 |
+
- '*'
|
294 |
+
Cache-Control:
|
295 |
+
- max-age=300
|
296 |
+
Connection:
|
297 |
+
- keep-alive
|
298 |
+
Content-Encoding:
|
299 |
+
- gzip
|
300 |
+
Content-Length:
|
301 |
+
- '4972'
|
302 |
+
Content-Security-Policy:
|
303 |
+
- default-src 'none'; style-src 'unsafe-inline'; sandbox
|
304 |
+
Content-Type:
|
305 |
+
- text/plain; charset=utf-8
|
306 |
+
Cross-Origin-Resource-Policy:
|
307 |
+
- cross-origin
|
308 |
+
Date:
|
309 |
+
- Thu, 13 Jun 2024 17:29:26 GMT
|
310 |
+
ETag:
|
311 |
+
- W/"6ef42cd6939559c9e297cd85ab8b8a44b6ce19809ce92e1efcf39d06809cd99a"
|
312 |
+
Expires:
|
313 |
+
- Thu, 13 Jun 2024 17:34:26 GMT
|
314 |
+
Source-Age:
|
315 |
+
- '245'
|
316 |
+
Strict-Transport-Security:
|
317 |
+
- max-age=31536000
|
318 |
+
Vary:
|
319 |
+
- Authorization,Accept-Encoding,Origin
|
320 |
+
Via:
|
321 |
+
- 1.1 varnish
|
322 |
+
X-Cache:
|
323 |
+
- HIT
|
324 |
+
X-Cache-Hits:
|
325 |
+
- '0'
|
326 |
+
X-Content-Type-Options:
|
327 |
+
- nosniff
|
328 |
+
X-Fastly-Request-ID:
|
329 |
+
- 0b5812cb6e8627abe030f2ff2764205ee7247b21
|
330 |
+
X-Frame-Options:
|
331 |
+
- deny
|
332 |
+
X-GitHub-Request-Id:
|
333 |
+
- 3467:253C76:A903D8:B1E9A7:666B25FA
|
334 |
+
X-Served-By:
|
335 |
+
- cache-ams21038-AMS
|
336 |
+
X-Timer:
|
337 |
+
- S1718299767.633243,VS0,VE2
|
338 |
+
X-XSS-Protection:
|
339 |
+
- 1; mode=block
|
340 |
+
status:
|
341 |
+
code: 200
|
342 |
+
message: OK
|
343 |
+
version: 1
|
tests/test_parser.py
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import pandas as pd
import pytest

from src.encodechka import parser


@pytest.mark.vcr
def test_parser():
    """Smoke-test the README parser against the recorded cassette.

    ``parser.get_readme_df()`` must return a pandas DataFrame with exactly
    16 columns (the leaderboard table parsed from the upstream README).
    """
    frame = parser.get_readme_df()
    assert isinstance(frame, pd.DataFrame)
    _, n_columns = frame.shape
    assert n_columns == 16