Spaces:
Sleeping
Sleeping
feat: working pipeline
Browse files- .dockerignore +3 -0
- Dockerfile +17 -1
- build_and_run.sh +1 -0
- data/.gitignore +2 -0
- data/plots/.gitignore +1 -0
- data/raw/.gitignore +1 -0
- data/transformers.dvc +5 -0
- dvc.lock +13 -8
- dvc.yaml +6 -4
- poetry.lock +254 -1
- pyproject.toml +1 -0
- shad_mlops_transformers/config.py +4 -1
- shad_mlops_transformers/main.py +26 -9
- shad_mlops_transformers/model.py +25 -5
- shad_mlops_transformers/trainer.py +113 -10
- shad_mlops_transformers/ui.py +0 -0
.dockerignore
CHANGED
@@ -297,3 +297,6 @@ cython_debug/
|
|
297 |
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
298 |
#.idea/
|
299 |
|
|
|
|
|
|
|
|
297 |
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
298 |
#.idea/
|
299 |
|
300 |
+
# Custom
|
301 |
+
data/checkpoints
|
302 |
+
data/plots
|
Dockerfile
CHANGED
@@ -5,7 +5,23 @@ COPY pyproject.toml .
|
|
5 |
COPY poetry.lock .
|
6 |
|
7 |
RUN poetry install -vv --no-root --without dev
|
8 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
# second time since --no-root was used
|
10 |
RUN poetry install --only-root
|
11 |
|
|
|
5 |
COPY poetry.lock .
|
6 |
|
7 |
RUN poetry install -vv --no-root --without dev
|
8 |
+
ENV TRANSFORMERS_CACHE /code/data/transformers
|
9 |
+
# configs
|
10 |
+
COPY .dvc/config ./.dvc/config
|
11 |
+
COPY dvc.lock .
|
12 |
+
COPY dvc.yaml .
|
13 |
+
COPY data/transformers.dvc ./data/
|
14 |
+
# weights
|
15 |
+
RUN --mount=type=secret,id=AWS_ACCESS_KEY_ID,mode=0444,required=true \
|
16 |
+
--mount=type=secret,id=AWS_SECRET_ACCESS_KEY,mode=0444,required=true \
|
17 |
+
git init && \
|
18 |
+
export AWS_ACCESS_KEY_ID=$(cat /run/secrets/AWS_ACCESS_KEY_ID) && \
|
19 |
+
export AWS_SECRET_ACCESS_KEY=$(cat /run/secrets/AWS_SECRET_ACCESS_KEY) && \
|
20 |
+
poetry run dvc pull
|
21 |
+
# app data
|
22 |
+
COPY .streamlit/ ./.streamlit/
|
23 |
+
COPY README.md .
|
24 |
+
COPY shad_mlops_transformers/ ./shad_mlops_transformers/
|
25 |
# second time since --no-root was used
|
26 |
RUN poetry install --only-root
|
27 |
|
build_and_run.sh
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
docker buildx build --secret id=AWS_ACCESS_KEY_ID,env=AWS_ACCESS_KEY_ID --secret=id=AWS_SECRET_ACCESS_KEY,env=AWS_SECRET_ACCESS_KEY -t hw-5 . && docker run -it --rm -p 7860:7860 hw-5
|
data/.gitignore
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
/checkpoints
|
2 |
+
/transformers
|
data/plots/.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
/train.png
|
data/raw/.gitignore
CHANGED
@@ -1 +1,2 @@
|
|
1 |
/arxivData.json
|
|
|
|
1 |
/arxivData.json
|
2 |
+
/mapper.json
|
data/transformers.dvc
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
outs:
|
2 |
+
- md5: 35823750e091281d2f55996922ba9b09.dir
|
3 |
+
size: 882342643
|
4 |
+
nfiles: 14
|
5 |
+
path: transformers
|
dvc.lock
CHANGED
@@ -7,18 +7,23 @@ stages:
|
|
7 |
md5: a314e2f4eab544a46e6f95802ecde647
|
8 |
size: 72422946
|
9 |
- path: shad_mlops_transformers/model.py
|
10 |
-
md5:
|
11 |
-
size:
|
12 |
-
isexec: true
|
13 |
- path: shad_mlops_transformers/trainer.py
|
14 |
-
md5:
|
15 |
-
size:
|
16 |
params:
|
17 |
shad_mlops_transformers/config.py:
|
18 |
-
Config.batch_size:
|
19 |
Config.random_seed: 42
|
20 |
Config.test_size: 0.2
|
21 |
outs:
|
22 |
- path: data/models/model.torch
|
23 |
-
md5:
|
24 |
-
size:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
md5: a314e2f4eab544a46e6f95802ecde647
|
8 |
size: 72422946
|
9 |
- path: shad_mlops_transformers/model.py
|
10 |
+
md5: fb30feb5d875b7d2f25275b80c902d46
|
11 |
+
size: 2855
|
|
|
12 |
- path: shad_mlops_transformers/trainer.py
|
13 |
+
md5: 89ba94c0a48741a998d08f379920a1e0
|
14 |
+
size: 7415
|
15 |
params:
|
16 |
shad_mlops_transformers/config.py:
|
17 |
+
Config.batch_size: 64
|
18 |
Config.random_seed: 42
|
19 |
Config.test_size: 0.2
|
20 |
outs:
|
21 |
- path: data/models/model.torch
|
22 |
+
md5: b08f4441d6ad9042ebfa19035d6c365a
|
23 |
+
size: 438842393
|
24 |
+
- path: data/plots/train.png
|
25 |
+
md5: 627358cecadf27fb8a938e40b13bd993
|
26 |
+
size: 27309
|
27 |
+
- path: data/raw/mapper.json
|
28 |
+
md5: e4c22adf587676d22ed58c812f41f409
|
29 |
+
size: 855
|
dvc.yaml
CHANGED
@@ -7,10 +7,12 @@ stages:
|
|
7 |
- data/raw/arxivData.json
|
8 |
params:
|
9 |
- shad_mlops_transformers/config.py:
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
outs:
|
14 |
# NOTE должно совпадать с конфигом
|
15 |
- data/models/model.torch
|
16 |
-
|
|
|
|
|
|
7 |
- data/raw/arxivData.json
|
8 |
params:
|
9 |
- shad_mlops_transformers/config.py:
|
10 |
+
- Config.batch_size
|
11 |
+
- Config.random_seed
|
12 |
+
- Config.test_size
|
13 |
outs:
|
14 |
# NOTE должно совпадать с конфигом
|
15 |
- data/models/model.torch
|
16 |
+
- data/raw/mapper.json
|
17 |
+
- data/plots/train.png:
|
18 |
+
persist: true
|
poetry.lock
CHANGED
@@ -752,6 +752,81 @@ files = [
|
|
752 |
[package.dependencies]
|
753 |
six = "*"
|
754 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
755 |
[[package]]
|
756 |
name = "cryptography"
|
757 |
version = "40.0.2"
|
@@ -794,6 +869,18 @@ test = ["iso8601", "pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-co
|
|
794 |
test-randomorder = ["pytest-randomly"]
|
795 |
tox = ["tox"]
|
796 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
797 |
[[package]]
|
798 |
name = "decorator"
|
799 |
version = "5.1.1"
|
@@ -1255,6 +1342,32 @@ files = [
|
|
1255 |
atpublic = ">=2.3"
|
1256 |
psutil = ">=5.9.0"
|
1257 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1258 |
[[package]]
|
1259 |
name = "frozenlist"
|
1260 |
version = "1.3.3"
|
@@ -1621,6 +1734,84 @@ pyrsistent = ">=0.14.0,<0.17.0 || >0.17.0,<0.17.1 || >0.17.1,<0.17.2 || >0.17.2"
|
|
1621 |
format = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3987", "uri-template", "webcolors (>=1.11)"]
|
1622 |
format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3986-validator (>0.1.0)", "uri-template", "webcolors (>=1.11)"]
|
1623 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1624 |
[[package]]
|
1625 |
name = "kombu"
|
1626 |
version = "5.2.4"
|
@@ -1757,6 +1948,68 @@ files = [
|
|
1757 |
{file = "MarkupSafe-2.1.2.tar.gz", hash = "sha256:abcabc8c2b26036d62d4c746381a6f7cf60aafcc653198ad678306986b09450d"},
|
1758 |
]
|
1759 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1760 |
[[package]]
|
1761 |
name = "mccabe"
|
1762 |
version = "0.7.0"
|
@@ -3880,4 +4133,4 @@ testing = ["big-O", "flake8 (<5)", "jaraco.functools", "jaraco.itertools", "more
|
|
3880 |
[metadata]
|
3881 |
lock-version = "2.0"
|
3882 |
python-versions = "^3.10"
|
3883 |
-
content-hash = "
|
|
|
752 |
[package.dependencies]
|
753 |
six = "*"
|
754 |
|
755 |
+
[[package]]
|
756 |
+
name = "contourpy"
|
757 |
+
version = "1.0.7"
|
758 |
+
description = "Python library for calculating contours of 2D quadrilateral grids"
|
759 |
+
category = "main"
|
760 |
+
optional = false
|
761 |
+
python-versions = ">=3.8"
|
762 |
+
files = [
|
763 |
+
{file = "contourpy-1.0.7-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:95c3acddf921944f241b6773b767f1cbce71d03307270e2d769fd584d5d1092d"},
|
764 |
+
{file = "contourpy-1.0.7-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:fc1464c97579da9f3ab16763c32e5c5d5bb5fa1ec7ce509a4ca6108b61b84fab"},
|
765 |
+
{file = "contourpy-1.0.7-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8acf74b5d383414401926c1598ed77825cd530ac7b463ebc2e4f46638f56cce6"},
|
766 |
+
{file = "contourpy-1.0.7-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1c71fdd8f1c0f84ffd58fca37d00ca4ebaa9e502fb49825484da075ac0b0b803"},
|
767 |
+
{file = "contourpy-1.0.7-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f99e9486bf1bb979d95d5cffed40689cb595abb2b841f2991fc894b3452290e8"},
|
768 |
+
{file = "contourpy-1.0.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:87f4d8941a9564cda3f7fa6a6cd9b32ec575830780677932abdec7bcb61717b0"},
|
769 |
+
{file = "contourpy-1.0.7-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:9e20e5a1908e18aaa60d9077a6d8753090e3f85ca25da6e25d30dc0a9e84c2c6"},
|
770 |
+
{file = "contourpy-1.0.7-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:a877ada905f7d69b2a31796c4b66e31a8068b37aa9b78832d41c82fc3e056ddd"},
|
771 |
+
{file = "contourpy-1.0.7-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6381fa66866b0ea35e15d197fc06ac3840a9b2643a6475c8fff267db8b9f1e69"},
|
772 |
+
{file = "contourpy-1.0.7-cp310-cp310-win32.whl", hash = "sha256:3c184ad2433635f216645fdf0493011a4667e8d46b34082f5a3de702b6ec42e3"},
|
773 |
+
{file = "contourpy-1.0.7-cp310-cp310-win_amd64.whl", hash = "sha256:3caea6365b13119626ee996711ab63e0c9d7496f65641f4459c60a009a1f3e80"},
|
774 |
+
{file = "contourpy-1.0.7-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ed33433fc3820263a6368e532f19ddb4c5990855e4886088ad84fd7c4e561c71"},
|
775 |
+
{file = "contourpy-1.0.7-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:38e2e577f0f092b8e6774459317c05a69935a1755ecfb621c0a98f0e3c09c9a5"},
|
776 |
+
{file = "contourpy-1.0.7-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ae90d5a8590e5310c32a7630b4b8618cef7563cebf649011da80874d0aa8f414"},
|
777 |
+
{file = "contourpy-1.0.7-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:130230b7e49825c98edf0b428b7aa1125503d91732735ef897786fe5452b1ec2"},
|
778 |
+
{file = "contourpy-1.0.7-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:58569c491e7f7e874f11519ef46737cea1d6eda1b514e4eb5ac7dab6aa864d02"},
|
779 |
+
{file = "contourpy-1.0.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:54d43960d809c4c12508a60b66cb936e7ed57d51fb5e30b513934a4a23874fae"},
|
780 |
+
{file = "contourpy-1.0.7-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:152fd8f730c31fd67fe0ffebe1df38ab6a669403da93df218801a893645c6ccc"},
|
781 |
+
{file = "contourpy-1.0.7-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:9056c5310eb1daa33fc234ef39ebfb8c8e2533f088bbf0bc7350f70a29bde1ac"},
|
782 |
+
{file = "contourpy-1.0.7-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:a9d7587d2fdc820cc9177139b56795c39fb8560f540bba9ceea215f1f66e1566"},
|
783 |
+
{file = "contourpy-1.0.7-cp311-cp311-win32.whl", hash = "sha256:4ee3ee247f795a69e53cd91d927146fb16c4e803c7ac86c84104940c7d2cabf0"},
|
784 |
+
{file = "contourpy-1.0.7-cp311-cp311-win_amd64.whl", hash = "sha256:5caeacc68642e5f19d707471890f037a13007feba8427eb7f2a60811a1fc1350"},
|
785 |
+
{file = "contourpy-1.0.7-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:fd7dc0e6812b799a34f6d12fcb1000539098c249c8da54f3566c6a6461d0dbad"},
|
786 |
+
{file = "contourpy-1.0.7-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0f9d350b639db6c2c233d92c7f213d94d2e444d8e8fc5ca44c9706cf72193772"},
|
787 |
+
{file = "contourpy-1.0.7-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:e96a08b62bb8de960d3a6afbc5ed8421bf1a2d9c85cc4ea73f4bc81b4910500f"},
|
788 |
+
{file = "contourpy-1.0.7-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:031154ed61f7328ad7f97662e48660a150ef84ee1bc8876b6472af88bf5a9b98"},
|
789 |
+
{file = "contourpy-1.0.7-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2e9ebb4425fc1b658e13bace354c48a933b842d53c458f02c86f371cecbedecc"},
|
790 |
+
{file = "contourpy-1.0.7-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:efb8f6d08ca7998cf59eaf50c9d60717f29a1a0a09caa46460d33b2924839dbd"},
|
791 |
+
{file = "contourpy-1.0.7-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:6c180d89a28787e4b73b07e9b0e2dac7741261dbdca95f2b489c4f8f887dd810"},
|
792 |
+
{file = "contourpy-1.0.7-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:b8d587cc39057d0afd4166083d289bdeff221ac6d3ee5046aef2d480dc4b503c"},
|
793 |
+
{file = "contourpy-1.0.7-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:769eef00437edf115e24d87f8926955f00f7704bede656ce605097584f9966dc"},
|
794 |
+
{file = "contourpy-1.0.7-cp38-cp38-win32.whl", hash = "sha256:62398c80ef57589bdbe1eb8537127321c1abcfdf8c5f14f479dbbe27d0322e66"},
|
795 |
+
{file = "contourpy-1.0.7-cp38-cp38-win_amd64.whl", hash = "sha256:57119b0116e3f408acbdccf9eb6ef19d7fe7baf0d1e9aaa5381489bc1aa56556"},
|
796 |
+
{file = "contourpy-1.0.7-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:30676ca45084ee61e9c3da589042c24a57592e375d4b138bd84d8709893a1ba4"},
|
797 |
+
{file = "contourpy-1.0.7-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3e927b3868bd1e12acee7cc8f3747d815b4ab3e445a28d2e5373a7f4a6e76ba1"},
|
798 |
+
{file = "contourpy-1.0.7-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:366a0cf0fc079af5204801786ad7a1c007714ee3909e364dbac1729f5b0849e5"},
|
799 |
+
{file = "contourpy-1.0.7-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:89ba9bb365446a22411f0673abf6ee1fea3b2cf47b37533b970904880ceb72f3"},
|
800 |
+
{file = "contourpy-1.0.7-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:71b0bf0c30d432278793d2141362ac853859e87de0a7dee24a1cea35231f0d50"},
|
801 |
+
{file = "contourpy-1.0.7-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e7281244c99fd7c6f27c1c6bfafba878517b0b62925a09b586d88ce750a016d2"},
|
802 |
+
{file = "contourpy-1.0.7-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:b6d0f9e1d39dbfb3977f9dd79f156c86eb03e57a7face96f199e02b18e58d32a"},
|
803 |
+
{file = "contourpy-1.0.7-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:7f6979d20ee5693a1057ab53e043adffa1e7418d734c1532e2d9e915b08d8ec2"},
|
804 |
+
{file = "contourpy-1.0.7-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5dd34c1ae752515318224cba7fc62b53130c45ac6a1040c8b7c1a223c46e8967"},
|
805 |
+
{file = "contourpy-1.0.7-cp39-cp39-win32.whl", hash = "sha256:c5210e5d5117e9aec8c47d9156d1d3835570dd909a899171b9535cb4a3f32693"},
|
806 |
+
{file = "contourpy-1.0.7-cp39-cp39-win_amd64.whl", hash = "sha256:60835badb5ed5f4e194a6f21c09283dd6e007664a86101431bf870d9e86266c4"},
|
807 |
+
{file = "contourpy-1.0.7-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:ce41676b3d0dd16dbcfabcc1dc46090aaf4688fd6e819ef343dbda5a57ef0161"},
|
808 |
+
{file = "contourpy-1.0.7-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5a011cf354107b47c58ea932d13b04d93c6d1d69b8b6dce885e642531f847566"},
|
809 |
+
{file = "contourpy-1.0.7-pp38-pypy38_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:31a55dccc8426e71817e3fe09b37d6d48ae40aae4ecbc8c7ad59d6893569c436"},
|
810 |
+
{file = "contourpy-1.0.7-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:69f8ff4db108815addd900a74df665e135dbbd6547a8a69333a68e1f6e368ac2"},
|
811 |
+
{file = "contourpy-1.0.7-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:efe99298ba37e37787f6a2ea868265465410822f7bea163edcc1bd3903354ea9"},
|
812 |
+
{file = "contourpy-1.0.7-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:a1e97b86f73715e8670ef45292d7cc033548266f07d54e2183ecb3c87598888f"},
|
813 |
+
{file = "contourpy-1.0.7-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cc331c13902d0f50845099434cd936d49d7a2ca76cb654b39691974cb1e4812d"},
|
814 |
+
{file = "contourpy-1.0.7-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:24847601071f740837aefb730e01bd169fbcaa610209779a78db7ebb6e6a7051"},
|
815 |
+
{file = "contourpy-1.0.7-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:abf298af1e7ad44eeb93501e40eb5a67abbf93b5d90e468d01fc0c4451971afa"},
|
816 |
+
{file = "contourpy-1.0.7-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:64757f6460fc55d7e16ed4f1de193f362104285c667c112b50a804d482777edd"},
|
817 |
+
{file = "contourpy-1.0.7.tar.gz", hash = "sha256:d8165a088d31798b59e91117d1f5fc3df8168d8b48c4acc10fc0df0d0bdbcc5e"},
|
818 |
+
]
|
819 |
+
|
820 |
+
[package.dependencies]
|
821 |
+
numpy = ">=1.16"
|
822 |
+
|
823 |
+
[package.extras]
|
824 |
+
bokeh = ["bokeh", "chromedriver", "selenium"]
|
825 |
+
docs = ["furo", "sphinx-copybutton"]
|
826 |
+
mypy = ["contourpy[bokeh]", "docutils-stubs", "mypy (==0.991)", "types-Pillow"]
|
827 |
+
test = ["Pillow", "matplotlib", "pytest"]
|
828 |
+
test-no-images = ["pytest"]
|
829 |
+
|
830 |
[[package]]
|
831 |
name = "cryptography"
|
832 |
version = "40.0.2"
|
|
|
869 |
test-randomorder = ["pytest-randomly"]
|
870 |
tox = ["tox"]
|
871 |
|
872 |
+
[[package]]
|
873 |
+
name = "cycler"
|
874 |
+
version = "0.11.0"
|
875 |
+
description = "Composable style cycles"
|
876 |
+
category = "main"
|
877 |
+
optional = false
|
878 |
+
python-versions = ">=3.6"
|
879 |
+
files = [
|
880 |
+
{file = "cycler-0.11.0-py3-none-any.whl", hash = "sha256:3a27e95f763a428a739d2add979fa7494c912a32c17c4c38c4d5f082cad165a3"},
|
881 |
+
{file = "cycler-0.11.0.tar.gz", hash = "sha256:9c87405839a19696e837b3b818fed3f5f69f16f1eec1a1ad77e043dcea9c772f"},
|
882 |
+
]
|
883 |
+
|
884 |
[[package]]
|
885 |
name = "decorator"
|
886 |
version = "5.1.1"
|
|
|
1342 |
atpublic = ">=2.3"
|
1343 |
psutil = ">=5.9.0"
|
1344 |
|
1345 |
+
[[package]]
|
1346 |
+
name = "fonttools"
|
1347 |
+
version = "4.39.3"
|
1348 |
+
description = "Tools to manipulate font files"
|
1349 |
+
category = "main"
|
1350 |
+
optional = false
|
1351 |
+
python-versions = ">=3.8"
|
1352 |
+
files = [
|
1353 |
+
{file = "fonttools-4.39.3-py3-none-any.whl", hash = "sha256:64c0c05c337f826183637570ac5ab49ee220eec66cf50248e8df527edfa95aeb"},
|
1354 |
+
{file = "fonttools-4.39.3.zip", hash = "sha256:9234b9f57b74e31b192c3fc32ef1a40750a8fbc1cd9837a7b7bfc4ca4a5c51d7"},
|
1355 |
+
]
|
1356 |
+
|
1357 |
+
[package.extras]
|
1358 |
+
all = ["brotli (>=1.0.1)", "brotlicffi (>=0.8.0)", "fs (>=2.2.0,<3)", "lxml (>=4.0,<5)", "lz4 (>=1.7.4.2)", "matplotlib", "munkres", "scipy", "skia-pathops (>=0.5.0)", "sympy", "uharfbuzz (>=0.23.0)", "unicodedata2 (>=15.0.0)", "xattr", "zopfli (>=0.1.4)"]
|
1359 |
+
graphite = ["lz4 (>=1.7.4.2)"]
|
1360 |
+
interpolatable = ["munkres", "scipy"]
|
1361 |
+
lxml = ["lxml (>=4.0,<5)"]
|
1362 |
+
pathops = ["skia-pathops (>=0.5.0)"]
|
1363 |
+
plot = ["matplotlib"]
|
1364 |
+
repacker = ["uharfbuzz (>=0.23.0)"]
|
1365 |
+
symfont = ["sympy"]
|
1366 |
+
type1 = ["xattr"]
|
1367 |
+
ufo = ["fs (>=2.2.0,<3)"]
|
1368 |
+
unicode = ["unicodedata2 (>=15.0.0)"]
|
1369 |
+
woff = ["brotli (>=1.0.1)", "brotlicffi (>=0.8.0)", "zopfli (>=0.1.4)"]
|
1370 |
+
|
1371 |
[[package]]
|
1372 |
name = "frozenlist"
|
1373 |
version = "1.3.3"
|
|
|
1734 |
format = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3987", "uri-template", "webcolors (>=1.11)"]
|
1735 |
format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3986-validator (>0.1.0)", "uri-template", "webcolors (>=1.11)"]
|
1736 |
|
1737 |
+
[[package]]
|
1738 |
+
name = "kiwisolver"
|
1739 |
+
version = "1.4.4"
|
1740 |
+
description = "A fast implementation of the Cassowary constraint solver"
|
1741 |
+
category = "main"
|
1742 |
+
optional = false
|
1743 |
+
python-versions = ">=3.7"
|
1744 |
+
files = [
|
1745 |
+
{file = "kiwisolver-1.4.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:2f5e60fabb7343a836360c4f0919b8cd0d6dbf08ad2ca6b9cf90bf0c76a3c4f6"},
|
1746 |
+
{file = "kiwisolver-1.4.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:10ee06759482c78bdb864f4109886dff7b8a56529bc1609d4f1112b93fe6423c"},
|
1747 |
+
{file = "kiwisolver-1.4.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c79ebe8f3676a4c6630fd3f777f3cfecf9289666c84e775a67d1d358578dc2e3"},
|
1748 |
+
{file = "kiwisolver-1.4.4-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:abbe9fa13da955feb8202e215c4018f4bb57469b1b78c7a4c5c7b93001699938"},
|
1749 |
+
{file = "kiwisolver-1.4.4-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:7577c1987baa3adc4b3c62c33bd1118c3ef5c8ddef36f0f2c950ae0b199e100d"},
|
1750 |
+
{file = "kiwisolver-1.4.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f8ad8285b01b0d4695102546b342b493b3ccc6781fc28c8c6a1bb63e95d22f09"},
|
1751 |
+
{file = "kiwisolver-1.4.4-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8ed58b8acf29798b036d347791141767ccf65eee7f26bde03a71c944449e53de"},
|
1752 |
+
{file = "kiwisolver-1.4.4-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a68b62a02953b9841730db7797422f983935aeefceb1679f0fc85cbfbd311c32"},
|
1753 |
+
{file = "kiwisolver-1.4.4-cp310-cp310-win32.whl", hash = "sha256:e92a513161077b53447160b9bd8f522edfbed4bd9759e4c18ab05d7ef7e49408"},
|
1754 |
+
{file = "kiwisolver-1.4.4-cp310-cp310-win_amd64.whl", hash = "sha256:3fe20f63c9ecee44560d0e7f116b3a747a5d7203376abeea292ab3152334d004"},
|
1755 |
+
{file = "kiwisolver-1.4.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:e0ea21f66820452a3f5d1655f8704a60d66ba1191359b96541eaf457710a5fc6"},
|
1756 |
+
{file = "kiwisolver-1.4.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:bc9db8a3efb3e403e4ecc6cd9489ea2bac94244f80c78e27c31dcc00d2790ac2"},
|
1757 |
+
{file = "kiwisolver-1.4.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d5b61785a9ce44e5a4b880272baa7cf6c8f48a5180c3e81c59553ba0cb0821ca"},
|
1758 |
+
{file = "kiwisolver-1.4.4-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c2dbb44c3f7e6c4d3487b31037b1bdbf424d97687c1747ce4ff2895795c9bf69"},
|
1759 |
+
{file = "kiwisolver-1.4.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6295ecd49304dcf3bfbfa45d9a081c96509e95f4b9d0eb7ee4ec0530c4a96514"},
|
1760 |
+
{file = "kiwisolver-1.4.4-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4bd472dbe5e136f96a4b18f295d159d7f26fd399136f5b17b08c4e5f498cd494"},
|
1761 |
+
{file = "kiwisolver-1.4.4-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bf7d9fce9bcc4752ca4a1b80aabd38f6d19009ea5cbda0e0856983cf6d0023f5"},
|
1762 |
+
{file = "kiwisolver-1.4.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:78d6601aed50c74e0ef02f4204da1816147a6d3fbdc8b3872d263338a9052c51"},
|
1763 |
+
{file = "kiwisolver-1.4.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:877272cf6b4b7e94c9614f9b10140e198d2186363728ed0f701c6eee1baec1da"},
|
1764 |
+
{file = "kiwisolver-1.4.4-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:db608a6757adabb32f1cfe6066e39b3706d8c3aa69bbc353a5b61edad36a5cb4"},
|
1765 |
+
{file = "kiwisolver-1.4.4-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:5853eb494c71e267912275e5586fe281444eb5e722de4e131cddf9d442615626"},
|
1766 |
+
{file = "kiwisolver-1.4.4-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:f0a1dbdb5ecbef0d34eb77e56fcb3e95bbd7e50835d9782a45df81cc46949750"},
|
1767 |
+
{file = "kiwisolver-1.4.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:283dffbf061a4ec60391d51e6155e372a1f7a4f5b15d59c8505339454f8989e4"},
|
1768 |
+
{file = "kiwisolver-1.4.4-cp311-cp311-win32.whl", hash = "sha256:d06adcfa62a4431d404c31216f0f8ac97397d799cd53800e9d3efc2fbb3cf14e"},
|
1769 |
+
{file = "kiwisolver-1.4.4-cp311-cp311-win_amd64.whl", hash = "sha256:e7da3fec7408813a7cebc9e4ec55afed2d0fd65c4754bc376bf03498d4e92686"},
|
1770 |
+
{file = "kiwisolver-1.4.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:62ac9cc684da4cf1778d07a89bf5f81b35834cb96ca523d3a7fb32509380cbf6"},
|
1771 |
+
{file = "kiwisolver-1.4.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:41dae968a94b1ef1897cb322b39360a0812661dba7c682aa45098eb8e193dbdf"},
|
1772 |
+
{file = "kiwisolver-1.4.4-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:02f79693ec433cb4b5f51694e8477ae83b3205768a6fb48ffba60549080e295b"},
|
1773 |
+
{file = "kiwisolver-1.4.4-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d0611a0a2a518464c05ddd5a3a1a0e856ccc10e67079bb17f265ad19ab3c7597"},
|
1774 |
+
{file = "kiwisolver-1.4.4-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:db5283d90da4174865d520e7366801a93777201e91e79bacbac6e6927cbceede"},
|
1775 |
+
{file = "kiwisolver-1.4.4-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:1041feb4cda8708ce73bb4dcb9ce1ccf49d553bf87c3954bdfa46f0c3f77252c"},
|
1776 |
+
{file = "kiwisolver-1.4.4-cp37-cp37m-win32.whl", hash = "sha256:a553dadda40fef6bfa1456dc4be49b113aa92c2a9a9e8711e955618cd69622e3"},
|
1777 |
+
{file = "kiwisolver-1.4.4-cp37-cp37m-win_amd64.whl", hash = "sha256:03baab2d6b4a54ddbb43bba1a3a2d1627e82d205c5cf8f4c924dc49284b87166"},
|
1778 |
+
{file = "kiwisolver-1.4.4-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:841293b17ad704d70c578f1f0013c890e219952169ce8a24ebc063eecf775454"},
|
1779 |
+
{file = "kiwisolver-1.4.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f4f270de01dd3e129a72efad823da90cc4d6aafb64c410c9033aba70db9f1ff0"},
|
1780 |
+
{file = "kiwisolver-1.4.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:f9f39e2f049db33a908319cf46624a569b36983c7c78318e9726a4cb8923b26c"},
|
1781 |
+
{file = "kiwisolver-1.4.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c97528e64cb9ebeff9701e7938653a9951922f2a38bd847787d4a8e498cc83ae"},
|
1782 |
+
{file = "kiwisolver-1.4.4-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1d1573129aa0fd901076e2bfb4275a35f5b7aa60fbfb984499d661ec950320b0"},
|
1783 |
+
{file = "kiwisolver-1.4.4-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ad881edc7ccb9d65b0224f4e4d05a1e85cf62d73aab798943df6d48ab0cd79a1"},
|
1784 |
+
{file = "kiwisolver-1.4.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b428ef021242344340460fa4c9185d0b1f66fbdbfecc6c63eff4b7c29fad429d"},
|
1785 |
+
{file = "kiwisolver-1.4.4-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:2e407cb4bd5a13984a6c2c0fe1845e4e41e96f183e5e5cd4d77a857d9693494c"},
|
1786 |
+
{file = "kiwisolver-1.4.4-cp38-cp38-win32.whl", hash = "sha256:75facbe9606748f43428fc91a43edb46c7ff68889b91fa31f53b58894503a191"},
|
1787 |
+
{file = "kiwisolver-1.4.4-cp38-cp38-win_amd64.whl", hash = "sha256:5bce61af018b0cb2055e0e72e7d65290d822d3feee430b7b8203d8a855e78766"},
|
1788 |
+
{file = "kiwisolver-1.4.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:8c808594c88a025d4e322d5bb549282c93c8e1ba71b790f539567932722d7bd8"},
|
1789 |
+
{file = "kiwisolver-1.4.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f0a71d85ecdd570ded8ac3d1c0f480842f49a40beb423bb8014539a9f32a5897"},
|
1790 |
+
{file = "kiwisolver-1.4.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b533558eae785e33e8c148a8d9921692a9fe5aa516efbdff8606e7d87b9d5824"},
|
1791 |
+
{file = "kiwisolver-1.4.4-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:efda5fc8cc1c61e4f639b8067d118e742b812c930f708e6667a5ce0d13499e29"},
|
1792 |
+
{file = "kiwisolver-1.4.4-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:7c43e1e1206cd421cd92e6b3280d4385d41d7166b3ed577ac20444b6995a445f"},
|
1793 |
+
{file = "kiwisolver-1.4.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bc8d3bd6c72b2dd9decf16ce70e20abcb3274ba01b4e1c96031e0c4067d1e7cd"},
|
1794 |
+
{file = "kiwisolver-1.4.4-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4ea39b0ccc4f5d803e3337dd46bcce60b702be4d86fd0b3d7531ef10fd99a1ac"},
|
1795 |
+
{file = "kiwisolver-1.4.4-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:968f44fdbf6dd757d12920d63b566eeb4d5b395fd2d00d29d7ef00a00582aac9"},
|
1796 |
+
{file = "kiwisolver-1.4.4-cp39-cp39-win32.whl", hash = "sha256:da7e547706e69e45d95e116e6939488d62174e033b763ab1496b4c29b76fabea"},
|
1797 |
+
{file = "kiwisolver-1.4.4-cp39-cp39-win_amd64.whl", hash = "sha256:ba59c92039ec0a66103b1d5fe588fa546373587a7d68f5c96f743c3396afc04b"},
|
1798 |
+
{file = "kiwisolver-1.4.4-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:91672bacaa030f92fc2f43b620d7b337fd9a5af28b0d6ed3f77afc43c4a64b5a"},
|
1799 |
+
{file = "kiwisolver-1.4.4-pp37-pypy37_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:787518a6789009c159453da4d6b683f468ef7a65bbde796bcea803ccf191058d"},
|
1800 |
+
{file = "kiwisolver-1.4.4-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da152d8cdcab0e56e4f45eb08b9aea6455845ec83172092f09b0e077ece2cf7a"},
|
1801 |
+
{file = "kiwisolver-1.4.4-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:ecb1fa0db7bf4cff9dac752abb19505a233c7f16684c5826d1f11ebd9472b871"},
|
1802 |
+
{file = "kiwisolver-1.4.4-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:28bc5b299f48150b5f822ce68624e445040595a4ac3d59251703779836eceff9"},
|
1803 |
+
{file = "kiwisolver-1.4.4-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:81e38381b782cc7e1e46c4e14cd997ee6040768101aefc8fa3c24a4cc58e98f8"},
|
1804 |
+
{file = "kiwisolver-1.4.4-pp38-pypy38_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:2a66fdfb34e05b705620dd567f5a03f239a088d5a3f321e7b6ac3239d22aa286"},
|
1805 |
+
{file = "kiwisolver-1.4.4-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:872b8ca05c40d309ed13eb2e582cab0c5a05e81e987ab9c521bf05ad1d5cf5cb"},
|
1806 |
+
{file = "kiwisolver-1.4.4-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:70e7c2e7b750585569564e2e5ca9845acfaa5da56ac46df68414f29fea97be9f"},
|
1807 |
+
{file = "kiwisolver-1.4.4-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:9f85003f5dfa867e86d53fac6f7e6f30c045673fa27b603c397753bebadc3008"},
|
1808 |
+
{file = "kiwisolver-1.4.4-pp39-pypy39_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2e307eb9bd99801f82789b44bb45e9f541961831c7311521b13a6c85afc09767"},
|
1809 |
+
{file = "kiwisolver-1.4.4-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b1792d939ec70abe76f5054d3f36ed5656021dcad1322d1cc996d4e54165cef9"},
|
1810 |
+
{file = "kiwisolver-1.4.4-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f6cb459eea32a4e2cf18ba5fcece2dbdf496384413bc1bae15583f19e567f3b2"},
|
1811 |
+
{file = "kiwisolver-1.4.4-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:36dafec3d6d6088d34e2de6b85f9d8e2324eb734162fba59d2ba9ed7a2043d5b"},
|
1812 |
+
{file = "kiwisolver-1.4.4.tar.gz", hash = "sha256:d41997519fcba4a1e46eb4a2fe31bc12f0ff957b2b81bac28db24744f333e955"},
|
1813 |
+
]
|
1814 |
+
|
1815 |
[[package]]
|
1816 |
name = "kombu"
|
1817 |
version = "5.2.4"
|
|
|
1948 |
{file = "MarkupSafe-2.1.2.tar.gz", hash = "sha256:abcabc8c2b26036d62d4c746381a6f7cf60aafcc653198ad678306986b09450d"},
|
1949 |
]
|
1950 |
|
1951 |
+
[[package]]
|
1952 |
+
name = "matplotlib"
|
1953 |
+
version = "3.7.1"
|
1954 |
+
description = "Python plotting package"
|
1955 |
+
category = "main"
|
1956 |
+
optional = false
|
1957 |
+
python-versions = ">=3.8"
|
1958 |
+
files = [
|
1959 |
+
{file = "matplotlib-3.7.1-cp310-cp310-macosx_10_12_universal2.whl", hash = "sha256:95cbc13c1fc6844ab8812a525bbc237fa1470863ff3dace7352e910519e194b1"},
|
1960 |
+
{file = "matplotlib-3.7.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:08308bae9e91aca1ec6fd6dda66237eef9f6294ddb17f0d0b3c863169bf82353"},
|
1961 |
+
{file = "matplotlib-3.7.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:544764ba51900da4639c0f983b323d288f94f65f4024dc40ecb1542d74dc0500"},
|
1962 |
+
{file = "matplotlib-3.7.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:56d94989191de3fcc4e002f93f7f1be5da476385dde410ddafbb70686acf00ea"},
|
1963 |
+
{file = "matplotlib-3.7.1-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e99bc9e65901bb9a7ce5e7bb24af03675cbd7c70b30ac670aa263240635999a4"},
|
1964 |
+
{file = "matplotlib-3.7.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eb7d248c34a341cd4c31a06fd34d64306624c8cd8d0def7abb08792a5abfd556"},
|
1965 |
+
{file = "matplotlib-3.7.1-cp310-cp310-win32.whl", hash = "sha256:ce463ce590f3825b52e9fe5c19a3c6a69fd7675a39d589e8b5fbe772272b3a24"},
|
1966 |
+
{file = "matplotlib-3.7.1-cp310-cp310-win_amd64.whl", hash = "sha256:3d7bc90727351fb841e4d8ae620d2d86d8ed92b50473cd2b42ce9186104ecbba"},
|
1967 |
+
{file = "matplotlib-3.7.1-cp311-cp311-macosx_10_12_universal2.whl", hash = "sha256:770a205966d641627fd5cf9d3cb4b6280a716522cd36b8b284a8eb1581310f61"},
|
1968 |
+
{file = "matplotlib-3.7.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:f67bfdb83a8232cb7a92b869f9355d677bce24485c460b19d01970b64b2ed476"},
|
1969 |
+
{file = "matplotlib-3.7.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2bf092f9210e105f414a043b92af583c98f50050559616930d884387d0772aba"},
|
1970 |
+
{file = "matplotlib-3.7.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:89768d84187f31717349c6bfadc0e0d8c321e8eb34522acec8a67b1236a66332"},
|
1971 |
+
{file = "matplotlib-3.7.1-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:83111e6388dec67822e2534e13b243cc644c7494a4bb60584edbff91585a83c6"},
|
1972 |
+
{file = "matplotlib-3.7.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a867bf73a7eb808ef2afbca03bcdb785dae09595fbe550e1bab0cd023eba3de0"},
|
1973 |
+
{file = "matplotlib-3.7.1-cp311-cp311-win32.whl", hash = "sha256:fbdeeb58c0cf0595efe89c05c224e0a502d1aa6a8696e68a73c3efc6bc354304"},
|
1974 |
+
{file = "matplotlib-3.7.1-cp311-cp311-win_amd64.whl", hash = "sha256:c0bd19c72ae53e6ab979f0ac6a3fafceb02d2ecafa023c5cca47acd934d10be7"},
|
1975 |
+
{file = "matplotlib-3.7.1-cp38-cp38-macosx_10_12_universal2.whl", hash = "sha256:6eb88d87cb2c49af00d3bbc33a003f89fd9f78d318848da029383bfc08ecfbfb"},
|
1976 |
+
{file = "matplotlib-3.7.1-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:cf0e4f727534b7b1457898c4f4ae838af1ef87c359b76dcd5330fa31893a3ac7"},
|
1977 |
+
{file = "matplotlib-3.7.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:46a561d23b91f30bccfd25429c3c706afe7d73a5cc64ef2dfaf2b2ac47c1a5dc"},
|
1978 |
+
{file = "matplotlib-3.7.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:8704726d33e9aa8a6d5215044b8d00804561971163563e6e6591f9dcf64340cc"},
|
1979 |
+
{file = "matplotlib-3.7.1-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:4cf327e98ecf08fcbb82685acaf1939d3338548620ab8dfa02828706402c34de"},
|
1980 |
+
{file = "matplotlib-3.7.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:617f14ae9d53292ece33f45cba8503494ee199a75b44de7717964f70637a36aa"},
|
1981 |
+
{file = "matplotlib-3.7.1-cp38-cp38-win32.whl", hash = "sha256:7c9a4b2da6fac77bcc41b1ea95fadb314e92508bf5493ceff058e727e7ecf5b0"},
|
1982 |
+
{file = "matplotlib-3.7.1-cp38-cp38-win_amd64.whl", hash = "sha256:14645aad967684e92fc349493fa10c08a6da514b3d03a5931a1bac26e6792bd1"},
|
1983 |
+
{file = "matplotlib-3.7.1-cp39-cp39-macosx_10_12_universal2.whl", hash = "sha256:81a6b377ea444336538638d31fdb39af6be1a043ca5e343fe18d0f17e098770b"},
|
1984 |
+
{file = "matplotlib-3.7.1-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:28506a03bd7f3fe59cd3cd4ceb2a8d8a2b1db41afede01f66c42561b9be7b4b7"},
|
1985 |
+
{file = "matplotlib-3.7.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8c587963b85ce41e0a8af53b9b2de8dddbf5ece4c34553f7bd9d066148dc719c"},
|
1986 |
+
{file = "matplotlib-3.7.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8bf26ade3ff0f27668989d98c8435ce9327d24cffb7f07d24ef609e33d582439"},
|
1987 |
+
{file = "matplotlib-3.7.1-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:def58098f96a05f90af7e92fd127d21a287068202aa43b2a93476170ebd99e87"},
|
1988 |
+
{file = "matplotlib-3.7.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f883a22a56a84dba3b588696a2b8a1ab0d2c3d41be53264115c71b0a942d8fdb"},
|
1989 |
+
{file = "matplotlib-3.7.1-cp39-cp39-win32.whl", hash = "sha256:4f99e1b234c30c1e9714610eb0c6d2f11809c9c78c984a613ae539ea2ad2eb4b"},
|
1990 |
+
{file = "matplotlib-3.7.1-cp39-cp39-win_amd64.whl", hash = "sha256:3ba2af245e36990facf67fde840a760128ddd71210b2ab6406e640188d69d136"},
|
1991 |
+
{file = "matplotlib-3.7.1-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:3032884084f541163f295db8a6536e0abb0db464008fadca6c98aaf84ccf4717"},
|
1992 |
+
{file = "matplotlib-3.7.1-pp38-pypy38_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3a2cb34336110e0ed8bb4f650e817eed61fa064acbefeb3591f1b33e3a84fd96"},
|
1993 |
+
{file = "matplotlib-3.7.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b867e2f952ed592237a1828f027d332d8ee219ad722345b79a001f49df0936eb"},
|
1994 |
+
{file = "matplotlib-3.7.1-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:57bfb8c8ea253be947ccb2bc2d1bb3862c2bccc662ad1b4626e1f5e004557042"},
|
1995 |
+
{file = "matplotlib-3.7.1-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:438196cdf5dc8d39b50a45cb6e3f6274edbcf2254f85fa9b895bf85851c3a613"},
|
1996 |
+
{file = "matplotlib-3.7.1-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:21e9cff1a58d42e74d01153360de92b326708fb205250150018a52c70f43c290"},
|
1997 |
+
{file = "matplotlib-3.7.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75d4725d70b7c03e082bbb8a34639ede17f333d7247f56caceb3801cb6ff703d"},
|
1998 |
+
{file = "matplotlib-3.7.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:97cc368a7268141afb5690760921765ed34867ffb9655dd325ed207af85c7529"},
|
1999 |
+
{file = "matplotlib-3.7.1.tar.gz", hash = "sha256:7b73305f25eab4541bd7ee0b96d87e53ae9c9f1823be5659b806cd85786fe882"},
|
2000 |
+
]
|
2001 |
+
|
2002 |
+
[package.dependencies]
|
2003 |
+
contourpy = ">=1.0.1"
|
2004 |
+
cycler = ">=0.10"
|
2005 |
+
fonttools = ">=4.22.0"
|
2006 |
+
kiwisolver = ">=1.0.1"
|
2007 |
+
numpy = ">=1.20"
|
2008 |
+
packaging = ">=20.0"
|
2009 |
+
pillow = ">=6.2.0"
|
2010 |
+
pyparsing = ">=2.3.1"
|
2011 |
+
python-dateutil = ">=2.7"
|
2012 |
+
|
2013 |
[[package]]
|
2014 |
name = "mccabe"
|
2015 |
version = "0.7.0"
|
|
|
4133 |
[metadata]
|
4134 |
lock-version = "2.0"
|
4135 |
python-versions = "^3.10"
|
4136 |
+
content-hash = "9d2dae6cd1e049281df195405d1464a66292a89fa944e788ca2f9ff4874579bd"
|
pyproject.toml
CHANGED
@@ -17,6 +17,7 @@ scikit-learn = "^1.2.2"
|
|
17 |
numpy = "^1.24.2"
|
18 |
loguru = "^0.7.0"
|
19 |
dvc = {version = "^2.54.0", extras = ["s3"]}
|
|
|
20 |
|
21 |
|
22 |
[tool.poetry.group.dev.dependencies]
|
|
|
17 |
numpy = "^1.24.2"
|
18 |
loguru = "^0.7.0"
|
19 |
dvc = {version = "^2.54.0", extras = ["s3"]}
|
20 |
+
matplotlib = "^3.7.1"
|
21 |
|
22 |
|
23 |
[tool.poetry.group.dev.dependencies]
|
shad_mlops_transformers/config.py
CHANGED
@@ -8,7 +8,10 @@ basedir = Path(__file__).parent
|
|
8 |
class Config(BaseSettings):
|
9 |
data_dir: Path = basedir.parent / "data"
|
10 |
raw_data_dir: Path = data_dir / "raw"
|
11 |
-
|
|
|
|
|
|
|
12 |
random_seed: int = 42
|
13 |
test_size: float = 0.2
|
14 |
weights_path: Path = data_dir / "models" / "model.torch"
|
|
|
8 |
class Config(BaseSettings):
|
9 |
data_dir: Path = basedir.parent / "data"
|
10 |
raw_data_dir: Path = data_dir / "raw"
|
11 |
+
checkpoints_folders = data_dir / "checkpoints"
|
12 |
+
plots_dir: Path = data_dir / "plots"
|
13 |
+
batch_size: int = 64
|
14 |
+
epochs: int = 1
|
15 |
random_seed: int = 42
|
16 |
test_size: float = 0.2
|
17 |
weights_path: Path = data_dir / "models" / "model.torch"
|
shad_mlops_transformers/main.py
CHANGED
@@ -1,15 +1,32 @@
|
|
1 |
import streamlit as st
|
2 |
-
|
|
|
3 |
|
4 |
-
|
5 |
-
|
6 |
-
nlp = pipeline("ner", model=model, tokenizer=tokenizer)
|
7 |
|
8 |
-
|
9 |
-
|
|
|
10 |
|
11 |
-
text = st.text_area("TEXT HERE")
|
12 |
|
13 |
-
|
|
|
|
|
|
|
14 |
|
15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import streamlit as st
|
2 |
+
import torch
|
3 |
+
from loguru import logger
|
4 |
|
5 |
+
from shad_mlops_transformers.model import DocumentClassifier
|
6 |
+
from shad_mlops_transformers.trainer import load_mapper
|
|
|
7 |
|
8 |
+
# tokenizer = AutoTokenizer.from_pretrained("Davlan/distilbert-base-multilingual-cased-ner-hrl")
|
9 |
+
# model = AutoModelForTokenClassification.from_pretrained("Davlan/distilbert-base-multilingual-cased-ner-hrl")
|
10 |
+
# nlp = pipeline("ner", model=model, tokenizer=tokenizer)
|
11 |
|
|
|
12 |
|
13 |
+
@st.cache_resource
def load_model():
    """Build the document classifier and load its saved weights.

    Wrapped in ``st.cache_resource`` so Streamlit reuses the loaded model
    instead of rebuilding it on every script rerun.
    """
    # NOTE: the number of classes is hardcoded.
    classifier = DocumentClassifier(n_classes=63)
    return classifier.from_file()
|
17 |
|
18 |
+
|
19 |
+
# Tag -> class-index mapping read from mapper.json; loaded once at import time.
mapper = load_mapper()

if __name__ == "__main__":
    model = load_model()
    st.markdown("### Predict tags for article summary")
    text = st.text_input("Enter your summary")
    # The text input is empty until the user types something; skip inference
    # in that case instead of showing a label predicted for an empty string.
    if text.strip():
        # Inference only: no autograd graph is needed for the classifier head.
        with torch.no_grad():
            raw_predictions = model(text)
        key = torch.argmax(raw_predictions, dim=1).item()
        # Invert the mapping so the tag can be looked up by its class index.
        inverse_mapper = {v: k for k, v in mapper.items()}
        to_show = inverse_mapper.get(key, "unknown")
        logger.debug(f"key={key}, to_show={to_show}")
        st.markdown(f"predicted label: {to_show}")
|
shad_mlops_transformers/model.py
CHANGED
@@ -19,17 +19,28 @@ from shad_mlops_transformers.config import config
|
|
19 |
# print(p)
|
20 |
|
21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
class DocumentClassifier(nn.Module):
|
23 |
-
def __init__(self, n_classes: int = 2):
|
24 |
super().__init__()
|
25 |
self.model_name = "bert-base-uncased"
|
26 |
self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
|
27 |
self.encoder = AutoModel.from_pretrained(self.model_name)
|
|
|
28 |
self.n_classes = n_classes
|
29 |
self.model = nn.Sequential(
|
30 |
OrderedDict(
|
31 |
[
|
32 |
-
("fc", nn.Linear(in_features=self.encoder.pooler.dense.out_features, out_features=n_classes)),
|
|
|
|
|
33 |
("sm", nn.Softmax()),
|
34 |
]
|
35 |
)
|
@@ -37,18 +48,27 @@ class DocumentClassifier(nn.Module):
|
|
37 |
self.trainable_params = self.model.parameters()
|
38 |
|
39 |
def forward(self, text):
|
40 |
-
tok_info = self.tokenize(text)
|
41 |
with torch.no_grad():
|
|
|
42 |
embeddings = self.encoder(**tok_info)["pooler_output"]
|
43 |
return self.model(embeddings)
|
44 |
|
45 |
def tokenize(self, x: str) -> dict:
|
46 |
-
return self.tokenizer(x, padding=True, truncation=True, return_tensors="pt")
|
47 |
|
48 |
def from_file(self, path: Path = config.weights_path) -> "DocumentClassifier":
|
49 |
-
self.load_state_dict(torch.load(path))
|
50 |
return self
|
51 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
|
53 |
if __name__ == "__main__":
|
54 |
data = ["This article describes machine learning"]
|
|
|
19 |
# print(p)
|
20 |
|
21 |
|
22 |
+
class extract_tensor(nn.Module):
    """Pick the output tensor out of an ``(output, state)`` pair.

    Placed right after the LSTM layer inside ``nn.Sequential`` so that the
    following layer receives a plain tensor instead of a tuple.
    """

    def forward(self, x):
        """Return the first element of the two-element input ``x``."""
        # Exactly two elements are expected; the second one is discarded.
        output, _state = x
        # Full slice over the first two dimensions: the shape is left unchanged.
        return output[:, :]
|
28 |
+
|
29 |
+
|
30 |
class DocumentClassifier(nn.Module):
|
31 |
+
def __init__(self, n_classes: int = 2, device: torch.device = torch.device("cpu")):
|
32 |
super().__init__()
|
33 |
self.model_name = "bert-base-uncased"
|
34 |
self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
|
35 |
self.encoder = AutoModel.from_pretrained(self.model_name)
|
36 |
+
self.device = device
|
37 |
self.n_classes = n_classes
|
38 |
self.model = nn.Sequential(
|
39 |
OrderedDict(
|
40 |
[
|
41 |
+
# ("fc", nn.Linear(in_features=self.encoder.pooler.dense.out_features, out_features=n_classes)),
|
42 |
+
("lstm", nn.LSTM(input_size=self.encoder.pooler.dense.out_features, hidden_size=n_classes)),
|
43 |
+
("extract", extract_tensor()),
|
44 |
("sm", nn.Softmax()),
|
45 |
]
|
46 |
)
|
|
|
48 |
self.trainable_params = self.model.parameters()
|
49 |
|
50 |
def forward(self, text):
    """Encode ``text`` with the encoder and run ``self.model`` on the pooled output.

    Tokenization and the encoder pass run under ``torch.no_grad()``, so no
    gradients are tracked for the encoder; only ``self.model`` is applied
    with autograd enabled.
    """
    with torch.no_grad():
        encoder_inputs = self.tokenize(text)
        encoder_outputs = self.encoder(**encoder_inputs)
        pooled = encoder_outputs["pooler_output"]
    return self.model(pooled)
|
55 |
|
56 |
def tokenize(self, x: str) -> dict:
    """Tokenize ``x`` into padded, truncated PyTorch tensors placed on ``self.device``."""
    encoded = self.tokenizer(x, padding=True, truncation=True, return_tensors="pt")
    return self.ensure_device(encoded)
|
58 |
|
59 |
def from_file(self, path: Path = config.weights_path) -> "DocumentClassifier":
    """Load a saved state dict from ``path`` into this model and return ``self``.

    The weights are always mapped to the CPU while loading.
    """
    cpu = torch.device("cpu")
    state_dict = torch.load(path, map_location=cpu)
    self.load_state_dict(state_dict)
    return self
|
62 |
|
63 |
+
def ensure_device(self, tok_output):
    """Return the tokenizer tensors moved to ``self.device``.

    Only ``input_ids``, ``token_type_ids`` and ``attention_mask`` are carried
    over; any other entry of ``tok_output`` is dropped.
    """
    wanted_keys = ("input_ids", "token_type_ids", "attention_mask")
    return {key: tok_output[key].to(self.device) for key in wanted_keys}
|
71 |
+
|
72 |
|
73 |
if __name__ == "__main__":
|
74 |
data = ["This article describes machine learning"]
|
shad_mlops_transformers/trainer.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
import json
|
2 |
|
|
|
3 |
import numpy as np
|
4 |
import torch
|
5 |
import torch.nn as nn
|
@@ -13,7 +14,7 @@ from shad_mlops_transformers.model import DocumentClassifier
|
|
13 |
|
14 |
|
15 |
class ArxivDataset(Dataset):
|
16 |
-
def __init__(self, raw_data: list[dict]):
|
17 |
"""Разово вычитываем и сохраняем весь датасет."""
|
18 |
logger.info("reading data")
|
19 |
self.x = []
|
@@ -21,7 +22,10 @@ class ArxivDataset(Dataset):
|
|
21 |
# self.data = []
|
22 |
whitelist_labels = ["math", "cs"]
|
23 |
i = 0
|
24 |
-
|
|
|
|
|
|
|
25 |
for item in raw_data:
|
26 |
tmp_y = []
|
27 |
# да простят мне это потомки, но там зачем-то люди засунули питоновский dict в строку!
|
@@ -30,7 +34,7 @@ class ArxivDataset(Dataset):
|
|
30 |
# пока берем только теги из whitelist
|
31 |
if not any([real_tag.startswith(x) for x in whitelist_labels]):
|
32 |
continue
|
33 |
-
if real_tag not in self.class_mapper:
|
34 |
self.class_mapper[real_tag] = i
|
35 |
i += 1
|
36 |
tmp_y.append(self.class_mapper[real_tag])
|
@@ -60,22 +64,91 @@ def make_train_val():
|
|
60 |
return train_test_split(_raw_json, test_size=config.test_size, shuffle=True, random_state=config.random_seed)
|
61 |
|
62 |
|
63 |
-
def
|
|
|
64 |
model.train()
|
65 |
losses_tr = []
|
66 |
for text, true_label in tqdm(loader):
|
|
|
67 |
optimizer.zero_grad()
|
68 |
pred = model(text)
|
69 |
loss = criterion(pred, true_label)
|
70 |
|
71 |
loss.backward()
|
72 |
optimizer.step()
|
73 |
-
|
74 |
-
|
|
|
|
|
75 |
|
76 |
return model, optimizer, np.mean(losses_tr)
|
77 |
|
78 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
79 |
def collator(x):
|
80 |
return x[0]
|
81 |
|
@@ -85,17 +158,47 @@ def save_model(model: DocumentClassifier):
|
|
85 |
torch.save(model.state_dict(), config.weights_path)
|
86 |
|
87 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
88 |
def main():
|
|
|
|
|
89 |
train, val = make_train_val()
|
90 |
-
|
91 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
92 |
loader_train = DataLoader(dataset_train, batch_size=config.batch_size, shuffle=True, drop_last=True)
|
93 |
loader_val = DataLoader(dataset_val, batch_size=config.batch_size, shuffle=True, drop_last=True)
|
94 |
|
95 |
-
model = DocumentClassifier(n_classes=len(dataset_train.classes))
|
96 |
optimizer = torch.optim.Adam(model.trainable_params)
|
|
|
|
|
|
|
97 |
loss = nn.CrossEntropyLoss()
|
98 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
99 |
save_model(model)
|
100 |
|
101 |
|
|
|
1 |
import json
|
2 |
|
3 |
+
import matplotlib.pyplot as plt
|
4 |
import numpy as np
|
5 |
import torch
|
6 |
import torch.nn as nn
|
|
|
14 |
|
15 |
|
16 |
class ArxivDataset(Dataset):
|
17 |
+
def __init__(self, raw_data: list[dict], class_mapper: dict[str, int] | None = None):
|
18 |
"""Разово вычитываем и сохраняем весь датасет."""
|
19 |
logger.info("reading data")
|
20 |
self.x = []
|
|
|
22 |
# self.data = []
|
23 |
whitelist_labels = ["math", "cs"]
|
24 |
i = 0
|
25 |
+
if class_mapper is None:
|
26 |
+
self.class_mapper = {}
|
27 |
+
else:
|
28 |
+
self.class_mapper = class_mapper
|
29 |
for item in raw_data:
|
30 |
tmp_y = []
|
31 |
# да простят мне это потомки, но там зачем-то люди засунули питоновский dict в строку!
|
|
|
34 |
# пока берем только теги из whitelist
|
35 |
if not any([real_tag.startswith(x) for x in whitelist_labels]):
|
36 |
continue
|
37 |
+
if class_mapper is None and real_tag not in self.class_mapper:
|
38 |
self.class_mapper[real_tag] = i
|
39 |
i += 1
|
40 |
tmp_y.append(self.class_mapper[real_tag])
|
|
|
64 |
return train_test_split(_raw_json, test_size=config.test_size, shuffle=True, random_state=config.random_seed)
|
65 |
|
66 |
|
67 |
+
def run_epoch(model: DocumentClassifier, optimizer: torch.optim.Optimizer, loader: DataLoader, criterion, device):
    """Run one training pass over ``loader``.

    Moves the model to ``device``, switches it to train mode and performs one
    optimizer step per batch.

    Returns:
        The tuple ``(model, optimizer, mean_loss)`` where ``mean_loss`` is the
        mean of the per-batch loss values.
    """
    model.to(device)
    model.train()
    batch_losses = []
    for batch_text, targets in tqdm(loader):
        targets = targets.to(device)
        optimizer.zero_grad()
        batch_loss = criterion(model(batch_text), targets)
        batch_loss.backward()
        optimizer.step()
        batch_losses.append(batch_loss.item())

    mean_loss = np.mean(batch_losses)
    return model, optimizer, mean_loss
|
85 |
|
86 |
|
87 |
+
def val(model, loader, criterion, target_p: float = 0.95, device: torch.device = torch.device("cpu")):
    """Compute the mean loss of ``model`` over ``loader`` without tracking gradients.

    ``target_p`` is accepted but not used by this function.

    Returns:
        The tuple ``(mean_loss, None)``; the second slot is where callers
        expect an optional metrics mapping.
    """
    model.eval()
    batch_losses = []
    with torch.no_grad():
        for batch_text, targets in tqdm(loader):
            targets = targets.to(device)
            batch_loss = criterion(model(batch_text), targets)
            batch_losses.append(batch_loss.item())

    mean_loss = np.mean(batch_losses)
    return mean_loss, None
|
99 |
+
|
100 |
+
|
101 |
+
def _save_loss_plot(losses):
    """Draw the train/val loss curves and write them to ``config.plots_dir / "train.png"``."""
    fig, ax = plt.subplots(1, 1, figsize=(16, 9))
    ax.plot(losses["train"], "r.-", label="train")
    ax.plot(losses["val"], "g.-", label="val")
    ax.grid(True)
    ax.legend()
    config.plots_dir.mkdir(exist_ok=True, parents=True)
    fig.savefig(config.plots_dir / "train.png")
    # Close the figure so repeated calls do not accumulate open figures.
    plt.close(fig)


def train_loop(
    model: DocumentClassifier,
    optimizer: torch.optim.Optimizer,
    train_loader: DataLoader,
    val_loader: DataLoader,
    criterion,
    scheduler: torch.optim.lr_scheduler.ReduceLROnPlateau,
    device,
    val_every: int = 1,
):
    """Train for ``config.epochs`` epochs with validation, checkpointing and loss plots.

    Every ``val_every``-th epoch the model is validated. When the validation
    loss improves on the best value seen so far, a checkpoint is written to
    ``config.checkpoints_folders / f"epoch_{epoch}.pt"``. The scheduler is
    stepped with that validation loss. After each epoch the loss curves are
    saved to ``config.plots_dir / "train.png"``.

    Args:
        model: Classifier to train.
        optimizer: Optimizer that updates the model parameters.
        train_loader: Batches used for training.
        val_loader: Batches used for validation.
        criterion: Loss function called as ``criterion(pred, true_label)``.
        scheduler: Learning-rate scheduler stepped with the validation loss.
        device: Device passed on to ``run_epoch`` and ``val``.
        val_every: Validate every this many epochs.
    """
    losses = {"train": [], "val": []}
    # np.inf instead of np.Inf: the capitalised alias was removed in NumPy 2.0.
    best_val_loss = np.inf
    metrics = {}
    for epoch in range(1, config.epochs + 1):
        logger.info(f"#{epoch}/{config.epochs}:")
        model, optimizer, train_loss = run_epoch(
            model=model, optimizer=optimizer, loader=train_loader, criterion=criterion, device=device
        )
        losses["train"].append(train_loss)

        if not (epoch % val_every):
            val_loss, metrics_ = val(model, val_loader, criterion, device=device)
            losses["val"].append(val_loss)
            if metrics_ is not None:
                for name, value in metrics_.items():
                    # `metrics` starts empty, so plain indexing would raise KeyError.
                    metrics.setdefault(name, []).append(value)

            # Save the model that is best on validation.
            if val_loss < best_val_loss:
                config.checkpoints_folders.mkdir(parents=True, exist_ok=True)
                torch.save(
                    {
                        "epoch": epoch,
                        "model_state_dict": model.state_dict(),
                        "optimizer_state_dict": optimizer.state_dict(),
                        "scheduler_state_dict": scheduler.state_dict(),
                        "losses": losses,
                    },
                    config.checkpoints_folders / f"epoch_{epoch}.pt",
                )
                best_val_loss = val_loss

            # The plateau scheduler is driven by the validation loss only.
            scheduler.step(val_loss)

        _save_loss_plot(losses)
|
150 |
+
|
151 |
+
|
152 |
def collator(x):
    """Return the first element of ``x``."""
    first = x[0]
    return first
|
154 |
|
|
|
158 |
torch.save(model.state_dict(), config.weights_path)
|
159 |
|
160 |
|
161 |
+
def load_mapper():
    """Read ``mapper.json`` from ``config.raw_data_dir`` and return the parsed JSON."""
    mapper_file = config.raw_data_dir / "mapper.json"
    path = mapper_file.absolute()
    logger.info(f"opening mapper in path: {path}")
    with open(path, "r") as f:
        mapping = json.load(f)
    return mapping
|
166 |
+
|
167 |
+
|
168 |
def main():
    """Train the document classifier end to end and save its weights.

    Steps: pick the device, split the raw data, compute and persist the global
    class mapper, build the train/val datasets and loaders, run the training
    loop and finally save the model.
    """
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    logger.info(f"using device {device}")

    train_items, val_items = make_train_val()

    # This dataset over all samples is only used to compute the mapping.
    dataset_full = ArxivDataset(train_items + val_items)
    cm = dataset_full.class_mapper
    logger.info("writing global class mapper to json")
    mapper_path = config.raw_data_dir / "mapper.json"
    with open(mapper_path, "w") as f:
        json.dump(cm, f)
    logger.info("[Done] writing global class mapper to json")
    del dataset_full

    # Both splits share the global mapper.
    dataset_train = ArxivDataset(train_items, class_mapper=cm)
    dataset_val = ArxivDataset(val_items, class_mapper=cm)
    loader_options = {"batch_size": config.batch_size, "shuffle": True, "drop_last": True}
    loader_train = DataLoader(dataset_train, **loader_options)
    loader_val = DataLoader(dataset_val, **loader_options)

    n_classes = len(dataset_train.classes)
    model = DocumentClassifier(n_classes=n_classes, device=device)
    optimizer = torch.optim.Adam(model.trainable_params)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode="min", factor=0.25, patience=4, threshold=0.001, verbose=True
    )
    loss = nn.CrossEntropyLoss()

    logger.info("running train loop")
    train_loop(
        model=model,
        optimizer=optimizer,
        train_loader=loader_train,
        val_loader=loader_val,
        criterion=loss,
        scheduler=scheduler,
        device=device,
        val_every=1,
    )
    save_model(model)
|
203 |
|
204 |
|
shad_mlops_transformers/ui.py
DELETED
File without changes
|