GitHub Action committed on
Commit
8b414b0
1 Parent(s): 1579a76

refs/heads/ci-cd/hugging-face

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .env.example +3 -0
  2. .flake8 +33 -0
  3. .github/workflows/cd.yaml +48 -0
  4. .github/workflows/ci.yaml +41 -0
  5. .github/workflows/docs.yaml +46 -0
  6. .github/workflows/kaggle.yaml +32 -0
  7. .github/workflows/snyk.yaml +30 -0
  8. .gitignore +118 -0
  9. .pre-commit-config.yaml +24 -0
  10. Dockerfile +7 -0
  11. LICENSE +21 -0
  12. Makefile +23 -0
  13. codecov.yml +5 -0
  14. data/.gitkeep +0 -0
  15. data/weights/.gitignore +0 -0
  16. data/weights/config.yaml +9 -0
  17. data/weights/cv_fold_0/weights.ckpt +3 -0
  18. data/weights/cv_fold_1/weights.ckpt +3 -0
  19. data/weights/cv_fold_2/weights.ckpt +3 -0
  20. data/weights/cv_fold_3/weights.ckpt +3 -0
  21. data/weights/cv_fold_4/weights.ckpt +3 -0
  22. data/weights/cv_results.csv +7 -0
  23. data/weights/dataset-metadata.json +9 -0
  24. data/weights/submission.csv +4 -0
  25. data/word_frequencies/unigram_freq.csv +0 -0
  26. demo/__init__.py +0 -0
  27. demo/app.py +93 -0
  28. demo/utils.py +29 -0
  29. docs/Makefile +12 -0
  30. docs/images/demo.jpeg +0 -0
  31. docs/images/logo.png +0 -0
  32. docs/make.bat +35 -0
  33. docs/source/_templates/layout.html +31 -0
  34. docs/source/_templates/notused_packages.rst_t +52 -0
  35. docs/source/conf.py +78 -0
  36. docs/source/contribute.rst +12 -0
  37. docs/source/index.rst +14 -0
  38. docs/source/usage.rst +26 -0
  39. mypy.ini +7 -0
  40. poetry.lock +0 -0
  41. pyproject.toml +50 -0
  42. src/__init__.py +0 -0
  43. src/config/conf/config.yaml +11 -0
  44. src/config/conf/experiment/sanity_bert_finetuning_predictor.yaml +19 -0
  45. src/config/conf/experiment/sanity_bert_with_handcrafted_feature_predictor.yaml +13 -0
  46. src/config/conf/experiment/sanity_constant_predictor.yaml +9 -0
  47. src/config/conf/experiment/sanity_many_bert_with_handcrafted_feature_predictor.yaml +15 -0
  48. src/config/conf/predictor/bert_finetuning_predictor.yaml +16 -0
  49. src/config/conf/predictor/bert_with_handcrafted_feature_predictor.yaml +4 -0
  50. src/config/conf/predictor/constant_predictor.yaml +1 -0
.env.example ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ DATA_PATH = ""
2
+ BOT_TOKEN = ""
3
+ CHAT_ID =
.flake8 ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [flake8]
2
+ max-complexity = 10
3
+ max_line_length = 138
4
+ exclude =
5
+ # no need to traverse our git directory
6
+ .git,
7
+ # there's no value in checking cache directories
8
+ __pycache__,
9
+ # this directory is mostly auto-generated
10
+ docs/source,
11
+ extend-ignore =
12
+ # H101: use TODO(NAME)
13
+ H101,
14
+ # H202: assertRaises Exception too broad
15
+ H202,
16
+ # H233: python 3.x incompatible use of print operator
17
+ H233,
18
+ # H301: one import per line
19
+ H301,
20
+ # H306: imports not in alphabetical order (time, os)
21
+ H306,
22
+ # H401: docstring should not start with a space
23
+ H401,
24
+ # H403: multi line docstrings should end on a new line
25
+ H403,
26
+ # H404: multi line docstring should start without a leading new line
27
+ H404,
28
+ # H405: multi line docstring summary not separated with an empty line
29
+ H405,
30
+ # H501: do not use self.__dict__ for string formatting
31
+ H501,
32
+ # E203: colons should not have any space before them
33
+ E203,
.github/workflows/cd.yaml ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: continuous-deployment
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - ci-cd/hugging-face
7
+ release:
8
+ types: [published]
9
+
10
+ jobs:
11
+ continuous-deployment:
12
+ runs-on: ubuntu-22.04
13
+ steps:
14
+ - uses: actions/checkout@v2
15
+ - name: Set up Python 3.7.13
16
+ uses: actions/setup-python@v2
17
+ with:
18
+ python-version: 3.7.13
19
+ - name: Install dependencies
20
+ run: |
21
+ curl -sSL https://install.python-poetry.org | python3 -
22
+ poetry --no-root install
23
+ - name: Clone Hugging Face repo
24
+ run: |
25
+ cd ..
26
+ git clone https://huggingface.co/spaces/Booguy/linguask
27
+ - name: Move files to repo
28
+ run: |
29
+ cd ..
30
+ mv ./linguask/.git/ .
31
+ mv ./linguask/README.md .
32
+ rm -rf ./linguask/*
33
+ mv .git ./linguask
34
+ mv README.md ./linguask
35
+ cd ./automatic-essay-evaluator
36
+ rm -rf ./.git
37
+ rm README.md
38
+ cd ..
39
+ cp -a ./automatic-essay-evaluator/. ./linguask
40
+ - name: Commit all files
41
+ run: |
42
+ cd ..
43
+ cd linguask
44
+ git add .
45
+ git config --local user.email "action@github.com"
46
+ git config --local user.name "GitHub Action"
47
+ git commit -m ${{ github.ref }}
48
+ git push https://Booguy:${{ secrets.HF_TOKEN }}@huggingface.co/spaces/Booguy/linguask.git
.github/workflows/ci.yaml ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: continuous-integration
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+ pull_request:
8
+ branches:
9
+ - main
10
+
11
+ jobs:
12
+ continuous-integration:
13
+ runs-on: ubuntu-22.04
14
+ steps:
15
+ - uses: actions/checkout@v2
16
+ - name: Set up Python 3.7.13
17
+ uses: actions/setup-python@v2
18
+ with:
19
+ python-version: 3.7.13
20
+ - name: Install dependencies
21
+ run: |
22
+ curl -sSL https://install.python-poetry.org | python3 -
23
+ poetry --no-root install
24
+ - uses: jamescurtin/isort-action@master
25
+ - name: Lint with flake8
26
+ run: |
27
+ poetry run flake8
28
+ - name: Lint with static type checker mypy
29
+ run: |
30
+ poetry run mypy .
31
+ - name: Test with pytest
32
+ run: |
33
+ poetry run pytest
34
+ - name: Coverage report
35
+ run: |
36
+ poetry run coverage run -m pytest
37
+ poetry run coverage xml
38
+ - name: Upload Coverage Report to Codecov
39
+ uses: codecov/codecov-action@v3
40
+ with:
41
+ token: ${{ secrets.CODECOV_TOKEN }}
.github/workflows/docs.yaml ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: sphinx-documentation
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+
8
+ jobs:
9
+ build_job:
10
+ runs-on: ubuntu-22.04
11
+ env:
12
+ GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
13
+
14
+ steps:
15
+ - name: Checkout
16
+ uses: actions/checkout@v2.3.4
17
+ - name: Set up Python 3.7.13
18
+ uses: actions/setup-python@v2.2.1
19
+ with:
20
+ python-version: 3.7.13
21
+ - name: Install dependencies
22
+ run: |
23
+ curl -sSL https://install.python-poetry.org | python3 -
24
+ poetry lock
25
+ poetry --no-root install
26
+ - name: Make the sphinx docs
27
+ run: |
28
+ poetry run make -C docs clean
29
+ poetry run make -C docs html
30
+ - name: Commit generated files
31
+ run: |
32
+ cd docs/build/html
33
+ git init
34
+ touch .nojekyll
35
+ git add -A
36
+ git config --local user.email "action@github.com"
37
+ git config --local user.name "GitHub Action"
38
+ git config --global --add safe.directory '*'
39
+ git commit -m ${{ github.ref }}
40
+ - name: Push to destination branch
41
+ uses: ad-m/github-push-action@master
42
+ with:
43
+ github_token: ${{ secrets.GITHUB_TOKEN }}
44
+ branch: gh-pages
45
+ force: true
46
+ directory: ./docs/build/html
.github/workflows/kaggle.yaml ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: kaggle-submission
2
+
3
+ on:
4
+ pull_request:
5
+ branches:
6
+ - main
7
+ types: [closed]
8
+
9
+
10
+ jobs:
11
+ kaggle-api:
12
+ if: ${{ github.event.pull_request.merged }}
13
+ runs-on: ubuntu-22.04
14
+ steps:
15
+ - uses: actions/checkout@v2
16
+ - name: Set up Python 3.7.13
17
+ uses: actions/setup-python@v2
18
+ with:
19
+ python-version: 3.7.13
20
+ - name: Install dependencies
21
+ run: |
22
+ curl -sSL https://install.python-poetry.org | python3 -
23
+ poetry lock
24
+ poetry --no-root install
25
+ - name: Authenticate to Kaggle
26
+ run: |
27
+ mkdir ~/.kaggle
28
+ echo ${{ secrets.KAGGLE }} >> ~/.kaggle/kaggle.json
29
+ - name: Push kernel
30
+ run: poetry run kaggle kernels push -p src/
31
+ - name: Update weights
32
+ run: poetry run kaggle datasets version -p data/weights --dir-mode zip -m ${{ github.ref }}
.github/workflows/snyk.yaml ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: snyk-vulnerability-testing
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+ pull_request:
8
+ branches:
9
+ - main
10
+
11
+ jobs:
12
+ snyk-testing:
13
+ runs-on: ubuntu-22.04
14
+ steps:
15
+ - uses: actions/checkout@v2
16
+ - name: Set up Python 3.7.13
17
+ uses: actions/setup-python@v2
18
+ with:
19
+ python-version: 3.7.13
20
+ - name: Install snyk
21
+ run: |
22
+ curl https://static.snyk.io/cli/latest/snyk-linux -o snyk
23
+ chmod +x ./snyk
24
+ mv ./snyk /usr/local/bin/
25
+ - name: Authenticate in snyk
26
+ run: |
27
+ snyk auth ${{ secrets.SNYK_TOKEN }}
28
+ - name: Test via snyk
29
+ run: |
30
+ snyk test
.gitignore ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ *.egg-info/
24
+ .installed.cfg
25
+ *.egg
26
+ MANIFEST
27
+
28
+ # PyInstaller
29
+ # Usually these files are written by a python script from a template
30
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
31
+ *.manifest
32
+ *.spec
33
+
34
+ # Installer logs
35
+ pip-log.txt
36
+ pip-delete-this-directory.txt
37
+
38
+ # Unit test / coverage reports
39
+ htmlcov/
40
+ .tox/
41
+ .coverage
42
+ .coverage.*
43
+ .cache
44
+ nosetests.xml
45
+ coverage.xml
46
+ *.cover
47
+ .hypothesis/
48
+ .pytest_cache/
49
+
50
+ # Translations
51
+ *.mo
52
+ *.pot
53
+
54
+ # Django stuff:
55
+ *.log
56
+ local_settings.py
57
+ db.sqlite3
58
+
59
+ # Flask stuff:
60
+ instance/
61
+ .webassets-cache
62
+
63
+ # Scrapy stuff:
64
+ .scrapy
65
+
66
+ # Sphinx documentation
67
+ docs/_build/
68
+
69
+ # PyBuilder
70
+ target/
71
+
72
+ # Jupyter Notebook
73
+ .ipynb_checkpoints
74
+
75
+ # pyenv
76
+ .python-version
77
+
78
+ # celery beat schedule file
79
+ celerybeat-schedule
80
+
81
+ # SageMath parsed files
82
+ *.sage.py
83
+
84
+ # Environments
85
+ .env
86
+ .venv
87
+ env/
88
+ venv/
89
+ ENV/
90
+ env.bak/
91
+ venv.bak/
92
+
93
+ # Spyder project settings
94
+ .spyderproject
95
+ .spyproject
96
+
97
+ # Rope project settings
98
+ .ropeproject
99
+
100
+ # mkdocs documentation
101
+ /site
102
+
103
+ # mypy
104
+ .mypy_cache/
105
+
106
+ # MACOS
107
+ .DS_Store
108
+ data/raw/*
109
+ catboost_info/
110
+ .idea/
111
+ requirements-dev.txt
112
+
113
+ # logging
114
+ checkpoints/
115
+ # hydra-outputs
116
+ outputs/
117
+ # weights
118
+ demo/model.ckpt
.pre-commit-config.yaml ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ repos:
2
+ - repo: https://github.com/pre-commit/pre-commit-hooks
3
+ rev: v4.2.0
4
+ hooks:
5
+ - id: trailing-whitespace
6
+ - id: end-of-file-fixer
7
+ - id: check-yaml
8
+ - id: check-added-large-files
9
+ args: ['--maxkb=5000']
10
+ - repo: https://github.com/pre-commit/mirrors-autopep8
11
+ rev: v1.6.0
12
+ hooks:
13
+ - id: autopep8
14
+ - repo: https://github.com/pycqa/isort
15
+ rev: 5.10.1
16
+ hooks:
17
+ - id: isort
18
+ name: isort (python)
19
+ - repo: https://github.com/myint/autoflake
20
+ rev: v1.4
21
+ hooks:
22
+ - id: autoflake
23
+ entry: autoflake
24
+ args: [--in-place, --remove-all-unused-imports]
Dockerfile ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ FROM python:3.8-slim
2
+
3
+ RUN curl -sSL https://install.python-poetry.org | python3 -
4
+
5
+ RUN poetry --no-root install
6
+
7
+ CMD ["poetry", "run", "make", "build"]
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2021 Linguask
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
Makefile ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ build: download_data download_weights run
2
+
3
+ download_data:
4
+ mkdir -p data/raw
5
+ cd data/raw; \
6
+ rm *; \
7
+ kaggle competitions download -c feedback-prize-english-language-learning; \
8
+ unzip feedback-prize-english-language-learning.zip; \
9
+ rm feedback-prize-english-language-learning.zip
10
+
11
+ test:
12
+ flake8
13
+ isort .
14
+ pytest -p no:cacheprovider
15
+
16
+ download_weights:
17
+ cd ./demo; \
18
+ kaggle datasets download -d alukaevdanis/feedback-prize-weights; \
19
+ unzip feedback-prize-weights.zip; \
20
+ rm feedback-prize-weights.zip
21
+
22
+ run:
23
+ PYTHONPATH=. streamlit run demo/app.py
codecov.yml ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ coverage:
2
+ status:
3
+ patch:
4
+ default:
5
+ target: 75%
data/.gitkeep ADDED
File without changes
data/weights/.gitignore ADDED
File without changes
data/weights/config.yaml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ predictor:
2
+ _target_: src.solutions.constant_predictor.ConstantPredictorSolution
3
+ validator:
4
+ _target_: src.cross_validate.CrossValidation
5
+ saving_dir: checkpoints
6
+ n_splits: 5
7
+ timestamp: ${now:%Y-%m-%d}/${now:%H-%M-%S}
8
+ cwd: ${hydra:runtime.cwd}
9
+ name: sanity checking of ConstantPredictorSolution
data/weights/cv_fold_0/weights.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66e4c000f2000eb1cca164f97c441e14d438ca2599c8e79f4de6ecade0de8b32
3
+ size 4
data/weights/cv_fold_1/weights.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66e4c000f2000eb1cca164f97c441e14d438ca2599c8e79f4de6ecade0de8b32
3
+ size 4
data/weights/cv_fold_2/weights.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66e4c000f2000eb1cca164f97c441e14d438ca2599c8e79f4de6ecade0de8b32
3
+ size 4
data/weights/cv_fold_3/weights.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66e4c000f2000eb1cca164f97c441e14d438ca2599c8e79f4de6ecade0de8b32
3
+ size 4
data/weights/cv_fold_4/weights.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66e4c000f2000eb1cca164f97c441e14d438ca2599c8e79f4de6ecade0de8b32
3
+ size 4
data/weights/cv_results.csv ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ ,cohesion,syntax,vocabulary,phraseology,grammar,conventions
2
+ 0,0.6608945522512667,0.6302303497468349,0.641527949378647,0.6712010545904032,0.6961855568035836,0.688344859959337
3
+ 1,0.6984637874679632,0.6770818988115173,0.6537810547264135,0.6873910050778178,0.7192102801753979,0.6977768863470648
4
+ 2,0.667332361584851,0.6364362897377401,0.614977179207726,0.6768457760846118,0.6855281534006166,0.6721059014106314
5
+ 3,0.6735313662174706,0.6424358218267724,0.6209266237831667,0.65206733997094,0.6982348955111474,0.6678112510340573
6
+ 4,0.6718680299109107,0.6374401330898904,0.6123724356957945,0.6426845869171515,0.7030259769714595,0.654269863059915
7
+ overall,0.6744180194864924,0.644724898642551,0.6287170485583495,0.6660379525281849,0.7004369725724409,0.676061752362201
data/weights/dataset-metadata.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "title": "Linguask by Vitsyn-Morgunov-Nikulin",
3
+ "id": "alukaevdanis/weights-linguask",
4
+ "licenses": [
5
+ {
6
+ "name": "CC0-1.0"
7
+ }
8
+ ]
9
+ }
data/weights/submission.csv ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ text_id,cohesion,syntax,vocabulary,phraseology,grammar,conventions
2
+ 0000C359D63E,3.0,3.0,3.0,3.0,3.0,3.0
3
+ 000BAD50D026,3.0,3.0,3.0,3.0,3.0,3.0
4
+ 00367BB2546B,3.0,3.0,3.0,3.0,3.0,3.0
data/word_frequencies/unigram_freq.csv ADDED
The diff for this file is too large to render. See raw diff
 
demo/__init__.py ADDED
File without changes
demo/app.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+
3
+ import seaborn as sns
4
+ import streamlit as st
5
+
6
+ from demo.utils import load_model, process_text
7
+
8
+ st.set_page_config(
9
+ page_title="BERT Keyword Extractor",
10
+ page_icon="🎈",
11
+ )
12
+
13
+
14
+ def _max_width_():
15
+ max_width_str = "max-width: 1400px;"
16
+ st.markdown(
17
+ f"""
18
+ <style>
19
+ .reportview-container .main .block-container{{
20
+ {max_width_str}
21
+ }}
22
+ </style>
23
+ """,
24
+ unsafe_allow_html=True,
25
+ )
26
+
27
+
28
+ st.header("🔑 Automated Essay Evaluator")
29
+
30
+ with st.expander("ℹ️ - About this app", expanded=True):
31
+ st.write(
32
+ """
33
+ - This application demonstrates how automated essay evaluation works: given as an input text with max. \
34
+ length of 512, it scores it (from 1.0 to 4.0) for different criteria: cohesion, syntax, vocabulary, \
35
+ phraseology, grammar and conventions.
36
+ - This solution is based on fine-tuned deberta-v3-large model.
37
+ """
38
+ )
39
+
40
+ st.markdown("")
41
+
42
+ st.markdown("")
43
+ st.markdown("## 📌 **Paste document**", unsafe_allow_html=True)
44
+ with st.form(key="my_form"):
45
+ _, c2, _ = st.columns([0.07, 5, 0.07])
46
+
47
+ with c2:
48
+ doc = st.text_area(
49
+ "Paste your text below (max 500 words)",
50
+ height=510,
51
+ )
52
+
53
+ MAX_WORDS = 500
54
+
55
+ res = len(re.findall(r"\w+", doc))
56
+ doc = doc[:MAX_WORDS]
57
+
58
+ submit_button = st.form_submit_button(label="✨ Assess my text!")
59
+
60
+ if not submit_button:
61
+ st.stop()
62
+
63
+ st.markdown("## 🎈 **Check results**")
64
+
65
+ st.header("")
66
+
67
+ cs, c1, c2, c3, cLast = st.columns([2, 1.5, 1.5, 1.5, 2])
68
+
69
+ st.header("")
70
+
71
+ model = load_model()
72
+ df = process_text(doc, model)
73
+
74
+ df.index += 1
75
+
76
+ # Add styling
77
+ cmGreen = sns.light_palette("green", as_cmap=True)
78
+ cmRed = sns.light_palette("red", as_cmap=True)
79
+ df = df.style.background_gradient(
80
+ cmap=cmGreen,
81
+ subset=[
82
+ "Grade",
83
+ ],
84
+ )
85
+
86
+
87
+ format_dictionary = {
88
+ "Relevancy": "{:.1%}",
89
+ }
90
+
91
+ df = df.format(format_dictionary)
92
+
93
+ st.table(df)
demo/utils.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import streamlit as st
3
+ import torch
4
+
5
+ from src.model_finetuning.config import CONFIG
6
+ from src.model_finetuning.model import BertLightningModel
7
+ from src.utils import get_target_columns
8
+
9
+
10
+ @st.cache(allow_output_mutation=True)
11
+ def load_model() -> BertLightningModel:
12
+
13
+ ckpt_path = "demo/model.ckpt"
14
+ model = BertLightningModel.load_from_checkpoint(ckpt_path, config=CONFIG, map_location='cpu')
15
+
16
+ return model
17
+
18
+
19
+ @torch.no_grad()
20
+ def process_text(_text: str, _model: BertLightningModel) -> pd.DataFrame:
21
+ tokens = _model.tokenizer([_text], return_tensors='pt')
22
+ outputs = _model(tokens)[0].tolist()
23
+
24
+ df = pd.DataFrame({
25
+ 'Criterion': get_target_columns(),
26
+ 'Grade': outputs
27
+ })
28
+
29
+ return df
docs/Makefile ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ SPHINXOPTS ?=
2
+ SPHINXBUILD ?= sphinx-build
3
+ SOURCEDIR = source
4
+ BUILDDIR = build
5
+
6
+ help:
7
+ @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
8
+
9
+ .PHONY: help Makefile
10
+
11
+ %: Makefile
12
+ @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
docs/images/demo.jpeg ADDED
docs/images/logo.png ADDED
docs/make.bat ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ @ECHO OFF
2
+
3
+ pushd %~dp0
4
+
5
+ REM Command file for Sphinx documentation
6
+
7
+ if "%SPHINXBUILD%" == "" (
8
+ set SPHINXBUILD=sphinx-build
9
+ )
10
+ set SOURCEDIR=source
11
+ set BUILDDIR=build
12
+
13
+ if "%1" == "" goto help
14
+
15
+ %SPHINXBUILD% >NUL 2>NUL
16
+ if errorlevel 9009 (
17
+ echo.
18
+ echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
19
+ echo.installed, then set the SPHINXBUILD environment variable to point
20
+ echo.to the full path of the 'sphinx-build' executable. Alternatively you
21
+ echo.may add the Sphinx directory to PATH.
22
+ echo.
23
+ echo.If you don't have Sphinx installed, grab it from
24
+ echo.http://sphinx-doc.org/
25
+ exit /b 1
26
+ )
27
+
28
+ %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29
+ goto end
30
+
31
+ :help
32
+ %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33
+
34
+ :end
35
+ popd
docs/source/_templates/layout.html ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {% extends "!layout.html" %}
2
+
3
+ {% block menu %}
4
+ {{ super() }}
5
+ <p class="caption">
6
+ <span class="caption-text">Indices</span>
7
+ </p>
8
+ <ul>
9
+ <li class="toctree-l1"><a href= "{{pathto('genindex.html', 1)}}">Everything</a></li>
10
+ <li class="toctree-l1"><a href= "{{pathto('py-modindex.html', 1)}}">Module Index</a></li>
11
+ </ul>
12
+
13
+ {% if menu_links %}
14
+ <p class="caption">
15
+ <span class="caption-text">External links</span>
16
+ </p>
17
+ <ul>
18
+ {% for text, link in menu_links %}
19
+ <li class="toctree-l1"><a href="{{ link }}">{{ text }}</a></li>
20
+ {% endfor %}
21
+ </ul>
22
+ {% endif %}
23
+ {% endblock %}
24
+
25
+ {% block htmltitle %}
26
+ {% if title == '' or title == 'Home' %}
27
+ <title>{{ docstitle|e }}</title>
28
+ {% else %}
29
+ <title>{{ title|striptags|e }}{{ titlesuffix }}</title>
30
+ {% endif %}
31
+ {% endblock %}
docs/source/_templates/notused_packages.rst_t ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- macro automodule(modname, options) -%}
2
+ .. automodule:: {{ modname }}
3
+ {%- for option in options %}
4
+ :{{ option }}:
5
+ {%- endfor %}
6
+ {%- endmacro %}
7
+
8
+ {%- macro toctree(docnames) -%}
9
+ .. toctree::
10
+ :maxdepth: {{ maxdepth }}
11
+ {% for docname in docnames %}
12
+ {{ docname }}
13
+ {%- endfor %}
14
+ {%- endmacro %}
15
+
16
+ {%- if is_namespace %}
17
+ {{- [pkgname, "namespace"] | join(" ") | e | heading }}
18
+ {% else %}
19
+ {{- [pkgname, "package"] | join(" ") | e | heading }}
20
+ {% endif %}
21
+
22
+ {%- if is_namespace %}
23
+ .. py:module:: {{ pkgname }}
24
+ {% endif %}
25
+
26
+ {%- if modulefirst and not is_namespace %}
27
+ {{ automodule(pkgname, automodule_options) }}
28
+ {% endif %}
29
+
30
+ {%- if subpackages %}
31
+ Subpackages
32
+ -----------
33
+
34
+ {{ toctree(subpackages) }}
35
+ {% endif %}
36
+
37
+ {%- if submodules %}
38
+ Submodules
39
+ ----------
40
+ {% if separatemodules %}
41
+ {{ toctree(submodules) }}
42
+ {% else %}
43
+ {%- for submodule in submodules %}
44
+ {% if show_headings %}
45
+ {{- [submodule, "module"] | join(" ") | e | heading(2) }}
46
+ {% endif %}
47
+ {{ automodule(submodule, automodule_options) }}
48
+ {% endfor %}
49
+ {%- endif %}
50
+ {%- endif %}
51
+
52
+ # https://github.com/sphinx-doc/sphinx/blob/master/sphinx/templates/apidoc
docs/source/conf.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Configuration file for the Sphinx documentation builder.
2
+ import os
3
+ import sys
4
+
5
+ # Needed so Sphinx can find the project's Python modules. Adjust these paths if the .py files live somewhere other than the repository root.
6
+ sys.path.insert(0, os.path.abspath('../..'))
7
+ sys.path.insert(0, os.path.abspath('../../src'))
8
+
9
+ # -- Project information
10
+
11
+ project = 'Linguask'
12
+ copyright = '2022'
13
+ author = 'Multiple'
14
+
15
+ release = '0.1'
16
+ version = '0.1.0'
17
+
18
+ # -- General configuration
19
+
20
+ extensions = [
21
+ 'sphinx.ext.duration',
22
+ 'sphinx.ext.doctest',
23
+ 'sphinx.ext.autodoc',
24
+ 'sphinx.ext.autosummary',
25
+ 'sphinx.ext.intersphinx',
26
+ 'autoapi.extension',
27
+ # 'sphinxcontrib.apidoc',
28
+ ]
29
+
30
+ autoapi_type = 'python'
31
+ autoapi_dirs = ['../../src']
32
+
33
+ intersphinx_mapping = {
34
+ 'python': ('https://docs.python.org/3/', None),
35
+ 'sphinx': ('https://www.sphinx-doc.org/en/master/', None),
36
+ }
37
+ intersphinx_disabled_domains = ['std']
38
+
39
+ templates_path = ['_templates']
40
+
41
+ # -- Options for HTML output
42
+
43
+ html_theme = 'sphinx_rtd_theme'
44
+
45
+ # -- Options for EPUB output
46
+ epub_show_urls = 'footnote'
47
+
48
+ html_context = {
49
+ "display_github": True, # Integrate GitHub
50
+ "github_repo": "vitsyn-morgunov-and-nikulin/automatic-essay-evaluator", # Repo name
51
+ "github_version": "main", # Version
52
+ "conf_py_path": "docs/source/", # Path in the checkout to the docs root
53
+ }
54
+
55
+ # -- Options for HTML output -------------------------------------------------
56
+
57
+ html_short_title = "topobathy"
58
+ html_show_sourcelink = False
59
+ html_show_sphinx = True
60
+ html_show_copyright = True
61
+
62
+ # Add any paths that contain custom static files (such as style sheets) here,
63
+ # relative to this directory. They are copied after the builtin static files,
64
+ # so a file named "default.css" will overwrite the builtin "default.css".
65
+ html_static_path = ['_static']
66
+ repository_url = "https://github.com/Vitsyn-Morgunov-and-Nikulin/automatic-essay-evaluator"
67
+ html_context = {
68
+ "menu_links": [
69
+ (
70
+ '<i class="fa fa-github fa-fw"></i> Source Code',
71
+ repository_url,
72
+ ),
73
+ (
74
+ '<i class="fa fa-book fa-fw"></i> License',
75
+ f"{repository_url}/blob/main/LICENSE",
76
+ ),
77
+ ],
78
+ }
docs/source/contribute.rst ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ How to contribute
2
+ =================
3
+
4
+ In our development process we followed the practices described by Uncle Bob in his magnificent "Clean Code". Please consult this book if you run into any trouble.
5
+
6
+ Make a fork of this repository, and develop your own tool. Make sure it is error-free and the test coverage is at least 60 percent. Update :code:`config` files accordingly, and check their operability.
7
+
8
+ While producing your code, use this famous `git workflow <https://nvie.com/posts/a-successful-git-branching-model/>`_. Also note that our branches use prefixes :code:`feature/`, :code:`fix/`, and :code:`ci-cd/`.
9
+
10
+ Then send a pull request. In its description, list the main features of the tool, the technology stack used, and a brief description of the algorithms. This should be enough for us to accept your code.
11
+
12
+ To check the quality of the code, we use :code:`flake8` and :code:`codacy`.
docs/source/index.rst ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Welcome to Linguask!
2
+ ===================================
3
+
4
+ This page documents our solutions to the `Feedback Prize Competition <https://www.kaggle.com/competitions/feedback-prize-english-language-learning>`_.
5
+
6
+ .. note::
7
+
8
+ This project is under active development... Meanwhile, check out our current version, which is `available at this link <https://huggingface.co/spaces/Booguy/automatic-essay-evaluator>`_!
9
+
10
+ .. toctree::
11
+ :hidden:
12
+
13
+ usage
14
+ contribute
docs/source/usage.rst ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Getting Started
2
+ ===============
3
+
4
+
5
+ .. _prerequisites:
6
+
7
+ Prerequisites
8
+ -------------
9
+ 1. GNU `make` utility (`link <https://www.gnu.org/software/make/>`_)
10
+ 2. Python version 3.7.13 (`link <https://www.python.org/downloads/release/python-3713/>`_)
11
+ 3. Packaging manager `poetry` (`link <https://python-poetry.org>`_)
12
+ 4. At least 2 GB of free space on your hard disk
13
+
14
+ .. code-block:: console
15
+
16
+ poetry lock
17
+ poetry --no-root install
18
+
19
+ Run application locally
20
+ -----------------------
21
+
22
+ To your delight, it's done via a single command:
23
+
24
+ .. code-block:: console
25
+
26
+ poetry run make build
mypy.ini ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ [mypy]
2
+ disallow_untyped_defs = False
3
+ ignore_missing_imports = True
4
+ ignore_errors = True
5
+
6
+ [mypy-src.*]
7
+ ignore_errors = False
poetry.lock ADDED
The diff for this file is too large to render. See raw diff
 
pyproject.toml ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [tool.pytest.ini_options]
2
+ pythonpath = "."
3
+
4
+ [tool.poetry]
5
+ name = "automatic-essay-evaluator"
6
+ version = "0.1.0"
7
+ description = "Automated tool for evaluation of natural language texts."
8
+ authors = ["Danis Alukaev <d.alukaev@innopolis.university>"]
9
+ license = "MIT"
10
+ readme = "README.md"
11
+ packages = [{include = "automatic_essay_evaluator"}]
12
+
13
+ [tool.poetry.dependencies]
14
+ python = "3.7.13"
15
+ flake8 = "^5.0.4"
16
+ hydra-core = "^1.3.0"
17
+ python-dotenv = "^0.21.0"
18
+ coverage = "^6.5.0"
19
+ streamlit = "^1.16.0"
20
+ transformers = {extras = ["sentencepiece"], version = "^4.25.1"}
21
+ sphinx = ">=4.0"
22
+ sphinx-rtd-theme = "^1.1.1"
23
+ sphinx-autoapi = "^2.0.0"
24
+ mypy = "^0.991"
25
+ types-requests = "^2.28.11.5"
26
+ pytest-cov = "^4.0.0"
27
+
28
+ [tool.poetry.group.dev.dependencies]
29
+ flake8 = "^5.0.4"
30
+ hydra-core = "^1.3.0"
31
+ isort = "^5.11.3"
32
+ kaggle = "^1.5.12"
33
+ numpy = "^1.21.6"
34
+ pre-commit = "^2.20.0"
35
+ pytest = "^7.2.0"
36
+ pytorch-lightning = "^1.8.5.post0"
37
+ tqdm = "^4.64.1"
38
+ torch = "^1.13.1"
39
+ transformers = "^4.25.1"
40
+ pyspellchecker = "^0.7.1"
41
+ catboost = "^1.1.1"
42
+ scikit-learn = "^1.0.2"
43
+ wandb = "^0.13.7"
44
+ easydict = "^1.10"
45
+ nltk = "^3.8"
46
+ seaborn = "^0.12.1"
47
+
48
+ [build-system]
49
+ requires = ["poetry-core"]
50
+ build-backend = "poetry.core.masonry.api"
src/__init__.py ADDED
File without changes
src/config/conf/config.yaml ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ defaults:
2
+ - predictor: bert_with_handcrafted_feature_predictor
3
+ - validator: cross_validator
4
+ - _self_
5
+
6
+ timestamp: ${now:%Y-%m-%d}/${now:%H-%M-%S}
7
+ cwd: ${hydra:runtime.cwd}
8
+
9
+ hydra:
10
+ job:
11
+ chdir: True
src/config/conf/experiment/sanity_bert_finetuning_predictor.yaml ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # @package _global_
2
+
3
+ name: sanity checking of BertFinetuningPredictor
4
+
5
+ defaults:
6
+ - override /predictor: bert_finetuning_predictor
7
+
8
+ validator:
9
+ saving_dir: checkpoints
10
+
11
+ predictor:
12
+ batch_size: 8
13
+ num_workers: 8
14
+ max_length: 64
15
+ weight_decay: 0.01
16
+ accelerator: gpu
17
+ max_epochs: 2
18
+ train_size: 0.8
19
+ num_cross_val_splits: 5
src/config/conf/experiment/sanity_bert_with_handcrafted_feature_predictor.yaml ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # @package _global_
2
+
3
+ name: sanity checking of BertWithHandcraftedFeaturePredictor
4
+
5
+ defaults:
6
+ - override /predictor: bert_with_handcrafted_feature_predictor
7
+
8
+ validator:
9
+ saving_dir: checkpoints
10
+ n_splits: 2
11
+
12
+ predictor:
13
+ catboost_iter: 5
src/config/conf/experiment/sanity_constant_predictor.yaml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ # @package _global_
2
+
3
+ name: sanity checking of ConstantPredictorSolution
4
+
5
+ defaults:
6
+ - override /predictor: constant_predictor
7
+
8
+ validator:
9
+ saving_dir: checkpoints
src/config/conf/experiment/sanity_many_bert_with_handcrafted_feature_predictor.yaml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # @package _global_
2
+
3
+ name: sanity checking of ManyBertWithHandcraftedFeaturePredictor
4
+
5
+ defaults:
6
+ - override /predictor: many_bert_with_handcrafted_feature_predictor
7
+
8
+ validator:
9
+ saving_dir: checkpoints
10
+
11
+ predictor:
12
+ model_names:
13
+ - bert-base-uncased
14
+ - bert-base-cased
15
+ catboost_iter: 5
src/config/conf/predictor/bert_finetuning_predictor.yaml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _target_: src.solutions.bert_finetune_solution.BertFinetuningPredictor
2
+ model_name: microsoft/deberta-v3-large
3
+ num_classes: 6
4
+ lr: 2e-5
5
+ batch_size: 8
6
+ num_workers: 8
7
+ max_length: 512
8
+ weight_decay: 0.01
9
+ accelerator: gpu
10
+ max_epochs: 5
11
+ accumulate_grad_batches: 4
12
+ precision: 16
13
+ gradient_clip_val: 1000
14
+ train_size: 0.8
15
+ num_cross_val_splits: 5
16
+ num_frozen_layers: 20
src/config/conf/predictor/bert_with_handcrafted_feature_predictor.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ _target_: src.solutions.bert_featurizer_solution.BertWithHandcraftedFeaturePredictor
2
+ model_name: bert-base-uncased
3
+ catboost_iter: 500
4
+ saving_dir: checkpoints
src/config/conf/predictor/constant_predictor.yaml ADDED
@@ -0,0 +1 @@
 
 
1
+ _target_: src.solutions.constant_predictor.ConstantPredictorSolution