Spaces:

Booguy
/

linguask

Build error

App Files Files Community

GitHub Action committed on Dec 20, 2022

Commit

8b414b0

1 Parent(s): 1579a76

refs/heads/ci-cd/hugging-face

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.env.example +3 -0
.flake8 +33 -0
.github/workflows/cd.yaml +48 -0
.github/workflows/ci.yaml +41 -0
.github/workflows/docs.yaml +46 -0
.github/workflows/kaggle.yaml +32 -0
.github/workflows/snyk.yaml +30 -0
.gitignore +118 -0
.pre-commit-config.yaml +24 -0
Dockerfile +7 -0
LICENSE +21 -0
Makefile +23 -0
codecov.yml +5 -0
data/.gitkeep +0 -0
data/weights/.gitignore +0 -0
data/weights/config.yaml +9 -0
data/weights/cv_fold_0/weights.ckpt +3 -0
data/weights/cv_fold_1/weights.ckpt +3 -0
data/weights/cv_fold_2/weights.ckpt +3 -0
data/weights/cv_fold_3/weights.ckpt +3 -0
data/weights/cv_fold_4/weights.ckpt +3 -0
data/weights/cv_results.csv +7 -0
data/weights/dataset-metadata.json +9 -0
data/weights/submission.csv +4 -0
data/word_frequencies/unigram_freq.csv +0 -0
demo/__init__.py +0 -0
demo/app.py +93 -0
demo/utils.py +29 -0
docs/Makefile +12 -0
docs/images/demo.jpeg +0 -0
docs/images/logo.png +0 -0
docs/make.bat +35 -0
docs/source/_templates/layout.html +31 -0
docs/source/_templates/notused_packages.rst_t +52 -0
docs/source/conf.py +78 -0
docs/source/contribute.rst +12 -0
docs/source/index.rst +14 -0
docs/source/usage.rst +26 -0
mypy.ini +7 -0
poetry.lock +0 -0
pyproject.toml +50 -0
src/__init__.py +0 -0
src/config/conf/config.yaml +11 -0
src/config/conf/experiment/sanity_bert_finetuning_predictor.yaml +19 -0
src/config/conf/experiment/sanity_bert_with_handcrafted_feature_predictor.yaml +13 -0
src/config/conf/experiment/sanity_constant_predictor.yaml +9 -0
src/config/conf/experiment/sanity_many_bert_with_handcrafted_feature_predictor.yaml +15 -0
src/config/conf/predictor/bert_finetuning_predictor.yaml +16 -0
src/config/conf/predictor/bert_with_handcrafted_feature_predictor.yaml +4 -0
src/config/conf/predictor/constant_predictor.yaml +1 -0

.env.example ADDED Viewed

	@@ -0,0 +1,3 @@

+DATA_PATH = ""
+BOT_TOKEN = ""
+CHAT_ID =

.flake8 ADDED Viewed

	@@ -0,0 +1,33 @@

+[flake8]
+max-complexity = 10
+max_line_length = 138
+exclude =
+    # no need to traverse our git directory
+    .git,
+    # there's no value in checking cache directories
+    __pycache__,
+    # this directory is mostly auto-generated
+    docs/source,
+extend-ignore =
+    # H101: use TODO(NAME)
+    H101,
+    # H202: assertRaises Exception too broad
+    H202,
+    # H233: python 3.x incompatible use of print operator
+    H233,
+    # H301: one import per line
+    H301,
+    # H306: imports not in alphabetical order (time, os)
+    H306,
+    # H401: docstring should not start with a space
+    H401,
+    # H403: multi line docstrings should end on a new line
+    H403,
+    # H404: multi line docstring should start without a leading new line
+    H404,
+    # H405: multi line docstring summary not separated with an empty line
+    H405,
+    # H501: do not use self.__dict__ for string formatting
+    H501,
+    # E203: colons should not have any space before them
+    E203,

.github/workflows/cd.yaml ADDED Viewed

	@@ -0,0 +1,48 @@

+name: continuous-deployment
+on:
+  push:
+    branches:
+      - ci-cd/hugging-face
+  release:
+    types: [published]
+jobs:
+  continuous-deployment:
+    runs-on: ubuntu-22.04
+    steps:
+      - uses: actions/checkout@v2
+      - name: Set up Python 3.7.13
+        uses: actions/setup-python@v2
+        with:
+          python-version: 3.7.13
+      - name: Install dependencies
+        run: |
+          curl -sSL https://install.python-poetry.org | python3 -
+          poetry --no-root install
+      - name: Clone Hugging Face repo
+        run: |
+          cd ..
+          git clone https://huggingface.co/spaces/Booguy/linguask
+      # Replaces the contents of the cloned Space with this repository's files
+      # while keeping the Space's own .git directory and README.md:
+      #   1. park the Space's .git and README.md in the parent directory,
+      #   2. delete the remaining entries of ./linguask (the `*` glob skips
+      #      dotfiles under default shell globbing),
+      #   3. put .git and README.md back into ./linguask,
+      #   4. drop this checkout's own .git and README.md, then copy everything
+      #      else (dotfiles included, via `/.`) into ./linguask.
+      # NOTE(review): assumes the checkout lives in ../automatic-essay-evaluator,
+      # i.e. that the GitHub repository carries that name — confirm.
+      - name: Move files to repo
+        run: |
+          cd ..
+          mv ./linguask/.git/ .
+          mv ./linguask/README.md .
+          rm -rf ./linguask/*
+          mv .git ./linguask
+          mv README.md ./linguask
+          cd ./automatic-essay-evaluator
+          rm -rf ./.git
+          rm README.md
+          cd ..
+          cp -a ./automatic-essay-evaluator/. ./linguask
+      - name: Commit all files
+        run: |
+          cd ..
+          cd linguask
+          git add .
+          git config --local user.email "action@github.com"
+          git config --local user.name "GitHub Action"
+          git commit -m ${{ github.ref }}
+          git push https://Booguy:${{ secrets.HF_TOKEN }}@huggingface.co/spaces/Booguy/linguask.git

.github/workflows/ci.yaml ADDED Viewed

	@@ -0,0 +1,41 @@

+name: continuous-integration
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    branches:
+      - main
+jobs:
+  continuous-integration:
+    runs-on: ubuntu-22.04
+    steps:
+      - uses: actions/checkout@v2
+      - name: Set up Python 3.7.13
+        uses: actions/setup-python@v2
+        with:
+          python-version: 3.7.13
+      - name: Install dependencies
+        run: |
+          curl -sSL https://install.python-poetry.org | python3 -
+          poetry --no-root install
+      - uses: jamescurtin/isort-action@master
+      - name: Lint with flake8
+        run: |
+          poetry run flake8
+      - name: Lint with static type checker mypy
+        run: |
+          poetry run mypy .
+      - name: Test with pytest
+        run: |
+          poetry run pytest
+      - name: Coverage report
+        run: |
+          poetry run coverage run -m pytest
+          poetry run coverage xml
+      - name: Upload Coverage Report to Codecov
+        uses: codecov/codecov-action@v3
+        with:
+          token: ${{ secrets.CODECOV_TOKEN }}

.github/workflows/docs.yaml ADDED Viewed

	@@ -0,0 +1,46 @@

+name: sphinx-documentation
+on:
+  push:
+    branches:
+      - main
+jobs:
+  build_job:
+    runs-on: ubuntu-22.04
+    env:
+      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v2.3.4
+      - name: Set up Python 3.7.13
+        uses: actions/setup-python@v2.2.1
+        with:
+          python-version: 3.7.13
+      - name: Install dependencies
+        run: |
+          curl -sSL https://install.python-poetry.org | python3 -
+          poetry lock
+          poetry --no-root install
+      - name: Make the sphinx docs
+        run: |
+          poetry run make -C docs clean
+          poetry run make -C docs html
+      - name: Commit generated files
+        run: |
+          cd docs/build/html
+          git init
+          touch .nojekyll
+          git add -A
+          git config --local user.email "action@github.com"
+          git config --local user.name "GitHub Action"
+          git config --global --add safe.directory '*'
+          git commit -m ${{ github.ref }}
+      - name: Push to destination branch
+        uses: ad-m/github-push-action@master
+        with:
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          branch: gh-pages
+          force: true
+          directory: ./docs/build/html

.github/workflows/kaggle.yaml ADDED Viewed

	@@ -0,0 +1,32 @@

+name: kaggle-submission
+on:
+  pull_request:
+    branches:
+      - main
+    types: [closed]
+jobs:
+  kaggle-api:
+    if: ${{ github.event.pull_request.merged }}
+    runs-on: ubuntu-22.04
+    steps:
+      - uses: actions/checkout@v2
+      - name: Set up Python 3.7.13
+        uses: actions/setup-python@v2
+        with:
+          python-version: 3.7.13
+      - name: Install dependencies
+        run: |
+          curl -sSL https://install.python-poetry.org | python3 -
+          poetry lock
+          poetry --no-root install
+      # Writes the KAGGLE secret to ~/.kaggle/kaggle.json, presumably the
+      # credentials file read by the `kaggle` CLI steps that follow.
+      # NOTE(review): the secret is substituted unquoted into the shell line, so
+      # raw JSON would go through quote removal / brace expansion before `echo`
+      # sees it unless the stored secret is already shell-escaped — confirm the
+      # secret's format.
+      - name: Authenticate to Kaggle
+        run: |
+          mkdir ~/.kaggle
+          echo ${{ secrets.KAGGLE }} >> ~/.kaggle/kaggle.json
+      - name: Push kernel
+        run: poetry run kaggle kernels push -p src/
+      - name: Update weights
+        run: poetry run kaggle datasets version -p data/weights --dir-mode zip -m ${{ github.ref }}

.github/workflows/snyk.yaml ADDED Viewed

	@@ -0,0 +1,30 @@

+name: snyk-vulnerability-testing
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    branches:
+      - main
+jobs:
+  snyk-testing:
+    runs-on: ubuntu-22.04
+    steps:
+      - uses: actions/checkout@v2
+      - name: Set up Python 3.7.13
+        uses: actions/setup-python@v2
+        with:
+          python-version: 3.7.13
+      - name: Install snyk
+        run: |
+          curl https://static.snyk.io/cli/latest/snyk-linux -o snyk
+          chmod +x ./snyk
+          mv ./snyk /usr/local/bin/
+      - name: Authenticate in snyk
+        run: |
+          snyk auth ${{ secrets.SNYK_TOKEN }}
+      - name: Test via snyk
+        run: |
+          snyk test

.gitignore ADDED Viewed

	@@ -0,0 +1,118 @@

+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+.pytest_cache/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# pyenv
+.python-version
+# celery beat schedule file
+celerybeat-schedule
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+# MACOS
+.DS_Store
+data/raw/*
+catboost_info/
+.idea/
+requirements-dev.txt
+# logging
+checkpoints/
+# hydra-outputs
+outputs/
+# weights
+demo/model.ckpt

.pre-commit-config.yaml ADDED Viewed

	@@ -0,0 +1,24 @@

+repos:
+-   repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.2.0
+    hooks:
+    -   id: trailing-whitespace
+    -   id: end-of-file-fixer
+    -   id: check-yaml
+    -   id: check-added-large-files
+        args: ['--maxkb=5000']
+-   repo: https://github.com/pre-commit/mirrors-autopep8
+    rev: v1.6.0
+    hooks:
+    -   id: autopep8
+-   repo: https://github.com/pycqa/isort
+    rev: 5.10.1
+    hooks:
+    -   id: isort
+        name: isort (python)
+-   repo: https://github.com/myint/autoflake
+    rev: v1.4
+    hooks:
+    -   id: autoflake
+        entry: autoflake
+        args: [--in-place, --remove-all-unused-imports]

Dockerfile ADDED Viewed

	@@ -0,0 +1,7 @@

+# Python version matches the `python = "3.7.13"` pin in pyproject.toml.
+FROM python:3.7.13-slim
+# The slim image ships without curl, make and unzip; the Poetry installer needs
+# curl and the Makefile targets run by CMD need make and unzip.
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends ca-certificates curl make unzip \
+    && rm -rf /var/lib/apt/lists/*
+RUN curl -sSL https://install.python-poetry.org | python3 -
+# The official installer puts the `poetry` launcher in $HOME/.local/bin, which
+# is not on PATH by default.
+ENV PATH="/root/.local/bin:${PATH}"
+# Bring the project (pyproject.toml, poetry.lock, Makefile, sources) into the
+# image so that `poetry install` and `make build` have something to work on.
+WORKDIR /app
+COPY . /app
+RUN poetry --no-root install
+CMD ["poetry", "run", "make", "build"]

LICENSE ADDED Viewed

	@@ -0,0 +1,21 @@

+MIT License
+Copyright (c) 2021 Linguask
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

Makefile ADDED Viewed

	@@ -0,0 +1,23 @@

+# None of these targets produces a file of the same name; declaring them phony
+# keeps make from skipping e.g. `test` or `build` when such a path exists.
+.PHONY: build download_data test download_weights run
+# Fetch the competition data and the model weights, then start the demo.
+build: download_data download_weights run
+# Download and unpack the Kaggle competition data into data/raw.
+download_data:
+	mkdir -p data/raw
+	cd data/raw || exit 1; \
+	rm -f *; \
+	kaggle competitions download -c feedback-prize-english-language-learning; \
+	unzip feedback-prize-english-language-learning.zip; \
+	rm feedback-prize-english-language-learning.zip
+# Lint, sort imports and run the test suite.
+test:
+	flake8
+	isort .
+	pytest  -p no:cacheprovider
+# Download and unpack the published weights into ./demo.
+download_weights:
+	cd ./demo || exit 1; \
+	kaggle datasets download -d alukaevdanis/feedback-prize-weights; \
+	unzip feedback-prize-weights.zip; \
+	rm feedback-prize-weights.zip
+# Launch the Streamlit demo with the repository root on PYTHONPATH.
+run:
+	PYTHONPATH=. streamlit run demo/app.py

codecov.yml ADDED Viewed

	@@ -0,0 +1,5 @@

+coverage:
+  status:
+    patch:
+      default:
+        target: 75%

data/.gitkeep ADDED Viewed

File without changes

data/weights/.gitignore ADDED Viewed

File without changes

data/weights/config.yaml ADDED Viewed

	@@ -0,0 +1,9 @@

+predictor:
+  _target_: src.solutions.constant_predictor.ConstantPredictorSolution
+validator:
+  _target_: src.cross_validate.CrossValidation
+  saving_dir: checkpoints
+  n_splits: 5
+timestamp: ${now:%Y-%m-%d}/${now:%H-%M-%S}
+cwd: ${hydra:runtime.cwd}
+name: sanity checking of ConstantPredictorSolution

data/weights/cv_fold_0/weights.ckpt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:66e4c000f2000eb1cca164f97c441e14d438ca2599c8e79f4de6ecade0de8b32
+size 4

data/weights/cv_fold_1/weights.ckpt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:66e4c000f2000eb1cca164f97c441e14d438ca2599c8e79f4de6ecade0de8b32
+size 4

data/weights/cv_fold_2/weights.ckpt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:66e4c000f2000eb1cca164f97c441e14d438ca2599c8e79f4de6ecade0de8b32
+size 4

data/weights/cv_fold_3/weights.ckpt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:66e4c000f2000eb1cca164f97c441e14d438ca2599c8e79f4de6ecade0de8b32
+size 4

data/weights/cv_fold_4/weights.ckpt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:66e4c000f2000eb1cca164f97c441e14d438ca2599c8e79f4de6ecade0de8b32
+size 4

data/weights/cv_results.csv ADDED Viewed

	@@ -0,0 +1,7 @@

+,cohesion,syntax,vocabulary,phraseology,grammar,conventions
+0,0.6608945522512667,0.6302303497468349,0.641527949378647,0.6712010545904032,0.6961855568035836,0.688344859959337
+1,0.6984637874679632,0.6770818988115173,0.6537810547264135,0.6873910050778178,0.7192102801753979,0.6977768863470648
+2,0.667332361584851,0.6364362897377401,0.614977179207726,0.6768457760846118,0.6855281534006166,0.6721059014106314
+3,0.6735313662174706,0.6424358218267724,0.6209266237831667,0.65206733997094,0.6982348955111474,0.6678112510340573
+4,0.6718680299109107,0.6374401330898904,0.6123724356957945,0.6426845869171515,0.7030259769714595,0.654269863059915
+overall,0.6744180194864924,0.644724898642551,0.6287170485583495,0.6660379525281849,0.7004369725724409,0.676061752362201

data/weights/dataset-metadata.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+  "title": "Linguask by Vitsyn-Morgunov-Nikulin",
+  "id": "alukaevdanis/weights-linguask",
+  "licenses": [
+    {
+      "name": "CC0-1.0"
+    }
+  ]
+}

data/weights/submission.csv ADDED Viewed

	@@ -0,0 +1,4 @@

+text_id,cohesion,syntax,vocabulary,phraseology,grammar,conventions
+0000C359D63E,3.0,3.0,3.0,3.0,3.0,3.0
+000BAD50D026,3.0,3.0,3.0,3.0,3.0,3.0
+00367BB2546B,3.0,3.0,3.0,3.0,3.0,3.0

data/word_frequencies/unigram_freq.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

demo/__init__.py ADDED Viewed

File without changes

demo/app.py ADDED Viewed

	@@ -0,0 +1,93 @@

+import re
+import seaborn as sns
+import streamlit as st
+from demo.utils import load_model, process_text
+st.set_page_config(
+    page_title="BERT Keyword Extractor",
+    page_icon="🎈",
+)
+def _max_width_():
+    max_width_str = "max-width: 1400px;"
+    st.markdown(
+        f"""
+    <style>
+    .reportview-container .main .block-container{{
+        {max_width_str}
+    }}
+    </style>
+    """,
+        unsafe_allow_html=True,
+    )
+st.header("🔑 Automated Essay Evaluator")
+with st.expander("ℹ️ - About this app", expanded=True):
+    st.write(
+        """
+        -   This application demonstrates how automated essay evaluation works: given as an input text with max. \
+        length of 512, it scores it (from 1.0 to 4.0) for different criteria: cohesion, syntax, vocabulary, \
+        phraseology, grammar and conventions.
+        -   This solution is based on fine-tuned deberta-v3-large model.
+        """
+    )
+    st.markdown("")
+st.markdown("")
+st.markdown("## 📌 **Paste document**", unsafe_allow_html=True)
+with st.form(key="my_form"):
+    _, c2, _ = st.columns([0.07, 5, 0.07])
+    with c2:
+        doc = st.text_area(
+            "Paste your text below (max 500 words)",
+            height=510,
+        )
+        MAX_WORDS = 500
+        res = len(re.findall(r"\w+", doc))
+        doc = doc[:MAX_WORDS]
+        submit_button = st.form_submit_button(label="✨ Assess my text!")
+if not submit_button:
+    st.stop()
+# Results section: score the submitted text and show the grades as a table.
+st.markdown("## 🎈  **Check results**")
+st.header("")
+# NOTE(review): none of these five columns is used further down — confirm
+# whether this layout call is still needed.
+cs, c1, c2, c3, cLast = st.columns([2, 1.5, 1.5, 1.5, 2])
+st.header("")
+# process_text returns one row per criterion with its grade.
+model = load_model()
+df = process_text(doc, model)
+# Number the rows from 1 instead of 0 for display.
+df.index += 1
+# Add styling
+cmGreen = sns.light_palette("green", as_cmap=True)
+# NOTE(review): cmRed is not referenced anywhere in the visible script.
+cmRed = sns.light_palette("red", as_cmap=True)
+# Shade the "Grade" column with the green colormap.
+df = df.style.background_gradient(
+    cmap=cmGreen,
+    subset=[
+        "Grade",
+    ],
+)
+# NOTE(review): process_text builds only "Criterion" and "Grade" columns, so
+# this "Relevancy" formatter appears to match nothing — confirm.
+format_dictionary = {
+    "Relevancy": "{:.1%}",
+}
+df = df.format(format_dictionary)
+st.table(df)

demo/utils.py ADDED Viewed

	@@ -0,0 +1,29 @@

+import pandas as pd
+import streamlit as st
+import torch
+from src.model_finetuning.config import CONFIG
+from src.model_finetuning.model import BertLightningModel
+from src.utils import get_target_columns
+@st.cache(allow_output_mutation=True)
+def load_model() -> BertLightningModel:
+    ckpt_path = "demo/model.ckpt"
+    model = BertLightningModel.load_from_checkpoint(ckpt_path, config=CONFIG, map_location='cpu')
+    return model
+@torch.no_grad()
+def process_text(_text: str, _model: BertLightningModel) -> pd.DataFrame:
+    tokens = _model.tokenizer([_text], return_tensors='pt')
+    outputs = _model(tokens)[0].tolist()
+    df = pd.DataFrame({
+        'Criterion': get_target_columns(),
+        'Grade': outputs
+    })
+    return df

docs/Makefile ADDED Viewed

	@@ -0,0 +1,12 @@

+SPHINXOPTS    ?=
+SPHINXBUILD   ?= sphinx-build
+SOURCEDIR     = source
+BUILDDIR      = build
+help:
+	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+.PHONY: help Makefile
+%: Makefile
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

docs/images/demo.jpeg ADDED Viewed

docs/images/logo.png ADDED Viewed

docs/make.bat ADDED Viewed

	@@ -0,0 +1,35 @@

+@ECHO OFF
+pushd %~dp0
+REM Command file for Sphinx documentation
+if "%SPHINXBUILD%" == "" (
+	set SPHINXBUILD=sphinx-build
+)
+set SOURCEDIR=source
+set BUILDDIR=build
+if "%1" == "" goto help
+%SPHINXBUILD% >NUL 2>NUL
+if errorlevel 9009 (
+	echo.
+	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+	echo.installed, then set the SPHINXBUILD environment variable to point
+	echo.to the full path of the 'sphinx-build' executable. Alternatively you
+	echo.may add the Sphinx directory to PATH.
+	echo.
+	echo.If you don't have Sphinx installed, grab it from
+	echo.http://sphinx-doc.org/
+	exit /b 1
+)
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+goto end
+:help
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+:end
+popd

docs/source/_templates/layout.html ADDED Viewed

	@@ -0,0 +1,31 @@

+{% extends "!layout.html" %}
+{% block menu %}
+    {{ super() }}
+        <p class="caption">
+            <span class="caption-text">Indices</span>
+        </p>
+        <ul>
+    <li class="toctree-l1"><a href= "{{pathto('genindex.html', 1)}}">Everything</a></li>
+    <li class="toctree-l1"><a href= "{{pathto('py-modindex.html', 1)}}">Module Index</a></li>
+    </ul>
+    {% if menu_links %}
+        <p class="caption">
+            <span class="caption-text">External links</span>
+        </p>
+        <ul>
+            {% for text, link in menu_links %}
+                <li class="toctree-l1"><a href="{{ link }}">{{ text }}</a></li>
+            {% endfor %}
+        </ul>
+    {% endif %}
+{% endblock %}
+{% block htmltitle %}
+    {% if title == '' or title == 'Home' %}
+        <title>{{ docstitle|e }}</title>
+    {% else %}
+        <title>{{ title|striptags|e }}{{ titlesuffix }}</title>
+    {% endif %}
+{% endblock %}

docs/source/_templates/notused_packages.rst_t ADDED Viewed

	@@ -0,0 +1,52 @@

+{%- macro automodule(modname, options) -%}
+.. automodule:: {{ modname }}
+{%- for option in options %}
+   :{{ option }}:
+{%- endfor %}
+{%- endmacro %}
+{%- macro toctree(docnames) -%}
+.. toctree::
+   :maxdepth: {{ maxdepth }}
+{% for docname in docnames %}
+   {{ docname }}
+{%- endfor %}
+{%- endmacro %}
+{%- if is_namespace %}
+{{- [pkgname, "namespace"] | join(" ") | e | heading }}
+{% else %}
+{{- [pkgname, "package"] | join(" ") | e | heading }}
+{% endif %}
+{%- if is_namespace %}
+.. py:module:: {{ pkgname }}
+{% endif %}
+{%- if modulefirst and not is_namespace %}
+{{ automodule(pkgname, automodule_options) }}
+{% endif %}
+{%- if subpackages %}
+Subpackages
+-----------
+{{ toctree(subpackages) }}
+{% endif %}
+{%- if submodules %}
+Submodules
+----------
+{% if separatemodules %}
+{{ toctree(submodules) }}
+{% else %}
+{%- for submodule in submodules %}
+{% if show_headings %}
+{{- [submodule, "module"] | join(" ") | e | heading(2) }}
+{% endif %}
+{{ automodule(submodule, automodule_options) }}
+{% endfor %}
+{%- endif %}
+{%- endif %}
+# https://github.com/sphinx-doc/sphinx/blob/master/sphinx/templates/apidoc

docs/source/conf.py ADDED Viewed

	@@ -0,0 +1,78 @@

+# Configuration file for the Sphinx documentation builder.
+import os
+import sys
+# Need this so sphinx can find lumache.py. Change is .py files are elsewhere than root.
+sys.path.insert(0, os.path.abspath('../..'))
+sys.path.insert(0, os.path.abspath('../../src'))
+# -- Project information
+project = 'Linguask'
+copyright = '2022'
+author = 'Multiple'
+release = '0.1'
+version = '0.1.0'
+# -- General configuration
+extensions = [
+    'sphinx.ext.duration',
+    'sphinx.ext.doctest',
+    'sphinx.ext.autodoc',
+    'sphinx.ext.autosummary',
+    'sphinx.ext.intersphinx',
+    'autoapi.extension',
+    #    'sphinxcontrib.apidoc',
+]
+autoapi_type = 'python'
+autoapi_dirs = ['../../src']
+intersphinx_mapping = {
+    'python': ('https://docs.python.org/3/', None),
+    'sphinx': ('https://www.sphinx-doc.org/en/master/', None),
+}
+intersphinx_disabled_domains = ['std']
+templates_path = ['_templates']
+# -- Options for HTML output
+html_theme = 'sphinx_rtd_theme'
+# -- Options for EPUB output
+epub_show_urls = 'footnote'
+html_context = {
+    "display_github": True,  # Integrate GitHub
+    "github_repo": "vitsyn-morgunov-and-nikulin/automatic-essay-evaluator",  # Repo name
+    "github_version": "main",  # Version
+    "conf_py_path": "docs/source/",  # Path in the checkout to the docs root
+}
+# -- Options for HTML output -------------------------------------------------
+html_short_title = "topobathy"
+html_show_sourcelink = False
+html_show_sphinx = True
+html_show_copyright = True
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ['_static']
+repository_url = "https://github.com/Vitsyn-Morgunov-and-Nikulin/automatic-essay-evaluator"
+html_context = {
+    "menu_links": [
+        (
+            '<i class="fa fa-github fa-fw"></i> Source Code',
+            repository_url,
+        ),
+        (
+            '<i class="fa fa-book fa-fw"></i> License',
+            f"{repository_url}/blob/main/LICENSE",
+        ),
+    ],
+}

docs/source/contribute.rst ADDED Viewed

	@@ -0,0 +1,12 @@

+How to contribute
+=================
+In our development process we followed the practices described by Uncle Bob in his magnificent "Clean Code". Please consult this book in case of any trouble.
+Make a fork of this repository, and develop your own tool. Make sure it is error-free and the test coverage is at least 60 percent. Update :code:`config` files accordingly, and check their operability.
+While producing your code, use this famous `git workflow <https://nvie.com/posts/a-successful-git-branching-model/>`_. Also note that our branches use prefixes :code:`feature/`, :code:`fix/`, and :code:`ci-cd/`.
+Further, send a pull request. In the comment, write the main features of the tool, the technology stack used, and a brief description of the algorithms. This should be enough for us to accept your code.
+To check the quality of the code, we use :code:`flake8` and :code:`codacy`.

docs/source/index.rst ADDED Viewed

	@@ -0,0 +1,14 @@

+Welcome to Linguask!
+===================================
+This page documents our solutions to `Feedback Prize Competition <https://www.kaggle.com/competitions/feedback-prize-english-language-learning>`_.
+.. note::
+   This project is under active development... Meanwhile, check out our relevant version that is `available on this link <https://huggingface.co/spaces/Booguy/automatic-essay-evaluator>`_!
+.. toctree::
+   :hidden:
+   usage
+   contribute

docs/source/usage.rst ADDED Viewed

	@@ -0,0 +1,26 @@

+Getting Started
+===============
+.. _prerequisites:
+Prerequisites
+-------------
+1. GNU `make` utility (`link <https://www.gnu.org/software/make/>`_)
+2. Python of version 3.7.13 (`link <https://www.python.org/downloads/release/python-3713/>`_)
+3. Packaging manager `poetry` (`link <https://python-poetry.org>`_)
+4. At least 2Gb on your hard disk
+.. code-block:: console
+   poetry lock
+   poetry --no-root install
+Run application locally
+-----------------------
+To your delight, it's done via a single command:
+.. code-block:: console
+   poetry run make build

mypy.ini ADDED Viewed

	@@ -0,0 +1,7 @@

+[mypy]
+disallow_untyped_defs = False
+ignore_missing_imports = True
+ignore_errors = True
+[mypy-src.*]
+ignore_errors = False

poetry.lock ADDED Viewed

The diff for this file is too large to render. See raw diff

pyproject.toml ADDED Viewed

	@@ -0,0 +1,50 @@

+[tool.pytest.ini_options]
+pythonpath = "."
+[tool.poetry]
+name = "automatic-essay-evaluator"
+version = "0.1.0"
+description = "Automated tool for evaluation of natural language texts."
+authors = ["Danis Alukaev <d.alukaev@innopolis.university>"]
+license = "MIT"
+readme = "README.md"
+packages = [{include = "automatic_essay_evaluator"}]
+[tool.poetry.dependencies]
+python = "3.7.13"
+flake8 = "^5.0.4"
+hydra-core = "^1.3.0"
+python-dotenv = "^0.21.0"
+coverage = "^6.5.0"
+streamlit = "^1.16.0"
+transformers = {extras = ["sentencepiece"], version = "^4.25.1"}
+sphinx = ">=4.0"
+sphinx-rtd-theme = "^1.1.1"
+sphinx-autoapi = "^2.0.0"
+mypy = "^0.991"
+types-requests = "^2.28.11.5"
+pytest-cov = "^4.0.0"
+[tool.poetry.group.dev.dependencies]
+flake8 = "^5.0.4"
+hydra-core = "^1.3.0"
+isort = "^5.11.3"
+kaggle = "^1.5.12"
+numpy = "^1.21.6"
+pre-commit = "^2.20.0"
+pytest = "^7.2.0"
+pytorch-lightning = "^1.8.5.post0"
+tqdm = "^4.64.1"
+torch = "^1.13.1"
+transformers = "^4.25.1"
+pyspellchecker = "^0.7.1"
+catboost = "^1.1.1"
+scikit-learn = "^1.0.2"
+wandb = "^0.13.7"
+easydict = "^1.10"
+nltk = "^3.8"
+seaborn = "^0.12.1"
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"

src/__init__.py ADDED Viewed

File without changes

src/config/conf/config.yaml ADDED Viewed

	@@ -0,0 +1,11 @@

+defaults:
+  - predictor: bert_with_handcrafted_feature_predictor
+  - validator: cross_validator
+  - _self_
+timestamp: ${now:%Y-%m-%d}/${now:%H-%M-%S}
+cwd: ${hydra:runtime.cwd}
+hydra:
+  job:
+    chdir: True

src/config/conf/experiment/sanity_bert_finetuning_predictor.yaml ADDED Viewed

	@@ -0,0 +1,19 @@

+# @package _global_
+name: sanity checking of BertFinetuningPredictor
+defaults:
+  - override /predictor: bert_finetuning_predictor
+validator:
+  saving_dir: checkpoints
+predictor:
+  batch_size: 8
+  num_workers: 8
+  max_length: 64
+  weight_decay: 0.01
+  accelerator: gpu
+  max_epochs: 2
+  train_size: 0.8
+  num_cross_val_splits: 5

src/config/conf/experiment/sanity_bert_with_handcrafted_feature_predictor.yaml ADDED Viewed

	@@ -0,0 +1,13 @@

+# @package _global_
+name: sanity checking of BertWithHandcraftedFeaturePredictor
+defaults:
+  - override /predictor: bert_with_handcrafted_feature_predictor
+validator:
+  saving_dir: checkpoints
+  n_splits: 2
+predictor:
+  catboost_iter: 5

src/config/conf/experiment/sanity_constant_predictor.yaml ADDED Viewed

	@@ -0,0 +1,9 @@

+# @package _global_
+name: sanity checking of ConstantPredictorSolution
+defaults:
+  - override /predictor: constant_predictor
+validator:
+  saving_dir: checkpoints

src/config/conf/experiment/sanity_many_bert_with_handcrafted_feature_predictor.yaml ADDED Viewed

	@@ -0,0 +1,15 @@

+# @package _global_
+name: sanity checking of ManyBertWithHandcraftedFeaturePredictor
+defaults:
+  - override /predictor: many_bert_with_handcrafted_feature_predictor
+validator:
+  saving_dir: checkpoints
+predictor:
+  model_names:
+    - bert-base-uncased
+    - bert-base-cased
+  catboost_iter: 5

src/config/conf/predictor/bert_finetuning_predictor.yaml ADDED Viewed

	@@ -0,0 +1,16 @@

+# Settings for the predictor class named in `_target_`.
+_target_: src.solutions.bert_finetune_solution.BertFinetuningPredictor
+model_name: microsoft/deberta-v3-large
+num_classes: 6
+# Written with an explicit mantissa dot so YAML 1.1 loaders also read it as a
+# float (a bare `2e-5` is a string to them).
+lr: 2.0e-5
+batch_size: 8
+num_workers: 8
+max_length: 512
+weight_decay: 0.01
+accelerator: gpu
+max_epochs: 5
+accumulate_grad_batches: 4
+precision: 16
+gradient_clip_val: 1000
+train_size: 0.8
+num_cross_val_splits: 5
+num_frozen_layers: 20

src/config/conf/predictor/bert_with_handcrafted_feature_predictor.yaml ADDED Viewed

	@@ -0,0 +1,4 @@

+_target_: src.solutions.bert_featurizer_solution.BertWithHandcraftedFeaturePredictor
+model_name: bert-base-uncased
+catboost_iter: 500
+saving_dir: checkpoints

src/config/conf/predictor/constant_predictor.yaml ADDED Viewed

	@@ -0,0 +1 @@


+_target_: src.solutions.constant_predictor.ConstantPredictorSolution