klasocki committed on
Commit
03eec87
•
1 Parent(s): 5262393

Make the project installable, move dependencies to setup.py

Browse files

Simplifies testing and dockerfile.
Docker now runs 80 internally, but still exposes 8000

.dockerignore CHANGED
@@ -1,7 +1,7 @@
1
  .idea
2
  data/
3
  .pytest_cache
 
4
  .gitignore
5
  README.md
6
  openapi.yaml
7
-
 
1
  .idea
2
  data/
3
  .pytest_cache
4
+ .eggs
5
  .gitignore
6
  README.md
7
  openapi.yaml
 
Dockerfile CHANGED
@@ -6,22 +6,20 @@ ENV PYTHONUNBUFFERED=1
6
  RUN python -m venv /venv
7
  ENV PATH="/venv/bin:$PATH"
8
 
9
- COPY requirements.txt .
10
- COPY test-requirements.txt .
11
  RUN pip install --upgrade pip
12
- RUN pip install --no-cache-dir --upgrade -r requirements.txt
13
 
14
- COPY src/baseline.py src/baseline.py
15
  ENV TRANSFORMERS_CACHE=/coma-fixer/.cache
16
- RUN python src/baseline.py # This pre-downloads models and tokenizers
17
 
18
  COPY . .
19
 
20
  FROM base as test
21
 
22
- RUN pip install -r test-requirements.txt
23
- WORKDIR src
24
- RUN python -m pytest ../tests
25
 
26
  FROM python:3.10-slim as deploy
27
 
@@ -33,5 +31,5 @@ ENV PATH="/venv/bin:$PATH"
33
  ENV TRANSFORMERS_CACHE=/coma-fixer/.cache
34
  COPY --from=base /coma-fixer/.cache /coma-fixer/.cache
35
 
36
- EXPOSE 8000
37
- CMD uvicorn "app:app" --port 8000 --host "0.0.0.0"
 
6
  RUN python -m venv /venv
7
  ENV PATH="/venv/bin:$PATH"
8
 
9
+ COPY setup.py .
 
10
  RUN pip install --upgrade pip
11
+ RUN pip install --no-cache-dir --upgrade .
12
 
13
+ COPY commafixer/src/baseline.py commafixer/src/baseline.py
14
  ENV TRANSFORMERS_CACHE=/coma-fixer/.cache
15
+ RUN python commafixer/src/baseline.py # This pre-downloads models and tokenizers
16
 
17
  COPY . .
18
 
19
  FROM base as test
20
 
21
+ RUN pip install .[test]
22
+ RUN python -m pytest tests
 
23
 
24
  FROM python:3.10-slim as deploy
25
 
 
31
  ENV TRANSFORMERS_CACHE=/coma-fixer/.cache
32
  COPY --from=base /coma-fixer/.cache /coma-fixer/.cache
33
 
34
+ EXPOSE 80
35
+ CMD uvicorn "app:app" --port 80 --host "0.0.0.0"
README.md CHANGED
@@ -10,8 +10,17 @@ pinned: true
10
  app_port: 8000
11
  ---
12
 
 
 
13
  `docker log [id]` for logs from the container.
14
 
15
  `docker build -t comma-fixer --target test .` for tests
16
 
17
- `git push hub` to deploy to huggingface hub, after adding a remote
 
 
 
 
 
 
 
 
10
  app_port: 8000
11
  ---
12
 
13
+ `sudo service docker start`
14
+
15
  `docker log [id]` for logs from the container.
16
 
17
  `docker build -t comma-fixer --target test .` for tests
18
 
19
+ `git push hub` to deploy to huggingface hub, after adding a remote
20
+
21
+ Reported token classification F1 scores on commas for different languages:
22
+
23
+ | English | German | French | Italian |
24
+ |---------|--------|--------|---------|
25
+ | 0.819 | 0.945 | 0.831 | 0.798 |
26
+
app.py CHANGED
@@ -1,11 +1,10 @@
1
  from os.path import realpath
2
-
3
  import uvicorn
4
  from fastapi import FastAPI
5
  from fastapi.responses import FileResponse
6
  from fastapi.staticfiles import StaticFiles
7
 
8
- from routers import baseline
9
 
10
  app = FastAPI()
11
  app.include_router(baseline.router, prefix='/baseline')
 
1
  from os.path import realpath
 
2
  import uvicorn
3
  from fastapi import FastAPI
4
  from fastapi.responses import FileResponse
5
  from fastapi.staticfiles import StaticFiles
6
 
7
+ from commafixer.routers import baseline
8
 
9
  app = FastAPI()
10
  app.include_router(baseline.router, prefix='/baseline')
__init__.py → commafixer/__init__.py RENAMED
File without changes
{routers → commafixer/routers}/__init__.py RENAMED
File without changes
{routers → commafixer/routers}/baseline.py RENAMED
@@ -1,7 +1,7 @@
1
  from fastapi import APIRouter, HTTPException
2
  import logging
3
 
4
- from src.baseline import BaselineCommaFixer
5
 
6
 
7
  logger = logging.Logger(__name__)
 
1
  from fastapi import APIRouter, HTTPException
2
  import logging
3
 
4
+ from commafixer.src.baseline import BaselineCommaFixer
5
 
6
 
7
  logger = logging.Logger(__name__)
{src → commafixer/src}/__init__.py RENAMED
File without changes
{src → commafixer/src}/baseline.py RENAMED
File without changes
docker-compose.yml CHANGED
@@ -18,11 +18,11 @@ services:
18
  context: ./
19
  dockerfile: Dockerfile
20
  container_name: comma-fixer
21
- command: uvicorn --host 0.0.0.0 --port 8000 "app:app"
22
  ports:
23
- - "8000:8000"
24
- # volumes:
25
- # - ./:/comma-fixer
26
  # networks:
27
  # my-network:
28
  # aliases:
 
18
  context: ./
19
  dockerfile: Dockerfile
20
  container_name: comma-fixer
21
+ command: uvicorn --host 0.0.0.0 --port 80 "app:app" --reload
22
  ports:
23
+ - "8000:80"
24
+ volumes:
25
+ - .:/comma-fixer
26
  # networks:
27
  # my-network:
28
  # aliases:
openapi.yaml CHANGED
@@ -2,7 +2,7 @@ openapi: 3.0.3
2
  info:
3
  title: Comma fixer
4
  description: Comma fixer, using machine learning to fix placement of commas within a string of text.
5
- version: 1.0.0
6
  servers:
7
  - url: 'https://localhost:5000'
8
  paths:
 
2
  info:
3
  title: Comma fixer
4
  description: Comma fixer, using machine learning to fix placement of commas within a string of text.
5
+ version: 0.1.0
6
  servers:
7
  - url: 'https://localhost:5000'
8
  paths:
requirements.txt CHANGED
@@ -1,9 +1 @@
1
- fastapi==0.101.1
2
- gunicorn==21.2.0
3
- uvicorn==0.23.2
4
- torch==2.0.1
5
- transformers==4.31.0
6
-
7
- # for the tokenizer of the baseline model
8
- protobuf==4.24.0
9
- sentencepiece==0.1.99
 
1
+ -e .
 
 
 
 
 
 
 
 
setup.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from setuptools import setup, find_packages
2
+
3
+ setup(
4
+ name='commafixer',
5
+ version='0.1.0',
6
+ description='Fixing commas using Deep Learning.',
7
+ author='Karol Lasocki',
8
+ author_email='karolasocki@gmail.com',
9
+ url='https://huggingface.co/spaces/klasocki/comma-fixer',
10
+ packages=find_packages(include=['commafixer', 'commafixer.*']),
11
+ install_requires=[
12
+ "fastapi == 0.101.1",
13
+ "uvicorn == 0.23.2",
14
+ "torch == 2.0.1",
15
+ "transformers == 4.31.0",
16
+ # for the tokenizer of the baseline model
17
+ "protobuf == 4.24.0",
18
+ "sentencepiece == 0.1.99",
19
+
20
+ ],
21
+ extras_require={
22
+ 'training': [
23
+ 'datasets==2.14.4',
24
+ 'notebook'
25
+ ],
26
+ 'test': [
27
+ 'pytest',
28
+ 'httpx'
29
+ ]
30
+ },
31
+ )
test-requirements.txt DELETED
@@ -1,2 +0,0 @@
1
- pytest
2
- httpx
 
 
 
tests/__init__.py DELETED
File without changes
tests/test_baseline.py CHANGED
@@ -1,5 +1,5 @@
1
  import pytest
2
- from baseline import BaselineCommaFixer, _remove_punctuation
3
 
4
 
5
  @pytest.fixture()
 
1
  import pytest
2
+ from commafixer.src.baseline import BaselineCommaFixer, _remove_punctuation
3
 
4
 
5
  @pytest.fixture()