Commit f006f31 — Calin Rada committed
Parent(s): init

Files changed:
- .dockerignore +9 -0
- .env.dist +10 -0
- .github/CODEOWNERS +1 -0
- .github/dependabot.yml +18 -0
- .github/workflows/deploy-docker.yml +9 -0
- .github/workflows/pr-test.yml +41 -0
- .gitignore +165 -0
- .pre-commit-config.yaml +25 -0
- .vscode/extensions.json +5 -0
- .vscode/launch.json +27 -0
- .vscode/settings.json +16 -0
- Dockerfile +27 -0
- README.md +91 -0
- benchmark/__init__.py +0 -0
- benchmark/locustfile.py +35 -0
- docker-compose.yml +27 -0
- logging.conf +20 -0
- mappingservice/__init__.py +0 -0
- mappingservice/config.py +41 -0
- mappingservice/config_consul.py +97 -0
- mappingservice/constants.py +56 -0
- mappingservice/dependencies.py +27 -0
- mappingservice/main.py +117 -0
- mappingservice/models.py +105 -0
- mappingservice/ms/__init__.py +0 -0
- mappingservice/ms/ml_models/__init__.py +0 -0
- mappingservice/ms/ml_models/base.py +66 -0
- mappingservice/ms/ml_models/bed_type.py +25 -0
- mappingservice/ms/ml_models/environment.py +30 -0
- mappingservice/ms/ml_models/room_category.py +23 -0
- mappingservice/ms/ml_models/room_features.py +19 -0
- mappingservice/ms/ml_models/room_type.py +23 -0
- mappingservice/ms/ml_models/room_view.py +18 -0
- mappingservice/ms/model_loader.py +73 -0
- mappingservice/routers/__init__.py +0 -0
- mappingservice/routers/admin.py +13 -0
- mappingservice/routers/room.py +185 -0
- mappingservice/utils.py +154 -0
- poetry.lock +0 -0
- pyproject.toml +138 -0
- tests/__init__.py +0 -0
- tests/test_langdetect.py +79 -0
- tests/test_roomcategory.py +75 -0
- tests/test_roomenvironment.py +77 -0
- tests/test_roomtype.py +71 -0
- tests/test_roomview.py +89 -0
.dockerignore
ADDED
@@ -0,0 +1,9 @@
```text
.vscode
.idea
.cache
.git
.github
.ruff_cache
.pre-commit-config.yaml
.env
act
```
.env.dist
ADDED
@@ -0,0 +1,10 @@
```ini
API_KEY=your_api_key
HUGGINGFACE_ACCESS_TOKEN=your_hf_token

TGX_CONSUL_AUTH_USER=
TGX_CONSUL_AUTH_PASS=
TGX_CONSUL_TOKEN=
TGX_CONSUL_NODES=tgx

HF_HUB_CACHE = /home/.cache/huggingface
```
.github/CODEOWNERS
ADDED
@@ -0,0 +1 @@
```text
* @Travelgate/eng-architecture
```
.github/dependabot.yml
ADDED
@@ -0,0 +1,18 @@
```yaml
# To get started with Dependabot version updates, you'll need to specify which
# package ecosystems to update and where the package manifests are located.
# Please see the documentation for all configuration options:
# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates

version: 2
updates:
  # Maintain dependencies for GitHub Actions
  - package-ecosystem: "github-actions"
    # Workflow files stored in the default location of `.github/workflows`. (You don't need to specify `/.github/workflows` for `directory`. You can use `directory: "/"`.)
    directory: "/"
    schedule:
      interval: "weekly"
  # Maintain dependencies for Python
  - package-ecosystem: "pip"
    directory: "/" # Location of package manifests
    schedule:
      interval: "weekly"
```
.github/workflows/deploy-docker.yml
ADDED
@@ -0,0 +1,9 @@
```yaml
name: Build and Deploy to Registry
on:
  workflow_dispatch
jobs:
  call-upload-to-registry:
    uses: Travelgate/ci-workflows/.github/workflows/upload-to-registry.yml@main
    permissions:
      contents: write
    secrets: inherit
```
.github/workflows/pr-test.yml
ADDED
@@ -0,0 +1,41 @@
```yaml
name: PR Python Poetry
on:
  pull_request:
    branches: [ "main" ]
jobs:
  build:
    runs-on:
      labels: ["docker"]
    container:
      image: python:3.11.8
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: '0'
      - name: Install poetry
        run: |
          python -m pip install --upgrade pip
          pip install poetry huggingface_hub[cli]
      - name: Login to huggingface
        env:
          HUGGINGFACE_TOKEN: ${{ secrets.HUGGINGFACE_TOKEN }}
        run: |
          huggingface-cli login --token=${HUGGINGFACE_TOKEN}
      - name: Install dependencies
        run: |
          poetry update
          poetry install
      - name: Lint with Ruff
        run: |
          pip install ruff
          ruff check . --output-format=github
        continue-on-error: true
      - name: Test with pytest
        env:
          APP_ENV: "dev"
          HUGGINGFACE_TOKEN: ${{ secrets.HUGGINGFACE_TOKEN }}
          HUGGINGFACE_ACCESS_TOKEN: ${{ secrets.HUGGINGFACE_TOKEN }}
          API_KEY: ${{ secrets.API_KEY }}
          HF_HUB_CACHE: "/home/.cache/huggingface"
        run: poetry run pytest --cov=.
        continue-on-error: false
```
.gitignore
ADDED
@@ -0,0 +1,165 @@
```text
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
.idea/

.env
consul.env
.DS_Store
act
```
.pre-commit-config.yaml
ADDED
@@ -0,0 +1,25 @@
```yaml
# See https://pre-commit.com for more information
# See https://pre-commit.com/hooks.html for more hooks
default_language_version:
  python: python3.11
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.6.0
    hooks:
      - id: check-added-large-files
      - id: check-toml
      - id: check-yaml
        args:
          - --unsafe
      - id: end-of-file-fixer
      - id: trailing-whitespace
  - repo: https://github.com/charliermarsh/ruff-pre-commit
    rev: v0.4.1
    hooks:
      - id: ruff
        args:
          - --fix
      - id: ruff-format
ci:
  autofix_commit_msg: 🎨 [pre-commit.ci] Auto format from pre-commit.com hooks
  autoupdate_commit_msg: ⬆ [pre-commit.ci] pre-commit autoupdate
```
.vscode/extensions.json
ADDED
@@ -0,0 +1,5 @@
```json
{
    // See http://go.microsoft.com/fwlink/?LinkId=827846
    // for the documentation about the extensions.json format
    "recommendations": ["ms-python.python", "charliermarsh.ruff"]
}
```
.vscode/launch.json
ADDED
@@ -0,0 +1,27 @@
```json
{
    "version": "0.2.0",
    "configurations": [
        {
            "name": "Python: FastAPI",
            "type": "debugpy",
            "request": "launch",
            "module": "uvicorn",
            "args": [
                "mappingservice.main:app",
                "--reload"
            ],
            "justMyCode": false
        },
        {
            "name": "Debug Tests",
            "type": "debugpy",
            "justMyCode": false,
            "request": "launch",
            "module": "pytest",
            "console": "integratedTerminal",
            "env": {
                "PYTEST_ADDOPTS": "--no-cov"
            },
        }
    ]
}
```
.vscode/settings.json
ADDED
@@ -0,0 +1,16 @@
```json
{
    "python.testing.pytestArgs": [
        "tests"
    ],
    "python.testing.unittestEnabled": false,
    "python.testing.pytestEnabled": true,

    "[python]": {
        "editor.formatOnSave": true,
        "editor.codeActionsOnSave": {
            "source.fixAll": "explicit",
            "source.organizeImports": "explicit"
        },
        "editor.defaultFormatter": "charliermarsh.ruff"
    }
}
```
Dockerfile
ADDED
@@ -0,0 +1,27 @@
```dockerfile
FROM huggingface/transformers-pytorch-gpu as base
RUN pip install fastapi uvicorn pydantic pydantic-settings langdetect accelerate optimum pytest pytest-cov
RUN useradd -m -u 1000 user
WORKDIR /code

COPY --chown=user . /code
COPY logging.conf /code

ARG APP_ENV
ENV APP_ENV=${APP_ENV}
# Conditional commands based on APP_ENV
RUN if [ "$APP_ENV" = "dev" ]; then \
    echo "Force dev dependencies"; \
    pip install pytest pytest-cov; \
    fi

RUN mkdir -p /home/.cache/huggingface
RUN mkdir -p /.cache
RUN chmod -R 777 /home/.cache/huggingface
RUN chmod -R 777 /.cache
RUN mkdir -p /code/room_environment-classifier_onxx
RUN chmod -R 777 /code/room_environment-classifier_onxx

# Remove once settings are fetched from Consul
# COPY .env /code

CMD ["uvicorn", "mappingservice.main:app", "--host", "0.0.0.0", "--port", "80"]
```
README.md
ADDED
@@ -0,0 +1,91 @@
````markdown
# Environment
Set the environment variables in the .env file or via the shell:

```bash
export API_KEY="APIKEY TO USE THIS API"
export HUGGINGFACE_ACCESS_TOKEN="HF READ ACCESS TOKEN"
```

## Consul environment
To use the environment variables from Consul, set the variables in the consul.env file or via the shell:

```bash
TGX_CONSUL_AUTH_USER="CONSUL AUTH USER"
TGX_CONSUL_AUTH_PASS="CONSUL AUTH PASSWORD"
TGX_CONSUL_TOKEN="CONSUL TOKEN"
TGX_CONSUL_NODES="CONSUL_URLS_SEPARATED_BY_;"
```

# Development
Dependencies and runs are managed with Poetry.

## Dependencies
Use Poetry to update and install dependencies:

```bash
poetry update
poetry install
```

## Build

### Using docker compose
```bash
docker compose build --no-cache
docker compose up -d
```

*NOTE* Docker compose is set to run on port 8080; change the port in docker-compose.yml if needed.

### Docker API
Check that the Docker daemon is running:

```bash
sudo systemctl start docker
```

Build the Docker image:

```bash
docker build -t mappingservice .
```

## Run

### Console

```bash
poetry run uvicorn mappingservice.main:app --reload
```

The site runs at http://127.0.0.1:8000/ml-api

### Docker
```bash
docker run -d --name mycontainer -p 80:80 mappingservice
```
##### Run

Send the Consul variables on `docker run`:

###### API
```bash
docker run -e TGX_CONSUL_NODES='URL_NODES' -e TGX_CONSUL_TOKEN='TOKEN' -e TGX_CONSUL_AUTH_USER='USER' -e TGX_CONSUL_AUTH_PASS='PWD' -d --name mycontainer -p 80:80 mappingservice
```

### Benchmark
Use Poetry to update and install the Locust dependencies:

```bash
poetry update --only benchmark
poetry install --only benchmark
```

Run Locust in a terminal:
```bash
locust -f ./benchmark/
```

The site runs at http://localhost:8089/
````
benchmark/__init__.py
ADDED
File without changes
benchmark/locustfile.py
ADDED
@@ -0,0 +1,35 @@
```python
import json
import os

from dotenv import load_dotenv
from locust import HttpUser, between, task

load_dotenv()


class ApiUser(HttpUser):
    host = 'http://127.0.0.1:8080/ml-api'
    wait_time = between(1, 2)

    @task
    def predict_all(self):
        headers = {
            'Api-key': os.getenv('API_KEY'),
            'Content-Type': 'application/json'
        }
        data = {
            "room_description": "room with sea view",
            "beds": [
                {"type": "double", "count": 2}
            ]
        }
        print("Headers:", headers)
        print("Data being sent:", data)
        with self.client.post("/predict/room/predict/all", headers=headers, data=json.dumps(data),  # noqa: E501
                              catch_response=True) as response:
            print("Response status code:", response.status_code)
            print("Response data:", response.text)
            if response.status_code != 200:
                response.failure(f"Failed with {response.status_code}: {response.text}")
            else:
                response.success()
```
docker-compose.yml
ADDED
@@ -0,0 +1,27 @@
```yaml
services:
  app:
    image: &app mrml-room-api
    build:
      context: .
      dockerfile: Dockerfile
      network: host
      args:
        tag: local
    hostname: app_local
    user: "1000:1000"
    ports:
      - 8080:80
    env_file:
      - .env
    volumes:
      - .:/code
    networks:
      - webnet

networks:
  webnet:
    driver: bridge
    # Uncomment ipam if OpenVPN is causing troubles
    # ipam:
    #   config:
    #     - subnet: 172.16.57.0/24
```
logging.conf
ADDED
@@ -0,0 +1,20 @@
```ini
[loggers]
keys=root

[logger_root]
level=INFO
handlers=stream_handler

[handlers]
keys=stream_handler

[formatters]
keys=formatter

[handler_stream_handler]
class=StreamHandler
formatter=formatter
args=(sys.stdout,)

[formatter_formatter]
format=%(asctime)s %(name)-12s %(levelname)-8s %(message)s
```
mappingservice/__init__.py
ADDED
File without changes
mappingservice/config.py
ADDED
@@ -0,0 +1,41 @@
```python
import os

from pydantic import SecretStr
from pydantic_settings import (
    BaseSettings,
    PydanticBaseSettingsSource,
    SettingsConfigDict,
)

from mappingservice.config_consul import ConsulConfigSettingsSource


class Settings(BaseSettings):
    model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8")

    api_key: SecretStr
    huggingface_access_token: SecretStr
    hf_hub_cache: str
    app_env: str = "prod"

    @classmethod
    def settings_customise_sources(
        cls,
        settings_cls,
        init_settings: PydanticBaseSettingsSource,
        env_settings: PydanticBaseSettingsSource,
        dotenv_settings: PydanticBaseSettingsSource,
        file_secret_settings: PydanticBaseSettingsSource,
    ):
        settings_list = [
            init_settings,
            env_settings,
            dotenv_settings,
            file_secret_settings,
            ConsulConfigSettingsSource(settings_cls),
        ]

        if os.getenv("APP_ENV", "prod").lower() == "dev":
            del settings_list[-1]

        return tuple(settings_list)
```
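
For orientation, a minimal sketch (not part of the commit) of how the source order above plays out: pydantic-settings gives earlier sources in the returned tuple higher precedence, so the Consul source only fills fields that init kwargs, environment variables, .env, and file secrets left unset, and in dev mode it is dropped entirely. The environment values below are invented placeholders.

```python
# Hypothetical values, for illustration only.
import os

os.environ["APP_ENV"] = "dev"                           # drops the Consul source (see above)
os.environ["API_KEY"] = "dummy-key"                     # env beats .env and Consul
os.environ["HUGGINGFACE_ACCESS_TOKEN"] = "dummy-token"
os.environ["HF_HUB_CACHE"] = "/tmp/hf-cache"

from mappingservice.config import Settings

settings = Settings()
print(settings.app_env)   # "dev"
print(settings.api_key)   # SecretStr masks the value when printed
```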
mappingservice/config_consul.py
ADDED
@@ -0,0 +1,97 @@
```python
import json
import logging.config
import random
from typing import Any

import requests
from pydantic import HttpUrl, SecretStr
from pydantic.fields import FieldInfo
from pydantic_settings import (
    BaseSettings,
    PydanticBaseSettingsSource,
    SettingsConfigDict,
)
from requests.exceptions import HTTPError, RequestException, Timeout

logging.config.fileConfig("logging.conf", disable_existing_loggers=False)
logger = logging.getLogger(__name__)
logger.info("Logger Configured: config_consul")


class ConsulSettings(BaseSettings):
    model_config = SettingsConfigDict(env_file="consul.env", env_file_encoding="utf-8")
    tgx_consul_nodes: str
    tgx_consul_auth_user: str
    tgx_consul_auth_pass: SecretStr
    tgx_consul_token: SecretStr


class ConsulClient:
    def __init__(self, settings: ConsulSettings):
        logger.info(
            f"Consul init:[{settings.tgx_consul_nodes}], "
            f"user:[{settings.tgx_consul_auth_user}]"
        )

        # Randomly pick a valid url for Consul
        consul_nodes = settings.tgx_consul_nodes.split(";")
        consul_url: HttpUrl = random.choice(consul_nodes)

        self.headers = {
            "X-Consul-Token": f"{settings.tgx_consul_token.get_secret_value()}"
        }
        self.auth = (
            settings.tgx_consul_auth_user,
            settings.tgx_consul_auth_pass.get_secret_value(),
        )
        self.url = consul_url
        self.timeout = 20

    def get_key_value(self, key: str, dc: str) -> dict:
        url = f"{self.url}/v1/kv/{key}?dc={dc}&raw=false"
        logger.info(f"Downloading keys from Consul: [{url}, {key}]")
        response = requests.get(
            url=url, headers=self.headers, auth=self.auth, timeout=self.timeout
        )
        response.raise_for_status()
        return response.json()


class ConsulConfigSettingsSource(PydanticBaseSettingsSource):
    def get_field_value(
        self, field: FieldInfo, field_name: str
    ):
        pass

    def prepare_field_value(
        self, field_name: str, field: FieldInfo, value: Any, value_is_complex: bool
    ) -> Any:
        pass

    def __call__(self):
        """
        Customize the settings source from Consul.
        Needs a url and secrets to access Consul and retrieve settings.
        """
        CONSUL_KEY = "mapping/apps/room/ml-api/config"

        consul_settings = ConsulSettings()
        consul_cli = ConsulClient(consul_settings)

        logger.info(f"Getting consul key value: [{CONSUL_KEY}]")
        ret_json = {}
        try:
            ret_json = consul_cli.get_key_value(CONSUL_KEY, "gc-we1d")
        except (Timeout, HTTPError, RequestException) as err:
            logger.warning(f"Error getting from Consul {err}")
            return {}

        if not ret_json:
            logger.warning(f"Consul key not found: [{CONSUL_KEY}] (dc: gc-we1d)")
            return {}
        else:
            logger.info("Consul key read successfully")
            return ret_json
        # convert Service Account json to string
        # ret_json["tgx_pubsub_events_credentials_service_account_json"] = json.dumps(
        #     ret_json["tgx_pubsub_events_credentials_service_account_json"])
```
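
As a reading aid, a sketch of the request `get_key_value` issues for the key used in `__call__` (the host name is invented; only the path and parameters come from the code above):

```python
# One node is chosen at random from TGX_CONSUL_NODES, then:
#
#   GET https://consul.example.com/v1/kv/mapping/apps/room/ml-api/config?dc=gc-we1d&raw=false
#
# with the X-Consul-Token header and HTTP basic auth taken from ConsulSettings.
# Whatever response.json() yields is returned directly as the settings payload.
```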
mappingservice/constants.py
ADDED
@@ -0,0 +1,56 @@
```python
AVAILABLE_LANGUAGES = ["en", "es", "na"]  # na is a placeholder for the language detection model  # noqa: E501

DEFAULT_LABEL = "No data"

DEFAULT_SCORE = 0.0

MODEL_NAMES = {
    "bed_type": {
        "classification": "text-classification",
        "en": "travelgate/bed_type_model-classifier",
        "es": "travelgate/bed_type_model-classifier",
    },
    "room_category": {
        "classification": "text-classification",
        "en": "travelgate/room_category-classifier",
        "es": "travelgate/room_category-classifier",
    },
    "environment": {
        "classification": "text-classification",
        "en": "travelgate/room_environment-classifier",
        "es": "travelgate/room_environment-classifier",
    },
    "room_features": {
        "classification": "token-classification",
        "en": "travelgate/feature_predicted_en",
        "es": "travelgate/feature_predicted_es",
    },
    "room_type": {
        "classification": "text-classification",
        "en": "travelgate/room_type-classifier",
        "es": "travelgate/room_type-classifier"
    },
    "room_view": {
        "classification": "token-classification",
        "en": "travelgate/view_predicted_en",
        "es": "travelgate/view_predicted_es"
    },
    "lang_detect": {
        "classification": "text-classification",
        "na": "papluca/xlm-roberta-base-language-detection",  # use na as a placeholder
    }
}

SPANISH_KEYWORDS = [
    "habitacion", "cama", "bano", "aire acondicionado", "television", "desayuno",
    "vista", "servicio", "caja fuerte", "ducha", "sabanas", "toallas",
    "limpieza", "recepcion", "piscina", "gimnasio", "ascensor", "estacionamiento",
    "secador de pelo", "armario", "escritorio", "telefono", "decoracion", "silla",
    "mesa", "luz", "ventana", "apartamento", "casa", "vistas", "presidencial",
    "estandar", "estudio", "terraza", "doble"
]

ENGLISH_KEYWORDS = {
    "apartment", "room", "house", "views", "presidential", "studio", "suite",
    "standard", "deluxe"
}
```
mappingservice/dependencies.py
ADDED
@@ -0,0 +1,27 @@
```python
import logging.config
from functools import lru_cache

from fastapi import HTTPException, Security, status
from fastapi.security.api_key import APIKeyHeader

from mappingservice import config

logging.config.fileConfig("logging.conf", disable_existing_loggers=False)
logger = logging.getLogger(__name__)


@lru_cache
def get_settings() -> config.Settings:
    return config.Settings()


API_KEY = get_settings().api_key.get_secret_value()
api_key_header_auth = APIKeyHeader(name="Api-key", auto_error=True)
mc = {}


def get_api_key(api_key_header: str = Security(api_key_header_auth)):
    if api_key_header != API_KEY:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid API Key"
        )
```
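
A quick client-side sketch of what the `APIKeyHeader` dependency above expects (the URL and key are placeholders):

```python
import requests

resp = requests.get(
    "http://127.0.0.1:8000/ml-api/predict/room/type",   # placeholder local URL
    params={"room_description": "deluxe double room"},
    headers={"Api-key": "your_api_key"},                # must equal settings.api_key
    timeout=30,
)
print(resp.status_code)  # 401 with a wrong key, 200 otherwise
```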
mappingservice/main.py
ADDED
@@ -0,0 +1,117 @@
```python
import logging.config
import time
from contextlib import asynccontextmanager

import psutil
from fastapi import Depends, FastAPI, Request
from fastapi.responses import JSONResponse

import mappingservice.dependencies as deps
from mappingservice.constants import AVAILABLE_LANGUAGES, MODEL_NAMES
from mappingservice.dependencies import get_api_key, get_settings
from mappingservice.ms.model_loader import ModelLoader
from mappingservice.routers import admin, room
from mappingservice.utils import log_memory_usage, predict_language

logging.config.fileConfig("logging.conf", disable_existing_loggers=False)
logger = logging.getLogger(__name__)


@asynccontextmanager
async def lifespan(application: FastAPI):
    # Load the ML models
    settings = get_settings()
    initial_memory = psutil.Process().memory_info().rss

    start_time = time.time()

    ml_model = ModelLoader(settings, MODEL_NAMES)
    for k in MODEL_NAMES:
        logger.info(f"Loading model: {k}...")
        for lang in AVAILABLE_LANGUAGES:
            model_pipeline = ml_model.get_model(k, lang)
            try:
                deps.mc[k][lang] = model_pipeline
            except KeyError:
                deps.mc[k] = {}
                deps.mc[k][lang] = model_pipeline

    elapsed_time = time.time() - start_time
    num_cores = psutil.cpu_count(logical=True)
    total_ram = psutil.virtual_memory().total / (1024 ** 3)  # Convert to GB
    final_memory = psutil.Process().memory_info().rss
    total_memory_used = (final_memory - initial_memory) / (1024 ** 3)  # Convert to GB

    logger.info("*" * 60)  # ASCII Art
    logger.info(f"* Number of Cores: {num_cores}")
    logger.info(f"* Total RAM: {total_ram:.2f} GB")

    logger.info(
        f"* AI Models loaded in {elapsed_time:.2f} seconds, "
        f"using {total_memory_used:.2f} GB"
    )
    logger.info("*" * 60)  # ASCII Art

    yield
    # Clean up the ML models and release the resources
    deps.mc.clear()


app = FastAPI(root_path="/ml-api", lifespan=lifespan)


@app.middleware("http")
async def handle_exceptions(request: Request, call_next):
    try:
        response = await call_next(request)

        return response
    except Exception as e:
        logging.error(f"Unhandled error: {e}")
        logging.exception(e)
        log_memory_usage()

        return JSONResponse(status_code=500, content={
            "message": "Internal Server Error"
        })
    finally:
        log_memory_usage()


@app.middleware("http")
async def detect_language(request: Request, call_next):
    room_description = request.query_params.get("room_description", "")
    language = "en"

    if not room_description:
        try:
            body = await request.json()
            room_description = body.get("room_description", "")
        except ValueError:
            # If the room description is still empty, continue with the request as
            # the language detection is not required
            if not room_description:
                response = await call_next(request)
                return response

    try:
        language = predict_language(room_description)

        if language not in AVAILABLE_LANGUAGES:
            logger.error(f"Unsupported language for room description: {room_description}. Falling back to model prediction.")  # noqa: E501

            language = deps.mc['lang_detect']['na'].predict(room_description)[0][0]['label']  # noqa: E501
            if language not in AVAILABLE_LANGUAGES:
                logger.error(f"Unsupported language for room description using model prediction: {room_description}. Falling back to English.")  # noqa: E501
                language = "en"
    except Exception as e:
        logger.error(f"Error detecting language: {e}")

    request.state.predicted_language = language

    response = await call_next(request)
    return response


app.include_router(room.router, dependencies=[Depends(get_api_key)])
app.include_router(admin.router)
```
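
For reference, once the lifespan above finishes, `deps.mc` is a two-level dict keyed by model name and then language. A sketch of its shape (entries abbreviated):

```python
# deps.mc == {
#     "bed_type":    {"en": <pipeline>, "es": <pipeline>, "na": None},
#     ...
#     "lang_detect": {"en": None, "es": None, "na": <pipeline>},
# }
# None entries appear where MODEL_NAMES has no repo for that language,
# because ModelLoader.get_model returns None for unsupported languages.
```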
mappingservice/models.py
ADDED
@@ -0,0 +1,105 @@
```python
import logging.config
from enum import Enum
from typing import List, Union

from pydantic import BaseModel, validator

logger = logging.getLogger(__name__)


class PredictionResponse(BaseModel):
    label: str
    score: float


class BedType(str, Enum):
    none = ""
    single = "single"
    double = "double"
    queen = "queen"
    king = "king"
    california_king = "california_king"
    bunk = "bunk"
    sofa = "sofa"
    rollaway = "rollaway"
    futon = "futon"


class BedData(BaseModel):
    type: Union[BedType, None] = None
    count: Union[int, None] = None

    @validator("count", pre=True, always=True)
    def validate_count(cls, v):
        return v if v and v > 0 else None

    @validator("type", pre=True, always=True)
    def validate_type(cls, v):
        try:
            return BedType(v.lower()) if v else None
        except ValueError:
            return None

    @classmethod
    def create_valid_bed(cls, type: Union[str, None], count: Union[int, None]):
        if type and count and count > 0:
            try:
                type_enum = BedType(type.lower())
                return cls(type=type_enum, count=count)
            except ValueError:
                logger.error(f"Invalid bed type: {type}")
                return None

    def __init__(self, **data):
        variations = {
            "individual": BedType.single,
            "camaindividual": BedType.single,
            "doble": BedType.double,
            "camadoble": BedType.double,
            "reina": BedType.queen,
            "queenbed": BedType.queen,
            "rey": BedType.king,
            "californiaking": BedType.california_king,
            "litera": BedType.bunk,
            "sofá": BedType.sofa,
            "sofacama": BedType.sofa,
            "plegable": BedType.rollaway,
            "futón": BedType.futon,
            "twin": BedType.single,
            "twinbed": BedType.single,
            "singlebed": BedType.single,
            "doublebed": BedType.double,
            "largedouble": BedType.queen,
            "extralargedouble": BedType.king,
            "bunkbed": BedType.bunk,
            "couch": BedType.sofa,
            "airmattress": BedType.futon,
            "floormattress": BedType.futon,
        }

        bed_type = data.get("type")
        if bed_type:
            normalized_bed_type = bed_type.replace(" ", "").lower()
            data["type"] = variations.get(normalized_bed_type, bed_type)
        super().__init__(**data)


class RoomData(BaseModel):
    room_description: str
    beds: Union[BedData, None]


class Predictions(BaseModel):
    type: PredictionResponse
    category: PredictionResponse
    environment: List[PredictionResponse]
    feature: List[PredictionResponse]
    view: List[PredictionResponse]
    language_detected: str
    beds: List[BedData]


class AllPredictionsResponse(BaseModel):
    predictions: Predictions
```
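
A small demonstration (inputs invented) of how `BedData.__init__` and the validators interact: aliases are normalized before validation, and unknown types or non-positive counts collapse to None.

```python
from mappingservice.models import BedData

print(BedData(type="Twin Bed", count=2))
# type=<BedType.single: 'single'> count=2  ("twinbed" hits the variations map)

print(BedData(type="water bed", count=0))
# type=None count=None  (unknown alias fails the BedType lookup; count 0 is rejected)
```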
mappingservice/ms/__init__.py
ADDED
File without changes
mappingservice/ms/ml_models/__init__.py
ADDED
File without changes
mappingservice/ms/ml_models/base.py
ADDED
@@ -0,0 +1,66 @@
```python
import re


class Base:

    CONFIDENCE_THRESHOLD = 0.95

    def predict(self, data, pipeline, language):
        raise NotImplementedError()

    @staticmethod
    def process_ner_results(ner_results, confidence_threshold):
        processed_entities = []
        current_entity_group = []
        current_entity_score = 0

        for result in ner_results:
            if result["score"] < confidence_threshold:
                continue

            if result["word"].startswith("##"):
                if current_entity_group:
                    current_entity_group[-1] += result["word"][2:]
                continue

            if current_entity_group:
                average_score = current_entity_score / len(current_entity_group)
                processed_entities.append({
                    "word": " ".join(current_entity_group),
                    "score": average_score
                })
                current_entity_group = []
                current_entity_score = 0

            current_entity_group.append(result["word"])
            current_entity_score += result["score"]

        if current_entity_group:
            average_score = current_entity_score / len(current_entity_group)
            processed_entities.append({
                "word": " ".join(current_entity_group),
                "score": average_score
            })

        return processed_entities

    @staticmethod
    def preprocess_data(data, language):
        translations = {
            "deluxe": "de lujo",
            "twin": "individual",
            "size": "tamaño",
            "premium": "calidad",
            "double": "doble",
            "room": "habitacion"
        }

        if language == "es":
            for key, value in translations.items():
                data = data.replace(key, value)

        data = re.sub(r'\b\w{1,2}\b', '', data)
        data = re.sub(r'\b\d+\b', '', data)
        data = re.sub(r'\s+', ' ', data).strip()  # Remove extra spaces and leading/trailing whitespace  # noqa E501

        return data
```
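
A hand-made example (not from the repo's tests) of `process_ner_results` on WordPiece output: "##" fragments are glued onto the previous token, low-score tokens are dropped, and each flushed group carries the average score of its members.

```python
from mappingservice.ms.ml_models.base import Base

ner_results = [
    {"word": "sea", "score": 0.99},
    {"word": "##side", "score": 0.97},   # merged into "seaside"
    {"word": "view", "score": 0.98},
    {"word": "noise", "score": 0.10},    # below threshold: skipped
]
print(Base.process_ner_results(ner_results, confidence_threshold=0.95))
# [{'word': 'seaside', 'score': 0.99}, {'word': 'view', 'score': 0.98}]
```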
mappingservice/ms/ml_models/bed_type.py
ADDED
@@ -0,0 +1,25 @@
```python
import re

from .base import Base


class BedType(Base):

    def predict(self, data, pipeline, language):
        return pipeline.predict(data)[0][0]

    @staticmethod
    def extract_bed_numbers(description):
        pattern = re.compile(
            r"\b(\d+|\bone\b|\btwo\b|\bthree\b|\buno\b|\bdos\b|\btres\b)\b",
            re.IGNORECASE,
        )
        matches = pattern.findall(description)
        word_to_number = {"one": 1, "two": 2, "three": 3, "uno": 1, "dos": 2, "tres": 3}
        bed_numbers = []
        for match in matches:
            if match.isdigit():
                bed_numbers.append(int(match))
            else:
                bed_numbers.append(word_to_number.get(match.lower(), 1))
        return bed_numbers
```
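
A quick illustration of `extract_bed_numbers` (input invented): digits parse directly, and the English/Spanish number words go through `word_to_number`.

```python
from mappingservice.ms.ml_models.bed_type import BedType

print(BedType.extract_bed_numbers("two double beds and 1 sofa bed"))
# [2, 1]
```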
mappingservice/ms/ml_models/environment.py
ADDED
@@ -0,0 +1,30 @@
```python
import logging

from .base import Base

logger = logging.getLogger(__name__)


class Environment(Base):

    CONFIDENCE_THRESHOLD = 0.5

    def predict(self, data, pipeline, language):
        data = data.replace("connecting", "connected")

        scores = pipeline.predict(data)[0]

        predicted_results = []
        for result in scores:
            if result["score"] > self.CONFIDENCE_THRESHOLD:
                predicted_results.append(
                    {
                        "label": result["label"],
                        "score": result["score"],
                        "language_detected": language,
                    }
                )

        logger.info(f"Predicted Labels and Scores: {predicted_results}")

        return predicted_results
```
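
For reference, the shape `Environment.predict` returns: every label whose score clears the 0.5 threshold (the values below are invented):

```python
# [
#     {"label": "connected", "score": 0.91, "language_detected": "en"},
# ]
# An empty list comes back when no label clears CONFIDENCE_THRESHOLD.
```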
mappingservice/ms/ml_models/room_category.py
ADDED
@@ -0,0 +1,23 @@
```python
from .base import Base


class RoomCategory(Base):

    CONFIDENCE_THRESHOLD = 0.7

    def predict(self, data: str, pipeline, language) -> dict:
        predictions = pipeline.predict(data)

        if predictions and predictions[0][0]["score"] > self.CONFIDENCE_THRESHOLD:
            return {
                "language_detected": language,
                "label": predictions[0][0]["label"],
                "score": predictions[0][0]["score"],
            }
        else:
            score = predictions[0][0]["score"] if predictions else 0
            return {
                "language_detected": language,
                "label": "default",
                "score": score
            }
```
mappingservice/ms/ml_models/room_features.py
ADDED
@@ -0,0 +1,19 @@
```python
from .base import Base


class RoomFeatures(Base):

    CONFIDENCE_THRESHOLD = 0.5

    def predict(self, data, pipeline, language):
        preprocessed_data = self.preprocess_data(data, language)

        results = pipeline.predict(preprocessed_data)

        return {
            "language_detected": language,
            "features": self.process_ner_results(
                results, self.CONFIDENCE_THRESHOLD
            ),
        }
```
mappingservice/ms/ml_models/room_type.py
ADDED
@@ -0,0 +1,23 @@
```python
from .base import Base


class RoomType(Base):

    CONFIDENCE_THRESHOLD = 0.7

    def predict(self, data: str, pipeline, language) -> dict:
        predictions = pipeline.predict(data)

        if predictions and predictions[0][0]["score"] > self.CONFIDENCE_THRESHOLD:
            return {
                "language_detected": language,
                "label": predictions[0][0]["label"],
                "score": predictions[0][0]["score"],
            }
        else:
            score = predictions[0][0]["score"] if predictions else 0
            return {
                "language_detected": language,
                "label": "default",
                "score": score
            }
```
mappingservice/ms/ml_models/room_view.py
ADDED
@@ -0,0 +1,18 @@
```python
from .base import Base


class RoomView(Base):

    CONFIDENCE_THRESHOLD = 0.6

    def predict(self, data, pipeline, language):
        preprocessed_data = self.preprocess_data(data, language)
        ner_results = pipeline.predict(preprocessed_data)

        return {
            "language_detected": language,
            "has_views": bool(ner_results),
            "views": self.process_ner_results(
                ner_results, self.CONFIDENCE_THRESHOLD
            ),
        }
```
mappingservice/ms/model_loader.py
ADDED
@@ -0,0 +1,73 @@
```python
import logging
import time

import torch
from optimum.bettertransformer import BetterTransformer
from transformers import (
    AutoModelForSequenceClassification,
    AutoModelForTokenClassification,
    AutoTokenizer,
    pipeline,
)

from mappingservice.config import Settings


class ModelLoader:
    def __init__(self, settings: Settings, model_names):
        self.settings = settings
        self.model_names = model_names
        self.model_cache = {}
        self.token = self.settings.huggingface_access_token.get_secret_value()
        self.classification = "token-classification"

    def load_model(self, model_name):
        start_time = time.time()
        if self.classification == "token-classification":
            model = AutoModelForTokenClassification.from_pretrained(model_name, token=self.token)  # noqa: E501
        else:
            model = AutoModelForSequenceClassification.from_pretrained(model_name, token=self.token)  # noqa: E501

        tokenizer = AutoTokenizer.from_pretrained(model_name, token=self.token)
        model = BetterTransformer.transform(model)
        end_time = time.time()
        logging.info(f"Model {model_name} loaded in {end_time - start_time:.2f} seconds.")  # noqa: E501

        return model, tokenizer

    def get_model(self, model_name, language):
        mc = self.model_cache.get(model_name) or {}
        if language in mc:
            logging.info(f"Using cached model for language: {language}")
            return self.model_cache[model_name][language]

        self.classification = self.model_names.get(model_name).get("classification")

        model_name = self.model_names.get(model_name).get(language)

        if not model_name:
            logging.warning(f"Unsupported language: {language}")
            return None

        model, tokenizer = self.load_model(model_name)

        pipeline_dict = {
            'task': self.classification,
            'model': model,
            'tokenizer': tokenizer,
            'token': self.token,
            'device': 0 if torch.cuda.is_available() else -1,
        }

        if self.classification == "token-classification":
            pipeline_dict.update({'framework': 'pt'})

        if self.classification == "text-classification":
            pipeline_dict.update({'top_k': 1})

        model_pipeline = pipeline(**pipeline_dict)

        self.model_cache[model_name] = {}
        self.model_cache[model_name][language] = model_pipeline

        return model_pipeline
```
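
An illustrative use of `ModelLoader` with the `Settings` and `MODEL_NAMES` defined earlier in this commit (it needs a valid Hugging Face token, since the models are fetched with the access token):

```python
from mappingservice.config import Settings
from mappingservice.constants import MODEL_NAMES
from mappingservice.ms.model_loader import ModelLoader

loader = ModelLoader(Settings(), MODEL_NAMES)
pipe = loader.get_model("room_type", "en")     # loads travelgate/room_type-classifier
print(pipe.predict("deluxe double room with sea view")[0][0])
# e.g. {'label': '...', 'score': 0.97}  (the actual label depends on the model)
```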
mappingservice/routers/__init__.py
ADDED
File without changes
mappingservice/routers/admin.py
ADDED
@@ -0,0 +1,13 @@
```python
from fastapi import APIRouter

router = APIRouter(tags=["admin"])


@router.get("/health")
async def health():
    return {"health": "OK"}


@router.get("/status")
async def status():
    return {"status": "OK"}
```
mappingservice/routers/room.py
ADDED
@@ -0,0 +1,185 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import logging
|
2 |
+
import time
|
3 |
+
from concurrent.futures import ThreadPoolExecutor
|
4 |
+
|
5 |
+
from fastapi import APIRouter, Query, Request
|
6 |
+
|
7 |
+
from mappingservice.constants import DEFAULT_LABEL, DEFAULT_SCORE
|
8 |
+
from mappingservice.dependencies import (
|
9 |
+
mc,
|
10 |
+
)
|
11 |
+
from mappingservice.models import (
|
12 |
+
AllPredictionsResponse,
|
13 |
+
PredictionResponse,
|
14 |
+
Predictions,
|
15 |
+
RoomData,
|
16 |
+
)
|
17 |
+
from mappingservice.ms.ml_models.bed_type import BedType as BedTypeModel
|
18 |
+
from mappingservice.ms.ml_models.environment import Environment
|
19 |
+
from mappingservice.ms.ml_models.room_category import RoomCategory
|
20 |
+
from mappingservice.ms.ml_models.room_features import RoomFeatures
|
21 |
+
from mappingservice.ms.ml_models.room_type import RoomType
|
22 |
+
from mappingservice.ms.ml_models.room_view import RoomView
|
23 |
+
from mappingservice.utils import (
|
24 |
+
get_bed_predictions,
|
25 |
+
process_predictions,
|
26 |
+
safe_round,
|
27 |
+
)
|
28 |
+
|
29 |
+
logging.basicConfig(
|
30 |
+
level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %(message)s"
|
31 |
+
)
|
32 |
+
logger = logging.getLogger(__name__)
|
33 |
+
|
34 |
+
router = APIRouter(
|
35 |
+
prefix="/predict/room", tags=["room"], responses={404: {"description": "Not found"}}
|
36 |
+
)
|
37 |
+
|
38 |
+
|
39 |
+
def get_room_type_prediction(room_description: str, language: str = "en"):
|
40 |
+
pipeline = mc['room_type'][language]
|
41 |
+
model = BedTypeModel()
|
42 |
+
return {"msg": model.predict(room_description, pipeline, language)}
|
43 |
+
|
44 |
+
|
45 |
+
def get_view_prediction(room_description: str, language: str = "en"):
|
46 |
+
pipeline = mc['room_view'][language]
|
47 |
+
model = RoomView()
|
48 |
+
return {"view_prediction": model.predict(room_description, pipeline, language)}
|
49 |
+
|
50 |
+
|
51 |
+
def get_room_category_prediction(room_description: str, language: str = "en"):
|
52 |
+
pipeline = mc['room_category'][language]
|
53 |
+
model = RoomCategory()
|
54 |
+
return {"msg": model.predict(room_description, pipeline, language)}
|
55 |
+
|
56 |
+
|
57 |
+
def get_feature_prediction(room_description: str, language: str = "en"):
|
58 |
+
pipeline = mc['room_features'][language]
|
59 |
+
model = RoomFeatures()
|
60 |
+
return {"feature_prediction": model.predict(room_description, pipeline, language)}
|
61 |
+
|
62 |
+
|
63 |
+
def get_room_environment_prediction(room_description: str, language: str = "en"):
|
64 |
+
pipeline = mc['environment'][language]
|
65 |
+
model = Environment()
|
66 |
+
return {"msg": model.predict(room_description, pipeline, language)}
|
67 |
+
|
68 |
+
|
69 |
+
@router.post("/predict/beds")
|
70 |
+
async def predict_beds(request: Request, room_description: str = Query(...)): # noqa: E501
|
71 |
+
language = request.state.predicted_language
|
72 |
+
pipeline = mc['bed_type'][language]
|
73 |
+
model = BedTypeModel()
|
74 |
+
prediction = model.predict(room_description, pipeline, language)
|
75 |
+
|
76 |
+
return prediction
|
77 |
+
|
78 |
+
|
79 |
+
@router.get("/type")
|
80 |
+
async def predict_room_type_endpoint(request: Request, room_description: str = Query(...)): # noqa: E501
|
81 |
+
language = request.state.predicted_language
|
82 |
+
pipeline = mc['room_type'][language]
|
83 |
+
model = RoomType()
|
84 |
+
prediction = model.predict(room_description, pipeline, language)
|
85 |
+
|
86 |
+
return prediction
|
87 |
+
|
88 |
+
|
89 |
+
@router.get("/category")
|
90 |
+
async def predict_room_category_endpoint(request: Request, room_description: str = Query(...)): # noqa: E501
|
91 |
+
    prediction = mc['room_category']['en'].predict(room_description)
    return prediction


@router.get("/environment")
async def predict_room_environment_endpoint(request: Request, room_description: str = Query(...)):  # noqa: E501
    prediction = mc['environment']['en'].predict(room_description)
    return prediction


@router.get("/view")
async def predict_view_endpoint(request: Request, room_description: str = Query(...)):  # noqa: E501
    prediction = mc['room_view']['en'].predict(room_description)
    return prediction


@router.get("/feature")
async def predict_feature_endpoint(request: Request, room_description: str = Query(...)):  # noqa: E501
    prediction = mc['room_features']['en'].predict(room_description)
    return prediction


@router.post("/predict/all", response_model=AllPredictionsResponse)
async def predict_all(request: Request, room_data: RoomData):
    start_time = time.time()
    room_data = RoomData(**await request.json())
    language = request.state.predicted_language

    with ThreadPoolExecutor() as executor:
        type_future = executor.submit(
            get_room_type_prediction, room_data.room_description, language
        )
        category_future = executor.submit(
            get_room_category_prediction, room_data.room_description, language
        )
        environment_future = executor.submit(
            get_room_environment_prediction, room_data.room_description, language
        )
        feature_future = executor.submit(
            get_feature_prediction, room_data.room_description, language
        )
        view_future = executor.submit(
            get_view_prediction, room_data.room_description, language
        )

        type_pred = type_future.result()["msg"]
        category_pred = category_future.result()["msg"]
        environment_pred_results = environment_future.result()["msg"]
        feature_pred_results = feature_future.result()["feature_prediction"]
        view_pred_results = view_future.result()["view_prediction"]

    bed_predictions = [
        bed_data for bed_data in room_data.beds if bed_data.type is not None and bed_data.count is not None  # noqa: E501
    ]

    if not bed_predictions:
        logger.debug("No bed data provided or valid; extracting from description.")
        extracted_beds = get_bed_predictions(room_data.room_description)
        if extracted_beds:
            bed_predictions.extend(extracted_beds)

    end_time = time.time()
    total_time = end_time - start_time
    logger.info(f"Total processing time: {total_time:.3f} seconds")

    formatted_predictions = {
        "type": {
            "label": type_pred.get("label", DEFAULT_LABEL),
            "score": safe_round(type_pred.get("score", DEFAULT_SCORE), 3),
        },
        "category": {
            "label": category_pred.get("label", DEFAULT_LABEL),
            "score": safe_round(category_pred.get("score", DEFAULT_SCORE), 3),
        },
    }

    env_preds = process_predictions(environment_pred_results)
    feat_preds = process_predictions(
        feature_pred_results.get("features", []), label_key="word"
    )
    view_preds = process_predictions(
        view_pred_results.get("views", []), label_key="word"
    )

    predictions = Predictions(
        type=PredictionResponse(**formatted_predictions["type"]),
        category=PredictionResponse(**formatted_predictions["category"]),
        environment=[PredictionResponse(**pred) for pred in env_preds] if env_preds else [],  # noqa: E501
        feature=[PredictionResponse(**pred) for pred in feat_preds] if feat_preds else [],  # noqa: E501
        view=[PredictionResponse(**pred) for pred in view_preds] if view_preds else [],
        language_detected=language,
        beds=bed_predictions,
    )

    return AllPredictionsResponse(predictions=predictions)
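A quick way to exercise the aggregate endpoint above is a plain HTTP call. The sketch below is illustrative only: the `/room` prefix, the port, and the API-key header name are assumptions, since the router mounting and auth setup live in main.py, outside this hunk.

import requests

# Hypothetical smoke test for POST /predict/all; prefix, port, and header
# name are assumed, not confirmed by this diff.
payload = {
    "room_description": "superior double room with 2 queen beds and sea view",
    "beds": [],  # empty on purpose: forces bed extraction from the description
}
resp = requests.post(
    "http://localhost:8000/room/predict/all",
    json=payload,
    headers={"x-api-key": "your_api_key"},  # assumed header name
)
resp.raise_for_status()
print(resp.json()["predictions"]["type"])  # e.g. {"label": "room", "score": 0.974}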
mappingservice/utils.py
ADDED
@@ -0,0 +1,154 @@
import logging.config
import re
from typing import List

import psutil
from langdetect import LangDetectException, detect

from mappingservice.constants import ENGLISH_KEYWORDS, SPANISH_KEYWORDS
from mappingservice.models import BedData, BedType

logging.basicConfig(
    level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)


def log_memory_usage():
    process = psutil.Process()
    memory_info = process.memory_info()
    logging.info(f"Memory usage: RSS={memory_info.rss}, VMS={memory_info.vms}")


def safe_round(value, decimals=0):
    return round(value, decimals) if isinstance(value, int | float) else 0


def process_predictions(predictions, score_key="score", label_key="label"):
    return [
        {"label": pred.get(label_key, "No data"), "score": round(pred.get(score_key, 0), 3)}  # noqa: E501
        for pred in predictions if isinstance(pred, dict) and pred.get(score_key, 0) > 0
    ] or None


def parse_model_output(predictions):
    bed_data_list = []
    threshold = 0.5

    if not isinstance(predictions, list) or not all(
        isinstance(pred, dict) for pred in predictions
    ):
        return bed_data_list

    for prediction in predictions:
        bed_type = prediction.get("type")  # renamed from `type` to avoid shadowing the builtin  # noqa: E501
        count = prediction.get("count", 0)
        score = prediction.get("score", 0)

        if score > threshold and count > 0:
            if bed_type in BedType._member_names_:
                bed_data = BedData(type=BedType[bed_type], count=count)
                bed_data_list.append(bed_data)
            else:
                logger.debug(f"Unsupported bed type: {bed_type}")

    return bed_data_list


def get_bed_predictions(description: str):
    logger.debug(f"Extracting bed predictions from description: {description}")

    description = description.lower()

    bed_pattern = re.compile(
        r"(\d+)?\s*\b(king|queen|double|single|twin|bunk|sofa|rollaway|futon|"
        r"cama\s*king|cama\s*queen|cama\s*double|cama\s*single|cama\s*twin|"
        r"cama\s*bunk|cama\s*sofa|cama\s*rollaway|cama\s*futon)\b\s*(beds?|camas?)", re.IGNORECASE)  # noqa: E501

    or_pattern = re.compile(
        r"\b(king|queen|double|single|twin)\s+or\s+(king|queen|double|single|twin)\b", re.IGNORECASE)  # noqa: E501

    # default_bed_type_pattern = re.compile(
    #     r"\b(double|twin|king|queen|single|doble|individual|gemela|matrimonial)\b\s+"
    #     r"\b(room|apartment|suite|habitacion|apartamento)\b", re.IGNORECASE)

    matches = bed_pattern.findall(description)
    or_matches = or_pattern.findall(description)
    # default_matches = default_bed_type_pattern.findall(description)

    bed_data_list = []
    bed_type_counts = {}

    for match in matches:
        count = int(match[0]) if match[0] else 1
        bed_type_name = match[1]
        normalized_bed_type = bed_type_name.strip().lower()
        bed_type = BedType[normalized_bed_type] if normalized_bed_type in BedType._member_names_ else None  # noqa: E501

        if bed_type:
            bed_type_counts[bed_type] = count

    for match in or_matches:
        for bed_type_name in match:
            normalized_bed_type = bed_type_name.strip().lower()
            bed_type = BedType[normalized_bed_type] if normalized_bed_type in BedType._member_names_ else None  # noqa: E501
            if bed_type:
                bed_type_counts[bed_type] = bed_type_counts.get(bed_type, 0) + 1

    for bed_type, count in bed_type_counts.items():
        bed_data_list.append(BedData(type=bed_type, count=count))

    if not bed_data_list:
        logger.warning("No valid bed data found from extracted information.")

    return bed_data_list


def extract_bed_numbers(description: str):
    bed_number_pattern = re.compile(
        r"(\d+|\bone\b|\btwo\b|\bthree\b)\s*bed", re.IGNORECASE
    )
    numbers = []
    word_to_number = {"one": 1, "two": 2, "three": 3}

    matches = bed_number_pattern.findall(description)
    for match in matches:
        number = word_to_number.get(match.lower(), match)
        numbers.append(int(number))

    return numbers


def validate_bed_data(beds: List[BedData]) -> List[BedData]:
    valid_beds = [bed for bed in beds if bed.type and bed.count > 0]
    if not valid_beds:
        logger.info(f"No valid beds found in {beds}")
    return valid_beds


def is_list_of_lists(variable):
    if not isinstance(variable, list):
        return False
    return all(isinstance(item, list) for item in variable)


def predict_language(text):
    text_lower = text.lower()

    for keyword in SPANISH_KEYWORDS:
        if keyword in text_lower:
            return "es"

    for keyword in ENGLISH_KEYWORDS:
        if keyword in text_lower:
            return "en"

    try:
        language = detect(text)
        if language in {"en", "es"}:
            return language
    except LangDetectException:
        pass

    return None
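For orientation, a sketch of how the two main helpers above are expected to behave. This is illustrative only: `BedType` (models.py) is assumed to define lowercase members such as `king`, `queen`, and `twin`, and the keyword lists in constants.py are assumed to cover common Spanish/English room vocabulary; neither file is in this hunk.

from mappingservice.utils import get_bed_predictions, predict_language

# "2 queen beds" is caught by bed_pattern: explicit count plus bed type.
get_bed_predictions("room with 2 queen beds")
# -> [BedData(type=BedType.queen, count=2)]

# "king or twin" is caught by or_pattern: one bed per alternative.
get_bed_predictions("king or twin room")
# -> [BedData(type=BedType.king, count=1), BedData(type=BedType.twin, count=1)]

# Keyword hit first, langdetect fallback second; non-en/es text returns None.
predict_language("habitacion con vistas al mar")  # -> "es"
predict_language("double guest room")             # -> "en"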
poetry.lock
ADDED
The diff for this file is too large to render.
pyproject.toml
ADDED
@@ -0,0 +1,138 @@
[tool.poetry]
name = "mappingservice"
version = "0.1.0"
description = ""
authors = ["oscar <oscar@travelgate.com>"]
readme = "README.md"

[tool.poetry.dependencies]
python = "^3.11"
psutil = "^5.9.8"
fastapi = "^0.111.0"
uvicorn = {extras = ["standard"], version = "^0.29.0"}
pydantic = {version = "^2.7.1", extras = ["email"]}
pydantic-settings = "^2.2.1"
transformers = "^4.40.2"
torch = {version = "^2.3.0", source = "pytorch-cpu"}
langdetect = "^1.0.9"
accelerate = "^0.30.1"
optimum = "^1.19.2"
onnx = "^1.16.0"
onnxruntime = "^1.18.0"

[tool.poetry.group.dev.dependencies]
pytest = "^8.2.0"
pytest-cov = "^5.0.0"
pre-commit = "^3.7.0"
ruff = "^0.4.4"

[tool.poetry.group.benchmark.dependencies]
locust = "^2.27.0"

[tool.pytest.ini_options]
addopts = "-vv -rA --disable-warnings --cov=app --cov-report term-missing"
testpaths = [
    "tests",
]
asyncio_mode = "auto"
pythonpath = [
    "."
]

[tool.ruff]
# Same as Black.
line-length = 88
indent-width = 4

# Assume Python 3.11, matching the `python = "^3.11"` constraint above.
target-version = "py311"

# Exclude a variety of commonly ignored directories.
lint.exclude = [
    ".bzr",
    ".direnv",
    ".eggs",
    ".git",
    ".git-rewrite",
    ".hg",
    ".ipynb_checkpoints",
    ".mypy_cache",
    ".nox",
    ".pants.d",
    ".pyenv",
    ".pytest_cache",
    ".pytype",
    ".ruff_cache",
    ".svn",
    ".tox",
    ".venv",
    ".vscode",
    "__pypackages__",
    "_build",
    "buck-out",
    "build",
    "dist",
    "node_modules",
    "site-packages",
    "venv",
]

lint.select = [
    "E",   # pycodestyle errors
    "W",   # pycodestyle warnings
    "F",   # pyflakes
    "I",   # isort
    "C",   # flake8-comprehensions
    "B",   # flake8-bugbear
    "UP",  # pyupgrade
]
lint.ignore = [
    "B008",  # do not perform function calls in argument defaults
    "C901",  # too complex
    "W191",  # indentation contains tabs
    "E711",  # comparison to `None` should be `cond is None`
]

[tool.ruff.format]
# Like Black, use double quotes for strings.
quote-style = "double"

# Like Black, indent with spaces, rather than tabs.
indent-style = "space"

# Like Black, respect magic trailing commas.
skip-magic-trailing-comma = false

# Like Black, automatically detect the appropriate line ending.
line-ending = "auto"

# Enable auto-formatting of code examples in docstrings. Markdown,
# reStructuredText code/literal blocks and doctests are all supported.
#
# This is currently disabled by default, but it is planned for this
# to be opt-out in the future.
docstring-code-format = false

# Set the line length limit used when formatting code snippets in
# docstrings.
#
# This only has an effect when the `docstring-code-format` setting is
# enabled.
docstring-code-line-length = "dynamic"

[tool.ruff.lint.per-file-ignores]
"__init__.py" = ["F401"]
"**/{tests,docs,tools}/*" = ["E402"]

[tool.ruff.lint.pyupgrade]
# Preserve types, even if a file imports `from __future__ import annotations`.
keep-runtime-typing = true

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

[[tool.poetry.source]]
name = "pytorch-cpu"
url = "https://download.pytorch.org/whl/cpu"
priority = "supplemental"
tests/__init__.py
ADDED
File without changes
tests/test_langdetect.py
ADDED
@@ -0,0 +1,79 @@
#!/usr/bin/env python3

import pytest
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline

from mappingservice.utils import predict_language


@pytest.fixture
def classifier():
    model_path = "papluca/xlm-roberta-base-language-detection"
    model = AutoModelForSequenceClassification.from_pretrained(model_path)
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    classification = pipeline(
        "text-classification",
        model=model,
        tokenizer=tokenizer,
        framework="pt",
        device=0 if torch.cuda.is_available() else -1,
    )

    return classification


def test_model_predictions(classifier):
    test_data = [
        {'input': 'Habitacion estandar con bano', 'expected_response': 'es'},
        {'input': 'apartamento de lujo con vistas al mar', 'expected_response': 'es'},  # noqa: E501
        {'input': 'casa ejecutiva', 'expected_response': 'es'},
        {'input': 'villa doble', 'expected_response': 'es'},
        {'input': 'estudio de una habitacion de lujo', 'expected_response': 'es'},
        {'input': 'chalet premier con dos habitaciones', 'expected_response': 'es'},
        {'input': 'casa de la playa premium con bano compartido', 'expected_response': 'es'},  # noqa: E501
        {'input': 'estudio familiar grande', 'expected_response': 'es'},
        {'input': 'suite familiar junior', 'expected_response': 'en'},
        {'input': 'bungalow tradicional sin bano', 'expected_response': 'es'},
        {'input': 'superior room 1 king superior room 1 king cupola or courtyard view french style 36sqm 385sq', 'expected_response': 'en'},  # noqa: E501
        {'input': 'habitacion matrimonial adaptada discapacitados', 'expected_response': 'es'},  # noqa: E501
        {'input': 'privilege room twin for 2 adults 0 children and 0 infants', 'expected_response': 'en'},  # noqa: E501
        {'input': 'deluxe room double for 2 adults 0 children and 0 infants', 'expected_response': 'en'},  # noqa: E501
        {'input': 'premier palace double room', 'expected_response': 'en'},
        {'input': 'double single use deluxe', 'expected_response': 'en'},
        {'input': 'double room queen bed superior', 'expected_response': 'en'},
        {'input': 'double guest room', 'expected_response': 'en'},
        {'input': 'single room for 1 adults 0 children and 0 infants', 'expected_response': 'en'},  # noqa: E501
        {'input': 'twin premium room incl evening tasting welcome gift comp wifi 28 sqm espresso fridge bathrobe', 'expected_response': 'en'},  # noqa: E501
        {'input': 'superior quadruple room', 'expected_response': 'en'},
        {'input': 'superior one bedroom apartment x2013 2 adults', 'expected_response': 'en'},  # noqa: E501
        {'input': 'deluxe room double for 2 adults 0 children and 0 infants', 'expected_response': 'en'},  # noqa: E501
        {'input': 'premier palace double room', 'expected_response': 'en'},
        {'input': 'double single use deluxe', 'expected_response': 'en'},
        {'input': 'double room queen bed superior', 'expected_response': 'en'},
        {'input': 'double guest room', 'expected_response': 'en'},
        {'input': 'single room for 1 adults 0 children and 0 infants', 'expected_response': 'en'},  # noqa: E501
        {'input': 'twin premium room incl evening tasting welcome gift comp wifi 28 sqm espresso fridge bathrobe', 'expected_response': 'en'},  # noqa: E501
        {'input': 'superior quadruple room', 'expected_response': 'en'},
        {'input': 'superior one bedroom apartment x2013 2 adults', 'expected_response': 'en'},  # noqa: E501
        {'input': 'comfort double', 'expected_response': 'en'},
        {'input': '1 king bed suite nonsmoking', 'expected_response': 'en'},
        {'input': 'junior suite 1 king bed nonsmoking', 'expected_response': 'en'},
        {'input': 'family room superior', 'expected_response': 'en'}
    ]

    for test_case in test_data:
        description = test_case["input"]
        expected_label = test_case["expected_response"]
        # First, try to predict based on keywords
        predicted_label = predict_language(description)

        # If no prediction was made, fall back to the model prediction
        if not predicted_label:
            print(f"Fallback to model prediction for '{description}'")
            result = classifier(description)
            predicted_label = result[0]["label"]

        assert (
            predicted_label == expected_label
        ), f"Incorrect prediction for '{description}': expected '{expected_label}', obtained '{predicted_label}'"  # noqa: E501
tests/test_roomcategory.py
ADDED
@@ -0,0 +1,75 @@
#!/usr/bin/env python3

import pytest
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline


@pytest.fixture
def classifier():
    model_path = "travelgate/room_category-classifier"
    try:
        model = AutoModelForSequenceClassification.from_pretrained(model_path)
        tokenizer = AutoTokenizer.from_pretrained(model_path)
        classification = pipeline(
            "text-classification",
            model=model,
            tokenizer=tokenizer,
            framework="pt",
            device=0 if torch.cuda.is_available() else -1,
        )
        print("Model and tokenizer loaded successfully.")
    except Exception as e:
        print(f"Error loading the model or tokenizer: {e}")
        raise

    return classification


def test_model_predictions(classifier):
    test_data = [
        {'input': 'comfort double', 'expected_response': 'comfort'},
        {'input': 'full size bed 1 big bed', 'expected_response': 'default'},
        {'input': '1 king bed suite nonsmoking', 'expected_response': 'default'},
        {'input': 'junior suite 1 king bed nonsmoking', 'expected_response': 'junior'},
        {'input': 'superior sea view', 'expected_response': 'superior'},
        {'input': 'family room superior', 'expected_response': 'superior'},
        {'input': '1 king standard', 'expected_response': 'standard'},
        {'input': 'superior king balcon vista al mar', 'expected_response': 'superior'},
        {'input': 'standard double room king bed accessible', 'expected_response': 'standard'},  # noqa: E501
        {'input': 'sagamore garden view suite standard', 'expected_response': 'standard'},  # noqa: E501
        {'input': 'standard 1 king bed courtyard view non smoking', 'expected_response': 'standard'},  # noqa: E501
        {'input': 'suite double for 2 adults 0 children and 0 infants', 'expected_response': 'default'},  # noqa: E501
        {'input': 'one bedroom superior king suite with ocean view non smoking', 'expected_response': 'superior'},  # noqa: E501
        {'input': 'omnia deluxe triple for 2 adults 0 children and 0 infants', 'expected_response': 'deluxe'},  # noqa: E501
        {'input': 'king premium room incl evening tasting welcome gift comp wifi 27 30 sqm espresso fridge bathrobe', 'expected_response': 'premium'},  # noqa: E501
        {'input': 'standard twin double room single use', 'expected_response': 'standard'},  # noqa: E501
        {'input': 'habitacion 2 camas deluxe', 'expected_response': 'deluxe'},
        {'input': 'standard 2 double beds courtyard view non smoking', 'expected_response': 'standard'},  # noqa: E501
        {'input': 'habitacion estandar con bano', 'expected_response': 'standard'},
        {'input': 'superior room 1 king superior room 1 king cupola or courtyard view french style 36sqm 385sq', 'expected_response': 'superior'},  # noqa: E501
        {'input': 'habitacion estandar matrimonial adaptada discapacitados', 'expected_response': 'standard'},  # noqa: E501
        {'input': 'privilege twin for 2 adults 0 children and 0 infants', 'expected_response': 'privilege'},  # noqa: E501
        {'input': 'classic single for 1 adults 0 children and 0 infants', 'expected_response': 'classic'},  # noqa: E501
        {'input': 'deluxe room double for 2 adults 0 children and 0 infants', 'expected_response': 'deluxe'},  # noqa: E501
        {'input': 'gallery two queens', 'expected_response': 'default'},
        {'input': 'premier palace double room', 'expected_response': 'premier'},
        {'input': 'double single use deluxe', 'expected_response': 'deluxe'},
        {'input': 'double room queen bed superior', 'expected_response': 'superior'},
        {'input': 'double guest standard room', 'expected_response': 'standard'},
        {'input': '2 queen beds disney view non smoking', 'expected_response': 'default'},  # noqa: E501
        {'input': 'standard single room for 1 adults 0 children and 0 infants', 'expected_response': 'standard'},  # noqa: E501
        {'input': 'twin premium room incl evening tasting welcome gift comp wifi 28 sqm espresso fridge bathrobe', 'expected_response': 'premium'},  # noqa: E501
        {'input': 'superior quadruple room', 'expected_response': 'superior'},
        {'input': 'superior one bedroom apartment x2013 2 adults', 'expected_response': 'superior'}  # noqa: E501
    ]

    for test_case in test_data:
        description = test_case["input"]
        expected_category = test_case["expected_response"]
        result = classifier(description)[0]
        predicted_category = result["label"]

        assert (
            predicted_category == expected_category
        ), f"Incorrect prediction for '{description}': expected '{expected_category}', obtained '{predicted_category}'"  # noqa: E501
tests/test_roomenvironment.py
ADDED
@@ -0,0 +1,77 @@
#!/usr/bin/env python3

import pytest
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline


@pytest.fixture
def classifier():
    model_path = "travelgate/room_environment-classifier"
    try:
        model = AutoModelForSequenceClassification.from_pretrained(model_path)
        tokenizer = AutoTokenizer.from_pretrained(model_path)
        classification = pipeline(
            "text-classification",
            model=model,
            tokenizer=tokenizer,
            framework="pt",
            device=0 if torch.cuda.is_available() else -1,
        )
        print("Model and tokenizer loaded successfully.")
    except Exception as e:
        print(f"Error loading the model or tokenizer: {e}")
        raise

    return classification


def test_model_predictions(classifier):
    test_data = [
        {'input': 'comfort double with shared bed', 'expected_response': 'shared bed'},
        {'input': 'habitacion con banno compartido', 'expected_response': 'shared bathroom'},  # noqa: E501
        {'input': 'habitacion con bano compartido', 'expected_response': 'shared bathroom'},  # noqa: E501
        {'input': 'full size bed 1 big bed with shared bathroom', 'expected_response': 'shared bathroom'},  # noqa: E501
        {'input': '1 king bed suite nonsmoking shared bathroom', 'expected_response': 'shared bathroom'},  # noqa: E501
        {'input': 'junior suite 1 king bed nonsmoking shared bed', 'expected_response': 'shared bed'},  # noqa: E501
        {'input': 'superior sea view connected room', 'expected_response': 'connected'},
        {'input': 'twin room with two shared beds', 'expected_response': 'shared bed'},
        {'input': 'double room with connected rooms available', 'expected_response': 'connected'},  # noqa: E501
        {'input': 'two queen beds with shared bathroom access', 'expected_response': 'shared bathroom'},  # noqa: E501
        {'input': 'suite with king bed and connected room option', 'expected_response': 'connected'},  # noqa: E501
        {'input': 'shared dormitory room mixed gender', 'expected_response': 'shared room'},  # noqa: E501
        {'input': 'private room with shared bathroom facilities', 'expected_response': 'shared bathroom'},  # noqa: E501
        {'input': 'family suite with connected rooms', 'expected_response': 'connected'},  # noqa: E501
        {'input': 'bunk bed in mixed shared room', 'expected_response': 'shared room'},
        {'input': 'deluxe queen room with two queen beds shared bathroom', 'expected_response': 'shared bathroom'},  # noqa: E501
        {'input': 'hostel dorm bed shared room', 'expected_response': 'shared room'},
        {'input': 'economy double room with shared bathroom outside the room', 'expected_response': 'shared bathroom'},  # noqa: E501
        {'input': 'luxury suite with sea view and private connected rooms', 'expected_response': 'connected'},  # noqa: E501
        {'input': 'single bed in co-ed shared hostel room', 'expected_response': 'shared room'},  # noqa: E501
        {'input': 'executive suite with optional connecting room', 'expected_response': 'connected'},  # noqa: E501
        {'input': 'standard twin room shared beds', 'expected_response': 'shared bed'},
        {'input': 'cozy single room with access to shared bathroom', 'expected_response': 'shared bathroom'},  # noqa: E501
        {'input': 'spacious family room with interconnecting doors', 'expected_response': 'connected'},  # noqa: E501
        {'input': 'shared female dormitory for 4 guests', 'expected_response': 'shared room'},  # noqa: E501
        {'input': 'luxury double room with shared bed arrangements', 'expected_response': 'shared bed'},  # noqa: E501
        {'input': 'master suite with a shared bedroom balcony and sea view', 'expected_response': 'shared room'},  # noqa: E501
        {'input': 'budget twin room with shared bath', 'expected_response': 'shared bathroom'},  # noqa: E501
        {'input': 'two-bedroom apartment with connecting options', 'expected_response': 'connected'},  # noqa: E501
        {'input': 'en-suite room with shared bathroom', 'expected_response': 'shared bathroom'},  # noqa: E501
        {'input': 'single guest room with shared bed', 'expected_response': 'shared bed'},  # noqa: E501
        {'input': 'shared room in a modern loft city center', 'expected_response': 'shared room'},  # noqa: E501
        {'input': 'double bed in a shared room apartment', 'expected_response': 'shared room'},  # noqa: E501
        {'input': 'studio with shared bath', 'expected_response': 'shared bathroom'},
        {'input': 'penthouse suite connected rooms available upon request', 'expected_response': 'connected'},  # noqa: E501
        {'input': "backpacker's special one bed in a shared room of six", 'expected_response': 'shared room'}  # noqa: E501
    ]

    for test_case in test_data:
        description = test_case["input"]
        expected_environment = test_case["expected_response"]
        result = classifier(description)[0]
        predicted_environment = result["label"]

        assert (
            predicted_environment == expected_environment
        ), f"Incorrect prediction for '{description}': expected '{expected_environment}', obtained '{predicted_environment}'"  # noqa: E501
tests/test_roomtype.py
ADDED
@@ -0,0 +1,71 @@
#!/usr/bin/env python3

import pytest
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline


@pytest.fixture
def classifier():
    model_path = "travelgate/room_type-classifier"
    model = AutoModelForSequenceClassification.from_pretrained(model_path)
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    classification = pipeline(
        "text-classification",
        model=model,
        tokenizer=tokenizer,
        framework="pt",
        device=0 if torch.cuda.is_available() else -1,
    )

    return classification


def test_model_predictions(classifier):
    test_data = [
        {'input': 'Habitacion estandar con bano', 'expected_response': 'room'},
        {'input': 'apartamento de lujo con vistas al mar', 'expected_response': 'apartment'},  # noqa: E501
        {'input': 'casa ejecutiva', 'expected_response': 'house'},
        {'input': 'villa doble', 'expected_response': 'villa'},
        {'input': 'estudio de una habitacion de lujo', 'expected_response': 'studio'},
        {'input': 'chalet premier con dos habitaciones', 'expected_response': 'chalet'},
        {'input': 'casa de la playa premium con bano compartido', 'expected_response': 'beach house'},  # noqa: E501
        {'input': 'estudio familiar grande', 'expected_response': 'family studio'},
        {'input': 'suite familiar junior', 'expected_response': 'family suite'},
        {'input': 'bungalow tradicional sin bano', 'expected_response': 'bungalow'},
        {'input': 'superior room 1 king superior room 1 king cupola or courtyard view french style 36sqm 385sq', 'expected_response': 'room'},  # noqa: E501
        {'input': 'habitacion matrimonial adaptada discapacitados', 'expected_response': 'room'},  # noqa: E501
        {'input': 'privilege room twin for 2 adults 0 children and 0 infants', 'expected_response': 'room'},  # noqa: E501
        {'input': 'deluxe room double for 2 adults 0 children and 0 infants', 'expected_response': 'room'},  # noqa: E501
        {'input': 'premier palace double room', 'expected_response': 'room'},
        {'input': 'double single use deluxe', 'expected_response': 'default'},
        {'input': 'double room queen bed superior', 'expected_response': 'room'},
        {'input': 'double guest room', 'expected_response': 'room'},
        {'input': 'single room for 1 adults 0 children and 0 infants', 'expected_response': 'room'},  # noqa: E501
        {'input': 'twin premium room incl evening tasting welcome gift comp wifi 28 sqm espresso fridge bathrobe', 'expected_response': 'room'},  # noqa: E501
        {'input': 'superior quadruple room', 'expected_response': 'room'},
        {'input': 'superior one bedroom apartment x2013 2 adults', 'expected_response': 'apartment'},  # noqa: E501
        {'input': 'deluxe room double for 2 adults 0 children and 0 infants', 'expected_response': 'room'},  # noqa: E501
        {'input': 'premier palace double room', 'expected_response': 'room'},
        {'input': 'double single use deluxe', 'expected_response': 'default'},
        {'input': 'double room queen bed superior', 'expected_response': 'room'},
        {'input': 'double guest room', 'expected_response': 'room'},
        {'input': 'single room for 1 adults 0 children and 0 infants', 'expected_response': 'room'},  # noqa: E501
        {'input': 'twin premium room incl evening tasting welcome gift comp wifi 28 sqm espresso fridge bathrobe', 'expected_response': 'room'},  # noqa: E501
        {'input': 'superior quadruple room', 'expected_response': 'room'},
        {'input': 'superior one bedroom apartment x2013 2 adults', 'expected_response': 'apartment'},  # noqa: E501
        {'input': 'comfort double', 'expected_response': 'default'},
        {'input': '1 king bed suite nonsmoking', 'expected_response': 'suite'},
        {'input': 'junior suite 1 king bed nonsmoking', 'expected_response': 'suite'},
        {'input': 'family room superior', 'expected_response': 'room'}
    ]

    for test_case in test_data:
        description = test_case["input"]
        expected_label = test_case["expected_response"]
        result = classifier(description)[0]
        predicted_label = result["label"]

        assert (
            predicted_label == expected_label
        ), f"Incorrect prediction for '{description}': expected '{expected_label}', obtained '{predicted_label}'"  # noqa: E501
tests/test_roomview.py
ADDED
@@ -0,0 +1,89 @@
#!/usr/bin/env python3
import pytest
from langdetect import detect_langs

import mappingservice.dependencies as deps
from mappingservice.config import Settings
from mappingservice.constants import AVAILABLE_LANGUAGES, MODEL_NAMES
from mappingservice.ms.model_loader import ModelLoader
from mappingservice.utils import predict_language

settings = Settings()
ml_model = ModelLoader(settings, MODEL_NAMES)


@pytest.fixture
def classifier():

    for lang in AVAILABLE_LANGUAGES:
        model_pipeline = ml_model.get_model('room_view', lang)
        try:
            deps.mc['room_view'][lang] = model_pipeline
        except KeyError:
            deps.mc['room_view'] = {}
            deps.mc['room_view'][lang] = model_pipeline

    def get_model(language):
        return deps.mc['room_view'][language]

    return get_model


def test_model_predictions(classifier):
    test_data = [
        {'input': 'studio with ruins view', 'expected_response': 'ruins'},  # noqa: E501
        {'input': 'suite with pool view', 'expected_response': 'pool'},
        {'input': 'executive room garden view', 'expected_response': 'garden'},
        {'input': 'studio two bedroom vineyard view 4 guests wine country charm', 'expected_response': 'vineyard'},  # noqa: E501
        {'input': 'superior suite city view', 'expected_response': 'city'},
        {'input': 'room with a balcony and harbour views', 'expected_response': 'harbour'},  # noqa: E501
        {'input': 'junior studio stars views', 'expected_response': 'stars'},
        {'input': 'room park views', 'expected_response': 'park'},
        {'input': 'loft two bedroom and marina view 4 adults coastal luxury', 'expected_response': 'marina'},  # noqa: E501
        {'input': 'house sea view', 'expected_response': 'sea'},
        {'input': 'villa hill views', 'expected_response': 'hill'},
        {'input': 'room park view', 'expected_response': 'park'},
        {'input': 'townhouse one bedroom and park view 2 adults urban charm', 'expected_response': 'park'},  # noqa: E501
        {'input': 'suite garden view', 'expected_response': 'garden'},
        {'input': 'twin room ocean view with balcony', 'expected_response': 'ocean'},
        {'input': 'residencia con vistas al mar', 'expected_response': 'mar'},
        {'input': 'habitacion con vistas a la piscina', 'expected_response': 'piscina'},
        {'input': 'habitacion ejecutiva con vistas al jardin', 'expected_response': 'jardin'},  # noqa: E501
        # {'input': 'habitacion con vistas al oceano', 'expected_response': 'oceano'},  # noqa: E501
        {'input': 'suite superior con vistas a la ciudad', 'expected_response': 'ciudad'},  # noqa: E501
        # {'input': 'habitacion con balcon con vistas a la bahia', 'expected_response': 'bahia'},  # noqa: E501
        {'input': 'estudio junior con vistas a las estrellas', 'expected_response': 'estrellas'},  # noqa: E501
        {'input': 'habitacion con vistas al parque', 'expected_response': 'parque'},
        {'input': 'habitacion con vistas a la ciudad', 'expected_response': 'ciudad'},
        {'input': 'casa con vista al mar', 'expected_response': 'mar'},
        {'input': 'corner suite pool view', 'expected_response': 'pool'},
        {'input': 'estudio con vista al parque', 'expected_response': 'parque'},
        {'input': 'estudio con vista al jardin', 'expected_response': 'jardin'},
        {'input': 'residencia con vista al amazonas', 'expected_response': 'amazonas'},
        {'input': 'habitacion con vista a la montana', 'expected_response': 'montana'},  # noqa: E501
        {'input': 'residencia moderno con vista al park', 'expected_response': 'park'},  # noqa: E501
        {'input': 'dormitorio con vistas al lago', 'expected_response': 'lago'},  # noqa: E501
        # {'input': 'dormitorio con vistas a la bahia', 'expected_response': 'bahia'},
        {'input': 'classic ocean view', 'expected_response': 'ocean'}  # noqa: E501
    ]

    # This check will always pass until we solve the mystery with the model data.
    print("Test data length:", len(test_data))

    for test_case in test_data:
        language = detect_langs(test_case['input'])
        for item in language:
            if item.lang in AVAILABLE_LANGUAGES:
                language = item.lang
                break

        if language not in AVAILABLE_LANGUAGES:
            language = predict_language(test_case['input'])

        cls = classifier(language)
        description = test_case["input"]
        expected_view = test_case["expected_response"]
        processed_results = cls.predict(description)

        predicted_views = [entity["word"] for entity in processed_results]
        assert expected_view in predicted_views, f"Incorrect prediction for '{description}' using '{language}' language: expected '{expected_view}', obtained '{predicted_views}'"  # noqa: E501