Calin Rada committed on
Commit
f006f31
0 Parent(s):
Files changed (46) hide show
  1. .dockerignore +9 -0
  2. .env.dist +10 -0
  3. .github/CODEOWNERS +1 -0
  4. .github/dependabot.yml +18 -0
  5. .github/workflows/deploy-docker.yml +9 -0
  6. .github/workflows/pr-test.yml +41 -0
  7. .gitignore +165 -0
  8. .pre-commit-config.yaml +25 -0
  9. .vscode/extensions.json +5 -0
  10. .vscode/launch.json +27 -0
  11. .vscode/settings.json +16 -0
  12. Dockerfile +27 -0
  13. README.md +91 -0
  14. benchmark/__init__.py +0 -0
  15. benchmark/locustfile.py +35 -0
  16. docker-compose.yml +27 -0
  17. logging.conf +20 -0
  18. mappingservice/__init__.py +0 -0
  19. mappingservice/config.py +41 -0
  20. mappingservice/config_consul.py +97 -0
  21. mappingservice/constants.py +56 -0
  22. mappingservice/dependencies.py +27 -0
  23. mappingservice/main.py +117 -0
  24. mappingservice/models.py +105 -0
  25. mappingservice/ms/__init__.py +0 -0
  26. mappingservice/ms/ml_models/__init__.py +0 -0
  27. mappingservice/ms/ml_models/base.py +66 -0
  28. mappingservice/ms/ml_models/bed_type.py +25 -0
  29. mappingservice/ms/ml_models/environment.py +30 -0
  30. mappingservice/ms/ml_models/room_category.py +23 -0
  31. mappingservice/ms/ml_models/room_features.py +19 -0
  32. mappingservice/ms/ml_models/room_type.py +23 -0
  33. mappingservice/ms/ml_models/room_view.py +18 -0
  34. mappingservice/ms/model_loader.py +73 -0
  35. mappingservice/routers/__init__.py +0 -0
  36. mappingservice/routers/admin.py +13 -0
  37. mappingservice/routers/room.py +185 -0
  38. mappingservice/utils.py +154 -0
  39. poetry.lock +0 -0
  40. pyproject.toml +138 -0
  41. tests/__init__.py +0 -0
  42. tests/test_langdetect.py +79 -0
  43. tests/test_roomcategory.py +75 -0
  44. tests/test_roomenvironment.py +77 -0
  45. tests/test_roomtype.py +71 -0
  46. tests/test_roomview.py +89 -0
.dockerignore ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ .vscode
2
+ .idea
3
+ .cache
4
+ .git
5
+ .github
6
+ .ruff_cache
7
+ .pre-commit-config.yaml
8
+ .env
9
+ act
.env.dist ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ API_KEY=your_api_key
2
+ HUGGINGFACE_ACCESS_TOKEN=your_hf_token
3
+
4
+ TGX_CONSUL_AUTH_USER=
5
+ TGX_CONSUL_AUTH_PASS=
6
+ TGX_CONSUL_TOKEN=
7
+ TGX_CONSUL_NODES=tgx
8
+
9
+ HF_HUB_CACHE = /home/.cache/huggingface
10
+
.github/CODEOWNERS ADDED
@@ -0,0 +1 @@
 
 
1
+ * @Travelgate/eng-architecture
.github/dependabot.yml ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # To get started with Dependabot version updates, you'll need to specify which
2
+ # package ecosystems to update and where the package manifests are located.
3
+ # Please see the documentation for all configuration options:
4
+ # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
5
+
6
+ version: 2
7
+ updates:
8
+ # Maintain dependencies for GitHub Actions
9
+ - package-ecosystem: "github-actions"
10
+ # Workflow files stored in the default location of `.github/workflows`. (You don't need to specify `/.github/workflows` for `directory`. You can use `directory: "/"`.)
11
+ directory: "/"
12
+ schedule:
13
+ interval: "weekly"
14
+ # Maintain dependencies for Python
15
+ - package-ecosystem: "pip"
16
+ directory: "/" # Location of package manifests
17
+ schedule:
18
+ interval: "weekly"
.github/workflows/deploy-docker.yml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ name: Build and Deploy to Registry
2
+ on:
3
+ workflow_dispatch
4
+ jobs:
5
+ call-upload-to-registry:
6
+ uses: Travelgate/ci-workflows/.github/workflows/upload-to-registry.yml@main
7
+ permissions:
8
+ contents: write
9
+ secrets: inherit
.github/workflows/pr-test.yml ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: PR Python Poetry
2
+ on:
3
+ pull_request:
4
+ branches: [ "main" ]
5
+ jobs:
6
+ build:
7
+ runs-on:
8
+ labels: ["docker"]
9
+ container:
10
+ image: python:3.11.8
11
+ steps:
12
+ - uses: actions/checkout@v4
13
+ with:
14
+ fetch-depth: '0'
15
+ - name: Install poetry
16
+ run: |
17
+ python -m pip install --upgrade pip
18
+ pip install poetry huggingface_hub[cli]
19
+ - name: Login to huggingface
20
+ env:
21
+ HUGGINGFACE_TOKEN: ${{ secrets.HUGGINGFACE_TOKEN }}
22
+ run: |
23
+ huggingface-cli login --token=${HUGGINGFACE_TOKEN}
24
+ - name: Install dependencies
25
+ run: |
26
+ poetry update
27
+ poetry install
28
+ - name: Lint with Ruff
29
+ run: |
30
+ pip install ruff
31
+ ruff check . --output-format=github
32
+ continue-on-error: true
33
+ - name: Test with pytest
34
+ env:
35
+ APP_ENV: "dev"
36
+ HUGGINGFACE_TOKEN: ${{ secrets.HUGGINGFACE_TOKEN }}
37
+ HUGGINGFACE_ACCESS_TOKEN: ${{ secrets.HUGGINGFACE_TOKEN }}
38
+ API_KEY: ${{ secrets.API_KEY }}
39
+ HF_HUB_CACHE: "/home/.cache/huggingface"
40
+ run: poetry run pytest --cov=.
41
+ continue-on-error: false
.gitignore ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # poetry
98
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102
+ #poetry.lock
103
+
104
+ # pdm
105
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106
+ #pdm.lock
107
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108
+ # in version control.
109
+ # https://pdm.fming.dev/#use-with-ide
110
+ .pdm.toml
111
+
112
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113
+ __pypackages__/
114
+
115
+ # Celery stuff
116
+ celerybeat-schedule
117
+ celerybeat.pid
118
+
119
+ # SageMath parsed files
120
+ *.sage.py
121
+
122
+ # Environments
123
+ .env
124
+ .venv
125
+ env/
126
+ venv/
127
+ ENV/
128
+ env.bak/
129
+ venv.bak/
130
+
131
+ # Spyder project settings
132
+ .spyderproject
133
+ .spyproject
134
+
135
+ # Rope project settings
136
+ .ropeproject
137
+
138
+ # mkdocs documentation
139
+ /site
140
+
141
+ # mypy
142
+ .mypy_cache/
143
+ .dmypy.json
144
+ dmypy.json
145
+
146
+ # Pyre type checker
147
+ .pyre/
148
+
149
+ # pytype static type analyzer
150
+ .pytype/
151
+
152
+ # Cython debug symbols
153
+ cython_debug/
154
+
155
+ # PyCharm
156
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
159
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160
+ .idea/
161
+
162
+ .env
163
+ consul.env
164
+ .DS_Store
165
+ act
.pre-commit-config.yaml ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # See https://pre-commit.com for more information
2
+ # See https://pre-commit.com/hooks.html for more hooks
3
+ default_language_version:
4
+ python: python3.11
5
+ repos:
6
+ - repo: https://github.com/pre-commit/pre-commit-hooks
7
+ rev: v4.6.0
8
+ hooks:
9
+ - id: check-added-large-files
10
+ - id: check-toml
11
+ - id: check-yaml
12
+ args:
13
+ - --unsafe
14
+ - id: end-of-file-fixer
15
+ - id: trailing-whitespace
16
+ - repo: https://github.com/charliermarsh/ruff-pre-commit
17
+ rev: v0.4.1
18
+ hooks:
19
+ - id: ruff
20
+ args:
21
+ - --fix
22
+ - id: ruff-format
23
+ ci:
24
+ autofix_commit_msg: 🎨 [pre-commit.ci] Auto format from pre-commit.com hooks
25
+ autoupdate_commit_msg: ⬆ [pre-commit.ci] pre-commit autoupdate
.vscode/extensions.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ // See http://go.microsoft.com/fwlink/?LinkId=827846
3
+ // for the documentation about the extensions.json format
4
+ "recommendations": ["ms-python.python", "charliermarsh.ruff"]
5
+ }
.vscode/launch.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "0.2.0",
3
+ "configurations": [
4
+ {
5
+ "name": "Python: FastAPI",
6
+ "type": "debugpy",
7
+ "request": "launch",
8
+ "module": "uvicorn",
9
+ "args": [
10
+ "mappingservice.main:app",
11
+ "--reload"
12
+ ],
13
+ "justMyCode": false
14
+ },
15
+ {
16
+ "name": "Debug Tests",
17
+ "type": "debugpy",
18
+ "justMyCode": false,
19
+ "request": "launch",
20
+ "module": "pytest",
21
+ "console": "integratedTerminal",
22
+ "env": {
23
+ "PYTEST_ADDOPTS": "--no-cov"
24
+ },
25
+ }
26
+ ]
27
+ }
.vscode/settings.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "python.testing.pytestArgs": [
3
+ "tests"
4
+ ],
5
+ "python.testing.unittestEnabled": false,
6
+ "python.testing.pytestEnabled": true,
7
+
8
+ "[python]": {
9
+ "editor.formatOnSave": true,
10
+ "editor.codeActionsOnSave": {
11
+ "source.fixAll": "explicit",
12
+ "source.organizeImports": "explicit"
13
+ },
14
+ "editor.defaultFormatter": "charliermarsh.ruff"
15
+ }
16
+ }
Dockerfile ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM huggingface/transformers-pytorch-gpu as base
2
+ RUN pip install fastapi uvicorn pydantic pydantic-settings langdetect accelerate optimum pytest pytest-cov
3
+ RUN useradd -m -u 1000 user
4
+ WORKDIR /code
5
+
6
+ COPY --chown=user . /code
7
+ COPY logging.conf /code
8
+
9
+ ARG APP_ENV
10
+ ENV APP_ENV=${APP_ENV}
11
+ # Conditional commands based on APP_ENV
12
+ RUN if [ "$APP_ENV" = "dev" ]; then \
13
+ echo "Force dev dependencies"; \
14
+ pip install pytest pytest-cov; \
15
+ fi
16
+
17
+ RUN mkdir -p /home/.cache/huggingface
18
+ RUN mkdir -p /.cache
19
+ RUN chmod -R 777 /home/.cache/huggingface
20
+ RUN chmod -R 777 /.cache
21
+ RUN mkdir -p /code/room_environment-classifier_onxx
22
+ RUN chmod -R 777 /code/room_environment-classifier_onxx
23
+
24
+ # Remove once settings are fetched from Consul
25
+ # COPY .env /code
26
+
27
+ CMD ["uvicorn", "mappingservice.main:app","--host", "0.0.0.0", "--port", "80"]
README.md ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Environment
2
+ Set environment variables in .env file or via shell:
3
+
4
+ ```bash
5
+ export API_KEY="APIKEY TO USE THIS API"
6
+ export HUGGINGFACE_ACCESS_TOKEN="HF READ ACCESS TOKEN"
7
+ ```
8
+
9
+ ## Consul environment
10
+ To use the environment variables from Consul, set variables in consul.env file or via shell:
11
+
12
+ ```bash
13
+ TGX_CONSUL_AUTH_USER="CONSUL AUTH USER"
14
+ TGX_CONSUL_AUTH_PASS="CONSUL AUTH PASSWORD"
15
+ TGX_CONSUL_TOKEN="CONSUL TOKEN"
16
+ TGX_CONSUL_NODES="CONSUL_URLS_SEPARATED_BY_;"
17
+ ```
18
+
19
+ # Development
20
+ Dependencies and running the application are managed by Poetry.
21
+
22
+ ## Dependencies
23
+ Use Poetry to Update and install dependencies:
24
+
25
+ ```bash
26
+ poetry update
27
+ poetry install
28
+ ```
29
+
30
+ ## Build
31
+
32
+ ### Using docker compose
33
+ ```bash
34
+ docker compose build --no-cache
35
+ docker compose up -d
36
+ ```
37
+
38
+ **Note:** Docker Compose is set to run on port 8080; change the port in docker-compose.yml if needed.
39
+
40
+ ### Docker API
41
+ Check if docker daemon is running:
42
+
43
+ ```bash
44
+ sudo systemctl start docker
45
+ ```
46
+
47
+ Build docker:
48
+
49
+ ```bash
50
+ docker build -t mappingservice .
51
+ ```
52
+
53
+ ## Run
54
+
55
+
56
+ ### Console
57
+
58
+ ```bash
59
+ poetry run uvicorn mappingservice.main:app --reload
60
+ ```
61
+
62
+ Site runs at http://127.0.0.1:8000/ml-api
63
+
64
+
65
+ ### Docker
66
+ ```bash
67
+ docker run -d --name mycontainer -p 80:80 mappingservice
68
+ ```
69
+ ##### Run
70
+
71
+ Send Consul variables on Docker run
72
+
73
+ ###### API
74
+ ```bash
75
+ docker run -e TGX_CONSUL_NODES='URL_NODES' -e TGX_CONSUL_TOKEN='TOKEN' -e TGX_CONSUL_AUTH_USER='USER' -e TGX_CONSUL_AUTH_PASS='PWD' -d --name mycontainer -p 80:80 mappingservice
76
+ ```
77
+
78
+ ### Benchmark
79
+ Use Poetry to Update and install Locust dependencies:
80
+
81
+ ```bash
82
+ poetry update --only benchmark
83
+ poetry install --only benchmark
84
+ ```
85
+
86
+ Run Locust in terminal
87
+ ```bash
88
+ locust -f ./benchmark/
89
+ ```
90
+ Site runs at http://localhost:8089/
91
+
benchmark/__init__.py ADDED
File without changes
benchmark/locustfile.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+
4
+ from dotenv import load_dotenv
5
+ from locust import HttpUser, between, task
6
+
7
+ load_dotenv()
8
+
9
+
10
class ApiUser(HttpUser):
    """Locust user that load-tests the room prediction endpoint.

    Each simulated user waits between 1 and 2 seconds between tasks and posts
    a fixed sample payload to the API under ``host``.
    """

    host = 'http://127.0.0.1:8080/ml-api'
    wait_time = between(1, 2)

    @task
    def predict_all(self):
        """POST a sample room and mark the request failed unless it returns 200."""
        api_key = os.getenv('API_KEY')
        headers = {
            'Api-key': api_key,
            'Content-Type': 'application/json'
        }
        data = {
            "room_description": "room with sea view",
            "beds": [
                {"type": "double", "count": 2}
            ]
        }
        # Redact the key before printing: benchmark output tends to be shared
        # or stored, and the real secret must not end up in it.
        printable_headers = {**headers, 'Api-key': '***' if api_key else None}
        print("Headers:", printable_headers)
        print("Data being sent:", data)
        with self.client.post(
            "/predict/room/predict/all",
            headers=headers,
            data=json.dumps(data),
            catch_response=True,
        ) as response:
            print("Response status code:", response.status_code)
            print("Response data:", response.text)
            if response.status_code != 200:
                response.failure(f"Failed with {response.status_code}: {response.text}")
            else:
                response.success()
docker-compose.yml ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ services:
2
+ app:
3
+ image: &app mrml-room-api
4
+ build:
5
+ context: .
6
+ dockerfile: Dockerfile
7
+ network: host
8
+ args:
9
+ tag: local
10
+ hostname: app_local
11
+ user: "1000:1000"
12
+ ports:
13
+ - 8080:80
14
+ env_file:
15
+ - .env
16
+ volumes:
17
+ - .:/code
18
+ networks:
19
+ - webnet
20
+
21
+ networks:
22
+ webnet:
23
+ driver: bridge
24
+ # Uncomment ipam if OpenVPN is causing troubles
25
+ # ipam:
26
+ # config:
27
+ # - subnet: 172.16.57.0/24
logging.conf ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [loggers]
2
+ keys=root
3
+
4
+ [logger_root]
5
+ level=INFO
6
+ handlers=stream_handler
7
+
8
+ [handlers]
9
+ keys=stream_handler
10
+
11
+ [formatters]
12
+ keys=formatter
13
+
14
+ [handler_stream_handler]
15
+ class=StreamHandler
16
+ formatter=formatter
17
+ args=(sys.stdout,)
18
+
19
+ [formatter_formatter]
20
+ format=%(asctime)s %(name)-12s %(levelname)-8s %(message)s
mappingservice/__init__.py ADDED
File without changes
mappingservice/config.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ from pydantic import SecretStr
4
+ from pydantic_settings import (
5
+ BaseSettings,
6
+ PydanticBaseSettingsSource,
7
+ SettingsConfigDict,
8
+ )
9
+
10
+ from mappingservice.config_consul import ConsulConfigSettingsSource
11
+
12
+
13
class Settings(BaseSettings):
    """Service settings loaded from the environment, ``.env`` and Consul.

    When the ``APP_ENV`` environment variable equals ``dev`` (case-insensitive)
    the Consul source is left out of the source list.
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
    )

    api_key: SecretStr
    huggingface_access_token: SecretStr
    hf_hub_cache: str
    app_env: str = "prod"

    @classmethod
    def settings_customise_sources(
        cls,
        settings_cls,
        init_settings: PydanticBaseSettingsSource,
        env_settings: PydanticBaseSettingsSource,
        dotenv_settings: PydanticBaseSettingsSource,
        file_secret_settings: PydanticBaseSettingsSource,
    ):
        """Return the settings sources, appending Consul unless running in dev."""
        standard_sources = (
            init_settings,
            env_settings,
            dotenv_settings,
            file_secret_settings,
        )
        consul_source = ConsulConfigSettingsSource(settings_cls)

        running_in_dev = os.getenv("APP_ENV", "prod").lower() == "dev"
        if running_in_dev:
            return standard_sources
        return (*standard_sources, consul_source)
mappingservice/config_consul.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import logging.config
3
+ import random
4
+ from typing import Any
5
+
6
+ import requests
7
+ from pydantic import HttpUrl, SecretStr
8
+ from pydantic.fields import FieldInfo
9
+ from pydantic_settings import (
10
+ BaseSettings,
11
+ PydanticBaseSettingsSource,
12
+ SettingsConfigDict,
13
+ )
14
+ from requests.exceptions import HTTPError, RequestException, Timeout
15
+
16
+ logging.config.fileConfig("logging.conf", disable_existing_loggers=False)
17
+ logger = logging.getLogger(__name__)
18
+ logger.info("Logger Configured: config_consul")
19
+
20
+
21
class ConsulSettings(BaseSettings):
    """Connection details for Consul, with ``consul.env`` as the env file."""

    model_config = SettingsConfigDict(
        env_file="consul.env",
        env_file_encoding="utf-8",
    )

    # Node URLs in a single string; ConsulClient splits it on ";".
    tgx_consul_nodes: str
    # Basic-auth credentials sent with every Consul request.
    tgx_consul_auth_user: str
    tgx_consul_auth_pass: SecretStr
    # Value of the X-Consul-Token header.
    tgx_consul_token: SecretStr
27
+
28
+
29
class ConsulClient:
    """Minimal HTTP client for reading keys from Consul's KV endpoint."""

    def __init__(self, settings: ConsulSettings):
        """Pick one Consul node at random and prepare auth for later requests.

        Args:
            settings: Consul connection settings; ``tgx_consul_nodes`` holds
                the node URLs separated by ``;``.
        """
        logger.info(
            "Consul init:[%s], user:[%s]",
            settings.tgx_consul_nodes,
            settings.tgx_consul_auth_user,
        )

        # Ignore blank entries (e.g. from a trailing ";") so they are never
        # picked; fall back to the raw split if nothing usable remains.
        raw_nodes = settings.tgx_consul_nodes.split(";")
        consul_nodes = [node.strip() for node in raw_nodes if node.strip()]
        consul_url: str = random.choice(consul_nodes or raw_nodes)

        self.headers = {
            "X-Consul-Token": f"{settings.tgx_consul_token.get_secret_value()}"
        }
        self.auth = (
            settings.tgx_consul_auth_user,
            settings.tgx_consul_auth_pass.get_secret_value(),
        )
        # Drop a trailing slash so the request path does not become "//v1/kv".
        self.url = consul_url.rstrip("/")
        self.timeout = 20

    def get_key_value(self, key: str, dc: str) -> Any:
        """Fetch ``key`` from datacenter ``dc`` and return the decoded JSON body.

        Raises:
            requests.exceptions.RequestException: on connection errors,
                timeouts, or a non-2xx response (via ``raise_for_status``).
        """
        url = f"{self.url}/v1/kv/{key}?dc={dc}&raw=false"
        logger.info("Downloading keys from Consul: [%s, %s]", url, key)
        response = requests.get(
            url=url, headers=self.headers, auth=self.auth, timeout=self.timeout
        )
        response.raise_for_status()
        return response.json()
58
+
59
+
60
class ConsulConfigSettingsSource(PydanticBaseSettingsSource):
    """Settings source that reads the service configuration from Consul.

    Loading is best-effort: any request failure or an empty result yields an
    empty dict.
    """

    CONSUL_KEY = "mapping/apps/room/ml-api/config"
    CONSUL_DATACENTER = "gc-we1d"

    def get_field_value(
        self, field: FieldInfo, field_name: str
    ):
        """Unused: ``__call__`` is overridden and returns all values at once."""

    def prepare_field_value(
        self, field_name: str, field: FieldInfo, value: Any, value_is_complex: bool
    ) -> Any:
        """Unused: ``__call__`` is overridden and returns all values at once."""

    def __call__(self):
        """
        Customize settings source from Consul.
        Needs url and secrets to access consul to retrieve settings.

        Returns the JSON decoded from the Consul response, or ``{}`` when the
        request fails or the response is empty.
        """
        consul_settings = ConsulSettings()
        consul_cli = ConsulClient(consul_settings)

        logger.info("Getting consul key value: [%s]", self.CONSUL_KEY)
        try:
            # NOTE(review): Consul's KV endpoint normally answers with a list
            # of entries whose values are base64-encoded; confirm the payload
            # returned here is really the settings mapping.
            ret_json = consul_cli.get_key_value(
                self.CONSUL_KEY, self.CONSUL_DATACENTER
            )
        except (Timeout, HTTPError, RequestException, ValueError) as err:
            # ValueError covers an undecodable JSON body on requests versions
            # where that error is not a RequestException.
            logger.warning("Error get from Consul %s", err)
            return {}

        if not ret_json:
            logger.warning(
                "Consul key not found: [%s] in datacenter [%s]",
                self.CONSUL_KEY,
                self.CONSUL_DATACENTER,
            )
            return {}

        logger.info("Consul key read succesfully")
        return ret_json
mappingservice/constants.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# "na" is a placeholder language code used by the language detection model.
AVAILABLE_LANGUAGES = ["en", "es", "na"]

DEFAULT_LABEL = "No data"

DEFAULT_SCORE = 0.0

# Pipeline task names shared by the entries below.
_TEXT_TASK = "text-classification"
_TOKEN_TASK = "token-classification"

# For each model: its pipeline task plus the model id per language code.
MODEL_NAMES = {
    "bed_type": {
        "classification": _TEXT_TASK,
        "en": "travelgate/bed_type_model-classifier",
        "es": "travelgate/bed_type_model-classifier",
    },
    "room_category": {
        "classification": _TEXT_TASK,
        "en": "travelgate/room_category-classifier",
        "es": "travelgate/room_category-classifier",
    },
    "environment": {
        "classification": _TEXT_TASK,
        "en": "travelgate/room_environment-classifier",
        "es": "travelgate/room_environment-classifier",
    },
    "room_features": {
        "classification": _TOKEN_TASK,
        "en": "travelgate/feature_predicted_en",
        "es": "travelgate/feature_predicted_es",
    },
    "room_type": {
        "classification": _TEXT_TASK,
        "en": "travelgate/room_type-classifier",
        "es": "travelgate/room_type-classifier",
    },
    "room_view": {
        "classification": _TOKEN_TASK,
        "en": "travelgate/view_predicted_en",
        "es": "travelgate/view_predicted_es",
    },
    "lang_detect": {
        "classification": _TEXT_TASK,
        # "na" is used as a placeholder language key.
        "na": "papluca/xlm-roberta-base-language-detection",
    },
}

SPANISH_KEYWORDS = [
    "habitacion",
    "cama",
    "bano",
    "aire acondicionado",
    "television",
    "desayuno",
    "vista",
    "servicio",
    "caja fuerte",
    "ducha",
    "sabanas",
    "toallas",
    "limpieza",
    "recepcion",
    "piscina",
    "gimnasio",
    "ascensor",
    "estacionamiento",
    "secador de pelo",
    "armario",
    "escritorio",
    "telefono",
    "decoracion",
    "silla",
    "mesa",
    "luz",
    "ventana",
    "apartamento",
    "casa",
    "vistas",
    "presidencial",
    "estandar",
    "estudio",
    "terraza",
    "doble",
]

ENGLISH_KEYWORDS = {
    "apartment",
    "room",
    "house",
    "views",
    "presidential",
    "studio",
    "suite",
    "standard",
    "deluxe",
}
mappingservice/dependencies.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging.config
2
+ from functools import lru_cache
3
+
4
+ from fastapi import HTTPException, Security, status
5
+ from fastapi.security.api_key import APIKeyHeader
6
+
7
+ from mappingservice import config
8
+
9
+ logging.config.fileConfig("logging.conf", disable_existing_loggers=False)
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
@lru_cache
def get_settings() -> config.Settings:
    """Return the application settings, building them only on the first call.

    ``lru_cache`` memoizes this zero-argument function, so later calls reuse
    the instance created by the first one.
    """
    settings = config.Settings()
    return settings
16
+
17
+
18
# Expected API key, resolved from the settings when this module is imported.
API_KEY = get_settings().api_key.get_secret_value()
api_key_header_auth = APIKeyHeader(name="Api-key", auto_error=True)
# Shared mutable mapping; main.py fills it with the loaded model pipelines.
mc = {}


def get_api_key(api_key_header: str = Security(api_key_header_auth)):
    """Reject the request unless the ``Api-key`` header matches ``API_KEY``.

    Raises:
        HTTPException: 401 when the supplied key does not match.
    """
    import secrets  # function-scope import: only this check needs it

    # Compare as bytes in constant time; compare_digest rejects non-ASCII str,
    # and a plain != would leak timing information about the expected key.
    supplied = (api_key_header or "").encode("utf-8")
    expected = API_KEY.encode("utf-8")
    if not secrets.compare_digest(supplied, expected):
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid API Key"
        )
mappingservice/main.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging.config
2
+ import time
3
+ from contextlib import asynccontextmanager
4
+
5
+ import psutil
6
+ from fastapi import Depends, FastAPI, Request
7
+ from fastapi.responses import JSONResponse
8
+
9
+ import mappingservice.dependencies as deps
10
+ from mappingservice.constants import AVAILABLE_LANGUAGES, MODEL_NAMES
11
+ from mappingservice.dependencies import get_api_key, get_settings
12
+ from mappingservice.ms.model_loader import ModelLoader
13
+ from mappingservice.routers import admin, room
14
+ from mappingservice.utils import log_memory_usage, predict_language
15
+
16
+ logging.config.fileConfig("logging.conf", disable_existing_loggers=False)
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
@asynccontextmanager
async def lifespan(application: FastAPI):
    """Load every model/language pipeline into ``deps.mc`` before serving.

    After loading, logs the core count, total RAM, load time and the growth
    in resident memory. On shutdown the model mapping is cleared.
    """
    settings = get_settings()
    rss_before = psutil.Process().memory_info().rss

    started_at = time.time()

    loader = ModelLoader(settings, MODEL_NAMES)
    for model_name in MODEL_NAMES:
        logger.info(f"Loading model: {model_name}...")
        for lang in AVAILABLE_LANGUAGES:
            pipeline = loader.get_model(model_name, lang)
            # Create the per-model mapping on first use, then store by language.
            deps.mc.setdefault(model_name, {})[lang] = pipeline

    elapsed_time = time.time() - started_at
    bytes_per_gb = 1024 ** 3
    num_cores = psutil.cpu_count(logical=True)
    total_ram = psutil.virtual_memory().total / bytes_per_gb
    rss_after = psutil.Process().memory_info().rss
    total_memory_used = (rss_after - rss_before) / bytes_per_gb

    banner = "*" * 60
    logger.info(banner)
    logger.info(f"* Number of Cores: {num_cores}")
    logger.info(f"* Total RAM: {total_ram:.2f} GB")

    logger.info(
        f"* AI Models loaded in {elapsed_time:.2f} seconds, "
        f"using {total_memory_used:.2f} GB"
    )
    logger.info(banner)

    yield
    # Shutdown: drop the references to the loaded pipelines.
    deps.mc.clear()


app = FastAPI(root_path="/ml-api", lifespan=lifespan)
61
+
62
+
63
@app.middleware("http")
async def handle_exceptions(request: Request, call_next):
    """Turn any unhandled exception into a JSON 500 response.

    Memory usage is logged once per request, whether it succeeded or failed.
    """
    try:
        return await call_next(request)
    except Exception as e:
        # logger.exception records the message together with the traceback.
        logger.exception("Unhandled error: %s", e)

        return JSONResponse(status_code=500, content={
            "message": "Internal Server Error"
        })
    finally:
        # Runs on both paths, so the except branch must not log memory too.
        log_memory_usage()
79
+
80
+
81
@app.middleware("http")
async def detect_language(request: Request, call_next):
    """Detect the room description's language and store it on the request.

    The description comes from the ``room_description`` query parameter or,
    failing that, from the same key of a JSON object body. The result is
    stored in ``request.state.predicted_language`` and is always one of
    ``AVAILABLE_LANGUAGES``; it stays ``"en"`` when there is no description
    or detection fails.
    """
    language = "en"
    # Set the default up front so the attribute exists on every request,
    # including those that skip detection below.
    request.state.predicted_language = language

    room_description = request.query_params.get("room_description", "")

    if not room_description:
        try:
            body = await request.json()
        except ValueError:
            # Missing or non-JSON body: there is nothing to detect from.
            body = None
        # Only a JSON object can carry the key; lists/scalars have no .get().
        if isinstance(body, dict):
            room_description = body.get("room_description", "")

    if not room_description or not isinstance(room_description, str):
        return await call_next(request)

    try:
        language = predict_language(room_description)

        if language not in AVAILABLE_LANGUAGES:
            logger.error(f"Unsupported language for room description: {room_description}. Falling back to model prediction.")  # noqa: E501

            language = deps.mc['lang_detect']['na'].predict(room_description)[0][0]['label']  # noqa: E501
            if language not in AVAILABLE_LANGUAGES:
                logger.error(f"Unsupported language for room description using model prediction: {room_description}. Falling back to English.")  # noqa: E501
                language = "en"
    except Exception as e:
        logger.error(f"Error detecting language: {e}")
        # Do not let a partially computed, unsupported value leak through.
        language = "en"

    request.state.predicted_language = language

    return await call_next(request)


app.include_router(room.router, dependencies=[Depends(get_api_key)])
app.include_router(admin.router)
mappingservice/models.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging.config
2
+ from enum import Enum
3
+ from typing import List, Union
4
+
5
+ from pydantic import BaseModel, validator
6
+
7
+ logger = logging.getLogger(__name__)
8
+
9
+
10
class PredictionResponse(BaseModel):
    """A single prediction: a label and its score."""

    label: str
    score: float
13
+
14
+
15
class BedType(str, Enum):
    """Closed set of bed types; members are also plain strings."""

    # Empty-string member.
    none = ""
    single = "single"
    double = "double"
    queen = "queen"
    king = "king"
    california_king = "california_king"
    bunk = "bunk"
    sofa = "sofa"
    rollaway = "rollaway"
    futon = "futon"
26
+
27
+
28
# Alternative spellings (spaces removed, lower-cased) mapped to a BedType.
# Kept at module level so the table is built once, not per BedData instance.
_BED_TYPE_VARIATIONS = {
    "individual": BedType.single,
    "camaindividual": BedType.single,
    "doble": BedType.double,
    "camadoble": BedType.double,
    "reina": BedType.queen,
    "queenbed": BedType.queen,
    "rey": BedType.king,
    "californiaking": BedType.california_king,
    "litera": BedType.bunk,
    "sofá": BedType.sofa,
    "sofacama": BedType.sofa,
    "plegable": BedType.rollaway,
    "futón": BedType.futon,
    "twin": BedType.single,
    "twinbed": BedType.single,
    "singlebed": BedType.single,
    "doublebed": BedType.double,
    "largedouble": BedType.queen,
    "extralargedouble": BedType.king,
    "bunkbed": BedType.bunk,
    "couch": BedType.sofa,
    "airmattress": BedType.futon,
    "floormattress": BedType.futon,
}


class BedData(BaseModel):
    """A bed entry: its type and how many of them.

    Invalid or missing values are normalised to ``None`` by the validators
    instead of being rejected.
    """

    type: Union[BedType, None] = None
    count: Union[int, None] = None

    @validator("count", pre=True, always=True)
    def validate_count(cls, v):
        """Keep a positive count; anything else becomes ``None``."""
        try:
            return v if v and v > 0 else None
        except TypeError:
            # Values that cannot be compared with 0 (e.g. strings) are invalid.
            return None

    @validator("type", pre=True, always=True)
    def validate_type(cls, v):
        """Convert the raw value to a ``BedType``; unknown values become ``None``."""
        try:
            return BedType(v.lower()) if v else None
        except (ValueError, AttributeError):
            # AttributeError: the raw value is not a string and has no .lower().
            return None

    @classmethod
    def create_valid_bed(cls, type: Union[str, None], count: Union[int, None]):
        """Build a ``BedData`` from a known type and a positive count.

        Returns ``None`` when either value is missing, the count is not
        positive, or the type is not a ``BedType`` value.
        """
        if type and count and count > 0:
            try:
                type_enum = BedType(type.lower())
                return cls(type=type_enum, count=count)
            except ValueError:
                logger.error(f"Invalid bed type: {type}")
        return None

    def __init__(self, **data):
        """Map known alternative spellings of ``type`` before validation."""
        bed_type = data.get("type")
        # Only strings can be normalised; other values go straight to the
        # validators, which turn them into None.
        if bed_type and isinstance(bed_type, str):
            normalized_bed_type = bed_type.replace(" ", "").lower()
            data["type"] = _BED_TYPE_VARIATIONS.get(normalized_bed_type, bed_type)
        super().__init__(**data)
85
+
86
+
87
class RoomData(BaseModel):
    """Request payload: a room description and optional bed data."""

    room_description: str
    # NOTE(review): Predictions.beds is a list of BedData and the benchmark
    # posts a list here; confirm that a single BedData is intended.
    beds: Union[BedData, None]
90
+
91
+
92
class Predictions(BaseModel):
    """All predictions produced for one room."""

    # Single-result predictions.
    type: PredictionResponse
    category: PredictionResponse
    # Multi-result predictions.
    environment: List[PredictionResponse]
    feature: List[PredictionResponse]
    view: List[PredictionResponse]
    language_detected: str
    beds: List[BedData]
100
+
101
+
102
+
103
class AllPredictionsResponse(BaseModel):
    """Response envelope wrapping a ``Predictions`` object."""

    predictions: Predictions
105
+
mappingservice/ms/__init__.py ADDED
File without changes
mappingservice/ms/ml_models/__init__.py ADDED
File without changes
mappingservice/ms/ml_models/base.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+
3
+
4
class Base:
    """Common base for the room prediction wrappers.

    Subclasses implement ``predict`` and may override
    ``CONFIDENCE_THRESHOLD``.
    """

    CONFIDENCE_THRESHOLD = 0.95

    def predict(self, data, pipeline, language):
        """Run the model on ``data``; must be provided by subclasses."""
        raise NotImplementedError()

    @staticmethod
    def process_ner_results(ner_results, confidence_threshold):
        """Turn raw token results into a list of ``{"word", "score"}`` entries.

        Tokens scoring below ``confidence_threshold`` are dropped. A token
        whose word starts with ``##`` is treated as the continuation of the
        token right before it and is appended to that word (the entry keeps
        the score of its first token).

        Args:
            ner_results: Iterable of dicts with at least ``"word"`` and
                ``"score"`` keys.
            confidence_threshold: Minimum score a token needs to be kept.

        Returns:
            A list of dicts, one per kept word, in input order.
        """
        processed_entities = []
        # True only while the token immediately before the current one was
        # kept. Without this, a continuation whose own head token had been
        # filtered out would be glued onto an unrelated earlier word.
        previous_token_kept = False

        for result in ner_results:
            word = result["word"]

            if result["score"] < confidence_threshold:
                previous_token_kept = False
                continue

            if word.startswith("##"):
                if previous_token_kept and processed_entities:
                    processed_entities[-1]["word"] += word[2:]
                continue

            processed_entities.append({"word": word, "score": result["score"]})
            previous_token_kept = True

        return processed_entities

    @staticmethod
    def preprocess_data(data, language):
        """Clean a description before it is sent to a token model.

        For Spanish (``language == "es"``) a few English terms are replaced
        by Spanish equivalents using plain substring replacement. For every
        language, words of one or two characters and standalone numbers are
        removed and whitespace is collapsed.

        Args:
            data: Description text.
            language: Language code of the description.

        Returns:
            The cleaned text.
        """
        translations = {
            "deluxe": "de lujo",
            "twin": "individual",
            "size": "tamaño",
            "premium": "calidad",
            "double": "doble",
            "room": "habitacion",
        }

        if language == "es":
            for key, value in translations.items():
                data = data.replace(key, value)

        data = re.sub(r'\b\w{1,2}\b', '', data)
        data = re.sub(r'\b\d+\b', '', data)
        # Collapse repeated whitespace and trim both ends.
        data = re.sub(r'\s+', ' ', data).strip()

        return data
mappingservice/ms/ml_models/bed_type.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+
3
+ from .base import Base
4
+
5
+
6
class BedType(Base):
    """Bed type predictor plus a helper to pull quantities out of text."""

    def predict(self, data, pipeline, language):
        """Return the first entry of the first result produced by ``pipeline``."""
        first_result = pipeline.predict(data)[0]
        return first_result[0]

    @staticmethod
    def extract_bed_numbers(description):
        """Return every quantity found in ``description`` as a list of ints.

        Digits are converted directly; the words one/two/three and
        uno/dos/tres (any case) are mapped to 1-3.
        """
        number_words = {
            "one": 1,
            "two": 2,
            "three": 3,
            "uno": 1,
            "dos": 2,
            "tres": 3,
        }
        quantity_re = re.compile(
            r"\b(\d+|\bone\b|\btwo\b|\bthree\b|\buno\b|\bdos\b|\btres\b)\b",
            re.IGNORECASE,
        )
        return [
            int(token) if token.isdigit() else number_words.get(token.lower(), 1)
            for token in quantity_re.findall(description)
        ]
mappingservice/ms/ml_models/environment.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+
3
+ from .base import Base
4
+
5
+ logger = logging.getLogger(__name__)
6
+
7
+
8
class Environment(Base):
    """Room environment predictor that can return several labels."""

    CONFIDENCE_THRESHOLD = 0.5

    def predict(self, data, pipeline, language):
        """Return every label scoring strictly above the threshold.

        The word "connecting" is rewritten to "connected" before prediction.

        Returns:
            A list of dicts with ``label``, ``score`` and
            ``language_detected`` keys (possibly empty).
        """
        normalized = data.replace("connecting", "connected")
        label_scores = pipeline.predict(normalized)[0]

        predicted_results = [
            {
                "label": item["label"],
                "score": item["score"],
                "language_detected": language,
            }
            for item in label_scores
            if item["score"] > self.CONFIDENCE_THRESHOLD
        ]

        logger.info(f"Predicted Labels and Scores: {predicted_results}")

        return predicted_results
mappingservice/ms/ml_models/room_category.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .base import Base
2
+
3
+
4
class RoomCategory(Base):
    """Room category predictor with a "default" fallback label."""

    CONFIDENCE_THRESHOLD = 0.7

    def predict(self, data: str, pipeline, language) -> dict:
        """Return the top label, or "default" when it is not confident enough.

        Returns:
            A dict with ``language_detected``, ``label`` and ``score``. The
            score is that of the top prediction, or 0 when the pipeline
            returned nothing.
        """
        predictions = pipeline.predict(data)
        top = predictions[0][0] if predictions else None

        score = top["score"] if top is not None else 0
        confident = top is not None and score > self.CONFIDENCE_THRESHOLD
        label = top["label"] if confident else "default"

        return {
            "language_detected": language,
            "label": label,
            "score": score,
        }
mappingservice/ms/ml_models/room_features.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .base import Base
2
+
3
+
4
class RoomFeatures(Base):
    """Room feature extractor built on a token-level pipeline."""

    CONFIDENCE_THRESHOLD = 0.5

    def predict(self, data, pipeline, language):
        """Return the features found in ``data``.

        The text is cleaned with ``preprocess_data`` and the raw pipeline
        output is filtered and merged with ``process_ner_results``.
        """
        cleaned = self.preprocess_data(data, language)
        raw_tokens = pipeline.predict(cleaned)
        features = self.process_ner_results(raw_tokens, self.CONFIDENCE_THRESHOLD)

        return {
            "language_detected": language,
            "features": features,
        }
19
+
mappingservice/ms/ml_models/room_type.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .base import Base
2
+
3
+
4
class RoomType(Base):
    """Room type predictor with a "default" fallback label."""

    CONFIDENCE_THRESHOLD = 0.7

    def predict(self, data: str, pipeline, language) -> dict:
        """Return the top label, or "default" when it is not confident enough.

        Returns:
            A dict with ``language_detected``, ``label`` and ``score``. The
            score is that of the top prediction, or 0 when the pipeline
            returned nothing.
        """
        predictions = pipeline.predict(data)
        top = predictions[0][0] if predictions else None

        score = top["score"] if top is not None else 0
        confident = top is not None and score > self.CONFIDENCE_THRESHOLD
        label = top["label"] if confident else "default"

        return {
            "language_detected": language,
            "label": label,
            "score": score,
        }
mappingservice/ms/ml_models/room_view.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .base import Base
2
+
3
+
4
class RoomView(Base):
    """Room view extractor built on a token-level pipeline."""

    CONFIDENCE_THRESHOLD = 0.6

    def predict(self, data, pipeline, language):
        """Return the views found in ``data``.

        ``has_views`` reflects whether the pipeline returned anything at all,
        i.e. before the confidence threshold is applied to build ``views``.
        """
        cleaned = self.preprocess_data(data, language)
        ner_results = pipeline.predict(cleaned)
        views = self.process_ner_results(ner_results, self.CONFIDENCE_THRESHOLD)

        return {
            "language_detected": language,
            "has_views": bool(ner_results),
            "views": views,
        }
mappingservice/ms/model_loader.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import time
3
+
4
+ import torch
5
+ from optimum.bettertransformer import BetterTransformer
6
+ from transformers import (
7
+ AutoModelForSequenceClassification,
8
+ AutoModelForTokenClassification,
9
+ AutoTokenizer,
10
+ pipeline,
11
+ )
12
+
13
+ from mappingservice.config import Settings
14
+
15
+
16
class ModelLoader:
    """Load models on demand and cache one pipeline per (model, language).

    ``model_names`` maps a logical model name to a dict that holds a
    ``"classification"`` entry (the pipeline task) and one entry per language
    code pointing at the checkpoint to load.
    """

    def __init__(self, settings: "Settings", model_names):
        self.settings = settings
        self.model_names = model_names
        # {logical model name: {language: pipeline}}
        self.model_cache = {}
        self.token = self.settings.huggingface_access_token.get_secret_value()
        # Task used by ``load_model`` when the caller does not pass one.
        self.classification = "token-classification"

    def load_model(self, model_name, classification=None):
        """Load a checkpoint and its tokenizer.

        Args:
            model_name: Checkpoint identifier to load.
            classification: Pipeline task; ``"token-classification"`` selects
                the token model class, anything else the sequence model
                class. Defaults to ``self.classification``.

        Returns:
            A ``(model, tokenizer)`` tuple.
        """
        task = classification or self.classification
        start_time = time.time()

        if task == "token-classification":
            model_class = AutoModelForTokenClassification
        else:
            model_class = AutoModelForSequenceClassification
        model = model_class.from_pretrained(model_name, token=self.token)

        tokenizer = AutoTokenizer.from_pretrained(model_name, token=self.token)
        model = BetterTransformer.transform(model)
        end_time = time.time()
        logging.info(
            f"Model {model_name} loaded in {end_time - start_time:.2f} seconds."
        )

        return model, tokenizer

    def get_model(self, model_name, language):
        """Return the pipeline for ``model_name`` in ``language``.

        Pipelines are built once and cached under the logical model name and
        language, so later calls with the same arguments reuse them.

        Returns:
            The pipeline, or None when the model name is unknown or has no
            checkpoint for ``language``.
        """
        cached_by_language = self.model_cache.get(model_name) or {}
        if language in cached_by_language:
            logging.info(f"Using cached model for language: {language}")
            return cached_by_language[language]

        model_config = self.model_names.get(model_name)
        if not model_config:
            logging.warning(f"Unknown model: {model_name}")
            return None

        classification = model_config.get("classification")
        # Kept in sync for code that still reads the attribute.
        self.classification = classification

        checkpoint = model_config.get(language)
        if not checkpoint:
            logging.warning(f"Unsupported language: {language}")
            return None

        model, tokenizer = self.load_model(checkpoint, classification)

        pipeline_dict = {
            'task': classification,
            'model': model,
            'tokenizer': tokenizer,
            'token': self.token,
            'device': 0 if torch.cuda.is_available() else -1,
        }

        if classification == "token-classification":
            pipeline_dict.update({'framework': 'pt'})

        if classification == "text-classification":
            pipeline_dict.update({'top_k': 1})

        model_pipeline = pipeline(**pipeline_dict)

        # Store under the same key the lookup above uses, and keep pipelines
        # already cached for other languages of this model.
        self.model_cache.setdefault(model_name, {})[language] = model_pipeline

        return model_pipeline
mappingservice/routers/__init__.py ADDED
File without changes
mappingservice/routers/admin.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter
2
+
3
+ router = APIRouter(tags=["admin"])
4
+
5
+
6
@router.get("/health")
async def health():
    # Health probe: always answers with a fixed payload.
    payload = {"health": "OK"}
    return payload
9
+
10
+
11
@router.get("/status")
async def status():
    # Status probe: always answers with a fixed payload.
    payload = {"status": "OK"}
    return payload
mappingservice/routers/room.py ADDED
@@ -0,0 +1,185 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import time
3
+ from concurrent.futures import ThreadPoolExecutor
4
+
5
+ from fastapi import APIRouter, Query, Request
6
+
7
+ from mappingservice.constants import DEFAULT_LABEL, DEFAULT_SCORE
8
+ from mappingservice.dependencies import (
9
+ mc,
10
+ )
11
+ from mappingservice.models import (
12
+ AllPredictionsResponse,
13
+ PredictionResponse,
14
+ Predictions,
15
+ RoomData,
16
+ )
17
+ from mappingservice.ms.ml_models.bed_type import BedType as BedTypeModel
18
+ from mappingservice.ms.ml_models.environment import Environment
19
+ from mappingservice.ms.ml_models.room_category import RoomCategory
20
+ from mappingservice.ms.ml_models.room_features import RoomFeatures
21
+ from mappingservice.ms.ml_models.room_type import RoomType
22
+ from mappingservice.ms.ml_models.room_view import RoomView
23
+ from mappingservice.utils import (
24
+ get_bed_predictions,
25
+ process_predictions,
26
+ safe_round,
27
+ )
28
+
29
+ logging.basicConfig(
30
+ level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %(message)s"
31
+ )
32
+ logger = logging.getLogger(__name__)
33
+
34
+ router = APIRouter(
35
+ prefix="/predict/room", tags=["room"], responses={404: {"description": "Not found"}}
36
+ )
37
+
38
+
39
def get_room_type_prediction(room_description: str, language: str = "en"):
    """Predict the room type of a description.

    Uses the ``RoomType`` wrapper so the confidence threshold and the
    "default" fallback label are applied.

    Returns:
        A dict with the prediction under the ``"msg"`` key.
    """
    room_type_pipeline = mc["room_type"][language]
    prediction = RoomType().predict(room_description, room_type_pipeline, language)
    return {"msg": prediction}
43
+
44
+
45
def get_view_prediction(room_description: str, language: str = "en"):
    """Predict the views of a description; result is under "view_prediction"."""
    view_pipeline = mc["room_view"][language]
    prediction = RoomView().predict(room_description, view_pipeline, language)
    return {"view_prediction": prediction}
49
+
50
+
51
def get_room_category_prediction(room_description: str, language: str = "en"):
    """Predict the room category of a description; result is under "msg"."""
    category_pipeline = mc["room_category"][language]
    prediction = RoomCategory().predict(room_description, category_pipeline, language)
    return {"msg": prediction}
55
+
56
+
57
def get_feature_prediction(room_description: str, language: str = "en"):
    """Predict the features of a description; result is under "feature_prediction"."""
    feature_pipeline = mc["room_features"][language]
    prediction = RoomFeatures().predict(room_description, feature_pipeline, language)
    return {"feature_prediction": prediction}
61
+
62
+
63
def get_room_environment_prediction(room_description: str, language: str = "en"):
    """Predict the environment labels of a description; result is under "msg"."""
    environment_pipeline = mc["environment"][language]
    prediction = Environment().predict(
        room_description, environment_pipeline, language
    )
    return {"msg": prediction}
67
+
68
+
69
@router.post("/predict/beds")
async def predict_beds(request: Request, room_description: str = Query(...)):  # noqa: E501
    # Bed type prediction using the language stored on the request state.
    language = request.state.predicted_language
    bed_pipeline = mc["bed_type"][language]
    return BedTypeModel().predict(room_description, bed_pipeline, language)
77
+
78
+
79
@router.get("/type")
async def predict_room_type_endpoint(request: Request, room_description: str = Query(...)):  # noqa: E501
    # Room type prediction using the language stored on the request state.
    language = request.state.predicted_language
    room_type_pipeline = mc["room_type"][language]
    return RoomType().predict(room_description, room_type_pipeline, language)
87
+
88
+
89
@router.get("/category")
async def predict_room_category_endpoint(request: Request, room_description: str = Query(...)):  # noqa: E501
    # Returns the raw output of the English category pipeline.
    # NOTE(review): the language is hard-coded to 'en' and the request's
    # predicted language is ignored, unlike the /type endpoint - confirm.
    category_pipeline = mc["room_category"]["en"]
    return category_pipeline.predict(room_description)
93
+
94
+
95
@router.get("/environment")
async def predict_room_environment_endpoint(request: Request, room_description: str = Query(...)):  # noqa: E501
    # Returns the raw output of the English environment pipeline.
    # NOTE(review): the language is hard-coded to 'en' and the request's
    # predicted language is ignored, unlike the /type endpoint - confirm.
    environment_pipeline = mc["environment"]["en"]
    return environment_pipeline.predict(room_description)
99
+
100
+
101
@router.get("/view")
async def predict_view_endpoint(request: Request, room_description: str = Query(...)):  # noqa: E501
    # Returns the raw output of the English view pipeline.
    # NOTE(review): the language is hard-coded to 'en' and the request's
    # predicted language is ignored, unlike the /type endpoint - confirm.
    view_pipeline = mc["room_view"]["en"]
    return view_pipeline.predict(room_description)
105
+
106
+
107
@router.get("/feature")
async def predict_feature_endpoint(request: Request, room_description: str = Query(...)):  # noqa: E501
    # Returns the raw output of the English feature pipeline.
    # NOTE(review): the language is hard-coded to 'en' and the request's
    # predicted language is ignored, unlike the /type endpoint - confirm.
    feature_pipeline = mc["room_features"]["en"]
    return feature_pipeline.predict(room_description)
111
+
112
+
113
@router.post("/predict/all", response_model=AllPredictionsResponse)
async def predict_all(request: Request, room_data: RoomData):
    """Run every room model on one description and return the combined result.

    Type, category, environment, feature and view predictions are computed in
    a thread pool. Beds come from the request when it carries any bed with
    both a type and a count; otherwise they are extracted from the
    description text.
    """
    start_time = time.time()
    language = request.state.predicted_language
    description = room_data.room_description

    # NOTE(review): the results are collected synchronously inside this
    # coroutine, so the handler does not yield while the models run.
    with ThreadPoolExecutor() as executor:
        type_future = executor.submit(
            get_room_type_prediction, description, language
        )
        category_future = executor.submit(
            get_room_category_prediction, description, language
        )
        environment_future = executor.submit(
            get_room_environment_prediction, description, language
        )
        feature_future = executor.submit(
            get_feature_prediction, description, language
        )
        view_future = executor.submit(get_view_prediction, description, language)

        type_pred = type_future.result()["msg"]
        category_pred = category_future.result()["msg"]
        environment_pred_results = environment_future.result()["msg"]
        feature_pred_results = feature_future.result()["feature_prediction"]
        view_pred_results = view_future.result()["view_prediction"]

    # ``beds`` may be missing or a single object; normalise it to a list so
    # the filtering below never iterates over None or over a model's fields.
    provided_beds = room_data.beds
    if provided_beds is None:
        provided_beds = []
    elif not isinstance(provided_beds, (list, tuple)):
        provided_beds = [provided_beds]

    bed_predictions = [
        bed_data
        for bed_data in provided_beds
        if bed_data.type is not None and bed_data.count is not None
    ]

    if not bed_predictions:
        logger.debug("No bed data provided or valid; extracting from description.")
        extracted_beds = get_bed_predictions(description)
        if extracted_beds:
            bed_predictions.extend(extracted_beds)

    end_time = time.time()
    total_time = end_time - start_time
    logger.info(f"Total processing time: {total_time:.3f} seconds")

    formatted_predictions = {
        "type": {
            "label": type_pred.get("label", DEFAULT_LABEL),
            "score": safe_round(type_pred.get("score", DEFAULT_SCORE), 3),
        },
        "category": {
            "label": category_pred.get("label", DEFAULT_LABEL),
            "score": safe_round(category_pred.get("score", DEFAULT_SCORE), 3),
        },
    }

    # ``process_predictions`` returns None when nothing is left, hence the
    # ``or []`` fallbacks below.
    env_preds = process_predictions(environment_pred_results)
    feat_preds = process_predictions(
        feature_pred_results.get("features", []), label_key="word"
    )
    view_preds = process_predictions(
        view_pred_results.get("views", []), label_key="word"
    )

    predictions = Predictions(
        type=PredictionResponse(**formatted_predictions["type"]),
        category=PredictionResponse(**formatted_predictions["category"]),
        environment=[PredictionResponse(**pred) for pred in env_preds or []],
        feature=[PredictionResponse(**pred) for pred in feat_preds or []],
        view=[PredictionResponse(**pred) for pred in view_preds or []],
        language_detected=language,
        beds=bed_predictions,
    )

    return AllPredictionsResponse(predictions=predictions)
mappingservice/utils.py ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging.config
2
+ import re
3
+ from typing import List
4
+
5
+ import psutil
6
+ from langdetect import LangDetectException, detect
7
+
8
+ from mappingservice.constants import ENGLISH_KEYWORDS, SPANISH_KEYWORDS
9
+ from mappingservice.models import BedData, BedType
10
+
11
+ logging.basicConfig(
12
+ level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %(message)s"
13
+ )
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
def log_memory_usage():
    """Log the RSS and VMS memory figures of the current process."""
    usage = psutil.Process().memory_info()
    logging.info(f"Memory usage: RSS={usage.rss}, VMS={usage.vms}")
21
+
22
+
23
def safe_round(value, decimals=0):
    """Round ``value`` to ``decimals`` places; return 0 for non-numeric input."""
    if not isinstance(value, (int, float)):
        return 0
    return round(value, decimals)
25
+
26
+
27
def process_predictions(predictions, score_key="score", label_key="label"):
    """Normalise raw predictions into ``{"label", "score"}`` dicts.

    Entries that are not dicts, or whose score is not greater than zero, are
    skipped. Scores are rounded to three decimals and a missing label becomes
    "No data".

    Returns:
        The list of normalised entries, or None when nothing is left.
    """
    formatted = []
    for entry in predictions:
        if not isinstance(entry, dict):
            continue
        if not (entry.get(score_key, 0) > 0):
            continue
        formatted.append(
            {
                "label": entry.get(label_key, "No data"),
                "score": round(entry.get(score_key, 0), 3),
            }
        )
    return formatted or None
32
+
33
+
34
def parse_model_output(predictions):
    """Convert prediction dicts into ``BedData`` objects.

    Only entries with a score above 0.5, a positive count and a type that is
    a ``BedType`` member name are converted. Anything that is not a list of
    dicts yields an empty list.
    """
    threshold = 0.5
    bed_data_list = []

    is_list_of_dicts = isinstance(predictions, list) and all(
        isinstance(pred, dict) for pred in predictions
    )
    if not is_list_of_dicts:
        return bed_data_list

    for prediction in predictions:
        bed_type_name = prediction.get("type")
        count = prediction.get("count", 0)
        score = prediction.get("score", 0)

        if not (score > threshold and count > 0):
            continue

        if bed_type_name in BedType._member_names_:
            bed_data_list.append(BedData(type=BedType[bed_type_name], count=count))
        else:
            logger.debug(f"Unsupported bed type: {bed_type_name}")

    return bed_data_list
56
+
57
+
58
def get_bed_predictions(description: str):
    """Extract bed types and counts from a free-text description.

    Two patterns are applied to the lower-cased text: "<count> <type> bed(s)"
    (count defaults to 1) and "<type> or <type>" (each named type adds 1).
    Names that are not ``BedType`` member names are ignored.

    Returns:
        A list of ``BedData`` objects, one per bed type found (may be empty).
    """
    logger.debug(f"Extracting bed predictions from description: {description}")

    text = description.lower()

    def _to_bed_type(raw_name):
        # Map a matched name onto a BedType member, or None when unknown.
        name = raw_name.strip().lower()
        return BedType[name] if name in BedType._member_names_ else None

    counted_bed_re = re.compile(
        r"(\d+)?\s*\b(king|queen|double|single|twin|bunk|sofa|rollaway|futon|"
        r"cama\s*king|cama\s*queen|cama\s*double|cama\s*single|cama\s*twin|"
        r"cama\s*bunk|cama\s*sofa|cama\s*rollaway|cama\s*futon)\b\s*(beds?|camas?)",
        re.IGNORECASE,
    )
    alternative_bed_re = re.compile(
        r"\b(king|queen|double|single|twin)\s+or\s+(king|queen|double|single|twin)\b",
        re.IGNORECASE,
    )

    counted_matches = counted_bed_re.findall(text)
    alternative_matches = alternative_bed_re.findall(text)

    counts_by_type = {}

    # A later explicit mention of the same type replaces the earlier count.
    for quantity, raw_name, _unit in counted_matches:
        count = int(quantity) if quantity else 1
        bed_type = _to_bed_type(raw_name)
        if bed_type:
            counts_by_type[bed_type] = count

    for alternatives in alternative_matches:
        for raw_name in alternatives:
            bed_type = _to_bed_type(raw_name)
            if bed_type:
                counts_by_type[bed_type] = counts_by_type.get(bed_type, 0) + 1

    bed_data_list = [
        BedData(type=bed_type, count=count)
        for bed_type, count in counts_by_type.items()
    ]

    if not bed_data_list:
        logger.warning("No valid bed data found from extracted information.")

    return bed_data_list
107
+
108
def extract_bed_numbers(description: str):
    """Return the quantities that directly precede "bed" in ``description``.

    Digits and the words one/two/three (any case) are recognised.
    """
    number_words = {"one": 1, "two": 2, "three": 3}
    quantity_re = re.compile(
        r"(\d+|\bone\b|\btwo\b|\bthree\b)\s*bed", re.IGNORECASE
    )
    return [
        int(number_words.get(token.lower(), token))
        for token in quantity_re.findall(description)
    ]
121
+
122
+
123
def validate_bed_data(beds: "List[BedData]") -> "List[BedData]":
    """Keep only the beds that have a type and a positive count.

    A bed whose count is None is treated as invalid instead of raising on
    the comparison.

    Args:
        beds: Beds to filter.

    Returns:
        The valid beds, in input order (possibly empty).
    """
    valid_beds = [
        bed
        for bed in beds
        if bed.type and bed.count is not None and bed.count > 0
    ]
    if not valid_beds:
        logger.info(f"No valid beds found in {beds}")
    return valid_beds
128
+
129
+
130
def is_list_of_lists(variable):
    """Return True when ``variable`` is a list whose items are all lists.

    An empty list counts as a list of lists.
    """
    return isinstance(variable, list) and all(
        isinstance(item, list) for item in variable
    )
134
+
135
+
136
def predict_language(text):
    """Guess whether ``text`` is Spanish ("es") or English ("en").

    Spanish keywords are checked first, then English ones (substring match
    on the lower-cased text). When no keyword matches, ``detect`` is used and
    its answer is accepted only if it is "en" or "es".

    Returns:
        "es", "en", or None when the language could not be determined.
    """
    lowered = text.lower()

    if any(keyword in lowered for keyword in SPANISH_KEYWORDS):
        return "es"

    if any(keyword in lowered for keyword in ENGLISH_KEYWORDS):
        return "en"

    try:
        detected = detect(text)
    except LangDetectException:
        return None

    return detected if detected in {"en", "es"} else None
poetry.lock ADDED
The diff for this file is too large to render. See raw diff
 
pyproject.toml ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [tool.poetry]
2
+ name = "mappingservice"
3
+ version = "0.1.0"
4
+ description = ""
5
+ authors = ["oscar <oscar@travelgate.com>"]
6
+ readme = "README.md"
7
+
8
+ [tool.poetry.dependencies]
9
+ python = "^3.11"
10
+ psutil = "^5.9.8"
11
+ fastapi = "^0.111.0"
12
+ uvicorn = {extras = ["standard"], version = "^0.29.0"}
13
+ pydantic = {version = "^2.7.1", extras = ["email"]}
14
+ pydantic-settings = "^2.2.1"
15
+ transformers = "^4.40.2"
16
+ torch = {version = "^2.3.0", source = "pytorch-cpu"}
17
+ langdetect = "^1.0.9"
18
+ accelerate = "^0.30.1"
19
+ optimum = "^1.19.2"
20
+ onnx = "^1.16.0"
21
+ onnxruntime = "^1.18.0"
22
+
23
+ [tool.poetry.group.dev.dependencies]
24
+ pytest = "^8.2.0"
25
+ pytest-cov="^5.0.0"
26
+ pre-commit="^3.7.0"
27
+ ruff="^0.4.4"
28
+
29
+ [tool.poetry.group.benchmark.dependencies]
30
+ locust = "^2.27.0"
31
+
32
+ [tool.pytest.ini_options]
33
+ addopts = "-vv -rA --disable-warnings --cov=app --cov-report term-missing"
34
+ testpaths = [
35
+ "tests",
36
+ ]
37
+ asyncio_mode = "auto"
38
+ pythonpath = [
39
+ "."
40
+ ]
41
+
42
+ [tool.ruff]
43
+ # Same as Black.
44
+ line-length = 88
45
+ indent-width = 4
46
+
47
+ # Assume Python 3.11
48
+ target-version = "py38"
49
+
50
+ # Exclude a variety of commonly ignored directories.
51
+ lint.exclude = [
52
+ ".bzr",
53
+ ".direnv",
54
+ ".eggs",
55
+ ".git",
56
+ ".git-rewrite",
57
+ ".hg",
58
+ ".ipynb_checkpoints",
59
+ ".mypy_cache",
60
+ ".nox",
61
+ ".pants.d",
62
+ ".pyenv",
63
+ ".pytest_cache",
64
+ ".pytype",
65
+ ".ruff_cache",
66
+ ".svn",
67
+ ".tox",
68
+ ".venv",
69
+ ".vscode",
70
+ "__pypackages__",
71
+ "_build",
72
+ "buck-out",
73
+ "build",
74
+ "dist",
75
+ "node_modules",
76
+ "site-packages",
77
+ "venv",
78
+ ]
79
+
80
+ lint.select = [
81
+ "E", # pycodestyle errors
82
+ "W", # pycodestyle warnings
83
+ "F", # pyflakes
84
+ "I", # isort
85
+ "C", # flake8-comprehensions
86
+ "B", # flake8-bugbear
87
+ "UP", # pyupgrade
88
+ ]
89
+ lint.ignore = [
90
+ "B008", # do not perform function calls in argument defaults
91
+ "C901", # too complex
92
+ "W191", # indentation contains tabs
93
+ "E711", # `None` should be `cond is None`
94
+ ]
95
+
96
+ [tool.ruff.format]
97
+ # Like Black, use double quotes for strings.
98
+ quote-style = "double"
99
+
100
+ # Like Black, indent with spaces, rather than tabs.
101
+ indent-style = "space"
102
+
103
+ # Like Black, respect magic trailing commas.
104
+ skip-magic-trailing-comma = false
105
+
106
+ # Like Black, automatically detect the appropriate line ending.
107
+ line-ending = "auto"
108
+
109
+ # Enable auto-formatting of code examples in docstrings. Markdown,
110
+ # reStructuredText code/literal blocks and doctests are all supported.
111
+ #
112
+ # This is currently disabled by default, but it is planned for this
113
+ # to be opt-out in the future.
114
+ docstring-code-format = false
115
+
116
+ # Set the line length limit used when formatting code snippets in
117
+ # docstrings.
118
+ #
119
+ # This only has an effect when the `docstring-code-format` setting is
120
+ # enabled.
121
+ docstring-code-line-length = "dynamic"
122
+
123
+ [tool.ruff.lint.per-file-ignores]
124
+ "__init__.py" = ["F401"]
125
+ "**/{tests,docs,tools}/*" = ["E402"]
126
+
127
+ [tool.ruff.lint.pyupgrade]
128
+ # Preserve types, even if a file imports `from __future__ import annotations`.
129
+ keep-runtime-typing = true
130
+
131
+ [build-system]
132
+ requires = ["poetry-core"]
133
+ build-backend = "poetry.core.masonry.api"
134
+
135
+ [[tool.poetry.source]]
136
+ name = "pytorch-cpu"
137
+ url = "https://download.pytorch.org/whl/cpu"
138
+ priority = "supplemental"
tests/__init__.py ADDED
File without changes
tests/test_langdetect.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+
3
+ import pytest
4
+ import torch
5
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
6
+
7
+ from mappingservice.utils import predict_language
8
+
9
+
10
@pytest.fixture
def classifier():
    """Text-classification pipeline for the language detection checkpoint."""
    checkpoint = "papluca/xlm-roberta-base-language-detection"
    device = 0 if torch.cuda.is_available() else -1

    return pipeline(
        "text-classification",
        model=AutoModelForSequenceClassification.from_pretrained(checkpoint),
        tokenizer=AutoTokenizer.from_pretrained(checkpoint),
        framework="pt",
        device=device,
    )
24
+
25
+
26
def test_model_predictions(classifier):
    """Check language detection on sample room descriptions.

    ``predict_language`` is tried first; the model fixture is only used when
    it returns nothing. Each description appears once.
    """
    test_data = [
        {'input': 'Habitacion estandar con bano', 'expected_response': 'es'},
        {'input': 'apartamento de lujo con vistas al mar', 'expected_response': 'es'},  # noqa: E501
        {'input': 'casa ejecutiva', 'expected_response': 'es'},
        {'input': 'villa doble', 'expected_response': 'es'},
        {'input': 'estudio de una habitacion de lujo', 'expected_response': 'es'},
        {'input': 'chalet premier con dos habitaciones', 'expected_response': 'es'},
        {'input': 'casa de la playa premium con bano compartido', 'expected_response': 'es'},  # noqa: E501
        {'input': 'estudio familiar grande', 'expected_response': 'es'},
        {'input': 'suite familiar junior', 'expected_response': 'en'},
        {'input': 'bungalow tradicional sin bano', 'expected_response': 'es'},
        {'input': 'superior room 1 king superior room 1 king cupola or courtyard view french style 36sqm 385sq', 'expected_response': 'en'},  # noqa: E501
        {'input': 'habitacion matrimonial adaptada discapacitados', 'expected_response': 'es'},  # noqa: E501
        {'input': 'privilege room twin for 2 adults 0 children and 0 infants', 'expected_response': 'en'},  # noqa: E501
        {'input': 'deluxe room double for 2 adults 0 children and 0 infants', 'expected_response': 'en'},  # noqa: E501
        {'input': 'premier palace double room', 'expected_response': 'en'},
        {'input': 'double single use deluxe', 'expected_response': 'en'},
        {'input': 'double room queen bed superior', 'expected_response': 'en'},
        {'input': 'double guest room', 'expected_response': 'en'},
        {'input': 'single room for 1 adults 0 children and 0 infants', 'expected_response': 'en'},  # noqa: E501
        {'input': 'twin premium room incl evening tasting welcome gift comp wifi 28 sqm espresso fridge bathrobe', 'expected_response': 'en'},  # noqa: E501
        {'input': 'superior quadruple room', 'expected_response': 'en'},
        {'input': 'superior one bedroom apartment x2013 2 adults', 'expected_response': 'en'},  # noqa: E501
        {'input': 'comfort double', 'expected_response': 'en'},
        {'input': '1 king bed suite nonsmoking', 'expected_response': 'en'},
        {'input': 'junior suite 1 king bed nonsmoking', 'expected_response': 'en'},
        {'input': 'family room superior', 'expected_response': 'en'},
    ]

    for test_case in test_data:
        description = test_case["input"]
        expected_label = test_case["expected_response"]
        # First, try to predict based on keywords
        predicted_label = predict_language(description)

        # If no prediction was made, fallback to model prediction
        if not predicted_label:
            print(f"Fallback to model prediction for '{description}'")
            result = classifier(description)
            predicted_label = result[0]["label"]

        assert (
            predicted_label == expected_label
        ), f"Incorrect prediction for '{description}': expected '{expected_label}', obtained '{predicted_label}'"  # noqa: E501
tests/test_roomcategory.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+
3
+ import pytest
4
+ import torch
5
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
6
+
7
+
8
@pytest.fixture
def classifier():
    """Text-classification pipeline for the room category checkpoint.

    Any loading error is printed and then re-raised.
    """
    checkpoint = "travelgate/room_category-classifier"
    try:
        loaded_model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
        loaded_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
        device = 0 if torch.cuda.is_available() else -1
        classification = pipeline(
            "text-classification",
            model=loaded_model,
            tokenizer=loaded_tokenizer,
            framework="pt",
            device=device,
        )
        print("Modelo y tokenizer cargados exitosamente.")
    except Exception as e:
        print(f"Error al cargar el modelo o tokenizer: {e}")
        raise e

    return classification
27
+
28
+
29
def test_model_predictions(classifier):
    """Check the top predicted category for sample room descriptions."""
    test_data = [
        {'input': 'comfort double', 'expected_response': 'comfort'},
        {'input': 'full size bed 1 big bed', 'expected_response': 'default'},
        {'input': '1 king bed suite nonsmoking', 'expected_response': 'default'},
        {'input': 'junior suite 1 king bed nonsmoking', 'expected_response': 'junior'},
        {'input': 'superior sea view', 'expected_response': 'superior'},
        {'input': 'family room superior', 'expected_response': 'superior'},
        {'input': '1 king standard', 'expected_response': 'standard'},
        {'input': 'superior king balcon vista al mar', 'expected_response': 'superior'},
        {'input': 'standard double room king bed accessible', 'expected_response': 'standard'},  # noqa: E501
        {'input': 'sagamore garden view suite standard', 'expected_response': 'standard'},  # noqa: E501
        {'input': 'standard 1 king bed courtyard view non smoking', 'expected_response': 'standard'},  # noqa: E501
        {'input': 'suite double for 2 adults 0 children and 0 infants', 'expected_response': 'default'},  # noqa: E501
        {'input': 'one bedroom superior king suite with ocean view non smoking', 'expected_response': 'superior'},  # noqa: E501
        {'input': 'omnia deluxe triple for 2 adults 0 children and 0 infants', 'expected_response': 'deluxe'},  # noqa: E501
        {'input': 'king premium room incl evening tasting welcome gift comp wifi 27 30 sqm espresso fridge bathrobe', 'expected_response': 'premium'},  # noqa: E501
        {'input': 'standard twin double room single use', 'expected_response': 'standard'},  # noqa: E501
        {'input': 'habitacion 2 camas deluxe', 'expected_response': 'deluxe'},
        {'input': 'standard 2 double beds courtyard view non smoking', 'expected_response': 'standard'},  # noqa: E501
        {'input': 'habitacion estandar con bano', 'expected_response': 'standard'},
        {'input': 'superior room 1 king superior room 1 king cupola or courtyard view french style 36sqm 385sq', 'expected_response': 'superior'},  # noqa: E501
        {'input': 'habitacion estandar matrimonial adaptada discapacitados', 'expected_response': 'standard'},  # noqa: E501
        {'input': 'privilege twin for 2 adults 0 children and 0 infants', 'expected_response': 'privilege'},  # noqa: E501
        {'input': 'classic single for 1 adults 0 children and 0 infants', 'expected_response': 'classic'},  # noqa: E501
        {'input': 'deluxe room double for 2 adults 0 children and 0 infants', 'expected_response': 'deluxe'},  # noqa: E501
        {'input': 'gallery two queens', 'expected_response': 'default'},
        {'input': 'premier palace double room', 'expected_response': 'premier'},
        {'input': 'double single use deluxe', 'expected_response': 'deluxe'},
        {'input': 'double room queen bed superior', 'expected_response': 'superior'},
        {'input': 'double guest standard room', 'expected_response': 'standard'},
        {'input': '2 queen beds disney view non smoking', 'expected_response': 'default'},  # noqa: E501
        {'input': 'standard single room for 1 adults 0 children and 0 infants', 'expected_response': 'standard'},  # noqa: E501
        {'input': 'twin premium room incl evening tasting welcome gift comp wifi 28 sqm espresso fridge bathrobe', 'expected_response': 'premium'},  # noqa: E501
        {'input': 'superior quadruple room', 'expected_response': 'superior'},
        {'input': 'superior one bedroom apartment x2013 2 adults', 'expected_response': 'superior'},  # noqa: E501
    ]

    cases = (
        (case["input"], case["expected_response"]) for case in test_data
    )
    for description, expected_category in cases:
        # The first entry of the pipeline output is taken as the prediction.
        top_result = classifier(description)[0]
        predicted_category = top_result["label"]

        assert (
            predicted_category == expected_category
        ), f"Incorrect prediction for '{description}': expected '{expected_category}', obtained '{predicted_category}'"  # noqa: E501
tests/test_roomenvironment.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+
3
+ import pytest
4
+ import torch
5
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
6
+
7
+
8
@pytest.fixture
def classifier():
    """Build the text-classification pipeline for the room-environment model.

    The model and tokenizer are loaded from the
    ``travelgate/room_environment-classifier`` checkpoint and wrapped in a
    PyTorch pipeline. Device index 0 is used when CUDA is available and -1
    otherwise. Any error raised while loading is printed and then re-raised.
    """
    checkpoint = "travelgate/room_environment-classifier"
    try:
        net = AutoModelForSequenceClassification.from_pretrained(checkpoint)
        tok = AutoTokenizer.from_pretrained(checkpoint)
        device_index = 0 if torch.cuda.is_available() else -1
        environment_pipeline = pipeline(
            "text-classification",
            model=net,
            tokenizer=tok,
            framework="pt",
            device=device_index,
        )
        print("Modelo y tokenizer cargados exitosamente.")
    except Exception as load_error:
        # Surface the loading problem in the captured output, then propagate it.
        print(f"Error al cargar el modelo o tokenizer: {load_error}")
        raise load_error

    return environment_pipeline
27
+
28
+
29
def test_model_predictions(classifier):
    """Check the room-environment classifier against labelled descriptions.

    Each sample is passed to ``classifier`` and the ``"label"`` of the first
    returned prediction is compared with the expected label. Every mismatch is
    collected and reported in a single failure, so one wrong sample does not
    hide the others.

    Args:
        classifier: Callable that takes a description and returns a sequence
            of predictions, each exposing a ``"label"`` entry.

    Raises:
        AssertionError: If at least one sample is labelled differently from
            its expected response.
    """
    test_data = [
        {'input': 'comfort double with shared bed', 'expected_response': 'shared bed'},
        {'input': 'habitacion con banno compartido', 'expected_response': 'shared bathroom'},  # noqa: E501
        {'input': 'habitacion con bano compartido', 'expected_response': 'shared bathroom'},  # noqa: E501
        {'input': 'full size bed 1 big bed with shared bathroom', 'expected_response': 'shared bathroom'},  # noqa: E501
        {'input': '1 king bed suite nonsmoking shared bathroom', 'expected_response': 'shared bathroom'},  # noqa: E501
        {'input': 'junior suite 1 king bed nonsmoking shared bed', 'expected_response': 'shared bed'},  # noqa: E501
        {'input': 'superior sea view connected room', 'expected_response': 'connected'},
        {'input': 'twin room with two shared beds', 'expected_response': 'shared bed'},
        {'input': 'double room with connected rooms available', 'expected_response': 'connected'},  # noqa: E501
        {'input': 'two queen beds with shared bathroom access', 'expected_response': 'shared bathroom'},  # noqa: E501
        {'input': 'suite with king bed and connected room option', 'expected_response': 'connected'},  # noqa: E501
        {'input': 'shared dormitory room mixed gender', 'expected_response': 'shared room'},  # noqa: E501
        {'input': 'private room with shared bathroom facilities', 'expected_response': 'shared bathroom'},  # noqa: E501
        {'input': 'family suite with connected rooms', 'expected_response': 'connected'},  # noqa: E501
        {'input': 'bunk bed in mixed shared room', 'expected_response': 'shared room'},
        {'input': 'deluxe queen room with two queen beds shared bathroom', 'expected_response': 'shared bathroom'},  # noqa: E501
        {'input': 'hostel dorm bed shared room', 'expected_response': 'shared room'},
        {'input': 'economy double room with shared bathroom outside the room', 'expected_response': 'shared bathroom'},  # noqa: E501
        {'input': 'luxury suite with sea view and private connected rooms', 'expected_response': 'connected'},  # noqa: E501
        {'input': 'single bed in co-ed shared hostel room', 'expected_response': 'shared room'},  # noqa: E501
        {'input': 'executive suite with optional connecting room', 'expected_response': 'connected'},  # noqa: E501
        {'input': 'standard twin room shared beds', 'expected_response': 'shared bed'},
        {'input': 'cozy single room with access to shared bathroom', 'expected_response': 'shared bathroom'},  # noqa: E501
        {'input': 'spacious family room with interconnecting doors', 'expected_response': 'connected'},  # noqa: E501
        {'input': 'shared female dormitory for 4 guests', 'expected_response': 'shared room'},  # noqa: E501
        {'input': 'luxury double room with shared bed arrangements', 'expected_response': 'shared bed'},  # noqa: E501
        {'input': 'master suite with a shared bedroom balcony and sea view', 'expected_response': 'shared room'},  # noqa: E501
        {'input': 'budget twin room with shared bath', 'expected_response': 'shared bathroom'},  # noqa: E501
        {'input': 'two-bedroom apartment with connecting options', 'expected_response': 'connected'},  # noqa: E501
        {'input': 'en-suite room with shared bathroom', 'expected_response': 'shared bathroom'},  # noqa: E501
        {'input': 'single guest room with shared bed', 'expected_response': 'shared bed'},  # noqa: E501
        {'input': 'shared room in a modern loft city center', 'expected_response': 'shared room'},  # noqa: E501
        {'input': 'double bed in a shared room apartment', 'expected_response': 'shared room'},  # noqa: E501
        {'input': 'studio with shared bath', 'expected_response': 'shared bathroom'},
        {'input': 'penthouse suite connected rooms available upon request', 'expected_response': 'connected'},  # noqa: E501
        {'input': "backpacker's special one bed in a shared room of six", 'expected_response': 'shared room'}  # noqa: E501
    ]

    # Gather every misprediction instead of stopping at the first one, so a
    # single run shows the full list of samples the model gets wrong.
    failures = []
    for test_case in test_data:
        description = test_case["input"]
        expected_environment = test_case["expected_response"]
        predicted_environment = classifier(description)[0]["label"]

        if predicted_environment != expected_environment:
            failures.append(
                f"Incorrect prediction for '{description}': expected '{expected_environment}', obtained '{predicted_environment}'"  # noqa: E501
            )

    assert not failures, "\n".join(failures)
tests/test_roomtype.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+
3
+ import pytest
4
+ import torch
5
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
6
+
7
+
8
@pytest.fixture
def classifier():
    """Return a text-classification pipeline for the room-type model.

    The model and tokenizer come from the ``travelgate/room_type-classifier``
    checkpoint. The pipeline uses the PyTorch framework, with device index 0
    when CUDA is available and -1 otherwise.
    """
    checkpoint = "travelgate/room_type-classifier"
    net = AutoModelForSequenceClassification.from_pretrained(checkpoint)
    tok = AutoTokenizer.from_pretrained(checkpoint)
    device_index = 0 if torch.cuda.is_available() else -1

    return pipeline(
        "text-classification",
        model=net,
        tokenizer=tok,
        framework="pt",
        device=device_index,
    )
22
+
23
+
24
def test_model_predictions(classifier):
    """Check the room-type classifier against labelled descriptions.

    Each sample is passed to ``classifier`` and the ``"label"`` of the first
    returned prediction is compared with the expected label. Every mismatch is
    collected and reported in a single failure, so one wrong sample does not
    hide the others.

    Args:
        classifier: Callable that takes a description and returns a sequence
            of predictions, each exposing a ``"label"`` entry.

    Raises:
        AssertionError: If at least one sample is labelled differently from
            its expected response.
    """
    # Each sample appears once; a block of nine samples used to be listed
    # twice, which only repeated the same inference calls.
    test_data = [
        {'input': 'Habitacion estandar con bano', 'expected_response': 'room'},
        {'input': 'apartamento de lujo con vistas al mar', 'expected_response': 'apartment'},  # noqa: E501
        {'input': 'casa ejecutiva', 'expected_response': 'house'},
        {'input': 'villa doble', 'expected_response': 'villa'},
        {'input': 'estudio de una habitacion de lujo', 'expected_response': 'studio'},
        {'input': 'chalet premier con dos habitaciones', 'expected_response': 'chalet'},
        {'input': 'casa de la playa premium con bano compartido', 'expected_response': 'beach house'},  # noqa: E501
        {'input': 'estudio familiar grande', 'expected_response': 'family studio'},
        {'input': 'suite familiar junior', 'expected_response': 'family suite'},
        {'input': 'bungalow tradicional sin bano', 'expected_response': 'bungalow'},
        {'input': 'superior room 1 king superior room 1 king cupola or courtyard view french style 36sqm 385sq', 'expected_response': 'room'},  # noqa: E501
        {'input': 'habitacion matrimonial adaptada discapacitados', 'expected_response': 'room'},  # noqa: E501
        {'input': 'privilege room twin for 2 adults 0 children and 0 infants', 'expected_response': 'room'},  # noqa: E501
        {'input': 'deluxe room double for 2 adults 0 children and 0 infants', 'expected_response': 'room'},  # noqa: E501
        {'input': 'premier palace double room', 'expected_response': 'room'},
        {'input': 'double single use deluxe', 'expected_response': 'default'},
        {'input': 'double room queen bed superior', 'expected_response': 'room'},
        {'input': 'double guest room', 'expected_response': 'room'},
        {'input': 'single room for 1 adults 0 children and 0 infants', 'expected_response': 'room'},  # noqa: E501
        {'input': 'twin premium room incl evening tasting welcome gift comp wifi 28 sqm espresso fridge bathrobe', 'expected_response': 'room'},  # noqa: E501
        {'input': 'superior quadruple room', 'expected_response': 'room'},
        {'input': 'superior one bedroom apartment x2013 2 adults', 'expected_response': 'apartment'},  # noqa: E501
        {'input': 'comfort double', 'expected_response': 'default'},
        {'input': '1 king bed suite nonsmoking', 'expected_response': 'suite'},
        {'input': 'junior suite 1 king bed nonsmoking', 'expected_response': 'suite'},
        {'input': 'family room superior', 'expected_response': 'room'}
    ]

    # Gather every misprediction instead of stopping at the first one, so a
    # single run shows the full list of samples the model gets wrong.
    failures = []
    for test_case in test_data:
        description = test_case["input"]
        expected_label = test_case["expected_response"]
        predicted_label = classifier(description)[0]["label"]

        if predicted_label != expected_label:
            failures.append(
                f"Incorrect prediction for '{description}': expected '{expected_label}', obtained '{predicted_label}'"  # noqa: E501
            )

    assert not failures, "\n".join(failures)
tests/test_roomview.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ import pytest
3
+ from langdetect import detect_langs
4
+
5
+ import mappingservice.dependencies as deps
6
+ from mappingservice.config import Settings
7
+ from mappingservice.constants import AVAILABLE_LANGUAGES, MODEL_NAMES
8
+ from mappingservice.ms.model_loader import ModelLoader
9
+ from mappingservice.utils import predict_language
10
+
11
# Module-level setup: one Settings instance and one ModelLoader are created at
# import time; the classifier fixture obtains its models from ml_model.
+ settings = Settings()
12
+ ml_model = ModelLoader(settings, MODEL_NAMES)
13
+
14
+
15
@pytest.fixture
def classifier():
    """Register a room-view model per language and return a lookup function.

    For every entry of ``AVAILABLE_LANGUAGES`` the object returned by
    ``ml_model.get_model('room_view', lang)`` is stored under
    ``deps.mc['room_view'][lang]``, creating the ``'room_view'`` mapping the
    first time it is needed. The returned callable takes a language and gives
    back the object stored for it.
    """
    for lang in AVAILABLE_LANGUAGES:
        view_model = ml_model.get_model('room_view', lang)
        # Create the per-task mapping on first use, then register the model.
        if 'room_view' not in deps.mc:
            deps.mc['room_view'] = {}
        deps.mc['room_view'][lang] = view_model

    def get_model(language):
        return deps.mc['room_view'][language]

    return get_model
30
+
31
+
32
def test_model_predictions(classifier):
    """Check the room-view models against labelled descriptions.

    For each sample the language is resolved first: the first candidate from
    ``detect_langs`` that is listed in ``AVAILABLE_LANGUAGES`` wins, and
    ``predict_language`` is used when no candidate qualifies. The model for
    that language then predicts on the description, and the expected view must
    appear among the ``"word"`` values of the returned entities. Every
    mismatch is collected and reported in a single failure.

    Args:
        classifier: Callable that takes a language and returns an object with
            a ``predict(description)`` method yielding entities that expose a
            ``"word"`` entry.

    Raises:
        AssertionError: If the expected view is missing from the predictions
            of at least one sample.
    """
    test_data = [
        {'input': 'studio with ruins view', 'expected_response': 'ruins'},  # noqa: E501
        {'input': 'suite with pool view', 'expected_response': 'pool'},
        {'input': 'executive room garden view', 'expected_response': 'garden'},
        {'input': 'studio two bedroom vineyard view 4 guests wine country charm', 'expected_response': 'vineyard'},  # noqa: E501
        {'input': 'superior suite city view', 'expected_response': 'city'},
        {'input': 'room with a balcony and harbour views', 'expected_response': 'harbour'},  # noqa: E501
        {'input': 'junior studio stars views', 'expected_response': 'stars'},
        {'input': 'room park views', 'expected_response': 'park'},
        {'input': 'loft two bedroom and marina view 4 adults coastal luxury', 'expected_response': 'marina'},  # noqa: E501
        {'input': 'house sea view', 'expected_response': 'sea'},
        {'input': 'villa hill views', 'expected_response': 'hill'},
        {'input': 'room park view', 'expected_response': 'park'},
        {'input': 'townhouse one bedroom and park view 2 adults urban charm', 'expected_response': 'park'},  # noqa: E501
        {'input': 'suite garden view', 'expected_response': 'garden'},
        {'input': 'twin room ocean view with balcony', 'expected_response': 'ocean'},
        {'input': 'residencia con vistas al mar', 'expected_response': 'mar'},
        {'input': 'habitacion con vistas a la piscina', 'expected_response': 'piscina'},
        {'input': 'habitacion ejecutiva con vistas al jardin', 'expected_response': 'jardin'},  # noqa: E501
        # {'input': 'habitacion con vistas al oceano', 'expected_response': 'oceano'},  # noqa: E501
        {'input': 'suite superior con vistas a la ciudad', 'expected_response': 'ciudad'},  # noqa: E501
        # {'input': 'habitacion con balcon con vistas a la bahia', 'expected_response': 'bahia'},  # noqa: E501
        {'input': 'estudio junior con vistas a las estrellas', 'expected_response': 'estrellas'},  # noqa: E501
        {'input': 'habitacion con vistas al parque', 'expected_response': 'parque'},
        {'input': 'habitacion con vistas a la ciudad', 'expected_response': 'ciudad'},
        {'input': 'casa con vista al mar', 'expected_response': 'mar'},
        {'input': 'corner suite pool view', 'expected_response': 'pool'},
        {'input': 'estudio con vista al parque', 'expected_response': 'parque'},
        {'input': 'estudio con vista al jardin', 'expected_response': 'jardin'},
        {'input': 'residencia con vista al amazonas', 'expected_response': 'amazonas'},
        {'input': 'habitacion con vista a la montana', 'expected_response': 'montana'},  # noqa: E501
        {'input': 'residencia moderno con vista al park', 'expected_response': 'park'},  # noqa: E501
        {'input': 'dormitorio con vistas al lago', 'expected_response': 'lago'},  # noqa: E501
        # {'input': 'dormitorio con vistas a la bahia', 'expected_response': 'bahia'},
        {'input': 'classic ocean view', 'expected_response': 'ocean'}  # noqa: E501
    ]

    # this test case will always pass until we solve the mystery with model data
    print("Test data length:", len(test_data))

    failures = []
    for test_case in test_data:
        description = test_case["input"]
        expected_view = test_case["expected_response"]

        # Resolve a language code without ever binding the candidate list to
        # ``language``: testing a list for membership in AVAILABLE_LANGUAGES
        # would raise TypeError if that constant were a set or a dict.
        language = next(
            (
                candidate.lang
                for candidate in detect_langs(description)
                if candidate.lang in AVAILABLE_LANGUAGES
            ),
            None,
        )
        if language is None:
            language = predict_language(description)

        model = classifier(language)
        processed_results = model.predict(description)
        predicted_views = [entity["word"] for entity in processed_results]

        if expected_view not in predicted_views:
            failures.append(
                f"Incorrect prediction for '{description}' using '{language}' language: expected '{expected_view}', obtained '{predicted_views}'"  # noqa: E501
            )

    # Report every mismatch at once rather than stopping at the first one.
    assert not failures, "\n".join(failures)