Spaces:
Sleeping
Sleeping
Upload 10 files
Browse files- .gitignore +151 -175
- app.py +1215 -974
- enhanced_nlp_processor.py +904 -904
- language_detector.py +251 -0
- llm_query_processor.py +384 -351
- logger.py +61 -61
- neo4j_service.py +222 -222
- spell_corrector.py +257 -257
- translation_service.py +1057 -702
.gitignore
CHANGED
|
@@ -1,175 +1,151 @@
|
|
| 1 |
-
#
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
#
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
*.egg
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
#
|
| 32 |
-
*.manifest
|
| 33 |
-
*.spec
|
| 34 |
-
|
| 35 |
-
# Installer logs
|
| 36 |
-
pip-log.txt
|
| 37 |
-
pip-delete-this-directory.txt
|
| 38 |
-
|
| 39 |
-
# Unit test / coverage reports
|
| 40 |
-
htmlcov/
|
| 41 |
-
.tox/
|
| 42 |
-
.nox/
|
| 43 |
-
.coverage
|
| 44 |
-
.coverage.*
|
| 45 |
-
.cache
|
| 46 |
-
nosetests.xml
|
| 47 |
-
coverage.xml
|
| 48 |
-
*.cover
|
| 49 |
-
*.py,cover
|
| 50 |
-
.hypothesis/
|
| 51 |
-
.pytest_cache/
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
*.
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
db.sqlite3
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
#
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
#
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
#
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
#
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
#
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
#
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
.
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
*.
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
.
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
.dmypy.json
|
| 153 |
-
dmypy.json
|
| 154 |
-
|
| 155 |
-
# Pyre type checker
|
| 156 |
-
.pyre/
|
| 157 |
-
|
| 158 |
-
# pytype static type analyzer
|
| 159 |
-
.pytype/
|
| 160 |
-
|
| 161 |
-
# Cython debug symbols
|
| 162 |
-
cython_debug/
|
| 163 |
-
|
| 164 |
-
# PyCharm
|
| 165 |
-
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
| 166 |
-
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
| 167 |
-
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
| 168 |
-
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
| 169 |
-
#.idea/
|
| 170 |
-
|
| 171 |
-
# Ruff stuff:
|
| 172 |
-
.ruff_cache/
|
| 173 |
-
|
| 174 |
-
# PyPI configuration file
|
| 175 |
-
.pypirc
|
|
|
|
| 1 |
+
# Environment variables
|
| 2 |
+
.env
|
| 3 |
+
.env.local
|
| 4 |
+
.env.*.local
|
| 5 |
+
|
| 6 |
+
# Python
|
| 7 |
+
__pycache__/
|
| 8 |
+
*.py[cod]
|
| 9 |
+
*$py.class
|
| 10 |
+
*.so
|
| 11 |
+
.Python
|
| 12 |
+
build/
|
| 13 |
+
develop-eggs/
|
| 14 |
+
dist/
|
| 15 |
+
downloads/
|
| 16 |
+
eggs/
|
| 17 |
+
.eggs/
|
| 18 |
+
lib/
|
| 19 |
+
lib64/
|
| 20 |
+
parts/
|
| 21 |
+
sdist/
|
| 22 |
+
var/
|
| 23 |
+
wheels/
|
| 24 |
+
pip-wheel-metadata/
|
| 25 |
+
share/python-wheels/
|
| 26 |
+
*.egg-info/
|
| 27 |
+
.installed.cfg
|
| 28 |
+
*.egg
|
| 29 |
+
MANIFEST
|
| 30 |
+
|
| 31 |
+
# PyInstaller
|
| 32 |
+
*.manifest
|
| 33 |
+
*.spec
|
| 34 |
+
|
| 35 |
+
# Installer logs
|
| 36 |
+
pip-log.txt
|
| 37 |
+
pip-delete-this-directory.txt
|
| 38 |
+
|
| 39 |
+
# Unit test / coverage reports
|
| 40 |
+
htmlcov/
|
| 41 |
+
.tox/
|
| 42 |
+
.nox/
|
| 43 |
+
.coverage
|
| 44 |
+
.coverage.*
|
| 45 |
+
.cache
|
| 46 |
+
nosetests.xml
|
| 47 |
+
coverage.xml
|
| 48 |
+
*.cover
|
| 49 |
+
*.py,cover
|
| 50 |
+
.hypothesis/
|
| 51 |
+
.pytest_cache/
|
| 52 |
+
|
| 53 |
+
# Translations
|
| 54 |
+
*.mo
|
| 55 |
+
*.pot
|
| 56 |
+
|
| 57 |
+
# Django stuff:
|
| 58 |
+
*.log
|
| 59 |
+
local_settings.py
|
| 60 |
+
db.sqlite3
|
| 61 |
+
db.sqlite3-journal
|
| 62 |
+
|
| 63 |
+
# Flask stuff:
|
| 64 |
+
instance/
|
| 65 |
+
.webassets-cache
|
| 66 |
+
|
| 67 |
+
# Scrapy stuff:
|
| 68 |
+
.scrapy
|
| 69 |
+
|
| 70 |
+
# Sphinx documentation
|
| 71 |
+
docs/_build/
|
| 72 |
+
|
| 73 |
+
# PyBuilder
|
| 74 |
+
target/
|
| 75 |
+
|
| 76 |
+
# Jupyter Notebook
|
| 77 |
+
.ipynb_checkpoints
|
| 78 |
+
|
| 79 |
+
# IPython
|
| 80 |
+
profile_default/
|
| 81 |
+
ipython_config.py
|
| 82 |
+
|
| 83 |
+
# pyenv
|
| 84 |
+
.python-version
|
| 85 |
+
|
| 86 |
+
# pipenv
|
| 87 |
+
Pipfile.lock
|
| 88 |
+
|
| 89 |
+
# PEP 582
|
| 90 |
+
__pypackages__/
|
| 91 |
+
|
| 92 |
+
# Celery stuff
|
| 93 |
+
celerybeat-schedule
|
| 94 |
+
celerybeat.pid
|
| 95 |
+
|
| 96 |
+
# SageMath parsed files
|
| 97 |
+
*.sage.py
|
| 98 |
+
|
| 99 |
+
# Environments
|
| 100 |
+
.venv
|
| 101 |
+
env/
|
| 102 |
+
venv/
|
| 103 |
+
ENV/
|
| 104 |
+
env.bak/
|
| 105 |
+
venv.bak/
|
| 106 |
+
|
| 107 |
+
# Spyder project settings
|
| 108 |
+
.spyderproject
|
| 109 |
+
.spyproject
|
| 110 |
+
|
| 111 |
+
# Rope project settings
|
| 112 |
+
.ropeproject
|
| 113 |
+
|
| 114 |
+
# mkdocs documentation
|
| 115 |
+
/site
|
| 116 |
+
|
| 117 |
+
# mypy
|
| 118 |
+
.mypy_cache/
|
| 119 |
+
.dmypy.json
|
| 120 |
+
dmypy.json
|
| 121 |
+
|
| 122 |
+
# Pyre type checker
|
| 123 |
+
.pyre/
|
| 124 |
+
|
| 125 |
+
# IDE
|
| 126 |
+
.vscode/
|
| 127 |
+
.idea/
|
| 128 |
+
*.swp
|
| 129 |
+
*.swo
|
| 130 |
+
*~
|
| 131 |
+
|
| 132 |
+
# OS
|
| 133 |
+
.DS_Store
|
| 134 |
+
.DS_Store?
|
| 135 |
+
._*
|
| 136 |
+
.Spotlight-V100
|
| 137 |
+
.Trashes
|
| 138 |
+
ehthumbs.db
|
| 139 |
+
Thumbs.db
|
| 140 |
+
|
| 141 |
+
# Logs
|
| 142 |
+
logs/
|
| 143 |
+
*.log
|
| 144 |
+
|
| 145 |
+
# Database
|
| 146 |
+
*.db
|
| 147 |
+
*.sqlite
|
| 148 |
+
|
| 149 |
+
# Temporary files
|
| 150 |
+
*.tmp
|
| 151 |
+
*.temp
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app.py
CHANGED
|
@@ -1,974 +1,1215 @@
|
|
| 1 |
-
#!/usr/bin/env python3
|
| 2 |
-
"""
|
| 3 |
-
Main Flask Application for Transport Query System
|
| 4 |
-
"""
|
| 5 |
-
|
| 6 |
-
from flask import Flask, render_template, request, jsonify, session
|
| 7 |
-
import os
|
| 8 |
-
from llm_query_processor import LLMQueryProcessor
|
| 9 |
-
from enhanced_nlp_processor import EnhancedNLPProcessor
|
| 10 |
-
from spell_corrector import SpellCorrector
|
| 11 |
-
from neo4j_service import Neo4jService
|
| 12 |
-
from translation_service import TranslationService
|
| 13 |
-
from logger import get_logger
|
| 14 |
-
from config import Config
|
| 15 |
-
|
| 16 |
-
app = Flask(__name__)
|
| 17 |
-
app.config.from_object(Config)
|
| 18 |
-
logger = get_logger("FlaskApp")
|
| 19 |
-
|
| 20 |
-
# Initialize services
|
| 21 |
-
query_processor = LLMQueryProcessor()
|
| 22 |
-
enhanced_nlp_processor = EnhancedNLPProcessor()
|
| 23 |
-
spell_corrector = SpellCorrector()
|
| 24 |
-
neo4j_service = Neo4jService()
|
| 25 |
-
translation_service = TranslationService()
|
| 26 |
-
|
| 27 |
-
@app.route('/')
|
| 28 |
-
def index():
|
| 29 |
-
"""Main page"""
|
| 30 |
-
return render_template('index.html')
|
| 31 |
-
|
| 32 |
-
@app.route('/api/query', methods=['POST'])
|
| 33 |
-
def process_query():
|
| 34 |
-
"""Process user query with enhanced NLP and translation support"""
|
| 35 |
-
try:
|
| 36 |
-
data = request.get_json()
|
| 37 |
-
user_query = data.get('query', '').strip()
|
| 38 |
-
use_enhanced_nlp = data.get('enhanced_nlp', True) # Default to enhanced NLP
|
| 39 |
-
|
| 40 |
-
if not user_query:
|
| 41 |
-
return jsonify({
|
| 42 |
-
'success': False,
|
| 43 |
-
'message': 'Please enter a query.'
|
| 44 |
-
})
|
| 45 |
-
|
| 46 |
-
#
|
| 47 |
-
translation_info = translation_service.translate_query(user_query)
|
| 48 |
-
|
| 49 |
-
# Use translated query for processing
|
| 50 |
-
query_to_process = translation_info['translated_query']
|
| 51 |
-
|
| 52 |
-
# Log translation info to console
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
result
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
'
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
if not text:
|
| 181 |
-
return jsonify({
|
| 182 |
-
'success': False,
|
| 183 |
-
'message': 'Please provide text to
|
| 184 |
-
})
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
'
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
'
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
{
|
| 211 |
-
'
|
| 212 |
-
'
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
'
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
'
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
'
|
| 249 |
-
'
|
| 250 |
-
'
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
]
|
| 255 |
-
}
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
'
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
#
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
'
|
| 290 |
-
'
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
'
|
| 302 |
-
'
|
| 303 |
-
'
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
'
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
|
| 342 |
-
|
| 343 |
-
|
| 344 |
-
|
| 345 |
-
|
| 346 |
-
|
| 347 |
-
|
| 348 |
-
|
| 349 |
-
|
| 350 |
-
}
|
| 351 |
-
|
| 352 |
-
|
| 353 |
-
|
| 354 |
-
|
| 355 |
-
|
| 356 |
-
|
| 357 |
-
|
| 358 |
-
|
| 359 |
-
|
| 360 |
-
|
| 361 |
-
|
| 362 |
-
|
| 363 |
-
|
| 364 |
-
|
| 365 |
-
|
| 366 |
-
|
| 367 |
-
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
|
| 371 |
-
|
| 372 |
-
|
| 373 |
-
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
|
| 380 |
-
|
| 381 |
-
|
| 382 |
-
|
| 383 |
-
|
| 384 |
-
|
| 385 |
-
|
| 386 |
-
|
| 387 |
-
|
| 388 |
-
|
| 389 |
-
|
| 390 |
-
|
| 391 |
-
|
| 392 |
-
|
| 393 |
-
|
| 394 |
-
|
| 395 |
-
|
| 396 |
-
|
| 397 |
-
|
| 398 |
-
|
| 399 |
-
|
| 400 |
-
|
| 401 |
-
'
|
| 402 |
-
'
|
| 403 |
-
'
|
| 404 |
-
|
| 405 |
-
|
| 406 |
-
|
| 407 |
-
|
| 408 |
-
|
| 409 |
-
|
| 410 |
-
|
| 411 |
-
|
| 412 |
-
|
| 413 |
-
|
| 414 |
-
|
| 415 |
-
|
| 416 |
-
|
| 417 |
-
|
| 418 |
-
|
| 419 |
-
|
| 420 |
-
|
| 421 |
-
|
| 422 |
-
|
| 423 |
-
|
| 424 |
-
|
| 425 |
-
|
| 426 |
-
|
| 427 |
-
|
| 428 |
-
|
| 429 |
-
|
| 430 |
-
|
| 431 |
-
|
| 432 |
-
|
| 433 |
-
'
|
| 434 |
-
|
| 435 |
-
|
| 436 |
-
|
| 437 |
-
|
| 438 |
-
|
| 439 |
-
|
| 440 |
-
|
| 441 |
-
'
|
| 442 |
-
'
|
| 443 |
-
|
| 444 |
-
|
| 445 |
-
|
| 446 |
-
|
| 447 |
-
|
| 448 |
-
|
| 449 |
-
|
| 450 |
-
|
| 451 |
-
|
| 452 |
-
|
| 453 |
-
|
| 454 |
-
|
| 455 |
-
]
|
| 456 |
-
|
| 457 |
-
|
| 458 |
-
|
| 459 |
-
|
| 460 |
-
'
|
| 461 |
-
'
|
| 462 |
-
'
|
| 463 |
-
|
| 464 |
-
|
| 465 |
-
|
| 466 |
-
'
|
| 467 |
-
|
| 468 |
-
'
|
| 469 |
-
'
|
| 470 |
-
'
|
| 471 |
-
|
| 472 |
-
|
| 473 |
-
|
| 474 |
-
|
| 475 |
-
|
| 476 |
-
|
| 477 |
-
|
| 478 |
-
'
|
| 479 |
-
|
| 480 |
-
|
| 481 |
-
|
| 482 |
-
|
| 483 |
-
|
| 484 |
-
|
| 485 |
-
|
| 486 |
-
|
| 487 |
-
|
| 488 |
-
|
| 489 |
-
|
| 490 |
-
{
|
| 491 |
-
'
|
| 492 |
-
'
|
| 493 |
-
|
| 494 |
-
|
| 495 |
-
|
| 496 |
-
|
| 497 |
-
|
| 498 |
-
|
| 499 |
-
|
| 500 |
-
|
| 501 |
-
|
| 502 |
-
|
| 503 |
-
|
| 504 |
-
|
| 505 |
-
|
| 506 |
-
|
| 507 |
-
|
| 508 |
-
|
| 509 |
-
|
| 510 |
-
|
| 511 |
-
|
| 512 |
-
|
| 513 |
-
|
| 514 |
-
|
| 515 |
-
|
| 516 |
-
|
| 517 |
-
|
| 518 |
-
|
| 519 |
-
|
| 520 |
-
|
| 521 |
-
|
| 522 |
-
|
| 523 |
-
|
| 524 |
-
|
| 525 |
-
|
| 526 |
-
|
| 527 |
-
|
| 528 |
-
|
| 529 |
-
|
| 530 |
-
|
| 531 |
-
|
| 532 |
-
|
| 533 |
-
|
| 534 |
-
|
| 535 |
-
|
| 536 |
-
|
| 537 |
-
|
| 538 |
-
|
| 539 |
-
|
| 540 |
-
|
| 541 |
-
|
| 542 |
-
|
| 543 |
-
|
| 544 |
-
|
| 545 |
-
|
| 546 |
-
|
| 547 |
-
|
| 548 |
-
|
| 549 |
-
{
|
| 550 |
-
'category': '
|
| 551 |
-
'
|
| 552 |
-
|
| 553 |
-
|
| 554 |
-
|
| 555 |
-
|
| 556 |
-
|
| 557 |
-
|
| 558 |
-
|
| 559 |
-
|
| 560 |
-
|
| 561 |
-
|
| 562 |
-
|
| 563 |
-
|
| 564 |
-
|
| 565 |
-
|
| 566 |
-
|
| 567 |
-
|
| 568 |
-
|
| 569 |
-
|
| 570 |
-
|
| 571 |
-
|
| 572 |
-
|
| 573 |
-
|
| 574 |
-
|
| 575 |
-
|
| 576 |
-
|
| 577 |
-
'
|
| 578 |
-
|
| 579 |
-
|
| 580 |
-
|
| 581 |
-
'query': '
|
| 582 |
-
'description': '
|
| 583 |
-
},
|
| 584 |
-
{
|
| 585 |
-
|
| 586 |
-
'
|
| 587 |
-
|
| 588 |
-
|
| 589 |
-
|
| 590 |
-
|
| 591 |
-
|
| 592 |
-
|
| 593 |
-
|
| 594 |
-
|
| 595 |
-
|
| 596 |
-
|
| 597 |
-
|
| 598 |
-
|
| 599 |
-
|
| 600 |
-
|
| 601 |
-
|
| 602 |
-
|
| 603 |
-
|
| 604 |
-
'
|
| 605 |
-
|
| 606 |
-
|
| 607 |
-
|
| 608 |
-
'query': '
|
| 609 |
-
'description': '
|
| 610 |
-
}
|
| 611 |
-
|
| 612 |
-
|
| 613 |
-
|
| 614 |
-
|
| 615 |
-
|
| 616 |
-
|
| 617 |
-
|
| 618 |
-
|
| 619 |
-
|
| 620 |
-
|
| 621 |
-
|
| 622 |
-
|
| 623 |
-
|
| 624 |
-
|
| 625 |
-
|
| 626 |
-
|
| 627 |
-
|
| 628 |
-
|
| 629 |
-
|
| 630 |
-
|
| 631 |
-
|
| 632 |
-
|
| 633 |
-
|
| 634 |
-
|
| 635 |
-
|
| 636 |
-
|
| 637 |
-
|
| 638 |
-
|
| 639 |
-
|
| 640 |
-
|
| 641 |
-
|
| 642 |
-
|
| 643 |
-
|
| 644 |
-
|
| 645 |
-
|
| 646 |
-
'query': '
|
| 647 |
-
'description': '
|
| 648 |
-
},
|
| 649 |
-
{
|
| 650 |
-
|
| 651 |
-
'
|
| 652 |
-
|
| 653 |
-
|
| 654 |
-
|
| 655 |
-
|
| 656 |
-
|
| 657 |
-
|
| 658 |
-
|
| 659 |
-
|
| 660 |
-
|
| 661 |
-
|
| 662 |
-
|
| 663 |
-
|
| 664 |
-
|
| 665 |
-
|
| 666 |
-
|
| 667 |
-
|
| 668 |
-
|
| 669 |
-
'
|
| 670 |
-
|
| 671 |
-
|
| 672 |
-
|
| 673 |
-
'query': '
|
| 674 |
-
'description': '
|
| 675 |
-
}
|
| 676 |
-
|
| 677 |
-
|
| 678 |
-
|
| 679 |
-
|
| 680 |
-
|
| 681 |
-
|
| 682 |
-
|
| 683 |
-
|
| 684 |
-
|
| 685 |
-
|
| 686 |
-
|
| 687 |
-
|
| 688 |
-
|
| 689 |
-
|
| 690 |
-
|
| 691 |
-
|
| 692 |
-
|
| 693 |
-
|
| 694 |
-
|
| 695 |
-
|
| 696 |
-
|
| 697 |
-
|
| 698 |
-
|
| 699 |
-
|
| 700 |
-
|
| 701 |
-
|
| 702 |
-
|
| 703 |
-
|
| 704 |
-
|
| 705 |
-
|
| 706 |
-
|
| 707 |
-
|
| 708 |
-
|
| 709 |
-
|
| 710 |
-
'
|
| 711 |
-
|
| 712 |
-
|
| 713 |
-
|
| 714 |
-
|
| 715 |
-
|
| 716 |
-
|
| 717 |
-
|
| 718 |
-
|
| 719 |
-
|
| 720 |
-
|
| 721 |
-
|
| 722 |
-
|
| 723 |
-
|
| 724 |
-
|
| 725 |
-
|
| 726 |
-
|
| 727 |
-
|
| 728 |
-
|
| 729 |
-
|
| 730 |
-
|
| 731 |
-
|
| 732 |
-
|
| 733 |
-
|
| 734 |
-
|
| 735 |
-
|
| 736 |
-
|
| 737 |
-
|
| 738 |
-
|
| 739 |
-
|
| 740 |
-
|
| 741 |
-
|
| 742 |
-
|
| 743 |
-
|
| 744 |
-
|
| 745 |
-
|
| 746 |
-
|
| 747 |
-
|
| 748 |
-
|
| 749 |
-
|
| 750 |
-
|
| 751 |
-
|
| 752 |
-
|
| 753 |
-
|
| 754 |
-
|
| 755 |
-
|
| 756 |
-
|
| 757 |
-
|
| 758 |
-
|
| 759 |
-
|
| 760 |
-
|
| 761 |
-
|
| 762 |
-
|
| 763 |
-
|
| 764 |
-
|
| 765 |
-
|
| 766 |
-
|
| 767 |
-
|
| 768 |
-
|
| 769 |
-
|
| 770 |
-
|
| 771 |
-
|
| 772 |
-
|
| 773 |
-
|
| 774 |
-
|
| 775 |
-
|
| 776 |
-
|
| 777 |
-
|
| 778 |
-
|
| 779 |
-
|
| 780 |
-
|
| 781 |
-
|
| 782 |
-
|
| 783 |
-
|
| 784 |
-
|
| 785 |
-
|
| 786 |
-
|
| 787 |
-
|
| 788 |
-
|
| 789 |
-
|
| 790 |
-
|
| 791 |
-
|
| 792 |
-
|
| 793 |
-
|
| 794 |
-
|
| 795 |
-
|
| 796 |
-
|
| 797 |
-
|
| 798 |
-
|
| 799 |
-
|
| 800 |
-
|
| 801 |
-
|
| 802 |
-
|
| 803 |
-
|
| 804 |
-
|
| 805 |
-
|
| 806 |
-
|
| 807 |
-
|
| 808 |
-
|
| 809 |
-
'
|
| 810 |
-
|
| 811 |
-
|
| 812 |
-
|
| 813 |
-
|
| 814 |
-
|
| 815 |
-
|
| 816 |
-
|
| 817 |
-
|
| 818 |
-
|
| 819 |
-
|
| 820 |
-
|
| 821 |
-
|
| 822 |
-
|
| 823 |
-
'
|
| 824 |
-
|
| 825 |
-
|
| 826 |
-
|
| 827 |
-
|
| 828 |
-
|
| 829 |
-
|
| 830 |
-
|
| 831 |
-
|
| 832 |
-
|
| 833 |
-
|
| 834 |
-
|
| 835 |
-
|
| 836 |
-
|
| 837 |
-
|
| 838 |
-
|
| 839 |
-
|
| 840 |
-
|
| 841 |
-
|
| 842 |
-
|
| 843 |
-
|
| 844 |
-
|
| 845 |
-
|
| 846 |
-
|
| 847 |
-
|
| 848 |
-
|
| 849 |
-
|
| 850 |
-
|
| 851 |
-
|
| 852 |
-
|
| 853 |
-
|
| 854 |
-
|
| 855 |
-
|
| 856 |
-
|
| 857 |
-
|
| 858 |
-
|
| 859 |
-
|
| 860 |
-
|
| 861 |
-
|
| 862 |
-
|
| 863 |
-
|
| 864 |
-
|
| 865 |
-
|
| 866 |
-
|
| 867 |
-
|
| 868 |
-
|
| 869 |
-
|
| 870 |
-
|
| 871 |
-
|
| 872 |
-
|
| 873 |
-
|
| 874 |
-
|
| 875 |
-
|
| 876 |
-
|
| 877 |
-
|
| 878 |
-
|
| 879 |
-
|
| 880 |
-
|
| 881 |
-
|
| 882 |
-
|
| 883 |
-
|
| 884 |
-
|
| 885 |
-
|
| 886 |
-
|
| 887 |
-
|
| 888 |
-
|
| 889 |
-
|
| 890 |
-
|
| 891 |
-
|
| 892 |
-
|
| 893 |
-
|
| 894 |
-
|
| 895 |
-
|
| 896 |
-
|
| 897 |
-
|
| 898 |
-
|
| 899 |
-
|
| 900 |
-
|
| 901 |
-
|
| 902 |
-
|
| 903 |
-
|
| 904 |
-
|
| 905 |
-
|
| 906 |
-
|
| 907 |
-
|
| 908 |
-
|
| 909 |
-
|
| 910 |
-
|
| 911 |
-
|
| 912 |
-
|
| 913 |
-
|
| 914 |
-
|
| 915 |
-
|
| 916 |
-
|
| 917 |
-
|
| 918 |
-
|
| 919 |
-
|
| 920 |
-
|
| 921 |
-
|
| 922 |
-
|
| 923 |
-
|
| 924 |
-
|
| 925 |
-
|
| 926 |
-
|
| 927 |
-
|
| 928 |
-
|
| 929 |
-
|
| 930 |
-
|
| 931 |
-
|
| 932 |
-
|
| 933 |
-
|
| 934 |
-
|
| 935 |
-
|
| 936 |
-
|
| 937 |
-
|
| 938 |
-
|
| 939 |
-
|
| 940 |
-
|
| 941 |
-
|
| 942 |
-
|
| 943 |
-
|
| 944 |
-
|
| 945 |
-
|
| 946 |
-
|
| 947 |
-
|
| 948 |
-
|
| 949 |
-
|
| 950 |
-
|
| 951 |
-
|
| 952 |
-
|
| 953 |
-
|
| 954 |
-
|
| 955 |
-
|
| 956 |
-
|
| 957 |
-
|
| 958 |
-
|
| 959 |
-
|
| 960 |
-
|
| 961 |
-
|
| 962 |
-
|
| 963 |
-
|
| 964 |
-
|
| 965 |
-
|
| 966 |
-
|
| 967 |
-
|
| 968 |
-
|
| 969 |
-
|
| 970 |
-
|
| 971 |
-
|
| 972 |
-
|
| 973 |
-
|
| 974 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Main Flask Application for Transport Query System
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
from flask import Flask, render_template, request, jsonify, session
|
| 7 |
+
import os
|
| 8 |
+
from llm_query_processor import LLMQueryProcessor
|
| 9 |
+
from enhanced_nlp_processor import EnhancedNLPProcessor
|
| 10 |
+
from spell_corrector import SpellCorrector
|
| 11 |
+
from neo4j_service import Neo4jService
|
| 12 |
+
from translation_service import TranslationService
|
| 13 |
+
from logger import get_logger
|
| 14 |
+
from config import Config
|
| 15 |
+
|
| 16 |
+
app = Flask(__name__)
|
| 17 |
+
app.config.from_object(Config)
|
| 18 |
+
logger = get_logger("FlaskApp")
|
| 19 |
+
|
| 20 |
+
# Initialize services
|
| 21 |
+
query_processor = LLMQueryProcessor()
|
| 22 |
+
enhanced_nlp_processor = EnhancedNLPProcessor()
|
| 23 |
+
spell_corrector = SpellCorrector()
|
| 24 |
+
neo4j_service = Neo4jService()
|
| 25 |
+
translation_service = TranslationService()
|
| 26 |
+
|
| 27 |
+
@app.route('/')
|
| 28 |
+
def index():
|
| 29 |
+
"""Main page"""
|
| 30 |
+
return render_template('index.html')
|
| 31 |
+
|
| 32 |
+
@app.route('/api/query', methods=['POST'])
|
| 33 |
+
def process_query():
|
| 34 |
+
"""Process user query with enhanced NLP and translation support"""
|
| 35 |
+
try:
|
| 36 |
+
data = request.get_json()
|
| 37 |
+
user_query = data.get('query', '').strip()
|
| 38 |
+
use_enhanced_nlp = data.get('enhanced_nlp', True) # Default to enhanced NLP
|
| 39 |
+
|
| 40 |
+
if not user_query:
|
| 41 |
+
return jsonify({
|
| 42 |
+
'success': False,
|
| 43 |
+
'message': 'Please enter a query.'
|
| 44 |
+
})
|
| 45 |
+
|
| 46 |
+
# Auto-detect language and translate if needed
|
| 47 |
+
translation_info = translation_service.translate_query(user_query)
|
| 48 |
+
|
| 49 |
+
# Use translated query for processing
|
| 50 |
+
query_to_process = translation_info['translated_query']
|
| 51 |
+
|
| 52 |
+
# Log translation info to console
|
| 53 |
+
detected_lang = translation_info.get('detected_language', 'english')
|
| 54 |
+
if detected_lang != 'english':
|
| 55 |
+
logger.info(f"Translation: {detected_lang}->en method={translation_info['translation_method']} original='{translation_info['original_query']}' translated='{translation_info['translated_query']}'")
|
| 56 |
+
else:
|
| 57 |
+
logger.info(f"Processing English Query: '{user_query}'")
|
| 58 |
+
|
| 59 |
+
# Process the query with enhanced NLP or fallback to basic processor
|
| 60 |
+
if use_enhanced_nlp:
|
| 61 |
+
result = enhanced_nlp_processor.process_query(query_to_process)
|
| 62 |
+
else:
|
| 63 |
+
result = query_processor.process_query(query_to_process)
|
| 64 |
+
|
| 65 |
+
# If original query was not in English, translate the response back
|
| 66 |
+
detected_lang = translation_info.get('detected_language', 'english')
|
| 67 |
+
if detected_lang != 'english':
|
| 68 |
+
print(f" English Response: {result.get('message', 'No message')}")
|
| 69 |
+
result = translation_service.translate_response(result, detected_lang)
|
| 70 |
+
result['translation_info'] = translation_info
|
| 71 |
+
print(f" {detected_lang.title()} Response: {result.get('message', 'No message')}")
|
| 72 |
+
print(f" Translation Complete ✅")
|
| 73 |
+
|
| 74 |
+
logger.info(f"Response success={result.get('success')} type={result.get('query_type','n/a')} message='{result.get('message','')[:120]}'")
|
| 75 |
+
return jsonify(result)
|
| 76 |
+
|
| 77 |
+
except Exception as e:
|
| 78 |
+
return jsonify({
|
| 79 |
+
'success': False,
|
| 80 |
+
'message': f'Error processing query: {str(e)}'
|
| 81 |
+
})
|
| 82 |
+
|
| 83 |
+
@app.route('/api/suggestions', methods=['POST'])
|
| 84 |
+
def get_suggestions():
|
| 85 |
+
"""Get location suggestions for autocomplete"""
|
| 86 |
+
try:
|
| 87 |
+
data = request.get_json()
|
| 88 |
+
partial_location = data.get('location', '').strip()
|
| 89 |
+
|
| 90 |
+
if not partial_location:
|
| 91 |
+
return jsonify({'suggestions': []})
|
| 92 |
+
|
| 93 |
+
suggestions = spell_corrector.get_suggestions(partial_location)
|
| 94 |
+
|
| 95 |
+
return jsonify({
|
| 96 |
+
'suggestions': [{'name': name, 'confidence': conf} for name, conf in suggestions]
|
| 97 |
+
})
|
| 98 |
+
|
| 99 |
+
except Exception as e:
|
| 100 |
+
return jsonify({
|
| 101 |
+
'success': False,
|
| 102 |
+
'message': f'Error getting suggestions: {str(e)}'
|
| 103 |
+
})
|
| 104 |
+
|
| 105 |
+
@app.route('/api/status')
|
| 106 |
+
def get_status():
|
| 107 |
+
"""Get system status"""
|
| 108 |
+
try:
|
| 109 |
+
neo4j_connected = neo4j_service.is_connected()
|
| 110 |
+
places = neo4j_service.get_all_places() if neo4j_connected else []
|
| 111 |
+
stats = neo4j_service.get_route_statistics() if neo4j_connected else {}
|
| 112 |
+
|
| 113 |
+
return jsonify({
|
| 114 |
+
'neo4j_connected': neo4j_connected,
|
| 115 |
+
'total_places': len(places),
|
| 116 |
+
'statistics': stats
|
| 117 |
+
})
|
| 118 |
+
|
| 119 |
+
except Exception as e:
|
| 120 |
+
return jsonify({
|
| 121 |
+
'success': False,
|
| 122 |
+
'message': f'Error getting status: {str(e)}'
|
| 123 |
+
})
|
| 124 |
+
|
| 125 |
+
@app.route('/api/places')
|
| 126 |
+
def get_places():
|
| 127 |
+
"""Get all available places"""
|
| 128 |
+
try:
|
| 129 |
+
places = neo4j_service.get_all_places()
|
| 130 |
+
return jsonify({
|
| 131 |
+
'success': True,
|
| 132 |
+
'places': places
|
| 133 |
+
})
|
| 134 |
+
|
| 135 |
+
except Exception as e:
|
| 136 |
+
return jsonify({
|
| 137 |
+
'success': False,
|
| 138 |
+
'message': f'Error getting places: {str(e)}'
|
| 139 |
+
})
|
| 140 |
+
|
| 141 |
+
@app.route('/api/sinhala/examples')
|
| 142 |
+
def get_sinhala_examples():
|
| 143 |
+
"""Get example queries in Sinhala"""
|
| 144 |
+
try:
|
| 145 |
+
sinhala_examples = translation_service.get_sinhala_examples()
|
| 146 |
+
return jsonify({
|
| 147 |
+
'success': True,
|
| 148 |
+
'examples': sinhala_examples
|
| 149 |
+
})
|
| 150 |
+
|
| 151 |
+
except Exception as e:
|
| 152 |
+
return jsonify({
|
| 153 |
+
'success': False,
|
| 154 |
+
'message': f'Error getting Sinhala examples: {str(e)}'
|
| 155 |
+
})
|
| 156 |
+
|
| 157 |
+
@app.route('/api/tamil/examples')
|
| 158 |
+
def get_tamil_examples():
|
| 159 |
+
"""Get example queries in Tamil"""
|
| 160 |
+
try:
|
| 161 |
+
tamil_examples = translation_service.get_tamil_examples()
|
| 162 |
+
return jsonify({
|
| 163 |
+
'success': True,
|
| 164 |
+
'examples': tamil_examples
|
| 165 |
+
})
|
| 166 |
+
|
| 167 |
+
except Exception as e:
|
| 168 |
+
return jsonify({
|
| 169 |
+
'success': False,
|
| 170 |
+
'message': f'Error getting Tamil examples: {str(e)}'
|
| 171 |
+
})
|
| 172 |
+
|
| 173 |
+
@app.route('/api/language/detect', methods=['POST'])
|
| 174 |
+
def detect_language():
|
| 175 |
+
"""Detect the language of input text"""
|
| 176 |
+
try:
|
| 177 |
+
data = request.get_json()
|
| 178 |
+
text = data.get('text', '').strip()
|
| 179 |
+
|
| 180 |
+
if not text:
|
| 181 |
+
return jsonify({
|
| 182 |
+
'success': False,
|
| 183 |
+
'message': 'Please provide text to detect language.'
|
| 184 |
+
})
|
| 185 |
+
|
| 186 |
+
detection_result = translation_service.language_detector.detect_language(text)
|
| 187 |
+
|
| 188 |
+
return jsonify({
|
| 189 |
+
'success': True,
|
| 190 |
+
'detection_result': detection_result
|
| 191 |
+
})
|
| 192 |
+
|
| 193 |
+
except Exception as e:
|
| 194 |
+
return jsonify({
|
| 195 |
+
'success': False,
|
| 196 |
+
'message': f'Error detecting language: {str(e)}'
|
| 197 |
+
})
|
| 198 |
+
|
| 199 |
+
@app.route('/api/translation/test')
|
| 200 |
+
def test_translation():
|
| 201 |
+
"""Test translation functionality"""
|
| 202 |
+
try:
|
| 203 |
+
test_results = translation_service.test_translation()
|
| 204 |
+
return jsonify({
|
| 205 |
+
'success': True,
|
| 206 |
+
'test_results': test_results
|
| 207 |
+
})
|
| 208 |
+
|
| 209 |
+
except Exception as e:
|
| 210 |
+
return jsonify({
|
| 211 |
+
'success': False,
|
| 212 |
+
'message': f'Error testing translation: {str(e)}'
|
| 213 |
+
})
|
| 214 |
+
|
| 215 |
+
@app.route('/api/translation/translate', methods=['POST'])
|
| 216 |
+
def translate_text():
|
| 217 |
+
"""Translate text between supported languages (Sinhala, Tamil, Singlish, English)"""
|
| 218 |
+
try:
|
| 219 |
+
data = request.get_json()
|
| 220 |
+
text = data.get('text', '').strip()
|
| 221 |
+
target_lang = data.get('target_lang', 'en') # 'en', 'si', 'ta'
|
| 222 |
+
source_lang = data.get('source_lang', 'auto')
|
| 223 |
+
|
| 224 |
+
if not text:
|
| 225 |
+
return jsonify({
|
| 226 |
+
'success': False,
|
| 227 |
+
'message': 'Please provide text to translate.'
|
| 228 |
+
})
|
| 229 |
+
|
| 230 |
+
# Detect source language if auto
|
| 231 |
+
detection_result = translation_service.language_detector.detect_language(text)
|
| 232 |
+
detected_language = detection_result['language']
|
| 233 |
+
|
| 234 |
+
# Map detected language to language code
|
| 235 |
+
if detected_language == 'sinhala':
|
| 236 |
+
detected_lang_code = 'si'
|
| 237 |
+
elif detected_language == 'tamil':
|
| 238 |
+
detected_lang_code = 'ta'
|
| 239 |
+
elif detected_language == 'singlish':
|
| 240 |
+
detected_lang_code = 'si' # Treat Singlish as Sinhala for translation
|
| 241 |
+
else:
|
| 242 |
+
detected_lang_code = 'en'
|
| 243 |
+
|
| 244 |
+
translated_text = translation_service.translate_text(text, target_lang, source_lang)
|
| 245 |
+
|
| 246 |
+
return jsonify({
|
| 247 |
+
'success': True,
|
| 248 |
+
'original_text': text,
|
| 249 |
+
'translated_text': translated_text,
|
| 250 |
+
'detected_language': detected_language,
|
| 251 |
+
'source_language': detected_lang_code,
|
| 252 |
+
'target_language': target_lang,
|
| 253 |
+
'translation_method': translation_service.last_translation_method or 'dictionary',
|
| 254 |
+
'detection_confidence': detection_result['confidence']
|
| 255 |
+
})
|
| 256 |
+
|
| 257 |
+
except Exception as e:
|
| 258 |
+
return jsonify({
|
| 259 |
+
'success': False,
|
| 260 |
+
'message': f'Error translating text: {str(e)}'
|
| 261 |
+
})
|
| 262 |
+
|
| 263 |
+
@app.route('/api/nlp/capabilities')
def get_nlp_capabilities():
    """Get information about natural language processing capabilities with live examples.

    Builds a static catalogue of supported query types, then actually runs a
    sample of each type through the enhanced NLP processor so the response
    contains real results, not just descriptions.
    """
    # Test queries for each type to demonstrate actual results
    test_queries = [
        {
            'type': 'fare_inquiry',
            'description': 'Find fare between two specific locations',
            'examples': [
                'What is the fare from Colombo to Kandy?',
                'fare of anuradhapura to kandy',
                'price from panadura to galle',
                'Colombo to Kandy fare'
            ]
        },
        {
            'type': 'comparison',
            'description': 'Compare fares between different routes',
            'examples': [
                'Compare fares from Colombo to Kandy vs Colombo to Galle',
                'Which is cheaper between Colombo to Kandy and Colombo to Anuradapura?',
                'What is the difference in fare between Panadura to Galle and Panadura to Matara?'
            ]
        },
        {
            'type': 'range_search',
            'description': 'Find routes within specific price ranges',
            'examples': [
                'Find routes under 500 rupees',
                'Show me routes between 200 and 800 rupees',
                'Routes over 1000 rupees'
            ]
        },
        {
            'type': 'recommendation',
            'description': 'Get route recommendations based on criteria',
            'examples': [
                'Recommend cheap routes',
                'Show me popular destinations',
                'What are the best routes from Colombo?'
            ]
        },
        {
            'type': 'route_inquiry',
            'description': 'Find routes from/to specific locations',
            'examples': [
                'Routes from Colombo',
                'Routes to Galle',
                'What routes depart from Kandy?'
            ]
        },
        {
            'type': 'statistics',
            'description': 'Get database overview and statistics',
            'examples': [
                'What is the average fare?',
                'Database statistics',
                'How many routes are there?'
            ]
        }
    ]

    # Process each test query to get actual results.
    # Per-query failures are captured in the payload instead of aborting the
    # whole endpoint, so one bad query type does not hide the rest.
    live_examples = []
    for query_type in test_queries:
        type_examples = []
        for example_query in query_type['examples'][:2]:  # Test first 2 examples
            try:
                result = enhanced_nlp_processor.process_query(example_query)
                type_examples.append({
                    'query': example_query,
                    'result': result
                })
            except Exception as e:
                type_examples.append({
                    'query': example_query,
                    'result': {
                        'success': False,
                        'message': f'Error: {str(e)}'
                    }
                })

        live_examples.append({
            'type': query_type['type'],
            'description': query_type['description'],
            'examples': type_examples
        })

    # Static description of the system's NLP feature set, combined with the
    # live results gathered above.
    capabilities = {
        'natural_language_processing': {
            'description': 'Advanced NLP for transport queries with enhanced understanding',
            'features': [
                'Multiple query formats (fare, price, cost)',
                'Natural language patterns (from X to Y, X to Y fare, etc.)',
                'Question formats (What is, How much, Show me, etc.)',
                'Compact formats (Colombo to Kandy fare)',
                'Spell correction and fuzzy matching',
                'Automatic location name correction',
                'LLM-powered query interpretation',
                'Fallback keyword-based processing',
                'Advanced intent classification',
                'Entity extraction and normalization',
                'Confidence scoring for query understanding'
            ]
        },
        'query_types': test_queries,
        'live_examples': live_examples,
        'spell_correction': {
            'description': 'Automatic location name correction',
            'methods': [
                'Direct mapping (exact matches)',
                'Fuzzy matching (similar names)',
                'LLM correction (AI-powered)',
                'Partial matching (substring matching)'
            ],
            'examples': [
                'panadra → Panadura',
                'gale → Galle',
                'colmbo → Colombo',
                'kandee → Kandy'
            ]
        },
        'llm_integration': {
            'description': 'AI-powered query interpretation with LLM Cypher generation',
            'features': [
                'Automatic query type detection',
                'LLM-powered Cypher query generation',
                'Natural language understanding',
                'Fallback to keyword-based processing',
                'Advanced entity extraction',
                'Intent classification with confidence scoring',
                'Real-time database querying'
            ]
        },
        'enhanced_features': {
            'description': 'Advanced NLP capabilities',
            'features': [
                'Multi-intent query understanding',
                'Context-aware responses',
                'Query preprocessing and normalization',
                'Advanced pattern matching',
                'Confidence-based result ranking',
                'Comprehensive query analysis',
                'Live database results for all query types'
            ]
        }
    }

    return jsonify({
        'success': True,
        'capabilities': capabilities
    })
|
| 416 |
+
|
| 417 |
+
@app.route('/api/nlp/test', methods=['POST'])
def test_nlp_query():
    """Test a natural language query and return detailed analysis.

    Expects JSON: {"query": str, "enhanced_nlp": bool (default True)}.
    The response traces each processing stage (location extraction, NLP
    processing, database results) alongside the final result.
    """
    try:
        payload = request.get_json()
        user_query = payload.get('query', '').strip()
        use_enhanced_nlp = payload.get('enhanced_nlp', True)

        if not user_query:
            return jsonify({
                'success': False,
                'message': 'Please provide a query to test.'
            })

        # Step-by-step trace of how the query was handled.
        analysis = {
            'original_query': user_query,
            'processing_steps': []
        }
        steps = analysis['processing_steps']

        # Step 1: spell-corrected location extraction.
        # Each entry is a tuple: (original, corrected, confidence, method).
        locations = spell_corrector.extract_locations_from_query(user_query)
        steps.append({
            'step': 'Location Extraction',
            'locations_found': len(locations),
            'details': [
                {
                    'original': entry[0],
                    'corrected': entry[1],
                    'confidence': entry[2],
                    'method': entry[3]
                }
                for entry in locations
            ]
        })

        # Step 2: run the query through the selected pipeline.
        if use_enhanced_nlp:
            result = enhanced_nlp_processor.process_query(user_query)
            query_analysis = result.get('query_analysis', {})
            steps.append({
                'step': 'Enhanced NLP Processing',
                'success': result.get('success', False),
                'query_type': result.get('query_type', 'unknown'),
                'message': result.get('message', ''),
                'confidence': query_analysis.get('confidence', 0),
                'intent': query_analysis.get('intent', {}),
                'entities': query_analysis.get('entities', {})
            })
        else:
            result = query_processor.process_query(user_query)
            steps.append({
                'step': 'Basic Query Processing',
                'success': result.get('success', False),
                'query_type': result.get('query_type', 'unknown'),
                'message': result.get('message', ''),
                'cypher_query': result.get('cypher_query', ''),
                'corrections': result.get('corrections', [])
            })

        # Step 3: surface a sample of any database rows that came back.
        if result.get('success') and result.get('data'):
            steps.append({
                'step': 'Database Results',
                'results_count': len(result['data']),
                'sample_results': result['data'][:3]  # Show first 3 results
            })

        return jsonify({
            'success': True,
            'analysis': analysis,
            'result': result
        })

    except Exception as e:
        return jsonify({
            'success': False,
            'message': f'Error testing NLP query: {str(e)}'
        })
|
| 494 |
+
|
| 495 |
+
@app.route('/api/nlp/demo')
def get_nlp_demo():
    """Get a comprehensive demo of natural language capabilities.

    Returns a static catalogue of demo queries grouped by category; the
    JSON shape is {'demo': {'title', 'description', 'categories': [...]}}.
    """
    # (category label, sample queries) pairs; rendered into the demo payload
    # below so each category keeps the same dict shape in the response.
    demo_data = [
        ('Basic Fare Queries', [
            'What is the fare from Colombo to Kandy?',
            'fare of anuradhapura to kandy',
            'price from panadura to galle',
            'Colombo to Kandy fare'
        ]),
        ('Comparison Queries', [
            'Compare fares from Colombo to Kandy vs Colombo to Galle',
            'Which is cheaper between Colombo to Kandy and Colombo to Anuradapura?',
            'What is the difference in fare between Panadura to Galle and Panadura to Matara?'
        ]),
        ('Range Search Queries', [
            'Find routes under 500 rupees',
            'Show me routes between 200 and 800 rupees',
            'Routes over 1000 rupees'
        ]),
        ('Recommendation Queries', [
            'Recommend cheap routes',
            'Show me popular destinations',
            'What are the best routes from Colombo?'
        ]),
        ('Statistical Queries', [
            'What is the average fare?',
            'Database statistics',
            'How many routes are there?'
        ]),
        ('Route Queries', [
            'Show me the cheapest routes',
            'Routes from Colombo',
            'Routes to Galle',
            'What routes depart from Kandy?'
        ]),
        ('Spell Correction Tests', [
            'price from panadra to gale',
            'fare of colmbo to kandee',
            'cost from anuradapura to kandy'
        ])
    ]

    demo_queries = [
        {'category': name, 'queries': queries}
        for name, queries in demo_data
    ]

    return jsonify({
        'success': True,
        'demo': {
            'title': 'Enhanced Natural Language Transport Query Demo',
            'description': 'Advanced NLP capabilities with comparison, range search, and recommendations',
            'categories': demo_queries
        }
    })
|
| 567 |
+
|
| 568 |
+
@app.route('/api/examples')
def get_examples():
    """Get comprehensive example queries showcasing natural language capabilities.

    Returns a static list of example categories covering Sinhala, Tamil,
    Singlish (mixed-script) and English phrasings for every supported query
    type (fare, comparison, range, recommendation, statistics, routes,
    spell correction). No database access is performed.
    """
    examples = [
        # === SINHALA FARE QUERIES ===
        {
            'category': 'Sinhala Fare Queries (සිංහල)',
            'examples': [
                {
                    'query': 'කොළඹ සිට මහනුවරට ගාස්තුව කීයද?',
                    'description': 'Standard fare query format'
                },
                {
                    'query': 'පානදුරේ ඉඳන් ගාල්ලට කීයක් යනවද?',
                    'description': 'Alternative way to ask for fare'
                },
                {
                    'query': 'අනුරාධපුර සිට මහනුවර දක්වා ගාස්තුව',
                    'description': 'Natural language format'
                },
                {
                    'query': 'මහනුවර සිට මාතරට ගාස්තුව කීයද?',
                    'description': 'Question format'
                }
            ]
        },

        # === TAMIL FARE QUERIES ===
        {
            'category': 'Tamil Fare Queries (தமிழ்)',
            'examples': [
                {
                    'query': 'கொழும்பு இருந்து கண்டி வரை பேருந்து கட்டணம் எவ்வளவு?',
                    'description': 'Standard Tamil fare query'
                },
                {
                    # NOTE(review): this literal contains replacement
                    # characters (likely mojibake for மாத்தறை / Matara) —
                    # confirm against the original source encoding.
                    'query': 'ம��த்தறை இருந்து காலி வரை விலை எவ்வளவு?',
                    'description': 'Alternative Tamil fare query'
                },
                {
                    'query': 'அனுராதபுரம் இருந்து கொழும்பு வரை கட்டணம்',
                    'description': 'Tamil natural language format'
                },
                {
                    'query': 'பனதுரை இருந்து காலி வரை பேருந்து கட்டணம் எவ்வளவு?',
                    'description': 'Tamil question format'
                }
            ]
        },

        # === SINGLISH FARE QUERIES ===
        {
            'category': 'Singlish Fare Queries (Mixed)',
            'examples': [
                {
                    'query': 'කොළඹ සිට Kandy ගාස්තුව කීයද?',
                    'description': 'Sinhala-English mixed query'
                },
                {
                    'query': 'Colombo සිට ගාල්ලට bus fare කීයද?',
                    'description': 'English-Sinhala mixed query'
                },
                {
                    'query': 'කොළඹ සිට Panadura දක්වා price කීයද?',
                    'description': 'Mixed language with English terms'
                },
                {
                    'query': 'Galle සිට මාතරට ticket cost කීයද?',
                    'description': 'Mixed language fare query'
                }
            ]
        },

        # === ENGLISH FARE QUERIES ===
        {
            'category': 'English Fare Queries',
            'examples': [
                {
                    'query': 'What is the fare from Colombo to Kandy?',
                    'description': 'Standard English fare query'
                },
                {
                    'query': 'How much is the bus fare from Panadura to Galle?',
                    'description': 'English question format'
                },
                {
                    'query': 'Price from Anuradhapura to Kandy',
                    'description': 'Compact English format'
                },
                {
                    'query': 'Show me the cost from Matara to Colombo',
                    'description': 'English request format'
                }
            ]
        },

        # === COMPARISON QUERIES ===
        {
            'category': 'Sinhala Comparison Queries (සිංහල)',
            'examples': [
                {
                    'query': 'කොළඹ සිට මහනුවර දක්වා සහ කොළඹ සිට ගාල්ල දක්වා ගාස්තු සංසන්දනය කරන්න.',
                    'description': 'Compare two different routes'
                },
                {
                    'query': 'කොළඹ සිට මහනුවර දක්වා සහ කොළඹ සිට අනුරාධපුර දක්වා ලාභදායී වන්නේ කුමක්ද?',
                    'description': 'Find the cheaper option'
                }
            ]
        },

        {
            'category': 'Tamil Comparison Queries (தமிழ்)',
            'examples': [
                {
                    'query': 'கொழும்பு இருந்து கண்டி வரை மற்றும் கொழும்பு இருந்து காலி வரை கட்டணம் ஒப்பிடு.',
                    'description': 'Compare two different routes in Tamil'
                },
                {
                    'query': 'கொழும்பு இருந்து கண்டி வரை மற்றும் கொழும்பு இருந்து அனுராதபுரம் வரை கட்டணத்தின் வித்தியாசம் எவ்வளவு?',
                    'description': 'Calculate fare difference in Tamil'
                }
            ]
        },

        {
            'category': 'Singlish Comparison Queries (Mixed)',
            'examples': [
                {
                    'query': 'කොළඹ සිට Kandy සහ Colombo සිට Galle fares compare කරන්න.',
                    'description': 'Mixed language comparison'
                },
                {
                    'query': 'Colombo සිට මහනුවර සහ Colombo සිට අනුරාධපුර cheaper කුමක්ද?',
                    'description': 'Mixed language cheaper option'
                }
            ]
        },

        # === RANGE SEARCH QUERIES ===
        {
            'category': 'Sinhala Range Queries (සිංහල)',
            'examples': [
                {
                    'query': 'රුපියල් 500ට අඩු මාර්ග සොයා ගන්න',
                    'description': 'Find affordable routes'
                },
                {
                    'query': 'රුපියල් 200 සහ 800 අතර මාර්ග සොයා ගන්න',
                    'description': 'Find routes in price range'
                }
            ]
        },

        {
            'category': 'Tamil Range Queries (தமிழ்)',
            'examples': [
                {
                    'query': 'ரூபாய் 500 க்கு குறைவான கட்டணம் உள்ள பாதைகளை கண்டுபிடி',
                    'description': 'Find affordable routes in Tamil'
                },
                {
                    'query': 'ரூபாய் 200 மற்றும் 800 இடையில் கட்டணம் உள்ள பாதைகளை காட்டு',
                    'description': 'Find routes in price range in Tamil'
                }
            ]
        },

        {
            'category': 'Singlish Range Queries (Mixed)',
            'examples': [
                {
                    'query': 'රුපියල් 500ට අඩු routes find කරන්න',
                    'description': 'Mixed language range search'
                },
                {
                    'query': 'Rs. 200 සහ 800 අතර මාර්ග show කරන්න',
                    'description': 'Mixed language price range'
                }
            ]
        },

        # === RECOMMENDATION QUERIES ===
        {
            'category': 'Sinhala Recommendation Queries (සිංහල)',
            'examples': [
                {
                    'query': 'ලාභ මාර්ග නිර්දේශ කරන්න',
                    'description': 'Get budget-friendly recommendations'
                },
                {
                    'query': 'මට ජනප්රිය ගමනාන්ත පෙන්වන්න',
                    'description': 'Find frequently traveled routes'
                }
            ]
        },

        {
            'category': 'Tamil Recommendation Queries (தமிழ்)',
            'examples': [
                {
                    'query': 'குறைந்த விலையில் பாதைகளை பரிந்துரை',
                    'description': 'Get budget-friendly recommendations in Tamil'
                },
                {
                    'query': 'பிரபலமான இலக்குகளை காட்டு',
                    'description': 'Find frequently traveled routes in Tamil'
                }
            ]
        },

        {
            'category': 'Singlish Recommendation Queries (Mixed)',
            'examples': [
                {
                    'query': 'ලාභ routes recommend කරන්න',
                    'description': 'Mixed language recommendations'
                },
                {
                    'query': 'Popular destinations show කරන්න',
                    'description': 'Mixed language popular routes'
                }
            ]
        },

        # === STATISTICAL QUERIES ===
        {
            'category': 'Sinhala Statistical Queries (සිංහල)',
            'examples': [
                {
                    'query': 'සාමාන්ය ගාස්තුව කීයද?',
                    'description': 'Get average fare statistics'
                },
                {
                    'query': 'දත්ත සමුදා සංඛ්යා ලේඛන',
                    'description': 'Get comprehensive database overview'
                }
            ]
        },

        {
            'category': 'Tamil Statistical Queries (தமிழ்)',
            'examples': [
                {
                    'query': 'சராசரி கட்டணம் எவ்வளவு?',
                    'description': 'Get average fare statistics in Tamil'
                },
                {
                    'query': 'தரவு சேமிப்பக புள்ளிவிவரங்கள்',
                    'description': 'Get comprehensive database overview in Tamil'
                }
            ]
        },

        {
            'category': 'Singlish Statistical Queries (Mixed)',
            'examples': [
                {
                    'query': 'Average fare කීයද?',
                    'description': 'Mixed language statistics'
                },
                {
                    'query': 'Database statistics show කරන්න',
                    'description': 'Mixed language database overview'
                }
            ]
        },

        # === ROUTE QUERIES ===
        {
            'category': 'Sinhala Route Queries (සිංහල)',
            'examples': [
                {
                    'query': 'මට ලාභදායී මාර්ග 10ක් පෙන්වන්න',
                    'description': 'Find top 10 cheapest routes'
                },
                {
                    'query': 'කොළඹ සිට යාමට මාර්ග මොනවාද?',
                    'description': 'Find all routes departing from a location'
                }
            ]
        },

        {
            'category': 'Tamil Route Queries (தமிழ்)',
            'examples': [
                {
                    'query': 'குறைந்த விலையில் பாதைகள் 10 காட்டு',
                    'description': 'Find top 10 cheapest routes in Tamil'
                },
                {
                    'query': 'கொழும்பு இருந்து போகும் பாதைகள் என்ன?',
                    'description': 'Find all routes departing from a location in Tamil'
                }
            ]
        },

        {
            'category': 'Singlish Route Queries (Mixed)',
            'examples': [
                {
                    'query': 'ලාභදායී routes 10ක් show කරන්න',
                    'description': 'Mixed language cheapest routes'
                },
                {
                    'query': 'Colombo සිට යන මාර්ග මොනවාද?',
                    'description': 'Mixed language route queries'
                }
            ]
        },

        # === SPELLING ERROR EXAMPLES ===
        {
            'category': 'Sinhala Spell Correction (සිංහල)',
            'examples': [
                {
                    'query': 'පාන්දුරේ ඉඳන් ගාල්ල්ට කීයක් යනවද?',
                    'description': 'Test spell correction (Panadura, Galle)'
                },
                {
                    'query': 'කොළ්බ්හ සිට මහනුවර්ට ගාස්තුව කීයද?',
                    'description': 'Test spell correction (Colombo, Kandy)'
                }
            ]
        },

        {
            'category': 'Tamil Spell Correction (தமிழ்)',
            'examples': [
                {
                    'query': 'கொழும்பு இருந்து கண்டி வரை பேருந்து கட்டணம் எவ்வளவு?',
                    'description': 'Test Tamil spell correction'
                },
                {
                    'query': 'பனதுரை இருந்து காலி வரை விலை எவ்வளவு?',
                    'description': 'Test Tamil with common variations'
                }
            ]
        },

        {
            'category': 'Singlish Spell Correction (Mixed)',
            'examples': [
                {
                    'query': 'කොළඹ සිට Kandy ගාස්තුව කීයද?',
                    'description': 'Test mixed language spell correction'
                },
                {
                    'query': 'Colombo සිට ගාල්ලට bus fare කීයද?',
                    'description': 'Test Singlish with English terms'
                }
            ]
        }
    ]

    return jsonify({
        'success': True,
        'examples': examples
    })
|
| 927 |
+
|
| 928 |
+
@app.route('/api/nlp/advanced', methods=['POST'])
def advanced_nlp_query():
    """Advanced NLP query processing with detailed analysis.

    Expects JSON: {"query": str}. The enhanced NLP processor's result dict
    is returned to the client unchanged.
    """
    try:
        payload = request.get_json()
        user_query = payload.get('query', '').strip()

        # Reject empty / whitespace-only queries up front.
        if not user_query:
            return jsonify({
                'success': False,
                'message': 'Please provide a query to process.'
            })

        # Delegate everything to the enhanced NLP pipeline and pass its
        # result straight through.
        processed = enhanced_nlp_processor.process_query(user_query)
        return jsonify(processed)

    except Exception as e:
        return jsonify({
            'success': False,
            'message': f'Error processing advanced NLP query: {str(e)}'
        })
|
| 951 |
+
|
| 952 |
+
@app.route('/api/nlp/compare', methods=['POST'])
def compare_routes():
    """Compare multiple routes in a single Cypher query.

    Expects JSON: {"routes": [{"from": str, "to": str}, ...]} with at least
    two entries. Returns one row combining every route's label and fare
    (route1/fare1, route2/fare2, ...).

    Security fix: location names were previously interpolated into the
    Cypher string with f-strings, allowing Cypher injection from request
    data. They are now passed as query parameters.
    """
    try:
        data = request.get_json()
        routes = data.get('routes', [])

        if len(routes) < 2:
            return jsonify({
                'success': False,
                'message': 'Please provide at least 2 routes to compare.'
            })

        # Keep only routes that specify both endpoints; the original code
        # silently skipped incomplete entries and could emit malformed
        # Cypher ("MATCH  RETURN ") when too few remained.
        valid_routes = [
            (route.get('from'), route.get('to'))
            for route in routes
            if route.get('from') and route.get('to')
        ]
        if len(valid_routes) < 2:
            return jsonify({
                'success': False,
                'message': 'Please provide at least 2 routes with both "from" and "to" locations.'
            })

        # Build the Cypher with $parameters so user-supplied location names
        # can never alter the query structure.
        match_parts = []
        return_parts = []
        params = {}
        for i, (from_loc, to_loc) in enumerate(valid_routes):
            params[f'from{i}'] = from_loc
            params[f'to{i}'] = to_loc
            match_parts.append(
                f"(a{i}:Place {{name: $from{i}}})-[r{i}:Fare]->(b{i}:Place {{name: $to{i}}})"
            )
            return_parts.append(
                f"a{i}.name + ' to ' + b{i}.name as route{i+1}, r{i}.fare as fare{i+1}"
            )

        comparison_query = (
            "MATCH " + ", ".join(match_parts)
            + " RETURN " + ", ".join(return_parts)
        )

        # Execute query
        with neo4j_service.driver.session() as session:
            result = session.run(comparison_query, **params)
            results = [dict(record) for record in result]

        return jsonify({
            'success': True,
            'data': results,
            'message': f'Comparison of {len(routes)} routes completed'
        })

    except Exception as e:
        return jsonify({
            'success': False,
            'message': f'Error comparing routes: {str(e)}'
        })
|
| 997 |
+
|
| 998 |
+
@app.route('/api/nlp/range', methods=['POST'])
def search_by_range():
    """Search routes whose fare lies in an optional [min_price, max_price] range.

    Expects JSON: {"min_price": number?, "max_price": number?} — at least
    one bound must be supplied. Results are ordered by ascending fare.

    Security fix: the bounds were previously f-string-interpolated into the
    Cypher text, so a non-numeric value could inject arbitrary Cypher. They
    are now validated as numbers and passed as query parameters.
    """
    try:
        data = request.get_json()
        min_price = data.get('min_price')
        max_price = data.get('max_price')

        if min_price is None and max_price is None:
            return jsonify({
                'success': False,
                'message': 'Please provide min_price or max_price or both.'
            })

        # Validate the bounds up front so unchecked client data never
        # reaches the query text.
        try:
            min_price = float(min_price) if min_price is not None else None
            max_price = float(max_price) if max_price is not None else None
        except (TypeError, ValueError):
            return jsonify({
                'success': False,
                'message': 'min_price and max_price must be numbers.'
            })

        # Build the WHERE clause from $parameters.
        conditions = []
        params = {}
        if min_price is not None:
            conditions.append("r.fare >= $min_price")
            params['min_price'] = min_price
        if max_price is not None:
            conditions.append("r.fare <= $max_price")
            params['max_price'] = max_price

        range_query = (
            "MATCH (a:Place)-[r:Fare]->(b:Place) WHERE "
            + " AND ".join(conditions)
            + " RETURN a.name as from_place, b.name as to_place, r.fare as fare ORDER BY r.fare"
        )

        # Execute query
        with neo4j_service.driver.session() as session:
            result = session.run(range_query, **params)
            results = [dict(record) for record in result]

        return jsonify({
            'success': True,
            'data': results,
            'message': f'Found {len(results)} routes in the specified range'
        })

    except Exception as e:
        return jsonify({
            'success': False,
            'message': f'Error searching by range: {str(e)}'
        })
|
| 1040 |
+
|
| 1041 |
+
@app.route('/api/nlp/test-all-types')
def test_all_query_types():
    """Test all query types with live results from Neo4j database.

    Runs a fixed battery of queries per supported type through the enhanced
    NLP processor and reports per-type and overall success statistics.
    """
    try:
        # Representative test queries for every supported query type.
        test_queries = {
            'fare_inquiry': [
                'What is the fare from Colombo to Kandy?',
                'fare of anuradhapura to kandy',
                'price from panadura to galle'
            ],
            'comparison': [
                'Compare fares from Colombo to Kandy vs Colombo to Galle',
                'Which is cheaper between Colombo to Kandy and Colombo to Anuradapura?'
            ],
            'range_search': [
                'Find routes under 500 rupees',
                'Show me routes between 200 and 800 rupees',
                'Routes over 1000 rupees'
            ],
            'recommendation': [
                'Recommend cheap routes',
                'Show me popular destinations',
                'What are the best routes from Colombo?'
            ],
            'route_inquiry': [
                'Routes from Colombo',
                'Routes to Galle',
                'What routes depart from Kandy?'
            ],
            'statistics': [
                'What is the average fare?',
                'Database statistics',
                'How many routes are there?'
            ]
        }

        results = {}
        for query_type, queries in test_queries.items():
            type_results = []
            for query in queries:
                # Process with enhanced NLP (uses LLM for Cypher generation);
                # failures are recorded per query rather than aborting the run.
                try:
                    outcome = enhanced_nlp_processor.process_query(query)
                    type_results.append({
                        'query': query,
                        'result': outcome,
                        'success': outcome.get('success', False)
                    })
                except Exception as e:
                    type_results.append({
                        'query': query,
                        'result': {
                            'success': False,
                            'message': f'Error processing query: {str(e)}'
                        },
                        'success': False
                    })

            results[query_type] = {
                'description': f'Test results for {query_type} queries',
                'total_queries': len(queries),
                'successful_queries': sum(1 for r in type_results if r['success']),
                'examples': type_results
            }

        # Aggregate statistics across every type.
        total_queries = sum(len(queries) for queries in test_queries.values())
        total_successful = sum(
            entry['successful_queries'] for entry in results.values()
        )
        success_rate = (
            round((total_successful / total_queries) * 100, 2)
            if total_queries > 0 else 0
        )

        return jsonify({
            'success': True,
            'message': f'Tested {total_queries} queries across {len(test_queries)} types. {total_successful} successful.',
            'summary': {
                'total_query_types': len(test_queries),
                'total_queries_tested': total_queries,
                'successful_queries': total_successful,
                'success_rate': success_rate
            },
            'results': results,
            'neo4j_connected': neo4j_service.is_connected()
        })

    except Exception as e:
        return jsonify({
            'success': False,
            'message': f'Error testing query types: {str(e)}',
            'neo4j_connected': neo4j_service.is_connected()
        })
|
| 1134 |
+
|
| 1135 |
+
@app.errorhandler(404)
def not_found(error):
    """Return a JSON 404 payload for requests to unknown endpoints."""
    payload = {
        'success': False,
        'message': 'Endpoint not found'
    }
    return jsonify(payload), 404
|
| 1141 |
+
|
| 1142 |
+
@app.errorhandler(500)
def internal_error(error):
    """Return a JSON 500 payload for unhandled server errors."""
    payload = {
        'success': False,
        'message': 'Internal server error'
    }
    return jsonify(payload), 500
|
| 1148 |
+
|
| 1149 |
+
if __name__ == '__main__':
|
| 1150 |
+
port = int(os.getenv('PORT', 7860)) # Hugging Face Spaces uses port 7860 by default
|
| 1151 |
+
|
| 1152 |
+
print("🚌 Natural Language Transport Query System")
|
| 1153 |
+
print("=" * 60)
|
| 1154 |
+
print(f"🚀 Starting on port {port}")
|
| 1155 |
+
print(f"🌐 Open your browser and go to: http://localhost:{port}")
|
| 1156 |
+
|
| 1157 |
+
# Check Neo4j connection
|
| 1158 |
+
if neo4j_service.is_connected():
|
| 1159 |
+
print("✅ Connected to Neo4j database")
|
| 1160 |
+
stats = neo4j_service.get_route_statistics()
|
| 1161 |
+
if stats:
|
| 1162 |
+
print(f"📊 Database: {stats.get('total_places', 0)} places, {stats.get('total_routes', 0)} routes")
|
| 1163 |
+
else:
|
| 1164 |
+
print("⚠️ Neo4j not connected - some features may not work")
|
| 1165 |
+
|
| 1166 |
+
# Check LLM availability
|
| 1167 |
+
if spell_corrector.llm_available:
|
| 1168 |
+
print("🤖 LLM integration available for spell correction")
|
| 1169 |
+
else:
|
| 1170 |
+
print("⚠️ LLM not available - using fuzzy matching only")
|
| 1171 |
+
|
| 1172 |
+
print("\n🎯 Enhanced Natural Language Capabilities:")
|
| 1173 |
+
print(" • Multiple query formats (fare, price, cost)")
|
| 1174 |
+
print(" • Natural language patterns (from X to Y, X to Y fare)")
|
| 1175 |
+
print(" • Question formats (What is, How much, Show me)")
|
| 1176 |
+
print(" • Compact formats (Colombo to Kandy fare)")
|
| 1177 |
+
print(" • Spell correction and fuzzy matching")
|
| 1178 |
+
print(" • LLM-powered query interpretation")
|
| 1179 |
+
print(" • Automatic Cypher query generation")
|
| 1180 |
+
print(" • Advanced intent classification")
|
| 1181 |
+
print(" • Entity extraction and normalization")
|
| 1182 |
+
print(" • Comparison queries (vs, versus, compare)")
|
| 1183 |
+
print(" • Range search queries (under, over, between)")
|
| 1184 |
+
print(" • Recommendation queries (recommend, suggest)")
|
| 1185 |
+
print(" • Confidence scoring for query understanding")
|
| 1186 |
+
print(" • Multi-language support: Sinhala, Tamil, Singlish, English")
|
| 1187 |
+
print(" • Automatic language detection and translation")
|
| 1188 |
+
print(" • Dictionary-based, LLM, and API translation methods")
|
| 1189 |
+
print(" • Response translation back to detected language")
|
| 1190 |
+
|
| 1191 |
+
print("\n🔗 Available API Endpoints:")
|
| 1192 |
+
print(" • /api/query - Process natural language queries (enhanced NLP)")
|
| 1193 |
+
print(" • /api/nlp/capabilities - View enhanced NLP capabilities with live examples")
|
| 1194 |
+
print(" • /api/nlp/test-all-types - Test all query types with live results")
|
| 1195 |
+
print(" • /api/nlp/test - Test queries with detailed analysis")
|
| 1196 |
+
print(" • /api/nlp/demo - Get comprehensive demo queries")
|
| 1197 |
+
print(" • /api/examples - Get categorized example queries")
|
| 1198 |
+
print(" • /api/sinhala/examples - Get Sinhala example queries")
|
| 1199 |
+
print(" • /api/tamil/examples - Get Tamil example queries")
|
| 1200 |
+
print(" • /api/language/detect - Detect language of input text")
|
| 1201 |
+
print(" • /api/translation/test - Test translation functionality")
|
| 1202 |
+
print(" • /api/translation/translate - Translate text between languages")
|
| 1203 |
+
print(" • /api/status - System status and statistics")
|
| 1204 |
+
print(" • /api/suggestions - Get location suggestions")
|
| 1205 |
+
print(" • /api/places - Get all available places")
|
| 1206 |
+
|
| 1207 |
+
print("=" * 60)
|
| 1208 |
+
|
| 1209 |
+
try:
|
| 1210 |
+
app.run(debug=False, port=port, host='0.0.0.0') # Set debug=False for production
|
| 1211 |
+
except Exception as e:
|
| 1212 |
+
print(f"❌ Error starting application: {e}")
|
| 1213 |
+
print("💡 Try running as administrator or check if another application is using the port")
|
| 1214 |
+
|
| 1215 |
+
|
enhanced_nlp_processor.py
CHANGED
|
@@ -1,904 +1,904 @@
|
|
| 1 |
-
#!/usr/bin/env python3
|
| 2 |
-
"""
|
| 3 |
-
Enhanced NLP Processor for Transport Query Application
|
| 4 |
-
Advanced natural language understanding and query processing
|
| 5 |
-
"""
|
| 6 |
-
|
| 7 |
-
import re
|
| 8 |
-
import json
|
| 9 |
-
from typing import Dict, List, Tuple, Optional, Any
|
| 10 |
-
from datetime import datetime
|
| 11 |
-
from spell_corrector import SpellCorrector
|
| 12 |
-
from neo4j_service import Neo4jService
|
| 13 |
-
from config import Config
|
| 14 |
-
from logger import get_logger
|
| 15 |
-
|
| 16 |
-
class EnhancedNLPProcessor:
|
| 17 |
-
"""Advanced NLP processor with sophisticated query understanding"""
|
| 18 |
-
|
| 19 |
-
def __init__(self):
|
| 20 |
-
self.config = Config()
|
| 21 |
-
self.spell_corrector = SpellCorrector()
|
| 22 |
-
self.neo4j_service = Neo4jService()
|
| 23 |
-
self.logger = get_logger(self.__class__.__name__)
|
| 24 |
-
|
| 25 |
-
# Query patterns and templates
|
| 26 |
-
self.query_patterns = {
|
| 27 |
-
'fare_queries': [
|
| 28 |
-
r'(?:what\s+is\s+)?(?:the\s+)?(?:fare|price|cost)(?:\s+of)?(?:\s+from)?\s+([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)',
|
| 29 |
-
r'(?:what\s+is\s+)?(?:the\s+)?(?:bus\s+)?(?:fare|price|cost)(?:\s+of)?(?:\s+from)?\s+([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)',
|
| 30 |
-
r'(?:how\s+much\s+)?(?:is|does)\s+(?:the\s+)?(?:bus\s+)?(?:fare|price|cost)\s+(?:from\s+)?([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)',
|
| 31 |
-
r'([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)\s+(?:fare|price|cost)',
|
| 32 |
-
r'(?:fare|price|cost)\s+(?:from\s+)?([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)',
|
| 33 |
-
r'(?:travel|transport)\s+(?:cost|price|fare)\s+(?:from\s+)?([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)',
|
| 34 |
-
r'(?:bus|train)\s+(?:fare|price|cost)\s+(?:from\s+)?([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)',
|
| 35 |
-
r'(?:ticket\s+price|ticket\s+fare)\s+(?:from\s+)?([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)'
|
| 36 |
-
],
|
| 37 |
-
'comparison_queries': [
|
| 38 |
-
r'(?:compare|difference)\s+(?:between\s+)?(?:fares?|prices?|costs?)\s+(?:from\s+)?([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)\s+(?:and|vs|versus)\s+([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)',
|
| 39 |
-
r'(?:which\s+is\s+)?(?:cheaper|more\s+expensive)\s+(?:between\s+)?([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)\s+(?:and|vs|versus)\s+([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)'
|
| 40 |
-
],
|
| 41 |
-
'range_queries': [
|
| 42 |
-
r'(?:routes?|fares?|prices?)\s+(?:between|from)\s+([0-9,]+)\s+(?:and|to)\s+([0-9,]+)\s+(?:rupees?|rs?)',
|
| 43 |
-
r'(?:find|show)\s+(?:routes?|fares?|prices?)\s+(?:under|below|less\s+than)\s+([0-9,]+)\s+(?:rupees?|rs?)',
|
| 44 |
-
r'(?:find|show)\s+(?:routes?|fares?|prices?)\s+(?:over|above|more\s+than)\s+([0-9,]+)\s+(?:rupees?|rs?)'
|
| 45 |
-
],
|
| 46 |
-
'route_queries': [
|
| 47 |
-
r'(?:routes?|buses?|trains?)\s+(?:from|departing\s+from)\s+([a-zA-Z\s]+)',
|
| 48 |
-
r'(?:routes?|buses?|trains?)\s+(?:to|arriving\s+at)\s+([a-zA-Z\s]+)',
|
| 49 |
-
r'(?:how\s+many\s+)?(?:routes?|buses?|trains?)\s+(?:connect|go\s+to|from)\s+([a-zA-Z\s]+)',
|
| 50 |
-
r'(?:direct|non-stop)\s+(?:routes?|buses?|trains?)\s+(?:from\s+)?([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)'
|
| 51 |
-
],
|
| 52 |
-
'statistical_queries': [
|
| 53 |
-
r'(?:average|mean|median)\s+(?:fare|price|cost)',
|
| 54 |
-
r'(?:total|sum)\s+(?:of\s+)?(?:all\s+)?(?:fares?|prices?|costs?)',
|
| 55 |
-
r'(?:how\s+many\s+)?(?:routes?|places?|locations?)',
|
| 56 |
-
r'(?:database|system)\s+(?:statistics?|stats?|overview)',
|
| 57 |
-
r'(?:summary|overview)\s+(?:of\s+)?(?:transport|fare)\s+(?:data|database)'
|
| 58 |
-
],
|
| 59 |
-
'recommendation_queries': [
|
| 60 |
-
r'(?:recommend|suggest)\s+(?:cheap|budget|affordable)\s+(?:routes?|options?)',
|
| 61 |
-
r'(?:best|optimal)\s+(?:route|way)\s+(?:from\s+)?([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)',
|
| 62 |
-
r'(?:popular|frequent)\s+(?:routes?|destinations?)',
|
| 63 |
-
r'(?:hidden|secret|unknown)\s+(?:routes?|destinations?)'
|
| 64 |
-
]
|
| 65 |
-
}
|
| 66 |
-
|
| 67 |
-
# Query intent classification
|
| 68 |
-
self.intent_keywords = {
|
| 69 |
-
'fare_inquiry': ['fare', 'price', 'cost', 'how much', 'what is the cost'],
|
| 70 |
-
'route_inquiry': ['route', 'bus', 'train', 'transport', 'how to get', 'way to'],
|
| 71 |
-
'comparison': ['compare', 'difference', 'vs', 'versus', 'which is', 'better'],
|
| 72 |
-
'statistics': ['statistics', 'stats', 'overview', 'summary', 'total', 'average'],
|
| 73 |
-
'recommendation': ['recommend', 'suggest', 'best', 'optimal', 'popular'],
|
| 74 |
-
'range_search': ['between', 'under', 'over', 'above', 'below', 'range'],
|
| 75 |
-
'availability': ['available', 'exist', 'have', 'is there', 'can i']
|
| 76 |
-
}
|
| 77 |
-
|
| 78 |
-
def process_query(self, user_query: str) -> Dict[str, Any]:
|
| 79 |
-
"""
|
| 80 |
-
Process natural language query with advanced NLP understanding
|
| 81 |
-
|
| 82 |
-
Args:
|
| 83 |
-
user_query: Natural language query string
|
| 84 |
-
|
| 85 |
-
Returns:
|
| 86 |
-
Dictionary with comprehensive query analysis and results
|
| 87 |
-
"""
|
| 88 |
-
try:
|
| 89 |
-
# Step 1: Preprocess query
|
| 90 |
-
processed_query = self._preprocess_query(user_query)
|
| 91 |
-
self.logger.info(f"Processing query: original='{user_query}', preprocessed='{processed_query}'")
|
| 92 |
-
|
| 93 |
-
# Step 2: Extract entities and intent
|
| 94 |
-
entities = self._extract_entities(processed_query)
|
| 95 |
-
intent = self._classify_intent(processed_query, entities)
|
| 96 |
-
|
| 97 |
-
# Step 3: Generate Cypher query
|
| 98 |
-
cypher_query = self._generate_cypher_query(intent, entities, processed_query)
|
| 99 |
-
self.logger.debug(f"Intent: {intent}; Entities: {entities}; Cypher: {str(cypher_query).strip()[:200]}")
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
# Step 4: Execute query and format results
|
| 104 |
-
if cypher_query:
|
| 105 |
-
results = self._execute_query(cypher_query)
|
| 106 |
-
self.logger.info(f"Query results count: {len(results)}")
|
| 107 |
-
response = self._format_response(intent, entities, results, processed_query)
|
| 108 |
-
else:
|
| 109 |
-
response = self._handle_unclear_query(processed_query)
|
| 110 |
-
|
| 111 |
-
# Step 5: Add metadata
|
| 112 |
-
response.update({
|
| 113 |
-
'query_analysis': {
|
| 114 |
-
'original_query': user_query,
|
| 115 |
-
'processed_query': processed_query,
|
| 116 |
-
'intent': intent,
|
| 117 |
-
'entities': entities,
|
| 118 |
-
'confidence': self._calculate_confidence(intent, entities)
|
| 119 |
-
}
|
| 120 |
-
})
|
| 121 |
-
|
| 122 |
-
return response
|
| 123 |
-
|
| 124 |
-
except Exception as e:
|
| 125 |
-
return {
|
| 126 |
-
'success': False,
|
| 127 |
-
'message': f'Error processing query: {str(e)}',
|
| 128 |
-
'suggestions': self._get_suggestions()
|
| 129 |
-
}
|
| 130 |
-
|
| 131 |
-
def _preprocess_query(self, query: str) -> str:
|
| 132 |
-
"""Preprocess and normalize the query"""
|
| 133 |
-
# Convert to lowercase
|
| 134 |
-
query = query.lower().strip()
|
| 135 |
-
|
| 136 |
-
# Remove extra whitespace
|
| 137 |
-
query = re.sub(r'\s+', ' ', query)
|
| 138 |
-
|
| 139 |
-
# Normalize common variations
|
| 140 |
-
replacements = {
|
| 141 |
-
'rs.': 'rupees',
|
| 142 |
-
'rs': 'rupees',
|
| 143 |
-
'lkr': 'rupees',
|
| 144 |
-
'→': 'to',
|
| 145 |
-
'->': 'to',
|
| 146 |
-
'vs': 'versus',
|
| 147 |
-
'&': 'and',
|
| 148 |
-
'w/': 'with',
|
| 149 |
-
'w/o': 'without'
|
| 150 |
-
}
|
| 151 |
-
|
| 152 |
-
for old, new in replacements.items():
|
| 153 |
-
query = query.replace(old, new)
|
| 154 |
-
|
| 155 |
-
return query
|
| 156 |
-
|
| 157 |
-
def _extract_entities(self, query: str) -> Dict[str, Any]:
|
| 158 |
-
"""Extract entities from the query"""
|
| 159 |
-
entities = {
|
| 160 |
-
'locations': [],
|
| 161 |
-
'numbers': [],
|
| 162 |
-
'currencies': [],
|
| 163 |
-
'comparators': [],
|
| 164 |
-
'time_expressions': []
|
| 165 |
-
}
|
| 166 |
-
|
| 167 |
-
# Extract locations with priority for different query types
|
| 168 |
-
comparison_patterns = [
|
| 169 |
-
r'(?:which\s+is\s+)?(?:cheaper|more\s+expensive)\s+(?:between\s+)?([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)\s+(?:and|vs|versus)\s+([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)(?:\s|$|\?)',
|
| 170 |
-
r'(?:what\s+is\s+)?(?:the\s+)?(?:difference|compare)\s+(?:in\s+)?(?:fare|price|cost)\s+(?:between\s+)?([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)\s+(?:and|vs|versus)\s+([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)(?:\s|$|\?)',
|
| 171 |
-
r'(?:compare|difference)\s+(?:between\s+)?(?:fares?|prices?|costs?)\s+(?:from\s+)?([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)\s+(?:and|vs|versus)\s+([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)(?:\s|$|\?)',
|
| 172 |
-
# Simpler patterns for comparison
|
| 173 |
-
r'([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)\s+(?:and|vs|versus)\s+([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)(?:\s|$|\?)',
|
| 174 |
-
r'([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)\s+(?:and|vs|versus)\s+([a-zA-Z\s]+?)(?:\s|$|\?)'
|
| 175 |
-
]
|
| 176 |
-
|
| 177 |
-
fare_patterns = [
|
| 178 |
-
r'(?:fare|price|cost)\s+(?:of|from)?\s+([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)(?:\s|$|\?)',
|
| 179 |
-
r'(?:what\s+is\s+)?(?:the\s+)?(?:fare|price|cost)(?:\s+of)?(?:\s+from)?\s+([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)(?:\s|$|\?)',
|
| 180 |
-
r'(?:how\s+much\s+)?(?:is|does)\s+(?:the\s+)?(?:fare|price|cost)\s+(?:from\s+)?([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)(?:\s|$|\?)'
|
| 181 |
-
]
|
| 182 |
-
|
| 183 |
-
general_patterns = [
|
| 184 |
-
r'from\s+([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)(?:\s|$|\?)',
|
| 185 |
-
r'([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)(?:\s|$|\?)',
|
| 186 |
-
r'between\s+([a-zA-Z\s]+?)\s+and\s+([a-zA-Z\s]+?)(?:\s|$|\?)'
|
| 187 |
-
]
|
| 188 |
-
|
| 189 |
-
# Use a set to avoid duplicates
|
| 190 |
-
seen_locations = set()
|
| 191 |
-
|
| 192 |
-
# Try comparison patterns first (highest priority)
|
| 193 |
-
for pattern in comparison_patterns:
|
| 194 |
-
matches = re.finditer(pattern, query, re.IGNORECASE)
|
| 195 |
-
for match in matches:
|
| 196 |
-
locations = [loc.strip() for loc in match.groups() if loc.strip()]
|
| 197 |
-
for loc in locations:
|
| 198 |
-
# Skip if we've already processed this location
|
| 199 |
-
if loc.lower() in seen_locations:
|
| 200 |
-
continue
|
| 201 |
-
seen_locations.add(loc.lower())
|
| 202 |
-
|
| 203 |
-
corrected, confidence, method = self.spell_corrector.correct_location(loc)
|
| 204 |
-
if confidence > 0.5:
|
| 205 |
-
entities['locations'].append({
|
| 206 |
-
'original': loc,
|
| 207 |
-
'corrected': corrected,
|
| 208 |
-
'confidence': confidence,
|
| 209 |
-
'method': method
|
| 210 |
-
})
|
| 211 |
-
|
| 212 |
-
# If no locations found with comparison patterns, try fare patterns
|
| 213 |
-
if not entities['locations']:
|
| 214 |
-
for pattern in fare_patterns:
|
| 215 |
-
matches = re.finditer(pattern, query, re.IGNORECASE)
|
| 216 |
-
for match in matches:
|
| 217 |
-
locations = [loc.strip() for loc in match.groups() if loc.strip()]
|
| 218 |
-
for loc in locations:
|
| 219 |
-
# Skip if we've already processed this location
|
| 220 |
-
if loc.lower() in seen_locations:
|
| 221 |
-
continue
|
| 222 |
-
seen_locations.add(loc.lower())
|
| 223 |
-
|
| 224 |
-
corrected, confidence, method = self.spell_corrector.correct_location(loc)
|
| 225 |
-
if confidence > 0.5:
|
| 226 |
-
entities['locations'].append({
|
| 227 |
-
'original': loc,
|
| 228 |
-
'corrected': corrected,
|
| 229 |
-
'confidence': confidence,
|
| 230 |
-
'method': method
|
| 231 |
-
})
|
| 232 |
-
|
| 233 |
-
# If no locations found with fare patterns, try general patterns
|
| 234 |
-
if not entities['locations']:
|
| 235 |
-
for pattern in general_patterns:
|
| 236 |
-
matches = re.finditer(pattern, query, re.IGNORECASE)
|
| 237 |
-
for match in matches:
|
| 238 |
-
locations = [loc.strip() for loc in match.groups() if loc.strip()]
|
| 239 |
-
for loc in locations:
|
| 240 |
-
# Skip if we've already processed this location
|
| 241 |
-
if loc.lower() in seen_locations:
|
| 242 |
-
continue
|
| 243 |
-
seen_locations.add(loc.lower())
|
| 244 |
-
|
| 245 |
-
corrected, confidence, method = self.spell_corrector.correct_location(loc)
|
| 246 |
-
if confidence > 0.5:
|
| 247 |
-
entities['locations'].append({
|
| 248 |
-
'original': loc,
|
| 249 |
-
'corrected': corrected,
|
| 250 |
-
'confidence': confidence,
|
| 251 |
-
'method': method
|
| 252 |
-
})
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
# Extract numbers and currencies
|
| 259 |
-
number_patterns = [
|
| 260 |
-
r'(under|below|less\s+than|over|above|more\s+than)\s+(\d+(?:,\d+)*(?:\.\d+)?)\s*(rupees?|rs?|lkr)?',
|
| 261 |
-
r'between\s+(\d+(?:,\d+)*(?:\.\d+)?)\s+and\s+(\d+(?:,\d+)*(?:\.\d+)?)\s*(rupees?|rs?|lkr)?',
|
| 262 |
-
r'(\d+(?:,\d+)*(?:\.\d+)?)\s*(rupees?|rs?|lkr)?'
|
| 263 |
-
]
|
| 264 |
-
|
| 265 |
-
for pattern in number_patterns:
|
| 266 |
-
matches = re.finditer(pattern, query, re.IGNORECASE)
|
| 267 |
-
for match in matches:
|
| 268 |
-
groups = match.groups()
|
| 269 |
-
if len(groups) >= 2:
|
| 270 |
-
if groups[0] in ['under', 'below', 'less than', 'over', 'above', 'more than']:
|
| 271 |
-
# Pattern: (under|below|less than|over|above|more than) (number) (currency)
|
| 272 |
-
comparator = groups[0]
|
| 273 |
-
number = groups[1]
|
| 274 |
-
currency = groups[2] if len(groups) >= 3 else 'rupees'
|
| 275 |
-
|
| 276 |
-
entities['numbers'].append({
|
| 277 |
-
'value': float(number.replace(',', '')),
|
| 278 |
-
'currency': currency,
|
| 279 |
-
'comparator': comparator
|
| 280 |
-
})
|
| 281 |
-
elif 'between' in pattern:
|
| 282 |
-
# Pattern: between (number1) and (number2) (currency)
|
| 283 |
-
min_number = groups[0]
|
| 284 |
-
max_number = groups[1]
|
| 285 |
-
currency = groups[2] if len(groups) >= 3 else 'rupees'
|
| 286 |
-
|
| 287 |
-
entities['numbers'].append({
|
| 288 |
-
'value': float(min_number.replace(',', '')),
|
| 289 |
-
'currency': currency,
|
| 290 |
-
'comparator': 'between_min'
|
| 291 |
-
})
|
| 292 |
-
entities['numbers'].append({
|
| 293 |
-
'value': float(max_number.replace(',', '')),
|
| 294 |
-
'currency': currency,
|
| 295 |
-
'comparator': 'between_max'
|
| 296 |
-
})
|
| 297 |
-
else:
|
| 298 |
-
# Pattern: (number) (currency)
|
| 299 |
-
number = groups[0]
|
| 300 |
-
currency = groups[1] if len(groups) >= 2 else 'rupees'
|
| 301 |
-
|
| 302 |
-
entities['numbers'].append({
|
| 303 |
-
'value': float(number.replace(',', '')),
|
| 304 |
-
'currency': currency,
|
| 305 |
-
'comparator': None
|
| 306 |
-
})
|
| 307 |
-
|
| 308 |
-
# Extract comparators
|
| 309 |
-
comparator_patterns = [
|
| 310 |
-
r'(cheaper|more\s+expensive|better|worse|faster|slower)',
|
| 311 |
-
r'(compare|difference|vs|versus)',
|
| 312 |
-
r'(under|below|less\s+than|over|above|more\s+than)'
|
| 313 |
-
]
|
| 314 |
-
|
| 315 |
-
for pattern in comparator_patterns:
|
| 316 |
-
matches = re.finditer(pattern, query, re.IGNORECASE)
|
| 317 |
-
for match in matches:
|
| 318 |
-
entities['comparators'].append(match.group(1).lower())
|
| 319 |
-
|
| 320 |
-
return entities
|
| 321 |
-
|
| 322 |
-
def _classify_intent(self, query: str, entities: Dict = None) -> Dict[str, Any]:
|
| 323 |
-
"""Classify the intent of the query"""
|
| 324 |
-
intent_scores = {}
|
| 325 |
-
|
| 326 |
-
for intent, keywords in self.intent_keywords.items():
|
| 327 |
-
score = 0
|
| 328 |
-
for keyword in keywords:
|
| 329 |
-
if keyword in query:
|
| 330 |
-
score += 1
|
| 331 |
-
intent_scores[intent] = score
|
| 332 |
-
|
| 333 |
-
# Get primary intent
|
| 334 |
-
primary_intent = max(intent_scores.items(), key=lambda x: x[1])
|
| 335 |
-
|
| 336 |
-
# Check for specific patterns with priority
|
| 337 |
-
if any(pattern in query for pattern in ['compare', 'difference', 'vs', 'versus', 'cheaper', 'more expensive']):
|
| 338 |
-
primary_intent = ('comparison', 10)
|
| 339 |
-
elif any(pattern in query for pattern in ['recommend', 'suggest', 'best', 'optimal', 'popular']):
|
| 340 |
-
primary_intent = ('recommendation', 10)
|
| 341 |
-
elif any(pattern in query for pattern in ['between', 'under', 'over', 'above', 'below', 'range']):
|
| 342 |
-
primary_intent = ('range_search', 10)
|
| 343 |
-
elif any(pattern in query for pattern in ['fare', 'price', 'cost', 'how much']):
|
| 344 |
-
# Check if we have at least 2 locations
|
| 345 |
-
if entities and len(entities.get('locations', [])) >= 2:
|
| 346 |
-
primary_intent = ('fare_inquiry', 10)
|
| 347 |
-
elif any(pattern in query for pattern in ['route', 'bus', 'train', 'transport']):
|
| 348 |
-
primary_intent = ('route_inquiry', 10)
|
| 349 |
-
|
| 350 |
-
return {
|
| 351 |
-
'primary': primary_intent[0],
|
| 352 |
-
'confidence': primary_intent[1] / 10,
|
| 353 |
-
'all_scores': intent_scores
|
| 354 |
-
}
|
| 355 |
-
|
| 356 |
-
def _generate_cypher_query(self, intent: Dict, entities: Dict, query: str) -> Optional[str]:
|
| 357 |
-
"""Generate Cypher query using LLM for better understanding"""
|
| 358 |
-
try:
|
| 359 |
-
# Try LLM-based query generation first
|
| 360 |
-
llm_query = self._generate_cypher_with_llm(query, intent, entities)
|
| 361 |
-
if llm_query:
|
| 362 |
-
return llm_query
|
| 363 |
-
except Exception as e:
|
| 364 |
-
print(f"LLM query generation failed: {e}")
|
| 365 |
-
|
| 366 |
-
# Fallback to rule-based generation
|
| 367 |
-
primary_intent = intent['primary']
|
| 368 |
-
|
| 369 |
-
if primary_intent == 'fare_inquiry':
|
| 370 |
-
return self._generate_fare_query(entities)
|
| 371 |
-
elif primary_intent == 'comparison':
|
| 372 |
-
return self._generate_comparison_query(entities)
|
| 373 |
-
elif primary_intent == 'route_inquiry':
|
| 374 |
-
return self._generate_route_query(entities, query)
|
| 375 |
-
elif primary_intent == 'statistics':
|
| 376 |
-
return self._generate_statistics_query(entities)
|
| 377 |
-
elif primary_intent == 'recommendation':
|
| 378 |
-
return self._generate_recommendation_query(entities, query)
|
| 379 |
-
elif primary_intent == 'range_search':
|
| 380 |
-
return self._generate_range_query(entities)
|
| 381 |
-
else:
|
| 382 |
-
return self._generate_fallback_query(query)
|
| 383 |
-
|
| 384 |
-
def _generate_fare_query(self, entities: Dict) -> Optional[str]:
|
| 385 |
-
"""Generate fare inquiry Cypher query"""
|
| 386 |
-
locations = entities.get('locations', [])
|
| 387 |
-
|
| 388 |
-
if len(locations) >= 2:
|
| 389 |
-
from_loc = locations[0]['corrected']
|
| 390 |
-
to_loc = locations[1]['corrected']
|
| 391 |
-
|
| 392 |
-
return f"""
|
| 393 |
-
MATCH (a:Place {{name: '{from_loc}'}})-[r:Fare]->(b:Place {{name: '{to_loc}'}})
|
| 394 |
-
RETURN
|
| 395 |
-
a.name as from_place,
|
| 396 |
-
b.name as to_place,
|
| 397 |
-
r.fare as fare,
|
| 398 |
-
'Direct route' as route_type
|
| 399 |
-
"""
|
| 400 |
-
|
| 401 |
-
return None
|
| 402 |
-
|
| 403 |
-
def _generate_comparison_query(self, entities: Dict) -> Optional[str]:
|
| 404 |
-
"""Generate comparison Cypher query"""
|
| 405 |
-
locations = entities.get('locations', [])
|
| 406 |
-
|
| 407 |
-
if len(locations) >= 3:
|
| 408 |
-
# Handle case where we have same origin, different destinations
|
| 409 |
-
if len(locations) == 3:
|
| 410 |
-
# Pattern: "Colombo to Kandy and Colombo to Anuradapura"
|
| 411 |
-
route1_from = locations[0]['corrected']
|
| 412 |
-
route1_to = locations[1]['corrected']
|
| 413 |
-
route2_from = locations[0]['corrected'] # Same origin
|
| 414 |
-
route2_to = locations[2]['corrected']
|
| 415 |
-
elif len(locations) >= 4:
|
| 416 |
-
# Pattern: "Colombo to Kandy and Anuradapura to Galle"
|
| 417 |
-
route1_from = locations[0]['corrected']
|
| 418 |
-
route1_to = locations[1]['corrected']
|
| 419 |
-
route2_from = locations[2]['corrected']
|
| 420 |
-
route2_to = locations[3]['corrected']
|
| 421 |
-
else:
|
| 422 |
-
return None
|
| 423 |
-
|
| 424 |
-
return f"""
|
| 425 |
-
MATCH (a1:Place {{name: '{route1_from}'}})-[r1:Fare]->(b1:Place {{name: '{route1_to}'}})
|
| 426 |
-
MATCH (a2:Place {{name: '{route2_from}'}})-[r2:Fare]->(b2:Place {{name: '{route2_to}'}})
|
| 427 |
-
RETURN
|
| 428 |
-
a1.name + ' to ' + b1.name as route1,
|
| 429 |
-
r1.fare as fare1,
|
| 430 |
-
a2.name + ' to ' + b2.name as route2,
|
| 431 |
-
r2.fare as fare2,
|
| 432 |
-
r1.fare - r2.fare as difference,
|
| 433 |
-
CASE
|
| 434 |
-
WHEN r1.fare < r2.fare THEN 'Route 1 is cheaper'
|
| 435 |
-
WHEN r1.fare > r2.fare THEN 'Route 2 is cheaper'
|
| 436 |
-
ELSE 'Both routes have the same fare'
|
| 437 |
-
END as comparison
|
| 438 |
-
"""
|
| 439 |
-
|
| 440 |
-
return None
|
| 441 |
-
|
| 442 |
-
def _generate_route_query(self, entities: Dict, query: str) -> Optional[str]:
|
| 443 |
-
"""Generate route inquiry Cypher query"""
|
| 444 |
-
locations = entities.get('locations', [])
|
| 445 |
-
|
| 446 |
-
if 'from' in query and locations:
|
| 447 |
-
location = locations[0]['corrected']
|
| 448 |
-
return f"""
|
| 449 |
-
MATCH (a:Place {{name: '{location}'}})-[r:Fare]->(b:Place)
|
| 450 |
-
RETURN
|
| 451 |
-
a.name as from_place,
|
| 452 |
-
b.name as to_place,
|
| 453 |
-
r.fare as fare
|
| 454 |
-
ORDER BY r.fare
|
| 455 |
-
"""
|
| 456 |
-
elif 'to' in query and locations:
|
| 457 |
-
location = locations[0]['corrected']
|
| 458 |
-
return f"""
|
| 459 |
-
MATCH (a:Place)-[r:Fare]->(b:Place {{name: '{location}'}})
|
| 460 |
-
RETURN
|
| 461 |
-
a.name as from_place,
|
| 462 |
-
b.name as to_place,
|
| 463 |
-
r.fare as fare
|
| 464 |
-
ORDER BY r.fare
|
| 465 |
-
"""
|
| 466 |
-
|
| 467 |
-
return None
|
| 468 |
-
|
| 469 |
-
def _generate_statistics_query(self, entities: Dict) -> str:
|
| 470 |
-
"""Generate statistics Cypher query"""
|
| 471 |
-
return """
|
| 472 |
-
MATCH (p:Place)
|
| 473 |
-
MATCH ()-[r:Fare]->()
|
| 474 |
-
RETURN
|
| 475 |
-
count(DISTINCT p) as total_places,
|
| 476 |
-
count(r) as total_routes,
|
| 477 |
-
round(avg(r.fare), 2) as average_fare,
|
| 478 |
-
min(r.fare) as minimum_fare,
|
| 479 |
-
max(r.fare) as maximum_fare,
|
| 480 |
-
round(stdDev(r.fare), 2) as fare_standard_deviation
|
| 481 |
-
"""
|
| 482 |
-
|
| 483 |
-
def _generate_recommendation_query(self, entities: Dict, query: str) -> str:
|
| 484 |
-
"""Generate recommendation Cypher query"""
|
| 485 |
-
if 'cheap' in query or 'budget' in query or 'affordable' in query:
|
| 486 |
-
return """
|
| 487 |
-
MATCH (a:Place)-[r:Fare]->(b:Place)
|
| 488 |
-
RETURN
|
| 489 |
-
a.name as from_place,
|
| 490 |
-
b.name as to_place,
|
| 491 |
-
r.fare as fare
|
| 492 |
-
ORDER BY r.fare ASC
|
| 493 |
-
LIMIT 10
|
| 494 |
-
"""
|
| 495 |
-
elif 'popular' in query or 'frequent' in query:
|
| 496 |
-
return """
|
| 497 |
-
MATCH (a:Place)-[r:Fare]->(b:Place)
|
| 498 |
-
RETURN
|
| 499 |
-
a.name as from_place,
|
| 500 |
-
b.name as to_place,
|
| 501 |
-
r.fare as fare
|
| 502 |
-
ORDER BY r.fare DESC
|
| 503 |
-
LIMIT 10
|
| 504 |
-
"""
|
| 505 |
-
else:
|
| 506 |
-
return """
|
| 507 |
-
MATCH (a:Place)-[r:Fare]->(b:Place)
|
| 508 |
-
RETURN
|
| 509 |
-
a.name as from_place,
|
| 510 |
-
b.name as to_place,
|
| 511 |
-
r.fare as fare
|
| 512 |
-
ORDER BY r.fare ASC
|
| 513 |
-
LIMIT 5
|
| 514 |
-
"""
|
| 515 |
-
|
| 516 |
-
def _generate_range_query(self, entities: Dict) -> Optional[str]:
|
| 517 |
-
"""Generate range search Cypher query"""
|
| 518 |
-
numbers = entities.get('numbers', [])
|
| 519 |
-
|
| 520 |
-
if numbers:
|
| 521 |
-
# Check for between range
|
| 522 |
-
between_min = None
|
| 523 |
-
between_max = None
|
| 524 |
-
single_value = None
|
| 525 |
-
single_comparator = None
|
| 526 |
-
|
| 527 |
-
for number in numbers:
|
| 528 |
-
comparator = number.get('comparator', '')
|
| 529 |
-
value = number['value']
|
| 530 |
-
|
| 531 |
-
if comparator == 'between_min':
|
| 532 |
-
between_min = value
|
| 533 |
-
elif comparator == 'between_max':
|
| 534 |
-
between_max = value
|
| 535 |
-
elif comparator in ['under', 'below', 'less than', 'over', 'above', 'more than']:
|
| 536 |
-
single_value = value
|
| 537 |
-
single_comparator = comparator
|
| 538 |
-
|
| 539 |
-
# Generate query based on type
|
| 540 |
-
if between_min is not None and between_max is not None:
|
| 541 |
-
return f"""
|
| 542 |
-
MATCH (a:Place)-[r:Fare]->(b:Place)
|
| 543 |
-
WHERE r.fare >= {between_min} AND r.fare <= {between_max}
|
| 544 |
-
RETURN
|
| 545 |
-
a.name as from_place,
|
| 546 |
-
b.name as to_place,
|
| 547 |
-
r.fare as fare
|
| 548 |
-
ORDER BY r.fare ASC
|
| 549 |
-
"""
|
| 550 |
-
elif single_value is not None and single_comparator is not None:
|
| 551 |
-
if single_comparator in ['under', 'below', 'less than']:
|
| 552 |
-
return f"""
|
| 553 |
-
MATCH (a:Place)-[r:Fare]->(b:Place)
|
| 554 |
-
WHERE r.fare < {single_value}
|
| 555 |
-
RETURN
|
| 556 |
-
a.name as from_place,
|
| 557 |
-
b.name as to_place,
|
| 558 |
-
r.fare as fare
|
| 559 |
-
ORDER BY r.fare ASC
|
| 560 |
-
"""
|
| 561 |
-
elif single_comparator in ['over', 'above', 'more than']:
|
| 562 |
-
return f"""
|
| 563 |
-
MATCH (a:Place)-[r:Fare]->(b:Place)
|
| 564 |
-
WHERE r.fare > {single_value}
|
| 565 |
-
RETURN
|
| 566 |
-
a.name as from_place,
|
| 567 |
-
b.name as to_place,
|
| 568 |
-
r.fare as fare
|
| 569 |
-
ORDER BY r.fare DESC
|
| 570 |
-
"""
|
| 571 |
-
|
| 572 |
-
return None
|
| 573 |
-
|
| 574 |
-
def _generate_cypher_with_llm(self, query: str, intent: Dict, entities: Dict) -> Optional[str]:
|
| 575 |
-
"""Generate Cypher query using LLM for better understanding"""
|
| 576 |
-
try:
|
| 577 |
-
if not self.config.OPENAI_API_KEY:
|
| 578 |
-
return None
|
| 579 |
-
|
| 580 |
-
# Get available places for context
|
| 581 |
-
available_places = list(self.neo4j_service.get_all_places())
|
| 582 |
-
|
| 583 |
-
# Create comprehensive prompt for Cypher generation
|
| 584 |
-
prompt = f"""
|
| 585 |
-
You are a Neo4j Cypher query generator for a transport database.
|
| 586 |
-
|
| 587 |
-
Database Schema:
|
| 588 |
-
- Nodes: Place (with property 'name')
|
| 589 |
-
- Relationships: Fare (with property 'fare')
|
| 590 |
-
|
| 591 |
-
Available Places: {', '.join(available_places[:50])}... (total: {len(available_places)})
|
| 592 |
-
|
| 593 |
-
User Query: "{query}"
|
| 594 |
-
Detected Intent: {intent.get('primary', 'unknown')}
|
| 595 |
-
Extracted Entities: {entities}
|
| 596 |
-
|
| 597 |
-
Your task is to generate a valid Cypher query that answers the user's question.
|
| 598 |
-
|
| 599 |
-
Query Types and Examples:
|
| 600 |
-
|
| 601 |
-
1. FARE INQUIRY:
|
| 602 |
-
- "What is the fare from Colombo to Kandy?"
|
| 603 |
-
- Cypher: MATCH (a:Place {{name: 'Colombo'}})-[r:Fare]->(b:Place {{name: 'Kandy'}}) RETURN a.name as from_place, b.name as to_place, r.fare as fare
|
| 604 |
-
|
| 605 |
-
2. COMPARISON:
|
| 606 |
-
- "Compare fares from Colombo to Kandy vs Colombo to Galle"
|
| 607 |
-
- Cypher: MATCH (a1:Place {{name: 'Colombo'}})-[r1:Fare]->(b1:Place {{name: 'Kandy'}}) MATCH (a2:Place {{name: 'Colombo'}})-[r2:Fare]->(b2:Place {{name: 'Galle'}}) RETURN a1.name + ' to ' + b1.name as route1, r1.fare as fare1, a2.name + ' to ' + b2.name as route2, r2.fare as fare2, r1.fare - r2.fare as difference
|
| 608 |
-
|
| 609 |
-
3. RANGE SEARCH:
|
| 610 |
-
- "Find routes under 500 rupees"
|
| 611 |
-
- Cypher: MATCH (a:Place)-[r:Fare]->(b:Place) WHERE r.fare < 500 RETURN a.name as from_place, b.name as to_place, r.fare as fare ORDER BY r.fare ASC
|
| 612 |
-
|
| 613 |
-
4. RECOMMENDATION:
|
| 614 |
-
- "Recommend cheap routes"
|
| 615 |
-
- Cypher: MATCH (a:Place)-[r:Fare]->(b:Place) RETURN a.name as from_place, b.name as to_place, r.fare as fare ORDER BY r.fare ASC LIMIT 10
|
| 616 |
-
|
| 617 |
-
5. STATISTICS:
|
| 618 |
-
- "What is the average fare?"
|
| 619 |
-
- Cypher: MATCH ()-[r:Fare]->() RETURN round(avg(r.fare), 2) as average_fare, min(r.fare) as min_fare, max(r.fare) as max_fare
|
| 620 |
-
|
| 621 |
-
6. ROUTE INQUIRY:
|
| 622 |
-
- "Routes from Colombo"
|
| 623 |
-
- Cypher: MATCH (a:Place {{name: 'Colombo'}})-[r:Fare]->(b:Place) RETURN a.name as from_place, b.name as to_place, r.fare as fare ORDER BY r.fare
|
| 624 |
-
|
| 625 |
-
Important Rules:
|
| 626 |
-
1. Always use proper Cypher syntax
|
| 627 |
-
2. Use exact place names from the available places list
|
| 628 |
-
3. For comparisons, use multiple MATCH clauses
|
| 629 |
-
4. For ranges, use WHERE clauses with appropriate operators
|
| 630 |
-
5. For statistics, use aggregation functions
|
| 631 |
-
6. Always include meaningful column aliases
|
| 632 |
-
7. Use ORDER BY for sorted results
|
| 633 |
-
8. Use LIMIT for large result sets
|
| 634 |
-
|
| 635 |
-
Return ONLY the Cypher query, nothing else. If you cannot generate a valid query, return "FALLBACK".
|
| 636 |
-
"""
|
| 637 |
-
|
| 638 |
-
cypher_query = None
|
| 639 |
-
# Prefer new SDK
|
| 640 |
-
try:
|
| 641 |
-
from openai import OpenAI
|
| 642 |
-
client = OpenAI(api_key=self.config.OPENAI_API_KEY)
|
| 643 |
-
response = client.chat.completions.create(
|
| 644 |
-
model=self.config.OPENAI_MODEL,
|
| 645 |
-
messages=[
|
| 646 |
-
{"role": "system", "content": "You are a Cypher query generator. Return only valid Cypher queries."},
|
| 647 |
-
{"role": "user", "content": prompt}
|
| 648 |
-
],
|
| 649 |
-
max_tokens=300,
|
| 650 |
-
temperature=0.1
|
| 651 |
-
)
|
| 652 |
-
cypher_query = response.choices[0].message.content.strip()
|
| 653 |
-
except Exception as sdk_err:
|
| 654 |
-
import openai
|
| 655 |
-
try:
|
| 656 |
-
openai.api_key = self.config.OPENAI_API_KEY
|
| 657 |
-
response = openai.ChatCompletion.create(
|
| 658 |
-
model=self.config.OPENAI_MODEL,
|
| 659 |
-
messages=[
|
| 660 |
-
{"role": "system", "content": "You are a Cypher query generator. Return only valid Cypher queries."},
|
| 661 |
-
{"role": "user", "content": prompt}
|
| 662 |
-
],
|
| 663 |
-
max_tokens=300,
|
| 664 |
-
temperature=0.1
|
| 665 |
-
)
|
| 666 |
-
cypher_query = response.choices[0].message.content.strip()
|
| 667 |
-
except Exception:
|
| 668 |
-
raise sdk_err
|
| 669 |
-
|
| 670 |
-
# Validate the response
|
| 671 |
-
if cypher_query.upper() == "FALLBACK":
|
| 672 |
-
return None
|
| 673 |
-
|
| 674 |
-
# Basic validation - check if it starts with MATCH
|
| 675 |
-
if cypher_query.upper().startswith('MATCH'):
|
| 676 |
-
return cypher_query
|
| 677 |
-
|
| 678 |
-
return None
|
| 679 |
-
|
| 680 |
-
except Exception as e:
|
| 681 |
-
print(f"LLM Cypher generation error: {e}")
|
| 682 |
-
return None
|
| 683 |
-
|
| 684 |
-
def _generate_fallback_query(self, query: str) -> Optional[str]:
|
| 685 |
-
"""Generate fallback query when intent is unclear"""
|
| 686 |
-
# Try to extract locations using spell corrector
|
| 687 |
-
locations = self.spell_corrector.extract_locations_from_query(query)
|
| 688 |
-
|
| 689 |
-
if len(locations) >= 2:
|
| 690 |
-
from_loc = locations[0][1]
|
| 691 |
-
to_loc = locations[1][1]
|
| 692 |
-
return f"""
|
| 693 |
-
MATCH (a:Place {{name: '{from_loc}'}})-[r:Fare]->(b:Place {{name: '{to_loc}'}})
|
| 694 |
-
RETURN
|
| 695 |
-
a.name as from_place,
|
| 696 |
-
b.name as to_place,
|
| 697 |
-
r.fare as fare
|
| 698 |
-
"""
|
| 699 |
-
|
| 700 |
-
# Additional fallback: direct pattern matching for fare queries
|
| 701 |
-
if 'fare' in query.lower() or 'price' in query.lower() or 'cost' in query.lower():
|
| 702 |
-
import re
|
| 703 |
-
fare_patterns = [
|
| 704 |
-
r'fare\s+(?:of|from)?\s+([a-zA-Z\s]+)\s+to\s+([a-zA-Z\s]+)',
|
| 705 |
-
r'price\s+(?:of|from)?\s+([a-zA-Z\s]+)\s+to\s+([a-zA-Z\s]+)',
|
| 706 |
-
r'cost\s+(?:of|from)?\s+([a-zA-Z\s]+)\s+to\s+([a-zA-Z\s]+)',
|
| 707 |
-
r'(?:what\s+is\s+)?(?:the\s+)?(?:fare|price|cost)(?:\s+of)?(?:\s+from)?\s+([a-zA-Z\s]+)\s+to\s+([a-zA-Z\s]+)',
|
| 708 |
-
r'(?:how\s+much\s+)?(?:is|does)\s+(?:the\s+)?(?:fare|price|cost)\s+(?:from\s+)?([a-zA-Z\s]+)\s+to\s+([a-zA-Z\s]+)'
|
| 709 |
-
]
|
| 710 |
-
|
| 711 |
-
for pattern in fare_patterns:
|
| 712 |
-
match = re.search(pattern, query.lower())
|
| 713 |
-
if match:
|
| 714 |
-
from_loc = match.group(1).strip()
|
| 715 |
-
to_loc = match.group(2).strip()
|
| 716 |
-
|
| 717 |
-
# Correct locations
|
| 718 |
-
from_corrected, from_conf, _ = self.spell_corrector.correct_location(from_loc)
|
| 719 |
-
to_corrected, to_conf, _ = self.spell_corrector.correct_location(to_loc)
|
| 720 |
-
|
| 721 |
-
if from_conf > 0.5 and to_conf > 0.5:
|
| 722 |
-
return f"""
|
| 723 |
-
MATCH (a:Place {{name: '{from_corrected}'}})-[r:Fare]->(b:Place {{name: '{to_corrected}'}})
|
| 724 |
-
RETURN a.name as from_place, b.name as to_place, r.fare as fare
|
| 725 |
-
"""
|
| 726 |
-
|
| 727 |
-
return None
|
| 728 |
-
|
| 729 |
-
def _execute_query(self, cypher_query: str) -> List[Dict]:
|
| 730 |
-
"""Execute Cypher query and return results"""
|
| 731 |
-
try:
|
| 732 |
-
with self.neo4j_service.driver.session() as session:
|
| 733 |
-
result = session.run(cypher_query)
|
| 734 |
-
return [dict(record) for record in result]
|
| 735 |
-
except Exception as e:
|
| 736 |
-
print(f"Query execution error: {e}")
|
| 737 |
-
return []
|
| 738 |
-
|
| 739 |
-
def _format_response(self, intent: Dict, entities: Dict, results: List[Dict], query: str) -> Dict[str, Any]:
|
| 740 |
-
"""Format the response based on intent and results"""
|
| 741 |
-
primary_intent = intent['primary']
|
| 742 |
-
|
| 743 |
-
if not results:
|
| 744 |
-
return {
|
| 745 |
-
'success': False,
|
| 746 |
-
'message': 'No results found for your query.',
|
| 747 |
-
'suggestions': self._get_suggestions()
|
| 748 |
-
}
|
| 749 |
-
|
| 750 |
-
if primary_intent == 'fare_inquiry':
|
| 751 |
-
return self._format_fare_response(results, entities)
|
| 752 |
-
elif primary_intent == 'comparison':
|
| 753 |
-
return self._format_comparison_response(results, entities)
|
| 754 |
-
elif primary_intent == 'route_inquiry':
|
| 755 |
-
return self._format_route_response(results, entities)
|
| 756 |
-
elif primary_intent == 'statistics':
|
| 757 |
-
return self._format_statistics_response(results)
|
| 758 |
-
elif primary_intent == 'recommendation':
|
| 759 |
-
return self._format_recommendation_response(results, query)
|
| 760 |
-
elif primary_intent == 'range_search':
|
| 761 |
-
return self._format_range_response(results, entities)
|
| 762 |
-
else:
|
| 763 |
-
return self._format_generic_response(results)
|
| 764 |
-
|
| 765 |
-
def _format_fare_response(self, results: List[Dict], entities: Dict) -> Dict[str, Any]:
|
| 766 |
-
"""Format fare inquiry response"""
|
| 767 |
-
if results:
|
| 768 |
-
result = results[0]
|
| 769 |
-
return {
|
| 770 |
-
'success': True,
|
| 771 |
-
'message': f"The fare from {result['from_place']} to {result['to_place']} is Rs. {result['fare']}",
|
| 772 |
-
'data': results,
|
| 773 |
-
'query_type': 'fare_inquiry',
|
| 774 |
-
'summary': {
|
| 775 |
-
'from_place': result['from_place'],
|
| 776 |
-
'to_place': result['to_place'],
|
| 777 |
-
'fare': result['fare']
|
| 778 |
-
}
|
| 779 |
-
}
|
| 780 |
-
return {'success': False, 'message': 'Fare information not found.'}
|
| 781 |
-
|
| 782 |
-
def _format_comparison_response(self, results: List[Dict], entities: Dict) -> Dict[str, Any]:
|
| 783 |
-
"""Format comparison response"""
|
| 784 |
-
if results:
|
| 785 |
-
result = results[0]
|
| 786 |
-
return {
|
| 787 |
-
'success': True,
|
| 788 |
-
'message': result.get('comparison', 'Comparison completed'),
|
| 789 |
-
'data': results,
|
| 790 |
-
'query_type': 'comparison',
|
| 791 |
-
'summary': {
|
| 792 |
-
'route1': result.get('route1'),
|
| 793 |
-
'route2': result.get('route2'),
|
| 794 |
-
'difference': result.get('difference')
|
| 795 |
-
}
|
| 796 |
-
}
|
| 797 |
-
return {'success': False, 'message': 'Comparison not possible.'}
|
| 798 |
-
|
| 799 |
-
def _format_route_response(self, results: List[Dict], entities: Dict) -> Dict[str, Any]:
|
| 800 |
-
"""Format route inquiry response"""
|
| 801 |
-
return {
|
| 802 |
-
'success': True,
|
| 803 |
-
'message': f"Found {len(results)} routes",
|
| 804 |
-
'data': results,
|
| 805 |
-
'query_type': 'route_inquiry',
|
| 806 |
-
'summary': {
|
| 807 |
-
'total_routes': len(results),
|
| 808 |
-
'fare_range': f"Rs. {min(r['fare'] for r in results)} - Rs. {max(r['fare'] for r in results)}" if results else "N/A"
|
| 809 |
-
}
|
| 810 |
-
}
|
| 811 |
-
|
| 812 |
-
def _format_statistics_response(self, results: List[Dict]) -> Dict[str, Any]:
|
| 813 |
-
"""Format statistics response"""
|
| 814 |
-
if results:
|
| 815 |
-
stats = results[0]
|
| 816 |
-
return {
|
| 817 |
-
'success': True,
|
| 818 |
-
'message': f"Database contains {stats['total_places']} places and {stats['total_routes']} routes",
|
| 819 |
-
'data': results,
|
| 820 |
-
'query_type': 'statistics',
|
| 821 |
-
'summary': {
|
| 822 |
-
'total_places': stats['total_places'],
|
| 823 |
-
'total_routes': stats['total_routes'],
|
| 824 |
-
'average_fare': stats['average_fare'],
|
| 825 |
-
'fare_range': f"Rs. {stats['minimum_fare']} - Rs. {stats['maximum_fare']}"
|
| 826 |
-
}
|
| 827 |
-
}
|
| 828 |
-
return {'success': False, 'message': 'Statistics not available.'}
|
| 829 |
-
|
| 830 |
-
def _format_recommendation_response(self, results: List[Dict], query: str) -> Dict[str, Any]:
|
| 831 |
-
"""Format recommendation response"""
|
| 832 |
-
return {
|
| 833 |
-
'success': True,
|
| 834 |
-
'message': f"Here are {len(results)} recommended routes",
|
| 835 |
-
'data': results,
|
| 836 |
-
'query_type': 'recommendation',
|
| 837 |
-
'summary': {
|
| 838 |
-
'recommendations_count': len(results),
|
| 839 |
-
'fare_range': f"Rs. {min(r['fare'] for r in results)} - Rs. {max(r['fare'] for r in results)}" if results else "N/A"
|
| 840 |
-
}
|
| 841 |
-
}
|
| 842 |
-
|
| 843 |
-
def _format_range_response(self, results: List[Dict], entities: Dict) -> Dict[str, Any]:
|
| 844 |
-
"""Format range search response"""
|
| 845 |
-
return {
|
| 846 |
-
'success': True,
|
| 847 |
-
'message': f"Found {len(results)} routes in your specified range",
|
| 848 |
-
'data': results,
|
| 849 |
-
'query_type': 'range_search',
|
| 850 |
-
'summary': {
|
| 851 |
-
'routes_found': len(results),
|
| 852 |
-
'fare_range': f"Rs. {min(r['fare'] for r in results)} - Rs. {max(r['fare'] for r in results)}" if results else "N/A"
|
| 853 |
-
}
|
| 854 |
-
}
|
| 855 |
-
|
| 856 |
-
def _format_generic_response(self, results: List[Dict]) -> Dict[str, Any]:
|
| 857 |
-
"""Format generic response"""
|
| 858 |
-
return {
|
| 859 |
-
'success': True,
|
| 860 |
-
'message': f"Found {len(results)} results",
|
| 861 |
-
'data': results,
|
| 862 |
-
'query_type': 'generic'
|
| 863 |
-
}
|
| 864 |
-
|
| 865 |
-
def _handle_unclear_query(self, query: str) -> Dict[str, Any]:
|
| 866 |
-
"""Handle unclear or ambiguous queries"""
|
| 867 |
-
return {
|
| 868 |
-
'success': False,
|
| 869 |
-
'message': 'I could not understand your query. Please try rephrasing it.',
|
| 870 |
-
'suggestions': self._get_suggestions(),
|
| 871 |
-
'query_type': 'unclear'
|
| 872 |
-
}
|
| 873 |
-
|
| 874 |
-
def _calculate_confidence(self, intent: Dict, entities: Dict) -> float:
|
| 875 |
-
"""Calculate confidence score for the query interpretation"""
|
| 876 |
-
confidence = 0.0
|
| 877 |
-
|
| 878 |
-
# Intent confidence
|
| 879 |
-
confidence += intent.get('confidence', 0) * 0.4
|
| 880 |
-
|
| 881 |
-
# Entity confidence
|
| 882 |
-
locations = entities.get('locations', [])
|
| 883 |
-
if locations:
|
| 884 |
-
avg_location_confidence = sum(loc['confidence'] for loc in locations) / len(locations)
|
| 885 |
-
confidence += avg_location_confidence * 0.4
|
| 886 |
-
|
| 887 |
-
# Query complexity bonus
|
| 888 |
-
if len(locations) >= 2:
|
| 889 |
-
confidence += 0.2
|
| 890 |
-
|
| 891 |
-
return min(confidence, 1.0)
|
| 892 |
-
|
| 893 |
-
def _get_suggestions(self) -> List[str]:
|
| 894 |
-
"""Get query suggestions"""
|
| 895 |
-
return [
|
| 896 |
-
"What is the fare from Colombo to Kandy?",
|
| 897 |
-
"Compare fares from Colombo to Kandy vs Colombo to Galle",
|
| 898 |
-
"Show me routes from Panadura",
|
| 899 |
-
"Find routes under 500 rupees",
|
| 900 |
-
"What are the cheapest routes?",
|
| 901 |
-
"Show me popular destinations",
|
| 902 |
-
"Give me database statistics",
|
| 903 |
-
"Recommend affordable routes"
|
| 904 |
-
]
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Enhanced NLP Processor for Transport Query Application
|
| 4 |
+
Advanced natural language understanding and query processing
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import re
|
| 8 |
+
import json
|
| 9 |
+
from typing import Dict, List, Tuple, Optional, Any
|
| 10 |
+
from datetime import datetime
|
| 11 |
+
from spell_corrector import SpellCorrector
|
| 12 |
+
from neo4j_service import Neo4jService
|
| 13 |
+
from config import Config
|
| 14 |
+
from logger import get_logger
|
| 15 |
+
|
| 16 |
+
class EnhancedNLPProcessor:
|
| 17 |
+
"""Advanced NLP processor with sophisticated query understanding"""
|
| 18 |
+
|
| 19 |
+
def __init__(self):
    """Wire up configuration, spell correction, graph access and logging,
    and declare the regex/keyword tables used by the NLP pipeline."""
    self.config = Config()
    self.spell_corrector = SpellCorrector()
    self.neo4j_service = Neo4jService()
    self.logger = get_logger(self.__class__.__name__)

    # Regex templates grouped per query family; capture groups hold the
    # place names (or amounts) the downstream Cypher generators need.
    self.query_patterns = {
        'fare_queries': [
            r'(?:what\s+is\s+)?(?:the\s+)?(?:fare|price|cost)(?:\s+of)?(?:\s+from)?\s+([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)',
            r'(?:what\s+is\s+)?(?:the\s+)?(?:bus\s+)?(?:fare|price|cost)(?:\s+of)?(?:\s+from)?\s+([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)',
            r'(?:how\s+much\s+)?(?:is|does)\s+(?:the\s+)?(?:bus\s+)?(?:fare|price|cost)\s+(?:from\s+)?([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)',
            r'([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)\s+(?:fare|price|cost)',
            r'(?:fare|price|cost)\s+(?:from\s+)?([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)',
            r'(?:travel|transport)\s+(?:cost|price|fare)\s+(?:from\s+)?([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)',
            r'(?:bus|train)\s+(?:fare|price|cost)\s+(?:from\s+)?([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)',
            r'(?:ticket\s+price|ticket\s+fare)\s+(?:from\s+)?([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)'
        ],
        'comparison_queries': [
            r'(?:compare|difference)\s+(?:between\s+)?(?:fares?|prices?|costs?)\s+(?:from\s+)?([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)\s+(?:and|vs|versus)\s+([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)',
            r'(?:which\s+is\s+)?(?:cheaper|more\s+expensive)\s+(?:between\s+)?([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)\s+(?:and|vs|versus)\s+([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)'
        ],
        'range_queries': [
            r'(?:routes?|fares?|prices?)\s+(?:between|from)\s+([0-9,]+)\s+(?:and|to)\s+([0-9,]+)\s+(?:rupees?|rs?)',
            r'(?:find|show)\s+(?:routes?|fares?|prices?)\s+(?:under|below|less\s+than)\s+([0-9,]+)\s+(?:rupees?|rs?)',
            r'(?:find|show)\s+(?:routes?|fares?|prices?)\s+(?:over|above|more\s+than)\s+([0-9,]+)\s+(?:rupees?|rs?)'
        ],
        'route_queries': [
            r'(?:routes?|buses?|trains?)\s+(?:from|departing\s+from)\s+([a-zA-Z\s]+)',
            r'(?:routes?|buses?|trains?)\s+(?:to|arriving\s+at)\s+([a-zA-Z\s]+)',
            r'(?:how\s+many\s+)?(?:routes?|buses?|trains?)\s+(?:connect|go\s+to|from)\s+([a-zA-Z\s]+)',
            r'(?:direct|non-stop)\s+(?:routes?|buses?|trains?)\s+(?:from\s+)?([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)'
        ],
        'statistical_queries': [
            r'(?:average|mean|median)\s+(?:fare|price|cost)',
            r'(?:total|sum)\s+(?:of\s+)?(?:all\s+)?(?:fares?|prices?|costs?)',
            r'(?:how\s+many\s+)?(?:routes?|places?|locations?)',
            r'(?:database|system)\s+(?:statistics?|stats?|overview)',
            r'(?:summary|overview)\s+(?:of\s+)?(?:transport|fare)\s+(?:data|database)'
        ],
        'recommendation_queries': [
            r'(?:recommend|suggest)\s+(?:cheap|budget|affordable)\s+(?:routes?|options?)',
            r'(?:best|optimal)\s+(?:route|way)\s+(?:from\s+)?([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)',
            r'(?:popular|frequent)\s+(?:routes?|destinations?)',
            r'(?:hidden|secret|unknown)\s+(?:routes?|destinations?)'
        ]
    }

    # Keyword lists used for coarse intent scoring in _classify_intent;
    # each hit adds one point toward that intent.
    self.intent_keywords = {
        'fare_inquiry': ['fare', 'price', 'cost', 'how much', 'what is the cost'],
        'route_inquiry': ['route', 'bus', 'train', 'transport', 'how to get', 'way to'],
        'comparison': ['compare', 'difference', 'vs', 'versus', 'which is', 'better'],
        'statistics': ['statistics', 'stats', 'overview', 'summary', 'total', 'average'],
        'recommendation': ['recommend', 'suggest', 'best', 'optimal', 'popular'],
        'range_search': ['between', 'under', 'over', 'above', 'below', 'range'],
        'availability': ['available', 'exist', 'have', 'is there', 'can i']
    }
def process_query(self, user_query: str) -> Dict[str, Any]:
    """Run the full NLP pipeline over one natural-language query.

    Pipeline: normalise text -> extract entities -> classify intent ->
    build Cypher -> execute -> format, then attach a 'query_analysis'
    record. Any exception becomes a failure payload with suggestions.

    Args:
        user_query: Natural language query string.

    Returns:
        Dictionary with comprehensive query analysis and results.
    """
    try:
        processed_query = self._preprocess_query(user_query)
        self.logger.info(f"Processing query: original='{user_query}', preprocessed='{processed_query}'")

        entities = self._extract_entities(processed_query)
        intent = self._classify_intent(processed_query, entities)

        cypher_query = self._generate_cypher_query(intent, entities, processed_query)
        self.logger.debug(f"Intent: {intent}; Entities: {entities}; Cypher: {str(cypher_query).strip()[:200]}")

        if cypher_query:
            results = self._execute_query(cypher_query)
            self.logger.info(f"Query results count: {len(results)}")
            response = self._format_response(intent, entities, results, processed_query)
        else:
            response = self._handle_unclear_query(processed_query)

        # Attach interpretation metadata for the caller / UI.
        response['query_analysis'] = {
            'original_query': user_query,
            'processed_query': processed_query,
            'intent': intent,
            'entities': entities,
            'confidence': self._calculate_confidence(intent, entities)
        }
        return response

    except Exception as e:
        return {
            'success': False,
            'message': f'Error processing query: {str(e)}',
            'suggestions': self._get_suggestions()
        }
def _preprocess_query(self, query: str) -> str:
|
| 132 |
+
"""Preprocess and normalize the query"""
|
| 133 |
+
# Convert to lowercase
|
| 134 |
+
query = query.lower().strip()
|
| 135 |
+
|
| 136 |
+
# Remove extra whitespace
|
| 137 |
+
query = re.sub(r'\s+', ' ', query)
|
| 138 |
+
|
| 139 |
+
# Normalize common variations
|
| 140 |
+
replacements = {
|
| 141 |
+
'rs.': 'rupees',
|
| 142 |
+
'rs': 'rupees',
|
| 143 |
+
'lkr': 'rupees',
|
| 144 |
+
'→': 'to',
|
| 145 |
+
'->': 'to',
|
| 146 |
+
'vs': 'versus',
|
| 147 |
+
'&': 'and',
|
| 148 |
+
'w/': 'with',
|
| 149 |
+
'w/o': 'without'
|
| 150 |
+
}
|
| 151 |
+
|
| 152 |
+
for old, new in replacements.items():
|
| 153 |
+
query = query.replace(old, new)
|
| 154 |
+
|
| 155 |
+
return query
|
| 156 |
+
|
| 157 |
+
def _extract_entities(self, query: str) -> Dict[str, Any]:
|
| 158 |
+
"""Extract entities from the query"""
|
| 159 |
+
entities = {
|
| 160 |
+
'locations': [],
|
| 161 |
+
'numbers': [],
|
| 162 |
+
'currencies': [],
|
| 163 |
+
'comparators': [],
|
| 164 |
+
'time_expressions': []
|
| 165 |
+
}
|
| 166 |
+
|
| 167 |
+
# Extract locations with priority for different query types
|
| 168 |
+
comparison_patterns = [
|
| 169 |
+
r'(?:which\s+is\s+)?(?:cheaper|more\s+expensive)\s+(?:between\s+)?([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)\s+(?:and|vs|versus)\s+([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)(?:\s|$|\?)',
|
| 170 |
+
r'(?:what\s+is\s+)?(?:the\s+)?(?:difference|compare)\s+(?:in\s+)?(?:fare|price|cost)\s+(?:between\s+)?([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)\s+(?:and|vs|versus)\s+([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)(?:\s|$|\?)',
|
| 171 |
+
r'(?:compare|difference)\s+(?:between\s+)?(?:fares?|prices?|costs?)\s+(?:from\s+)?([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)\s+(?:and|vs|versus)\s+([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)(?:\s|$|\?)',
|
| 172 |
+
# Simpler patterns for comparison
|
| 173 |
+
r'([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)\s+(?:and|vs|versus)\s+([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)(?:\s|$|\?)',
|
| 174 |
+
r'([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)\s+(?:and|vs|versus)\s+([a-zA-Z\s]+?)(?:\s|$|\?)'
|
| 175 |
+
]
|
| 176 |
+
|
| 177 |
+
fare_patterns = [
|
| 178 |
+
r'(?:fare|price|cost)\s+(?:of|from)?\s+([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)(?:\s|$|\?)',
|
| 179 |
+
r'(?:what\s+is\s+)?(?:the\s+)?(?:fare|price|cost)(?:\s+of)?(?:\s+from)?\s+([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)(?:\s|$|\?)',
|
| 180 |
+
r'(?:how\s+much\s+)?(?:is|does)\s+(?:the\s+)?(?:fare|price|cost)\s+(?:from\s+)?([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)(?:\s|$|\?)'
|
| 181 |
+
]
|
| 182 |
+
|
| 183 |
+
general_patterns = [
|
| 184 |
+
r'from\s+([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)(?:\s|$|\?)',
|
| 185 |
+
r'([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)(?:\s|$|\?)',
|
| 186 |
+
r'between\s+([a-zA-Z\s]+?)\s+and\s+([a-zA-Z\s]+?)(?:\s|$|\?)'
|
| 187 |
+
]
|
| 188 |
+
|
| 189 |
+
# Use a set to avoid duplicates
|
| 190 |
+
seen_locations = set()
|
| 191 |
+
|
| 192 |
+
# Try comparison patterns first (highest priority)
|
| 193 |
+
for pattern in comparison_patterns:
|
| 194 |
+
matches = re.finditer(pattern, query, re.IGNORECASE)
|
| 195 |
+
for match in matches:
|
| 196 |
+
locations = [loc.strip() for loc in match.groups() if loc.strip()]
|
| 197 |
+
for loc in locations:
|
| 198 |
+
# Skip if we've already processed this location
|
| 199 |
+
if loc.lower() in seen_locations:
|
| 200 |
+
continue
|
| 201 |
+
seen_locations.add(loc.lower())
|
| 202 |
+
|
| 203 |
+
corrected, confidence, method = self.spell_corrector.correct_location(loc)
|
| 204 |
+
if confidence > 0.5:
|
| 205 |
+
entities['locations'].append({
|
| 206 |
+
'original': loc,
|
| 207 |
+
'corrected': corrected,
|
| 208 |
+
'confidence': confidence,
|
| 209 |
+
'method': method
|
| 210 |
+
})
|
| 211 |
+
|
| 212 |
+
# If no locations found with comparison patterns, try fare patterns
|
| 213 |
+
if not entities['locations']:
|
| 214 |
+
for pattern in fare_patterns:
|
| 215 |
+
matches = re.finditer(pattern, query, re.IGNORECASE)
|
| 216 |
+
for match in matches:
|
| 217 |
+
locations = [loc.strip() for loc in match.groups() if loc.strip()]
|
| 218 |
+
for loc in locations:
|
| 219 |
+
# Skip if we've already processed this location
|
| 220 |
+
if loc.lower() in seen_locations:
|
| 221 |
+
continue
|
| 222 |
+
seen_locations.add(loc.lower())
|
| 223 |
+
|
| 224 |
+
corrected, confidence, method = self.spell_corrector.correct_location(loc)
|
| 225 |
+
if confidence > 0.5:
|
| 226 |
+
entities['locations'].append({
|
| 227 |
+
'original': loc,
|
| 228 |
+
'corrected': corrected,
|
| 229 |
+
'confidence': confidence,
|
| 230 |
+
'method': method
|
| 231 |
+
})
|
| 232 |
+
|
| 233 |
+
# If no locations found with fare patterns, try general patterns
|
| 234 |
+
if not entities['locations']:
|
| 235 |
+
for pattern in general_patterns:
|
| 236 |
+
matches = re.finditer(pattern, query, re.IGNORECASE)
|
| 237 |
+
for match in matches:
|
| 238 |
+
locations = [loc.strip() for loc in match.groups() if loc.strip()]
|
| 239 |
+
for loc in locations:
|
| 240 |
+
# Skip if we've already processed this location
|
| 241 |
+
if loc.lower() in seen_locations:
|
| 242 |
+
continue
|
| 243 |
+
seen_locations.add(loc.lower())
|
| 244 |
+
|
| 245 |
+
corrected, confidence, method = self.spell_corrector.correct_location(loc)
|
| 246 |
+
if confidence > 0.5:
|
| 247 |
+
entities['locations'].append({
|
| 248 |
+
'original': loc,
|
| 249 |
+
'corrected': corrected,
|
| 250 |
+
'confidence': confidence,
|
| 251 |
+
'method': method
|
| 252 |
+
})
|
| 253 |
+
|
| 254 |
+
|
| 255 |
+
|
| 256 |
+
|
| 257 |
+
|
| 258 |
+
# Extract numbers and currencies
|
| 259 |
+
number_patterns = [
|
| 260 |
+
r'(under|below|less\s+than|over|above|more\s+than)\s+(\d+(?:,\d+)*(?:\.\d+)?)\s*(rupees?|rs?|lkr)?',
|
| 261 |
+
r'between\s+(\d+(?:,\d+)*(?:\.\d+)?)\s+and\s+(\d+(?:,\d+)*(?:\.\d+)?)\s*(rupees?|rs?|lkr)?',
|
| 262 |
+
r'(\d+(?:,\d+)*(?:\.\d+)?)\s*(rupees?|rs?|lkr)?'
|
| 263 |
+
]
|
| 264 |
+
|
| 265 |
+
for pattern in number_patterns:
|
| 266 |
+
matches = re.finditer(pattern, query, re.IGNORECASE)
|
| 267 |
+
for match in matches:
|
| 268 |
+
groups = match.groups()
|
| 269 |
+
if len(groups) >= 2:
|
| 270 |
+
if groups[0] in ['under', 'below', 'less than', 'over', 'above', 'more than']:
|
| 271 |
+
# Pattern: (under|below|less than|over|above|more than) (number) (currency)
|
| 272 |
+
comparator = groups[0]
|
| 273 |
+
number = groups[1]
|
| 274 |
+
currency = groups[2] if len(groups) >= 3 else 'rupees'
|
| 275 |
+
|
| 276 |
+
entities['numbers'].append({
|
| 277 |
+
'value': float(number.replace(',', '')),
|
| 278 |
+
'currency': currency,
|
| 279 |
+
'comparator': comparator
|
| 280 |
+
})
|
| 281 |
+
elif 'between' in pattern:
|
| 282 |
+
# Pattern: between (number1) and (number2) (currency)
|
| 283 |
+
min_number = groups[0]
|
| 284 |
+
max_number = groups[1]
|
| 285 |
+
currency = groups[2] if len(groups) >= 3 else 'rupees'
|
| 286 |
+
|
| 287 |
+
entities['numbers'].append({
|
| 288 |
+
'value': float(min_number.replace(',', '')),
|
| 289 |
+
'currency': currency,
|
| 290 |
+
'comparator': 'between_min'
|
| 291 |
+
})
|
| 292 |
+
entities['numbers'].append({
|
| 293 |
+
'value': float(max_number.replace(',', '')),
|
| 294 |
+
'currency': currency,
|
| 295 |
+
'comparator': 'between_max'
|
| 296 |
+
})
|
| 297 |
+
else:
|
| 298 |
+
# Pattern: (number) (currency)
|
| 299 |
+
number = groups[0]
|
| 300 |
+
currency = groups[1] if len(groups) >= 2 else 'rupees'
|
| 301 |
+
|
| 302 |
+
entities['numbers'].append({
|
| 303 |
+
'value': float(number.replace(',', '')),
|
| 304 |
+
'currency': currency,
|
| 305 |
+
'comparator': None
|
| 306 |
+
})
|
| 307 |
+
|
| 308 |
+
# Extract comparators
|
| 309 |
+
comparator_patterns = [
|
| 310 |
+
r'(cheaper|more\s+expensive|better|worse|faster|slower)',
|
| 311 |
+
r'(compare|difference|vs|versus)',
|
| 312 |
+
r'(under|below|less\s+than|over|above|more\s+than)'
|
| 313 |
+
]
|
| 314 |
+
|
| 315 |
+
for pattern in comparator_patterns:
|
| 316 |
+
matches = re.finditer(pattern, query, re.IGNORECASE)
|
| 317 |
+
for match in matches:
|
| 318 |
+
entities['comparators'].append(match.group(1).lower())
|
| 319 |
+
|
| 320 |
+
return entities
|
| 321 |
+
|
| 322 |
+
def _classify_intent(self, query: str, entities: Dict = None) -> Dict[str, Any]:
|
| 323 |
+
"""Classify the intent of the query"""
|
| 324 |
+
intent_scores = {}
|
| 325 |
+
|
| 326 |
+
for intent, keywords in self.intent_keywords.items():
|
| 327 |
+
score = 0
|
| 328 |
+
for keyword in keywords:
|
| 329 |
+
if keyword in query:
|
| 330 |
+
score += 1
|
| 331 |
+
intent_scores[intent] = score
|
| 332 |
+
|
| 333 |
+
# Get primary intent
|
| 334 |
+
primary_intent = max(intent_scores.items(), key=lambda x: x[1])
|
| 335 |
+
|
| 336 |
+
# Check for specific patterns with priority
|
| 337 |
+
if any(pattern in query for pattern in ['compare', 'difference', 'vs', 'versus', 'cheaper', 'more expensive']):
|
| 338 |
+
primary_intent = ('comparison', 10)
|
| 339 |
+
elif any(pattern in query for pattern in ['recommend', 'suggest', 'best', 'optimal', 'popular']):
|
| 340 |
+
primary_intent = ('recommendation', 10)
|
| 341 |
+
elif any(pattern in query for pattern in ['between', 'under', 'over', 'above', 'below', 'range']):
|
| 342 |
+
primary_intent = ('range_search', 10)
|
| 343 |
+
elif any(pattern in query for pattern in ['fare', 'price', 'cost', 'how much']):
|
| 344 |
+
# Check if we have at least 2 locations
|
| 345 |
+
if entities and len(entities.get('locations', [])) >= 2:
|
| 346 |
+
primary_intent = ('fare_inquiry', 10)
|
| 347 |
+
elif any(pattern in query for pattern in ['route', 'bus', 'train', 'transport']):
|
| 348 |
+
primary_intent = ('route_inquiry', 10)
|
| 349 |
+
|
| 350 |
+
return {
|
| 351 |
+
'primary': primary_intent[0],
|
| 352 |
+
'confidence': primary_intent[1] / 10,
|
| 353 |
+
'all_scores': intent_scores
|
| 354 |
+
}
|
| 355 |
+
|
| 356 |
+
def _generate_cypher_query(self, intent: Dict, entities: Dict, query: str) -> Optional[str]:
|
| 357 |
+
"""Generate Cypher query using LLM for better understanding"""
|
| 358 |
+
try:
|
| 359 |
+
# Try LLM-based query generation first
|
| 360 |
+
llm_query = self._generate_cypher_with_llm(query, intent, entities)
|
| 361 |
+
if llm_query:
|
| 362 |
+
return llm_query
|
| 363 |
+
except Exception as e:
|
| 364 |
+
print(f"LLM query generation failed: {e}")
|
| 365 |
+
|
| 366 |
+
# Fallback to rule-based generation
|
| 367 |
+
primary_intent = intent['primary']
|
| 368 |
+
|
| 369 |
+
if primary_intent == 'fare_inquiry':
|
| 370 |
+
return self._generate_fare_query(entities)
|
| 371 |
+
elif primary_intent == 'comparison':
|
| 372 |
+
return self._generate_comparison_query(entities)
|
| 373 |
+
elif primary_intent == 'route_inquiry':
|
| 374 |
+
return self._generate_route_query(entities, query)
|
| 375 |
+
elif primary_intent == 'statistics':
|
| 376 |
+
return self._generate_statistics_query(entities)
|
| 377 |
+
elif primary_intent == 'recommendation':
|
| 378 |
+
return self._generate_recommendation_query(entities, query)
|
| 379 |
+
elif primary_intent == 'range_search':
|
| 380 |
+
return self._generate_range_query(entities)
|
| 381 |
+
else:
|
| 382 |
+
return self._generate_fallback_query(query)
|
| 383 |
+
|
| 384 |
+
def _generate_fare_query(self, entities: Dict) -> Optional[str]:
|
| 385 |
+
"""Generate fare inquiry Cypher query"""
|
| 386 |
+
locations = entities.get('locations', [])
|
| 387 |
+
|
| 388 |
+
if len(locations) >= 2:
|
| 389 |
+
from_loc = locations[0]['corrected']
|
| 390 |
+
to_loc = locations[1]['corrected']
|
| 391 |
+
|
| 392 |
+
return f"""
|
| 393 |
+
MATCH (a:Place {{name: '{from_loc}'}})-[r:Fare]->(b:Place {{name: '{to_loc}'}})
|
| 394 |
+
RETURN
|
| 395 |
+
a.name as from_place,
|
| 396 |
+
b.name as to_place,
|
| 397 |
+
r.fare as fare,
|
| 398 |
+
'Direct route' as route_type
|
| 399 |
+
"""
|
| 400 |
+
|
| 401 |
+
return None
|
| 402 |
+
|
| 403 |
+
def _generate_comparison_query(self, entities: Dict) -> Optional[str]:
|
| 404 |
+
"""Generate comparison Cypher query"""
|
| 405 |
+
locations = entities.get('locations', [])
|
| 406 |
+
|
| 407 |
+
if len(locations) >= 3:
|
| 408 |
+
# Handle case where we have same origin, different destinations
|
| 409 |
+
if len(locations) == 3:
|
| 410 |
+
# Pattern: "Colombo to Kandy and Colombo to Anuradapura"
|
| 411 |
+
route1_from = locations[0]['corrected']
|
| 412 |
+
route1_to = locations[1]['corrected']
|
| 413 |
+
route2_from = locations[0]['corrected'] # Same origin
|
| 414 |
+
route2_to = locations[2]['corrected']
|
| 415 |
+
elif len(locations) >= 4:
|
| 416 |
+
# Pattern: "Colombo to Kandy and Anuradapura to Galle"
|
| 417 |
+
route1_from = locations[0]['corrected']
|
| 418 |
+
route1_to = locations[1]['corrected']
|
| 419 |
+
route2_from = locations[2]['corrected']
|
| 420 |
+
route2_to = locations[3]['corrected']
|
| 421 |
+
else:
|
| 422 |
+
return None
|
| 423 |
+
|
| 424 |
+
return f"""
|
| 425 |
+
MATCH (a1:Place {{name: '{route1_from}'}})-[r1:Fare]->(b1:Place {{name: '{route1_to}'}})
|
| 426 |
+
MATCH (a2:Place {{name: '{route2_from}'}})-[r2:Fare]->(b2:Place {{name: '{route2_to}'}})
|
| 427 |
+
RETURN
|
| 428 |
+
a1.name + ' to ' + b1.name as route1,
|
| 429 |
+
r1.fare as fare1,
|
| 430 |
+
a2.name + ' to ' + b2.name as route2,
|
| 431 |
+
r2.fare as fare2,
|
| 432 |
+
r1.fare - r2.fare as difference,
|
| 433 |
+
CASE
|
| 434 |
+
WHEN r1.fare < r2.fare THEN 'Route 1 is cheaper'
|
| 435 |
+
WHEN r1.fare > r2.fare THEN 'Route 2 is cheaper'
|
| 436 |
+
ELSE 'Both routes have the same fare'
|
| 437 |
+
END as comparison
|
| 438 |
+
"""
|
| 439 |
+
|
| 440 |
+
return None
|
| 441 |
+
|
| 442 |
+
def _generate_route_query(self, entities: Dict, query: str) -> Optional[str]:
    """Generate a Cypher query listing all routes from or to one place.

    Direction is decided by a whole-word 'from' / 'to' in the raw query;
    word boundaries avoid false positives inside words such as "stops"
    or "history" (the previous substring check matched those).

    Args:
        entities: Extracted entities; uses the first corrected location.
        query: The raw (lowercased upstream) user query.

    Returns:
        A Cypher query string, or None when no location was extracted or
        no direction keyword is present.
    """
    import re

    locations = entities.get('locations', [])
    if not locations:
        return None

    # Injection hardening: keep the name inside its Cypher string literal.
    location = locations[0]['corrected'].replace("\\", "\\\\").replace("'", "\\'")

    if re.search(r'\bfrom\b', query):
        # Outbound routes: the location is the origin.
        return f"""
        MATCH (a:Place {{name: '{location}'}})-[r:Fare]->(b:Place)
        RETURN
            a.name as from_place,
            b.name as to_place,
            r.fare as fare
        ORDER BY r.fare
        """
    if re.search(r'\bto\b', query):
        # Inbound routes: the location is the destination.
        return f"""
        MATCH (a:Place)-[r:Fare]->(b:Place {{name: '{location}'}})
        RETURN
            a.name as from_place,
            b.name as to_place,
            r.fare as fare
        ORDER BY r.fare
        """

    return None
|
| 468 |
+
|
| 469 |
+
def _generate_statistics_query(self, entities: Dict) -> str:
    """Generate a network-wide fare statistics Cypher query.

    `entities` is unused but kept for signature parity with the other
    _generate_* helpers.

    Fix: the previous version used two side-by-side MATCH clauses, whose
    cartesian product inflated count(r) by the number of places. The
    place count is now aggregated first via WITH so each Fare
    relationship is counted exactly once; all result aliases are kept
    unchanged for `_format_statistics_response`.
    """
    return """
    MATCH (p:Place)
    WITH count(p) as total_places
    MATCH ()-[r:Fare]->()
    RETURN
        total_places,
        count(r) as total_routes,
        round(avg(r.fare), 2) as average_fare,
        min(r.fare) as minimum_fare,
        max(r.fare) as maximum_fare,
        round(stdDev(r.fare), 2) as fare_standard_deviation
    """
|
| 482 |
+
|
| 483 |
+
def _generate_recommendation_query(self, entities: Dict, query: str) -> str:
    """Generate a recommendation Cypher query from keywords in *query*.

    Branches on raw substrings of the query: budget terms return the 10
    cheapest routes, popularity terms return the 10 highest-fare routes,
    and anything else falls back to the 5 cheapest routes. `entities`
    is unused but kept for signature parity with the other helpers.

    NOTE(review): "popular"/"frequent" is approximated by ORDER BY
    r.fare DESC (most expensive first) - the graph holds no usage data;
    confirm this proxy is intended.
    """
    if 'cheap' in query or 'budget' in query or 'affordable' in query:
        # Budget request: the ten cheapest routes overall.
        return """
        MATCH (a:Place)-[r:Fare]->(b:Place)
        RETURN
            a.name as from_place,
            b.name as to_place,
            r.fare as fare
        ORDER BY r.fare ASC
        LIMIT 10
        """
    elif 'popular' in query or 'frequent' in query:
        # "Popular" proxy: the ten highest-fare routes (see NOTE above).
        return """
        MATCH (a:Place)-[r:Fare]->(b:Place)
        RETURN
            a.name as from_place,
            b.name as to_place,
            r.fare as fare
        ORDER BY r.fare DESC
        LIMIT 10
        """
    else:
        # Default: a short list of the five cheapest routes.
        return """
        MATCH (a:Place)-[r:Fare]->(b:Place)
        RETURN
            a.name as from_place,
            b.name as to_place,
            r.fare as fare
        ORDER BY r.fare ASC
        LIMIT 5
        """
|
| 515 |
+
|
| 516 |
+
def _generate_range_query(self, entities: Dict) -> Optional[str]:
    """Generate a fare-range Cypher query from extracted number entities.

    Supports "between X and Y" (inclusive bounds) and single-sided
    comparisons ("under/below/less than" -> strict upper bound, cheapest
    first; "over/above/more than" -> strict lower bound, most expensive
    first). Values come from the numeric NLP entities, so they are
    interpolated as numbers, not strings.

    Returns:
        A Cypher query string, or None when no usable number entity is
        present.
    """
    numbers = entities.get('numbers', [])

    if numbers:
        # Check for between range.
        # Accumulate bounds over all number entities; if several
        # single-sided numbers occur, the last one wins.
        between_min = None
        between_max = None
        single_value = None
        single_comparator = None

        for number in numbers:
            comparator = number.get('comparator', '')
            value = number['value']

            if comparator == 'between_min':
                between_min = value
            elif comparator == 'between_max':
                between_max = value
            elif comparator in ['under', 'below', 'less than', 'over', 'above', 'more than']:
                single_value = value
                single_comparator = comparator

        # Generate query based on type
        if between_min is not None and between_max is not None:
            # Inclusive between-range search, cheapest first.
            return f"""
            MATCH (a:Place)-[r:Fare]->(b:Place)
            WHERE r.fare >= {between_min} AND r.fare <= {between_max}
            RETURN
                a.name as from_place,
                b.name as to_place,
                r.fare as fare
            ORDER BY r.fare ASC
            """
        elif single_value is not None and single_comparator is not None:
            if single_comparator in ['under', 'below', 'less than']:
                # Strict upper bound, cheapest first.
                return f"""
                MATCH (a:Place)-[r:Fare]->(b:Place)
                WHERE r.fare < {single_value}
                RETURN
                    a.name as from_place,
                    b.name as to_place,
                    r.fare as fare
                ORDER BY r.fare ASC
                """
            elif single_comparator in ['over', 'above', 'more than']:
                # Strict lower bound, most expensive first.
                return f"""
                MATCH (a:Place)-[r:Fare]->(b:Place)
                WHERE r.fare > {single_value}
                RETURN
                    a.name as from_place,
                    b.name as to_place,
                    r.fare as fare
                ORDER BY r.fare DESC
                """

    return None
|
| 573 |
+
|
| 574 |
+
def _generate_cypher_with_llm(self, query: str, intent: Dict, entities: Dict) -> Optional[str]:
    """Generate a Cypher query by delegating to an OpenAI chat model.

    Builds a schema-and-examples prompt, asks the model for a bare Cypher
    statement, and accepts the answer only if it starts with MATCH.

    Returns:
        The generated Cypher string, or None when no API key is
        configured, the model answers "FALLBACK", the answer fails the
        MATCH-prefix validation, or any error occurs (errors are printed,
        never raised).
    """
    try:
        if not self.config.OPENAI_API_KEY:
            return None

        # Get available places for context
        available_places = list(self.neo4j_service.get_all_places())

        # Create comprehensive prompt for Cypher generation.
        # Only the first 50 place names are inlined to bound prompt size.
        prompt = f"""
        You are a Neo4j Cypher query generator for a transport database.

        Database Schema:
        - Nodes: Place (with property 'name')
        - Relationships: Fare (with property 'fare')

        Available Places: {', '.join(available_places[:50])}... (total: {len(available_places)})

        User Query: "{query}"
        Detected Intent: {intent.get('primary', 'unknown')}
        Extracted Entities: {entities}

        Your task is to generate a valid Cypher query that answers the user's question.

        Query Types and Examples:

        1. FARE INQUIRY:
        - "What is the fare from Colombo to Kandy?"
        - Cypher: MATCH (a:Place {{name: 'Colombo'}})-[r:Fare]->(b:Place {{name: 'Kandy'}}) RETURN a.name as from_place, b.name as to_place, r.fare as fare

        2. COMPARISON:
        - "Compare fares from Colombo to Kandy vs Colombo to Galle"
        - Cypher: MATCH (a1:Place {{name: 'Colombo'}})-[r1:Fare]->(b1:Place {{name: 'Kandy'}}) MATCH (a2:Place {{name: 'Colombo'}})-[r2:Fare]->(b2:Place {{name: 'Galle'}}) RETURN a1.name + ' to ' + b1.name as route1, r1.fare as fare1, a2.name + ' to ' + b2.name as route2, r2.fare as fare2, r1.fare - r2.fare as difference

        3. RANGE SEARCH:
        - "Find routes under 500 rupees"
        - Cypher: MATCH (a:Place)-[r:Fare]->(b:Place) WHERE r.fare < 500 RETURN a.name as from_place, b.name as to_place, r.fare as fare ORDER BY r.fare ASC

        4. RECOMMENDATION:
        - "Recommend cheap routes"
        - Cypher: MATCH (a:Place)-[r:Fare]->(b:Place) RETURN a.name as from_place, b.name as to_place, r.fare as fare ORDER BY r.fare ASC LIMIT 10

        5. STATISTICS:
        - "What is the average fare?"
        - Cypher: MATCH ()-[r:Fare]->() RETURN round(avg(r.fare), 2) as average_fare, min(r.fare) as min_fare, max(r.fare) as max_fare

        6. ROUTE INQUIRY:
        - "Routes from Colombo"
        - Cypher: MATCH (a:Place {{name: 'Colombo'}})-[r:Fare]->(b:Place) RETURN a.name as from_place, b.name as to_place, r.fare as fare ORDER BY r.fare

        Important Rules:
        1. Always use proper Cypher syntax
        2. Use exact place names from the available places list
        3. For comparisons, use multiple MATCH clauses
        4. For ranges, use WHERE clauses with appropriate operators
        5. For statistics, use aggregation functions
        6. Always include meaningful column aliases
        7. Use ORDER BY for sorted results
        8. Use LIMIT for large result sets

        Return ONLY the Cypher query, nothing else. If you cannot generate a valid query, return "FALLBACK".
        """

        cypher_query = None
        # Prefer new SDK (openai>=1.0); on any failure fall back to the
        # legacy openai.ChatCompletion API, and if that also fails,
        # re-raise the *original* new-SDK error for clearer diagnostics.
        try:
            from openai import OpenAI
            client = OpenAI(api_key=self.config.OPENAI_API_KEY)
            response = client.chat.completions.create(
                model=self.config.OPENAI_MODEL,
                messages=[
                    {"role": "system", "content": "You are a Cypher query generator. Return only valid Cypher queries."},
                    {"role": "user", "content": prompt}
                ],
                max_tokens=300,
                temperature=0.1
            )
            cypher_query = response.choices[0].message.content.strip()
        except Exception as sdk_err:
            import openai
            try:
                openai.api_key = self.config.OPENAI_API_KEY
                response = openai.ChatCompletion.create(
                    model=self.config.OPENAI_MODEL,
                    messages=[
                        {"role": "system", "content": "You are a Cypher query generator. Return only valid Cypher queries."},
                        {"role": "user", "content": prompt}
                    ],
                    max_tokens=300,
                    temperature=0.1
                )
                cypher_query = response.choices[0].message.content.strip()
            except Exception:
                raise sdk_err

        # Validate the response: the model may explicitly punt.
        if cypher_query.upper() == "FALLBACK":
            return None

        # Basic validation - check if it starts with MATCH
        if cypher_query.upper().startswith('MATCH'):
            return cypher_query

        return None

    except Exception as e:
        print(f"LLM Cypher generation error: {e}")
        return None
|
| 683 |
+
|
| 684 |
+
def _generate_fallback_query(self, query: str) -> Optional[str]:
    """Last-resort Cypher generation when the intent is unclear.

    Tries (1) spell-corrector location extraction, then (2) direct regex
    patterns for fare/price/cost phrasing with spell correction of both
    endpoints.

    Returns:
        A direct-fare Cypher query, or None when nothing usable can be
        extracted from *query*.
    """
    import re

    # Escape backslashes and single quotes so a place name cannot break
    # out of the single-quoted Cypher string literal (injection hardening).
    def _esc(name: str) -> str:
        return name.replace("\\", "\\\\").replace("'", "\\'")

    # Attempt 1: location extraction via the spell corrector.
    # Each entry is a (raw, corrected) pair; index [1] is the corrected name.
    locations = self.spell_corrector.extract_locations_from_query(query)
    if len(locations) >= 2:
        from_loc = _esc(locations[0][1])
        to_loc = _esc(locations[1][1])
        return f"""
        MATCH (a:Place {{name: '{from_loc}'}})-[r:Fare]->(b:Place {{name: '{to_loc}'}})
        RETURN
            a.name as from_place,
            b.name as to_place,
            r.fare as fare
        """

    # Attempt 2: direct pattern matching for fare-style questions.
    if any(word in query.lower() for word in ('fare', 'price', 'cost')):
        fare_patterns = [
            r'fare\s+(?:of|from)?\s+([a-zA-Z\s]+)\s+to\s+([a-zA-Z\s]+)',
            r'price\s+(?:of|from)?\s+([a-zA-Z\s]+)\s+to\s+([a-zA-Z\s]+)',
            r'cost\s+(?:of|from)?\s+([a-zA-Z\s]+)\s+to\s+([a-zA-Z\s]+)',
            r'(?:what\s+is\s+)?(?:the\s+)?(?:fare|price|cost)(?:\s+of)?(?:\s+from)?\s+([a-zA-Z\s]+)\s+to\s+([a-zA-Z\s]+)',
            r'(?:how\s+much\s+)?(?:is|does)\s+(?:the\s+)?(?:fare|price|cost)\s+(?:from\s+)?([a-zA-Z\s]+)\s+to\s+([a-zA-Z\s]+)'
        ]

        for pattern in fare_patterns:
            match = re.search(pattern, query.lower())
            if not match:
                continue
            from_raw = match.group(1).strip()
            to_raw = match.group(2).strip()

            # Spell-correct both endpoints; only trust confident matches.
            from_corrected, from_conf, _ = self.spell_corrector.correct_location(from_raw)
            to_corrected, to_conf, _ = self.spell_corrector.correct_location(to_raw)

            if from_conf > 0.5 and to_conf > 0.5:
                return f"""
                MATCH (a:Place {{name: '{_esc(from_corrected)}'}})-[r:Fare]->(b:Place {{name: '{_esc(to_corrected)}'}})
                RETURN a.name as from_place, b.name as to_place, r.fare as fare
                """

    return None
|
| 728 |
+
|
| 729 |
+
def _execute_query(self, cypher_query: str) -> List[Dict]:
    """Run *cypher_query* against Neo4j and return rows as plain dicts.

    Best-effort: any driver/query error is printed and swallowed, and an
    empty list is returned instead.
    """
    try:
        with self.neo4j_service.driver.session() as session:
            rows = session.run(cypher_query)
            return [dict(row) for row in rows]
    except Exception as e:
        print(f"Query execution error: {e}")
        return []
|
| 738 |
+
|
| 739 |
+
def _format_response(self, intent: Dict, entities: Dict, results: List[Dict], query: str) -> Dict[str, Any]:
    """Dispatch result formatting to the intent-specific formatter.

    Empty result sets short-circuit to a failure payload with query
    suggestions; unknown intents fall through to the generic formatter.
    """
    primary_intent = intent['primary']

    if not results:
        return {
            'success': False,
            'message': 'No results found for your query.',
            'suggestions': self._get_suggestions()
        }

    # Intent -> formatter dispatch table (lazy, so only one formatter runs).
    dispatch = {
        'fare_inquiry': lambda: self._format_fare_response(results, entities),
        'comparison': lambda: self._format_comparison_response(results, entities),
        'route_inquiry': lambda: self._format_route_response(results, entities),
        'statistics': lambda: self._format_statistics_response(results),
        'recommendation': lambda: self._format_recommendation_response(results, query),
        'range_search': lambda: self._format_range_response(results, entities),
    }
    handler = dispatch.get(primary_intent)
    return handler() if handler else self._format_generic_response(results)
|
| 764 |
+
|
| 765 |
+
def _format_fare_response(self, results: List[Dict], entities: Dict) -> Dict[str, Any]:
    """Build the user-facing payload for a direct fare lookup.

    Only the first result row is summarized; all rows are still returned
    under 'data'.
    """
    if not results:
        return {'success': False, 'message': 'Fare information not found.'}

    top = results[0]
    origin = top['from_place']
    destination = top['to_place']
    fare = top['fare']
    return {
        'success': True,
        'message': f"The fare from {origin} to {destination} is Rs. {fare}",
        'data': results,
        'query_type': 'fare_inquiry',
        'summary': {
            'from_place': origin,
            'to_place': destination,
            'fare': fare,
        },
    }
|
| 781 |
+
|
| 782 |
+
def _format_comparison_response(self, results: List[Dict], entities: Dict) -> Dict[str, Any]:
    """Build the user-facing payload for a two-route fare comparison."""
    if not results:
        return {'success': False, 'message': 'Comparison not possible.'}

    head = results[0]
    return {
        'success': True,
        'message': head.get('comparison', 'Comparison completed'),
        'data': results,
        'query_type': 'comparison',
        # Summarize only the comparison-relevant columns of the first row.
        'summary': {key: head.get(key) for key in ('route1', 'route2', 'difference')},
    }
|
| 798 |
+
|
| 799 |
+
def _format_route_response(self, results: List[Dict], entities: Dict) -> Dict[str, Any]:
    """Build the user-facing payload for a routes-from/to listing."""
    fares = [row['fare'] for row in results]
    fare_range = f"Rs. {min(fares)} - Rs. {max(fares)}" if fares else "N/A"
    return {
        'success': True,
        'message': f"Found {len(results)} routes",
        'data': results,
        'query_type': 'route_inquiry',
        'summary': {
            'total_routes': len(results),
            'fare_range': fare_range,
        },
    }
|
| 811 |
+
|
| 812 |
+
def _format_statistics_response(self, results: List[Dict]) -> Dict[str, Any]:
    """Build the user-facing payload for database statistics.

    Expects the first row to carry the aliases produced by
    `_generate_statistics_query` (total_places, total_routes,
    average_fare, minimum_fare, maximum_fare).
    """
    if not results:
        return {'success': False, 'message': 'Statistics not available.'}

    stats = results[0]
    places = stats['total_places']
    routes = stats['total_routes']
    return {
        'success': True,
        'message': f"Database contains {places} places and {routes} routes",
        'data': results,
        'query_type': 'statistics',
        'summary': {
            'total_places': places,
            'total_routes': routes,
            'average_fare': stats['average_fare'],
            'fare_range': f"Rs. {stats['minimum_fare']} - Rs. {stats['maximum_fare']}",
        },
    }
|
| 829 |
+
|
| 830 |
+
def _format_recommendation_response(self, results: List[Dict], query: str) -> Dict[str, Any]:
    """Build the user-facing payload for route recommendations.

    *query* is accepted for signature parity but is not used here.
    """
    fares = [row['fare'] for row in results]
    price_span = f"Rs. {min(fares)} - Rs. {max(fares)}" if fares else "N/A"
    return {
        'success': True,
        'message': f"Here are {len(results)} recommended routes",
        'data': results,
        'query_type': 'recommendation',
        'summary': {
            'recommendations_count': len(results),
            'fare_range': price_span,
        },
    }
|
| 842 |
+
|
| 843 |
+
def _format_range_response(self, results: List[Dict], entities: Dict) -> Dict[str, Any]:
    """Build the user-facing payload for a fare-range search."""
    fares = [row['fare'] for row in results]
    span = f"Rs. {min(fares)} - Rs. {max(fares)}" if fares else "N/A"
    return {
        'success': True,
        'message': f"Found {len(results)} routes in your specified range",
        'data': results,
        'query_type': 'range_search',
        'summary': {
            'routes_found': len(results),
            'fare_range': span,
        },
    }
|
| 855 |
+
|
| 856 |
+
def _format_generic_response(self, results: List[Dict]) -> Dict[str, Any]:
    """Build a minimal payload when no intent-specific formatter applies."""
    payload = {
        'success': True,
        'message': f"Found {len(results)} results",
        'data': results,
        'query_type': 'generic',
    }
    return payload
|
| 864 |
+
|
| 865 |
+
def _handle_unclear_query(self, query: str) -> Dict[str, Any]:
    """Fallback payload when no intent could be derived from *query*."""
    return dict(
        success=False,
        message='I could not understand your query. Please try rephrasing it.',
        suggestions=self._get_suggestions(),
        query_type='unclear',
    )
|
| 873 |
+
|
| 874 |
+
def _calculate_confidence(self, intent: Dict, entities: Dict) -> float:
    """Blend intent, entity and complexity signals into a 0..1 score.

    Weights: 40% intent-classifier confidence, 40% mean location
    spell-correction confidence, plus a 0.2 bonus for queries with at
    least two locations (origin + destination). Capped at 1.0.
    """
    locations = entities.get('locations', [])

    # 40% weight: classifier confidence for the detected intent.
    score = 0.0
    score += intent.get('confidence', 0) * 0.4

    # 40% weight: average spell-correction confidence of the locations.
    if locations:
        mean_loc_conf = sum(loc['confidence'] for loc in locations) / len(locations)
        score += mean_loc_conf * 0.4

    # Complexity bonus for multi-location queries.
    if len(locations) >= 2:
        score += 0.2

    return min(score, 1.0)
|
| 892 |
+
|
| 893 |
+
def _get_suggestions(self) -> List[str]:
    """Example queries shown to the user when a request cannot be served."""
    examples = (
        "What is the fare from Colombo to Kandy?",
        "Compare fares from Colombo to Kandy vs Colombo to Galle",
        "Show me routes from Panadura",
        "Find routes under 500 rupees",
        "What are the cheapest routes?",
        "Show me popular destinations",
        "Give me database statistics",
        "Recommend affordable routes",
    )
    return list(examples)
|
language_detector.py
ADDED
|
@@ -0,0 +1,251 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Language Detection Service
|
| 4 |
+
Auto-detects user input language: Sinhala, Singlish, English, Tamil
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import re
|
| 8 |
+
from typing import Dict, Any, Optional, Tuple
|
| 9 |
+
from logger import get_logger
|
| 10 |
+
|
| 11 |
+
class LanguageDetector:
    """Detects language of user input with support for Sinhala, Singlish, English, and Tamil.

    Detection combines Unicode-script ratios, mixed-script (Singlish)
    pattern matching, and domain-specific transport vocabulary lookups.

    Fix vs. previous revision: `_determine_language` contained a final
    "pure English" check that was unreachable (the earlier
    english_ratio > 0.4 branch always returned first); it has been removed.
    """

    def __init__(self) -> None:
        self.logger = get_logger(self.__class__.__name__)

        # Unicode ranges for the scripts we distinguish.
        self.script_ranges = {
            'sinhala': re.compile(r'[\u0D80-\u0DFF]'),   # Sinhala script
            'tamil': re.compile(r'[\u0B80-\u0BFF]'),     # Tamil script
            'english': re.compile(r'[a-zA-Z]'),          # Latin script
            'numbers': re.compile(r'[0-9]'),             # Digits
            'punctuation': re.compile(r'[^\w\s]')        # Everything else
        }

        # Regexes for Sinhala/English adjacency (mixed-script Singlish).
        self.singlish_patterns = [
            r'[\u0D80-\u0DFF]+[a-zA-Z]+',      # Sinhala followed by English
            r'[a-zA-Z]+[\u0D80-\u0DFF]+',      # English followed by Sinhala
            r'[\u0D80-\u0DFF]+\s+[a-zA-Z]+',   # Sinhala word then English word
            r'[a-zA-Z]+\s+[\u0D80-\u0DFF]+',   # English word then Sinhala word
        ]

        # Latin-script words treated as Singlish indicators.
        # NOTE(review): these are matched as plain substrings and include
        # very common English words ('a', 'is', 'to'), so ordinary English
        # transport queries can also be classified as Singlish - confirm
        # that downstream treats 'singlish' and 'english' equivalently.
        self.singlish_indicators = [
            'bus', 'fare', 'price', 'cost', 'route', 'ticket', 'station',
            'colombo', 'kandy', 'galle', 'matara', 'anuradhapura', 'panadura',
            'rupees', 'rs', 'lkr', 'how much', 'what is', 'show me', 'find',
            'from', 'to', 'and', 'or', 'the', 'a', 'an', 'is', 'are', 'was', 'were'
        ]

        # Tamil transport terms (for detection)
        self.tamil_transport_terms = [
            'பேருந்து', 'கட்டணம்', 'விலை', 'செலவு', 'பாதை', 'டிக்கெட்', 'நிலையம்',
            'கொழும்பு', 'கண்டி', 'காலி', 'மாத்தறை', 'அனுராதபுரம்', 'பனதுரை',
            'ரூபாய்', 'எவ்வளவு', 'என்ன', 'காட்டு', 'கண்டுபிடி', 'இருந்து', 'வரை',
            'மற்றும்', 'அல்லது', 'இது', 'அது', 'உள்ளது', 'இருக்கிறது'
        ]

        # Sinhala transport terms (for detection)
        self.sinhala_transport_terms = [
            'බස්', 'ගාස්තු', 'මිල', 'වාරික', 'මාර්ග', 'ටිකට්', 'නිලය',
            'කොළඹ', 'මහනුවර', 'ගාල්ල', 'මාතර', 'අනුරාධපුර', 'පානදුර',
            'රුපියල්', 'කීයද', 'මොනවාද', 'පෙන්වන්න', 'සොයන්න', 'සිට', 'ට',
            'සහ', 'හෝ', 'මේ', 'ඒ', 'කියලා', 'ඉන්නවා'
        ]

    def detect_language(self, text: str) -> Dict[str, Any]:
        """Detect the language of the input text.

        Args:
            text: Input text to analyze.

        Returns:
            Dict with 'language' (one of 'sinhala', 'tamil', 'english',
            'singlish', 'tamil_english', 'unknown'), a 0..1 'confidence',
            and a 'details' dict explaining the decision.
        """
        if not text or not text.strip():
            return {
                'language': 'unknown',
                'confidence': 0.0,
                'details': {
                    'script_analysis': {},
                    'pattern_matches': [],
                    'reasoning': 'Empty or whitespace-only text'
                }
            }

        # Clean and normalize text
        clean_text = text.strip()

        # Analyze script composition
        script_analysis = self._analyze_scripts(clean_text)

        # Check for Singlish patterns
        singlish_matches = self._detect_singlish(clean_text)

        # Determine primary language
        language, confidence, reasoning = self._determine_language(script_analysis, singlish_matches, clean_text)

        return {
            'language': language,
            'confidence': confidence,
            'details': {
                'script_analysis': script_analysis,
                'singlish_matches': singlish_matches,
                'reasoning': reasoning,
                'original_text': text,
                'clean_text': clean_text
            }
        }

    def _analyze_scripts(self, text: str) -> Dict[str, Any]:
        """Count per-script characters and compute per-script ratios of *text*."""
        analysis = {
            'sinhala_chars': 0,
            'tamil_chars': 0,
            'english_chars': 0,
            'number_chars': 0,
            'punctuation_chars': 0,
            'total_chars': len(text),
            'sinhala_ratio': 0.0,
            'tamil_ratio': 0.0,
            'english_ratio': 0.0,
            'mixed_script': False
        }

        # Classify each character by the first matching script range.
        for char in text:
            if self.script_ranges['sinhala'].match(char):
                analysis['sinhala_chars'] += 1
            elif self.script_ranges['tamil'].match(char):
                analysis['tamil_chars'] += 1
            elif self.script_ranges['english'].match(char):
                analysis['english_chars'] += 1
            elif self.script_ranges['numbers'].match(char):
                analysis['number_chars'] += 1
            elif self.script_ranges['punctuation'].match(char):
                analysis['punctuation_chars'] += 1

        # Ratios are over ALL characters, including spaces/digits/punctuation.
        if analysis['total_chars'] > 0:
            analysis['sinhala_ratio'] = analysis['sinhala_chars'] / analysis['total_chars']
            analysis['tamil_ratio'] = analysis['tamil_chars'] / analysis['total_chars']
            analysis['english_ratio'] = analysis['english_chars'] / analysis['total_chars']

        # Mixed script = more than one of Sinhala/Tamil/Latin present.
        script_count = sum([
            analysis['sinhala_chars'] > 0,
            analysis['tamil_chars'] > 0,
            analysis['english_chars'] > 0
        ])
        analysis['mixed_script'] = script_count > 1

        return analysis

    def _detect_singlish(self, text: str) -> Dict[str, Any]:
        """Detect Singlish (mixed Sinhala/English) patterns in *text*."""
        matches = {
            'pattern_matches': [],
            'indicator_words': [],
            'is_singlish': False,
            'confidence': 0.0
        }

        # Mixed-script adjacency patterns.
        for pattern in self.singlish_patterns:
            found = re.findall(pattern, text)
            if found:
                matches['pattern_matches'].extend(found)

        # Indicator vocabulary (substring match, case-insensitive).
        text_lower = text.lower()
        for indicator in self.singlish_indicators:
            if indicator in text_lower:
                matches['indicator_words'].append(indicator)

        # Singlish when mixed-script patterns appear, or >= 2 indicators.
        if matches['pattern_matches'] or len(matches['indicator_words']) >= 2:
            matches['is_singlish'] = True
            matches['confidence'] = min(0.9, 0.3 + (len(matches['pattern_matches']) * 0.2) + (len(matches['indicator_words']) * 0.1))

        return matches

    def _determine_language(self, script_analysis: Dict, singlish_matches: Dict, text: str) -> Tuple[str, float, str]:
        """Pick the primary language from script ratios, Singlish hits and vocabulary."""

        # High confidence cases
        if singlish_matches['is_singlish'] and singlish_matches['confidence'] > 0.6:
            return 'singlish', singlish_matches['confidence'], 'Detected Singlish patterns and indicator words'

        # Pure (single-script dominant) cases.
        if script_analysis['sinhala_ratio'] > 0.7 and script_analysis['tamil_ratio'] == 0:
            return 'sinhala', script_analysis['sinhala_ratio'], 'High Sinhala script ratio'

        if script_analysis['tamil_ratio'] > 0.7 and script_analysis['sinhala_ratio'] == 0:
            return 'tamil', script_analysis['tamil_ratio'], 'High Tamil script ratio'

        if script_analysis['english_ratio'] > 0.7 and script_analysis['sinhala_ratio'] == 0 and script_analysis['tamil_ratio'] == 0:
            return 'english', script_analysis['english_ratio'], 'High English script ratio'

        # Mixed cases with a dominant script.
        if script_analysis['sinhala_ratio'] > 0.4:
            confidence = script_analysis['sinhala_ratio']
            if script_analysis['english_ratio'] > 0.2:
                return 'singlish', confidence, 'Sinhala-dominant mixed text with English'
            return 'sinhala', confidence, 'Sinhala-dominant text'

        if script_analysis['tamil_ratio'] > 0.4:
            confidence = script_analysis['tamil_ratio']
            if script_analysis['english_ratio'] > 0.2:
                return 'tamil_english', confidence, 'Tamil-dominant mixed text with English'
            return 'tamil', confidence, 'Tamil-dominant text'

        if script_analysis['english_ratio'] > 0.4:
            confidence = script_analysis['english_ratio']
            if script_analysis['sinhala_ratio'] > 0.1 or script_analysis['tamil_ratio'] > 0.1:
                return 'singlish', confidence, 'English-dominant mixed text'
            return 'english', confidence, 'English-dominant text'

        # Fallback: domain vocabulary lookup.
        text_lower = text.lower()
        sinhala_terms_found = sum(1 for term in self.sinhala_transport_terms if term in text_lower)
        tamil_terms_found = sum(1 for term in self.tamil_transport_terms if term in text_lower)

        if sinhala_terms_found > tamil_terms_found and sinhala_terms_found > 0:
            return 'sinhala', 0.6, f'Found {sinhala_terms_found} Sinhala transport terms'

        if tamil_terms_found > sinhala_terms_found and tamil_terms_found > 0:
            return 'tamil', 0.6, f'Found {tamil_terms_found} Tamil transport terms'

        # (A former "english_ratio > 0.8" check here was unreachable: any
        # such text already returned in the english_ratio > 0.4 branch.)

        # Default to English if no clear indicators
        return 'english', 0.5, 'Default to English - no clear language indicators'

    def is_sinhala(self, text: str) -> bool:
        """Quick check: True for 'sinhala' or 'singlish' classifications."""
        result = self.detect_language(text)
        return result['language'] in ['sinhala', 'singlish']

    def is_tamil(self, text: str) -> bool:
        """Quick check: True for 'tamil' or 'tamil_english' classifications."""
        result = self.detect_language(text)
        return result['language'] in ['tamil', 'tamil_english']

    def is_english(self, text: str) -> bool:
        """Quick check: True only for a plain 'english' classification."""
        result = self.detect_language(text)
        return result['language'] == 'english'

    def is_singlish(self, text: str) -> bool:
        """Quick check: True only for a 'singlish' classification."""
        result = self.detect_language(text)
        return result['language'] == 'singlish'

    def get_detection_summary(self, text: str) -> str:
        """Return a one-line human-readable summary of the detection result."""
        result = self.detect_language(text)
        return f"Language: {result['language']} (confidence: {result['confidence']:.2f}) - {result['details']['reasoning']}"
|
llm_query_processor.py
CHANGED
|
@@ -1,351 +1,384 @@
|
|
| 1 |
-
#!/usr/bin/env python3
|
| 2 |
-
"""
|
| 3 |
-
LLM-Based Query Processor for Transport Query Application
|
| 4 |
-
Uses AI to interpret queries and generate Cypher queries
|
| 5 |
-
"""
|
| 6 |
-
|
| 7 |
-
import re
|
| 8 |
-
|
| 9 |
-
from
|
| 10 |
-
from
|
| 11 |
-
from
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
def
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
-
|
| 109 |
-
-
|
| 110 |
-
-
|
| 111 |
-
-
|
| 112 |
-
-
|
| 113 |
-
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
}
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
'
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
"""
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
'
|
| 336 |
-
'
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
|
| 342 |
-
|
| 343 |
-
|
| 344 |
-
|
| 345 |
-
|
| 346 |
-
|
| 347 |
-
|
| 348 |
-
|
| 349 |
-
|
| 350 |
-
|
| 351 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
LLM-Based Query Processor for Transport Query Application
|
| 4 |
+
Uses Google Gemini AI to interpret queries and generate Cypher queries
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import re
|
| 8 |
+
import json
|
| 9 |
+
from typing import Dict, List, Tuple, Optional
|
| 10 |
+
from spell_corrector import SpellCorrector
|
| 11 |
+
from neo4j_service import Neo4jService
|
| 12 |
+
from config import Config
|
| 13 |
+
import google.generativeai as genai
|
| 14 |
+
import os
|
| 15 |
+
from dotenv import load_dotenv
|
| 16 |
+
load_dotenv()
|
| 17 |
+
|
| 18 |
+
class LLMQueryProcessor:
|
| 19 |
+
"""Process natural language queries using LLM for interpretation and Cypher generation"""
|
| 20 |
+
|
| 21 |
+
def __init__(self):
|
| 22 |
+
self.config = Config()
|
| 23 |
+
self.spell_corrector = SpellCorrector()
|
| 24 |
+
self.neo4j_service = Neo4jService()
|
| 25 |
+
|
| 26 |
+
# Configure Google Generative AI
|
| 27 |
+
if hasattr(self.config, 'GOOGLE_API_KEY') and self.config.GOOGLE_API_KEY:
|
| 28 |
+
genai.configure(api_key=self.config.GOOGLE_API_KEY)
|
| 29 |
+
self.google_api_available = True
|
| 30 |
+
else:
|
| 31 |
+
# Fallback to hardcoded API key if not in config
|
| 32 |
+
google_api_key = os.getenv("GOOGLE_API_KEY")
|
| 33 |
+
genai.configure(api_key=google_api_key)
|
| 34 |
+
self.google_api_available = True
|
| 35 |
+
|
| 36 |
+
def process_query(self, user_query: str) -> Dict:
|
| 37 |
+
"""
|
| 38 |
+
Process a natural language query using LLM for interpretation
|
| 39 |
+
|
| 40 |
+
Returns:
|
| 41 |
+
Dictionary with query results and metadata
|
| 42 |
+
"""
|
| 43 |
+
try:
|
| 44 |
+
# First, extract and correct locations from the query
|
| 45 |
+
locations = self.spell_corrector.extract_locations_from_query(user_query)
|
| 46 |
+
|
| 47 |
+
# Use LLM to interpret the query and generate Cypher
|
| 48 |
+
interpretation = self._interpret_query_with_llm(user_query, locations)
|
| 49 |
+
|
| 50 |
+
if interpretation['success']:
|
| 51 |
+
# Execute the generated Cypher query
|
| 52 |
+
result = self._execute_cypher_query(interpretation['cypher_query'])
|
| 53 |
+
|
| 54 |
+
return {
|
| 55 |
+
'success': True,
|
| 56 |
+
'message': interpretation['message'],
|
| 57 |
+
'cypher_query': interpretation['cypher_query'],
|
| 58 |
+
'data': result,
|
| 59 |
+
'corrections': self._format_corrections(locations),
|
| 60 |
+
'query_type': interpretation['query_type']
|
| 61 |
+
}
|
| 62 |
+
else:
|
| 63 |
+
return {
|
| 64 |
+
'success': False,
|
| 65 |
+
'message': interpretation['message'],
|
| 66 |
+
'suggestions': self._get_query_suggestions()
|
| 67 |
+
}
|
| 68 |
+
|
| 69 |
+
except Exception as e:
|
| 70 |
+
print(f"Query processing error: {e}")
|
| 71 |
+
return {
|
| 72 |
+
'success': False,
|
| 73 |
+
'message': 'An error occurred while processing your query.',
|
| 74 |
+
'suggestions': self._get_query_suggestions()
|
| 75 |
+
}
|
| 76 |
+
|
| 77 |
+
def _interpret_query_with_llm(self, query: str, locations: List[Tuple]) -> Dict:
|
| 78 |
+
"""Use Google Gemini AI to interpret the query and generate appropriate Cypher"""
|
| 79 |
+
try:
|
| 80 |
+
if not self.google_api_available:
|
| 81 |
+
return self._fallback_interpretation(query, locations)
|
| 82 |
+
|
| 83 |
+
# Get available places for context
|
| 84 |
+
available_places = list(self.neo4j_service.get_all_places())
|
| 85 |
+
|
| 86 |
+
# Create comprehensive prompt for query interpretation
|
| 87 |
+
prompt = f"""
|
| 88 |
+
You are an intelligent transport query interpreter for a Neo4j database containing Sri Lankan transport data.
|
| 89 |
+
|
| 90 |
+
Database Schema:
|
| 91 |
+
- Nodes: Place (with property 'name')
|
| 92 |
+
- Relationships: Fare (with property 'fare')
|
| 93 |
+
|
| 94 |
+
Available Places: {', '.join(available_places[:50])}... (total: {len(available_places)})
|
| 95 |
+
|
| 96 |
+
User Query: "{query}"
|
| 97 |
+
|
| 98 |
+
Extracted Locations: {[f"{orig}->{corr}" for orig, corr, conf, method in locations]}
|
| 99 |
+
|
| 100 |
+
Your task is to:
|
| 101 |
+
1. Determine the query type (fare, cheapest, expensive, places, routes_from, routes_to, statistics, lowest_fare)
|
| 102 |
+
2. Generate the appropriate Cypher query
|
| 103 |
+
3. Provide a clear response message
|
| 104 |
+
|
| 105 |
+
Query Types:
|
| 106 |
+
- fare: Find fare between two specific locations
|
| 107 |
+
- cheapest: Find cheapest routes (top 10)
|
| 108 |
+
- expensive: Find most expensive routes (top 10)
|
| 109 |
+
- places: List all places
|
| 110 |
+
- routes_from: Find routes departing from a location
|
| 111 |
+
- routes_to: Find routes arriving at a location
|
| 112 |
+
- statistics: Get database statistics
|
| 113 |
+
- lowest_fare: Find the single lowest fare with route details
|
| 114 |
+
|
| 115 |
+
Return your response in this exact JSON format:
|
| 116 |
+
{{
|
| 117 |
+
"query_type": "fare|cheapest|expensive|places|routes_from|routes_to|statistics|lowest_fare",
|
| 118 |
+
"cypher_query": "MATCH ... RETURN ...",
|
| 119 |
+
"message": "Clear response message for the user"
|
| 120 |
+
}}
|
| 121 |
+
|
| 122 |
+
Examples:
|
| 123 |
+
- "What is the fare from Colombo to Kandy?" → fare query: MATCH (a:Place {name: 'Colombo'})-[r:Fare]->(b:Place {name: 'Kandy'}) RETURN a.name as from_place, b.name as to_place, r.fare as fare
|
| 124 |
+
- "fare of anuradhapura to kandy?" → fare query: MATCH (a:Place {name: 'Anuradapura'})-[r:Fare]->(b:Place {name: 'Kandy'}) RETURN a.name as from_place, b.name as to_place, r.fare as fare
|
| 125 |
+
- "Show me the cheapest routes" → cheapest query: MATCH (a:Place)-[r:Fare]->(b:Place) RETURN a.name as from_place, b.name as to_place, r.fare as fare ORDER BY r.fare ASC LIMIT 10
|
| 126 |
+
- "What is the lowest fare?" → lowest_fare query: MATCH (a:Place)-[r:Fare]->(b:Place) RETURN a.name as from_place, b.name as to_place, r.fare as fare ORDER BY r.fare ASC LIMIT 1
|
| 127 |
+
- "List all places" → places query: MATCH (p:Place) RETURN DISTINCT p.name as place ORDER BY p.name
|
| 128 |
+
- "Routes from Colombo" → routes_from query: MATCH (a:Place {name: 'Colombo'})-[r:Fare]->(b:Place) RETURN a.name as from_place, b.name as to_place, r.fare as fare ORDER BY r.fare
|
| 129 |
+
- "Database statistics" → statistics query: MATCH (p:Place) MATCH ()-[r:Fare]->() RETURN count(DISTINCT p) as total_places, count(r) as total_routes, avg(r.fare) as average_fare, min(r.fare) as min_fare, max(r.fare) as max_fare
|
| 130 |
+
|
| 131 |
+
Keep Cypher queries simple and avoid complex functions like shortestPath. Use direct relationships only.
|
| 132 |
+
|
| 133 |
+
For fare queries, recognize various formats like "fare of X to Y", "fare from X to Y", "price from X to Y", etc.
|
| 134 |
+
"""
|
| 135 |
+
|
| 136 |
+
# Call Google Gemini AI
|
| 137 |
+
interpretation = None
|
| 138 |
+
try:
|
| 139 |
+
# Initialize the Gemini model
|
| 140 |
+
model = genai.GenerativeModel('gemini-1.5-flash')
|
| 141 |
+
|
| 142 |
+
# Create the full prompt with system instructions
|
| 143 |
+
full_prompt = f"""You are a transport query interpreter for a Neo4j database. Return only valid JSON.
|
| 144 |
+
|
| 145 |
+
{prompt}"""
|
| 146 |
+
|
| 147 |
+
# Generate content using Gemini
|
| 148 |
+
response = model.generate_content(
|
| 149 |
+
full_prompt,
|
| 150 |
+
generation_config=genai.types.GenerationConfig(
|
| 151 |
+
max_output_tokens=500,
|
| 152 |
+
temperature=0.1,
|
| 153 |
+
response_mime_type="application/json"
|
| 154 |
+
)
|
| 155 |
+
)
|
| 156 |
+
|
| 157 |
+
# Parse the JSON response
|
| 158 |
+
interpretation = json.loads(response.text.strip())
|
| 159 |
+
|
| 160 |
+
except json.JSONDecodeError as json_err:
|
| 161 |
+
print(f"JSON parsing error: {json_err}")
|
| 162 |
+
# Try to extract JSON from response if it's wrapped in text
|
| 163 |
+
try:
|
| 164 |
+
response_text = response.text.strip()
|
| 165 |
+
# Look for JSON-like content in the response
|
| 166 |
+
json_start = response_text.find('{')
|
| 167 |
+
json_end = response_text.rfind('}') + 1
|
| 168 |
+
if json_start != -1 and json_end > json_start:
|
| 169 |
+
json_content = response_text[json_start:json_end]
|
| 170 |
+
interpretation = json.loads(json_content)
|
| 171 |
+
else:
|
| 172 |
+
raise json_err
|
| 173 |
+
except Exception:
|
| 174 |
+
print(f"Could not parse response: {response.text}")
|
| 175 |
+
raise json_err
|
| 176 |
+
|
| 177 |
+
# Validate the response
|
| 178 |
+
if interpretation and 'query_type' in interpretation and 'cypher_query' in interpretation and 'message' in interpretation:
|
| 179 |
+
return {
|
| 180 |
+
'success': True,
|
| 181 |
+
'query_type': interpretation['query_type'],
|
| 182 |
+
'cypher_query': interpretation['cypher_query'],
|
| 183 |
+
'message': interpretation['message']
|
| 184 |
+
}
|
| 185 |
+
else:
|
| 186 |
+
return self._fallback_interpretation(query, locations)
|
| 187 |
+
|
| 188 |
+
except Exception as e:
|
| 189 |
+
error_message = str(e)
|
| 190 |
+
print(f"Google Gemini AI interpretation error: {e}")
|
| 191 |
+
|
| 192 |
+
# Handle specific Google API errors
|
| 193 |
+
if "quota" in error_message.lower() or "limit" in error_message.lower():
|
| 194 |
+
print("⚠️ Google API quota exceeded, falling back to rule-based interpretation")
|
| 195 |
+
elif "API_KEY_INVALID" in error_message or "authentication" in error_message.lower():
|
| 196 |
+
print("⚠️ Google API authentication failed, falling back to rule-based interpretation")
|
| 197 |
+
elif "models/gemini" in error_message.lower():
|
| 198 |
+
print("⚠️ Gemini model not available, falling back to rule-based interpretation")
|
| 199 |
+
|
| 200 |
+
return self._fallback_interpretation(query, locations)
|
| 201 |
+
|
| 202 |
+
def _fallback_interpretation(self, query: str, locations: List[Tuple]) -> Dict:
|
| 203 |
+
"""Fallback interpretation when LLM is not available"""
|
| 204 |
+
query_lower = query.lower()
|
| 205 |
+
|
| 206 |
+
# Simple keyword-based interpretation
|
| 207 |
+
if 'lowest' in query_lower or 'minimum' in query_lower or 'cheapest' in query_lower:
|
| 208 |
+
if 'lowest fare' in query_lower or 'minimum fare' in query_lower:
|
| 209 |
+
return {
|
| 210 |
+
'success': True,
|
| 211 |
+
'query_type': 'lowest_fare',
|
| 212 |
+
'cypher_query': """
|
| 213 |
+
MATCH (a:Place)-[r:Fare]->(b:Place)
|
| 214 |
+
WITH a, b, r, r.fare as fare
|
| 215 |
+
ORDER BY r.fare ASC
|
| 216 |
+
LIMIT 1
|
| 217 |
+
RETURN a.name as from_place, b.name as to_place, fare
|
| 218 |
+
""",
|
| 219 |
+
'message': 'Finding the lowest fare in the database...'
|
| 220 |
+
}
|
| 221 |
+
else:
|
| 222 |
+
return {
|
| 223 |
+
'success': True,
|
| 224 |
+
'query_type': 'cheapest',
|
| 225 |
+
'cypher_query': """
|
| 226 |
+
MATCH (a:Place)-[r:Fare]->(b:Place)
|
| 227 |
+
RETURN a.name as from_place, b.name as to_place, r.fare as fare
|
| 228 |
+
ORDER BY r.fare ASC
|
| 229 |
+
LIMIT 10
|
| 230 |
+
""",
|
| 231 |
+
'message': 'Finding the cheapest routes...'
|
| 232 |
+
}
|
| 233 |
+
elif 'expensive' in query_lower or 'highest' in query_lower or 'maximum' in query_lower:
|
| 234 |
+
return {
|
| 235 |
+
'success': True,
|
| 236 |
+
'query_type': 'expensive',
|
| 237 |
+
'cypher_query': """
|
| 238 |
+
MATCH (a:Place)-[r:Fare]->(b:Place)
|
| 239 |
+
RETURN a.name as from_place, b.name as to_place, r.fare as fare
|
| 240 |
+
ORDER BY r.fare DESC
|
| 241 |
+
LIMIT 10
|
| 242 |
+
""",
|
| 243 |
+
'message': 'Finding the most expensive routes...'
|
| 244 |
+
}
|
| 245 |
+
elif 'places' in query_lower or 'locations' in query_lower or 'list all' in query_lower:
|
| 246 |
+
return {
|
| 247 |
+
'success': True,
|
| 248 |
+
'query_type': 'places',
|
| 249 |
+
'cypher_query': """
|
| 250 |
+
MATCH (p:Place)
|
| 251 |
+
RETURN DISTINCT p.name as place
|
| 252 |
+
ORDER BY p.name
|
| 253 |
+
""",
|
| 254 |
+
'message': 'Listing all places...'
|
| 255 |
+
}
|
| 256 |
+
elif 'statistics' in query_lower or 'stats' in query_lower:
|
| 257 |
+
return {
|
| 258 |
+
'success': True,
|
| 259 |
+
'query_type': 'statistics',
|
| 260 |
+
'cypher_query': """
|
| 261 |
+
MATCH (p:Place)
|
| 262 |
+
MATCH ()-[r:Fare]->()
|
| 263 |
+
RETURN
|
| 264 |
+
count(DISTINCT p) as total_places,
|
| 265 |
+
count(r) as total_routes,
|
| 266 |
+
avg(r.fare) as average_fare,
|
| 267 |
+
min(r.fare) as min_fare,
|
| 268 |
+
max(r.fare) as max_fare
|
| 269 |
+
""",
|
| 270 |
+
'message': 'Getting database statistics...'
|
| 271 |
+
}
|
| 272 |
+
elif len(locations) >= 2:
|
| 273 |
+
# Fare query between two locations
|
| 274 |
+
from_location = locations[0][1]
|
| 275 |
+
to_location = locations[1][1]
|
| 276 |
+
return {
|
| 277 |
+
'success': True,
|
| 278 |
+
'query_type': 'fare',
|
| 279 |
+
'cypher_query': f"""
|
| 280 |
+
MATCH (a:Place {{name: '{from_location}'}})-[r:Fare]->(b:Place {{name: '{to_location}'}})
|
| 281 |
+
RETURN a.name as from_place, b.name as to_place, r.fare as fare
|
| 282 |
+
""",
|
| 283 |
+
'message': f'Finding fare from {from_location} to {to_location}...'
|
| 284 |
+
}
|
| 285 |
+
elif 'fare' in query_lower and 'to' in query_lower:
|
| 286 |
+
# Handle queries like "fare of X to Y" where locations might not be extracted properly
|
| 287 |
+
# Try to extract locations using a simpler pattern
|
| 288 |
+
import re
|
| 289 |
+
fare_patterns = [
|
| 290 |
+
r'fare\s+(?:of|from)?\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)',
|
| 291 |
+
r'price\s+(?:of|from)?\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)',
|
| 292 |
+
r'cost\s+(?:of|from)?\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)',
|
| 293 |
+
r'how\s+much\s+(?:is|does)\s+(?:the\s+)?(?:fare|price|cost)\s+(?:from\s+)?([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)',
|
| 294 |
+
r'what\s+(?:is|are)\s+(?:the\s+)?(?:fare|price|cost)s?\s+(?:from\s+)?([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)',
|
| 295 |
+
r'([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+(?:fare|price|cost)',
|
| 296 |
+
r'(?:fare|price|cost)\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)'
|
| 297 |
+
]
|
| 298 |
+
|
| 299 |
+
for pattern in fare_patterns:
|
| 300 |
+
match = re.search(pattern, query_lower)
|
| 301 |
+
if match:
|
| 302 |
+
from_loc = match.group(1).strip()
|
| 303 |
+
to_loc = match.group(2).strip()
|
| 304 |
+
|
| 305 |
+
# Correct the locations
|
| 306 |
+
from_corrected, from_conf, _ = self.spell_corrector.correct_location(from_loc)
|
| 307 |
+
to_corrected, to_conf, _ = self.spell_corrector.correct_location(to_loc)
|
| 308 |
+
|
| 309 |
+
if from_conf > 0.5 and to_conf > 0.5:
|
| 310 |
+
return {
|
| 311 |
+
'success': True,
|
| 312 |
+
'query_type': 'fare',
|
| 313 |
+
'cypher_query': f"""
|
| 314 |
+
MATCH (a:Place {{name: '{from_corrected}'}})-[r:Fare]->(b:Place {{name: '{to_corrected}'}})
|
| 315 |
+
RETURN a.name as from_place, b.name as to_place, r.fare as fare
|
| 316 |
+
""",
|
| 317 |
+
'message': f'Finding fare from {from_corrected} to {to_corrected}...'
|
| 318 |
+
}
|
| 319 |
+
elif len(locations) == 1:
|
| 320 |
+
# Routes from/to a single location
|
| 321 |
+
location = locations[0][1]
|
| 322 |
+
if 'from' in query_lower:
|
| 323 |
+
return {
|
| 324 |
+
'success': True,
|
| 325 |
+
'query_type': 'routes_from',
|
| 326 |
+
'cypher_query': f"""
|
| 327 |
+
MATCH (a:Place {{name: '{location}'}})-[r:Fare]->(b:Place)
|
| 328 |
+
RETURN a.name as from_place, b.name as to_place, r.fare as fare
|
| 329 |
+
ORDER BY r.fare
|
| 330 |
+
""",
|
| 331 |
+
'message': f'Finding routes from {location}...'
|
| 332 |
+
}
|
| 333 |
+
else:
|
| 334 |
+
return {
|
| 335 |
+
'success': True,
|
| 336 |
+
'query_type': 'routes_to',
|
| 337 |
+
'cypher_query': f"""
|
| 338 |
+
MATCH (a:Place)-[r:Fare]->(b:Place {{name: '{location}'}})
|
| 339 |
+
RETURN a.name as from_place, b.name as to_place, r.fare as fare
|
| 340 |
+
ORDER BY r.fare
|
| 341 |
+
""",
|
| 342 |
+
'message': f'Finding routes to {location}...'
|
| 343 |
+
}
|
| 344 |
+
else:
|
| 345 |
+
return {
|
| 346 |
+
'success': False,
|
| 347 |
+
'message': 'I could not understand your query. Please try rephrasing it.'
|
| 348 |
+
}
|
| 349 |
+
|
| 350 |
+
def _execute_cypher_query(self, cypher_query: str) -> List[Dict]:
|
| 351 |
+
"""Execute the generated Cypher query"""
|
| 352 |
+
try:
|
| 353 |
+
with self.neo4j_service.driver.session() as session:
|
| 354 |
+
result = session.run(cypher_query)
|
| 355 |
+
return [dict(record) for record in result]
|
| 356 |
+
except Exception as e:
|
| 357 |
+
print(f"Cypher execution error: {e}")
|
| 358 |
+
return []
|
| 359 |
+
|
| 360 |
+
def _format_corrections(self, locations: List[Tuple]) -> List[Dict]:
|
| 361 |
+
"""Format location corrections for display"""
|
| 362 |
+
corrections = []
|
| 363 |
+
for original, corrected, confidence, method in locations:
|
| 364 |
+
if original.lower() != corrected.lower():
|
| 365 |
+
corrections.append({
|
| 366 |
+
'original': original,
|
| 367 |
+
'corrected': corrected,
|
| 368 |
+
'confidence': confidence,
|
| 369 |
+
'method': method
|
| 370 |
+
})
|
| 371 |
+
return corrections
|
| 372 |
+
|
| 373 |
+
def _get_query_suggestions(self) -> List[str]:
|
| 374 |
+
"""Get query suggestions"""
|
| 375 |
+
return [
|
| 376 |
+
"What is the fare from Colombo to Kandy?",
|
| 377 |
+
"What is the lowest fare price?",
|
| 378 |
+
"Show me the cheapest routes",
|
| 379 |
+
"Show me the most expensive routes",
|
| 380 |
+
"List all places",
|
| 381 |
+
"Routes from Panadura",
|
| 382 |
+
"Routes to Galle",
|
| 383 |
+
"Database statistics"
|
| 384 |
+
]
|
logger.py
CHANGED
|
@@ -1,61 +1,61 @@
|
|
| 1 |
-
#!/usr/bin/env python3
|
| 2 |
-
"""
|
| 3 |
-
Centralized logging setup for the Transport Query Application.
|
| 4 |
-
Provides a rotating file handler and console output.
|
| 5 |
-
"""
|
| 6 |
-
|
| 7 |
-
import logging
|
| 8 |
-
import os
|
| 9 |
-
from logging.handlers import RotatingFileHandler
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
def get_logger(name: str) -> logging.Logger:
|
| 13 |
-
"""Create or retrieve a configured logger with file and console handlers."""
|
| 14 |
-
logger = logging.getLogger(name)
|
| 15 |
-
|
| 16 |
-
if getattr(logger, "_configured", False):
|
| 17 |
-
return logger
|
| 18 |
-
|
| 19 |
-
log_level_str = os.getenv("LOG_LEVEL", "INFO").upper()
|
| 20 |
-
log_dir = os.getenv("LOG_DIR", os.path.join(os.path.dirname(__file__), "..", "logs"))
|
| 21 |
-
|
| 22 |
-
# Try to create log directory, fallback to current directory if it fails
|
| 23 |
-
try:
|
| 24 |
-
log_dir = os.path.abspath(log_dir)
|
| 25 |
-
os.makedirs(log_dir, exist_ok=True)
|
| 26 |
-
except Exception:
|
| 27 |
-
# Fallback to current directory if path invalid
|
| 28 |
-
log_dir = os.getcwd()
|
| 29 |
-
|
| 30 |
-
log_path = os.path.join(log_dir, "app.log")
|
| 31 |
-
|
| 32 |
-
formatter = logging.Formatter(
|
| 33 |
-
fmt="%(asctime)s %(levelname)s [%(name)s] %(message)s",
|
| 34 |
-
datefmt="%Y-%m-%d %H:%M:%S",
|
| 35 |
-
)
|
| 36 |
-
|
| 37 |
-
# Console handler (always available)
|
| 38 |
-
console_handler = logging.StreamHandler()
|
| 39 |
-
console_handler.setFormatter(formatter)
|
| 40 |
-
logger.addHandler(console_handler)
|
| 41 |
-
|
| 42 |
-
# Try to add file handler, but don't fail if it doesn't work
|
| 43 |
-
try:
|
| 44 |
-
file_handler = RotatingFileHandler(log_path, maxBytes=1_000_000, backupCount=5, encoding="utf-8")
|
| 45 |
-
file_handler.setFormatter(formatter)
|
| 46 |
-
logger.addHandler(file_handler)
|
| 47 |
-
logger.debug(f"File logging enabled: {log_path}")
|
| 48 |
-
except (PermissionError, OSError) as e:
|
| 49 |
-
# If file logging fails, just log to console
|
| 50 |
-
logger.warning(f"File logging disabled due to permission error: {e}")
|
| 51 |
-
logger.warning("Logging to console only")
|
| 52 |
-
|
| 53 |
-
# Configure logger
|
| 54 |
-
logger.setLevel(getattr(logging, log_level_str, logging.INFO))
|
| 55 |
-
logger.propagate = False
|
| 56 |
-
|
| 57 |
-
logger._configured = True # type: ignore[attr-defined]
|
| 58 |
-
logger.debug(f"Logger initialized. Level={log_level_str}")
|
| 59 |
-
return logger
|
| 60 |
-
|
| 61 |
-
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Centralized logging setup for the Transport Query Application.
|
| 4 |
+
Provides a rotating file handler and console output.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import logging
|
| 8 |
+
import os
|
| 9 |
+
from logging.handlers import RotatingFileHandler
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def get_logger(name: str) -> logging.Logger:
|
| 13 |
+
"""Create or retrieve a configured logger with file and console handlers."""
|
| 14 |
+
logger = logging.getLogger(name)
|
| 15 |
+
|
| 16 |
+
if getattr(logger, "_configured", False):
|
| 17 |
+
return logger
|
| 18 |
+
|
| 19 |
+
log_level_str = os.getenv("LOG_LEVEL", "INFO").upper()
|
| 20 |
+
log_dir = os.getenv("LOG_DIR", os.path.join(os.path.dirname(__file__), "..", "logs"))
|
| 21 |
+
|
| 22 |
+
# Try to create log directory, fallback to current directory if it fails
|
| 23 |
+
try:
|
| 24 |
+
log_dir = os.path.abspath(log_dir)
|
| 25 |
+
os.makedirs(log_dir, exist_ok=True)
|
| 26 |
+
except Exception:
|
| 27 |
+
# Fallback to current directory if path invalid
|
| 28 |
+
log_dir = os.getcwd()
|
| 29 |
+
|
| 30 |
+
log_path = os.path.join(log_dir, "app.log")
|
| 31 |
+
|
| 32 |
+
formatter = logging.Formatter(
|
| 33 |
+
fmt="%(asctime)s %(levelname)s [%(name)s] %(message)s",
|
| 34 |
+
datefmt="%Y-%m-%d %H:%M:%S",
|
| 35 |
+
)
|
| 36 |
+
|
| 37 |
+
# Console handler (always available)
|
| 38 |
+
console_handler = logging.StreamHandler()
|
| 39 |
+
console_handler.setFormatter(formatter)
|
| 40 |
+
logger.addHandler(console_handler)
|
| 41 |
+
|
| 42 |
+
# Try to add file handler, but don't fail if it doesn't work
|
| 43 |
+
try:
|
| 44 |
+
file_handler = RotatingFileHandler(log_path, maxBytes=1_000_000, backupCount=5, encoding="utf-8")
|
| 45 |
+
file_handler.setFormatter(formatter)
|
| 46 |
+
logger.addHandler(file_handler)
|
| 47 |
+
logger.debug(f"File logging enabled: {log_path}")
|
| 48 |
+
except (PermissionError, OSError) as e:
|
| 49 |
+
# If file logging fails, just log to console
|
| 50 |
+
logger.warning(f"File logging disabled due to permission error: {e}")
|
| 51 |
+
logger.warning("Logging to console only")
|
| 52 |
+
|
| 53 |
+
# Configure logger
|
| 54 |
+
logger.setLevel(getattr(logging, log_level_str, logging.INFO))
|
| 55 |
+
logger.propagate = False
|
| 56 |
+
|
| 57 |
+
logger._configured = True # type: ignore[attr-defined]
|
| 58 |
+
logger.debug(f"Logger initialized. Level={log_level_str}")
|
| 59 |
+
return logger
|
| 60 |
+
|
| 61 |
+
|
neo4j_service.py
CHANGED
|
@@ -1,222 +1,222 @@
|
|
| 1 |
-
#!/usr/bin/env python3
|
| 2 |
-
"""
|
| 3 |
-
Neo4j Service for Transport Query Application
|
| 4 |
-
Handles all database operations
|
| 5 |
-
"""
|
| 6 |
-
|
| 7 |
-
from neo4j import GraphDatabase
|
| 8 |
-
from typing import List, Dict, Optional, Tuple
|
| 9 |
-
from config import Config
|
| 10 |
-
|
| 11 |
-
class Neo4jService:
|
| 12 |
-
"""Neo4j database service"""
|
| 13 |
-
|
| 14 |
-
def __init__(self):
|
| 15 |
-
self.config = Config()
|
| 16 |
-
self.driver = None
|
| 17 |
-
self._connect()
|
| 18 |
-
|
| 19 |
-
def _connect(self):
|
| 20 |
-
"""Connect to Neo4j database"""
|
| 21 |
-
try:
|
| 22 |
-
self.driver = GraphDatabase.driver(
|
| 23 |
-
self.config.NEO4J_URI,
|
| 24 |
-
auth=(self.config.NEO4J_USER, self.config.NEO4J_PASSWORD)
|
| 25 |
-
)
|
| 26 |
-
# Test connection
|
| 27 |
-
with self.driver.session() as session:
|
| 28 |
-
session.run("RETURN 1")
|
| 29 |
-
print("✅ Connected to Neo4j database")
|
| 30 |
-
except Exception as e:
|
| 31 |
-
print(f"❌ Failed to connect to Neo4j: {e}")
|
| 32 |
-
self.driver = None
|
| 33 |
-
|
| 34 |
-
def is_connected(self) -> bool:
|
| 35 |
-
"""Check if connected to Neo4j"""
|
| 36 |
-
return self.driver is not None
|
| 37 |
-
|
| 38 |
-
def get_fare(self, from_location: str, to_location: str) -> Optional[Dict]:
|
| 39 |
-
"""Get fare between two locations"""
|
| 40 |
-
if not self.is_connected():
|
| 41 |
-
return None
|
| 42 |
-
|
| 43 |
-
try:
|
| 44 |
-
with self.driver.session() as session:
|
| 45 |
-
result = session.run("""
|
| 46 |
-
MATCH (a:Place {name: $from_location})-[r:Fare]->(b:Place {name: $to_location})
|
| 47 |
-
RETURN a.name as from_place, b.name as to_place, r.fare as fare
|
| 48 |
-
""", from_location=from_location, to_location=to_location)
|
| 49 |
-
|
| 50 |
-
record = result.single()
|
| 51 |
-
if record:
|
| 52 |
-
return {
|
| 53 |
-
'from_place': record['from_place'],
|
| 54 |
-
'to_place': record['to_place'],
|
| 55 |
-
'fare': record['fare']
|
| 56 |
-
}
|
| 57 |
-
return None
|
| 58 |
-
|
| 59 |
-
except Exception as e:
|
| 60 |
-
print(f"Error getting fare: {e}")
|
| 61 |
-
return None
|
| 62 |
-
|
| 63 |
-
def get_all_places(self) -> List[str]:
|
| 64 |
-
"""Get all available places"""
|
| 65 |
-
if not self.is_connected():
|
| 66 |
-
return []
|
| 67 |
-
|
| 68 |
-
try:
|
| 69 |
-
with self.driver.session() as session:
|
| 70 |
-
result = session.run("""
|
| 71 |
-
MATCH (p:Place)
|
| 72 |
-
RETURN DISTINCT p.name as place
|
| 73 |
-
ORDER BY p.name
|
| 74 |
-
""")
|
| 75 |
-
|
| 76 |
-
return [record['place'] for record in result]
|
| 77 |
-
|
| 78 |
-
except Exception as e:
|
| 79 |
-
print(f"Error getting places: {e}")
|
| 80 |
-
return []
|
| 81 |
-
|
| 82 |
-
def get_routes_from_location(self, from_location: str) -> List[Dict]:
|
| 83 |
-
"""Get all routes from a specific location"""
|
| 84 |
-
if not self.is_connected():
|
| 85 |
-
return []
|
| 86 |
-
|
| 87 |
-
try:
|
| 88 |
-
with self.driver.session() as session:
|
| 89 |
-
result = session.run("""
|
| 90 |
-
MATCH (a:Place {name: $from_location})-[r:Fare]->(b:Place)
|
| 91 |
-
RETURN a.name as from_place, b.name as to_place, r.fare as fare
|
| 92 |
-
ORDER BY r.fare
|
| 93 |
-
""", from_location=from_location)
|
| 94 |
-
|
| 95 |
-
return [dict(record) for record in result]
|
| 96 |
-
|
| 97 |
-
except Exception as e:
|
| 98 |
-
print(f"Error getting routes from location: {e}")
|
| 99 |
-
return []
|
| 100 |
-
|
| 101 |
-
def get_routes_to_location(self, to_location: str) -> List[Dict]:
|
| 102 |
-
"""Get all routes to a specific location"""
|
| 103 |
-
if not self.is_connected():
|
| 104 |
-
return []
|
| 105 |
-
|
| 106 |
-
try:
|
| 107 |
-
with self.driver.session() as session:
|
| 108 |
-
result = session.run("""
|
| 109 |
-
MATCH (a:Place)-[r:Fare]->(b:Place {name: $to_location})
|
| 110 |
-
RETURN a.name as from_place, b.name as to_place, r.fare as fare
|
| 111 |
-
ORDER BY r.fare
|
| 112 |
-
""", to_location=to_location)
|
| 113 |
-
|
| 114 |
-
return [dict(record) for record in result]
|
| 115 |
-
|
| 116 |
-
except Exception as e:
|
| 117 |
-
print(f"Error getting routes to location: {e}")
|
| 118 |
-
return []
|
| 119 |
-
|
| 120 |
-
def get_cheapest_routes(self, limit: int = 10) -> List[Dict]:
|
| 121 |
-
"""Get cheapest routes"""
|
| 122 |
-
if not self.is_connected():
|
| 123 |
-
return []
|
| 124 |
-
|
| 125 |
-
try:
|
| 126 |
-
with self.driver.session() as session:
|
| 127 |
-
result = session.run("""
|
| 128 |
-
MATCH (a:Place)-[r:Fare]->(b:Place)
|
| 129 |
-
RETURN a.name as from_place, b.name as to_place, r.fare as fare
|
| 130 |
-
ORDER BY r.fare ASC
|
| 131 |
-
LIMIT $limit
|
| 132 |
-
""", limit=limit)
|
| 133 |
-
|
| 134 |
-
return [dict(record) for record in result]
|
| 135 |
-
|
| 136 |
-
except Exception as e:
|
| 137 |
-
print(f"Error getting cheapest routes: {e}")
|
| 138 |
-
return []
|
| 139 |
-
|
| 140 |
-
def get_most_expensive_routes(self, limit: int = 10) -> List[Dict]:
|
| 141 |
-
"""Get most expensive routes"""
|
| 142 |
-
if not self.is_connected():
|
| 143 |
-
return []
|
| 144 |
-
|
| 145 |
-
try:
|
| 146 |
-
with self.driver.session() as session:
|
| 147 |
-
result = session.run("""
|
| 148 |
-
MATCH (a:Place)-[r:Fare]->(b:Place)
|
| 149 |
-
RETURN a.name as from_place, b.name as to_place, r.fare as fare
|
| 150 |
-
ORDER BY r.fare DESC
|
| 151 |
-
LIMIT $limit
|
| 152 |
-
""", limit=limit)
|
| 153 |
-
|
| 154 |
-
return [dict(record) for record in result]
|
| 155 |
-
|
| 156 |
-
except Exception as e:
|
| 157 |
-
print(f"Error getting most expensive routes: {e}")
|
| 158 |
-
return []
|
| 159 |
-
|
| 160 |
-
def search_routes_by_fare_range(self, min_fare: float, max_fare: float) -> List[Dict]:
|
| 161 |
-
"""Search routes within a fare range"""
|
| 162 |
-
if not self.is_connected():
|
| 163 |
-
return []
|
| 164 |
-
|
| 165 |
-
try:
|
| 166 |
-
with self.driver.session() as session:
|
| 167 |
-
result = session.run("""
|
| 168 |
-
MATCH (a:Place)-[r:Fare]->(b:Place)
|
| 169 |
-
WHERE r.fare >= $min_fare AND r.fare <= $max_fare
|
| 170 |
-
RETURN a.name as from_place, b.name as to_place, r.fare as fare
|
| 171 |
-
ORDER BY r.fare
|
| 172 |
-
""", min_fare=min_fare, max_fare=max_fare)
|
| 173 |
-
|
| 174 |
-
return [dict(record) for record in result]
|
| 175 |
-
|
| 176 |
-
except Exception as e:
|
| 177 |
-
print(f"Error searching routes by fare range: {e}")
|
| 178 |
-
return []
|
| 179 |
-
|
| 180 |
-
def get_route_statistics(self) -> Dict:
|
| 181 |
-
"""Get database statistics"""
|
| 182 |
-
if not self.is_connected():
|
| 183 |
-
return {}
|
| 184 |
-
|
| 185 |
-
try:
|
| 186 |
-
with self.driver.session() as session:
|
| 187 |
-
# Count places
|
| 188 |
-
places_result = session.run("MATCH (p:Place) RETURN count(p) as place_count")
|
| 189 |
-
place_count = places_result.single()['place_count']
|
| 190 |
-
|
| 191 |
-
# Count routes
|
| 192 |
-
routes_result = session.run("MATCH ()-[r:Fare]->() RETURN count(r) as route_count")
|
| 193 |
-
route_count = routes_result.single()['route_count']
|
| 194 |
-
|
| 195 |
-
# Average fare
|
| 196 |
-
avg_result = session.run("MATCH ()-[r:Fare]->() RETURN avg(r.fare) as avg_fare")
|
| 197 |
-
avg_fare = avg_result.single()['avg_fare']
|
| 198 |
-
|
| 199 |
-
# Min and max fares
|
| 200 |
-
fare_range_result = session.run("""
|
| 201 |
-
MATCH ()-[r:Fare]->()
|
| 202 |
-
RETURN min(r.fare) as min_fare, max(r.fare) as max_fare
|
| 203 |
-
""")
|
| 204 |
-
fare_range = fare_range_result.single()
|
| 205 |
-
|
| 206 |
-
return {
|
| 207 |
-
'total_places': place_count,
|
| 208 |
-
'total_routes': route_count,
|
| 209 |
-
'average_fare': round(avg_fare, 2) if avg_fare else 0,
|
| 210 |
-
'min_fare': fare_range['min_fare'],
|
| 211 |
-
'max_fare': fare_range['max_fare']
|
| 212 |
-
}
|
| 213 |
-
|
| 214 |
-
except Exception as e:
|
| 215 |
-
print(f"Error getting statistics: {e}")
|
| 216 |
-
return {}
|
| 217 |
-
|
| 218 |
-
def close(self):
|
| 219 |
-
"""Close database connection"""
|
| 220 |
-
if self.driver:
|
| 221 |
-
self.driver.close()
|
| 222 |
-
self.driver = None
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Neo4j Service for Transport Query Application
|
| 4 |
+
Handles all database operations
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
from neo4j import GraphDatabase
|
| 8 |
+
from typing import List, Dict, Optional, Tuple
|
| 9 |
+
from config import Config
|
| 10 |
+
|
| 11 |
+
class Neo4jService:
    """Neo4j database service for the transport fare application.

    Connects on construction and exposes read-only query helpers.
    Every public method degrades gracefully when the database is
    unavailable: it returns an empty value (None / [] / {}) instead of
    raising, and prints the underlying error for diagnostics.
    """

    def __init__(self):
        self.config = Config()
        self.driver = None  # stays None when the connection fails
        self._connect()

    def _connect(self):
        """Create the driver and verify connectivity with a trivial query."""
        try:
            self.driver = GraphDatabase.driver(
                self.config.NEO4J_URI,
                auth=(self.config.NEO4J_USER, self.config.NEO4J_PASSWORD)
            )
            # Round-trip a no-op statement so a bad URI or bad credentials
            # surface here rather than on the first real query.
            with self.driver.session() as session:
                session.run("RETURN 1")
            print("✅ Connected to Neo4j database")
        except Exception as e:
            print(f"❌ Failed to connect to Neo4j: {e}")
            self.driver = None

    def is_connected(self) -> bool:
        """Check if connected to Neo4j."""
        return self.driver is not None

    def _run_list_query(self, query: str, error_context: str, **params) -> List[Dict]:
        """Run a Cypher query and return all records as plain dicts.

        Shared plumbing for the public query helpers: returns [] when
        disconnected or on any driver error (the error is printed with
        *error_context* so the caller's intent is visible in the log).
        """
        if not self.is_connected():
            return []
        try:
            with self.driver.session() as session:
                result = session.run(query, **params)
                return [dict(record) for record in result]
        except Exception as e:
            print(f"Error {error_context}: {e}")
            return []

    def get_fare(self, from_location: str, to_location: str) -> Optional[Dict]:
        """Get the fare between two locations, or None when no route exists."""
        records = self._run_list_query(
            """
            MATCH (a:Place {name: $from_location})-[r:Fare]->(b:Place {name: $to_location})
            RETURN a.name as from_place, b.name as to_place, r.fare as fare
            """,
            "getting fare",
            from_location=from_location,
            to_location=to_location,
        )
        return records[0] if records else None

    def get_all_places(self) -> List[str]:
        """Get all available place names, sorted alphabetically."""
        records = self._run_list_query(
            """
            MATCH (p:Place)
            RETURN DISTINCT p.name as place
            ORDER BY p.name
            """,
            "getting places",
        )
        return [record['place'] for record in records]

    def get_routes_from_location(self, from_location: str) -> List[Dict]:
        """Get all routes departing from a location, cheapest first."""
        return self._run_list_query(
            """
            MATCH (a:Place {name: $from_location})-[r:Fare]->(b:Place)
            RETURN a.name as from_place, b.name as to_place, r.fare as fare
            ORDER BY r.fare
            """,
            "getting routes from location",
            from_location=from_location,
        )

    def get_routes_to_location(self, to_location: str) -> List[Dict]:
        """Get all routes arriving at a location, cheapest first."""
        return self._run_list_query(
            """
            MATCH (a:Place)-[r:Fare]->(b:Place {name: $to_location})
            RETURN a.name as from_place, b.name as to_place, r.fare as fare
            ORDER BY r.fare
            """,
            "getting routes to location",
            to_location=to_location,
        )

    def get_cheapest_routes(self, limit: int = 10) -> List[Dict]:
        """Get the *limit* cheapest routes in the database."""
        return self._run_list_query(
            """
            MATCH (a:Place)-[r:Fare]->(b:Place)
            RETURN a.name as from_place, b.name as to_place, r.fare as fare
            ORDER BY r.fare ASC
            LIMIT $limit
            """,
            "getting cheapest routes",
            limit=limit,
        )

    def get_most_expensive_routes(self, limit: int = 10) -> List[Dict]:
        """Get the *limit* most expensive routes in the database."""
        return self._run_list_query(
            """
            MATCH (a:Place)-[r:Fare]->(b:Place)
            RETURN a.name as from_place, b.name as to_place, r.fare as fare
            ORDER BY r.fare DESC
            LIMIT $limit
            """,
            "getting most expensive routes",
            limit=limit,
        )

    def search_routes_by_fare_range(self, min_fare: float, max_fare: float) -> List[Dict]:
        """Search routes whose fare lies within [min_fare, max_fare]."""
        return self._run_list_query(
            """
            MATCH (a:Place)-[r:Fare]->(b:Place)
            WHERE r.fare >= $min_fare AND r.fare <= $max_fare
            RETURN a.name as from_place, b.name as to_place, r.fare as fare
            ORDER BY r.fare
            """,
            "searching routes by fare range",
            min_fare=min_fare,
            max_fare=max_fare,
        )

    def get_route_statistics(self) -> Dict:
        """Get database statistics (node/relationship counts, fare aggregates).

        Returns {} when disconnected or on any error.
        """
        if not self.is_connected():
            return {}

        try:
            with self.driver.session() as session:
                # Count places
                place_count = session.run(
                    "MATCH (p:Place) RETURN count(p) as place_count"
                ).single()['place_count']

                # One round trip for all relationship aggregates instead
                # of three separate queries (count, avg, min/max).
                fare_stats = session.run("""
                    MATCH ()-[r:Fare]->()
                    RETURN count(r) as route_count,
                           avg(r.fare) as avg_fare,
                           min(r.fare) as min_fare,
                           max(r.fare) as max_fare
                """).single()

                avg_fare = fare_stats['avg_fare']
                return {
                    'total_places': place_count,
                    'total_routes': fare_stats['route_count'],
                    # avg() is NULL when there are no relationships.
                    'average_fare': round(avg_fare, 2) if avg_fare else 0,
                    'min_fare': fare_stats['min_fare'],
                    'max_fare': fare_stats['max_fare'],
                }

        except Exception as e:
            print(f"Error getting statistics: {e}")
            return {}

    def close(self):
        """Close database connection (idempotent)."""
        if self.driver:
            self.driver.close()
            self.driver = None
|
spell_corrector.py
CHANGED
|
@@ -1,257 +1,257 @@
|
|
| 1 |
-
#!/usr/bin/env python3
|
| 2 |
-
"""
|
| 3 |
-
Spell Correction Module for Transport Query Application
|
| 4 |
-
Handles location name corrections using fuzzy matching and LLM
|
| 5 |
-
"""
|
| 6 |
-
|
| 7 |
-
import re
|
| 8 |
-
from fuzzywuzzy import fuzz
|
| 9 |
-
from typing import List, Tuple, Optional
|
| 10 |
-
import openai
|
| 11 |
-
from config import Config
|
| 12 |
-
|
| 13 |
-
class SpellCorrector:
    """Spell correction for location names.

    Tries, in order: direct alias mapping, fuzzy matching, optional
    LLM-based correction, and finally substring matching. Confidence
    scores are floats in [0, 1].
    """

    def __init__(self):
        self.config = Config()
        # Maps aliases/misspellings -> canonical location names.
        self.location_mapping = self.config.LOCATION_MAPPING
        # Canonical names only (the mapping's values).
        self.available_locations = set(self.location_mapping.values())

        # Initialize OpenAI if API key is available
        if self.config.OPENAI_API_KEY:
            try:
                # Prefer new SDK client if installed; otherwise set legacy api key
                try:
                    from openai import OpenAI  # noqa: F401
                    self.llm_available = True
                except Exception:
                    openai.api_key = self.config.OPENAI_API_KEY
                    self.llm_available = True
            except Exception:
                self.llm_available = False
        else:
            self.llm_available = False

    def correct_location(self, location: str) -> Tuple[str, float, str]:
        """
        Correct a location name using multiple methods

        Returns:
            Tuple of (corrected_name, confidence_score, correction_method)
        """
        location = location.strip().lower()

        # Method 1: Direct mapping (exact alias hit -> full confidence)
        if location in self.location_mapping:
            corrected = self.location_mapping[location]
            return corrected, 1.0, "direct_mapping"

        # Method 2: Fuzzy matching
        best_match, confidence = self._fuzzy_match(location)
        if confidence >= self.config.SIMILARITY_THRESHOLD:
            return best_match, confidence, "fuzzy_matching"

        # Method 3: LLM correction (if available)
        if self.llm_available:
            llm_corrected = self._llm_correct(location)
            if llm_corrected:
                # Verify LLM suggestion with fuzzy matching
                llm_confidence = fuzz.ratio(location.lower(), llm_corrected.lower()) / 100
                if llm_confidence >= 0.6:  # Lower threshold for LLM suggestions
                    return llm_corrected, llm_confidence, "llm_correction"

        # Method 4: Partial matching (substring containment, fixed 0.7 score)
        partial_match = self._partial_match(location)
        if partial_match:
            return partial_match, 0.7, "partial_matching"

        # No correction found — echo the input title-cased with zero confidence
        return location.title(), 0.0, "no_correction"

    def _fuzzy_match(self, location: str) -> Tuple[str, float]:
        """Find best fuzzy match for location.

        Returns (best_candidate, score in [0, 1]); (None, 0) when
        there are no known locations.
        """
        best_match = None
        best_score = 0

        for available_location in self.available_locations:
            score = fuzz.ratio(location.lower(), available_location.lower()) / 100
            if score > best_score:
                best_score = score
                best_match = available_location

        return best_match, best_score

    def _partial_match(self, location: str) -> Optional[str]:
        """Find partial matches (substring matching), or None."""
        location_lower = location.lower()

        for available_location in self.available_locations:
            available_lower = available_location.lower()

            # Check if location is contained in available location
            # (or vice versa); first hit wins — set order is arbitrary.
            if location_lower in available_lower or available_lower in location_lower:
                return available_location

        return None

    def _llm_correct(self, location: str) -> Optional[str]:
        """Use LLM to correct location name; returns None on any failure."""
        try:
            prompt = f"""
            You are a location name correction system for Sri Lankan cities and towns.
            Given a potentially misspelled location name, return the correct spelling.

            Available locations include: {', '.join(sorted(self.available_locations))}

            Input location: "{location}"

            Return only the corrected location name, nothing else. If no correction is possible, return "UNKNOWN".
            """

            corrected = None
            # Try new SDK first
            try:
                from openai import OpenAI
                client = OpenAI(api_key=self.config.OPENAI_API_KEY)
                response = client.chat.completions.create(
                    model=self.config.OPENAI_MODEL,
                    messages=[
                        {"role": "system", "content": "You are a helpful assistant that corrects location names."},
                        {"role": "user", "content": prompt}
                    ],
                    max_tokens=50,
                    temperature=0.1
                )
                corrected = response.choices[0].message.content.strip()
            except Exception as sdk_err:
                # Fallback to legacy API if present
                # NOTE(review): this import is redundant — openai is already
                # imported at module level.
                import openai
                try:
                    openai.api_key = self.config.OPENAI_API_KEY
                    response = openai.ChatCompletion.create(
                        model=self.config.OPENAI_MODEL,
                        messages=[
                            {"role": "system", "content": "You are a helpful assistant that corrects location names."},
                            {"role": "user", "content": prompt}
                        ],
                        max_tokens=50,
                        temperature=0.1
                    )
                    corrected = response.choices[0].message.content.strip()
                except Exception:
                    # Surface the original (new-SDK) error, not the fallback's.
                    raise sdk_err

            # Validate LLM response
            if corrected.upper() == "UNKNOWN":
                return None

            # Check if corrected location exists in our database
            if corrected in self.available_locations:
                return corrected

            # Try fuzzy matching on LLM response (stricter 0.8 threshold)
            llm_fuzzy_match, confidence = self._fuzzy_match(corrected)
            if confidence >= 0.8:
                return llm_fuzzy_match

            return None

        except Exception as e:
            print(f"LLM correction error: {e}")
            return None

    def extract_locations_from_query(self, query: str) -> List[Tuple[str, str, float, str]]:
        """
        Extract and correct locations from a natural language query

        Returns:
            List of tuples: (original, corrected, confidence, method)
        """
        # Common patterns for location extraction; each captures exactly
        # two groups: (from_location, to_location). Order matters — the
        # most specific "from X to Y" form is tried first.
        patterns = [
            r'from\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)(?:\s|$|\?)',
            r'([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)(?:\s|$|\?)',
            r'between\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+and\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)(?:\s|$|\?)',
            r'fare\s+(?:of|from\s+)?([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)(?:\s|$|\?)',
            r'price\s+(?:of|from\s+)?([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)(?:\s|$|\?)',
            r'cost\s+(?:of|from\s+)?([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)(?:\s|$|\?)',
            r'how\s+much\s+(?:is|does)\s+(?:the\s+)?(?:fare|price|cost)\s+(?:from\s+)?([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)(?:\s|$|\?)',
            r'what\s+(?:is|are)\s+(?:the\s+)?(?:fare|price|cost)s?\s+(?:from\s+)?([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)(?:\s|$|\?)',
            r'([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+(?:fare|price|cost)(?:\s|$|\?)',
            r'(?:fare|price|cost)\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)(?:\s|$|\?)'
        ]

        locations = []

        # Try all patterns to find locations
        for pattern in patterns:
            match = re.search(pattern, query, re.IGNORECASE)
            if match:
                # Extract locations from the match
                groups = match.groups()
                if len(groups) >= 2:
                    from_location = groups[0].strip()
                    to_location = groups[1].strip()

                    # Skip if locations are too short or common words
                    if len(from_location) >= 2 and from_location.lower() not in ['to', 'from', 'and', 'the', 'a', 'an']:
                        from_corrected, from_confidence, from_method = self.correct_location(from_location)
                        if from_confidence > 0.5:
                            locations.append((
                                from_location,
                                from_corrected,
                                from_confidence,
                                from_method
                            ))

                    if len(to_location) >= 2 and to_location.lower() not in ['to', 'from', 'and', 'the', 'a', 'an']:
                        to_corrected, to_confidence, to_method = self.correct_location(to_location)
                        if to_confidence > 0.5:
                            locations.append((
                                to_location,
                                to_corrected,
                                to_confidence,
                                to_method
                            ))

                # If we found locations, break to avoid duplicates
                if len(locations) >= 2:
                    break

        return locations

    def get_suggestions(self, partial_location: str) -> List[Tuple[str, float]]:
        """Get location suggestions for autocomplete, best first."""
        suggestions = []
        partial_lower = partial_location.lower()

        for location in self.available_locations:
            location_lower = location.lower()

            # Check if partial location is a prefix
            if location_lower.startswith(partial_lower):
                suggestions.append((location, 1.0))
            # Check fuzzy similarity
            # NOTE(review): fuzz.ratio is computed twice per candidate here;
            # could be computed once and reused.
            elif fuzz.ratio(partial_lower, location_lower) / 100 >= 0.6:
                suggestions.append((location, fuzz.ratio(partial_lower, location_lower) / 100))

        # Sort by confidence and return top suggestions
        suggestions.sort(key=lambda x: x[1], reverse=True)
        return suggestions[:self.config.MAX_SUGGESTIONS]

    def validate_route(self, from_location: str, to_location: str) -> Tuple[bool, str]:
        """Validate if a route exists in the database.

        Returns (ok, message); message explains the failure or echoes
        the corrected route on success.
        """
        from_corrected, from_confidence, _ = self.correct_location(from_location)
        to_corrected, to_confidence, _ = self.correct_location(to_location)

        if from_confidence < 0.5:
            return False, f"Could not identify departure location: '{from_location}'"

        if to_confidence < 0.5:
            return False, f"Could not identify destination location: '{to_location}'"

        if from_corrected == to_corrected:
            return False, f"Departure and destination cannot be the same: '{from_corrected}'"

        return True, f"Route: {from_corrected} → {to_corrected}"
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Spell Correction Module for Transport Query Application
|
| 4 |
+
Handles location name corrections using fuzzy matching and LLM
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import re
|
| 8 |
+
from fuzzywuzzy import fuzz
|
| 9 |
+
from typing import List, Tuple, Optional
|
| 10 |
+
import openai
|
| 11 |
+
from config import Config
|
| 12 |
+
|
| 13 |
+
class SpellCorrector:
    """Spell correction for location names.

    Tries, in order: direct alias mapping, fuzzy matching, optional
    LLM-based correction, and finally substring matching. Confidence
    scores are floats in [0, 1].
    """

    # Tokens that must never be treated as a location name.
    _STOPWORDS = ('to', 'from', 'and', 'the', 'a', 'an')

    def __init__(self):
        self.config = Config()
        # Maps aliases/misspellings -> canonical location names.
        self.location_mapping = self.config.LOCATION_MAPPING
        self.available_locations = set(self.location_mapping.values())

        # LLM correction is optional: enabled only when an API key is
        # configured and some OpenAI client (new SDK or legacy) loads.
        self.llm_available = False
        if self.config.OPENAI_API_KEY:
            try:
                try:
                    from openai import OpenAI  # noqa: F401  (new SDK present)
                    self.llm_available = True
                except Exception:
                    openai.api_key = self.config.OPENAI_API_KEY  # legacy SDK
                    self.llm_available = True
            except Exception:
                self.llm_available = False

    def correct_location(self, location: str) -> Tuple[str, float, str]:
        """
        Correct a location name using multiple methods.

        Returns:
            Tuple of (corrected_name, confidence_score, correction_method)
        """
        location = location.strip().lower()

        # Method 1: direct alias mapping (exact hit -> full confidence).
        if location in self.location_mapping:
            return self.location_mapping[location], 1.0, "direct_mapping"

        # Method 2: fuzzy matching against the canonical names.
        best_match, confidence = self._fuzzy_match(location)
        if confidence >= self.config.SIMILARITY_THRESHOLD:
            return best_match, confidence, "fuzzy_matching"

        # Method 3: LLM correction (if available).
        if self.llm_available:
            llm_corrected = self._llm_correct(location)
            if llm_corrected:
                # Verify LLM suggestion with fuzzy matching.
                llm_confidence = fuzz.ratio(location.lower(), llm_corrected.lower()) / 100
                if llm_confidence >= 0.6:  # Lower threshold for LLM suggestions
                    return llm_corrected, llm_confidence, "llm_correction"

        # Method 4: substring (partial) matching, fixed 0.7 score.
        partial_match = self._partial_match(location)
        if partial_match:
            return partial_match, 0.7, "partial_matching"

        # No correction found — echo the input title-cased, zero confidence.
        return location.title(), 0.0, "no_correction"

    def _fuzzy_match(self, location: str) -> Tuple[str, float]:
        """Return the closest known location and its similarity in [0, 1].

        Returns (None, 0) when there are no known locations.
        """
        best_match = None
        best_score = 0
        needle = location.lower()  # hoisted: lowercase once, not per candidate

        for candidate in self.available_locations:
            score = fuzz.ratio(needle, candidate.lower()) / 100
            if score > best_score:
                best_score = score
                best_match = candidate

        return best_match, best_score

    def _partial_match(self, location: str) -> Optional[str]:
        """Find partial matches (substring matching), or None."""
        location_lower = location.lower()

        for available_location in self.available_locations:
            available_lower = available_location.lower()

            # Match if either string contains the other; first hit wins
            # (set iteration order is arbitrary).
            if location_lower in available_lower or available_lower in location_lower:
                return available_location

        return None

    def _llm_correct(self, location: str) -> Optional[str]:
        """Use LLM to correct location name; returns None on any failure."""
        try:
            prompt = f"""
            You are a location name correction system for Sri Lankan cities and towns.
            Given a potentially misspelled location name, return the correct spelling.

            Available locations include: {', '.join(sorted(self.available_locations))}

            Input location: "{location}"

            Return only the corrected location name, nothing else. If no correction is possible, return "UNKNOWN".
            """

            corrected = None
            # Try new SDK first.
            try:
                from openai import OpenAI
                client = OpenAI(api_key=self.config.OPENAI_API_KEY)
                response = client.chat.completions.create(
                    model=self.config.OPENAI_MODEL,
                    messages=[
                        {"role": "system", "content": "You are a helpful assistant that corrects location names."},
                        {"role": "user", "content": prompt}
                    ],
                    max_tokens=50,
                    temperature=0.1
                )
                corrected = response.choices[0].message.content.strip()
            except Exception as sdk_err:
                # Fallback to the legacy API (openai is imported at module
                # level, so no re-import is needed here).
                try:
                    openai.api_key = self.config.OPENAI_API_KEY
                    response = openai.ChatCompletion.create(
                        model=self.config.OPENAI_MODEL,
                        messages=[
                            {"role": "system", "content": "You are a helpful assistant that corrects location names."},
                            {"role": "user", "content": prompt}
                        ],
                        max_tokens=50,
                        temperature=0.1
                    )
                    corrected = response.choices[0].message.content.strip()
                except Exception:
                    # Surface the original (new-SDK) error, not the fallback's.
                    raise sdk_err

            # Validate LLM response.
            if corrected.upper() == "UNKNOWN":
                return None

            # Accept the suggestion verbatim if it is a known location.
            if corrected in self.available_locations:
                return corrected

            # Otherwise accept only a close fuzzy hit (stricter 0.8 bar).
            llm_fuzzy_match, confidence = self._fuzzy_match(corrected)
            if confidence >= 0.8:
                return llm_fuzzy_match

            return None

        except Exception as e:
            print(f"LLM correction error: {e}")
            return None

    def _corrected_candidate(self, raw: str) -> Optional[Tuple[str, str, float, str]]:
        """Validate and correct one extracted token.

        Returns (original, corrected, confidence, method), or None when
        the token is too short, a stopword, or corrects with confidence
        <= 0.5.
        """
        raw = raw.strip()
        if len(raw) < 2 or raw.lower() in self._STOPWORDS:
            return None
        corrected, confidence, method = self.correct_location(raw)
        if confidence > 0.5:
            return (raw, corrected, confidence, method)
        return None

    def extract_locations_from_query(self, query: str) -> List[Tuple[str, str, float, str]]:
        """
        Extract and correct locations from a natural language query.

        Returns:
            List of tuples: (original, corrected, confidence, method)
        """
        # Common patterns for location extraction; each captures exactly
        # two groups: (from_location, to_location). Order matters — the
        # most specific "from X to Y" form is tried first.
        patterns = [
            r'from\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)(?:\s|$|\?)',
            r'([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)(?:\s|$|\?)',
            r'between\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+and\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)(?:\s|$|\?)',
            r'fare\s+(?:of|from\s+)?([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)(?:\s|$|\?)',
            r'price\s+(?:of|from\s+)?([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)(?:\s|$|\?)',
            r'cost\s+(?:of|from\s+)?([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)(?:\s|$|\?)',
            r'how\s+much\s+(?:is|does)\s+(?:the\s+)?(?:fare|price|cost)\s+(?:from\s+)?([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)(?:\s|$|\?)',
            r'what\s+(?:is|are)\s+(?:the\s+)?(?:fare|price|cost)s?\s+(?:from\s+)?([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)(?:\s|$|\?)',
            r'([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+(?:fare|price|cost)(?:\s|$|\?)',
            r'(?:fare|price|cost)\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)(?:\s|$|\?)'
        ]

        locations = []

        for pattern in patterns:
            match = re.search(pattern, query, re.IGNORECASE)
            if not match:
                continue

            groups = match.groups()
            if len(groups) >= 2:
                # Same validation for the from- and to- captures.
                for raw in groups[:2]:
                    entry = self._corrected_candidate(raw)
                    if entry:
                        locations.append(entry)

            # Stop once both endpoints are found to avoid duplicates from
            # later, more permissive patterns.
            if len(locations) >= 2:
                break

        return locations

    def get_suggestions(self, partial_location: str) -> List[Tuple[str, float]]:
        """Get location suggestions for autocomplete, best first."""
        suggestions = []
        partial_lower = partial_location.lower()

        for location in self.available_locations:
            location_lower = location.lower()

            if location_lower.startswith(partial_lower):
                # Prefix hits rank highest.
                suggestions.append((location, 1.0))
            else:
                # Compute the ratio once (the original computed it twice).
                score = fuzz.ratio(partial_lower, location_lower) / 100
                if score >= 0.6:
                    suggestions.append((location, score))

        # Sort by confidence and return top suggestions.
        suggestions.sort(key=lambda x: x[1], reverse=True)
        return suggestions[:self.config.MAX_SUGGESTIONS]

    def validate_route(self, from_location: str, to_location: str) -> Tuple[bool, str]:
        """Validate if a route exists in the database.

        Returns (ok, message); message explains the failure or echoes
        the corrected route on success.
        """
        from_corrected, from_confidence, _ = self.correct_location(from_location)
        to_corrected, to_confidence, _ = self.correct_location(to_location)

        if from_confidence < 0.5:
            return False, f"Could not identify departure location: '{from_location}'"

        if to_confidence < 0.5:
            return False, f"Could not identify destination location: '{to_location}'"

        if from_corrected == to_corrected:
            return False, f"Departure and destination cannot be the same: '{from_corrected}'"

        return True, f"Route: {from_corrected} → {to_corrected}"
|
translation_service.py
CHANGED
|
@@ -1,702 +1,1057 @@
|
|
| 1 |
-
#!/usr/bin/env python3
|
| 2 |
-
"""
|
| 3 |
-
Translation Service for
|
| 4 |
-
Handles translation of queries and responses
|
| 5 |
-
"""
|
| 6 |
-
|
| 7 |
-
import requests
|
| 8 |
-
import json
|
| 9 |
-
import re
|
| 10 |
-
import openai
|
| 11 |
-
from typing import Dict, Any, Optional
|
| 12 |
-
from config import Config
|
| 13 |
-
from logger import get_logger
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
self.
|
| 19 |
-
self.
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
self.
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
self.
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
'
|
| 37 |
-
'
|
| 38 |
-
'
|
| 39 |
-
'
|
| 40 |
-
'
|
| 41 |
-
'
|
| 42 |
-
'
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
'
|
| 48 |
-
'
|
| 49 |
-
'
|
| 50 |
-
'
|
| 51 |
-
'
|
| 52 |
-
'
|
| 53 |
-
'
|
| 54 |
-
'
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
'
|
| 58 |
-
'
|
| 59 |
-
'
|
| 60 |
-
'
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
'
|
| 64 |
-
'
|
| 65 |
-
'
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
'
|
| 71 |
-
'
|
| 72 |
-
'
|
| 73 |
-
'
|
| 74 |
-
'
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
'
|
| 78 |
-
'
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
'
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
'
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
'
|
| 88 |
-
'
|
| 89 |
-
'
|
| 90 |
-
'
|
| 91 |
-
'
|
| 92 |
-
'
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
'
|
| 98 |
-
'
|
| 99 |
-
|
| 100 |
-
# Common
|
| 101 |
-
'
|
| 102 |
-
'
|
| 103 |
-
'
|
| 104 |
-
'
|
| 105 |
-
'
|
| 106 |
-
'
|
| 107 |
-
'
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
'
|
| 116 |
-
'
|
| 117 |
-
'
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
'
|
| 121 |
-
'
|
| 122 |
-
'
|
| 123 |
-
'
|
| 124 |
-
'
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
'
|
| 128 |
-
'
|
| 129 |
-
'
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
#
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
return
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
try:
|
| 303 |
-
#
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
'
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
'
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
|
| 342 |
-
|
| 343 |
-
|
| 344 |
-
|
| 345 |
-
|
| 346 |
-
|
| 347 |
-
|
| 348 |
-
|
| 349 |
-
|
| 350 |
-
|
| 351 |
-
|
| 352 |
-
|
| 353 |
-
|
| 354 |
-
|
| 355 |
-
|
| 356 |
-
|
| 357 |
-
|
| 358 |
-
|
| 359 |
-
|
| 360 |
-
|
| 361 |
-
|
| 362 |
-
|
| 363 |
-
|
| 364 |
-
|
| 365 |
-
|
| 366 |
-
|
| 367 |
-
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
|
| 371 |
-
|
| 372 |
-
|
| 373 |
-
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
|
| 380 |
-
|
| 381 |
-
|
| 382 |
-
|
| 383 |
-
|
| 384 |
-
|
| 385 |
-
|
| 386 |
-
|
| 387 |
-
|
| 388 |
-
|
| 389 |
-
|
| 390 |
-
|
| 391 |
-
|
| 392 |
-
|
| 393 |
-
|
| 394 |
-
|
| 395 |
-
|
| 396 |
-
|
| 397 |
-
|
| 398 |
-
|
| 399 |
-
|
| 400 |
-
|
| 401 |
-
|
| 402 |
-
|
| 403 |
-
|
| 404 |
-
|
| 405 |
-
|
| 406 |
-
|
| 407 |
-
|
| 408 |
-
|
| 409 |
-
|
| 410 |
-
|
| 411 |
-
|
| 412 |
-
|
| 413 |
-
|
| 414 |
-
|
| 415 |
-
|
| 416 |
-
|
| 417 |
-
|
| 418 |
-
|
| 419 |
-
|
| 420 |
-
|
| 421 |
-
|
| 422 |
-
|
| 423 |
-
|
| 424 |
-
|
| 425 |
-
|
| 426 |
-
|
| 427 |
-
|
| 428 |
-
|
| 429 |
-
|
| 430 |
-
|
| 431 |
-
|
| 432 |
-
|
| 433 |
-
|
| 434 |
-
|
| 435 |
-
|
| 436 |
-
|
| 437 |
-
|
| 438 |
-
|
| 439 |
-
|
| 440 |
-
|
| 441 |
-
|
| 442 |
-
|
| 443 |
-
|
| 444 |
-
|
| 445 |
-
|
| 446 |
-
|
| 447 |
-
|
| 448 |
-
|
| 449 |
-
|
| 450 |
-
|
| 451 |
-
|
| 452 |
-
|
| 453 |
-
|
| 454 |
-
|
| 455 |
-
|
| 456 |
-
|
| 457 |
-
|
| 458 |
-
|
| 459 |
-
|
| 460 |
-
|
| 461 |
-
|
| 462 |
-
|
| 463 |
-
|
| 464 |
-
|
| 465 |
-
|
| 466 |
-
|
| 467 |
-
|
| 468 |
-
|
| 469 |
-
|
| 470 |
-
|
| 471 |
-
|
| 472 |
-
|
| 473 |
-
|
| 474 |
-
|
| 475 |
-
|
| 476 |
-
|
| 477 |
-
|
| 478 |
-
|
| 479 |
-
|
| 480 |
-
|
| 481 |
-
|
| 482 |
-
|
| 483 |
-
|
| 484 |
-
|
| 485 |
-
|
| 486 |
-
|
| 487 |
-
|
| 488 |
-
|
| 489 |
-
|
| 490 |
-
|
| 491 |
-
|
| 492 |
-
|
| 493 |
-
|
| 494 |
-
|
| 495 |
-
|
| 496 |
-
|
| 497 |
-
|
| 498 |
-
|
| 499 |
-
|
| 500 |
-
|
| 501 |
-
|
| 502 |
-
|
| 503 |
-
|
| 504 |
-
|
| 505 |
-
|
| 506 |
-
|
| 507 |
-
|
| 508 |
-
|
| 509 |
-
|
| 510 |
-
|
| 511 |
-
|
| 512 |
-
|
| 513 |
-
|
| 514 |
-
|
| 515 |
-
|
| 516 |
-
|
| 517 |
-
|
| 518 |
-
|
| 519 |
-
|
| 520 |
-
|
| 521 |
-
|
| 522 |
-
|
| 523 |
-
|
| 524 |
-
|
| 525 |
-
|
| 526 |
-
|
| 527 |
-
|
| 528 |
-
|
| 529 |
-
|
| 530 |
-
|
| 531 |
-
|
| 532 |
-
|
| 533 |
-
|
| 534 |
-
|
| 535 |
-
|
| 536 |
-
|
| 537 |
-
|
| 538 |
-
|
| 539 |
-
|
| 540 |
-
|
| 541 |
-
|
| 542 |
-
|
| 543 |
-
|
| 544 |
-
|
| 545 |
-
|
| 546 |
-
|
| 547 |
-
|
| 548 |
-
|
| 549 |
-
|
| 550 |
-
|
| 551 |
-
|
| 552 |
-
|
| 553 |
-
|
| 554 |
-
|
| 555 |
-
|
| 556 |
-
|
| 557 |
-
|
| 558 |
-
|
| 559 |
-
|
| 560 |
-
|
| 561 |
-
|
| 562 |
-
|
| 563 |
-
|
| 564 |
-
|
| 565 |
-
|
| 566 |
-
|
| 567 |
-
|
| 568 |
-
|
| 569 |
-
|
| 570 |
-
|
| 571 |
-
|
| 572 |
-
|
| 573 |
-
|
| 574 |
-
|
| 575 |
-
|
| 576 |
-
|
| 577 |
-
|
| 578 |
-
|
| 579 |
-
|
| 580 |
-
|
| 581 |
-
|
| 582 |
-
|
| 583 |
-
|
| 584 |
-
|
| 585 |
-
|
| 586 |
-
|
| 587 |
-
|
| 588 |
-
|
| 589 |
-
|
| 590 |
-
|
| 591 |
-
|
| 592 |
-
|
| 593 |
-
|
| 594 |
-
|
| 595 |
-
|
| 596 |
-
|
| 597 |
-
|
| 598 |
-
|
| 599 |
-
|
| 600 |
-
|
| 601 |
-
|
| 602 |
-
|
| 603 |
-
|
| 604 |
-
|
| 605 |
-
|
| 606 |
-
|
| 607 |
-
|
| 608 |
-
|
| 609 |
-
|
| 610 |
-
'
|
| 611 |
-
|
| 612 |
-
|
| 613 |
-
|
| 614 |
-
'
|
| 615 |
-
|
| 616 |
-
|
| 617 |
-
'
|
| 618 |
-
|
| 619 |
-
|
| 620 |
-
|
| 621 |
-
|
| 622 |
-
|
| 623 |
-
|
| 624 |
-
|
| 625 |
-
|
| 626 |
-
|
| 627 |
-
|
| 628 |
-
|
| 629 |
-
|
| 630 |
-
|
| 631 |
-
|
| 632 |
-
|
| 633 |
-
|
| 634 |
-
|
| 635 |
-
|
| 636 |
-
|
| 637 |
-
|
| 638 |
-
|
| 639 |
-
|
| 640 |
-
|
| 641 |
-
|
| 642 |
-
|
| 643 |
-
|
| 644 |
-
|
| 645 |
-
|
| 646 |
-
|
| 647 |
-
|
| 648 |
-
|
| 649 |
-
|
| 650 |
-
|
| 651 |
-
|
| 652 |
-
|
| 653 |
-
|
| 654 |
-
|
| 655 |
-
|
| 656 |
-
|
| 657 |
-
|
| 658 |
-
|
| 659 |
-
|
| 660 |
-
|
| 661 |
-
|
| 662 |
-
|
| 663 |
-
|
| 664 |
-
|
| 665 |
-
|
| 666 |
-
|
| 667 |
-
|
| 668 |
-
|
| 669 |
-
|
| 670 |
-
|
| 671 |
-
|
| 672 |
-
|
| 673 |
-
|
| 674 |
-
|
| 675 |
-
|
| 676 |
-
|
| 677 |
-
|
| 678 |
-
|
| 679 |
-
|
| 680 |
-
|
| 681 |
-
|
| 682 |
-
|
| 683 |
-
|
| 684 |
-
|
| 685 |
-
|
| 686 |
-
|
| 687 |
-
|
| 688 |
-
|
| 689 |
-
|
| 690 |
-
|
| 691 |
-
|
| 692 |
-
|
| 693 |
-
'
|
| 694 |
-
'
|
| 695 |
-
|
| 696 |
-
|
| 697 |
-
|
| 698 |
-
|
| 699 |
-
|
| 700 |
-
|
| 701 |
-
|
| 702 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Enhanced Translation Service for Multi-Language Support
|
| 4 |
+
Handles translation of queries and responses for Sinhala, Tamil, Singlish, and English
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import requests
|
| 8 |
+
import json
|
| 9 |
+
import re
|
| 10 |
+
import openai
|
| 11 |
+
from typing import Dict, Any, Optional
|
| 12 |
+
from config import Config
|
| 13 |
+
from logger import get_logger
|
| 14 |
+
from language_detector import LanguageDetector
|
| 15 |
+
|
| 16 |
+
class TranslationService:
|
| 17 |
+
def __init__(self):
|
| 18 |
+
self.config = Config()
|
| 19 |
+
self.openai_api_key = getattr(self.config, 'OPENAI_API_KEY', None)
|
| 20 |
+
self.logger = get_logger(self.__class__.__name__)
|
| 21 |
+
self.language_detector = LanguageDetector()
|
| 22 |
+
|
| 23 |
+
# Controls
|
| 24 |
+
import os
|
| 25 |
+
self.use_pattern_translation = os.getenv('USE_PATTERN_TRANSLATION', 'false').lower() == 'true'
|
| 26 |
+
self.force_llm_translation = os.getenv('FORCE_LLM_TRANSLATION', 'false').lower() == 'true'
|
| 27 |
+
self.last_translation_method: Optional[str] = None
|
| 28 |
+
|
| 29 |
+
# Free translation APIs
|
| 30 |
+
self.libre_translate_url = "https://libretranslate.de/translate" # Free public instance
|
| 31 |
+
self.mymemory_url = "https://api.mymemory.translated.net/get"
|
| 32 |
+
|
| 33 |
+
# Tamil transport terms and their English equivalents
|
| 34 |
+
self.tamil_transport_terms = {
|
| 35 |
+
# Fare related
|
| 36 |
+
'எவ்வளவு': 'how much',
|
| 37 |
+
'விலை': 'price',
|
| 38 |
+
'கட்டணம்': 'fare',
|
| 39 |
+
'செலவு': 'cost',
|
| 40 |
+
'பேருந்து கட்டணம்': 'bus fare',
|
| 41 |
+
'ரயில் கட்டணம்': 'train fare',
|
| 42 |
+
'டிக்கெட் விலை': 'ticket price',
|
| 43 |
+
|
| 44 |
+
# Locations
|
| 45 |
+
'கொழும்பு': 'Colombo',
|
| 46 |
+
'கண்டி': 'Kandy',
|
| 47 |
+
'காலி': 'Galle',
|
| 48 |
+
'மாத்தறை': 'Matara',
|
| 49 |
+
'அனுராதபுரம்': 'Anuradhapura',
|
| 50 |
+
'பனதுரை': 'Panadura',
|
| 51 |
+
'அலுத்துகமா': 'Aluthgama',
|
| 52 |
+
'நுகேகோடா': 'Nugegoda',
|
| 53 |
+
'தெஹிவாலா': 'Dehiwala',
|
| 54 |
+
'மொரட்டுவா': 'Moratuwa',
|
| 55 |
+
|
| 56 |
+
# Direction words
|
| 57 |
+
'இருந்து': 'from',
|
| 58 |
+
'வரை': 'to',
|
| 59 |
+
'வழியாக': 'via',
|
| 60 |
+
'மூலம்': 'through',
|
| 61 |
+
|
| 62 |
+
# Question words
|
| 63 |
+
'எங்கே': 'where',
|
| 64 |
+
'எப்போது': 'when',
|
| 65 |
+
'எப்படி': 'how',
|
| 66 |
+
'என்ன': 'what',
|
| 67 |
+
'யார்': 'who',
|
| 68 |
+
|
| 69 |
+
# Comparison words
|
| 70 |
+
'உடன்': 'with',
|
| 71 |
+
'மற்றும்': 'and',
|
| 72 |
+
'அல்லது': 'or',
|
| 73 |
+
'அதிகம்': 'more',
|
| 74 |
+
'குறைவு': 'less',
|
| 75 |
+
'ஒரே': 'same',
|
| 76 |
+
'வேறு': 'different',
|
| 77 |
+
'ஒப்பிடு': 'compare',
|
| 78 |
+
'வித்தியாசம்': 'difference',
|
| 79 |
+
|
| 80 |
+
# Time words
|
| 81 |
+
'இப்போது': 'now',
|
| 82 |
+
'இன்று': 'today',
|
| 83 |
+
'நாளை': 'tomorrow',
|
| 84 |
+
'நேற்று': 'yesterday',
|
| 85 |
+
|
| 86 |
+
# Common verbs
|
| 87 |
+
'போ': 'go',
|
| 88 |
+
'வா': 'come',
|
| 89 |
+
'பார்': 'see',
|
| 90 |
+
'தெரிந்து கொள்': 'know',
|
| 91 |
+
'கண்டுபிடி': 'find',
|
| 92 |
+
'கற்றுக்கொள்': 'learn',
|
| 93 |
+
'பரிந்துரை': 'recommend',
|
| 94 |
+
'கா���்டு': 'show',
|
| 95 |
+
|
| 96 |
+
# Numbers and currency
|
| 97 |
+
'ரூபாய்': 'rupees',
|
| 98 |
+
'ரூ': 'rupees',
|
| 99 |
+
|
| 100 |
+
# Common phrases
|
| 101 |
+
'இடையில்': 'between',
|
| 102 |
+
'உடன்': 'with',
|
| 103 |
+
'பாதைகள்': 'routes',
|
| 104 |
+
'பிரபலமான': 'popular',
|
| 105 |
+
'சராசரி': 'average',
|
| 106 |
+
'தரவு': 'data',
|
| 107 |
+
'புள்ளிவிவரங்கள்': 'statistics'
|
| 108 |
+
}
|
| 109 |
+
|
| 110 |
+
# Common transport terms in Sinhala and their English equivalents
|
| 111 |
+
self.transport_terms = {
|
| 112 |
+
# Fare related
|
| 113 |
+
'කීයද': 'how much',
|
| 114 |
+
'මිල': 'price',
|
| 115 |
+
'වාරික': 'fare',
|
| 116 |
+
'වාරිකය': 'fare',
|
| 117 |
+
'වාරිකව': 'fare',
|
| 118 |
+
'ගාස්තු': 'fare',
|
| 119 |
+
'ගාස්තුව': 'fare',
|
| 120 |
+
'ප්රවාහන ගාස්තු': 'transport fare',
|
| 121 |
+
'බස් ගාස්තු': 'bus fare',
|
| 122 |
+
'බස් ගාස්තුව': 'bus fare',
|
| 123 |
+
'රේල් ගාස්තු': 'train fare',
|
| 124 |
+
'රේල් ගාස්තුව': 'train fare',
|
| 125 |
+
|
| 126 |
+
# Locations
|
| 127 |
+
'කොළඹ': 'Colombo',
|
| 128 |
+
'මහනුවර': 'Kandy',
|
| 129 |
+
'මහනුවරට': 'Kandy',
|
| 130 |
+
'ගාල්ල': 'Galle',
|
| 131 |
+
'ගාල්ලට': 'Galle',
|
| 132 |
+
'මාතර': 'Matara',
|
| 133 |
+
'මාතරට': 'Matara',
|
| 134 |
+
'අනුරාධපුර': 'Anuradhapura',
|
| 135 |
+
'අනුරාධපුරට': 'Anuradhapura',
|
| 136 |
+
'පානදුර': 'Panadura',
|
| 137 |
+
'පානදුරට': 'Panadura',
|
| 138 |
+
'අලුත්ගම': 'Aluthgama',
|
| 139 |
+
'අලුත්ගමට': 'Aluthgama',
|
| 140 |
+
'නුගේගොඩ': 'Nugegoda',
|
| 141 |
+
'නුගේගොඩට': 'Nugegoda',
|
| 142 |
+
'දෙහිවල': 'Dehiwala',
|
| 143 |
+
'දෙහිවලට': 'Dehiwala',
|
| 144 |
+
'මොරටුව': 'Moratuwa',
|
| 145 |
+
'මොරටුවට': 'Moratuwa',
|
| 146 |
+
|
| 147 |
+
# Direction words
|
| 148 |
+
'වලින්': 'from',
|
| 149 |
+
'වල': 'from',
|
| 150 |
+
'ට': 'to',
|
| 151 |
+
'වෙත': 'to',
|
| 152 |
+
'සිට': 'from',
|
| 153 |
+
'දක්වා': 'to',
|
| 154 |
+
'සි': 'from',
|
| 155 |
+
|
| 156 |
+
# Question words
|
| 157 |
+
'කොහෙද': 'where',
|
| 158 |
+
'කවදාද': 'when',
|
| 159 |
+
'කොහොමද': 'how',
|
| 160 |
+
'මොනවාද': 'what',
|
| 161 |
+
'කවුද': 'who',
|
| 162 |
+
|
| 163 |
+
# Comparison words
|
| 164 |
+
'සමඟ': 'with',
|
| 165 |
+
'සහ': 'and',
|
| 166 |
+
'හෝ': 'or',
|
| 167 |
+
'වඩා': 'more',
|
| 168 |
+
'අඩු': 'less',
|
| 169 |
+
'සමාන': 'same',
|
| 170 |
+
'වෙනස': 'different',
|
| 171 |
+
'සසඳන්න': 'compare',
|
| 172 |
+
'සසඳන': 'compare',
|
| 173 |
+
|
| 174 |
+
# Time words
|
| 175 |
+
'දැන්': 'now',
|
| 176 |
+
'අද': 'today',
|
| 177 |
+
'හෙට': 'tomorrow',
|
| 178 |
+
'ඊයේ': 'yesterday',
|
| 179 |
+
|
| 180 |
+
# Common verbs
|
| 181 |
+
'යන්න': 'go',
|
| 182 |
+
'යන': 'go',
|
| 183 |
+
'එන්න': 'come',
|
| 184 |
+
'බලන්න': 'see',
|
| 185 |
+
'දැනගන්න': 'know',
|
| 186 |
+
'සොයන්න': 'find',
|
| 187 |
+
'සොයන': 'find',
|
| 188 |
+
'ඉගෙනගන්න': 'learn',
|
| 189 |
+
'නිර්දේශ': 'recommend',
|
| 190 |
+
'නිර්දේශ කරන්න': 'recommend',
|
| 191 |
+
'පෙන්වන්න': 'show',
|
| 192 |
+
'පෙන්වන': 'show',
|
| 193 |
+
|
| 194 |
+
# Numbers and currency
|
| 195 |
+
'රුපියල්': 'rupees',
|
| 196 |
+
'රු': 'rupees',
|
| 197 |
+
'රුපියල': 'rupees',
|
| 198 |
+
|
| 199 |
+
# Common phrases
|
| 200 |
+
'අතර': 'between',
|
| 201 |
+
'සහිත': 'with',
|
| 202 |
+
'මාර්ග': 'routes',
|
| 203 |
+
'මාර්ගවල': 'routes',
|
| 204 |
+
'ගමනාන්ත': 'destinations',
|
| 205 |
+
'ප්රසිද්ධ': 'popular',
|
| 206 |
+
'සාමාන්ය': 'average',
|
| 207 |
+
'සාමාන්යය': 'average',
|
| 208 |
+
'දත්ත': 'data',
|
| 209 |
+
'සංඛ්යාලේඛන': 'statistics'
|
| 210 |
+
}
|
| 211 |
+
|
| 212 |
+
# Sinhala script detection pattern
|
| 213 |
+
self.sinhala_pattern = re.compile(r'[\u0D80-\u0DFF]')
|
| 214 |
+
|
| 215 |
+
def is_sinhala_text(self, text: str) -> bool:
|
| 216 |
+
"""Check if text contains Sinhala characters"""
|
| 217 |
+
detected = bool(self.sinhala_pattern.search(text))
|
| 218 |
+
self.logger.debug(f"Sinhala detection: detected={detected}, text='{text}'")
|
| 219 |
+
return detected
|
| 220 |
+
|
| 221 |
+
def is_tamil_text(self, text: str) -> bool:
|
| 222 |
+
"""Check if text contains Tamil characters"""
|
| 223 |
+
tamil_pattern = re.compile(r'[\u0B80-\u0BFF]')
|
| 224 |
+
detected = bool(tamil_pattern.search(text))
|
| 225 |
+
self.logger.debug(f"Tamil detection: detected={detected}, text='{text}'")
|
| 226 |
+
return detected
|
| 227 |
+
|
| 228 |
+
def is_singlish_text(self, text: str) -> bool:
|
| 229 |
+
"""Check if text is Singlish (Sinhala-English mixed)"""
|
| 230 |
+
detection_result = self.language_detector.detect_language(text)
|
| 231 |
+
return detection_result['language'] == 'singlish'
|
| 232 |
+
|
| 233 |
+
def _map_sinhala_place(self, text: str) -> str:
|
| 234 |
+
"""Map a Sinhala place token to its English equivalent using known terms and suffix stripping."""
|
| 235 |
+
candidate = text.strip()
|
| 236 |
+
# Direct map
|
| 237 |
+
if candidate in self.transport_terms:
|
| 238 |
+
return self.transport_terms[candidate]
|
| 239 |
+
# Strip common Sinhala case particles/suffixes and try again
|
| 240 |
+
base = re.sub(r'(ට|වෙත|දක්වා|වලින්|වල|සිට)$', '', candidate)
|
| 241 |
+
if base in self.transport_terms:
|
| 242 |
+
return self.transport_terms[base]
|
| 243 |
+
return candidate
|
| 244 |
+
|
| 245 |
+
def _map_tamil_place(self, text: str) -> str:
|
| 246 |
+
"""Map a Tamil place token to its English equivalent using known terms and suffix stripping."""
|
| 247 |
+
candidate = text.strip()
|
| 248 |
+
# Direct map
|
| 249 |
+
if candidate in self.tamil_transport_terms:
|
| 250 |
+
return self.tamil_transport_terms[candidate]
|
| 251 |
+
# Strip common Tamil case particles/suffixes and try again
|
| 252 |
+
base = re.sub(r'(இருந்து|வரை|வழியாக|மூலம்)$', '', candidate)
|
| 253 |
+
if base in self.tamil_transport_terms:
|
| 254 |
+
return self.tamil_transport_terms[base]
|
| 255 |
+
return candidate
|
| 256 |
+
|
| 257 |
+
def _parse_sinhala_fare_query(self, query: str) -> Optional[str]:
|
| 258 |
+
"""Detect simple Sinhala fare queries and build a clean English query.
|
| 259 |
+
Example handled: "කොළඹ සිට මහනුවරට ගාස්තුව කීයද?" -> "What is the fare from Colombo to Kandy?"
|
| 260 |
+
"""
|
| 261 |
+
try:
|
| 262 |
+
# Quick check for fare-related tokens to avoid false positives
|
| 263 |
+
if not any(tok in query for tok in ['ගාස්තු', 'ගාස්තුව', 'වාරික', 'වාරිකය', 'මිල']):
|
| 264 |
+
return None
|
| 265 |
+
# Extract source and destination around Sinhala "from" and "to" particles
|
| 266 |
+
m = re.search(r'([\u0D80-\u0DFF\s]+?)\s*සිට\s*([\u0D80-\u0DFF\s]+?)(?:ට|වෙත|දක්වා)', query)
|
| 267 |
+
if not m:
|
| 268 |
+
return None
|
| 269 |
+
src_si = m.group(1).strip()
|
| 270 |
+
dst_si = m.group(2).strip()
|
| 271 |
+
src_en = self._map_sinhala_place(src_si)
|
| 272 |
+
dst_en = self._map_sinhala_place(dst_si)
|
| 273 |
+
return f"What is the fare from {src_en} to {dst_en}?"
|
| 274 |
+
except Exception:
|
| 275 |
+
return None
|
| 276 |
+
|
| 277 |
+
def _parse_tamil_fare_query(self, query: str) -> Optional[str]:
|
| 278 |
+
"""Detect simple Tamil fare queries and build a clean English query.
|
| 279 |
+
Example handled: "கொழும்பு இருந்து கண்டி வரை கட்டணம் எவ்வளவு?" -> "What is the fare from Colombo to Kandy?"
|
| 280 |
+
"""
|
| 281 |
+
try:
|
| 282 |
+
# Quick check for fare-related tokens to avoid false positives
|
| 283 |
+
if not any(tok in query for tok in ['கட்டணம்', 'விலை', 'செலவு', 'எவ்வளவு']):
|
| 284 |
+
return None
|
| 285 |
+
# Extract source and destination around Tamil "from" and "to" particles
|
| 286 |
+
m = re.search(r'([\u0B80-\u0BFF\s]+?)\s*இருந்து\s*([\u0B80-\u0BFF\s]+?)(?:வரை|வழியாக)', query)
|
| 287 |
+
if not m:
|
| 288 |
+
return None
|
| 289 |
+
src_ta = m.group(1).strip()
|
| 290 |
+
dst_ta = m.group(2).strip()
|
| 291 |
+
src_en = self._map_tamil_place(src_ta)
|
| 292 |
+
dst_en = self._map_tamil_place(dst_ta)
|
| 293 |
+
return f"What is the fare from {src_en} to {dst_en}?"
|
| 294 |
+
except Exception:
|
| 295 |
+
return None
|
| 296 |
+
|
| 297 |
+
def translate_with_llm(self, text: str, target_lang: str, source_lang: str = 'auto') -> Optional[str]:
|
| 298 |
+
"""Translate using OpenAI LLM (new SDK). Preserve original intent (comparison, lists, conjunctions)."""
|
| 299 |
+
if not self.openai_api_key:
|
| 300 |
+
return None
|
| 301 |
+
|
| 302 |
+
try:
|
| 303 |
+
# Determine source language
|
| 304 |
+
if source_lang == 'auto':
|
| 305 |
+
detection_result = self.language_detector.detect_language(text)
|
| 306 |
+
detected_lang = detection_result['language']
|
| 307 |
+
if detected_lang == 'sinhala':
|
| 308 |
+
source_lang = 'si'
|
| 309 |
+
elif detected_lang == 'tamil':
|
| 310 |
+
source_lang = 'ta'
|
| 311 |
+
elif detected_lang == 'singlish':
|
| 312 |
+
source_lang = 'singlish'
|
| 313 |
+
else:
|
| 314 |
+
source_lang = 'en'
|
| 315 |
+
|
| 316 |
+
# Create language mapping
|
| 317 |
+
lang_map = {
|
| 318 |
+
('si', 'en'): 'Sinhala to English',
|
| 319 |
+
('en', 'si'): 'English to Sinhala',
|
| 320 |
+
('ta', 'en'): 'Tamil to English',
|
| 321 |
+
('en', 'ta'): 'English to Tamil',
|
| 322 |
+
('singlish', 'en'): 'Singlish to English'
|
| 323 |
+
}
|
| 324 |
+
|
| 325 |
+
direction = lang_map.get((source_lang, target_lang))
|
| 326 |
+
if not direction:
|
| 327 |
+
return None
|
| 328 |
+
|
| 329 |
+
prompt = f"""
|
| 330 |
+
Translate the following text from {direction}.
|
| 331 |
+
Output only the translated text without quotes or extra commentary.
|
| 332 |
+
Critically: Preserve the original intent and structure. Do not simplify.
|
| 333 |
+
- If it is a comparison (e.g., includes "සසඳා බලන්න"/"සසඳන්න"), translate as a comparison (e.g., "Compare ...").
|
| 334 |
+
- Preserve conjunctions like "සහ" as "and" and keep all mentioned routes.
|
| 335 |
+
- Keep direction words ("සිට" = from, "ට/වෙත/දක්වා" = to) and render routes fully.
|
| 336 |
+
Use standard English city names:
|
| 337 |
+
- මහනුවර = Kandy (not Mahanuwara)
|
| 338 |
+
- කොළඹ = Colombo
|
| 339 |
+
- ගාල්ල = Galle
|
| 340 |
+
- මාතර = Matara
|
| 341 |
+
- අනුරාධපුර = Anuradhapura
|
| 342 |
+
|
| 343 |
+
Text to translate: {text}
|
| 344 |
+
"""
|
| 345 |
+
|
| 346 |
+
# Build few-shot examples to preserve comparison/imperative structure
|
| 347 |
+
examples = []
|
| 348 |
+
|
| 349 |
+
# Add examples based on source language
|
| 350 |
+
if source_lang == 'si':
|
| 351 |
+
examples = [
|
| 352 |
+
(
|
| 353 |
+
"කොළඹ සිට මහනුවරට යන බස් ගාස්තුව කීයද?",
|
| 354 |
+
"What is the bus fare from Colombo to Kandy?"
|
| 355 |
+
),
|
| 356 |
+
(
|
| 357 |
+
"කොළඹ සිට ගාල්ල දක්වා ටිකට් මිල කීයද?",
|
| 358 |
+
"What is the ticket price from Colombo to Galle?"
|
| 359 |
+
),
|
| 360 |
+
(
|
| 361 |
+
"කොළඹ සිට පානදුර දක්වා සහ කොළඹ සිට ගාල்ල දක්වා ගාස්තු සසඳා බලන්න.",
|
| 362 |
+
"Compare fares from Colombo to Panadura and from Colombo to Galle."
|
| 363 |
+
),
|
| 364 |
+
(
|
| 365 |
+
"රුපියල් 500 ට අඩු ගාස්තු සහිත මාර්ග පෙන්වන්න.",
|
| 366 |
+
"Show routes with fares under 500 rupees."
|
| 367 |
+
),
|
| 368 |
+
(
|
| 369 |
+
"අඩු මිලේ මාර්ග නිර්දේශ කරන්න.",
|
| 370 |
+
"Recommend cheap routes."
|
| 371 |
+
),
|
| 372 |
+
]
|
| 373 |
+
elif source_lang == 'ta':
|
| 374 |
+
examples = [
|
| 375 |
+
(
|
| 376 |
+
"கொழும்பு இருந்து கண்டி வரை பேருந்து கட்டணம் எவ்வளவு?",
|
| 377 |
+
"What is the bus fare from Colombo to Kandy?"
|
| 378 |
+
),
|
| 379 |
+
(
|
| 380 |
+
"கொழும்பு இருந்து காலி வரை டிக்கெட் விலை எவ்வளவு?",
|
| 381 |
+
"What is the ticket price from Colombo to Galle?"
|
| 382 |
+
),
|
| 383 |
+
(
|
| 384 |
+
"கொழும்பு இருந்து பனதுரை வரை மற்றும் கொழும்பு இருந்து காலி வரை கட்டணம் ஒப்பிடு.",
|
| 385 |
+
"Compare fares from Colombo to Panadura and from Colombo to Galle."
|
| 386 |
+
),
|
| 387 |
+
(
|
| 388 |
+
"ரூபாய் 500 க்கு குறைவான கட்டணம் உள்ள பாதைகளை காட்டு.",
|
| 389 |
+
"Show routes with fares under 500 rupees."
|
| 390 |
+
),
|
| 391 |
+
(
|
| 392 |
+
"குறைந்த விலையில் பாதைகளை பரிந்துரை.",
|
| 393 |
+
"Recommend cheap routes."
|
| 394 |
+
),
|
| 395 |
+
]
|
| 396 |
+
elif source_lang == 'singlish':
|
| 397 |
+
examples = [
|
| 398 |
+
(
|
| 399 |
+
"කොළඹ සිට Kandy ගාස්තුව කීයද?",
|
| 400 |
+
"What is the fare from Colombo to Kandy?"
|
| 401 |
+
),
|
| 402 |
+
(
|
| 403 |
+
"Colombo සිට ගාල්ල�� bus fare කීයද?",
|
| 404 |
+
"What is the bus fare from Colombo to Galle?"
|
| 405 |
+
),
|
| 406 |
+
(
|
| 407 |
+
"කොළඹ සිට Panadura සහ Colombo සිට Galle fares compare කරන්න.",
|
| 408 |
+
"Compare fares from Colombo to Panadura and from Colombo to Galle."
|
| 409 |
+
),
|
| 410 |
+
]
|
| 411 |
+
|
| 412 |
+
# Compose messages with few-shot conditioning
|
| 413 |
+
def build_messages(txt: str):
|
| 414 |
+
msgs = [
|
| 415 |
+
{
|
| 416 |
+
"role": "system",
|
| 417 |
+
"content": (
|
| 418 |
+
"You are a professional translator. Translate accurately and naturally. "
|
| 419 |
+
"Preserve imperative/comparative intent and list structure. Do not paraphrase. "
|
| 420 |
+
"Return only the English translation without quotes. "
|
| 421 |
+
"Canonical phrasing rules (use exactly): \n"
|
| 422 |
+
"- Use 'Compare' for comparison requests.\n"
|
| 423 |
+
"- Use 'Show' for requests like 'පෙන්වන්න' (do not use Provide/List).\n"
|
| 424 |
+
"- Use 'How much is the' for 'කීයද' fare/price questions.\n"
|
| 425 |
+
"- Use 'cheap' (not 'affordable').\n"
|
| 426 |
+
"- Use 'under' (not 'below') for '< value'.\n"
|
| 427 |
+
),
|
| 428 |
+
},
|
| 429 |
+
{
|
| 430 |
+
"role": "user",
|
| 431 |
+
"content": (
|
| 432 |
+
"Instructions: Preserve structure. Use 'Compare' for 'සසඳ', use 'from' for 'සිට' and 'to' for 'ට/වෙත/දක්වා'.\n"
|
| 433 |
+
"Use exact place names: මහනුවර=Kandy, කොළඹ=Colombo, ගාල්ල=Galle, මාතර=Matara, අනුරාධපුර=Anuradhapura."
|
| 434 |
+
),
|
| 435 |
+
},
|
| 436 |
+
]
|
| 437 |
+
for si, en in examples:
|
| 438 |
+
msgs.append({"role": "user", "content": f"Sinhala: {si}\nEnglish:"})
|
| 439 |
+
msgs.append({"role": "assistant", "content": en})
|
| 440 |
+
msgs.append({"role": "user", "content": f"Sinhala: {txt}\nEnglish:"})
|
| 441 |
+
return msgs
|
| 442 |
+
|
| 443 |
+
# Use new OpenAI SDK
|
| 444 |
+
try:
|
| 445 |
+
from openai import OpenAI
|
| 446 |
+
client = OpenAI(api_key=self.openai_api_key)
|
| 447 |
+
response = client.chat.completions.create(
|
| 448 |
+
model="gpt-3.5-turbo",
|
| 449 |
+
max_tokens=150,
|
| 450 |
+
temperature=0.3,
|
| 451 |
+
messages=build_messages(text)
|
| 452 |
+
)
|
| 453 |
+
translated = response.choices[0].message.content.strip()
|
| 454 |
+
self.last_translation_method = 'llm'
|
| 455 |
+
except Exception as sdk_err:
|
| 456 |
+
# Fallback to legacy API if available
|
| 457 |
+
import openai
|
| 458 |
+
try:
|
| 459 |
+
openai.api_key = self.openai_api_key
|
| 460 |
+
response = openai.ChatCompletion.create(
|
| 461 |
+
model="gpt-3.5-turbo",
|
| 462 |
+
max_tokens=150,
|
| 463 |
+
temperature=0.3,
|
| 464 |
+
messages=build_messages(text)
|
| 465 |
+
)
|
| 466 |
+
translated = response.choices[0].message.content.strip()
|
| 467 |
+
self.last_translation_method = 'llm'
|
| 468 |
+
except Exception:
|
| 469 |
+
raise sdk_err
|
| 470 |
+
|
| 471 |
+
if translated.startswith('"') and translated.endswith('"'):
|
| 472 |
+
translated = translated[1:-1]
|
| 473 |
+
return translated if translated else None
|
| 474 |
+
except Exception as e:
|
| 475 |
+
self.logger.warning(f"LLM translation error: {e}")
|
| 476 |
+
return None
|
| 477 |
+
|
| 478 |
+
def translate_with_libre_translate(self, text: str, target_lang: str, source_lang: str = 'auto') -> Optional[str]:
|
| 479 |
+
"""Translate using LibreTranslate (free public API)"""
|
| 480 |
+
try:
|
| 481 |
+
# Map language codes
|
| 482 |
+
lang_map = {
|
| 483 |
+
'si': 'si', # Sinhala
|
| 484 |
+
'en': 'en', # English
|
| 485 |
+
'auto': 'auto'
|
| 486 |
+
}
|
| 487 |
+
|
| 488 |
+
source = lang_map.get(source_lang, 'auto')
|
| 489 |
+
target = lang_map.get(target_lang, 'en')
|
| 490 |
+
|
| 491 |
+
payload = {
|
| 492 |
+
'q': text,
|
| 493 |
+
'source': source,
|
| 494 |
+
'target': target,
|
| 495 |
+
'format': 'text'
|
| 496 |
+
}
|
| 497 |
+
|
| 498 |
+
headers = {
|
| 499 |
+
'Content-Type': 'application/json'
|
| 500 |
+
}
|
| 501 |
+
|
| 502 |
+
response = requests.post(
|
| 503 |
+
self.libre_translate_url,
|
| 504 |
+
json=payload,
|
| 505 |
+
headers=headers,
|
| 506 |
+
timeout=10
|
| 507 |
+
)
|
| 508 |
+
|
| 509 |
+
if response.status_code == 200:
|
| 510 |
+
result = response.json()
|
| 511 |
+
translated = result.get('translatedText')
|
| 512 |
+
self.logger.debug(f"LibreTranslate success: '{text}' -> '{translated}'")
|
| 513 |
+
self.last_translation_method = 'libretranslate'
|
| 514 |
+
return translated
|
| 515 |
+
|
| 516 |
+
return None
|
| 517 |
+
|
| 518 |
+
except Exception as e:
|
| 519 |
+
self.logger.warning(f"LibreTranslate error: {e}")
|
| 520 |
+
return None
|
| 521 |
+
|
| 522 |
+
def translate_with_mymemory(self, text: str, target_lang: str, source_lang: str = 'auto') -> Optional[str]:
    """Translate via the MyMemory free HTTP API.

    Returns the translated string on HTTP 200, or None on any failure
    (non-200 status, network error, malformed response).
    """
    try:
        # Normalize the source code; unknown values fall back to 'auto'.
        known_codes = {
            'si': 'si',  # Sinhala
            'en': 'en',  # English
            'auto': 'auto'
        }
        # NOTE(review): MyMemory's langpair parameter expects explicit ISO
        # codes; an 'auto' source may be rejected by the API -- confirm
        # against the MyMemory documentation.
        pair = f"{known_codes.get(source_lang, 'auto')}|{target_lang}"

        response = requests.get(
            self.mymemory_url,
            params={'q': text, 'langpair': pair},
            timeout=10
        )
        if response.status_code != 200:
            return None
        translated = response.json().get('responseData', {}).get('translatedText')
        self.logger.debug(f"MyMemory success: '{text}' -> '{translated}'")
        self.last_translation_method = 'mymemory'
        return translated
    except Exception as e:
        # Best-effort backend: log and let the caller fall through.
        self.logger.warning(f"MyMemory translation error: {e}")
        return None
|
| 558 |
+
|
| 559 |
+
|
| 560 |
+
|
| 561 |
+
def translate_with_dictionary(self, text: str, target_lang: str, source_lang: str = 'auto') -> str:
    """Phrase-table translation using the built-in transport term maps.

    Supports Sinhala/Tamil <-> English via naive substring replacement;
    any other target language returns the text unchanged.
    """
    if target_lang == 'en':
        # Resolve an 'auto' source through the language detector;
        # anything that is not Tamil is treated as Sinhala.
        if source_lang == 'auto':
            detected = self.language_detector.detect_language(text)['language']
            source_lang = 'ta' if detected == 'tamil' else 'si'

        result = text
        if source_lang == 'si':
            # Sinhala -> English term substitution.
            for si_term, en_term in self.transport_terms.items():
                result = result.replace(si_term, en_term)
        elif source_lang == 'ta':
            # Tamil -> English term substitution.
            for ta_term, en_term in self.tamil_transport_terms.items():
                result = result.replace(ta_term, en_term)
        return result

    if target_lang == 'si':
        # English -> Sinhala: apply the same map in reverse.
        result = text
        for si_term, en_term in self.transport_terms.items():
            result = result.replace(en_term, si_term)
        return result

    if target_lang == 'ta':
        # English -> Tamil: apply the Tamil map in reverse.
        result = text
        for ta_term, en_term in self.tamil_transport_terms.items():
            result = result.replace(en_term, ta_term)
        return result

    # Unsupported target language: pass the text through untouched.
    return text
|
| 601 |
+
|
| 602 |
+
def translate_text(self, text: str, target_lang: str, source_lang: str = 'auto') -> str:
    """Translate *text* to *target_lang*, trying each backend in order.

    Backends are tried LLM -> MyMemory -> LibreTranslate -> Dictionary
    (LLM only, when force_llm_translation is set); the first non-empty
    result wins.  The dictionary translator is the guaranteed final
    fallback, so a string is always returned.
    """
    # Empty / whitespace-only input: nothing to translate.
    if not text or not text.strip():
        return text

    if self.force_llm_translation:
        translation_methods = [
            ('LLM', lambda: self.translate_with_llm(text, target_lang, source_lang))
        ]
    else:
        translation_methods = [
            ('LLM', lambda: self.translate_with_llm(text, target_lang, source_lang)),
            ('MyMemory', lambda: self.translate_with_mymemory(text, target_lang, source_lang)),
            ('LibreTranslate', lambda: self.translate_with_libre_translate(text, target_lang, source_lang)),
            # Bug fix: forward source_lang so the dictionary pass does not
            # re-run auto-detection when the caller already knows the source.
            ('Dictionary', lambda: self.translate_with_dictionary(text, target_lang, source_lang))
        ]

    for method_name, method_func in translation_methods:
        try:
            result = method_func()
            if result and result.strip():
                self.logger.info(f"Translation successful using {method_name}")
                # Backends usually record the method themselves; only fill
                # it in here if they did not.
                if not self.last_translation_method:
                    self.last_translation_method = method_name.lower()
                return result.strip()
        except Exception as e:
            self.logger.warning(f"{method_name} translation failed: {e}")
            continue

    # Final fallback: dictionary translation always yields a string.
    result = self.translate_with_dictionary(text, target_lang, source_lang)
    self.last_translation_method = 'dictionary'
    return result
|
| 636 |
+
|
| 637 |
+
def translate_query(self, query: str) -> Dict[str, Any]:
    """Detect the language of *query* and translate it to English.

    English input is passed through untouched.  Sinhala/Tamil fare
    questions may be answered by the fast pattern parser before falling
    back to the full translation pipeline plus synonym normalization.
    """
    detection = self.language_detector.detect_language(query)
    language = detection['language']

    def _result(translated: str, method: str) -> Dict[str, Any]:
        # All return paths share this shape; only the translated text and
        # the method used differ.
        return {
            'is_sinhala': language == 'sinhala',
            'is_tamil': language == 'tamil',
            'is_singlish': language == 'singlish',
            'detected_language': language,
            'original_query': query,
            'translated_query': translated,
            'translation_method': method,
            'detection_confidence': detection['confidence']
        }

    # Already English: no translation needed.
    if language == 'english':
        return _result(query, 'none')

    # Fast path: structured fare-query patterns for Sinhala/Tamil.
    if self.use_pattern_translation:
        parsed = None
        if language == 'sinhala':
            parsed = self._parse_sinhala_fare_query(query)
        elif language == 'tamil':
            parsed = self._parse_tamil_fare_query(query)

        if parsed:
            self.logger.info(f"Pattern-based {language} fare parse: '{query}' -> '{parsed}'")
            return _result(parsed, 'pattern')

    # Full pipeline: translate to English, then normalize synonyms to the
    # vocabulary the NLP layer expects.  Non-Tamil defaults to Sinhala.
    source_lang = 'ta' if language == 'tamil' else 'si'
    translated = self.translate_text(query, 'en', source_lang)
    translated = self._normalize_english_query(translated)
    method = self.last_translation_method or ('llm' if self.openai_api_key else 'dictionary')

    self.logger.info(f"Translated {language} query ({method}): '{query}' -> '{translated}'")
    return _result(translated, method)
|
| 698 |
+
|
| 699 |
+
def _normalize_english_query(self, text: str) -> str:
|
| 700 |
+
"""Normalize English synonyms to match NLP patterns (fare/price/cost)."""
|
| 701 |
+
if not text:
|
| 702 |
+
return text
|
| 703 |
+
normalized = text
|
| 704 |
+
replacements = {
|
| 705 |
+
'fees': 'fare',
|
| 706 |
+
'fee': 'fare',
|
| 707 |
+
'charges': 'cost',
|
| 708 |
+
'charge': 'cost',
|
| 709 |
+
'ticket price': 'fare',
|
| 710 |
+
'ticket fare': 'fare',
|
| 711 |
+
'bus ticket': 'bus fare',
|
| 712 |
+
}
|
| 713 |
+
# Lowercase operate, then restore original casing minimally by returning lowercase; downstream lowercases anyway
|
| 714 |
+
lower = normalized.lower()
|
| 715 |
+
for old, new in replacements.items():
|
| 716 |
+
lower = lower.replace(old, new)
|
| 717 |
+
return lower
|
| 718 |
+
|
| 719 |
+
def translate_response(self, response: Dict[str, Any], target_language: str = None) -> Dict[str, Any]:
    """Translate a response payload from English back to the user's language.

    When *target_language* is omitted it is derived from the response's
    translation_info (Sinhala unless Tamil was detected).  Translates
    the message, suggestions and correction entries on a shallow copy,
    then stamps fresh translation metadata onto it.
    """
    out = response.copy()

    # Derive the target language when the caller did not supply one.
    if target_language is None:
        detected = None
        if 'translation_info' in response:
            detected = response['translation_info'].get('detected_language')
        target_language = 'ta' if detected == 'tamil' else 'si'

    # Main user-facing message.
    if 'message' in response:
        out['message'] = self.translate_text(response['message'], target_language, 'en')

    # Suggestion strings, if any were provided.
    if response.get('suggestions'):
        out['suggestions'] = [
            self.translate_text(suggestion, target_language, 'en')
            for suggestion in response['suggestions']
        ]

    # Spell-correction entries: translate both sides of each correction.
    if response.get('corrections'):
        translated_corrections = []
        for correction in response['corrections']:
            item = correction.copy()
            for field in ('original', 'corrected'):
                if field in correction:
                    item[field] = self.translate_text(correction[field], target_language, 'en')
            translated_corrections.append(item)
        out['corrections'] = translated_corrections

    # Stamp translation metadata (replaces any incoming translation_info).
    out['translation_info'] = {
        'translated': True,
        'target_language': target_language,
        'translation_method': 'llm' if self.openai_api_key else 'dictionary'
    }

    return out
|
| 776 |
+
|
| 777 |
+
def get_sinhala_examples(self) -> Dict[str, Any]:
    """Return canned example queries in Sinhala, grouped by query category.

    Each entry pairs a ready-to-run 'query' with a short 'description'
    (both in Sinhala) for display in the UI.
    """
    return {
        # Point-to-point fare lookups.
        'fare_queries': [
            {
                'query': 'කොළඹ සිට මහනුවරට යන බස් ගාස්තුව කීයද?',
                'description': 'කොළඹ සිට මහනුවරට යන බස් ගාස්තුව සොයන්න'
            },
            {
                'query': 'මාතර සිට ගාල්ලට යන මිල කීයද?',
                'description': 'මාතර සිට ගාල්ලට යන මිල සොයන්න'
            },
            {
                'query': 'අනුරාධපුර සිට කොළඹට යන වාරිකය',
                'description': 'අනුරාධපුර සිට කොළඹට යන වාරිකය සොයන්න'
            }
        ],
        # Fare comparisons between two routes.
        'comparison_queries': [
            {
                'query': 'කොළඹ සිට මහනුවරට සහ කොළඹ සිට ගාල්ලට යන ගාස්තු සසඳන්න',
                'description': 'විවිධ මාර්ගවල ගාස්තු සසඳන්න'
            },
            {
                'query': 'කොළඹ සිට මහනුවරට සහ කොළඹ සිට අනුරාධපුරට යන ගාස්තුවල වෙනස කීයද?',
                'description': 'මාර්ග දෙකක ගාස්තු වෙනස සොයන්න'
            }
        ],
        # Fare-range filters.
        'range_queries': [
            {
                'query': 'රුපියල් 500 ට අඩු ගාස්තු සහිත මාර්ග සොයන්න',
                'description': 'රුපියල් 500 ට අඩු ගාස්තු සහිත මාර්ග සොයන්න'
            },
            {
                'query': 'රුපියල් 200 සහ 800 අතර ගාස්තු සහිත මාර්ග පෙන්වන්න',
                'description': 'රුපියල් 200 සහ 800 අතර ගාස්තු සහිත මාර්ග සොයන්න'
            }
        ],
        # Route recommendations.
        'recommendation_queries': [
            {
                'query': 'අඩු මිලේ මාර්ග නිර්දේශ කරන්න',
                'description': 'අඩු මිලේ මාර්ග නිර්දේශ කරන්න'
            },
            {
                'query': 'ප්රසිද්ධ ගමනාන්ත පෙන්වන්න',
                'description': 'ප්රසිද්ධ ගමනාන්ත සොයන්න'
            }
        ],
        # Dataset statistics.
        'statistical_queries': [
            {
                'query': 'සාමාන්ය ගාස්තුව කීයද?',
                'description': 'සාමාන්ය ගාස්තුව සොයන්න'
            },
            {
                'query': 'දත්ත ගබඩා සංඛ්යාලේඛන',
                'description': 'දත්ත ගබඩා සංඛ්යාලේඛන සොයන්න'
            }
        ]
    }
|
| 837 |
+
|
| 838 |
+
def get_tamil_examples(self) -> Dict[str, Any]:
    """Return canned example queries in Tamil, grouped by query category.

    Each entry pairs a ready-to-run 'query' with a short 'description'
    (both in Tamil) for display in the UI.
    """
    return {
        # Point-to-point fare lookups.
        'fare_queries': [
            {
                'query': 'கொழும்பு இருந்து கண்டி வரை பேருந்து கட்டணம் எவ்வளவு?',
                'description': 'கொழும்பு இருந்து கண்டி வரை பேருந்து கட்டணம் கண்டுபிடி'
            },
            {
                'query': 'மாத்தறை இருந்து காலி வரை விலை எவ்வளவு?',
                'description': 'மாத்தறை இருந்து காலி வரை விலை கண்டுபிடி'
            },
            {
                'query': 'அனுராதபுரம் இருந்து கொழும்பு வரை கட்டணம்',
                'description': 'அனுராதபுரம் இருந்து கொழும்பு வரை கட்டணம் கண்டுபிடி'
            }
        ],
        # Fare comparisons between two routes.
        'comparison_queries': [
            {
                'query': 'கொழும்பு இருந்து கண்டி வரை மற்றும் கொழும்பு இருந்து காலி வரை கட்டணம் ஒப்பிடு',
                'description': 'வெவ்வேறு பாதைகளின் கட்டணம் ஒப்பிடு'
            },
            {
                'query': 'கொழும்பு இருந்து கண்டி வரை மற்றும் கொழும்பு இருந்து அனுராதபுரம் வரை கட்டணத்தின் வித்தியாசம் எவ்வளவு?',
                'description': 'இரண்டு பாதைகளின் கட்டண வித்தியாசம் கண்டுபிடி'
            }
        ],
        # Fare-range filters.
        'range_queries': [
            {
                'query': 'ரூபாய் 500 க்கு குறைவான கட்டணம் உள்ள பாதைகளை கண்டுபிடி',
                'description': 'ரூபாய் 500 க்கு குறைவான கட்டணம் உள்ள பாதைகளை கண்டுபிடி'
            },
            {
                'query': 'ரூபாய் 200 மற்றும் 800 இடையில் கட்டணம் உள்ள பாதைகளை காட்டு',
                'description': 'ரூபாய் 200 மற்றும் 800 இடையில் கட்டணம் உள்ள பாதைகளை கண்டுபிடி'
            }
        ],
        # Route recommendations.
        'recommendation_queries': [
            {
                'query': 'குறைந்த விலையில் பாதைகளை பரிந்துரை',
                'description': 'குறைந்த விலையில் பாதைகளை பரிந்துரை'
            },
            {
                'query': 'பிரபலமான இலக்குகளை காட்டு',
                'description': 'பிரபலமான இலக்குகளை கண்டுபிடி'
            }
        ],
        # Dataset statistics.
        'statistical_queries': [
            {
                'query': 'சராசரி கட்டணம் எவ்வளவு?',
                'description': 'சராசரி கட்டணம் கண்டுபிடி'
            },
            {
                'query': 'தரவு சேமிப்பக புள்ளிவிவரங்கள்',
                'description': 'தரவு சேமிப்பக புள்ளிவிவரங்கள் கண்டுபிடி'
            }
        ]
    }
|
| 898 |
+
|
| 899 |
+
def test_translation(self) -> Dict[str, Any]:
    """Test translation functionality on transportation-related queries in multiple languages.

    Runs a fixed suite of Sinhala, Tamil, Singlish and English queries
    through translate_text, scores each result against a reference
    English rendering with a substring heuristic, and returns per-case
    results plus an aggregate summary.

    Returns:
        Dict with service status, supported languages, which backends are
        available, a 'summary' dict and the per-case 'test_results' list.
    """
    # Reference expectations; the scoring below is a containment
    # heuristic, not an exact-match requirement.
    test_cases = [
        # Sinhala test cases
        {
            'language': 'sinhala',
            'original': 'කොළඹ සිට මහනුවරට යන බස් ගාස්තුව කීයද?',
            'expected_english': 'What is the bus fare from Colombo to Kandy?'
        },
        {
            'language': 'sinhala',
            'original': 'මාතර සිට ගාල්ලට යන මිල කීයද?',
            'expected_english': 'How much is the price from Matara to Galle?'
        },
        {
            'language': 'sinhala',
            'original': 'කොළඹ සිට පානදුර දක්වා සහ කොළඹ සිට ගාල්ල දක්වා ගාස්තු සසඳා බලන්න.',
            'expected_english': 'Compare fares from Colombo to Panadura and from Colombo to Galle.'
        },
        {
            'language': 'sinhala',
            'original': 'රුපියල් 500 ට අඩු ගාස්තු සහිත මාර්ග පෙන්වන්න.',
            'expected_english': 'Show routes with fares under 500 rupees.'
        },
        {
            'language': 'sinhala',
            'original': 'අඩු මිලේ මාර්ග නිර්දේශ කරන්න.',
            'expected_english': 'Recommend cheap routes.'
        },

        # Tamil test cases
        {
            'language': 'tamil',
            'original': 'கொழும்பு இருந்து கண்டி வரை பேருந்து கட்டணம் எவ்வளவு?',
            'expected_english': 'What is the bus fare from Colombo to Kandy?'
        },
        {
            'language': 'tamil',
            'original': 'மாத்தறை இருந்து காலி வரை விலை எவ்வளவு?',
            'expected_english': 'How much is the price from Matara to Galle?'
        },
        {
            'language': 'tamil',
            'original': 'கொழும்பு இருந்து பனதுரை வரை மற்றும் கொழும்பு இருந்து காலி வரை கட்டணம் ஒப்பிடு.',
            'expected_english': 'Compare fares from Colombo to Panadura and from Colombo to Galle.'
        },
        {
            'language': 'tamil',
            'original': 'ரூபாய் 500 க்கு குறைவான கட்டணம் உள்ள பாதைகளை காட்டு.',
            'expected_english': 'Show routes with fares under 500 rupees.'
        },
        {
            'language': 'tamil',
            'original': 'குறைந்த விலையில் பாதைகளை பரிந்துரை.',
            'expected_english': 'Recommend cheap routes.'
        },

        # Singlish test cases
        {
            'language': 'singlish',
            'original': 'කොළඹ සිට Kandy ගාස්තුව කීයද?',
            'expected_english': 'What is the fare from Colombo to Kandy?'
        },
        {
            'language': 'singlish',
            'original': 'Colombo සිට ගාල්ලට bus fare කීයද?',
            'expected_english': 'What is the bus fare from Colombo to Galle?'
        },
        {
            'language': 'singlish',
            'original': 'කොළඹ සිට Panadura සහ Colombo සිට Galle fares compare කරන්න.',
            'expected_english': 'Compare fares from Colombo to Panadura and from Colombo to Galle.'
        },

        # English test cases
        {
            'language': 'english',
            'original': 'What is the fare from Colombo to Kandy?',
            'expected_english': 'What is the fare from Colombo to Kandy?'
        },
        {
            'language': 'english',
            'original': 'Show me routes from Panadura',
            'expected_english': 'Show me routes from Panadura'
        }
    ]

    results = []
    total_exact = 0
    total_good = 0
    total_tests = len(test_cases)

    for test_case in test_cases:
        original = test_case['original']
        expected = test_case['expected_english']
        language = test_case['language']

        # Detect language (reported alongside the case's labelled language).
        detection_result = self.language_detector.detect_language(original)
        detected_language = detection_result['language']

        # Reset method tracker and translate
        self.last_translation_method = None
        translated = self.translate_text(original, 'en', 'auto') or ''

        tr = translated.strip()
        ex = expected.strip()
        tr_low = tr.lower()
        ex_low = ex.lower()

        # Accuracy heuristic: exact (case-insensitive match), good (one
        # string contains the other), otherwise partial.
        if tr_low == ex_low:
            accuracy = 'exact'
            total_exact += 1
            total_good += 1
        elif tr_low in ex_low or ex_low in tr_low:
            accuracy = 'good'
            total_good += 1
        else:
            accuracy = 'partial'

        # Intent preservation check for comparisons: if the original asks
        # to compare (Sinhala 'සසඳ' / Tamil 'ஒப்பிடு'), the translation
        # must mention 'compare'.
        intent_preserved = True
        if language in ['sinhala', 'tamil'] and ('සසඳ' in original or 'ஒப்பிடு' in original):
            intent_preserved = ('compare' in tr_low)

        results.append({
            'original_query': original,
            'language': language,
            'detected_language': detected_language,
            'translated_english': tr,
            'expected_english': ex,
            'translation_accuracy': accuracy,
            'intent_preserved': intent_preserved,
            'method_used': self.last_translation_method or ('llm' if self.openai_api_key else 'dictionary'),
            'detection_confidence': detection_result['confidence']
        })

    # Aggregate scores: 'good_or_better' counts both exact and good cases.
    summary = {
        'total_tests': total_tests,
        'exact_matches': total_exact,
        'good_or_better': total_good,
        'accuracy_rate_percent': round((total_good / total_tests) * 100, 2) if total_tests else 0
    }

    self.logger.info(f"Translation test summary: {summary}")

    return {
        'translation_service_status': 'active',
        'supported_languages': ['sinhala', 'tamil', 'singlish', 'english'],
        'available_methods': {
            'llm': self.openai_api_key is not None,
            'libre_translate': True,
            'mymemory': True,
            'dictionary': True
        },
        'summary': summary,
        'test_results': results
    }
|