Spaces:
Sleeping
Sleeping
Upload 15 files
Browse files- .env +14 -0
- .gitattributes +14 -35
- .gitignore +175 -0
- Dockerfile +32 -0
- README.md +107 -11
- app.py +974 -0
- config.py +263 -0
- enhanced_nlp_processor.py +904 -0
- llm_query_processor.py +351 -0
- logger.py +53 -0
- neo4j_service.py +222 -0
- requirements.txt +10 -0
- spell_corrector.py +257 -0
- templates/index.html +977 -0
- translation_service.py +702 -0
.env
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
GIT = your_github_token
|
| 2 |
+
NEO4J_URI = bolt://44.201.107.35:7687
|
| 3 |
+
NEO4J_USER = neo4j
|
| 4 |
+
# NEO4J_PASSWORD = "your_neo4j_password"
|
| 5 |
+
NEO4J_PASSWORD = "your_neo4j_password"
|
| 6 |
+
|
| 7 |
+
# OpenAI Configuration (for LLM)
|
| 8 |
+
OPENAI_API_KEY = your_openai_key
|
| 9 |
+
OPENAI_MODEL = gpt-3.5-turbo
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
# Flask Configuration
|
| 14 |
+
SECRET_KEY = your_flask_secret_key
|
.gitattributes
CHANGED
|
@@ -1,35 +1,14 @@
|
|
| 1 |
-
*.
|
| 2 |
-
*.
|
| 3 |
-
*.
|
| 4 |
-
*.
|
| 5 |
-
*.
|
| 6 |
-
*.
|
| 7 |
-
*.
|
| 8 |
-
*.
|
| 9 |
-
*.
|
| 10 |
-
*.
|
| 11 |
-
*.
|
| 12 |
-
*.
|
| 13 |
-
*.
|
| 14 |
-
*.
|
| 15 |
-
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
-
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
-
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
-
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
-
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
-
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
-
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
-
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
-
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
-
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
-
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
-
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
-
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
-
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
-
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
-
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
-
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
-
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
-
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
-
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
-
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
| 1 |
+
*.py linguist-language=Python
|
| 2 |
+
*.html linguist-language=HTML
|
| 3 |
+
*.css linguist-language=CSS
|
| 4 |
+
*.js linguist-language=JavaScript
|
| 5 |
+
*.md linguist-language=Markdown
|
| 6 |
+
*.txt linguist-language=Text
|
| 7 |
+
*.json linguist-language=JSON
|
| 8 |
+
*.csv linguist-language=CSV
|
| 9 |
+
*.pdf linguist-documentation
|
| 10 |
+
*.png linguist-documentation
|
| 11 |
+
*.jpg linguist-documentation
|
| 12 |
+
*.jpeg linguist-documentation
|
| 13 |
+
*.gif linguist-documentation
|
| 14 |
+
*.svg linguist-documentation
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
.gitignore
ADDED
|
@@ -0,0 +1,175 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Byte-compiled / optimized / DLL files
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
*$py.class
|
| 5 |
+
|
| 6 |
+
# C extensions
|
| 7 |
+
*.so
|
| 8 |
+
|
| 9 |
+
# Distribution / packaging
|
| 10 |
+
.Python
|
| 11 |
+
build/
|
| 12 |
+
develop-eggs/
|
| 13 |
+
dist/
|
| 14 |
+
downloads/
|
| 15 |
+
eggs/
|
| 16 |
+
.eggs/
|
| 17 |
+
lib/
|
| 18 |
+
lib64/
|
| 19 |
+
parts/
|
| 20 |
+
sdist/
|
| 21 |
+
var/
|
| 22 |
+
wheels/
|
| 23 |
+
share/python-wheels/
|
| 24 |
+
*.egg-info/
|
| 25 |
+
.installed.cfg
|
| 26 |
+
*.egg
|
| 27 |
+
MANIFEST
|
| 28 |
+
|
| 29 |
+
# PyInstaller
|
| 30 |
+
# Usually these files are written by a python script from a template
|
| 31 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
| 32 |
+
*.manifest
|
| 33 |
+
*.spec
|
| 34 |
+
|
| 35 |
+
# Installer logs
|
| 36 |
+
pip-log.txt
|
| 37 |
+
pip-delete-this-directory.txt
|
| 38 |
+
|
| 39 |
+
# Unit test / coverage reports
|
| 40 |
+
htmlcov/
|
| 41 |
+
.tox/
|
| 42 |
+
.nox/
|
| 43 |
+
.coverage
|
| 44 |
+
.coverage.*
|
| 45 |
+
.cache
|
| 46 |
+
nosetests.xml
|
| 47 |
+
coverage.xml
|
| 48 |
+
*.cover
|
| 49 |
+
*.py,cover
|
| 50 |
+
.hypothesis/
|
| 51 |
+
.pytest_cache/
|
| 52 |
+
cover/
|
| 53 |
+
|
| 54 |
+
# Translations
|
| 55 |
+
*.mo
|
| 56 |
+
*.pot
|
| 57 |
+
|
| 58 |
+
# Django stuff:
|
| 59 |
+
*.log
|
| 60 |
+
local_settings.py
|
| 61 |
+
db.sqlite3
|
| 62 |
+
db.sqlite3-journal
|
| 63 |
+
|
| 64 |
+
# Flask stuff:
|
| 65 |
+
instance/
|
| 66 |
+
.webassets-cache
|
| 67 |
+
|
| 68 |
+
# Scrapy stuff:
|
| 69 |
+
.scrapy
|
| 70 |
+
|
| 71 |
+
# Sphinx documentation
|
| 72 |
+
docs/_build/
|
| 73 |
+
|
| 74 |
+
# PyBuilder
|
| 75 |
+
.pybuilder/
|
| 76 |
+
target/
|
| 77 |
+
|
| 78 |
+
# Jupyter Notebook
|
| 79 |
+
.ipynb_checkpoints
|
| 80 |
+
|
| 81 |
+
# IPython
|
| 82 |
+
profile_default/
|
| 83 |
+
ipython_config.py
|
| 84 |
+
|
| 85 |
+
# pyenv
|
| 86 |
+
# For a library or package, you might want to ignore these files since the code is
|
| 87 |
+
# intended to run in multiple environments; otherwise, check them in:
|
| 88 |
+
# .python-version
|
| 89 |
+
|
| 90 |
+
# pipenv
|
| 91 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
| 92 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
| 93 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
| 94 |
+
# install all needed dependencies.
|
| 95 |
+
#Pipfile.lock
|
| 96 |
+
|
| 97 |
+
# UV
|
| 98 |
+
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
|
| 99 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
| 100 |
+
# commonly ignored for libraries.
|
| 101 |
+
#uv.lock
|
| 102 |
+
|
| 103 |
+
# poetry
|
| 104 |
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
| 105 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
| 106 |
+
# commonly ignored for libraries.
|
| 107 |
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
| 108 |
+
#poetry.lock
|
| 109 |
+
|
| 110 |
+
# pdm
|
| 111 |
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
| 112 |
+
#pdm.lock
|
| 113 |
+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
| 114 |
+
# in version control.
|
| 115 |
+
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
|
| 116 |
+
.pdm.toml
|
| 117 |
+
.pdm-python
|
| 118 |
+
.pdm-build/
|
| 119 |
+
|
| 120 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
| 121 |
+
__pypackages__/
|
| 122 |
+
|
| 123 |
+
# Celery stuff
|
| 124 |
+
celerybeat-schedule
|
| 125 |
+
celerybeat.pid
|
| 126 |
+
|
| 127 |
+
# SageMath parsed files
|
| 128 |
+
*.sage.py
|
| 129 |
+
|
| 130 |
+
# Environments
|
| 131 |
+
config.py
|
| 132 |
+
.env
|
| 133 |
+
.venv
|
| 134 |
+
env/
|
| 135 |
+
venv/
|
| 136 |
+
ENV/
|
| 137 |
+
env.bak/
|
| 138 |
+
venv.bak/
|
| 139 |
+
|
| 140 |
+
# Spyder project settings
|
| 141 |
+
.spyderproject
|
| 142 |
+
.spyproject
|
| 143 |
+
|
| 144 |
+
# Rope project settings
|
| 145 |
+
.ropeproject
|
| 146 |
+
|
| 147 |
+
# mkdocs documentation
|
| 148 |
+
/site
|
| 149 |
+
|
| 150 |
+
# mypy
|
| 151 |
+
.mypy_cache/
|
| 152 |
+
.dmypy.json
|
| 153 |
+
dmypy.json
|
| 154 |
+
|
| 155 |
+
# Pyre type checker
|
| 156 |
+
.pyre/
|
| 157 |
+
|
| 158 |
+
# pytype static type analyzer
|
| 159 |
+
.pytype/
|
| 160 |
+
|
| 161 |
+
# Cython debug symbols
|
| 162 |
+
cython_debug/
|
| 163 |
+
|
| 164 |
+
# PyCharm
|
| 165 |
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
| 166 |
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
| 167 |
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
| 168 |
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
| 169 |
+
#.idea/
|
| 170 |
+
|
| 171 |
+
# Ruff stuff:
|
| 172 |
+
.ruff_cache/
|
| 173 |
+
|
| 174 |
+
# PyPI configuration file
|
| 175 |
+
.pypirc
|
Dockerfile
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.9-slim
|
| 2 |
+
|
| 3 |
+
# Set working directory
|
| 4 |
+
WORKDIR /app
|
| 5 |
+
|
| 6 |
+
# Install system dependencies
|
| 7 |
+
RUN apt-get update && apt-get install -y \
|
| 8 |
+
gcc \
|
| 9 |
+
g++ \
|
| 10 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 11 |
+
|
| 12 |
+
# Copy requirements first for better caching
|
| 13 |
+
COPY requirements.txt .
|
| 14 |
+
|
| 15 |
+
# Install Python dependencies
|
| 16 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
| 17 |
+
|
| 18 |
+
# Copy application code
|
| 19 |
+
COPY . .
|
| 20 |
+
|
| 21 |
+
# Create logs directory
|
| 22 |
+
RUN mkdir -p logs
|
| 23 |
+
|
| 24 |
+
# Expose port
|
| 25 |
+
EXPOSE 7860
|
| 26 |
+
|
| 27 |
+
# Set environment variables
|
| 28 |
+
ENV FLASK_ENV=production
|
| 29 |
+
ENV PORT=7860
|
| 30 |
+
|
| 31 |
+
# Run the application
|
| 32 |
+
CMD ["python", "app.py"]
|
README.md
CHANGED
|
@@ -1,11 +1,107 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 🚌 Natural Language Transport Query System
|
| 2 |
+
|
| 3 |
+
A sophisticated Flask application that provides natural language querying capabilities for Sri Lankan transport information, featuring Sinhala-English translation and Neo4j integration.
|
| 4 |
+
|
| 5 |
+
## 🌟 Features
|
| 6 |
+
|
| 7 |
+
- **Natural Language Processing**: Advanced NLP for understanding transport queries
|
| 8 |
+
- **Multilingual Support**: Sinhala-English translation with LLM integration
|
| 9 |
+
- **Spell Correction**: Fuzzy matching and LLM-based location correction
|
| 10 |
+
- **Neo4j Integration**: Graph database for efficient route and fare queries
|
| 11 |
+
- **Enhanced Query Types**: Support for comparisons, ranges, recommendations
|
| 12 |
+
- **RESTful API**: Comprehensive API endpoints for all functionality
|
| 13 |
+
|
| 14 |
+
## 🚀 Quick Start
|
| 15 |
+
|
| 16 |
+
### Local Development
|
| 17 |
+
```bash
|
| 18 |
+
# Install dependencies
|
| 19 |
+
pip install -r requirements.txt
|
| 20 |
+
|
| 21 |
+
# Set environment variables
|
| 22 |
+
export OPENAI_API_KEY="your_openai_key"
|
| 23 |
+
export NEO4J_URI="your_neo4j_uri"
|
| 24 |
+
export NEO4J_USER="your_neo4j_user"
|
| 25 |
+
export NEO4J_PASSWORD="your_neo4j_password"
|
| 26 |
+
|
| 27 |
+
# Run the application
|
| 28 |
+
python app.py
|
| 29 |
+
```
|
| 30 |
+
|
| 31 |
+
### Hugging Face Spaces
|
| 32 |
+
This application is deployed on Hugging Face Spaces and is accessible via the provided URL.
|
| 33 |
+
|
| 34 |
+
## 📡 API Endpoints
|
| 35 |
+
|
| 36 |
+
### Core Query Processing
|
| 37 |
+
- `POST /api/query` - Process natural language transport queries
|
| 38 |
+
- `GET /api/status` - System status and statistics
|
| 39 |
+
- `GET /api/places` - Get all available places
|
| 40 |
+
|
| 41 |
+
### NLP Capabilities
|
| 42 |
+
- `GET /api/nlp/capabilities` - View enhanced NLP capabilities
|
| 43 |
+
- `GET /api/nlp/demo` - Get comprehensive demo queries
|
| 44 |
+
- `POST /api/nlp/test` - Test queries with detailed analysis
|
| 45 |
+
- `GET /api/nlp/test-all-types` - Test all query types
|
| 46 |
+
|
| 47 |
+
### Translation Services
|
| 48 |
+
- `POST /api/translation/translate` - Translate text between languages
|
| 49 |
+
- `GET /api/translation/test` - Test translation functionality
|
| 50 |
+
- `GET /api/sinhala/examples` - Get Sinhala example queries
|
| 51 |
+
|
| 52 |
+
### Utilities
|
| 53 |
+
- `POST /api/suggestions` - Get location suggestions for autocomplete
|
| 54 |
+
- `GET /api/examples` - Get categorized example queries
|
| 55 |
+
|
| 56 |
+
## 🔧 Configuration
|
| 57 |
+
|
| 58 |
+
The application uses environment variables for configuration:
|
| 59 |
+
|
| 60 |
+
```bash
|
| 61 |
+
# OpenAI Configuration
|
| 62 |
+
OPENAI_API_KEY=your_openai_api_key
|
| 63 |
+
|
| 64 |
+
# Neo4j Configuration
|
| 65 |
+
NEO4J_URI=bolt://localhost:7687
|
| 66 |
+
NEO4J_USER=neo4j
|
| 67 |
+
NEO4J_PASSWORD=password
|
| 68 |
+
|
| 69 |
+
# Translation Configuration
|
| 70 |
+
FORCE_LLM_TRANSLATION=true
|
| 71 |
+
USE_PATTERN_TRANSLATION=false
|
| 72 |
+
|
| 73 |
+
# Logging Configuration
|
| 74 |
+
LOG_LEVEL=INFO
|
| 75 |
+
LOG_DIR=logs
|
| 76 |
+
```
|
| 77 |
+
|
| 78 |
+
## 📊 Query Examples
|
| 79 |
+
|
| 80 |
+
### English Queries
|
| 81 |
+
- "What is the fare from Colombo to Kandy?"
|
| 82 |
+
- "Show me routes from Galle to Matara"
|
| 83 |
+
- "Compare fares from Colombo to Panadura and Colombo to Galle"
|
| 84 |
+
- "Find routes under 500 LKR"
|
| 85 |
+
|
| 86 |
+
### Sinhala Queries
|
| 87 |
+
- "කොළඹ සිට මහනුවරට ගාස්තුව කීයද?"
|
| 88 |
+
- "ගාල්ල සිට මාතර දක්වා මාර්ග පෙන්වන්න"
|
| 89 |
+
- "කොළඹ සිට පානදුර සහ කොළඹ සිට ගාල්ල ගාස්තු සසඳා බලන්න"
|
| 90 |
+
|
| 91 |
+
## 🏗️ Architecture
|
| 92 |
+
|
| 93 |
+
- **Flask**: Web framework
|
| 94 |
+
- **OpenAI GPT**: LLM for translation and query interpretation
|
| 95 |
+
- **Neo4j**: Graph database for transport data
|
| 96 |
+
- **FuzzyWuzzy**: Spell correction and fuzzy matching
|
| 97 |
+
- **Pandas**: Data processing and manipulation
|
| 98 |
+
|
| 99 |
+
## 📝 License
|
| 100 |
+
|
| 101 |
+
This project is licensed under the MIT License.
|
| 102 |
+
|
| 103 |
+
## 🤝 Contributing
|
| 104 |
+
|
| 105 |
+
Contributions are welcome! Please feel free to submit a Pull Request.
|
| 106 |
+
|
| 107 |
+
|
app.py
ADDED
|
@@ -0,0 +1,974 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Main Flask Application for Transport Query System
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
from flask import Flask, render_template, request, jsonify, session
|
| 7 |
+
import os
|
| 8 |
+
from llm_query_processor import LLMQueryProcessor
|
| 9 |
+
from enhanced_nlp_processor import EnhancedNLPProcessor
|
| 10 |
+
from spell_corrector import SpellCorrector
|
| 11 |
+
from neo4j_service import Neo4jService
|
| 12 |
+
from translation_service import TranslationService
|
| 13 |
+
from logger import get_logger
|
| 14 |
+
from config import Config
|
| 15 |
+
|
| 16 |
+
# Flask application object, configured from the project's Config class.
app = Flask(__name__)
app.config.from_object(Config)
logger = get_logger("FlaskApp")

# Module-level service instances shared by the route handlers.
# They are created once at import time, in this order.
query_processor = LLMQueryProcessor()
enhanced_nlp_processor = EnhancedNLPProcessor()
spell_corrector = SpellCorrector()
neo4j_service = Neo4jService()
translation_service = TranslationService()
|
| 26 |
+
|
| 27 |
+
@app.route('/')
def index():
    """Render and return the ``index.html`` template for the root URL."""
    page = render_template('index.html')
    return page
|
| 31 |
+
|
| 32 |
+
@app.route('/api/query', methods=['POST'])
def process_query():
    """Process a user query with enhanced NLP and translation support.

    Reads a JSON body with:
        query: the natural-language question (required, string).
        enhanced_nlp: when truthy (the default) the enhanced NLP processor
            handles the query; otherwise the basic LLM query processor does.

    Returns:
        A JSON response. For queries detected as Sinhala the processor's
        result is passed through ``translate_response`` and gains a
        ``translation_info`` entry. On any failure the payload is
        ``{'success': False, 'message': ...}``.
    """
    try:
        # silent=True returns None instead of raising on a missing or
        # malformed JSON body; anything that is not a JSON object is treated
        # as an empty request so the caller gets the normal validation reply.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}

        raw_query = data.get('query', '')
        user_query = raw_query.strip() if isinstance(raw_query, str) else ''
        use_enhanced_nlp = data.get('enhanced_nlp', True)  # Default to enhanced NLP

        if not user_query:
            return jsonify({
                'success': False,
                'message': 'Please enter a query.'
            })

        # Check if query is in Sinhala and translate if needed
        translation_info = translation_service.translate_query(user_query)

        # Use translated query for processing
        query_to_process = translation_info['translated_query']

        if translation_info['is_sinhala']:
            logger.info(f"Translation: si->en method={translation_info['translation_method']} original='{translation_info['original_query']}' translated='{translation_info['translated_query']}'")
        else:
            logger.info(f"Processing English Query: '{user_query}'")

        # Process the query with enhanced NLP or fall back to basic processor
        if use_enhanced_nlp:
            result = enhanced_nlp_processor.process_query(query_to_process)
        else:
            result = query_processor.process_query(query_to_process)

        # If original query was in Sinhala, translate the response back
        if translation_info['is_sinhala']:
            logger.info(f"English Response: {result.get('message', 'No message')}")
            result = translation_service.translate_response(result)
            result['translation_info'] = translation_info
            logger.info(f"Sinhala Response: {result.get('message', 'No message')}")
            logger.info("Translation Complete")

        # Guard the preview against a missing or None message.
        message_preview = str(result.get('message') or '')[:120]
        logger.info(f"Response success={result.get('success')} type={result.get('query_type','n/a')} message='{message_preview}'")
        return jsonify(result)

    except Exception as e:
        # Top-level boundary: record the traceback server-side, then report
        # the failure in the same JSON shape the client already expects.
        # NOTE(review): assumes get_logger returns a standard logging.Logger.
        logger.exception("Error processing query")
        return jsonify({
            'success': False,
            'message': f'Error processing query: {str(e)}'
        })
|
| 80 |
+
|
| 81 |
+
@app.route('/api/suggestions', methods=['POST'])
def get_suggestions():
    """Get location suggestions for autocomplete.

    Reads a JSON body with a ``location`` string (a partial place name).

    Returns:
        JSON ``{'suggestions': [{'name': ..., 'confidence': ...}, ...]}``.
        The list is empty when no usable ``location`` was supplied. On
        failure the payload is ``{'success': False, 'message': ...}``.
    """
    try:
        # Tolerate a missing, malformed, or non-object JSON body.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}

        raw_location = data.get('location', '')
        partial_location = raw_location.strip() if isinstance(raw_location, str) else ''

        if not partial_location:
            return jsonify({'suggestions': []})

        # Each suggestion is unpacked as a (name, confidence) pair.
        suggestions = spell_corrector.get_suggestions(partial_location)

        return jsonify({
            'suggestions': [{'name': name, 'confidence': conf} for name, conf in suggestions]
        })

    except Exception as e:
        # Keep a server-side traceback; the client still gets the usual shape.
        # NOTE(review): assumes get_logger returns a standard logging.Logger.
        logger.exception("Error getting suggestions")
        return jsonify({
            'success': False,
            'message': f'Error getting suggestions: {str(e)}'
        })
|
| 102 |
+
|
| 103 |
+
@app.route('/api/status')
def get_status():
    """Get system status.

    Returns:
        JSON with ``neo4j_connected``, ``total_places`` (the number of places
        returned by the Neo4j service) and ``statistics``. When Neo4j is not
        connected the count is 0 and the statistics are empty. On failure the
        payload is ``{'success': False, 'message': ...}``.
    """
    try:
        neo4j_connected = neo4j_service.is_connected()
        # Only query the database when the connection check succeeded.
        places = neo4j_service.get_all_places() if neo4j_connected else []
        stats = neo4j_service.get_route_statistics() if neo4j_connected else {}

        return jsonify({
            'neo4j_connected': neo4j_connected,
            'total_places': len(places),
            'statistics': stats
        })

    except Exception as e:
        # Record the traceback server-side before reporting to the client.
        # NOTE(review): assumes get_logger returns a standard logging.Logger.
        logger.exception("Error getting status")
        return jsonify({
            'success': False,
            'message': f'Error getting status: {str(e)}'
        })
|
| 122 |
+
|
| 123 |
+
@app.route('/api/places')
def get_places():
    """Get all available places.

    Returns:
        JSON ``{'success': True, 'places': ...}`` with whatever the Neo4j
        service returns, or ``{'success': False, 'message': ...}`` on failure.
    """
    try:
        places = neo4j_service.get_all_places()
        return jsonify({
            'success': True,
            'places': places
        })

    except Exception as e:
        # Record the traceback server-side before reporting to the client.
        # NOTE(review): assumes get_logger returns a standard logging.Logger.
        logger.exception("Error getting places")
        return jsonify({
            'success': False,
            'message': f'Error getting places: {str(e)}'
        })
|
| 138 |
+
|
| 139 |
+
@app.route('/api/sinhala/examples')
def get_sinhala_examples():
    """Get example queries in Sinhala.

    Returns:
        JSON ``{'success': True, 'examples': ...}`` with the examples supplied
        by the translation service, or ``{'success': False, 'message': ...}``
        on failure.
    """
    try:
        sinhala_examples = translation_service.get_sinhala_examples()
        return jsonify({
            'success': True,
            'examples': sinhala_examples
        })

    except Exception as e:
        # Record the traceback server-side before reporting to the client.
        # NOTE(review): assumes get_logger returns a standard logging.Logger.
        logger.exception("Error getting Sinhala examples")
        return jsonify({
            'success': False,
            'message': f'Error getting Sinhala examples: {str(e)}'
        })
|
| 154 |
+
|
| 155 |
+
@app.route('/api/translation/test')
def test_translation():
    """Test translation functionality.

    Returns:
        JSON ``{'success': True, 'test_results': ...}`` with the output of the
        translation service's self-test, or
        ``{'success': False, 'message': ...}`` on failure.
    """
    try:
        test_results = translation_service.test_translation()
        return jsonify({
            'success': True,
            'test_results': test_results
        })

    except Exception as e:
        # Record the traceback server-side before reporting to the client.
        # NOTE(review): assumes get_logger returns a standard logging.Logger.
        logger.exception("Error testing translation")
        return jsonify({
            'success': False,
            'message': f'Error testing translation: {str(e)}'
        })
|
| 170 |
+
|
| 171 |
+
@app.route('/api/translation/translate', methods=['POST'])
def translate_text():
    """Translate text between Sinhala and English.

    Reads a JSON body with:
        text: the text to translate (required, string).
        target_lang: target language code, 'en' (default) or 'si'.
        source_lang: source language code, default 'auto'.

    Returns:
        JSON with the original and translated text, the detected source
        language ('si' when the text is detected as Sinhala, else 'en'), the
        target language and the translation method. On failure the payload is
        ``{'success': False, 'message': ...}``.
    """
    try:
        # Tolerate a missing, malformed, or non-object JSON body.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}

        raw_text = data.get('text', '')
        text = raw_text.strip() if isinstance(raw_text, str) else ''
        target_lang = data.get('target_lang', 'en')  # 'en' or 'si'
        source_lang = data.get('source_lang', 'auto')

        if not text:
            return jsonify({
                'success': False,
                'message': 'Please provide text to translate.'
            })

        translated_text = translation_service.translate_text(text, target_lang, source_lang)
        is_sinhala = translation_service.is_sinhala_text(text)

        return jsonify({
            'success': True,
            'original_text': text,
            'translated_text': translated_text,
            # Reported from script detection, not from the source_lang argument.
            'source_language': 'si' if is_sinhala else 'en',
            'target_language': target_lang,
            # Derived from whether a Google Translate API key is configured.
            'translation_method': 'google' if translation_service.google_translate_api_key else 'dictionary'
        })

    except Exception as e:
        # Record the traceback server-side before reporting to the client.
        # NOTE(review): assumes get_logger returns a standard logging.Logger.
        logger.exception("Error translating text")
        return jsonify({
            'success': False,
            'message': f'Error translating text: {str(e)}'
        })
|
| 203 |
+
|
| 204 |
+
@app.route('/api/nlp/capabilities')
def get_nlp_capabilities():
    """Describe the NLP capabilities and attach live results for sample queries.

    For every query type, the first two example queries are run through
    ``enhanced_nlp_processor.process_query``; each result (or an error record
    when processing raises) is returned under ``live_examples``.
    """

    def _run_example(sample):
        """Process one sample query, turning any failure into an error record."""
        try:
            outcome = enhanced_nlp_processor.process_query(sample)
        except Exception as exc:
            outcome = {
                'success': False,
                'message': f'Error: {exc!s}',
            }
        return {'query': sample, 'result': outcome}

    # Sample queries per query type; also returned verbatim as 'query_types'.
    query_catalogue = [
        {
            'type': 'fare_inquiry',
            'description': 'Find fare between two specific locations',
            'examples': [
                'What is the fare from Colombo to Kandy?',
                'fare of anuradhapura to kandy',
                'price from panadura to galle',
                'Colombo to Kandy fare',
            ],
        },
        {
            'type': 'comparison',
            'description': 'Compare fares between different routes',
            'examples': [
                'Compare fares from Colombo to Kandy vs Colombo to Galle',
                'Which is cheaper between Colombo to Kandy and Colombo to Anuradapura?',
                'What is the difference in fare between Panadura to Galle and Panadura to Matara?',
            ],
        },
        {
            'type': 'range_search',
            'description': 'Find routes within specific price ranges',
            'examples': [
                'Find routes under 500 rupees',
                'Show me routes between 200 and 800 rupees',
                'Routes over 1000 rupees',
            ],
        },
        {
            'type': 'recommendation',
            'description': 'Get route recommendations based on criteria',
            'examples': [
                'Recommend cheap routes',
                'Show me popular destinations',
                'What are the best routes from Colombo?',
            ],
        },
        {
            'type': 'route_inquiry',
            'description': 'Find routes from/to specific locations',
            'examples': [
                'Routes from Colombo',
                'Routes to Galle',
                'What routes depart from Kandy?',
            ],
        },
        {
            'type': 'statistics',
            'description': 'Get database overview and statistics',
            'examples': [
                'What is the average fare?',
                'Database statistics',
                'How many routes are there?',
            ],
        },
    ]

    # Only the first two examples of each type are executed live.
    live_examples = [
        {
            'type': entry['type'],
            'description': entry['description'],
            'examples': [_run_example(sample) for sample in entry['examples'][:2]],
        }
        for entry in query_catalogue
    ]

    nlp_section = {
        'description': 'Advanced NLP for transport queries with enhanced understanding',
        'features': [
            'Multiple query formats (fare, price, cost)',
            'Natural language patterns (from X to Y, X to Y fare, etc.)',
            'Question formats (What is, How much, Show me, etc.)',
            'Compact formats (Colombo to Kandy fare)',
            'Spell correction and fuzzy matching',
            'Automatic location name correction',
            'LLM-powered query interpretation',
            'Fallback keyword-based processing',
            'Advanced intent classification',
            'Entity extraction and normalization',
            'Confidence scoring for query understanding',
        ],
    }

    spell_section = {
        'description': 'Automatic location name correction',
        'methods': [
            'Direct mapping (exact matches)',
            'Fuzzy matching (similar names)',
            'LLM correction (AI-powered)',
            'Partial matching (substring matching)',
        ],
        'examples': [
            'panadra → Panadura',
            'gale → Galle',
            'colmbo → Colombo',
            'kandee → Kandy',
        ],
    }

    llm_section = {
        'description': 'AI-powered query interpretation with LLM Cypher generation',
        'features': [
            'Automatic query type detection',
            'LLM-powered Cypher query generation',
            'Natural language understanding',
            'Fallback to keyword-based processing',
            'Advanced entity extraction',
            'Intent classification with confidence scoring',
            'Real-time database querying',
        ],
    }

    enhanced_section = {
        'description': 'Advanced NLP capabilities',
        'features': [
            'Multi-intent query understanding',
            'Context-aware responses',
            'Query preprocessing and normalization',
            'Advanced pattern matching',
            'Confidence-based result ranking',
            'Comprehensive query analysis',
            'Live database results for all query types',
        ],
    }

    capabilities = {
        'natural_language_processing': nlp_section,
        'query_types': query_catalogue,
        'live_examples': live_examples,
        'spell_correction': spell_section,
        'llm_integration': llm_section,
        'enhanced_features': enhanced_section,
    }

    return jsonify({
        'success': True,
        'capabilities': capabilities,
    })
|
| 357 |
+
|
| 358 |
+
@app.route('/api/nlp/test', methods=['POST'])
def test_nlp_query():
    """Test a natural language query and return a step-by-step analysis.

    Expects a JSON object body with:
        query: the natural language query (required, must not be blank).
        enhanced_nlp: when truthy (default True) the query is handled by
            ``enhanced_nlp_processor``; otherwise by ``query_processor``.

    Returns:
        A JSON response with ``analysis`` (the original query plus a list of
        processing steps: location extraction, query processing and, when
        the processor returned data, a sample of the results) and the raw
        processor ``result``. On failure it carries ``success: False`` and a
        message.
    """
    try:
        # silent=True returns None instead of raising on a missing or
        # malformed JSON body, so bad requests reach the validation message
        # below rather than leaking an internal AttributeError message.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}

        raw_query = data.get('query', '')
        # Non-string values are treated as a missing query.
        user_query = raw_query.strip() if isinstance(raw_query, str) else ''
        use_enhanced_nlp = data.get('enhanced_nlp', True)

        if not user_query:
            return jsonify({
                'success': False,
                'message': 'Please provide a query to test.'
            })

        # Get detailed analysis
        analysis = {
            'original_query': user_query,
            'processing_steps': []
        }

        # Step 1: Extract locations. Each entry is indexed as
        # (original, corrected, confidence, method).
        locations = spell_corrector.extract_locations_from_query(user_query)
        analysis['processing_steps'].append({
            'step': 'Location Extraction',
            'locations_found': len(locations),
            'details': [
                {
                    'original': loc[0],
                    'corrected': loc[1],
                    'confidence': loc[2],
                    'method': loc[3]
                } for loc in locations
            ]
        })

        # Step 2: Process query with the selected processor
        if use_enhanced_nlp:
            result = enhanced_nlp_processor.process_query(user_query)
            query_analysis = result.get('query_analysis', {})
            analysis['processing_steps'].append({
                'step': 'Enhanced NLP Processing',
                'success': result.get('success', False),
                'query_type': result.get('query_type', 'unknown'),
                'message': result.get('message', ''),
                'confidence': query_analysis.get('confidence', 0),
                'intent': query_analysis.get('intent', {}),
                'entities': query_analysis.get('entities', {})
            })
        else:
            result = query_processor.process_query(user_query)
            analysis['processing_steps'].append({
                'step': 'Basic Query Processing',
                'success': result.get('success', False),
                'query_type': result.get('query_type', 'unknown'),
                'message': result.get('message', ''),
                'cypher_query': result.get('cypher_query', ''),
                'corrections': result.get('corrections', [])
            })

        # Step 3: Results (only when the processor succeeded and returned data)
        if result.get('success') and result.get('data'):
            analysis['processing_steps'].append({
                'step': 'Database Results',
                'results_count': len(result['data']),
                'sample_results': result['data'][:3]  # Show first 3 results
            })

        return jsonify({
            'success': True,
            'analysis': analysis,
            'result': result
        })

    except Exception as e:
        # Top-level boundary of the endpoint: report the failure as JSON.
        return jsonify({
            'success': False,
            'message': f'Error testing NLP query: {str(e)}'
        })
|
| 435 |
+
|
| 436 |
+
@app.route('/api/nlp/demo')
def get_nlp_demo():
    """Return a static catalogue of demo queries grouped by category."""
    # (category title, sample queries) pairs, in display order.
    catalogue = (
        ('Basic Fare Queries', (
            'What is the fare from Colombo to Kandy?',
            'fare of anuradhapura to kandy',
            'price from panadura to galle',
            'Colombo to Kandy fare',
        )),
        ('Comparison Queries', (
            'Compare fares from Colombo to Kandy vs Colombo to Galle',
            'Which is cheaper between Colombo to Kandy and Colombo to Anuradapura?',
            'What is the difference in fare between Panadura to Galle and Panadura to Matara?',
        )),
        ('Range Search Queries', (
            'Find routes under 500 rupees',
            'Show me routes between 200 and 800 rupees',
            'Routes over 1000 rupees',
        )),
        ('Recommendation Queries', (
            'Recommend cheap routes',
            'Show me popular destinations',
            'What are the best routes from Colombo?',
        )),
        ('Statistical Queries', (
            'What is the average fare?',
            'Database statistics',
            'How many routes are there?',
        )),
        ('Route Queries', (
            'Show me the cheapest routes',
            'Routes from Colombo',
            'Routes to Galle',
            'What routes depart from Kandy?',
        )),
        ('Spell Correction Tests', (
            'price from panadra to gale',
            'fare of colmbo to kandee',
            'cost from anuradapura to kandy',
        )),
    )

    demo_queries = [
        {'category': title, 'queries': list(samples)}
        for title, samples in catalogue
    ]

    demo = {
        'title': 'Enhanced Natural Language Transport Query Demo',
        'description': 'Advanced NLP capabilities with comparison, range search, and recommendations',
        'categories': demo_queries,
    }
    return jsonify({'success': True, 'demo': demo})
|
| 508 |
+
|
| 509 |
+
@app.route('/api/examples')
def get_examples():
    """Return categorized example queries (in Sinhala) with short descriptions.

    The ``# was:`` comments record the English query each Sinhala example
    replaced in the source.
    """
    # (category, ((query, description), ...)) in display order.
    catalogue = (
        # === FARE QUERIES (Various Natural Language Formats) ===
        ('Fare Queries', (
            # was: 'What is the fare from Colombo to Kandy?'
            ('කොළඹ සිට මහනුවරට ගාස්තුව කීයද?', 'Standard fare query format'),
            ('පානදුරේ ඉඳන් ගාල්ලට කීයක් යනවද?', 'Alternative way to ask for fare'),
            ('අනුරාධපුර සිට මහනුවර දක්වා ගාස්තුව', 'Natural language format'),
            # was: 'price from panadura to galle'
            ('පානදුරේ ඉඳන් ගාල්ලට කීයක් යනවද?', 'Using "price" instead of "fare"'),
            # was: 'Colombo to nuwara eliya fare'
            ('බදුල්ල සිට කොළඹට ගාස්තුව කීයද?', 'Compact format'),
            # was: 'How much is the fare from matara to kandy?'
            ('මහනුවර සිට මාතරට ගාස්තුව කීයද?', 'Question format'),
        )),

        # === COMPARISON QUERIES ===
        ('Comparison Queries', (
            # was: 'Compare fares from Colombo to Kandy vs Colombo to Galle'
            ('කොළඹ සිට මහනුවර දක්වා සහ කොළඹ සිට ගාල්ල දක්වා ගාස්තු සංසන්දනය කරන්න.',
             'Compare two different routes'),
            # was: 'Which is cheaper between Colombo to Kandy and Colombo to Anuradapura?'
            ('කොළඹ සිට මහනුවර දක්වා සහ කොළඹ සිට අනුරාධපුර දක්වා ලාභදායී වන්නේ කුමක්ද?',
             'Find the cheaper option'),
            # was: 'What is the difference in fare between Panadura to Galle and Panadura to Matara?'
            ('පානදුර සිට ගාල්ල දක්වා සහ පානදුර සිට මාතර දක්වා ගාස්තුවේ වෙනස කීයද?',
             'Calculate fare difference'),
        )),

        # === RANGE SEARCH QUERIES ===
        ('Range Search Queries', (
            # was: 'Find routes under 500 rupees'
            ('රුපියල් 500ට අඩු මාර්ග සොයා ගන්න', 'Find affordable routes'),
            # was: 'Show me routes between 200 and 800 rupees'
            ('රුපියල් 200 සහ 800 අතර මාර්ග සොයා ගන්න', 'Find routes in price range'),
            # was: 'Routes over 1000 rupees'
            ('රුපියල් 1000ට ඉහළ මාර්ග සොයා ගන්න', 'Find expensive routes'),
        )),

        # === RECOMMENDATION QUERIES ===
        ('Recommendation Queries', (
            # was: 'Recommend cheap routes'
            ('ලාභ මාර්ග නිර්දේශ කරන්න', 'Get budget-friendly recommendations'),
            # was: 'Show me popular destinations'
            ('මට ජනප්රිය ගමනාන්ත පෙන්වන්න', 'Find frequently traveled routes'),
            # was: 'What are the best routes from Colombo?'
            ('කොළඹ සිට යාමට හොඳම මාර්ග මොනවාද?', 'Get optimal route suggestions'),
        )),

        # === STATISTICAL QUERIES ===
        ('Statistical Queries', (
            # was: 'What is the average fare?'
            ('සාමාන්ය ගාස්තුව කීයද?', 'Get average fare statistics'),
            # was: 'Database statistics'
            ('දත්ත සමුදා සංඛ්යා ලේඛන', 'Get comprehensive database overview'),
            ('මාර්ග කීයක් තිබේද?', 'Count total routes'),
        )),

        # === ROUTE QUERIES ===
        ('Route Queries', (
            # was: 'Show me the cheapest routes'
            ('මට ලාභදායී මාර්ග 10ක් පෙන්වන්න', 'Find top 10 cheapest routes'),
            # was: 'Routes from Colombo'
            ('කොළඹ සිට යාමට මාර්ග මොනවාද?', 'Find all routes departing from a location'),
            # was: 'Routes to Galle'
            ('ගාල්ල යාමට මාර්ග මොනවාද?', 'Find all routes going to a location'),
            # was: 'What routes depart from Kandy?'
            ('මහනුවර සිට යාමට මාර්ග මොනවාද?', 'Question format for routes'),
        )),

        # === SPELLING ERROR EXAMPLES ===
        ('Spell Correction Examples', (
            # was: 'price from panadra to gale'
            ('පාන්දුරේ ඉඳන් ගාල්ල්ට කීයක් යනවද?', 'Test spell correction (Panadura, Galle)'),
            # was: 'fare of colmbo to kandee'
            ('කොළ්බ්හ සිට මහනුවර්ට ගාස්තුව කීයද?', 'Test spell correction (Colombo, Kandy)'),
            # was: 'cost from anuradapura to kandy'
            ('අනුරපුර සිට මහනුවර්රට ගාස්තුව කීයද?', 'Natural format with correct spelling'),
        )),
    )

    examples = [
        {
            'category': category,
            'examples': [
                {'query': query, 'description': description}
                for query, description in pairs
            ],
        }
        for category, pairs in catalogue
    ]

    return jsonify({'success': True, 'examples': examples})
|
| 689 |
+
|
| 690 |
+
@app.route('/api/nlp/advanced', methods=['POST'])
def advanced_nlp_query():
    """Process a query with the enhanced NLP processor and return its result.

    Expects a JSON object body with:
        query: the natural language query (required, must not be blank).

    Returns:
        The dictionary produced by ``enhanced_nlp_processor.process_query``
        as JSON, or ``success: False`` with a message when the query is
        missing or processing raises.
    """
    try:
        # silent=True returns None instead of raising on a missing or
        # malformed JSON body, so bad requests reach the validation message
        # below rather than leaking an internal AttributeError message.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}

        raw_query = data.get('query', '')
        # Non-string values are treated as a missing query.
        user_query = raw_query.strip() if isinstance(raw_query, str) else ''

        if not user_query:
            return jsonify({
                'success': False,
                'message': 'Please provide a query to process.'
            })

        # Process with enhanced NLP
        result = enhanced_nlp_processor.process_query(user_query)

        return jsonify(result)

    except Exception as e:
        # Top-level boundary of the endpoint: report the failure as JSON.
        return jsonify({
            'success': False,
            'message': f'Error processing advanced NLP query: {str(e)}'
        })
|
| 713 |
+
|
| 714 |
+
@app.route('/api/nlp/compare', methods=['POST'])
def compare_routes():
    """Compare the fares of two or more routes.

    Expects a JSON object body with:
        routes: a list of objects, each with ``from`` and ``to`` place names.
            Entries missing either name are ignored.

    Returns:
        A JSON response whose ``data`` holds the query rows, with columns
        ``route1``/``fare1``, ``route2``/``fare2``, ... numbered over the
        usable routes. Responds with ``success: False`` and a message when
        fewer than two usable routes are supplied or the query fails.
    """
    try:
        # silent=True returns None instead of raising on a missing or
        # malformed JSON body.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}

        routes = data.get('routes', [])
        if not isinstance(routes, list):
            routes = []

        # Keep only entries that name both endpoints. Filtering up front keeps
        # the MATCH and RETURN clauses in step; previously a skipped entry
        # left a dangling comma or a RETURN of unbound variables.
        valid_routes = [
            (route.get('from'), route.get('to'))
            for route in routes
            if isinstance(route, dict) and route.get('from') and route.get('to')
        ]

        if len(valid_routes) < 2:
            return jsonify({
                'success': False,
                'message': 'Please provide at least 2 routes to compare.'
            })

        # Build the comparison query. Place names are passed as query
        # parameters, never interpolated into the Cypher text, so
        # user-supplied names cannot alter the query (Cypher injection).
        patterns = []
        projections = []
        params = {}
        for i, (from_loc, to_loc) in enumerate(valid_routes):
            params[f'from{i}'] = from_loc
            params[f'to{i}'] = to_loc
            patterns.append(
                f"(a{i}:Place {{name: $from{i}}})-[r{i}:Fare]->(b{i}:Place {{name: $to{i}}})"
            )
            projections.append(
                f"a{i}.name + ' to ' + b{i}.name as route{i+1}, r{i}.fare as fare{i+1}"
            )

        # All patterns share one MATCH clause, so a row is produced only when
        # every requested route exists.
        comparison_query = (
            "MATCH " + ", ".join(patterns) + " RETURN " + ", ".join(projections)
        )

        # Execute query
        with neo4j_service.driver.session() as session:
            result = session.run(comparison_query, params)
            results = [dict(record) for record in result]

        return jsonify({
            'success': True,
            'data': results,
            'message': f'Comparison of {len(valid_routes)} routes completed'
        })

    except Exception as e:
        # Top-level boundary of the endpoint: report the failure as JSON.
        return jsonify({
            'success': False,
            'message': f'Error comparing routes: {str(e)}'
        })
|
| 759 |
+
|
| 760 |
+
def _parse_price(value):
    """Return *value* as a number, raising ValueError when it is not numeric.

    Accepts ints, floats and numeric strings (e.g. "500"); booleans and any
    other type are rejected.
    """
    # bool is a subclass of int, so it must be excluded explicitly.
    if isinstance(value, bool):
        raise ValueError('price must be numeric')
    if isinstance(value, (int, float)):
        return value
    if isinstance(value, str):
        return float(value.strip())
    raise ValueError('price must be numeric')


@app.route('/api/nlp/range', methods=['POST'])
def search_by_range():
    """Search routes whose fare lies within a price range.

    Expects a JSON object body with at least one of:
        min_price: inclusive lower bound on the fare.
        max_price: inclusive upper bound on the fare.

    Returns:
        A JSON response whose ``data`` lists ``from_place``, ``to_place`` and
        ``fare`` for each matching route, ordered by fare. Responds with
        ``success: False`` and a message when no bound is given, a bound is
        not numeric, or the query fails.
    """
    try:
        # silent=True returns None instead of raising on a missing or
        # malformed JSON body.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}

        min_price = data.get('min_price')
        max_price = data.get('max_price')

        if min_price is None and max_price is None:
            return jsonify({
                'success': False,
                'message': 'Please provide min_price or max_price or both.'
            })

        # Build range query. Bounds are validated as numbers and passed as
        # query parameters, never interpolated into the Cypher text, so a
        # crafted value cannot alter the query (Cypher injection).
        conditions = []
        params = {}
        try:
            if min_price is not None:
                params['min_price'] = _parse_price(min_price)
                conditions.append("r.fare >= $min_price")
            if max_price is not None:
                params['max_price'] = _parse_price(max_price)
                conditions.append("r.fare <= $max_price")
        except ValueError:
            return jsonify({
                'success': False,
                'message': 'min_price and max_price must be numeric.'
            })

        range_query = (
            "MATCH (a:Place)-[r:Fare]->(b:Place) WHERE "
            + " AND ".join(conditions)
            + " RETURN a.name as from_place, b.name as to_place, r.fare as fare ORDER BY r.fare"
        )

        # Execute query
        with neo4j_service.driver.session() as session:
            result = session.run(range_query, params)
            results = [dict(record) for record in result]

        return jsonify({
            'success': True,
            'data': results,
            'message': f'Found {len(results)} routes in the specified range'
        })

    except Exception as e:
        # Top-level boundary of the endpoint: report the failure as JSON.
        return jsonify({
            'success': False,
            'message': f'Error searching by range: {str(e)}'
        })
|
| 802 |
+
|
| 803 |
+
@app.route('/api/nlp/test-all-types')
def test_all_query_types():
    """Run a fixed set of sample queries per query type and report the results.

    Each sample is passed to ``enhanced_nlp_processor.process_query``. The
    response holds per-type results, a summary with counts and the success
    rate, and the current Neo4j connection state.
    """
    try:
        # Sample queries keyed by query type
        samples_by_type = {
            'fare_inquiry': [
                'What is the fare from Colombo to Kandy?',
                'fare of anuradhapura to kandy',
                'price from panadura to galle',
            ],
            'comparison': [
                'Compare fares from Colombo to Kandy vs Colombo to Galle',
                'Which is cheaper between Colombo to Kandy and Colombo to Anuradapura?',
            ],
            'range_search': [
                'Find routes under 500 rupees',
                'Show me routes between 200 and 800 rupees',
                'Routes over 1000 rupees',
            ],
            'recommendation': [
                'Recommend cheap routes',
                'Show me popular destinations',
                'What are the best routes from Colombo?',
            ],
            'route_inquiry': [
                'Routes from Colombo',
                'Routes to Galle',
                'What routes depart from Kandy?',
            ],
            'statistics': [
                'What is the average fare?',
                'Database statistics',
                'How many routes are there?',
            ],
        }

        def _evaluate(sample):
            """Process one sample; a raised exception becomes a failed record."""
            try:
                outcome = enhanced_nlp_processor.process_query(sample)
                return {
                    'query': sample,
                    'result': outcome,
                    'success': outcome.get('success', False),
                }
            except Exception as exc:
                return {
                    'query': sample,
                    'result': {
                        'success': False,
                        'message': f'Error processing query: {exc!s}',
                    },
                    'success': False,
                }

        results = {}
        for kind, samples in samples_by_type.items():
            evaluated = [_evaluate(sample) for sample in samples]
            passed = len([item for item in evaluated if item['success']])
            results[kind] = {
                'description': f'Test results for {kind} queries',
                'total_queries': len(samples),
                'successful_queries': passed,
                'examples': evaluated,
            }

        # Summary statistics
        total_queries = sum(map(len, samples_by_type.values()))
        total_successful = sum(
            entry['successful_queries'] for entry in results.values()
        )
        if total_queries > 0:
            success_rate = round((total_successful / total_queries) * 100, 2)
        else:
            success_rate = 0

        return jsonify({
            'success': True,
            'message': f'Tested {total_queries} queries across {len(samples_by_type)} types. {total_successful} successful.',
            'summary': {
                'total_query_types': len(samples_by_type),
                'total_queries_tested': total_queries,
                'successful_queries': total_successful,
                'success_rate': success_rate,
            },
            'results': results,
            'neo4j_connected': neo4j_service.is_connected(),
        })

    except Exception as exc:
        return jsonify({
            'success': False,
            'message': f'Error testing query types: {exc!s}',
            'neo4j_connected': neo4j_service.is_connected(),
        })
|
| 896 |
+
|
| 897 |
+
@app.errorhandler(404)
def not_found(error):
    """Return a JSON body with status 404 for unknown endpoints."""
    body = {'success': False, 'message': 'Endpoint not found'}
    return jsonify(body), 404
|
| 903 |
+
|
| 904 |
+
@app.errorhandler(500)
def internal_error(error):
    """Return a JSON body with status 500 for internal server errors."""
    body = {'success': False, 'message': 'Internal server error'}
    return jsonify(body), 500
|
| 910 |
+
|
| 911 |
+
if __name__ == '__main__':
    # Script entry point: print a startup banner, then start the Flask server.
    # Hugging Face Spaces uses port 7860 by default
    port = int(os.getenv('PORT', 7860))
    rule = "=" * 60

    print("🚌 Natural Language Transport Query System")
    print(rule)
    print(f"🚀 Starting on port {port}")
    print(f"🌐 Open your browser and go to: http://localhost:{port}")

    # Check Neo4j connection
    if not neo4j_service.is_connected():
        print("⚠️ Neo4j not connected - some features may not work")
    else:
        print("✅ Connected to Neo4j database")
        stats = neo4j_service.get_route_statistics()
        if stats:
            print(f"📊 Database: {stats.get('total_places', 0)} places, {stats.get('total_routes', 0)} routes")

    # Check LLM availability
    print(
        "🤖 LLM integration available for spell correction"
        if spell_corrector.llm_available
        else "⚠️ LLM not available - using fuzzy matching only"
    )

    capability_lines = (
        " • Multiple query formats (fare, price, cost)",
        " • Natural language patterns (from X to Y, X to Y fare)",
        " • Question formats (What is, How much, Show me)",
        " • Compact formats (Colombo to Kandy fare)",
        " • Spell correction and fuzzy matching",
        " • LLM-powered query interpretation",
        " • Automatic Cypher query generation",
        " • Advanced intent classification",
        " • Entity extraction and normalization",
        " • Comparison queries (vs, versus, compare)",
        " • Range search queries (under, over, between)",
        " • Recommendation queries (recommend, suggest)",
        " • Confidence scoring for query understanding",
        " • Sinhala language support with translation",
        " • Automatic Sinhala-English translation",
        " • Dictionary-based and Google Translate fallback",
    )
    print("\n🎯 Enhanced Natural Language Capabilities:")
    for line in capability_lines:
        print(line)

    endpoint_lines = (
        " • /api/query - Process natural language queries (enhanced NLP)",
        " • /api/nlp/capabilities - View enhanced NLP capabilities with live examples",
        " • /api/nlp/test-all-types - Test all query types with live results",
        " • /api/nlp/test - Test queries with detailed analysis",
        " • /api/nlp/demo - Get comprehensive demo queries",
        " • /api/examples - Get categorized example queries",
        " • /api/sinhala/examples - Get Sinhala example queries",
        " • /api/translation/test - Test translation functionality",
        " • /api/translation/translate - Translate text between languages",
        " • /api/status - System status and statistics",
        " • /api/suggestions - Get location suggestions",
        " • /api/places - Get all available places",
    )
    print("\n🔗 Available API Endpoints:")
    for line in endpoint_lines:
        print(line)

    print(rule)

    try:
        # debug=False for production
        app.run(debug=False, port=port, host='0.0.0.0')
    except Exception as exc:
        print(f"❌ Error starting application: {exc}")
        print("💡 Try running as administrator or check if another application is using the port")
|
| 973 |
+
|
| 974 |
+
|
config.py
ADDED
|
@@ -0,0 +1,263 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Configuration file for Transport Query Application
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import os
|
| 7 |
+
from dotenv import load_dotenv
|
| 8 |
+
|
| 9 |
+
# Load environment variables
|
| 10 |
+
load_dotenv()
|
| 11 |
+
|
| 12 |
+
class Config:
    """Application configuration for the Transport Query Application.

    Connection details and secrets are read from environment variables
    (``load_dotenv()`` is called at module import, before this class body
    runs).  Credentials must never be written into this file, not even in
    comments: anything committed here is readable by everyone who can
    see the repository.
    """

    # Neo4j connection settings; each value is None when the variable is unset.
    NEO4J_URI = os.getenv("NEO4J_URI")
    NEO4J_USER = os.getenv("NEO4J_USER")
    NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD")

    # OpenAI configuration (for LLM).  The model can be overridden through
    # the environment; the default matches the previously hard-coded value.
    OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
    OPENAI_MODEL = os.getenv("OPENAI_MODEL", "gpt-3.5-turbo")

    # Flask configuration.
    SECRET_KEY = os.getenv("SECRET_KEY")
    # Defaults to True (the historical behaviour); set FLASK_DEBUG=0 to
    # disable debug mode without editing code.
    DEBUG = os.getenv("FLASK_DEBUG", "true").strip().lower() in {"1", "true", "yes", "on"}

    # Spell correction configuration.
    SIMILARITY_THRESHOLD = 0.8
    MAX_SUGGESTIONS = 5

    # Lower-case spelling (including common misspellings) -> canonical name.
    # Every key appears exactly once.  Earlier revisions repeated many keys;
    # in a dict literal the last duplicate silently wins, so the values
    # below are the ones that were actually in effect.
    LOCATION_MAPPING = {
        'colombo': 'Colombo',
        'colmbo': 'Colombo',
        'kandy': 'Kandy',
        'panadura': 'Panadura',
        'panaduwa': 'Panadura',
        'galkissa': 'Mount Lavinia',
        'mount lavinia': 'Mount Lavinia',
        'kalutara': 'Kalutara',
        'aluthgama': 'Aluthgama',
        'balapitiya': 'Balapitiya',
        'ambalangoda': 'Ambalangoda',
        'hikkaduwa': 'Hikkaduwa',
        'galle': 'Galle',
        'koggala': 'Koggala',
        'waligama': 'Waligama',
        'matara': 'Matara',
        'anuradapura': 'Anuradapura',
        'anuradhapura': 'Anuradapura',
        'kurunagala': 'Kurunagala',
        'kurunegala': 'Kurunagala',
        'trincomalee': 'Trincomalee',
        'tricomalee': 'Trincomalee',
        'jaffna': 'Jaffna',
        'vavuniya': 'Vavuniya',
        'vavniyava': 'Vavuniya',
        'vavniyawa': 'Vavuniya',
        'chilaw': 'Chilaw',
        'chillaw': 'Chilaw',
        'puthalama': 'Puththalama',
        'puttalama': 'Puththalama',
        # NOTE(review): 'thangalle' used to be listed twice ('Thangalle',
        # then 'Tangalle'); the later value was the effective one and is
        # kept.  'thangalla' still maps to 'Thangalle' - confirm which
        # spelling the graph database actually stores.
        'thangalle': 'Tangalle',
        'thangalla': 'Thangalle',
        'bandarawela': 'Bandarawela',
        'bandatrawela': 'Bandarawela',
        'nuwaraeliya': 'Nuwaraeliya',
        'nuwara eliya': 'Nuwaraeliya',
        'badulla': 'Badulla',
        'monaragala': 'Monaragala',
        'ratnapura': 'Rathnapura',
        'rathnapura': 'Rathnapura',
        'kegalle': 'Kegalle',
        'mawanella': 'Mawanella',
        'mavanalla': 'Mawanella',
        'awissawella': 'Awissawella',
        'awisswella': 'Awissawella',
        'kaduwela': 'Kaduwela',
        'kaduruwela': 'Kaduwela',
        'maharagama': 'Maharagama',
        'dehiwala': 'Dehiwala',
        'moratuwa': 'Moratuwa',
        'beruwala': 'Beruwala',
        'bentota': 'Bentota',
        'induruwa': 'Induruwa',
        'kosgoda': 'Kosgoda',
        'ahungalla': 'Ahungalla',
        'karandeniya': 'Karandeniya',
        'eladuwa': 'Eladuwa',
        'gintota': 'Gintota',
        'boossa': 'Boossa',
        'katunayake': 'Katunayake',
        'negombo': 'Negombo',
        'seeduwa': 'Seeduwa',
        'ja-ela': 'Ja-ela',
        'wattala': 'Wattala',
        'kelaniya': 'Kelaniya',
        'kiribathgoda': 'Kiribathgoda',
        'kiribathgodas': 'Kiribathgoda',
        'ganemulla': 'Ganemulla',
        'mirigama': 'Mirigama',
        'polgahawela': 'Polgahawela',
        'warakapola': 'Warakapola',
        'galigamuwa': 'Galigamuwa',
        'galgamuwa': 'Galigamuwa',
        'ambepussa': 'Ambepussa',
        'alawwa': 'Alawwa',
        'peradeniya': 'Peradeniya',
        'gampola': 'Gampola',
        'nawalapitiya': 'Nawalapitiya',
        'teldeniya': 'Teldeniya',
        'kundasale': 'Kundasale',
        'katugastota': 'Katugastota',
        'pilimatalawa': 'Pilimatalawa',
        'harispattuwa': 'Harispattuwa',
        'akurana': 'Akurana',
        'matale': 'Matale',
        'dambulla': 'Dambulla',
        'sigiriya': 'Sigiriya',
        'habarana': 'Habarana',
        'polonnaruwa': 'Polonnaruwa',
        'minneriya': 'Minneriya',
        'galoya': 'Galoya',
        'batticaloa': 'Batticaloa',
        'batticolo': 'Batticaloa',
        'ampara': 'Ampara',
        'mahiyanganaya': 'Mahiyanganaya',
        'bibile': 'Bibile',
        'wellawaya': 'Wellawaya',
        'hali-ela': 'Hali-ela',
        'passara': 'Passara',
        'hatton': 'Hatton',
        'talawakele': 'Talawakele',
        'nanuoya': 'Nanuoya',
        'ambewela': 'Ambewela',
        'pattipola': 'Pattipola',
        'oya': 'Oya',
        'ella': 'Ella',
        'demodara': 'Demodara',
        'kataragama': 'Kataragama',
        'tissamaharama': 'Tissamaharama',
        'hambantota': 'Hambantota',
        'tangalle': 'Tangalle',
        'beliatta': 'Beliatta',
        'ambalantota': 'Ambalantota',
        'weligama': 'Weligama',
        'mirissa': 'Mirissa',
        'dikwella': 'Dikwella',
        'kamburupitiya': 'Kamburupitiya',
        'deniyaya': 'Deniyaya',
        'akurassa': 'Akurassa',
        'akuressa': 'Akurassa',
        'ahangama': 'Ahangama',
        'midigama': 'Midigama',
        'talpe': 'Talpe',
        'unawatuna': 'Unawatuna',
    }
|
enhanced_nlp_processor.py
ADDED
|
@@ -0,0 +1,904 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Enhanced NLP Processor for Transport Query Application
|
| 4 |
+
Advanced natural language understanding and query processing
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import re
|
| 8 |
+
import json
|
| 9 |
+
from typing import Dict, List, Tuple, Optional, Any
|
| 10 |
+
from datetime import datetime
|
| 11 |
+
from spell_corrector import SpellCorrector
|
| 12 |
+
from neo4j_service import Neo4jService
|
| 13 |
+
from config import Config
|
| 14 |
+
from logger import get_logger
|
| 15 |
+
|
| 16 |
+
class EnhancedNLPProcessor:
|
| 17 |
+
"""Advanced NLP processor with sophisticated query understanding"""
|
| 18 |
+
|
| 19 |
+
def __init__(self):
    """Create the collaborating services and the static lookup tables.

    Instantiates the configuration, spell corrector, Neo4j service and a
    logger named after the class, then stores two tables on the instance:
    ``query_patterns`` (regular expressions grouped by query category) and
    ``intent_keywords`` (keyword lists per intent label).
    """
    self.config = Config()
    self.spell_corrector = SpellCorrector()
    self.neo4j_service = Neo4jService()
    self.logger = get_logger(self.__class__.__name__)

    # --- Query patterns and templates, one list per category -------------
    fare_regexes = [
        r'(?:what\s+is\s+)?(?:the\s+)?(?:fare|price|cost)(?:\s+of)?(?:\s+from)?\s+([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)',
        r'(?:what\s+is\s+)?(?:the\s+)?(?:bus\s+)?(?:fare|price|cost)(?:\s+of)?(?:\s+from)?\s+([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)',
        r'(?:how\s+much\s+)?(?:is|does)\s+(?:the\s+)?(?:bus\s+)?(?:fare|price|cost)\s+(?:from\s+)?([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)',
        r'([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)\s+(?:fare|price|cost)',
        r'(?:fare|price|cost)\s+(?:from\s+)?([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)',
        r'(?:travel|transport)\s+(?:cost|price|fare)\s+(?:from\s+)?([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)',
        r'(?:bus|train)\s+(?:fare|price|cost)\s+(?:from\s+)?([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)',
        r'(?:ticket\s+price|ticket\s+fare)\s+(?:from\s+)?([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)',
    ]
    comparison_regexes = [
        r'(?:compare|difference)\s+(?:between\s+)?(?:fares?|prices?|costs?)\s+(?:from\s+)?([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)\s+(?:and|vs|versus)\s+([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)',
        r'(?:which\s+is\s+)?(?:cheaper|more\s+expensive)\s+(?:between\s+)?([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)\s+(?:and|vs|versus)\s+([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)',
    ]
    range_regexes = [
        r'(?:routes?|fares?|prices?)\s+(?:between|from)\s+([0-9,]+)\s+(?:and|to)\s+([0-9,]+)\s+(?:rupees?|rs?)',
        r'(?:find|show)\s+(?:routes?|fares?|prices?)\s+(?:under|below|less\s+than)\s+([0-9,]+)\s+(?:rupees?|rs?)',
        r'(?:find|show)\s+(?:routes?|fares?|prices?)\s+(?:over|above|more\s+than)\s+([0-9,]+)\s+(?:rupees?|rs?)',
    ]
    route_regexes = [
        r'(?:routes?|buses?|trains?)\s+(?:from|departing\s+from)\s+([a-zA-Z\s]+)',
        r'(?:routes?|buses?|trains?)\s+(?:to|arriving\s+at)\s+([a-zA-Z\s]+)',
        r'(?:how\s+many\s+)?(?:routes?|buses?|trains?)\s+(?:connect|go\s+to|from)\s+([a-zA-Z\s]+)',
        r'(?:direct|non-stop)\s+(?:routes?|buses?|trains?)\s+(?:from\s+)?([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)',
    ]
    statistical_regexes = [
        r'(?:average|mean|median)\s+(?:fare|price|cost)',
        r'(?:total|sum)\s+(?:of\s+)?(?:all\s+)?(?:fares?|prices?|costs?)',
        r'(?:how\s+many\s+)?(?:routes?|places?|locations?)',
        r'(?:database|system)\s+(?:statistics?|stats?|overview)',
        r'(?:summary|overview)\s+(?:of\s+)?(?:transport|fare)\s+(?:data|database)',
    ]
    recommendation_regexes = [
        r'(?:recommend|suggest)\s+(?:cheap|budget|affordable)\s+(?:routes?|options?)',
        r'(?:best|optimal)\s+(?:route|way)\s+(?:from\s+)?([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)',
        r'(?:popular|frequent)\s+(?:routes?|destinations?)',
        r'(?:hidden|secret|unknown)\s+(?:routes?|destinations?)',
    ]

    self.query_patterns = {
        'fare_queries': fare_regexes,
        'comparison_queries': comparison_regexes,
        'range_queries': range_regexes,
        'route_queries': route_regexes,
        'statistical_queries': statistical_regexes,
        'recommendation_queries': recommendation_regexes,
    }

    # --- Query intent classification: keywords per intent label ----------
    keyword_table = [
        ('fare_inquiry', ['fare', 'price', 'cost', 'how much', 'what is the cost']),
        ('route_inquiry', ['route', 'bus', 'train', 'transport', 'how to get', 'way to']),
        ('comparison', ['compare', 'difference', 'vs', 'versus', 'which is', 'better']),
        ('statistics', ['statistics', 'stats', 'overview', 'summary', 'total', 'average']),
        ('recommendation', ['recommend', 'suggest', 'best', 'optimal', 'popular']),
        ('range_search', ['between', 'under', 'over', 'above', 'below', 'range']),
        ('availability', ['available', 'exist', 'have', 'is there', 'can i']),
    ]
    self.intent_keywords = dict(keyword_table)
|
| 77 |
+
|
| 78 |
+
def process_query(self, user_query: str) -> Dict[str, Any]:
    """
    Process natural language query with advanced NLP understanding

    The query is preprocessed, entities and intent are extracted, a Cypher
    query is generated and, when one is produced, executed and formatted.
    If no Cypher query could be generated the unclear-query handler builds
    the response instead.

    Args:
        user_query: Natural language query string

    Returns:
        The response dictionary with a ``query_analysis`` entry added
        (original and processed query, intent, entities, confidence).
        If any step raises, a dictionary with ``success`` False, an error
        ``message`` and ``suggestions`` is returned instead; the exception
        is logged with its traceback and not re-raised.
    """
    try:
        # Step 1: Preprocess query
        processed_query = self._preprocess_query(user_query)
        self.logger.info(f"Processing query: original='{user_query}', preprocessed='{processed_query}'")

        # Step 2: Extract entities and intent
        entities = self._extract_entities(processed_query)
        intent = self._classify_intent(processed_query, entities)

        # Step 3: Generate Cypher query
        cypher_query = self._generate_cypher_query(intent, entities, processed_query)
        self.logger.debug(f"Intent: {intent}; Entities: {entities}; Cypher: {str(cypher_query).strip()[:200]}")

        # Step 4: Execute query and format results
        if cypher_query:
            results = self._execute_query(cypher_query)
            self.logger.info(f"Query results count: {len(results)}")
            response = self._format_response(intent, entities, results, processed_query)
        else:
            response = self._handle_unclear_query(processed_query)

        # Step 5: Add metadata
        response.update({
            'query_analysis': {
                'original_query': user_query,
                'processed_query': processed_query,
                'intent': intent,
                'entities': entities,
                'confidence': self._calculate_confidence(intent, entities)
            }
        })

        return response

    except Exception as e:
        # Top-level boundary: the caller always gets a well-formed payload,
        # but the failure must still be recorded so it can be diagnosed.
        self.logger.exception("Error processing query %r", user_query)
        return {
            'success': False,
            'message': f'Error processing query: {str(e)}',
            'suggestions': self._get_suggestions()
        }
|
| 130 |
+
|
| 131 |
+
def _preprocess_query(self, query: str) -> str:
|
| 132 |
+
"""Preprocess and normalize the query"""
|
| 133 |
+
# Convert to lowercase
|
| 134 |
+
query = query.lower().strip()
|
| 135 |
+
|
| 136 |
+
# Remove extra whitespace
|
| 137 |
+
query = re.sub(r'\s+', ' ', query)
|
| 138 |
+
|
| 139 |
+
# Normalize common variations
|
| 140 |
+
replacements = {
|
| 141 |
+
'rs.': 'rupees',
|
| 142 |
+
'rs': 'rupees',
|
| 143 |
+
'lkr': 'rupees',
|
| 144 |
+
'→': 'to',
|
| 145 |
+
'->': 'to',
|
| 146 |
+
'vs': 'versus',
|
| 147 |
+
'&': 'and',
|
| 148 |
+
'w/': 'with',
|
| 149 |
+
'w/o': 'without'
|
| 150 |
+
}
|
| 151 |
+
|
| 152 |
+
for old, new in replacements.items():
|
| 153 |
+
query = query.replace(old, new)
|
| 154 |
+
|
| 155 |
+
return query
|
| 156 |
+
|
| 157 |
+
def _extract_entities(self, query: str) -> Dict[str, Any]:
|
| 158 |
+
"""Extract entities from the query"""
|
| 159 |
+
entities = {
|
| 160 |
+
'locations': [],
|
| 161 |
+
'numbers': [],
|
| 162 |
+
'currencies': [],
|
| 163 |
+
'comparators': [],
|
| 164 |
+
'time_expressions': []
|
| 165 |
+
}
|
| 166 |
+
|
| 167 |
+
# Extract locations with priority for different query types
|
| 168 |
+
comparison_patterns = [
|
| 169 |
+
r'(?:which\s+is\s+)?(?:cheaper|more\s+expensive)\s+(?:between\s+)?([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)\s+(?:and|vs|versus)\s+([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)(?:\s|$|\?)',
|
| 170 |
+
r'(?:what\s+is\s+)?(?:the\s+)?(?:difference|compare)\s+(?:in\s+)?(?:fare|price|cost)\s+(?:between\s+)?([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)\s+(?:and|vs|versus)\s+([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)(?:\s|$|\?)',
|
| 171 |
+
r'(?:compare|difference)\s+(?:between\s+)?(?:fares?|prices?|costs?)\s+(?:from\s+)?([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)\s+(?:and|vs|versus)\s+([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)(?:\s|$|\?)',
|
| 172 |
+
# Simpler patterns for comparison
|
| 173 |
+
r'([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)\s+(?:and|vs|versus)\s+([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)(?:\s|$|\?)',
|
| 174 |
+
r'([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)\s+(?:and|vs|versus)\s+([a-zA-Z\s]+?)(?:\s|$|\?)'
|
| 175 |
+
]
|
| 176 |
+
|
| 177 |
+
fare_patterns = [
|
| 178 |
+
r'(?:fare|price|cost)\s+(?:of|from)?\s+([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)(?:\s|$|\?)',
|
| 179 |
+
r'(?:what\s+is\s+)?(?:the\s+)?(?:fare|price|cost)(?:\s+of)?(?:\s+from)?\s+([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)(?:\s|$|\?)',
|
| 180 |
+
r'(?:how\s+much\s+)?(?:is|does)\s+(?:the\s+)?(?:fare|price|cost)\s+(?:from\s+)?([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)(?:\s|$|\?)'
|
| 181 |
+
]
|
| 182 |
+
|
| 183 |
+
general_patterns = [
|
| 184 |
+
r'from\s+([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)(?:\s|$|\?)',
|
| 185 |
+
r'([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)(?:\s|$|\?)',
|
| 186 |
+
r'between\s+([a-zA-Z\s]+?)\s+and\s+([a-zA-Z\s]+?)(?:\s|$|\?)'
|
| 187 |
+
]
|
| 188 |
+
|
| 189 |
+
# Use a set to avoid duplicates
|
| 190 |
+
seen_locations = set()
|
| 191 |
+
|
| 192 |
+
# Try comparison patterns first (highest priority)
|
| 193 |
+
for pattern in comparison_patterns:
|
| 194 |
+
matches = re.finditer(pattern, query, re.IGNORECASE)
|
| 195 |
+
for match in matches:
|
| 196 |
+
locations = [loc.strip() for loc in match.groups() if loc.strip()]
|
| 197 |
+
for loc in locations:
|
| 198 |
+
# Skip if we've already processed this location
|
| 199 |
+
if loc.lower() in seen_locations:
|
| 200 |
+
continue
|
| 201 |
+
seen_locations.add(loc.lower())
|
| 202 |
+
|
| 203 |
+
corrected, confidence, method = self.spell_corrector.correct_location(loc)
|
| 204 |
+
if confidence > 0.5:
|
| 205 |
+
entities['locations'].append({
|
| 206 |
+
'original': loc,
|
| 207 |
+
'corrected': corrected,
|
| 208 |
+
'confidence': confidence,
|
| 209 |
+
'method': method
|
| 210 |
+
})
|
| 211 |
+
|
| 212 |
+
# If no locations found with comparison patterns, try fare patterns
|
| 213 |
+
if not entities['locations']:
|
| 214 |
+
for pattern in fare_patterns:
|
| 215 |
+
matches = re.finditer(pattern, query, re.IGNORECASE)
|
| 216 |
+
for match in matches:
|
| 217 |
+
locations = [loc.strip() for loc in match.groups() if loc.strip()]
|
| 218 |
+
for loc in locations:
|
| 219 |
+
# Skip if we've already processed this location
|
| 220 |
+
if loc.lower() in seen_locations:
|
| 221 |
+
continue
|
| 222 |
+
seen_locations.add(loc.lower())
|
| 223 |
+
|
| 224 |
+
corrected, confidence, method = self.spell_corrector.correct_location(loc)
|
| 225 |
+
if confidence > 0.5:
|
| 226 |
+
entities['locations'].append({
|
| 227 |
+
'original': loc,
|
| 228 |
+
'corrected': corrected,
|
| 229 |
+
'confidence': confidence,
|
| 230 |
+
'method': method
|
| 231 |
+
})
|
| 232 |
+
|
| 233 |
+
# If no locations found with fare patterns, try general patterns
|
| 234 |
+
if not entities['locations']:
|
| 235 |
+
for pattern in general_patterns:
|
| 236 |
+
matches = re.finditer(pattern, query, re.IGNORECASE)
|
| 237 |
+
for match in matches:
|
| 238 |
+
locations = [loc.strip() for loc in match.groups() if loc.strip()]
|
| 239 |
+
for loc in locations:
|
| 240 |
+
# Skip if we've already processed this location
|
| 241 |
+
if loc.lower() in seen_locations:
|
| 242 |
+
continue
|
| 243 |
+
seen_locations.add(loc.lower())
|
| 244 |
+
|
| 245 |
+
corrected, confidence, method = self.spell_corrector.correct_location(loc)
|
| 246 |
+
if confidence > 0.5:
|
| 247 |
+
entities['locations'].append({
|
| 248 |
+
'original': loc,
|
| 249 |
+
'corrected': corrected,
|
| 250 |
+
'confidence': confidence,
|
| 251 |
+
'method': method
|
| 252 |
+
})
|
| 253 |
+
|
| 254 |
+
|
| 255 |
+
|
| 256 |
+
|
| 257 |
+
|
| 258 |
+
# Extract numbers and currencies
|
| 259 |
+
number_patterns = [
|
| 260 |
+
r'(under|below|less\s+than|over|above|more\s+than)\s+(\d+(?:,\d+)*(?:\.\d+)?)\s*(rupees?|rs?|lkr)?',
|
| 261 |
+
r'between\s+(\d+(?:,\d+)*(?:\.\d+)?)\s+and\s+(\d+(?:,\d+)*(?:\.\d+)?)\s*(rupees?|rs?|lkr)?',
|
| 262 |
+
r'(\d+(?:,\d+)*(?:\.\d+)?)\s*(rupees?|rs?|lkr)?'
|
| 263 |
+
]
|
| 264 |
+
|
| 265 |
+
for pattern in number_patterns:
|
| 266 |
+
matches = re.finditer(pattern, query, re.IGNORECASE)
|
| 267 |
+
for match in matches:
|
| 268 |
+
groups = match.groups()
|
| 269 |
+
if len(groups) >= 2:
|
| 270 |
+
if groups[0] in ['under', 'below', 'less than', 'over', 'above', 'more than']:
|
| 271 |
+
# Pattern: (under|below|less than|over|above|more than) (number) (currency)
|
| 272 |
+
comparator = groups[0]
|
| 273 |
+
number = groups[1]
|
| 274 |
+
currency = groups[2] if len(groups) >= 3 else 'rupees'
|
| 275 |
+
|
| 276 |
+
entities['numbers'].append({
|
| 277 |
+
'value': float(number.replace(',', '')),
|
| 278 |
+
'currency': currency,
|
| 279 |
+
'comparator': comparator
|
| 280 |
+
})
|
| 281 |
+
elif 'between' in pattern:
|
| 282 |
+
# Pattern: between (number1) and (number2) (currency)
|
| 283 |
+
min_number = groups[0]
|
| 284 |
+
max_number = groups[1]
|
| 285 |
+
currency = groups[2] if len(groups) >= 3 else 'rupees'
|
| 286 |
+
|
| 287 |
+
entities['numbers'].append({
|
| 288 |
+
'value': float(min_number.replace(',', '')),
|
| 289 |
+
'currency': currency,
|
| 290 |
+
'comparator': 'between_min'
|
| 291 |
+
})
|
| 292 |
+
entities['numbers'].append({
|
| 293 |
+
'value': float(max_number.replace(',', '')),
|
| 294 |
+
'currency': currency,
|
| 295 |
+
'comparator': 'between_max'
|
| 296 |
+
})
|
| 297 |
+
else:
|
| 298 |
+
# Pattern: (number) (currency)
|
| 299 |
+
number = groups[0]
|
| 300 |
+
currency = groups[1] if len(groups) >= 2 else 'rupees'
|
| 301 |
+
|
| 302 |
+
entities['numbers'].append({
|
| 303 |
+
'value': float(number.replace(',', '')),
|
| 304 |
+
'currency': currency,
|
| 305 |
+
'comparator': None
|
| 306 |
+
})
|
| 307 |
+
|
| 308 |
+
# Extract comparators
|
| 309 |
+
comparator_patterns = [
|
| 310 |
+
r'(cheaper|more\s+expensive|better|worse|faster|slower)',
|
| 311 |
+
r'(compare|difference|vs|versus)',
|
| 312 |
+
r'(under|below|less\s+than|over|above|more\s+than)'
|
| 313 |
+
]
|
| 314 |
+
|
| 315 |
+
for pattern in comparator_patterns:
|
| 316 |
+
matches = re.finditer(pattern, query, re.IGNORECASE)
|
| 317 |
+
for match in matches:
|
| 318 |
+
entities['comparators'].append(match.group(1).lower())
|
| 319 |
+
|
| 320 |
+
return entities
|
| 321 |
+
|
| 322 |
+
def _classify_intent(self, query: str, entities: Dict = None) -> Dict[str, Any]:
|
| 323 |
+
"""Classify the intent of the query"""
|
| 324 |
+
intent_scores = {}
|
| 325 |
+
|
| 326 |
+
for intent, keywords in self.intent_keywords.items():
|
| 327 |
+
score = 0
|
| 328 |
+
for keyword in keywords:
|
| 329 |
+
if keyword in query:
|
| 330 |
+
score += 1
|
| 331 |
+
intent_scores[intent] = score
|
| 332 |
+
|
| 333 |
+
# Get primary intent
|
| 334 |
+
primary_intent = max(intent_scores.items(), key=lambda x: x[1])
|
| 335 |
+
|
| 336 |
+
# Check for specific patterns with priority
|
| 337 |
+
if any(pattern in query for pattern in ['compare', 'difference', 'vs', 'versus', 'cheaper', 'more expensive']):
|
| 338 |
+
primary_intent = ('comparison', 10)
|
| 339 |
+
elif any(pattern in query for pattern in ['recommend', 'suggest', 'best', 'optimal', 'popular']):
|
| 340 |
+
primary_intent = ('recommendation', 10)
|
| 341 |
+
elif any(pattern in query for pattern in ['between', 'under', 'over', 'above', 'below', 'range']):
|
| 342 |
+
primary_intent = ('range_search', 10)
|
| 343 |
+
elif any(pattern in query for pattern in ['fare', 'price', 'cost', 'how much']):
|
| 344 |
+
# Check if we have at least 2 locations
|
| 345 |
+
if entities and len(entities.get('locations', [])) >= 2:
|
| 346 |
+
primary_intent = ('fare_inquiry', 10)
|
| 347 |
+
elif any(pattern in query for pattern in ['route', 'bus', 'train', 'transport']):
|
| 348 |
+
primary_intent = ('route_inquiry', 10)
|
| 349 |
+
|
| 350 |
+
return {
|
| 351 |
+
'primary': primary_intent[0],
|
| 352 |
+
'confidence': primary_intent[1] / 10,
|
| 353 |
+
'all_scores': intent_scores
|
| 354 |
+
}
|
| 355 |
+
|
| 356 |
+
def _generate_cypher_query(self, intent: Dict, entities: Dict, query: str) -> Optional[str]:
|
| 357 |
+
"""Generate Cypher query using LLM for better understanding"""
|
| 358 |
+
try:
|
| 359 |
+
# Try LLM-based query generation first
|
| 360 |
+
llm_query = self._generate_cypher_with_llm(query, intent, entities)
|
| 361 |
+
if llm_query:
|
| 362 |
+
return llm_query
|
| 363 |
+
except Exception as e:
|
| 364 |
+
print(f"LLM query generation failed: {e}")
|
| 365 |
+
|
| 366 |
+
# Fallback to rule-based generation
|
| 367 |
+
primary_intent = intent['primary']
|
| 368 |
+
|
| 369 |
+
if primary_intent == 'fare_inquiry':
|
| 370 |
+
return self._generate_fare_query(entities)
|
| 371 |
+
elif primary_intent == 'comparison':
|
| 372 |
+
return self._generate_comparison_query(entities)
|
| 373 |
+
elif primary_intent == 'route_inquiry':
|
| 374 |
+
return self._generate_route_query(entities, query)
|
| 375 |
+
elif primary_intent == 'statistics':
|
| 376 |
+
return self._generate_statistics_query(entities)
|
| 377 |
+
elif primary_intent == 'recommendation':
|
| 378 |
+
return self._generate_recommendation_query(entities, query)
|
| 379 |
+
elif primary_intent == 'range_search':
|
| 380 |
+
return self._generate_range_query(entities)
|
| 381 |
+
else:
|
| 382 |
+
return self._generate_fallback_query(query)
|
| 383 |
+
|
| 384 |
+
def _generate_fare_query(self, entities: Dict) -> Optional[str]:
|
| 385 |
+
"""Generate fare inquiry Cypher query"""
|
| 386 |
+
locations = entities.get('locations', [])
|
| 387 |
+
|
| 388 |
+
if len(locations) >= 2:
|
| 389 |
+
from_loc = locations[0]['corrected']
|
| 390 |
+
to_loc = locations[1]['corrected']
|
| 391 |
+
|
| 392 |
+
return f"""
|
| 393 |
+
MATCH (a:Place {{name: '{from_loc}'}})-[r:Fare]->(b:Place {{name: '{to_loc}'}})
|
| 394 |
+
RETURN
|
| 395 |
+
a.name as from_place,
|
| 396 |
+
b.name as to_place,
|
| 397 |
+
r.fare as fare,
|
| 398 |
+
'Direct route' as route_type
|
| 399 |
+
"""
|
| 400 |
+
|
| 401 |
+
return None
|
| 402 |
+
|
| 403 |
+
def _generate_comparison_query(self, entities: Dict) -> Optional[str]:
|
| 404 |
+
"""Generate comparison Cypher query"""
|
| 405 |
+
locations = entities.get('locations', [])
|
| 406 |
+
|
| 407 |
+
if len(locations) >= 3:
|
| 408 |
+
# Handle case where we have same origin, different destinations
|
| 409 |
+
if len(locations) == 3:
|
| 410 |
+
# Pattern: "Colombo to Kandy and Colombo to Anuradapura"
|
| 411 |
+
route1_from = locations[0]['corrected']
|
| 412 |
+
route1_to = locations[1]['corrected']
|
| 413 |
+
route2_from = locations[0]['corrected'] # Same origin
|
| 414 |
+
route2_to = locations[2]['corrected']
|
| 415 |
+
elif len(locations) >= 4:
|
| 416 |
+
# Pattern: "Colombo to Kandy and Anuradapura to Galle"
|
| 417 |
+
route1_from = locations[0]['corrected']
|
| 418 |
+
route1_to = locations[1]['corrected']
|
| 419 |
+
route2_from = locations[2]['corrected']
|
| 420 |
+
route2_to = locations[3]['corrected']
|
| 421 |
+
else:
|
| 422 |
+
return None
|
| 423 |
+
|
| 424 |
+
return f"""
|
| 425 |
+
MATCH (a1:Place {{name: '{route1_from}'}})-[r1:Fare]->(b1:Place {{name: '{route1_to}'}})
|
| 426 |
+
MATCH (a2:Place {{name: '{route2_from}'}})-[r2:Fare]->(b2:Place {{name: '{route2_to}'}})
|
| 427 |
+
RETURN
|
| 428 |
+
a1.name + ' to ' + b1.name as route1,
|
| 429 |
+
r1.fare as fare1,
|
| 430 |
+
a2.name + ' to ' + b2.name as route2,
|
| 431 |
+
r2.fare as fare2,
|
| 432 |
+
r1.fare - r2.fare as difference,
|
| 433 |
+
CASE
|
| 434 |
+
WHEN r1.fare < r2.fare THEN 'Route 1 is cheaper'
|
| 435 |
+
WHEN r1.fare > r2.fare THEN 'Route 2 is cheaper'
|
| 436 |
+
ELSE 'Both routes have the same fare'
|
| 437 |
+
END as comparison
|
| 438 |
+
"""
|
| 439 |
+
|
| 440 |
+
return None
|
| 441 |
+
|
| 442 |
+
def _generate_route_query(self, entities: Dict, query: str) -> Optional[str]:
|
| 443 |
+
"""Generate route inquiry Cypher query"""
|
| 444 |
+
locations = entities.get('locations', [])
|
| 445 |
+
|
| 446 |
+
if 'from' in query and locations:
|
| 447 |
+
location = locations[0]['corrected']
|
| 448 |
+
return f"""
|
| 449 |
+
MATCH (a:Place {{name: '{location}'}})-[r:Fare]->(b:Place)
|
| 450 |
+
RETURN
|
| 451 |
+
a.name as from_place,
|
| 452 |
+
b.name as to_place,
|
| 453 |
+
r.fare as fare
|
| 454 |
+
ORDER BY r.fare
|
| 455 |
+
"""
|
| 456 |
+
elif 'to' in query and locations:
|
| 457 |
+
location = locations[0]['corrected']
|
| 458 |
+
return f"""
|
| 459 |
+
MATCH (a:Place)-[r:Fare]->(b:Place {{name: '{location}'}})
|
| 460 |
+
RETURN
|
| 461 |
+
a.name as from_place,
|
| 462 |
+
b.name as to_place,
|
| 463 |
+
r.fare as fare
|
| 464 |
+
ORDER BY r.fare
|
| 465 |
+
"""
|
| 466 |
+
|
| 467 |
+
return None
|
| 468 |
+
|
| 469 |
+
def _generate_statistics_query(self, entities: Dict) -> str:
|
| 470 |
+
"""Generate statistics Cypher query"""
|
| 471 |
+
return """
|
| 472 |
+
MATCH (p:Place)
|
| 473 |
+
MATCH ()-[r:Fare]->()
|
| 474 |
+
RETURN
|
| 475 |
+
count(DISTINCT p) as total_places,
|
| 476 |
+
count(r) as total_routes,
|
| 477 |
+
round(avg(r.fare), 2) as average_fare,
|
| 478 |
+
min(r.fare) as minimum_fare,
|
| 479 |
+
max(r.fare) as maximum_fare,
|
| 480 |
+
round(stdDev(r.fare), 2) as fare_standard_deviation
|
| 481 |
+
"""
|
| 482 |
+
|
| 483 |
+
def _generate_recommendation_query(self, entities: Dict, query: str) -> str:
|
| 484 |
+
"""Generate recommendation Cypher query"""
|
| 485 |
+
if 'cheap' in query or 'budget' in query or 'affordable' in query:
|
| 486 |
+
return """
|
| 487 |
+
MATCH (a:Place)-[r:Fare]->(b:Place)
|
| 488 |
+
RETURN
|
| 489 |
+
a.name as from_place,
|
| 490 |
+
b.name as to_place,
|
| 491 |
+
r.fare as fare
|
| 492 |
+
ORDER BY r.fare ASC
|
| 493 |
+
LIMIT 10
|
| 494 |
+
"""
|
| 495 |
+
elif 'popular' in query or 'frequent' in query:
|
| 496 |
+
return """
|
| 497 |
+
MATCH (a:Place)-[r:Fare]->(b:Place)
|
| 498 |
+
RETURN
|
| 499 |
+
a.name as from_place,
|
| 500 |
+
b.name as to_place,
|
| 501 |
+
r.fare as fare
|
| 502 |
+
ORDER BY r.fare DESC
|
| 503 |
+
LIMIT 10
|
| 504 |
+
"""
|
| 505 |
+
else:
|
| 506 |
+
return """
|
| 507 |
+
MATCH (a:Place)-[r:Fare]->(b:Place)
|
| 508 |
+
RETURN
|
| 509 |
+
a.name as from_place,
|
| 510 |
+
b.name as to_place,
|
| 511 |
+
r.fare as fare
|
| 512 |
+
ORDER BY r.fare ASC
|
| 513 |
+
LIMIT 5
|
| 514 |
+
"""
|
| 515 |
+
|
| 516 |
+
def _generate_range_query(self, entities: Dict) -> Optional[str]:
|
| 517 |
+
"""Generate range search Cypher query"""
|
| 518 |
+
numbers = entities.get('numbers', [])
|
| 519 |
+
|
| 520 |
+
if numbers:
|
| 521 |
+
# Check for between range
|
| 522 |
+
between_min = None
|
| 523 |
+
between_max = None
|
| 524 |
+
single_value = None
|
| 525 |
+
single_comparator = None
|
| 526 |
+
|
| 527 |
+
for number in numbers:
|
| 528 |
+
comparator = number.get('comparator', '')
|
| 529 |
+
value = number['value']
|
| 530 |
+
|
| 531 |
+
if comparator == 'between_min':
|
| 532 |
+
between_min = value
|
| 533 |
+
elif comparator == 'between_max':
|
| 534 |
+
between_max = value
|
| 535 |
+
elif comparator in ['under', 'below', 'less than', 'over', 'above', 'more than']:
|
| 536 |
+
single_value = value
|
| 537 |
+
single_comparator = comparator
|
| 538 |
+
|
| 539 |
+
# Generate query based on type
|
| 540 |
+
if between_min is not None and between_max is not None:
|
| 541 |
+
return f"""
|
| 542 |
+
MATCH (a:Place)-[r:Fare]->(b:Place)
|
| 543 |
+
WHERE r.fare >= {between_min} AND r.fare <= {between_max}
|
| 544 |
+
RETURN
|
| 545 |
+
a.name as from_place,
|
| 546 |
+
b.name as to_place,
|
| 547 |
+
r.fare as fare
|
| 548 |
+
ORDER BY r.fare ASC
|
| 549 |
+
"""
|
| 550 |
+
elif single_value is not None and single_comparator is not None:
|
| 551 |
+
if single_comparator in ['under', 'below', 'less than']:
|
| 552 |
+
return f"""
|
| 553 |
+
MATCH (a:Place)-[r:Fare]->(b:Place)
|
| 554 |
+
WHERE r.fare < {single_value}
|
| 555 |
+
RETURN
|
| 556 |
+
a.name as from_place,
|
| 557 |
+
b.name as to_place,
|
| 558 |
+
r.fare as fare
|
| 559 |
+
ORDER BY r.fare ASC
|
| 560 |
+
"""
|
| 561 |
+
elif single_comparator in ['over', 'above', 'more than']:
|
| 562 |
+
return f"""
|
| 563 |
+
MATCH (a:Place)-[r:Fare]->(b:Place)
|
| 564 |
+
WHERE r.fare > {single_value}
|
| 565 |
+
RETURN
|
| 566 |
+
a.name as from_place,
|
| 567 |
+
b.name as to_place,
|
| 568 |
+
r.fare as fare
|
| 569 |
+
ORDER BY r.fare DESC
|
| 570 |
+
"""
|
| 571 |
+
|
| 572 |
+
return None
|
| 573 |
+
|
| 574 |
+
def _generate_cypher_with_llm(self, query: str, intent: Dict, entities: Dict) -> Optional[str]:
|
| 575 |
+
"""Generate Cypher query using LLM for better understanding"""
|
| 576 |
+
try:
|
| 577 |
+
if not self.config.OPENAI_API_KEY:
|
| 578 |
+
return None
|
| 579 |
+
|
| 580 |
+
# Get available places for context
|
| 581 |
+
available_places = list(self.neo4j_service.get_all_places())
|
| 582 |
+
|
| 583 |
+
# Create comprehensive prompt for Cypher generation
|
| 584 |
+
prompt = f"""
|
| 585 |
+
You are a Neo4j Cypher query generator for a transport database.
|
| 586 |
+
|
| 587 |
+
Database Schema:
|
| 588 |
+
- Nodes: Place (with property 'name')
|
| 589 |
+
- Relationships: Fare (with property 'fare')
|
| 590 |
+
|
| 591 |
+
Available Places: {', '.join(available_places[:50])}... (total: {len(available_places)})
|
| 592 |
+
|
| 593 |
+
User Query: "{query}"
|
| 594 |
+
Detected Intent: {intent.get('primary', 'unknown')}
|
| 595 |
+
Extracted Entities: {entities}
|
| 596 |
+
|
| 597 |
+
Your task is to generate a valid Cypher query that answers the user's question.
|
| 598 |
+
|
| 599 |
+
Query Types and Examples:
|
| 600 |
+
|
| 601 |
+
1. FARE INQUIRY:
|
| 602 |
+
- "What is the fare from Colombo to Kandy?"
|
| 603 |
+
- Cypher: MATCH (a:Place {{name: 'Colombo'}})-[r:Fare]->(b:Place {{name: 'Kandy'}}) RETURN a.name as from_place, b.name as to_place, r.fare as fare
|
| 604 |
+
|
| 605 |
+
2. COMPARISON:
|
| 606 |
+
- "Compare fares from Colombo to Kandy vs Colombo to Galle"
|
| 607 |
+
- Cypher: MATCH (a1:Place {{name: 'Colombo'}})-[r1:Fare]->(b1:Place {{name: 'Kandy'}}) MATCH (a2:Place {{name: 'Colombo'}})-[r2:Fare]->(b2:Place {{name: 'Galle'}}) RETURN a1.name + ' to ' + b1.name as route1, r1.fare as fare1, a2.name + ' to ' + b2.name as route2, r2.fare as fare2, r1.fare - r2.fare as difference
|
| 608 |
+
|
| 609 |
+
3. RANGE SEARCH:
|
| 610 |
+
- "Find routes under 500 rupees"
|
| 611 |
+
- Cypher: MATCH (a:Place)-[r:Fare]->(b:Place) WHERE r.fare < 500 RETURN a.name as from_place, b.name as to_place, r.fare as fare ORDER BY r.fare ASC
|
| 612 |
+
|
| 613 |
+
4. RECOMMENDATION:
|
| 614 |
+
- "Recommend cheap routes"
|
| 615 |
+
- Cypher: MATCH (a:Place)-[r:Fare]->(b:Place) RETURN a.name as from_place, b.name as to_place, r.fare as fare ORDER BY r.fare ASC LIMIT 10
|
| 616 |
+
|
| 617 |
+
5. STATISTICS:
|
| 618 |
+
- "What is the average fare?"
|
| 619 |
+
- Cypher: MATCH ()-[r:Fare]->() RETURN round(avg(r.fare), 2) as average_fare, min(r.fare) as min_fare, max(r.fare) as max_fare
|
| 620 |
+
|
| 621 |
+
6. ROUTE INQUIRY:
|
| 622 |
+
- "Routes from Colombo"
|
| 623 |
+
- Cypher: MATCH (a:Place {{name: 'Colombo'}})-[r:Fare]->(b:Place) RETURN a.name as from_place, b.name as to_place, r.fare as fare ORDER BY r.fare
|
| 624 |
+
|
| 625 |
+
Important Rules:
|
| 626 |
+
1. Always use proper Cypher syntax
|
| 627 |
+
2. Use exact place names from the available places list
|
| 628 |
+
3. For comparisons, use multiple MATCH clauses
|
| 629 |
+
4. For ranges, use WHERE clauses with appropriate operators
|
| 630 |
+
5. For statistics, use aggregation functions
|
| 631 |
+
6. Always include meaningful column aliases
|
| 632 |
+
7. Use ORDER BY for sorted results
|
| 633 |
+
8. Use LIMIT for large result sets
|
| 634 |
+
|
| 635 |
+
Return ONLY the Cypher query, nothing else. If you cannot generate a valid query, return "FALLBACK".
|
| 636 |
+
"""
|
| 637 |
+
|
| 638 |
+
cypher_query = None
|
| 639 |
+
# Prefer new SDK
|
| 640 |
+
try:
|
| 641 |
+
from openai import OpenAI
|
| 642 |
+
client = OpenAI(api_key=self.config.OPENAI_API_KEY)
|
| 643 |
+
response = client.chat.completions.create(
|
| 644 |
+
model=self.config.OPENAI_MODEL,
|
| 645 |
+
messages=[
|
| 646 |
+
{"role": "system", "content": "You are a Cypher query generator. Return only valid Cypher queries."},
|
| 647 |
+
{"role": "user", "content": prompt}
|
| 648 |
+
],
|
| 649 |
+
max_tokens=300,
|
| 650 |
+
temperature=0.1
|
| 651 |
+
)
|
| 652 |
+
cypher_query = response.choices[0].message.content.strip()
|
| 653 |
+
except Exception as sdk_err:
|
| 654 |
+
import openai
|
| 655 |
+
try:
|
| 656 |
+
openai.api_key = self.config.OPENAI_API_KEY
|
| 657 |
+
response = openai.ChatCompletion.create(
|
| 658 |
+
model=self.config.OPENAI_MODEL,
|
| 659 |
+
messages=[
|
| 660 |
+
{"role": "system", "content": "You are a Cypher query generator. Return only valid Cypher queries."},
|
| 661 |
+
{"role": "user", "content": prompt}
|
| 662 |
+
],
|
| 663 |
+
max_tokens=300,
|
| 664 |
+
temperature=0.1
|
| 665 |
+
)
|
| 666 |
+
cypher_query = response.choices[0].message.content.strip()
|
| 667 |
+
except Exception:
|
| 668 |
+
raise sdk_err
|
| 669 |
+
|
| 670 |
+
# Validate the response
|
| 671 |
+
if cypher_query.upper() == "FALLBACK":
|
| 672 |
+
return None
|
| 673 |
+
|
| 674 |
+
# Basic validation - check if it starts with MATCH
|
| 675 |
+
if cypher_query.upper().startswith('MATCH'):
|
| 676 |
+
return cypher_query
|
| 677 |
+
|
| 678 |
+
return None
|
| 679 |
+
|
| 680 |
+
except Exception as e:
|
| 681 |
+
print(f"LLM Cypher generation error: {e}")
|
| 682 |
+
return None
|
| 683 |
+
|
| 684 |
+
def _generate_fallback_query(self, query: str) -> Optional[str]:
|
| 685 |
+
"""Generate fallback query when intent is unclear"""
|
| 686 |
+
# Try to extract locations using spell corrector
|
| 687 |
+
locations = self.spell_corrector.extract_locations_from_query(query)
|
| 688 |
+
|
| 689 |
+
if len(locations) >= 2:
|
| 690 |
+
from_loc = locations[0][1]
|
| 691 |
+
to_loc = locations[1][1]
|
| 692 |
+
return f"""
|
| 693 |
+
MATCH (a:Place {{name: '{from_loc}'}})-[r:Fare]->(b:Place {{name: '{to_loc}'}})
|
| 694 |
+
RETURN
|
| 695 |
+
a.name as from_place,
|
| 696 |
+
b.name as to_place,
|
| 697 |
+
r.fare as fare
|
| 698 |
+
"""
|
| 699 |
+
|
| 700 |
+
# Additional fallback: direct pattern matching for fare queries
|
| 701 |
+
if 'fare' in query.lower() or 'price' in query.lower() or 'cost' in query.lower():
|
| 702 |
+
import re
|
| 703 |
+
fare_patterns = [
|
| 704 |
+
r'fare\s+(?:of|from)?\s+([a-zA-Z\s]+)\s+to\s+([a-zA-Z\s]+)',
|
| 705 |
+
r'price\s+(?:of|from)?\s+([a-zA-Z\s]+)\s+to\s+([a-zA-Z\s]+)',
|
| 706 |
+
r'cost\s+(?:of|from)?\s+([a-zA-Z\s]+)\s+to\s+([a-zA-Z\s]+)',
|
| 707 |
+
r'(?:what\s+is\s+)?(?:the\s+)?(?:fare|price|cost)(?:\s+of)?(?:\s+from)?\s+([a-zA-Z\s]+)\s+to\s+([a-zA-Z\s]+)',
|
| 708 |
+
r'(?:how\s+much\s+)?(?:is|does)\s+(?:the\s+)?(?:fare|price|cost)\s+(?:from\s+)?([a-zA-Z\s]+)\s+to\s+([a-zA-Z\s]+)'
|
| 709 |
+
]
|
| 710 |
+
|
| 711 |
+
for pattern in fare_patterns:
|
| 712 |
+
match = re.search(pattern, query.lower())
|
| 713 |
+
if match:
|
| 714 |
+
from_loc = match.group(1).strip()
|
| 715 |
+
to_loc = match.group(2).strip()
|
| 716 |
+
|
| 717 |
+
# Correct locations
|
| 718 |
+
from_corrected, from_conf, _ = self.spell_corrector.correct_location(from_loc)
|
| 719 |
+
to_corrected, to_conf, _ = self.spell_corrector.correct_location(to_loc)
|
| 720 |
+
|
| 721 |
+
if from_conf > 0.5 and to_conf > 0.5:
|
| 722 |
+
return f"""
|
| 723 |
+
MATCH (a:Place {{name: '{from_corrected}'}})-[r:Fare]->(b:Place {{name: '{to_corrected}'}})
|
| 724 |
+
RETURN a.name as from_place, b.name as to_place, r.fare as fare
|
| 725 |
+
"""
|
| 726 |
+
|
| 727 |
+
return None
|
| 728 |
+
|
| 729 |
+
def _execute_query(self, cypher_query: str) -> List[Dict]:
|
| 730 |
+
"""Execute Cypher query and return results"""
|
| 731 |
+
try:
|
| 732 |
+
with self.neo4j_service.driver.session() as session:
|
| 733 |
+
result = session.run(cypher_query)
|
| 734 |
+
return [dict(record) for record in result]
|
| 735 |
+
except Exception as e:
|
| 736 |
+
print(f"Query execution error: {e}")
|
| 737 |
+
return []
|
| 738 |
+
|
| 739 |
+
def _format_response(self, intent: Dict, entities: Dict, results: List[Dict], query: str) -> Dict[str, Any]:
|
| 740 |
+
"""Format the response based on intent and results"""
|
| 741 |
+
primary_intent = intent['primary']
|
| 742 |
+
|
| 743 |
+
if not results:
|
| 744 |
+
return {
|
| 745 |
+
'success': False,
|
| 746 |
+
'message': 'No results found for your query.',
|
| 747 |
+
'suggestions': self._get_suggestions()
|
| 748 |
+
}
|
| 749 |
+
|
| 750 |
+
if primary_intent == 'fare_inquiry':
|
| 751 |
+
return self._format_fare_response(results, entities)
|
| 752 |
+
elif primary_intent == 'comparison':
|
| 753 |
+
return self._format_comparison_response(results, entities)
|
| 754 |
+
elif primary_intent == 'route_inquiry':
|
| 755 |
+
return self._format_route_response(results, entities)
|
| 756 |
+
elif primary_intent == 'statistics':
|
| 757 |
+
return self._format_statistics_response(results)
|
| 758 |
+
elif primary_intent == 'recommendation':
|
| 759 |
+
return self._format_recommendation_response(results, query)
|
| 760 |
+
elif primary_intent == 'range_search':
|
| 761 |
+
return self._format_range_response(results, entities)
|
| 762 |
+
else:
|
| 763 |
+
return self._format_generic_response(results)
|
| 764 |
+
|
| 765 |
+
def _format_fare_response(self, results: List[Dict], entities: Dict) -> Dict[str, Any]:
|
| 766 |
+
"""Format fare inquiry response"""
|
| 767 |
+
if results:
|
| 768 |
+
result = results[0]
|
| 769 |
+
return {
|
| 770 |
+
'success': True,
|
| 771 |
+
'message': f"The fare from {result['from_place']} to {result['to_place']} is Rs. {result['fare']}",
|
| 772 |
+
'data': results,
|
| 773 |
+
'query_type': 'fare_inquiry',
|
| 774 |
+
'summary': {
|
| 775 |
+
'from_place': result['from_place'],
|
| 776 |
+
'to_place': result['to_place'],
|
| 777 |
+
'fare': result['fare']
|
| 778 |
+
}
|
| 779 |
+
}
|
| 780 |
+
return {'success': False, 'message': 'Fare information not found.'}
|
| 781 |
+
|
| 782 |
+
def _format_comparison_response(self, results: List[Dict], entities: Dict) -> Dict[str, Any]:
|
| 783 |
+
"""Format comparison response"""
|
| 784 |
+
if results:
|
| 785 |
+
result = results[0]
|
| 786 |
+
return {
|
| 787 |
+
'success': True,
|
| 788 |
+
'message': result.get('comparison', 'Comparison completed'),
|
| 789 |
+
'data': results,
|
| 790 |
+
'query_type': 'comparison',
|
| 791 |
+
'summary': {
|
| 792 |
+
'route1': result.get('route1'),
|
| 793 |
+
'route2': result.get('route2'),
|
| 794 |
+
'difference': result.get('difference')
|
| 795 |
+
}
|
| 796 |
+
}
|
| 797 |
+
return {'success': False, 'message': 'Comparison not possible.'}
|
| 798 |
+
|
| 799 |
+
def _format_route_response(self, results: List[Dict], entities: Dict) -> Dict[str, Any]:
|
| 800 |
+
"""Format route inquiry response"""
|
| 801 |
+
return {
|
| 802 |
+
'success': True,
|
| 803 |
+
'message': f"Found {len(results)} routes",
|
| 804 |
+
'data': results,
|
| 805 |
+
'query_type': 'route_inquiry',
|
| 806 |
+
'summary': {
|
| 807 |
+
'total_routes': len(results),
|
| 808 |
+
'fare_range': f"Rs. {min(r['fare'] for r in results)} - Rs. {max(r['fare'] for r in results)}" if results else "N/A"
|
| 809 |
+
}
|
| 810 |
+
}
|
| 811 |
+
|
| 812 |
+
def _format_statistics_response(self, results: List[Dict]) -> Dict[str, Any]:
|
| 813 |
+
"""Format statistics response"""
|
| 814 |
+
if results:
|
| 815 |
+
stats = results[0]
|
| 816 |
+
return {
|
| 817 |
+
'success': True,
|
| 818 |
+
'message': f"Database contains {stats['total_places']} places and {stats['total_routes']} routes",
|
| 819 |
+
'data': results,
|
| 820 |
+
'query_type': 'statistics',
|
| 821 |
+
'summary': {
|
| 822 |
+
'total_places': stats['total_places'],
|
| 823 |
+
'total_routes': stats['total_routes'],
|
| 824 |
+
'average_fare': stats['average_fare'],
|
| 825 |
+
'fare_range': f"Rs. {stats['minimum_fare']} - Rs. {stats['maximum_fare']}"
|
| 826 |
+
}
|
| 827 |
+
}
|
| 828 |
+
return {'success': False, 'message': 'Statistics not available.'}
|
| 829 |
+
|
| 830 |
+
def _format_recommendation_response(self, results: List[Dict], query: str) -> Dict[str, Any]:
|
| 831 |
+
"""Format recommendation response"""
|
| 832 |
+
return {
|
| 833 |
+
'success': True,
|
| 834 |
+
'message': f"Here are {len(results)} recommended routes",
|
| 835 |
+
'data': results,
|
| 836 |
+
'query_type': 'recommendation',
|
| 837 |
+
'summary': {
|
| 838 |
+
'recommendations_count': len(results),
|
| 839 |
+
'fare_range': f"Rs. {min(r['fare'] for r in results)} - Rs. {max(r['fare'] for r in results)}" if results else "N/A"
|
| 840 |
+
}
|
| 841 |
+
}
|
| 842 |
+
|
| 843 |
+
def _format_range_response(self, results: List[Dict], entities: Dict) -> Dict[str, Any]:
|
| 844 |
+
"""Format range search response"""
|
| 845 |
+
return {
|
| 846 |
+
'success': True,
|
| 847 |
+
'message': f"Found {len(results)} routes in your specified range",
|
| 848 |
+
'data': results,
|
| 849 |
+
'query_type': 'range_search',
|
| 850 |
+
'summary': {
|
| 851 |
+
'routes_found': len(results),
|
| 852 |
+
'fare_range': f"Rs. {min(r['fare'] for r in results)} - Rs. {max(r['fare'] for r in results)}" if results else "N/A"
|
| 853 |
+
}
|
| 854 |
+
}
|
| 855 |
+
|
| 856 |
+
def _format_generic_response(self, results: List[Dict]) -> Dict[str, Any]:
|
| 857 |
+
"""Format generic response"""
|
| 858 |
+
return {
|
| 859 |
+
'success': True,
|
| 860 |
+
'message': f"Found {len(results)} results",
|
| 861 |
+
'data': results,
|
| 862 |
+
'query_type': 'generic'
|
| 863 |
+
}
|
| 864 |
+
|
| 865 |
+
def _handle_unclear_query(self, query: str) -> Dict[str, Any]:
|
| 866 |
+
"""Handle unclear or ambiguous queries"""
|
| 867 |
+
return {
|
| 868 |
+
'success': False,
|
| 869 |
+
'message': 'I could not understand your query. Please try rephrasing it.',
|
| 870 |
+
'suggestions': self._get_suggestions(),
|
| 871 |
+
'query_type': 'unclear'
|
| 872 |
+
}
|
| 873 |
+
|
| 874 |
+
def _calculate_confidence(self, intent: Dict, entities: Dict) -> float:
|
| 875 |
+
"""Calculate confidence score for the query interpretation"""
|
| 876 |
+
confidence = 0.0
|
| 877 |
+
|
| 878 |
+
# Intent confidence
|
| 879 |
+
confidence += intent.get('confidence', 0) * 0.4
|
| 880 |
+
|
| 881 |
+
# Entity confidence
|
| 882 |
+
locations = entities.get('locations', [])
|
| 883 |
+
if locations:
|
| 884 |
+
avg_location_confidence = sum(loc['confidence'] for loc in locations) / len(locations)
|
| 885 |
+
confidence += avg_location_confidence * 0.4
|
| 886 |
+
|
| 887 |
+
# Query complexity bonus
|
| 888 |
+
if len(locations) >= 2:
|
| 889 |
+
confidence += 0.2
|
| 890 |
+
|
| 891 |
+
return min(confidence, 1.0)
|
| 892 |
+
|
| 893 |
+
def _get_suggestions(self) -> List[str]:
|
| 894 |
+
"""Get query suggestions"""
|
| 895 |
+
return [
|
| 896 |
+
"What is the fare from Colombo to Kandy?",
|
| 897 |
+
"Compare fares from Colombo to Kandy vs Colombo to Galle",
|
| 898 |
+
"Show me routes from Panadura",
|
| 899 |
+
"Find routes under 500 rupees",
|
| 900 |
+
"What are the cheapest routes?",
|
| 901 |
+
"Show me popular destinations",
|
| 902 |
+
"Give me database statistics",
|
| 903 |
+
"Recommend affordable routes"
|
| 904 |
+
]
|
llm_query_processor.py
ADDED
|
@@ -0,0 +1,351 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
LLM-Based Query Processor for Transport Query Application
|
| 4 |
+
Uses AI to interpret queries and generate Cypher queries
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import re
|
| 8 |
+
from typing import Dict, List, Tuple, Optional
|
| 9 |
+
from spell_corrector import SpellCorrector
|
| 10 |
+
from neo4j_service import Neo4jService
|
| 11 |
+
from config import Config
|
| 12 |
+
|
| 13 |
+
class LLMQueryProcessor:
|
| 14 |
+
"""Process natural language queries using LLM for interpretation and Cypher generation"""
|
| 15 |
+
|
| 16 |
+
def __init__(self):
    """Create the collaborators used to interpret and run queries."""
    # Application settings; the OpenAI key and model are read from here.
    self.config = Config()
    # Extracts place names from queries and corrects their spelling.
    self.spell_corrector = SpellCorrector()
    # Access to the Neo4j transport graph.
    self.neo4j_service = Neo4jService()
|
| 20 |
+
|
| 21 |
+
def process_query(self, user_query: str) -> Dict:
    """Process a natural language query using LLM interpretation.

    Locations are extracted and spell-corrected, the query is interpreted
    into Cypher, and the Cypher is executed.

    Args:
        user_query: The user's query text.

    Returns:
        Dictionary with query results and metadata on success; a failure
        payload with a message and suggestions otherwise.
    """
    try:
        # First, extract and correct locations from the query
        found_locations = self.spell_corrector.extract_locations_from_query(user_query)

        # Use LLM to interpret the query and generate Cypher
        parsed = self._interpret_query_with_llm(user_query, found_locations)

        if not parsed['success']:
            return {
                'success': False,
                'message': parsed['message'],
                'suggestions': self._get_query_suggestions()
            }

        # Execute the generated Cypher query
        rows = self._execute_cypher_query(parsed['cypher_query'])

        return {
            'success': True,
            'message': parsed['message'],
            'cypher_query': parsed['cypher_query'],
            'data': rows,
            'corrections': self._format_corrections(found_locations),
            'query_type': parsed['query_type']
        }

    except Exception as e:
        print(f"Query processing error: {e}")
        return {
            'success': False,
            'message': 'An error occurred while processing your query.',
            'suggestions': self._get_query_suggestions()
        }
|
| 61 |
+
|
| 62 |
+
def _interpret_query_with_llm(self, query: str, locations: List[Tuple]) -> Dict:
|
| 63 |
+
"""Use LLM to interpret the query and generate appropriate Cypher"""
|
| 64 |
+
try:
|
| 65 |
+
if not self.config.OPENAI_API_KEY:
|
| 66 |
+
return self._fallback_interpretation(query, locations)
|
| 67 |
+
|
| 68 |
+
# Get available places for context
|
| 69 |
+
available_places = list(self.neo4j_service.get_all_places())
|
| 70 |
+
|
| 71 |
+
# Create comprehensive prompt for query interpretation
|
| 72 |
+
prompt = f"""
|
| 73 |
+
You are an intelligent transport query interpreter for a Neo4j database containing Sri Lankan transport data.
|
| 74 |
+
|
| 75 |
+
Database Schema:
|
| 76 |
+
- Nodes: Place (with property 'name')
|
| 77 |
+
- Relationships: Fare (with property 'fare')
|
| 78 |
+
|
| 79 |
+
Available Places: {', '.join(available_places[:50])}... (total: {len(available_places)})
|
| 80 |
+
|
| 81 |
+
User Query: "{query}"
|
| 82 |
+
|
| 83 |
+
Extracted Locations: {[f"{orig}->{corr}" for orig, corr, conf, method in locations]}
|
| 84 |
+
|
| 85 |
+
Your task is to:
|
| 86 |
+
1. Determine the query type (fare, cheapest, expensive, places, routes_from, routes_to, statistics, lowest_fare)
|
| 87 |
+
2. Generate the appropriate Cypher query
|
| 88 |
+
3. Provide a clear response message
|
| 89 |
+
|
| 90 |
+
Query Types:
|
| 91 |
+
- fare: Find fare between two specific locations
|
| 92 |
+
- cheapest: Find cheapest routes (top 10)
|
| 93 |
+
- expensive: Find most expensive routes (top 10)
|
| 94 |
+
- places: List all places
|
| 95 |
+
- routes_from: Find routes departing from a location
|
| 96 |
+
- routes_to: Find routes arriving at a location
|
| 97 |
+
- statistics: Get database statistics
|
| 98 |
+
- lowest_fare: Find the single lowest fare with route details
|
| 99 |
+
|
| 100 |
+
Return your response in this exact JSON format:
|
| 101 |
+
{{
|
| 102 |
+
"query_type": "fare|cheapest|expensive|places|routes_from|routes_to|statistics|lowest_fare",
|
| 103 |
+
"cypher_query": "MATCH ... RETURN ...",
|
| 104 |
+
"message": "Clear response message for the user"
|
| 105 |
+
}}
|
| 106 |
+
|
| 107 |
+
Examples:
|
| 108 |
+
- "What is the fare from Colombo to Kandy?" → fare query: MATCH (a:Place {name: 'Colombo'})-[r:Fare]->(b:Place {name: 'Kandy'}) RETURN a.name as from_place, b.name as to_place, r.fare as fare
|
| 109 |
+
- "fare of anuradhapura to kandy?" → fare query: MATCH (a:Place {name: 'Anuradapura'})-[r:Fare]->(b:Place {name: 'Kandy'}) RETURN a.name as from_place, b.name as to_place, r.fare as fare
|
| 110 |
+
- "Show me the cheapest routes" → cheapest query: MATCH (a:Place)-[r:Fare]->(b:Place) RETURN a.name as from_place, b.name as to_place, r.fare as fare ORDER BY r.fare ASC LIMIT 10
|
| 111 |
+
- "What is the lowest fare?" → lowest_fare query: MATCH (a:Place)-[r:Fare]->(b:Place) RETURN a.name as from_place, b.name as to_place, r.fare as fare ORDER BY r.fare ASC LIMIT 1
|
| 112 |
+
- "List all places" → places query: MATCH (p:Place) RETURN DISTINCT p.name as place ORDER BY p.name
|
| 113 |
+
- "Routes from Colombo" → routes_from query: MATCH (a:Place {name: 'Colombo'})-[r:Fare]->(b:Place) RETURN a.name as from_place, b.name as to_place, r.fare as fare ORDER BY r.fare
|
| 114 |
+
- "Database statistics" → statistics query: MATCH (p:Place) MATCH ()-[r:Fare]->() RETURN count(DISTINCT p) as total_places, count(r) as total_routes, avg(r.fare) as average_fare, min(r.fare) as min_fare, max(r.fare) as max_fare
|
| 115 |
+
|
| 116 |
+
Keep Cypher queries simple and avoid complex functions like shortestPath. Use direct relationships only.
|
| 117 |
+
|
| 118 |
+
For fare queries, recognize various formats like "fare of X to Y", "fare from X to Y", "price from X to Y", etc.
|
| 119 |
+
"""
|
| 120 |
+
|
| 121 |
+
# Call LLM using new SDK first, legacy as fallback
|
| 122 |
+
import json
|
| 123 |
+
interpretation = None
|
| 124 |
+
try:
|
| 125 |
+
from openai import OpenAI
|
| 126 |
+
client = OpenAI(api_key=self.config.OPENAI_API_KEY)
|
| 127 |
+
response = client.chat.completions.create(
|
| 128 |
+
model=self.config.OPENAI_MODEL,
|
| 129 |
+
messages=[
|
| 130 |
+
{"role": "system", "content": "You are a transport query interpreter. Return only valid JSON."},
|
| 131 |
+
{"role": "user", "content": prompt}
|
| 132 |
+
],
|
| 133 |
+
max_tokens=500,
|
| 134 |
+
temperature=0.1
|
| 135 |
+
)
|
| 136 |
+
interpretation = json.loads(response.choices[0].message.content.strip())
|
| 137 |
+
except Exception as sdk_err:
|
| 138 |
+
try:
|
| 139 |
+
import openai
|
| 140 |
+
openai.api_key = self.config.OPENAI_API_KEY
|
| 141 |
+
response = openai.ChatCompletion.create(
|
| 142 |
+
model=self.config.OPENAI_MODEL,
|
| 143 |
+
messages=[
|
| 144 |
+
{"role": "system", "content": "You are a transport query interpreter. Return only valid JSON."},
|
| 145 |
+
{"role": "user", "content": prompt}
|
| 146 |
+
],
|
| 147 |
+
max_tokens=500,
|
| 148 |
+
temperature=0.1
|
| 149 |
+
)
|
| 150 |
+
interpretation = json.loads(response.choices[0].message.content.strip())
|
| 151 |
+
except Exception:
|
| 152 |
+
raise sdk_err
|
| 153 |
+
|
| 154 |
+
# Validate the response
|
| 155 |
+
if interpretation and 'query_type' in interpretation and 'cypher_query' in interpretation and 'message' in interpretation:
|
| 156 |
+
return {
|
| 157 |
+
'success': True,
|
| 158 |
+
'query_type': interpretation['query_type'],
|
| 159 |
+
'cypher_query': interpretation['cypher_query'],
|
| 160 |
+
'message': interpretation['message']
|
| 161 |
+
}
|
| 162 |
+
else:
|
| 163 |
+
return self._fallback_interpretation(query, locations)
|
| 164 |
+
|
| 165 |
+
except Exception as e:
|
| 166 |
+
print(f"LLM interpretation error: {e}")
|
| 167 |
+
return self._fallback_interpretation(query, locations)
|
| 168 |
+
|
| 169 |
+
def _fallback_interpretation(self, query: str, locations: List[Tuple]) -> Dict:
|
| 170 |
+
"""Fallback interpretation when LLM is not available"""
|
| 171 |
+
query_lower = query.lower()
|
| 172 |
+
|
| 173 |
+
# Simple keyword-based interpretation
|
| 174 |
+
if 'lowest' in query_lower or 'minimum' in query_lower or 'cheapest' in query_lower:
|
| 175 |
+
if 'lowest fare' in query_lower or 'minimum fare' in query_lower:
|
| 176 |
+
return {
|
| 177 |
+
'success': True,
|
| 178 |
+
'query_type': 'lowest_fare',
|
| 179 |
+
'cypher_query': """
|
| 180 |
+
MATCH (a:Place)-[r:Fare]->(b:Place)
|
| 181 |
+
WITH a, b, r, r.fare as fare
|
| 182 |
+
ORDER BY r.fare ASC
|
| 183 |
+
LIMIT 1
|
| 184 |
+
RETURN a.name as from_place, b.name as to_place, fare
|
| 185 |
+
""",
|
| 186 |
+
'message': 'Finding the lowest fare in the database...'
|
| 187 |
+
}
|
| 188 |
+
else:
|
| 189 |
+
return {
|
| 190 |
+
'success': True,
|
| 191 |
+
'query_type': 'cheapest',
|
| 192 |
+
'cypher_query': """
|
| 193 |
+
MATCH (a:Place)-[r:Fare]->(b:Place)
|
| 194 |
+
RETURN a.name as from_place, b.name as to_place, r.fare as fare
|
| 195 |
+
ORDER BY r.fare ASC
|
| 196 |
+
LIMIT 10
|
| 197 |
+
""",
|
| 198 |
+
'message': 'Finding the cheapest routes...'
|
| 199 |
+
}
|
| 200 |
+
elif 'expensive' in query_lower or 'highest' in query_lower or 'maximum' in query_lower:
|
| 201 |
+
return {
|
| 202 |
+
'success': True,
|
| 203 |
+
'query_type': 'expensive',
|
| 204 |
+
'cypher_query': """
|
| 205 |
+
MATCH (a:Place)-[r:Fare]->(b:Place)
|
| 206 |
+
RETURN a.name as from_place, b.name as to_place, r.fare as fare
|
| 207 |
+
ORDER BY r.fare DESC
|
| 208 |
+
LIMIT 10
|
| 209 |
+
""",
|
| 210 |
+
'message': 'Finding the most expensive routes...'
|
| 211 |
+
}
|
| 212 |
+
elif 'places' in query_lower or 'locations' in query_lower or 'list all' in query_lower:
|
| 213 |
+
return {
|
| 214 |
+
'success': True,
|
| 215 |
+
'query_type': 'places',
|
| 216 |
+
'cypher_query': """
|
| 217 |
+
MATCH (p:Place)
|
| 218 |
+
RETURN DISTINCT p.name as place
|
| 219 |
+
ORDER BY p.name
|
| 220 |
+
""",
|
| 221 |
+
'message': 'Listing all places...'
|
| 222 |
+
}
|
| 223 |
+
elif 'statistics' in query_lower or 'stats' in query_lower:
|
| 224 |
+
return {
|
| 225 |
+
'success': True,
|
| 226 |
+
'query_type': 'statistics',
|
| 227 |
+
'cypher_query': """
|
| 228 |
+
MATCH (p:Place)
|
| 229 |
+
MATCH ()-[r:Fare]->()
|
| 230 |
+
RETURN
|
| 231 |
+
count(DISTINCT p) as total_places,
|
| 232 |
+
count(r) as total_routes,
|
| 233 |
+
avg(r.fare) as average_fare,
|
| 234 |
+
min(r.fare) as min_fare,
|
| 235 |
+
max(r.fare) as max_fare
|
| 236 |
+
""",
|
| 237 |
+
'message': 'Getting database statistics...'
|
| 238 |
+
}
|
| 239 |
+
elif len(locations) >= 2:
|
| 240 |
+
# Fare query between two locations
|
| 241 |
+
from_location = locations[0][1]
|
| 242 |
+
to_location = locations[1][1]
|
| 243 |
+
return {
|
| 244 |
+
'success': True,
|
| 245 |
+
'query_type': 'fare',
|
| 246 |
+
'cypher_query': f"""
|
| 247 |
+
MATCH (a:Place {{name: '{from_location}'}})-[r:Fare]->(b:Place {{name: '{to_location}'}})
|
| 248 |
+
RETURN a.name as from_place, b.name as to_place, r.fare as fare
|
| 249 |
+
""",
|
| 250 |
+
'message': f'Finding fare from {from_location} to {to_location}...'
|
| 251 |
+
}
|
| 252 |
+
elif 'fare' in query_lower and 'to' in query_lower:
|
| 253 |
+
# Handle queries like "fare of X to Y" where locations might not be extracted properly
|
| 254 |
+
# Try to extract locations using a simpler pattern
|
| 255 |
+
import re
|
| 256 |
+
fare_patterns = [
|
| 257 |
+
r'fare\s+(?:of|from)?\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)',
|
| 258 |
+
r'price\s+(?:of|from)?\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)',
|
| 259 |
+
r'cost\s+(?:of|from)?\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)',
|
| 260 |
+
r'how\s+much\s+(?:is|does)\s+(?:the\s+)?(?:fare|price|cost)\s+(?:from\s+)?([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)',
|
| 261 |
+
r'what\s+(?:is|are)\s+(?:the\s+)?(?:fare|price|cost)s?\s+(?:from\s+)?([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)',
|
| 262 |
+
r'([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+(?:fare|price|cost)',
|
| 263 |
+
r'(?:fare|price|cost)\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)'
|
| 264 |
+
]
|
| 265 |
+
|
| 266 |
+
for pattern in fare_patterns:
|
| 267 |
+
match = re.search(pattern, query_lower)
|
| 268 |
+
if match:
|
| 269 |
+
from_loc = match.group(1).strip()
|
| 270 |
+
to_loc = match.group(2).strip()
|
| 271 |
+
|
| 272 |
+
# Correct the locations
|
| 273 |
+
from_corrected, from_conf, _ = self.spell_corrector.correct_location(from_loc)
|
| 274 |
+
to_corrected, to_conf, _ = self.spell_corrector.correct_location(to_loc)
|
| 275 |
+
|
| 276 |
+
if from_conf > 0.5 and to_conf > 0.5:
|
| 277 |
+
return {
|
| 278 |
+
'success': True,
|
| 279 |
+
'query_type': 'fare',
|
| 280 |
+
'cypher_query': f"""
|
| 281 |
+
MATCH (a:Place {{name: '{from_corrected}'}})-[r:Fare]->(b:Place {{name: '{to_corrected}'}})
|
| 282 |
+
RETURN a.name as from_place, b.name as to_place, r.fare as fare
|
| 283 |
+
""",
|
| 284 |
+
'message': f'Finding fare from {from_corrected} to {to_corrected}...'
|
| 285 |
+
}
|
| 286 |
+
elif len(locations) == 1:
|
| 287 |
+
# Routes from/to a single location
|
| 288 |
+
location = locations[0][1]
|
| 289 |
+
if 'from' in query_lower:
|
| 290 |
+
return {
|
| 291 |
+
'success': True,
|
| 292 |
+
'query_type': 'routes_from',
|
| 293 |
+
'cypher_query': f"""
|
| 294 |
+
MATCH (a:Place {{name: '{location}'}})-[r:Fare]->(b:Place)
|
| 295 |
+
RETURN a.name as from_place, b.name as to_place, r.fare as fare
|
| 296 |
+
ORDER BY r.fare
|
| 297 |
+
""",
|
| 298 |
+
'message': f'Finding routes from {location}...'
|
| 299 |
+
}
|
| 300 |
+
else:
|
| 301 |
+
return {
|
| 302 |
+
'success': True,
|
| 303 |
+
'query_type': 'routes_to',
|
| 304 |
+
'cypher_query': f"""
|
| 305 |
+
MATCH (a:Place)-[r:Fare]->(b:Place {{name: '{location}'}})
|
| 306 |
+
RETURN a.name as from_place, b.name as to_place, r.fare as fare
|
| 307 |
+
ORDER BY r.fare
|
| 308 |
+
""",
|
| 309 |
+
'message': f'Finding routes to {location}...'
|
| 310 |
+
}
|
| 311 |
+
else:
|
| 312 |
+
return {
|
| 313 |
+
'success': False,
|
| 314 |
+
'message': 'I could not understand your query. Please try rephrasing it.'
|
| 315 |
+
}
|
| 316 |
+
|
| 317 |
+
def _execute_cypher_query(self, cypher_query: str) -> List[Dict]:
|
| 318 |
+
"""Execute the generated Cypher query"""
|
| 319 |
+
try:
|
| 320 |
+
with self.neo4j_service.driver.session() as session:
|
| 321 |
+
result = session.run(cypher_query)
|
| 322 |
+
return [dict(record) for record in result]
|
| 323 |
+
except Exception as e:
|
| 324 |
+
print(f"Cypher execution error: {e}")
|
| 325 |
+
return []
|
| 326 |
+
|
| 327 |
+
def _format_corrections(self, locations: List[Tuple]) -> List[Dict]:
|
| 328 |
+
"""Format location corrections for display"""
|
| 329 |
+
corrections = []
|
| 330 |
+
for original, corrected, confidence, method in locations:
|
| 331 |
+
if original.lower() != corrected.lower():
|
| 332 |
+
corrections.append({
|
| 333 |
+
'original': original,
|
| 334 |
+
'corrected': corrected,
|
| 335 |
+
'confidence': confidence,
|
| 336 |
+
'method': method
|
| 337 |
+
})
|
| 338 |
+
return corrections
|
| 339 |
+
|
| 340 |
+
def _get_query_suggestions(self) -> List[str]:
|
| 341 |
+
"""Get query suggestions"""
|
| 342 |
+
return [
|
| 343 |
+
"What is the fare from Colombo to Kandy?",
|
| 344 |
+
"What is the lowest fare price?",
|
| 345 |
+
"Show me the cheapest routes",
|
| 346 |
+
"Show me the most expensive routes",
|
| 347 |
+
"List all places",
|
| 348 |
+
"Routes from Panadura",
|
| 349 |
+
"Routes to Galle",
|
| 350 |
+
"Database statistics"
|
| 351 |
+
]
|
logger.py
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Centralized logging setup for the Transport Query Application.
|
| 4 |
+
Provides a rotating file handler and console output.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import logging
|
| 8 |
+
import os
|
| 9 |
+
from logging.handlers import RotatingFileHandler
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def get_logger(name: str) -> logging.Logger:
    """Create or retrieve a configured logger with file and console handlers.

    Configuration is read from the environment on first use of each name:
    ``LOG_LEVEL`` (default ``INFO``) and ``LOG_DIR`` (default ``../logs``
    relative to this file). A logger that was already configured by this
    function is returned unchanged, so handlers are never added twice.

    Args:
        name: Logger name passed to ``logging.getLogger``.

    Returns:
        The configured logger (propagation to ancestor loggers disabled).
    """
    logger = logging.getLogger(name)

    if getattr(logger, "_configured", False):
        return logger

    log_level_str = os.getenv("LOG_LEVEL", "INFO").upper()
    # getattr can hit non-level attributes (e.g. logging.BASIC_FORMAT is a
    # str), which would make setLevel raise; accept only integer levels.
    level = getattr(logging, log_level_str, None)
    if not isinstance(level, int):
        level = logging.INFO

    log_dir = os.getenv("LOG_DIR", os.path.join(os.path.dirname(__file__), "..", "logs"))
    try:
        log_dir = os.path.abspath(log_dir)
        os.makedirs(log_dir, exist_ok=True)
    except (OSError, ValueError):
        # Fallback to current directory if path invalid
        log_dir = os.getcwd()

    log_path = os.path.join(log_dir, "app.log")

    formatter = logging.Formatter(
        fmt="%(asctime)s %(levelname)s [%(name)s] %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )

    logger.setLevel(level)
    logger.propagate = False

    # Console handler
    console_handler = logging.StreamHandler()
    console_handler.setFormatter(formatter)
    logger.addHandler(console_handler)

    # File handler with rotation (1 MB, keep 5 backups). An unwritable log
    # location must not prevent the application from logging to the console.
    try:
        file_handler = RotatingFileHandler(log_path, maxBytes=1_000_000, backupCount=5, encoding="utf-8")
    except OSError as exc:
        logger.warning("File logging disabled, cannot open %s: %s", log_path, exc)
    else:
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)

    logger._configured = True  # type: ignore[attr-defined]
    logger.debug("Logger initialized. Level=%s, File=%s", log_level_str, log_path)
    return logger
|
| 52 |
+
|
| 53 |
+
|
neo4j_service.py
ADDED
|
@@ -0,0 +1,222 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Neo4j Service for Transport Query Application
|
| 4 |
+
Handles all database operations
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
from neo4j import GraphDatabase
|
| 8 |
+
from typing import List, Dict, Optional, Tuple
|
| 9 |
+
from config import Config
|
| 10 |
+
|
| 11 |
+
class Neo4jService:
    """Neo4j database service.

    Wraps a Neo4j driver and exposes read queries over ``Place`` nodes and
    ``Fare`` relationships. Every query method is best-effort: when there is
    no connection or the query raises, it prints the error and returns an
    empty value (``None``, ``[]`` or ``{}``) instead of raising.
    """

    def __init__(self):
        self.config = Config()
        self.driver = None
        self._connect()

    def _connect(self):
        """Connect to Neo4j database and verify the connection with a probe query."""
        driver = None
        try:
            driver = GraphDatabase.driver(
                self.config.NEO4J_URI,
                auth=(self.config.NEO4J_USER, self.config.NEO4J_PASSWORD)
            )
            # Test connection
            with driver.session() as session:
                session.run("RETURN 1")
            self.driver = driver
            print("✅ Connected to Neo4j database")
        except Exception as e:
            print(f"❌ Failed to connect to Neo4j: {e}")
            if driver is not None:
                # The driver object exists but the probe failed: release it
                # instead of dropping the reference with it still open.
                try:
                    driver.close()
                except Exception as close_err:
                    print(f"Error closing Neo4j driver: {close_err}")
            self.driver = None

    def is_connected(self) -> bool:
        """Check if connected to Neo4j"""
        return self.driver is not None

    def _query_rows(self, error_label: str, cypher: str, **params) -> List[Dict]:
        """Run *cypher* with *params* and return every record as a dict.

        Returns ``[]`` when disconnected or when the query raises; in the
        latter case the error is printed prefixed with *error_label*.
        """
        if not self.is_connected():
            return []

        try:
            with self.driver.session() as session:
                return [dict(record) for record in session.run(cypher, **params)]
        except Exception as e:
            print(f"{error_label}: {e}")
            return []

    def get_fare(self, from_location: str, to_location: str) -> Optional[Dict]:
        """Get fare between two locations.

        Returns:
            Dict with ``from_place``, ``to_place`` and ``fare``, or ``None``
            when no such route exists, the service is disconnected, or the
            query fails.
        """
        if not self.is_connected():
            return None

        try:
            with self.driver.session() as session:
                result = session.run("""
                    MATCH (a:Place {name: $from_location})-[r:Fare]->(b:Place {name: $to_location})
                    RETURN a.name as from_place, b.name as to_place, r.fare as fare
                """, from_location=from_location, to_location=to_location)

                record = result.single()
                if record:
                    return {
                        'from_place': record['from_place'],
                        'to_place': record['to_place'],
                        'fare': record['fare']
                    }
                return None

        except Exception as e:
            print(f"Error getting fare: {e}")
            return None

    def get_all_places(self) -> List[str]:
        """Get all available places, as distinct names ordered by name."""
        rows = self._query_rows("Error getting places", """
            MATCH (p:Place)
            RETURN DISTINCT p.name as place
            ORDER BY p.name
        """)
        return [row['place'] for row in rows]

    def get_routes_from_location(self, from_location: str) -> List[Dict]:
        """Get all routes from a specific location, ordered by fare."""
        return self._query_rows("Error getting routes from location", """
            MATCH (a:Place {name: $from_location})-[r:Fare]->(b:Place)
            RETURN a.name as from_place, b.name as to_place, r.fare as fare
            ORDER BY r.fare
        """, from_location=from_location)

    def get_routes_to_location(self, to_location: str) -> List[Dict]:
        """Get all routes to a specific location, ordered by fare."""
        return self._query_rows("Error getting routes to location", """
            MATCH (a:Place)-[r:Fare]->(b:Place {name: $to_location})
            RETURN a.name as from_place, b.name as to_place, r.fare as fare
            ORDER BY r.fare
        """, to_location=to_location)

    def get_cheapest_routes(self, limit: int = 10) -> List[Dict]:
        """Get the *limit* cheapest routes, lowest fare first."""
        return self._query_rows("Error getting cheapest routes", """
            MATCH (a:Place)-[r:Fare]->(b:Place)
            RETURN a.name as from_place, b.name as to_place, r.fare as fare
            ORDER BY r.fare ASC
            LIMIT $limit
        """, limit=limit)

    def get_most_expensive_routes(self, limit: int = 10) -> List[Dict]:
        """Get the *limit* most expensive routes, highest fare first."""
        return self._query_rows("Error getting most expensive routes", """
            MATCH (a:Place)-[r:Fare]->(b:Place)
            RETURN a.name as from_place, b.name as to_place, r.fare as fare
            ORDER BY r.fare DESC
            LIMIT $limit
        """, limit=limit)

    def search_routes_by_fare_range(self, min_fare: float, max_fare: float) -> List[Dict]:
        """Search routes whose fare lies in ``[min_fare, max_fare]``, ordered by fare."""
        return self._query_rows("Error searching routes by fare range", """
            MATCH (a:Place)-[r:Fare]->(b:Place)
            WHERE r.fare >= $min_fare AND r.fare <= $max_fare
            RETURN a.name as from_place, b.name as to_place, r.fare as fare
            ORDER BY r.fare
        """, min_fare=min_fare, max_fare=max_fare)

    def get_route_statistics(self) -> Dict:
        """Get database statistics.

        Returns:
            Dict with ``total_places``, ``total_routes``, ``average_fare``
            (rounded to 2 decimals, 0 when there are no fares), ``min_fare``
            and ``max_fare``; ``{}`` when disconnected or on error.
        """
        if not self.is_connected():
            return {}

        try:
            with self.driver.session() as session:
                # Count places
                place_count = session.run(
                    "MATCH (p:Place) RETURN count(p) as place_count"
                ).single()['place_count']

                # Route count, average, min and max in a single round trip;
                # an aggregate-only RETURN always yields exactly one row.
                fares = session.run("""
                    MATCH ()-[r:Fare]->()
                    RETURN count(r) as route_count,
                           avg(r.fare) as avg_fare,
                           min(r.fare) as min_fare,
                           max(r.fare) as max_fare
                """).single()
                avg_fare = fares['avg_fare']

                return {
                    'total_places': place_count,
                    'total_routes': fares['route_count'],
                    'average_fare': round(avg_fare, 2) if avg_fare is not None else 0,
                    'min_fare': fares['min_fare'],
                    'max_fare': fares['max_fare']
                }

        except Exception as e:
            print(f"Error getting statistics: {e}")
            return {}

    def close(self):
        """Close database connection"""
        if self.driver:
            self.driver.close()
            self.driver = None
|
requirements.txt
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Flask==2.3.3
|
| 2 |
+
neo4j==5.14.1
|
| 3 |
+
requests==2.31.0
|
| 4 |
+
openai==1.3.0
|
| 5 |
+
python-dotenv==1.0.0
|
| 6 |
+
fuzzywuzzy==0.18.0
|
| 7 |
+
python-Levenshtein==0.23.0
|
| 8 |
+
pandas==2.1.3
|
| 9 |
+
numpy==1.24.3
|
| 10 |
+
Werkzeug==2.3.7
|
spell_corrector.py
ADDED
|
@@ -0,0 +1,257 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Spell Correction Module for Transport Query Application
|
| 4 |
+
Handles location name corrections using fuzzy matching and LLM
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import re
|
| 8 |
+
from fuzzywuzzy import fuzz
|
| 9 |
+
from typing import List, Tuple, Optional
|
| 10 |
+
import openai
|
| 11 |
+
from config import Config
|
| 12 |
+
|
| 13 |
+
class SpellCorrector:
|
| 14 |
+
"""Spell correction for location names"""
|
| 15 |
+
|
| 16 |
+
def __init__(self):
|
| 17 |
+
self.config = Config()
|
| 18 |
+
self.location_mapping = self.config.LOCATION_MAPPING
|
| 19 |
+
self.available_locations = set(self.location_mapping.values())
|
| 20 |
+
|
| 21 |
+
# Initialize OpenAI if API key is available
|
| 22 |
+
if self.config.OPENAI_API_KEY:
|
| 23 |
+
try:
|
| 24 |
+
# Prefer new SDK client if installed; otherwise set legacy api key
|
| 25 |
+
try:
|
| 26 |
+
from openai import OpenAI # noqa: F401
|
| 27 |
+
self.llm_available = True
|
| 28 |
+
except Exception:
|
| 29 |
+
openai.api_key = self.config.OPENAI_API_KEY
|
| 30 |
+
self.llm_available = True
|
| 31 |
+
except Exception:
|
| 32 |
+
self.llm_available = False
|
| 33 |
+
else:
|
| 34 |
+
self.llm_available = False
|
| 35 |
+
|
| 36 |
+
def correct_location(self, location: str) -> Tuple[str, float, str]:
|
| 37 |
+
"""
|
| 38 |
+
Correct a location name using multiple methods
|
| 39 |
+
|
| 40 |
+
Returns:
|
| 41 |
+
Tuple of (corrected_name, confidence_score, correction_method)
|
| 42 |
+
"""
|
| 43 |
+
location = location.strip().lower()
|
| 44 |
+
|
| 45 |
+
# Method 1: Direct mapping
|
| 46 |
+
if location in self.location_mapping:
|
| 47 |
+
corrected = self.location_mapping[location]
|
| 48 |
+
return corrected, 1.0, "direct_mapping"
|
| 49 |
+
|
| 50 |
+
# Method 2: Fuzzy matching
|
| 51 |
+
best_match, confidence = self._fuzzy_match(location)
|
| 52 |
+
if confidence >= self.config.SIMILARITY_THRESHOLD:
|
| 53 |
+
return best_match, confidence, "fuzzy_matching"
|
| 54 |
+
|
| 55 |
+
# Method 3: LLM correction (if available)
|
| 56 |
+
if self.llm_available:
|
| 57 |
+
llm_corrected = self._llm_correct(location)
|
| 58 |
+
if llm_corrected:
|
| 59 |
+
# Verify LLM suggestion with fuzzy matching
|
| 60 |
+
llm_confidence = fuzz.ratio(location.lower(), llm_corrected.lower()) / 100
|
| 61 |
+
if llm_confidence >= 0.6: # Lower threshold for LLM suggestions
|
| 62 |
+
return llm_corrected, llm_confidence, "llm_correction"
|
| 63 |
+
|
| 64 |
+
# Method 4: Partial matching
|
| 65 |
+
partial_match = self._partial_match(location)
|
| 66 |
+
if partial_match:
|
| 67 |
+
return partial_match, 0.7, "partial_matching"
|
| 68 |
+
|
| 69 |
+
# No correction found
|
| 70 |
+
return location.title(), 0.0, "no_correction"
|
| 71 |
+
|
| 72 |
+
def _fuzzy_match(self, location: str) -> Tuple[str, float]:
|
| 73 |
+
"""Find best fuzzy match for location"""
|
| 74 |
+
best_match = None
|
| 75 |
+
best_score = 0
|
| 76 |
+
|
| 77 |
+
for available_location in self.available_locations:
|
| 78 |
+
score = fuzz.ratio(location.lower(), available_location.lower()) / 100
|
| 79 |
+
if score > best_score:
|
| 80 |
+
best_score = score
|
| 81 |
+
best_match = available_location
|
| 82 |
+
|
| 83 |
+
return best_match, best_score
|
| 84 |
+
|
| 85 |
+
def _partial_match(self, location: str) -> Optional[str]:
|
| 86 |
+
"""Find partial matches (substring matching)"""
|
| 87 |
+
location_lower = location.lower()
|
| 88 |
+
|
| 89 |
+
for available_location in self.available_locations:
|
| 90 |
+
available_lower = available_location.lower()
|
| 91 |
+
|
| 92 |
+
# Check if location is contained in available location
|
| 93 |
+
if location_lower in available_lower or available_lower in location_lower:
|
| 94 |
+
return available_location
|
| 95 |
+
|
| 96 |
+
return None
|
| 97 |
+
|
| 98 |
+
def _llm_correct(self, location: str) -> Optional[str]:
|
| 99 |
+
"""Use LLM to correct location name"""
|
| 100 |
+
try:
|
| 101 |
+
prompt = f"""
|
| 102 |
+
You are a location name correction system for Sri Lankan cities and towns.
|
| 103 |
+
Given a potentially misspelled location name, return the correct spelling.
|
| 104 |
+
|
| 105 |
+
Available locations include: {', '.join(sorted(self.available_locations))}
|
| 106 |
+
|
| 107 |
+
Input location: "{location}"
|
| 108 |
+
|
| 109 |
+
Return only the corrected location name, nothing else. If no correction is possible, return "UNKNOWN".
|
| 110 |
+
"""
|
| 111 |
+
|
| 112 |
+
corrected = None
|
| 113 |
+
# Try new SDK first
|
| 114 |
+
try:
|
| 115 |
+
from openai import OpenAI
|
| 116 |
+
client = OpenAI(api_key=self.config.OPENAI_API_KEY)
|
| 117 |
+
response = client.chat.completions.create(
|
| 118 |
+
model=self.config.OPENAI_MODEL,
|
| 119 |
+
messages=[
|
| 120 |
+
{"role": "system", "content": "You are a helpful assistant that corrects location names."},
|
| 121 |
+
{"role": "user", "content": prompt}
|
| 122 |
+
],
|
| 123 |
+
max_tokens=50,
|
| 124 |
+
temperature=0.1
|
| 125 |
+
)
|
| 126 |
+
corrected = response.choices[0].message.content.strip()
|
| 127 |
+
except Exception as sdk_err:
|
| 128 |
+
# Fallback to legacy API if present
|
| 129 |
+
import openai
|
| 130 |
+
try:
|
| 131 |
+
openai.api_key = self.config.OPENAI_API_KEY
|
| 132 |
+
response = openai.ChatCompletion.create(
|
| 133 |
+
model=self.config.OPENAI_MODEL,
|
| 134 |
+
messages=[
|
| 135 |
+
{"role": "system", "content": "You are a helpful assistant that corrects location names."},
|
| 136 |
+
{"role": "user", "content": prompt}
|
| 137 |
+
],
|
| 138 |
+
max_tokens=50,
|
| 139 |
+
temperature=0.1
|
| 140 |
+
)
|
| 141 |
+
corrected = response.choices[0].message.content.strip()
|
| 142 |
+
except Exception:
|
| 143 |
+
raise sdk_err
|
| 144 |
+
|
| 145 |
+
# Validate LLM response
|
| 146 |
+
if corrected.upper() == "UNKNOWN":
|
| 147 |
+
return None
|
| 148 |
+
|
| 149 |
+
# Check if corrected location exists in our database
|
| 150 |
+
if corrected in self.available_locations:
|
| 151 |
+
return corrected
|
| 152 |
+
|
| 153 |
+
# Try fuzzy matching on LLM response
|
| 154 |
+
llm_fuzzy_match, confidence = self._fuzzy_match(corrected)
|
| 155 |
+
if confidence >= 0.8:
|
| 156 |
+
return llm_fuzzy_match
|
| 157 |
+
|
| 158 |
+
return None
|
| 159 |
+
|
| 160 |
+
except Exception as e:
|
| 161 |
+
print(f"LLM correction error: {e}")
|
| 162 |
+
return None
|
| 163 |
+
|
| 164 |
+
def extract_locations_from_query(self, query: str) -> List[Tuple[str, str, float, str]]:
|
| 165 |
+
"""
|
| 166 |
+
Extract and correct locations from a natural language query
|
| 167 |
+
|
| 168 |
+
Returns:
|
| 169 |
+
List of tuples: (original, corrected, confidence, method)
|
| 170 |
+
"""
|
| 171 |
+
# Common patterns for location extraction
|
| 172 |
+
patterns = [
|
| 173 |
+
r'from\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)(?:\s|$|\?)',
|
| 174 |
+
r'([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)(?:\s|$|\?)',
|
| 175 |
+
r'between\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+and\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)(?:\s|$|\?)',
|
| 176 |
+
r'fare\s+(?:of|from\s+)?([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)(?:\s|$|\?)',
|
| 177 |
+
r'price\s+(?:of|from\s+)?([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)(?:\s|$|\?)',
|
| 178 |
+
r'cost\s+(?:of|from\s+)?([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)(?:\s|$|\?)',
|
| 179 |
+
r'how\s+much\s+(?:is|does)\s+(?:the\s+)?(?:fare|price|cost)\s+(?:from\s+)?([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)(?:\s|$|\?)',
|
| 180 |
+
r'what\s+(?:is|are)\s+(?:the\s+)?(?:fare|price|cost)s?\s+(?:from\s+)?([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)(?:\s|$|\?)',
|
| 181 |
+
r'([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+(?:fare|price|cost)(?:\s|$|\?)',
|
| 182 |
+
r'(?:fare|price|cost)\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)(?:\s|$|\?)'
|
| 183 |
+
]
|
| 184 |
+
|
| 185 |
+
locations = []
|
| 186 |
+
|
| 187 |
+
# Try all patterns to find locations
|
| 188 |
+
for pattern in patterns:
|
| 189 |
+
match = re.search(pattern, query, re.IGNORECASE)
|
| 190 |
+
if match:
|
| 191 |
+
# Extract locations from the match
|
| 192 |
+
groups = match.groups()
|
| 193 |
+
if len(groups) >= 2:
|
| 194 |
+
from_location = groups[0].strip()
|
| 195 |
+
to_location = groups[1].strip()
|
| 196 |
+
|
| 197 |
+
# Skip if locations are too short or common words
|
| 198 |
+
if len(from_location) >= 2 and from_location.lower() not in ['to', 'from', 'and', 'the', 'a', 'an']:
|
| 199 |
+
from_corrected, from_confidence, from_method = self.correct_location(from_location)
|
| 200 |
+
if from_confidence > 0.5:
|
| 201 |
+
locations.append((
|
| 202 |
+
from_location,
|
| 203 |
+
from_corrected,
|
| 204 |
+
from_confidence,
|
| 205 |
+
from_method
|
| 206 |
+
))
|
| 207 |
+
|
| 208 |
+
if len(to_location) >= 2 and to_location.lower() not in ['to', 'from', 'and', 'the', 'a', 'an']:
|
| 209 |
+
to_corrected, to_confidence, to_method = self.correct_location(to_location)
|
| 210 |
+
if to_confidence > 0.5:
|
| 211 |
+
locations.append((
|
| 212 |
+
to_location,
|
| 213 |
+
to_corrected,
|
| 214 |
+
to_confidence,
|
| 215 |
+
to_method
|
| 216 |
+
))
|
| 217 |
+
|
| 218 |
+
# If we found locations, break to avoid duplicates
|
| 219 |
+
if len(locations) >= 2:
|
| 220 |
+
break
|
| 221 |
+
|
| 222 |
+
return locations
|
| 223 |
+
|
| 224 |
+
def get_suggestions(self, partial_location: str) -> List[Tuple[str, float]]:
    """Return ranked autocomplete candidates for a partially typed location.

    Each entry of ``self.available_locations`` is compared case-insensitively
    with the typed text:

    * a name that starts with the typed text scores ``1.0``;
    * otherwise the name is kept when ``fuzz.ratio(...) / 100`` is at least
      ``0.6``, and that ratio is used as its score.

    Args:
        partial_location: Text typed so far. Surrounding whitespace is
            ignored.

    Returns:
        ``(location, score)`` pairs sorted by descending score and truncated
        to ``self.config.MAX_SUGGESTIONS`` entries. Empty or whitespace-only
        input yields an empty list.
    """
    partial_lower = partial_location.strip().lower() if partial_location else ""
    if not partial_lower:
        # "".startswith-style matching would flag every location as a perfect
        # prefix match, so blank input returns no suggestions instead.
        return []

    suggestions = []
    for location in self.available_locations:
        location_lower = location.lower()

        # Prefix matches are treated as perfect matches.
        if location_lower.startswith(partial_lower):
            suggestions.append((location, 1.0))
            continue

        # Compute the fuzzy score once and reuse it for both the threshold
        # check and the stored confidence.
        similarity = fuzz.ratio(partial_lower, location_lower) / 100
        if similarity >= 0.6:
            suggestions.append((location, similarity))

    # list.sort is stable, so equally scored names keep their source order.
    suggestions.sort(key=lambda item: item[1], reverse=True)
    return suggestions[:self.config.MAX_SUGGESTIONS]
|
| 242 |
+
|
| 243 |
+
def validate_route(self, from_location: str, to_location: str) -> Tuple[bool, str]:
    """Check that both route endpoints can be resolved and are distinct.

    Both names are passed through ``self.correct_location``. The route is
    rejected when either endpoint resolves with a confidence below ``0.5``
    or when both resolve to the same corrected name. No route lookup is
    performed here; only the endpoint names are checked.

    Args:
        from_location: Departure name as entered by the user.
        to_location: Destination name as entered by the user.

    Returns:
        ``(is_valid, message)`` where ``message`` either explains the
        rejection or describes the corrected route.
    """
    # Resolve both endpoints up front, before any of the checks below.
    origin, origin_score, _ = self.correct_location(from_location)
    destination, destination_score, _ = self.correct_location(to_location)

    if origin_score < 0.5:
        outcome = (False, f"Could not identify departure location: '{from_location}'")
    elif destination_score < 0.5:
        outcome = (False, f"Could not identify destination location: '{to_location}'")
    elif origin == destination:
        outcome = (False, f"Departure and destination cannot be the same: '{origin}'")
    else:
        outcome = (True, f"Route: {origin} → {destination}")

    return outcome
|
templates/index.html
ADDED
|
@@ -0,0 +1,977 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8">
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
+
<title>🚌 Transport Query System</title>
|
| 7 |
+
<link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css" rel="stylesheet">
|
| 8 |
+
<style>
|
| 9 |
+
* {
|
| 10 |
+
margin: 0;
|
| 11 |
+
padding: 0;
|
| 12 |
+
box-sizing: border-box;
|
| 13 |
+
}
|
| 14 |
+
|
| 15 |
+
body {
|
| 16 |
+
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
|
| 17 |
+
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
|
| 18 |
+
min-height: 100vh;
|
| 19 |
+
color: #333;
|
| 20 |
+
}
|
| 21 |
+
|
| 22 |
+
.container {
|
| 23 |
+
max-width: 1200px;
|
| 24 |
+
margin: 0 auto;
|
| 25 |
+
padding: 20px;
|
| 26 |
+
}
|
| 27 |
+
|
| 28 |
+
.header {
|
| 29 |
+
text-align: center;
|
| 30 |
+
margin-bottom: 30px;
|
| 31 |
+
color: white;
|
| 32 |
+
}
|
| 33 |
+
|
| 34 |
+
.header h1 {
|
| 35 |
+
font-size: 2.5rem;
|
| 36 |
+
margin-bottom: 10px;
|
| 37 |
+
text-shadow: 2px 2px 4px rgba(0,0,0,0.3);
|
| 38 |
+
}
|
| 39 |
+
|
| 40 |
+
.header p {
|
| 41 |
+
font-size: 1.1rem;
|
| 42 |
+
opacity: 0.9;
|
| 43 |
+
}
|
| 44 |
+
|
| 45 |
+
.main-content {
|
| 46 |
+
display: grid;
|
| 47 |
+
grid-template-columns: 1fr 1fr;
|
| 48 |
+
gap: 30px;
|
| 49 |
+
margin-bottom: 30px;
|
| 50 |
+
}
|
| 51 |
+
|
| 52 |
+
.query-section {
|
| 53 |
+
background: white;
|
| 54 |
+
border-radius: 15px;
|
| 55 |
+
padding: 30px;
|
| 56 |
+
box-shadow: 0 10px 30px rgba(0,0,0,0.2);
|
| 57 |
+
}
|
| 58 |
+
|
| 59 |
+
.query-section h2 {
|
| 60 |
+
color: #667eea;
|
| 61 |
+
margin-bottom: 20px;
|
| 62 |
+
font-size: 1.5rem;
|
| 63 |
+
}
|
| 64 |
+
|
| 65 |
+
.query-input {
|
| 66 |
+
position: relative;
|
| 67 |
+
margin-bottom: 20px;
|
| 68 |
+
}
|
| 69 |
+
|
| 70 |
+
.query-input input {
|
| 71 |
+
width: 100%;
|
| 72 |
+
padding: 15px 20px;
|
| 73 |
+
border: 2px solid #e1e5e9;
|
| 74 |
+
border-radius: 10px;
|
| 75 |
+
font-size: 1rem;
|
| 76 |
+
transition: all 0.3s ease;
|
| 77 |
+
}
|
| 78 |
+
|
| 79 |
+
.query-input input:focus {
|
| 80 |
+
outline: none;
|
| 81 |
+
border-color: #667eea;
|
| 82 |
+
box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1);
|
| 83 |
+
}
|
| 84 |
+
|
| 85 |
+
.query-button {
|
| 86 |
+
width: 100%;
|
| 87 |
+
padding: 15px;
|
| 88 |
+
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
|
| 89 |
+
color: white;
|
| 90 |
+
border: none;
|
| 91 |
+
border-radius: 10px;
|
| 92 |
+
font-size: 1.1rem;
|
| 93 |
+
font-weight: 600;
|
| 94 |
+
cursor: pointer;
|
| 95 |
+
transition: all 0.3s ease;
|
| 96 |
+
}
|
| 97 |
+
|
| 98 |
+
.query-button:hover {
|
| 99 |
+
transform: translateY(-2px);
|
| 100 |
+
box-shadow: 0 5px 15px rgba(0,0,0,0.2);
|
| 101 |
+
}
|
| 102 |
+
|
| 103 |
+
.query-button:disabled {
|
| 104 |
+
opacity: 0.6;
|
| 105 |
+
cursor: not-allowed;
|
| 106 |
+
transform: none;
|
| 107 |
+
}
|
| 108 |
+
|
| 109 |
+
.results-section {
|
| 110 |
+
background: white;
|
| 111 |
+
border-radius: 15px;
|
| 112 |
+
padding: 30px;
|
| 113 |
+
box-shadow: 0 10px 30px rgba(0,0,0,0.2);
|
| 114 |
+
max-height: 600px;
|
| 115 |
+
overflow-y: auto;
|
| 116 |
+
}
|
| 117 |
+
|
| 118 |
+
.results-section h2 {
|
| 119 |
+
color: #667eea;
|
| 120 |
+
margin-bottom: 20px;
|
| 121 |
+
font-size: 1.5rem;
|
| 122 |
+
}
|
| 123 |
+
|
| 124 |
+
.result-item {
|
| 125 |
+
background: #f8f9fa;
|
| 126 |
+
border-radius: 10px;
|
| 127 |
+
padding: 20px;
|
| 128 |
+
margin-bottom: 15px;
|
| 129 |
+
border-left: 4px solid #667eea;
|
| 130 |
+
}
|
| 131 |
+
|
| 132 |
+
.result-item.success {
|
| 133 |
+
border-left-color: #28a745;
|
| 134 |
+
}
|
| 135 |
+
|
| 136 |
+
.result-item.error {
|
| 137 |
+
border-left-color: #dc3545;
|
| 138 |
+
}
|
| 139 |
+
|
| 140 |
+
.result-message {
|
| 141 |
+
font-size: 1.1rem;
|
| 142 |
+
margin-bottom: 10px;
|
| 143 |
+
font-weight: 500;
|
| 144 |
+
}
|
| 145 |
+
|
| 146 |
+
.result-data {
|
| 147 |
+
background: #e9ecef;
|
| 148 |
+
border-radius: 8px;
|
| 149 |
+
padding: 15px;
|
| 150 |
+
margin: 10px 0;
|
| 151 |
+
font-family: 'Courier New', monospace;
|
| 152 |
+
font-size: 0.9rem;
|
| 153 |
+
overflow-x: auto;
|
| 154 |
+
}
|
| 155 |
+
|
| 156 |
+
.corrections {
|
| 157 |
+
margin-top: 15px;
|
| 158 |
+
padding: 10px;
|
| 159 |
+
background: #fff3cd;
|
| 160 |
+
border-radius: 8px;
|
| 161 |
+
border: 1px solid #ffeaa7;
|
| 162 |
+
}
|
| 163 |
+
|
| 164 |
+
.correction-item {
|
| 165 |
+
margin: 5px 0;
|
| 166 |
+
font-size: 0.9rem;
|
| 167 |
+
}
|
| 168 |
+
|
| 169 |
+
.suggestions {
|
| 170 |
+
margin-top: 15px;
|
| 171 |
+
}
|
| 172 |
+
|
| 173 |
+
.suggestion-item {
|
| 174 |
+
background: #e3f2fd;
|
| 175 |
+
border-radius: 5px;
|
| 176 |
+
padding: 8px 12px;
|
| 177 |
+
margin: 5px 0;
|
| 178 |
+
cursor: pointer;
|
| 179 |
+
transition: background 0.3s ease;
|
| 180 |
+
}
|
| 181 |
+
|
| 182 |
+
.suggestion-item:hover {
|
| 183 |
+
background: #bbdefb;
|
| 184 |
+
}
|
| 185 |
+
|
| 186 |
+
.chat-container {
|
| 187 |
+
background: white;
|
| 188 |
+
border-radius: 15px;
|
| 189 |
+
box-shadow: 0 10px 30px rgba(0,0,0,0.2);
|
| 190 |
+
margin-bottom: 30px;
|
| 191 |
+
overflow: hidden;
|
| 192 |
+
}
|
| 193 |
+
|
| 194 |
+
.chat-header {
|
| 195 |
+
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
|
| 196 |
+
color: white;
|
| 197 |
+
padding: 20px 30px;
|
| 198 |
+
display: flex;
|
| 199 |
+
justify-content: space-between;
|
| 200 |
+
align-items: center;
|
| 201 |
+
}
|
| 202 |
+
|
| 203 |
+
.chat-header h2 {
|
| 204 |
+
margin: 0;
|
| 205 |
+
font-size: 1.5rem;
|
| 206 |
+
}
|
| 207 |
+
|
| 208 |
+
.chat-status {
|
| 209 |
+
font-size: 0.9rem;
|
| 210 |
+
opacity: 0.9;
|
| 211 |
+
}
|
| 212 |
+
|
| 213 |
+
.chat-body {
|
| 214 |
+
display: grid;
|
| 215 |
+
grid-template-columns: 1fr 300px;
|
| 216 |
+
min-height: 500px;
|
| 217 |
+
}
|
| 218 |
+
|
| 219 |
+
.chat-messages {
|
| 220 |
+
padding: 20px;
|
| 221 |
+
max-height: 400px;
|
| 222 |
+
overflow-y: auto;
|
| 223 |
+
border-right: 1px solid #e9ecef;
|
| 224 |
+
}
|
| 225 |
+
|
| 226 |
+
.message {
|
| 227 |
+
margin-bottom: 20px;
|
| 228 |
+
display: flex;
|
| 229 |
+
align-items: flex-start;
|
| 230 |
+
gap: 10px;
|
| 231 |
+
}
|
| 232 |
+
|
| 233 |
+
.message.user {
|
| 234 |
+
flex-direction: row-reverse;
|
| 235 |
+
}
|
| 236 |
+
|
| 237 |
+
.message-avatar {
|
| 238 |
+
width: 40px;
|
| 239 |
+
height: 40px;
|
| 240 |
+
border-radius: 50%;
|
| 241 |
+
display: flex;
|
| 242 |
+
align-items: center;
|
| 243 |
+
justify-content: center;
|
| 244 |
+
font-size: 1.2rem;
|
| 245 |
+
flex-shrink: 0;
|
| 246 |
+
}
|
| 247 |
+
|
| 248 |
+
.message.user .message-avatar {
|
| 249 |
+
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
|
| 250 |
+
color: white;
|
| 251 |
+
}
|
| 252 |
+
|
| 253 |
+
.message.bot .message-avatar {
|
| 254 |
+
background: #f8f9fa;
|
| 255 |
+
color: #667eea;
|
| 256 |
+
border: 2px solid #667eea;
|
| 257 |
+
}
|
| 258 |
+
|
| 259 |
+
.message-content {
|
| 260 |
+
flex: 1;
|
| 261 |
+
max-width: 70%;
|
| 262 |
+
}
|
| 263 |
+
|
| 264 |
+
.message.user .message-content {
|
| 265 |
+
text-align: right;
|
| 266 |
+
}
|
| 267 |
+
|
| 268 |
+
.message-bubble {
|
| 269 |
+
background: #f8f9fa;
|
| 270 |
+
border-radius: 15px;
|
| 271 |
+
padding: 15px;
|
| 272 |
+
display: inline-block;
|
| 273 |
+
max-width: 100%;
|
| 274 |
+
word-wrap: break-word;
|
| 275 |
+
}
|
| 276 |
+
|
| 277 |
+
.message.user .message-bubble {
|
| 278 |
+
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
|
| 279 |
+
color: white;
|
| 280 |
+
}
|
| 281 |
+
|
| 282 |
+
.message-time {
|
| 283 |
+
font-size: 0.8rem;
|
| 284 |
+
color: #6c757d;
|
| 285 |
+
margin-top: 5px;
|
| 286 |
+
}
|
| 287 |
+
|
| 288 |
+
.message.user .message-time {
|
| 289 |
+
text-align: right;
|
| 290 |
+
}
|
| 291 |
+
|
| 292 |
+
.chat-input-section {
|
| 293 |
+
padding: 20px;
|
| 294 |
+
border-top: 1px solid #e9ecef;
|
| 295 |
+
background: #f8f9fa;
|
| 296 |
+
}
|
| 297 |
+
|
| 298 |
+
.chat-input-container {
|
| 299 |
+
display: flex;
|
| 300 |
+
gap: 10px;
|
| 301 |
+
align-items: center;
|
| 302 |
+
}
|
| 303 |
+
|
| 304 |
+
.chat-input {
|
| 305 |
+
flex: 1;
|
| 306 |
+
padding: 12px 15px;
|
| 307 |
+
border: 2px solid #e1e5e9;
|
| 308 |
+
border-radius: 25px;
|
| 309 |
+
font-size: 1rem;
|
| 310 |
+
transition: all 0.3s ease;
|
| 311 |
+
}
|
| 312 |
+
|
| 313 |
+
.chat-input:focus {
|
| 314 |
+
outline: none;
|
| 315 |
+
border-color: #667eea;
|
| 316 |
+
box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1);
|
| 317 |
+
}
|
| 318 |
+
|
| 319 |
+
.chat-send-btn {
|
| 320 |
+
width: 45px;
|
| 321 |
+
height: 45px;
|
| 322 |
+
border-radius: 50%;
|
| 323 |
+
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
|
| 324 |
+
color: white;
|
| 325 |
+
border: none;
|
| 326 |
+
cursor: pointer;
|
| 327 |
+
transition: all 0.3s ease;
|
| 328 |
+
display: flex;
|
| 329 |
+
align-items: center;
|
| 330 |
+
justify-content: center;
|
| 331 |
+
}
|
| 332 |
+
|
| 333 |
+
.chat-send-btn:hover {
|
| 334 |
+
transform: scale(1.1);
|
| 335 |
+
}
|
| 336 |
+
|
| 337 |
+
.chat-send-btn:disabled {
|
| 338 |
+
opacity: 0.6;
|
| 339 |
+
cursor: not-allowed;
|
| 340 |
+
transform: none;
|
| 341 |
+
}
|
| 342 |
+
|
| 343 |
+
.examples-sidebar {
|
| 344 |
+
background: #f8f9fa;
|
| 345 |
+
padding: 20px;
|
| 346 |
+
border-left: 1px solid #e9ecef;
|
| 347 |
+
}
|
| 348 |
+
|
| 349 |
+
.examples-sidebar h3 {
|
| 350 |
+
color: #667eea;
|
| 351 |
+
margin-bottom: 15px;
|
| 352 |
+
font-size: 1.2rem;
|
| 353 |
+
}
|
| 354 |
+
|
| 355 |
+
.example-categories {
|
| 356 |
+
display: flex;
|
| 357 |
+
flex-direction: column;
|
| 358 |
+
gap: 10px;
|
| 359 |
+
}
|
| 360 |
+
|
| 361 |
+
.example-category {
|
| 362 |
+
background: white;
|
| 363 |
+
border-radius: 10px;
|
| 364 |
+
padding: 15px;
|
| 365 |
+
border: 2px solid transparent;
|
| 366 |
+
transition: all 0.3s ease;
|
| 367 |
+
}
|
| 368 |
+
|
| 369 |
+
.example-category:hover {
|
| 370 |
+
border-color: #667eea;
|
| 371 |
+
transform: translateX(5px);
|
| 372 |
+
}
|
| 373 |
+
|
| 374 |
+
.category-title {
|
| 375 |
+
font-weight: 600;
|
| 376 |
+
color: #667eea;
|
| 377 |
+
margin-bottom: 10px;
|
| 378 |
+
font-size: 0.9rem;
|
| 379 |
+
}
|
| 380 |
+
|
| 381 |
+
.example-queries {
|
| 382 |
+
display: flex;
|
| 383 |
+
flex-direction: column;
|
| 384 |
+
gap: 8px;
|
| 385 |
+
}
|
| 386 |
+
|
| 387 |
+
.example-query-btn {
|
| 388 |
+
background: #e3f2fd;
|
| 389 |
+
border: none;
|
| 390 |
+
border-radius: 8px;
|
| 391 |
+
padding: 8px 12px;
|
| 392 |
+
text-align: left;
|
| 393 |
+
cursor: pointer;
|
| 394 |
+
transition: all 0.3s ease;
|
| 395 |
+
font-size: 0.85rem;
|
| 396 |
+
color: #333;
|
| 397 |
+
}
|
| 398 |
+
|
| 399 |
+
.example-query-btn:hover {
|
| 400 |
+
background: #bbdefb;
|
| 401 |
+
transform: translateX(3px);
|
| 402 |
+
}
|
| 403 |
+
|
| 404 |
+
.example-query-btn i {
|
| 405 |
+
margin-right: 5px;
|
| 406 |
+
color: #667eea;
|
| 407 |
+
}
|
| 408 |
+
|
| 409 |
+
.data-display {
|
| 410 |
+
background: #f8f9fa;
|
| 411 |
+
border-radius: 10px;
|
| 412 |
+
padding: 15px;
|
| 413 |
+
margin-top: 10px;
|
| 414 |
+
}
|
| 415 |
+
|
| 416 |
+
.data-display h4 {
|
| 417 |
+
color: #667eea;
|
| 418 |
+
margin-bottom: 10px;
|
| 419 |
+
font-size: 1rem;
|
| 420 |
+
}
|
| 421 |
+
|
| 422 |
+
.data-table {
|
| 423 |
+
width: 100%;
|
| 424 |
+
border-collapse: collapse;
|
| 425 |
+
background: white;
|
| 426 |
+
border-radius: 8px;
|
| 427 |
+
overflow: hidden;
|
| 428 |
+
font-size: 0.85rem;
|
| 429 |
+
}
|
| 430 |
+
|
| 431 |
+
.data-table th,
|
| 432 |
+
.data-table td {
|
| 433 |
+
padding: 8px 12px;
|
| 434 |
+
text-align: left;
|
| 435 |
+
border-bottom: 1px solid #e9ecef;
|
| 436 |
+
}
|
| 437 |
+
|
| 438 |
+
.data-table th {
|
| 439 |
+
background: #667eea;
|
| 440 |
+
color: white;
|
| 441 |
+
font-weight: 600;
|
| 442 |
+
}
|
| 443 |
+
|
| 444 |
+
.data-table tr:hover {
|
| 445 |
+
background: #f8f9fa;
|
| 446 |
+
}
|
| 447 |
+
|
| 448 |
+
.corrections {
|
| 449 |
+
background: #fff3cd;
|
| 450 |
+
border: 1px solid #ffeaa7;
|
| 451 |
+
border-radius: 8px;
|
| 452 |
+
padding: 10px;
|
| 453 |
+
margin-top: 10px;
|
| 454 |
+
font-size: 0.85rem;
|
| 455 |
+
}
|
| 456 |
+
|
| 457 |
+
.corrections strong {
|
| 458 |
+
color: #856404;
|
| 459 |
+
}
|
| 460 |
+
|
| 461 |
+
.correction-item {
|
| 462 |
+
margin: 3px 0;
|
| 463 |
+
color: #856404;
|
| 464 |
+
}
|
| 465 |
+
|
| 466 |
+
.suggestions {
|
| 467 |
+
margin-top: 10px;
|
| 468 |
+
}
|
| 469 |
+
|
| 470 |
+
.suggestion-item {
|
| 471 |
+
background: #e3f2fd;
|
| 472 |
+
border-radius: 5px;
|
| 473 |
+
padding: 6px 10px;
|
| 474 |
+
margin: 3px 0;
|
| 475 |
+
cursor: pointer;
|
| 476 |
+
transition: background 0.3s ease;
|
| 477 |
+
font-size: 0.85rem;
|
| 478 |
+
}
|
| 479 |
+
|
| 480 |
+
.suggestion-item:hover {
|
| 481 |
+
background: #bbdefb;
|
| 482 |
+
}
|
| 483 |
+
|
| 484 |
+
@media (max-width: 768px) {
|
| 485 |
+
.chat-body {
|
| 486 |
+
grid-template-columns: 1fr;
|
| 487 |
+
}
|
| 488 |
+
|
| 489 |
+
.examples-sidebar {
|
| 490 |
+
border-left: none;
|
| 491 |
+
border-top: 1px solid #e9ecef;
|
| 492 |
+
}
|
| 493 |
+
|
| 494 |
+
.message-content {
|
| 495 |
+
max-width: 85%;
|
| 496 |
+
}
|
| 497 |
+
}
|
| 498 |
+
|
| 499 |
+
.status-bar {
|
| 500 |
+
background: white;
|
| 501 |
+
border-radius: 15px;
|
| 502 |
+
padding: 20px;
|
| 503 |
+
box-shadow: 0 10px 30px rgba(0,0,0,0.2);
|
| 504 |
+
margin-bottom: 20px;
|
| 505 |
+
}
|
| 506 |
+
|
| 507 |
+
.status-grid {
|
| 508 |
+
display: grid;
|
| 509 |
+
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
|
| 510 |
+
gap: 20px;
|
| 511 |
+
}
|
| 512 |
+
|
| 513 |
+
.status-item {
|
| 514 |
+
text-align: center;
|
| 515 |
+
padding: 15px;
|
| 516 |
+
background: #f8f9fa;
|
| 517 |
+
border-radius: 10px;
|
| 518 |
+
}
|
| 519 |
+
|
| 520 |
+
.status-label {
|
| 521 |
+
font-size: 0.9rem;
|
| 522 |
+
color: #6c757d;
|
| 523 |
+
margin-bottom: 5px;
|
| 524 |
+
}
|
| 525 |
+
|
| 526 |
+
.status-value {
|
| 527 |
+
font-size: 1.5rem;
|
| 528 |
+
font-weight: 600;
|
| 529 |
+
color: #667eea;
|
| 530 |
+
}
|
| 531 |
+
|
| 532 |
+
.status-value.success {
|
| 533 |
+
color: #28a745;
|
| 534 |
+
}
|
| 535 |
+
|
| 536 |
+
.status-value.error {
|
| 537 |
+
color: #dc3545;
|
| 538 |
+
}
|
| 539 |
+
|
| 540 |
+
.loading {
|
| 541 |
+
display: none;
|
| 542 |
+
text-align: center;
|
| 543 |
+
padding: 20px;
|
| 544 |
+
}
|
| 545 |
+
|
| 546 |
+
.loading i {
|
| 547 |
+
font-size: 2rem;
|
| 548 |
+
color: #667eea;
|
| 549 |
+
animation: spin 1s linear infinite;
|
| 550 |
+
}
|
| 551 |
+
|
| 552 |
+
@keyframes spin {
|
| 553 |
+
0% { transform: rotate(0deg); }
|
| 554 |
+
100% { transform: rotate(360deg); }
|
| 555 |
+
}
|
| 556 |
+
|
| 557 |
+
.table-container {
|
| 558 |
+
overflow-x: auto;
|
| 559 |
+
margin-top: 15px;
|
| 560 |
+
}
|
| 561 |
+
|
| 562 |
+
.data-table {
|
| 563 |
+
width: 100%;
|
| 564 |
+
border-collapse: collapse;
|
| 565 |
+
background: white;
|
| 566 |
+
border-radius: 8px;
|
| 567 |
+
overflow: hidden;
|
| 568 |
+
}
|
| 569 |
+
|
| 570 |
+
.data-table th,
|
| 571 |
+
.data-table td {
|
| 572 |
+
padding: 12px;
|
| 573 |
+
text-align: left;
|
| 574 |
+
border-bottom: 1px solid #e9ecef;
|
| 575 |
+
}
|
| 576 |
+
|
| 577 |
+
.data-table th {
|
| 578 |
+
background: #667eea;
|
| 579 |
+
color: white;
|
| 580 |
+
font-weight: 600;
|
| 581 |
+
}
|
| 582 |
+
|
| 583 |
+
.data-table tr:hover {
|
| 584 |
+
background: #f8f9fa;
|
| 585 |
+
}
|
| 586 |
+
|
| 587 |
+
@media (max-width: 768px) {
|
| 588 |
+
.main-content {
|
| 589 |
+
grid-template-columns: 1fr;
|
| 590 |
+
}
|
| 591 |
+
|
| 592 |
+
.header h1 {
|
| 593 |
+
font-size: 2rem;
|
| 594 |
+
}
|
| 595 |
+
|
| 596 |
+
.container {
|
| 597 |
+
padding: 10px;
|
| 598 |
+
}
|
| 599 |
+
}
|
| 600 |
+
</style>
|
| 601 |
+
</head>
|
| 602 |
+
<body>
|
| 603 |
+
<div class="container">
|
| 604 |
+
<div class="header">
|
| 605 |
+
<h1><i class="fas fa-bus"></i> Transport Query System</h1>
|
| 606 |
+
<p>Ask questions about transport fares and routes with intelligent spell correction</p>
|
| 607 |
+
</div>
|
| 608 |
+
|
| 609 |
+
<div class="status-bar">
|
| 610 |
+
<div class="status-grid">
|
| 611 |
+
<div class="status-item">
|
| 612 |
+
<div class="status-label">Neo4j Status</div>
|
| 613 |
+
<div class="status-value" id="neo4j-status">Checking...</div>
|
| 614 |
+
</div>
|
| 615 |
+
<div class="status-item">
|
| 616 |
+
<div class="status-label">Total Places</div>
|
| 617 |
+
<div class="status-value" id="total-places">-</div>
|
| 618 |
+
</div>
|
| 619 |
+
<div class="status-item">
|
| 620 |
+
<div class="status-label">Total Routes</div>
|
| 621 |
+
<div class="status-value" id="total-routes">-</div>
|
| 622 |
+
</div>
|
| 623 |
+
<div class="status-item">
|
| 624 |
+
<div class="status-label">Average Fare</div>
|
| 625 |
+
<div class="status-value" id="avg-fare">-</div>
|
| 626 |
+
</div>
|
| 627 |
+
</div>
|
| 628 |
+
</div>
|
| 629 |
+
|
| 630 |
+
<div class="chat-container">
|
| 631 |
+
<div class="chat-header">
|
| 632 |
+
<h2><i class="fas fa-comments"></i> Transport Query Chat</h2>
|
| 633 |
+
<div class="chat-status">
|
| 634 |
+
<i class="fas fa-circle" id="status-indicator"></i>
|
| 635 |
+
<span id="status-text">Ready</span>
|
| 636 |
+
</div>
|
| 637 |
+
</div>
|
| 638 |
+
|
| 639 |
+
<div class="chat-body">
|
| 640 |
+
<div class="chat-messages" id="chat-messages">
|
| 641 |
+
<div class="message bot">
|
| 642 |
+
<div class="message-avatar">
|
| 643 |
+
<i class="fas fa-robot"></i>
|
| 644 |
+
</div>
|
| 645 |
+
<div class="message-content">
|
| 646 |
+
<div class="message-bubble">
|
| 647 |
+
Hello! I'm your transport assistant. I can help you find fares, compare routes, and get transport information. Try clicking on an example query or type your own question!
|
| 648 |
+
</div>
|
| 649 |
+
<div class="message-time" id="welcome-time"></div>
|
| 650 |
+
</div>
|
| 651 |
+
</div>
|
| 652 |
+
</div>
|
| 653 |
+
|
| 654 |
+
<div class="examples-sidebar">
|
| 655 |
+
<h3><i class="fas fa-lightbulb"></i> Example Queries</h3>
|
| 656 |
+
<div class="example-categories" id="example-categories">
|
| 657 |
+
<!-- Example categories will be loaded here -->
|
| 658 |
+
</div>
|
| 659 |
+
</div>
|
| 660 |
+
</div>
|
| 661 |
+
|
| 662 |
+
<div class="chat-input-section">
|
| 663 |
+
<div class="chat-input-container">
|
| 664 |
+
<input type="text" class="chat-input" id="chat-input" placeholder="Type your transport question here..." autocomplete="off">
|
| 665 |
+
<button class="chat-send-btn" id="chat-send-btn" onclick="sendChatMessage()">
|
| 666 |
+
<i class="fas fa-paper-plane"></i>
|
| 667 |
+
</button>
|
| 668 |
+
</div>
|
| 669 |
+
</div>
|
| 670 |
+
</div>
|
| 671 |
+
</div>
|
| 672 |
+
|
| 673 |
+
<script>
|
| 674 |
+
// Global variables
|
| 675 |
+
// Most recent query submitted through the chat box.
let currentQuery = '';

// Bootstrap the page once the DOM has been parsed: status bar, example
// sidebar, chat input handlers and the welcome-message timestamp.
document.addEventListener('DOMContentLoaded', () => {
    loadStatus();
    loadExampleCategories();
    setupChatEventListeners();
    setWelcomeTime();
});
|
| 684 |
+
|
| 685 |
+
// Wire up the chat input: Enter submits the message, and the send button is
// enabled only while the input contains non-whitespace text.
function setupChatEventListeners() {
    const chatInput = document.getElementById('chat-input');
    const chatSendBtn = document.getElementById('chat-send-btn');

    // Start in the same state the button is put into after every send, so
    // an empty input never shows an enabled send button.
    chatSendBtn.disabled = !chatInput.value.trim();

    // Enter key to send message. 'keydown' replaces the deprecated
    // 'keypress' event; Enter presses that merely confirm an IME composition
    // are ignored so they do not submit a half-typed message.
    chatInput.addEventListener('keydown', function(e) {
        if (e.key === 'Enter' && !e.isComposing) {
            sendChatMessage();
        }
    });

    // Input validation
    chatInput.addEventListener('input', function() {
        chatSendBtn.disabled = !this.value.trim();
    });
}
|
| 701 |
+
|
| 702 |
+
// Stamp the static welcome message with the current local time.
function setWelcomeTime() {
    const welcomeTime = document.getElementById('welcome-time');
    welcomeTime.textContent = new Date().toLocaleTimeString();
}
|
| 707 |
+
|
| 708 |
+
// Fetch /api/status and mirror the result in the status bar (Neo4j state and
// statistics tiles) and in the chat header indicator.
async function loadStatus() {
    const neo4jStatus = document.getElementById('neo4j-status');
    const statusIndicator = document.getElementById('status-indicator');
    const statusText = document.getElementById('status-text');

    try {
        const response = await fetch('/api/status');
        // fetch() resolves on HTTP error statuses too; treat them as failures
        // instead of trying to interpret an error page as status data.
        if (!response.ok) {
            throw new Error(`Status request failed with HTTP ${response.status}`);
        }
        const data = await response.json();

        neo4jStatus.textContent = data.neo4j_connected ? 'Connected' : 'Disconnected';
        neo4jStatus.className = data.neo4j_connected ? 'status-value success' : 'status-value error';

        if (data.statistics) {
            document.getElementById('total-places').textContent = data.statistics.total_places || 0;
            document.getElementById('total-routes').textContent = data.statistics.total_routes || 0;
            document.getElementById('avg-fare').textContent = `Rs. ${data.statistics.average_fare || 0}`;
        }

        // Update chat status
        if (data.neo4j_connected) {
            statusIndicator.style.color = '#28a745';
            statusText.textContent = 'Connected to Database';
        } else {
            statusIndicator.style.color = '#dc3545';
            statusText.textContent = 'Database Disconnected';
        }
    } catch (error) {
        console.error('Error loading status:', error);
        // Replace the initial "Checking..." placeholder so the tile does not
        // stay in a loading state when the status call fails.
        neo4jStatus.textContent = 'Unavailable';
        neo4jStatus.className = 'status-value error';
        statusIndicator.style.color = '#dc3545';
        statusText.textContent = 'Connection Error';
    }
}
|
| 739 |
+
|
| 740 |
+
// Fetch /api/examples and render one card per category in the sidebar, with
// one button per example query. Clicking a button submits that query.
//
// The response is read as { examples: [{ category, examples: [{ query }] }] }.
// All server-provided text is inserted with textContent and the click
// handler is attached with addEventListener, so a query containing quotes,
// backslashes or markup can neither break the generated HTML nor inject
// script (the previous inline onclick only escaped single quotes).
async function loadExampleCategories() {
    try {
        const response = await fetch('/api/examples');
        if (!response.ok) {
            throw new Error(`Examples request failed with HTTP ${response.status}`);
        }
        const data = await response.json();

        const categoriesContainer = document.getElementById('example-categories');
        categoriesContainer.innerHTML = '';

        data.examples.forEach(category => {
            const categoryDiv = document.createElement('div');
            categoryDiv.className = 'example-category';

            const titleDiv = document.createElement('div');
            titleDiv.className = 'category-title';
            titleDiv.textContent = category.category;

            const queriesDiv = document.createElement('div');
            queriesDiv.className = 'example-queries';

            category.examples.forEach(example => {
                const button = document.createElement('button');
                button.className = 'example-query-btn';

                const icon = document.createElement('i');
                icon.className = 'fas fa-arrow-right';

                button.appendChild(icon);
                button.appendChild(document.createTextNode(` ${example.query}`));
                button.addEventListener('click', () => useExampleQuery(example.query));

                queriesDiv.appendChild(button);
            });

            categoryDiv.appendChild(titleDiv);
            categoryDiv.appendChild(queriesDiv);
            categoriesContainer.appendChild(categoryDiv);
        });
    } catch (error) {
        console.error('Error loading example categories:', error);
    }
}
|
| 772 |
+
|
| 773 |
+
// Copy an example query into the chat box, enable the send button and
// submit it immediately.
function useExampleQuery(query) {
    const inputBox = document.getElementById('chat-input');
    const sendButton = document.getElementById('chat-send-btn');

    inputBox.value = query;
    sendButton.disabled = false;
    sendChatMessage();
}
|
| 778 |
+
|
| 779 |
+
// Submit the text in the chat box to /api/query and render the reply.
//
// The user's message is echoed into the chat, the input is cleared and the
// send button disabled, and a typing indicator is shown while the request is
// in flight. The parsed JSON reply is handed to addBotResponse(); any failure
// is logged and reported in the chat as a generic error message.
async function sendChatMessage() {
    const chatInput = document.getElementById('chat-input');
    const query = chatInput.value.trim();

    // Nothing to send for empty or whitespace-only input.
    if (!query) return;

    currentQuery = query;

    // Add user message to chat
    addChatMessage('user', query);

    // Clear input
    chatInput.value = '';
    document.getElementById('chat-send-btn').disabled = true;

    // Show typing indicator
    showTypingIndicator();

    try {
        const response = await fetch('/api/query', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
            },
            body: JSON.stringify({ query: query })
        });

        // The body is parsed regardless of HTTP status so that a structured
        // error reply from the server still reaches addBotResponse().
        const result = await response.json();

        // Remove typing indicator
        removeTypingIndicator();

        // Add bot response
        addBotResponse(result);

    } catch (error) {
        // Log the cause (as the other loaders do) instead of dropping it;
        // the user only sees the generic message below.
        console.error('Error processing query:', error);
        removeTypingIndicator();
        addChatMessage('bot', 'Sorry, I encountered an error processing your query. Please try again.');
    }
}
|
| 819 |
+
|
| 820 |
+
/**
 * Append a plain-text chat message to the #chat-messages container and
 * scroll the container to the bottom.
 *
 * The message is written with textContent rather than interpolated into
 * innerHTML, so text typed by the user can never be interpreted as markup.
 * NOTE(review): callers outside this view that pass HTML on purpose would now
 * see it rendered as literal text - confirm none exist.
 *
 * @param {string} sender - 'user' for the user; any other value is styled as the bot.
 * @param {string} message - Message text to display.
 */
function addChatMessage(sender, message) {
    const chatMessages = document.getElementById('chat-messages');
    const timeString = new Date().toLocaleTimeString();

    const messageDiv = document.createElement('div');
    messageDiv.className = `message ${sender}`;

    const avatarIcon = sender === 'user' ? 'fas fa-user' : 'fas fa-robot';

    // Static skeleton only; dynamic text is filled in below via textContent.
    messageDiv.innerHTML = `
        <div class="message-avatar">
            <i class="${avatarIcon}"></i>
        </div>
        <div class="message-content">
            <div class="message-bubble"></div>
            <div class="message-time"></div>
        </div>
    `;
    messageDiv.querySelector('.message-bubble').textContent = message;
    messageDiv.querySelector('.message-time').textContent = timeString;

    chatMessages.appendChild(messageDiv);
    chatMessages.scrollTop = chatMessages.scrollHeight;
}
|
| 843 |
+
|
| 844 |
+
/**
 * Render a backend reply as a bot message in the #chat-messages container.
 *
 * On success the message is followed by the optional data table, spell
 * corrections and suggestions; on failure an apology with the error message
 * is shown. The message text is HTML-escaped before being placed into
 * innerHTML.
 * NOTE(review): assumes the backend sends plain text in `message`; if it
 * sends intentional markup, that markup will now be displayed literally.
 *
 * @param {Object} result - Parsed JSON reply from /api/query.
 * @param {boolean} result.success - Whether the query was processed.
 * @param {string} [result.message] - Human-readable reply or error text.
 * @param {Array<Object>} [result.data] - Result rows.
 * @param {Array<Object>} [result.corrections] - Spell corrections applied.
 * @param {Array<string>} [result.suggestions] - Follow-up query suggestions.
 */
function addBotResponse(result) {
    // Local escaper so this function does not depend on any other helper.
    const escapeHtml = (value) => String(value == null ? '' : value)
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&#39;');

    const chatMessages = document.getElementById('chat-messages');
    const timeString = new Date().toLocaleTimeString();

    const messageDiv = document.createElement('div');
    messageDiv.className = 'message bot';

    const safeMessage = escapeHtml(result.message);
    let responseContent = '';

    if (result.success) {
        responseContent = `<div class="message-bubble">${safeMessage}</div>`;

        // Add data display if available
        if (result.data && Array.isArray(result.data) && result.data.length > 0) {
            responseContent += createChatDataDisplay(result.data);
        }

        // Add corrections if any
        if (result.corrections && result.corrections.length > 0) {
            responseContent += createCorrectionsDisplay(result.corrections);
        }

        // Add suggestions if any
        if (result.suggestions && result.suggestions.length > 0) {
            responseContent += createSuggestionsDisplay(result.suggestions);
        }
    } else {
        responseContent = `<div class="message-bubble">Sorry, I couldn't process your query: ${safeMessage}</div>`;
    }

    messageDiv.innerHTML = `
        <div class="message-avatar">
            <i class="fas fa-robot"></i>
        </div>
        <div class="message-content">
            ${responseContent}
            <div class="message-time">${timeString}</div>
        </div>
    `;

    chatMessages.appendChild(messageDiv);
    chatMessages.scrollTop = chatMessages.scrollHeight;
}
|
| 888 |
+
|
| 889 |
+
/**
 * Build the HTML for a compact results table shown inside a chat message.
 *
 * Column headers come from the keys of the first row (underscores become
 * spaces, upper-cased). At most the first 5 rows are rendered; when more
 * exist a "Showing first 5 of N results" note is appended. Header and cell
 * text is HTML-escaped, and null/undefined cells render as empty.
 *
 * @param {Array<Object>} data - Result rows.
 * @returns {string} HTML markup, or '' when there is no data.
 */
function createChatDataDisplay(data) {
    if (!data || data.length === 0) return '';

    // Local escaper so this function does not depend on any other helper.
    const escapeHtml = (value) => String(value == null ? '' : value)
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&#39;');

    const keys = Object.keys(data[0] || {});
    let html = '<div class="data-display"><h4>Results:</h4><table class="data-table"><thead><tr>';

    // Headers
    keys.forEach(key => {
        html += `<th>${escapeHtml(key.replace(/_/g, ' ').toUpperCase())}</th>`;
    });
    html += '</tr></thead><tbody>';

    // Rows (limit to first 5 for chat)
    data.slice(0, 5).forEach(row => {
        html += '<tr>';
        keys.forEach(key => {
            html += `<td>${escapeHtml(row == null ? '' : row[key])}</td>`;
        });
        html += '</tr>';
    });

    html += '</tbody></table>';

    if (data.length > 5) {
        html += `<p style="font-size: 0.8rem; color: #6c757d; margin-top: 5px;">Showing first 5 of ${data.length} results</p>`;
    }

    html += '</div>';
    return html;
}
|
| 919 |
+
|
| 920 |
+
/**
 * Build the HTML block that lists spell corrections applied to the query.
 *
 * Each item is rendered as: "original" → "corrected" (method). All three
 * fields are HTML-escaped because `original` echoes user input.
 *
 * @param {Array<{original: string, corrected: string, method: string}>} corrections
 * @returns {string} HTML markup for the corrections block.
 */
function createCorrectionsDisplay(corrections) {
    // Local escaper so this function does not depend on any other helper.
    const escapeHtml = (value) => String(value == null ? '' : value)
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&#39;');

    let html = '<div class="corrections"><strong>Spell Corrections:</strong>';
    corrections.forEach(correction => {
        const original = escapeHtml(correction.original);
        const corrected = escapeHtml(correction.corrected);
        const method = escapeHtml(correction.method);
        html += `<div class="correction-item">"${original}" → "${corrected}" (${method})</div>`;
    });
    html += '</div>';
    return html;
}
|
| 928 |
+
|
| 929 |
+
/**
 * Build the HTML block of clickable follow-up suggestions.
 *
 * The suggestion text is carried in a data-suggestion attribute and read back
 * by the click handler, instead of being spliced into a quoted JavaScript
 * string. Suggestions containing apostrophes or quotes therefore no longer
 * break the handler or allow script injection.
 *
 * @param {Array<string>} suggestions - Suggested query strings.
 * @returns {string} HTML markup for the suggestions block.
 */
function createSuggestionsDisplay(suggestions) {
    // Local escaper so this function does not depend on any other helper.
    const escapeHtml = (value) => String(value == null ? '' : value)
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&#39;');

    let html = '<div class="suggestions"><strong>Suggestions:</strong>';
    suggestions.forEach(suggestion => {
        const safe = escapeHtml(suggestion);
        html += `<div class="suggestion-item" data-suggestion="${safe}" onclick="useSuggestion(this.dataset.suggestion)">${safe}</div>`;
    });
    html += '</div>';
    return html;
}
|
| 937 |
+
|
| 938 |
+
/**
 * Append a temporary "Processing..." bot message (id "typing-indicator") to
 * the #chat-messages container and scroll the container to the bottom.
 */
function showTypingIndicator() {
    const container = document.getElementById('chat-messages');

    const indicator = document.createElement('div');
    indicator.id = 'typing-indicator';
    indicator.className = 'message bot';
    indicator.innerHTML = `
        <div class="message-avatar">
            <i class="fas fa-robot"></i>
        </div>
        <div class="message-content">
            <div class="message-bubble">
                <i class="fas fa-spinner fa-spin"></i> Processing...
            </div>
        </div>
    `;

    container.appendChild(indicator);
    container.scrollTop = container.scrollHeight;
}
|
| 958 |
+
|
| 959 |
+
/**
 * Remove the element with id "typing-indicator" from the page, if present.
 */
function removeTypingIndicator() {
    const indicator = document.getElementById('typing-indicator');
    if (!indicator) {
        return;
    }
    indicator.remove();
}
|
| 965 |
+
|
| 966 |
+
/**
 * Submit a suggested query.
 *
 * Copies the suggestion into the chat input, enables the send button and then
 * sends the message through sendChatMessage().
 *
 * @param {string} suggestion - Suggested query text to submit.
 */
function useSuggestion(suggestion) {
    const inputEl = document.getElementById('chat-input');
    const sendBtn = document.getElementById('chat-send-btn');

    inputEl.value = suggestion;
    sendBtn.disabled = false;

    sendChatMessage();
}
|
| 971 |
+
|
| 972 |
+
// Auto-refresh status: call loadStatus() every 30 seconds (30000 ms).
setInterval(loadStatus, 30000);
|
| 974 |
+
</script>
|
| 975 |
+
</body>
|
| 976 |
+
</html>
|
| 977 |
+
|
translation_service.py
ADDED
|
@@ -0,0 +1,702 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Translation Service for Sinhala-English Translation
|
| 4 |
+
Handles translation of queries and responses with multiple free alternatives
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import requests
|
| 8 |
+
import json
|
| 9 |
+
import re
|
| 10 |
+
import openai
|
| 11 |
+
from typing import Dict, Any, Optional
|
| 12 |
+
from config import Config
|
| 13 |
+
from logger import get_logger
|
| 14 |
+
|
| 15 |
+
class TranslationService:
|
| 16 |
+
def __init__(self):
    """Initialise configuration, logging, feature flags and lookup tables.

    Environment variables read here:
      * USE_PATTERN_TRANSLATION - 'true' enables the pattern-based Sinhala
        fare parser (default 'false').
      * FORCE_LLM_TRANSLATION - 'true' restricts translation to the LLM
        method (default 'false').
    """
    self.config = Config()
    self.openai_api_key = getattr(self.config, 'OPENAI_API_KEY', None)
    self.logger = get_logger(self.__class__.__name__)
    # Controls
    import os
    self.use_pattern_translation = os.getenv('USE_PATTERN_TRANSLATION', 'false').lower() == 'true'
    self.force_llm_translation = os.getenv('FORCE_LLM_TRANSLATION', 'false').lower() == 'true'
    # Name of the method that produced the most recent translation, if any.
    self.last_translation_method: Optional[str] = None

    # Free translation APIs
    self.libre_translate_url = "https://libretranslate.de/translate"  # Free public instance
    self.mymemory_url = "https://api.mymemory.translated.net/get"

    # Common transport terms in Sinhala and their English equivalents
    self.transport_terms = {
        # Fare related
        'කීයද': 'how much',
        'මිල': 'price',
        'වාරික': 'fare',
        'වාරිකය': 'fare',
        'වාරිකව': 'fare',
        'ගාස්තු': 'fare',
        'ගාස්තුව': 'fare',
        'ප්රවාහන ගාස්තු': 'transport fare',
        'බස් ගාස්තු': 'bus fare',
        'බස් ගාස්තුව': 'bus fare',
        'රේල් ගාස්තු': 'train fare',
        'රේල් ගාස්තුව': 'train fare',

        # Locations
        'කොළඹ': 'Colombo',
        'මහනුවර': 'Kandy',
        'මහනුවරට': 'Kandy',
        'ගාල්ල': 'Galle',
        'ගාල්ලට': 'Galle',
        'මාතර': 'Matara',
        'මාතරට': 'Matara',
        'අනුරාධපුර': 'Anuradhapura',
        'අනුරාධපුරට': 'Anuradhapura',
        'පානදුර': 'Panadura',
        'පානදුරට': 'Panadura',
        'අලුත්ගම': 'Aluthgama',
        'අලුත්ගමට': 'Aluthgama',
        'නුගේගොඩ': 'Nugegoda',
        'නුගේගොඩට': 'Nugegoda',
        'දෙහිවල': 'Dehiwala',
        'දෙහිවලට': 'Dehiwala',
        'මොරටුව': 'Moratuwa',
        'මොරටුවට': 'Moratuwa',

        # Direction words
        'වලින්': 'from',
        'වල': 'from',
        'ට': 'to',
        'වෙත': 'to',
        'සිට': 'from',
        'දක්වා': 'to',
        'සි': 'from',

        # Question words
        'කොහෙද': 'where',
        'කවදාද': 'when',
        'කොහොමද': 'how',
        'මොනවාද': 'what',
        'කවුද': 'who',

        # Comparison words
        'සමඟ': 'with',
        'සහ': 'and',
        'හෝ': 'or',
        'වඩා': 'more',
        'අඩු': 'less',
        'සමාන': 'same',
        'වෙනස': 'different',
        'සසඳන්න': 'compare',
        'සසඳන': 'compare',

        # Time words
        'දැන්': 'now',
        'අද': 'today',
        'හෙට': 'tomorrow',
        'ඊයේ': 'yesterday',

        # Common verbs
        'යන්න': 'go',
        'යන': 'go',
        'එන්න': 'come',
        'බලන්න': 'see',
        'දැනගන්න': 'know',
        'සොයන්න': 'find',
        'සොයන': 'find',
        'ඉගෙනගන්න': 'learn',
        'නිර්දේශ': 'recommend',
        'නිර්දේශ කරන්න': 'recommend',
        'පෙන්වන්න': 'show',
        'පෙන්වන': 'show',

        # Numbers and currency
        'රුපියල්': 'rupees',
        'රු': 'rupees',
        'රුපියල': 'rupees',

        # Common phrases
        'අතර': 'between',
        'සහිත': 'with',
        'මාර්ග': 'routes',
        'මාර්ගවල': 'routes',
        'ගමනාන්ත': 'destinations',
        'ප්රසිද්ධ': 'popular',
        'සාමාන්ය': 'average',
        'සාමාන්යය': 'average',
        'දත්ත': 'data',
        # Key restored from a mis-encoded (U+FFFD) form; spelling matches the
        # statistics example query used elsewhere in this file.
        'සංඛ්යාලේඛන': 'statistics',
    }

    # Sinhala script detection pattern (Unicode block U+0D80-U+0DFF)
    self.sinhala_pattern = re.compile(r'[\u0D80-\u0DFF]')
|
| 134 |
+
|
| 135 |
+
def is_sinhala_text(self, text: str) -> bool:
    """Return True when *text* contains at least one Sinhala character.

    The outcome is also written to the debug log together with the text.
    """
    detected = self.sinhala_pattern.search(text) is not None
    self.logger.debug(f"Sinhala detection: detected={detected}, text='{text}'")
    return detected
|
| 140 |
+
|
| 141 |
+
def _map_sinhala_place(self, text: str) -> str:
    """Map a Sinhala place token to its English name.

    Tries an exact lookup in ``self.transport_terms`` first, then retries
    after removing one trailing case particle (ට, වෙත, දක්වා, වලින්, වල,
    සිට).

    Args:
        text: Sinhala place token, optionally carrying a particle.

    Returns:
        The English name when known, otherwise the stripped input.
    """
    candidate = text.strip()
    # Direct map
    if candidate in self.transport_terms:
        return self.transport_terms[candidate]
    # Strip common Sinhala case particles/suffixes and try again. The extra
    # strip() handles particles written as a separate word ("ගාල්ල දක්වා"),
    # which would otherwise leave a trailing space and miss the lookup.
    base = re.sub(r'(ට|වෙත|දක්වා|වලින්|වල|සිට)$', '', candidate).strip()
    if base in self.transport_terms:
        return self.transport_terms[base]
    return candidate
|
| 152 |
+
|
| 153 |
+
def _parse_sinhala_fare_query(self, query: str) -> Optional[str]:
    """Detect a simple Sinhala fare query and build a clean English query.

    Example handled: "කොළඹ සිට මහනුවරට ගාස්තුව කීයද?" -> "What is the fare from Colombo to Kandy?"

    Args:
        query: Raw user query.

    Returns:
        The English fare question, or None when the query has no fare
        keyword, does not match the "<source> සිට <destination>ට/වෙත/දක්වා"
        shape, or parsing fails.
    """
    try:
        # Quick check for fare-related tokens to avoid false positives
        if not any(tok in query for tok in ['ගාස්තු', 'ගාස්තුව', 'වාරික', 'වාරිකය', 'මිල']):
            return None
        # Extract source and destination around the Sinhala "from"/"to"
        # particles. The negative lookahead makes the "to" particle match only
        # at the end of a word; without it the lazy destination group stops at
        # the first "ට" inside a name such as මොරටුව (Moratuwa).
        m = re.search(
            r'([\u0D80-\u0DFF\s]+?)\s*සිට\s*([\u0D80-\u0DFF\s]+?)(?:ට|වෙත|දක්වා)(?![\u0D80-\u0DFF])',
            query,
        )
        if not m:
            return None
        src_en = self._map_sinhala_place(m.group(1).strip())
        dst_en = self._map_sinhala_place(m.group(2).strip())
        return f"What is the fare from {src_en} to {dst_en}?"
    except Exception as e:
        # Best-effort parser: callers fall back to general translation on
        # None, but the cause is recorded instead of being dropped silently.
        self.logger.debug(f"Sinhala fare pattern parse failed: {e}")
        return None
|
| 172 |
+
|
| 173 |
+
def translate_with_llm(self, text: str, target_lang: str, source_lang: str = 'auto') -> Optional[str]:
    """Translate between Sinhala and English using the OpenAI chat API.

    The request is few-shot conditioned so that comparison/imperative intent,
    conjunctions and route directions are preserved. Instructions, example
    order and the "Sinhala:/English:" labels follow the requested direction;
    previously every request was phrased as Sinhala-to-English, including
    English-to-Sinhala ones.

    Args:
        text: Text to translate.
        target_lang: 'en' or 'si'.
        source_lang: 'si', 'en', or 'auto' to detect Sinhala script.

    Returns:
        The translated text, or None when no API key is configured, the
        language pair is unsupported, the reply is empty, or the call fails.
    """
    if not self.openai_api_key:
        return None

    try:
        # Determine source language
        if source_lang == 'auto':
            source_lang = 'si' if self.is_sinhala_text(text) else 'en'

        # Only Sinhala <-> English is supported.
        labels = {
            ('si', 'en'): ('Sinhala', 'English'),
            ('en', 'si'): ('English', 'Sinhala'),
        }.get((source_lang, target_lang))
        if not labels:
            return None
        source_label, target_label = labels
        to_english = target_lang == 'en'

        # Few-shot examples as (Sinhala, English) pairs.
        examples = [
            (
                "කොළඹ සිට මහනුවරට යන බස් ගාස්තුව කීයද?",
                "What is the bus fare from Colombo to Kandy?"
            ),
            (
                "කොළඹ සිට ගාල්ල දක්වා ටිකට් මිල කීයද?",
                "What is the ticket price from Colombo to Galle?"
            ),
            (
                "කොළඹ සිට පානදුර දක්වා සහ කොළඹ සිට ගාල්ල දක්වා ගාස්තු සසඳා බලන්න.",
                "Compare fares from Colombo to Panadura and from Colombo to Galle."
            ),
            (
                "රුපියල් 500 ට අඩු ගාස්තු සහිත මාර්ග පෙන්වන්න.",
                "Show routes with fares under 500 rupees."
            ),
            (
                "අඩු මිලේ මාර්ග නිර්දේශ කරන්න.",
                "Recommend cheap routes."
            ),
        ]

        # Compose messages with few-shot conditioning
        def build_messages(txt: str):
            if to_english:
                system_content = (
                    "You are a professional translator. Translate accurately and naturally. "
                    "Preserve imperative/comparative intent and list structure. Do not paraphrase. "
                    "Return only the English translation without quotes. "
                    "Canonical phrasing rules (use exactly): \n"
                    "- Use 'Compare' for comparison requests.\n"
                    "- Use 'Show' for requests like 'පෙන්වන්න' (do not use Provide/List).\n"
                    "- Use 'How much is the' for 'කීයද' fare/price questions.\n"
                    "- Use 'cheap' (not 'affordable').\n"
                    "- Use 'under' (not 'below') for '< value'.\n"
                )
                instructions = (
                    "Instructions: Preserve structure. Use 'Compare' for 'සසඳ', use 'from' for 'සිට' and 'to' for 'ට/වෙත/දක්වා'.\n"
                    "Use exact place names: මහනුවර=Kandy, කොළඹ=Colombo, ගාල්ල=Galle, මාතර=Matara, අනුරාධපුර=Anuradhapura."
                )
            else:
                system_content = (
                    "You are a professional translator. Translate accurately and naturally. "
                    "Preserve imperative/comparative intent and list structure. Do not paraphrase. "
                    "Return only the Sinhala translation without quotes. "
                )
                instructions = (
                    "Instructions: Preserve structure. Use 'සිට' for 'from' and 'ට/වෙත/දක්වා' for 'to'.\n"
                    "Use exact place names: Kandy=මහනුවර, Colombo=කොළඹ, Galle=ගාල්ල, Matara=මාතර, Anuradhapura=අනුරාධපුර."
                )

            msgs = [
                {"role": "system", "content": system_content},
                {"role": "user", "content": instructions},
            ]
            for si, en in examples:
                src, dst = (si, en) if to_english else (en, si)
                msgs.append({"role": "user", "content": f"{source_label}: {src}\n{target_label}:"})
                msgs.append({"role": "assistant", "content": dst})
            msgs.append({"role": "user", "content": f"{source_label}: {txt}\n{target_label}:"})
            return msgs

        # Use new OpenAI SDK
        try:
            from openai import OpenAI
            client = OpenAI(api_key=self.openai_api_key)
            response = client.chat.completions.create(
                model="gpt-3.5-turbo",
                max_tokens=150,
                temperature=0.3,
                messages=build_messages(text)
            )
        except Exception as sdk_err:
            # Fallback to legacy API if available
            import openai
            try:
                openai.api_key = self.openai_api_key
                response = openai.ChatCompletion.create(
                    model="gpt-3.5-turbo",
                    max_tokens=150,
                    temperature=0.3,
                    messages=build_messages(text)
                )
            except Exception:
                # Report the original failure; it is the more informative one.
                raise sdk_err

        # The content can be None; treat that as an empty translation.
        translated = (response.choices[0].message.content or '').strip()

        if translated.startswith('"') and translated.endswith('"'):
            translated = translated[1:-1]
        if not translated:
            return None
        self.last_translation_method = 'llm'
        return translated
    except Exception as e:
        self.logger.warning(f"LLM translation error: {e}")
        return None
|
| 299 |
+
|
| 300 |
+
def translate_with_libre_translate(self, text: str, target_lang: str, source_lang: str = 'auto') -> Optional[str]:
    """Translate using the LibreTranslate public endpoint.

    Args:
        text: Text to translate.
        target_lang: 'si' or 'en'; other values fall back to 'en'.
        source_lang: 'si', 'en' or 'auto'; other values fall back to 'auto'.

    Returns:
        The translated text, or None on a non-200 response, an empty
        translation, or any request/parsing error.
    """
    try:
        # Map language codes
        lang_map = {
            'si': 'si',  # Sinhala
            'en': 'en',  # English
            'auto': 'auto'
        }

        payload = {
            'q': text,
            'source': lang_map.get(source_lang, 'auto'),
            'target': lang_map.get(target_lang, 'en'),
            'format': 'text'
        }

        headers = {
            'Content-Type': 'application/json'
        }

        response = requests.post(
            self.libre_translate_url,
            json=payload,
            headers=headers,
            timeout=10
        )

        if response.status_code != 200:
            return None

        translated = response.json().get('translatedText')
        if not translated:
            # Do not record this method as used when nothing came back.
            return None

        self.logger.debug(f"LibreTranslate success: '{text}' -> '{translated}'")
        self.last_translation_method = 'libretranslate'
        return translated

    except Exception as e:
        self.logger.warning(f"LibreTranslate error: {e}")
        return None
|
| 343 |
+
|
| 344 |
+
def translate_with_mymemory(self, text: str, target_lang: str, source_lang: str = 'auto') -> Optional[str]:
    """Translate using the MyMemory API.

    Args:
        text: Text to translate.
        target_lang: Target language code, used as given in the language pair.
        source_lang: 'si', 'en' or 'auto'; other values fall back to 'auto'.

    Returns:
        The translated text, or None on a non-200 HTTP response, a non-200
        ``responseStatus`` in the body, an empty translation, or any
        request/parsing error.
    """
    try:
        # Map language codes
        lang_map = {
            'si': 'si',  # Sinhala
            'en': 'en',  # English
            'auto': 'auto'
        }

        source = lang_map.get(source_lang, 'auto')
        params = {
            'q': text,
            'langpair': f"{source}|{target_lang}"
        }

        response = requests.get(
            self.mymemory_url,
            params=params,
            timeout=10
        )

        if response.status_code != 200:
            return None

        result = response.json()

        # NOTE(review): MyMemory is understood to report errors (invalid
        # language pair, quota exhausted) through a body-level
        # "responseStatus" while placing the error text in "translatedText" -
        # confirm against the API specification. A missing field counts as OK.
        status = result.get('responseStatus', 200)
        if str(status) != '200':
            self.logger.warning(f"MyMemory returned responseStatus={status}")
            return None

        translated = (result.get('responseData') or {}).get('translatedText')
        if not translated:
            # Do not record this method as used when nothing came back.
            return None

        self.logger.debug(f"MyMemory success: '{text}' -> '{translated}'")
        self.last_translation_method = 'mymemory'
        return translated

    except Exception as e:
        self.logger.warning(f"MyMemory translation error: {e}")
        return None
|
| 380 |
+
|
| 381 |
+
|
| 382 |
+
|
| 383 |
+
def translate_with_dictionary(self, text: str, target_lang: str) -> str:
    """Translate known transport terms using ``self.transport_terms``.

    All terms are replaced in a single pass, longest term first. The previous
    chain of ``str.replace`` calls let short entries (for example the
    particle 'ට') rewrite parts of longer ones ('සිට', 'මහනුවරට') and then
    re-translated text it had already produced.

    Args:
        text: Text to translate.
        target_lang: 'en' for Sinhala to English, 'si' for English to Sinhala.

    Returns:
        The text with known terms replaced; unchanged for any other target
        language or when the dictionary is empty.
    """
    if target_lang == 'en':
        # Sinhala to English
        lookup = dict(self.transport_terms)
    elif target_lang == 'si':
        # English to Sinhala: the first Sinhala term listed for an English
        # word wins, matching the dictionary's insertion order.
        lookup = {}
        for sinhala, english in self.transport_terms.items():
            lookup.setdefault(english.lower(), sinhala)
    else:
        return text

    if not lookup:
        return text

    # Longest first so that 'මහනුවරට' is tried before 'මහනුවර' and 'ට'.
    alternation = '|'.join(
        re.escape(term) for term in sorted(lookup, key=len, reverse=True)
    )

    if target_lang == 'en':
        return re.sub(alternation, lambda m: lookup[m.group(0)], text)

    # English side: whole words only, case-insensitive, so 'to' no longer
    # rewrites 'tomorrow' and 'or' no longer rewrites 'for'.
    pattern = re.compile(rf'\b(?:{alternation})\b', re.IGNORECASE)
    return pattern.sub(lambda m: lookup[m.group(0).lower()], text)
|
| 399 |
+
|
| 400 |
+
def translate_text(self, text: str, target_lang: str, source_lang: str = 'auto') -> str:
    """Translate *text*, trying each available method until one succeeds.

    Order: LLM, MyMemory, LibreTranslate, Dictionary. When
    ``self.force_llm_translation`` is set only the LLM is tried. If every
    method fails or returns nothing, the dictionary translation is returned
    as a last resort.

    ``self.last_translation_method`` is reset on every call and set to the
    lower-cased name of the method that produced the result, so a value left
    over from an earlier call is never reported for this one.

    Args:
        text: Text to translate; empty or whitespace-only text is returned
            unchanged.
        target_lang: Target language code ('en' or 'si').
        source_lang: Source language code, or 'auto'.

    Returns:
        The translated text with surrounding whitespace removed, or the
        unstripped dictionary fallback result.
    """
    if not text or not text.strip():
        return text

    self.last_translation_method = None

    # Try translation methods
    translation_methods = [
        ('LLM', lambda: self.translate_with_llm(text, target_lang, source_lang)),
    ]
    if not self.force_llm_translation:
        translation_methods += [
            ('MyMemory', lambda: self.translate_with_mymemory(text, target_lang, source_lang)),
            ('LibreTranslate', lambda: self.translate_with_libre_translate(text, target_lang, source_lang)),
            ('Dictionary', lambda: self.translate_with_dictionary(text, target_lang)),
        ]

    for method_name, method_func in translation_methods:
        try:
            result = method_func()
        except Exception as e:
            self.logger.warning(f"{method_name} translation failed: {e}")
            continue
        if result and result.strip():
            self.logger.info(f"Translation successful using {method_name}")
            self.last_translation_method = method_name.lower()
            return result.strip()

    # Final fallback
    result = self.translate_with_dictionary(text, target_lang)
    self.last_translation_method = 'dictionary'
    return result
|
| 434 |
+
|
| 435 |
+
def translate_query(self, query: str) -> Dict[str, Any]:
    """Translate a user query from Sinhala to English.

    Returns a dict with the keys ``is_sinhala``, ``original_query``,
    ``translated_query`` and ``translation_method``. Queries without Sinhala
    characters are passed through with method 'none'. When pattern
    translation is enabled and the fare parser recognises the query, the
    parsed English question is returned with method 'pattern'. Otherwise the
    query goes through general translation and English normalisation.
    """
    if not self.is_sinhala_text(query):
        return {
            'is_sinhala': False,
            'original_query': query,
            'translated_query': query,
            'translation_method': 'none'
        }

    # Optional: Sinhala-specific fare parsing (only when USE_PATTERN_TRANSLATION=true)
    pattern_result = (
        self._parse_sinhala_fare_query(query)
        if self.use_pattern_translation
        else None
    )
    if pattern_result:
        self.logger.info(f"Pattern-based Sinhala fare parse: '{query}' -> '{pattern_result}'")
        return {
            'is_sinhala': True,
            'original_query': query,
            'translated_query': pattern_result,
            'translation_method': 'pattern'
        }

    # General translation to English, then map synonyms onto the NLP vocabulary.
    english = self.translate_text(query, 'en', 'si')
    english = self._normalize_english_query(english)

    method = self.last_translation_method
    if not method:
        method = 'llm' if self.openai_api_key else 'dictionary'
    self.logger.info(f"Translated Sinhala query ({method}): '{query}' -> '{english}'")

    return {
        'is_sinhala': True,
        'original_query': query,
        'translated_query': english,
        'translation_method': method
    }
|
| 470 |
+
|
| 471 |
+
def _normalize_english_query(self, text: str) -> str:
    """Normalize English synonyms to match NLP patterns (fare/price/cost).

    The text is lower-cased and each synonym is replaced as a whole word or
    phrase, in the listed order. Whole-word matching keeps unrelated words
    intact: plain substring replacement used to turn "coffee" into "coffare"
    and "recharge" into "recost".

    Args:
        text: English query text; falsy values are returned unchanged.

    Returns:
        The lower-cased, normalized query.
    """
    if not text:
        return text
    # Order matters: 'ticket price' must collapse to 'fare' before
    # 'bus ticket' is considered, so "bus ticket price" becomes "bus fare".
    replacements = (
        ('fees', 'fare'),
        ('fee', 'fare'),
        ('charges', 'cost'),
        ('charge', 'cost'),
        ('ticket price', 'fare'),
        ('ticket fare', 'fare'),
        ('bus ticket', 'bus fare'),
    )
    # The result stays lower-case; downstream lowercases anyway.
    normalized = text.lower()
    for old, new in replacements:
        normalized = re.sub(rf'\b{re.escape(old)}\b', new, normalized)
    return normalized
|
| 490 |
+
|
| 491 |
+
def translate_response(self, response: Dict[str, Any]) -> Dict[str, Any]:
    """Return a copy of *response* with its user-facing text translated to Sinhala.

    Translated parts: ``message``, each entry of ``suggestions``, and the
    ``original``/``corrected`` fields of each entry in ``corrections``. A
    ``translation_info`` entry is added to the copy. The input dict itself is
    not modified; values that are not translated are shared with it.
    """
    def to_sinhala(value):
        return self.translate_text(value, 'si', 'en')

    result = response.copy()

    # Main message
    if 'message' in response:
        result['message'] = to_sinhala(response['message'])

    # Suggestions, if any
    suggestions = response.get('suggestions')
    if suggestions:
        result['suggestions'] = [to_sinhala(item) for item in suggestions]

    # Corrections, if any
    corrections = response.get('corrections')
    if corrections:
        converted = []
        for correction in corrections:
            entry = correction.copy()
            for field in ('original', 'corrected'):
                if field in correction:
                    entry[field] = to_sinhala(correction[field])
            converted.append(entry)
        result['corrections'] = converted

    # Translation metadata
    result['translation_info'] = {
        'translated': True,
        'translation_method': 'llm' if self.openai_api_key else 'dictionary'
    }

    return result
|
| 531 |
+
|
| 532 |
+
def get_sinhala_examples(self) -> Dict[str, Any]:
    """Return sample Sinhala queries grouped by query category.

    Returns:
        A dict mapping each category name (``fare_queries``,
        ``comparison_queries``, ``range_queries``, ``recommendation_queries``,
        ``statistical_queries``) to a list of ``{'query', 'description'}``
        dicts, both values being Sinhala text.
    """

    def example(query: str, description: str) -> Dict[str, str]:
        # Single place that fixes the shape of every example entry.
        return {'query': query, 'description': description}

    return {
        'fare_queries': [
            example(
                'කොළඹ සිට මහනුවරට යන බස් ගාස්තුව කීයද?',
                'කොළඹ සිට මහනුවරට යන බස් ගාස්තුව සොයන්න',
            ),
            example(
                'මාතර සිට ගාල්ලට යන මිල කීයද?',
                'මාතර සිට ගාල්ලට යන මිල සොයන්න',
            ),
            example(
                'අනුරාධපුර සිට කොළඹට යන වාරිකය',
                'අනුරාධපුර සිට කොළඹට යන වාරිකය සොයන්න',
            ),
        ],
        'comparison_queries': [
            example(
                'කොළඹ සිට මහනුවරට සහ කොළඹ සිට ගාල්ලට යන ගාස්තු සසඳන්න',
                'විවිධ මාර්ගවල ගාස්තු සසඳන්න',
            ),
            example(
                'කොළඹ සිට මහනුවරට සහ කොළඹ සිට අනුරාධපුරට යන ගාස්තුවල වෙනස කීයද?',
                'මාර්ග දෙකක ගාස්තු වෙනස සොයන්න',
            ),
        ],
        'range_queries': [
            example(
                'රුපියල් 500 ට අඩු ගාස්තු සහිත මාර්ග සොයන්න',
                'රුපියල් 500 ට අඩු ගාස්තු සහිත මාර්ග සොයන්න',
            ),
            example(
                'රුපියල් 200 සහ 800 අතර ගාස්තු සහිත මාර්ග පෙන්වන්න',
                'රුපියල් 200 සහ 800 අතර ගාස්තු සහිත මාර්ග සොයන්න',
            ),
        ],
        'recommendation_queries': [
            example(
                'අඩු මිලේ මාර්ග නිර්දේශ කරන්න',
                'අඩු මිලේ මාර්ග නිර්දේශ කරන්න',
            ),
            example(
                'ප්රසිද්ධ ගමනාන්ත පෙන්වන්න',
                'ප්රසිද්ධ ගමනාන්ත සොයන්න',
            ),
        ],
        'statistical_queries': [
            example(
                'සාමාන්ය ගාස්තුව කීයද?',
                'සාමාන්ය ගාස්තුව සොයන්න',
            ),
            example(
                'දත්ත ගබඩා සංඛ්යාලේඛන',
                'දත්ත ගබඩා සංඛ්යාලේඛන සොයන්න',
            ),
        ],
    }
|
| 592 |
+
|
| 593 |
+
def test_translation(self) -> Dict[str, Any]:
|
| 594 |
+
"""Test translation functionality on transportation-related Sinhala queries."""
|
| 595 |
+
test_cases = [
|
| 596 |
+
{
|
| 597 |
+
'sinhala': 'කොළඹ සිට මහනුවරට යන බස් ගාස්තුව කීයද?',
|
| 598 |
+
'expected_english': 'What is the bus fare from Colombo to Kandy?'
|
| 599 |
+
},
|
| 600 |
+
{
|
| 601 |
+
'sinhala': 'මාතර සිට ගාල්ලට යන මිල කීයද?',
|
| 602 |
+
'expected_english': 'How much is the price from Matara to Galle?'
|
| 603 |
+
},
|
| 604 |
+
{
|
| 605 |
+
'sinhala': 'කොළඹ සිට පානදුර දක්වා සහ කොළඹ සිට ගාල්ල දක්වා ගාස්තු සසඳා බලන්න.',
|
| 606 |
+
'expected_english': 'Compare fares from Colombo to Panadura and from Colombo to Galle.'
|
| 607 |
+
},
|
| 608 |
+
{
|
| 609 |
+
'sinhala': 'රුපියල් 500 ට අඩු ගාස්තු සහිත මාර්ග පෙන්වන්න.',
|
| 610 |
+
'expected_english': 'Show routes with fares under 500 rupees.'
|
| 611 |
+
},
|
| 612 |
+
{
|
| 613 |
+
'sinhala': 'අඩු මිලේ මාර්ග නිර්දේශ කරන්න.',
|
| 614 |
+
'expected_english': 'Recommend cheap routes.'
|
| 615 |
+
},
|
| 616 |
+
{
|
| 617 |
+
'sinhala': 'කොළඹ සිට යන මාර්ග මොනවාද?',
|
| 618 |
+
'expected_english': 'What routes depart from Colombo?'
|
| 619 |
+
},
|
| 620 |
+
{
|
| 621 |
+
'sinhala': 'සාමාන්ය ගාස්තුව කීයද?',
|
| 622 |
+
'expected_english': 'What is the average fare?'
|
| 623 |
+
},
|
| 624 |
+
{
|
| 625 |
+
'sinhala': 'කඩුවෙල සිට මාතර දක්වා සහ ගාල්ල දක්වා බස් ගාස්තු සසඳන්න.',
|
| 626 |
+
'expected_english': 'Compare bus fares from Kaduwela to Matara and to Galle.'
|
| 627 |
+
},
|
| 628 |
+
{
|
| 629 |
+
'sinhala': 'කොළඹ සිට ගාල්ල දක්වා ටිකට් මිල කීයද?',
|
| 630 |
+
'expected_english': 'What is the ticket price from Colombo to Galle?'
|
| 631 |
+
},
|
| 632 |
+
{
|
| 633 |
+
'sinhala': 'රුපියල් 1000 ට වැඩි ගාස්තු සහිත මාර්ග සදහන් කරන්න.',
|
| 634 |
+
'expected_english': 'List routes with fares over 1000 rupees.'
|
| 635 |
+
}
|
| 636 |
+
]
|
| 637 |
+
|
| 638 |
+
results = []
|
| 639 |
+
total_exact = 0
|
| 640 |
+
total_good = 0
|
| 641 |
+
total_tests = len(test_cases)
|
| 642 |
+
|
| 643 |
+
for test_case in test_cases:
|
| 644 |
+
sinhala = test_case['sinhala']
|
| 645 |
+
expected = test_case['expected_english']
|
| 646 |
+
is_sinhala = self.is_sinhala_text(sinhala)
|
| 647 |
+
|
| 648 |
+
# Reset method tracker and translate
|
| 649 |
+
self.last_translation_method = None
|
| 650 |
+
translated = self.translate_text(sinhala, 'en', 'si') or ''
|
| 651 |
+
|
| 652 |
+
tr = translated.strip()
|
| 653 |
+
ex = expected.strip()
|
| 654 |
+
tr_low = tr.lower()
|
| 655 |
+
ex_low = ex.lower()
|
| 656 |
+
|
| 657 |
+
# Accuracy heuristic
|
| 658 |
+
if tr_low == ex_low:
|
| 659 |
+
accuracy = 'exact'
|
| 660 |
+
total_exact += 1
|
| 661 |
+
total_good += 1
|
| 662 |
+
elif tr_low in ex_low or ex_low in tr_low:
|
| 663 |
+
accuracy = 'good'
|
| 664 |
+
total_good += 1
|
| 665 |
+
else:
|
| 666 |
+
accuracy = 'partial'
|
| 667 |
+
|
| 668 |
+
# Intent preservation check for comparisons
|
| 669 |
+
intent_preserved = True
|
| 670 |
+
if 'ස��ඳ' in sinhala or 'සසඳා' in sinhala:
|
| 671 |
+
intent_preserved = ('compare' in tr_low)
|
| 672 |
+
|
| 673 |
+
results.append({
|
| 674 |
+
'sinhala_query': sinhala,
|
| 675 |
+
'is_sinhala_detected': is_sinhala,
|
| 676 |
+
'translated_english': tr,
|
| 677 |
+
'expected_english': ex,
|
| 678 |
+
'translation_accuracy': accuracy,
|
| 679 |
+
'intent_preserved': intent_preserved,
|
| 680 |
+
'method_used': self.last_translation_method or ('llm' if self.openai_api_key else 'dictionary')
|
| 681 |
+
})
|
| 682 |
+
|
| 683 |
+
summary = {
|
| 684 |
+
'total_tests': total_tests,
|
| 685 |
+
'exact_matches': total_exact,
|
| 686 |
+
'good_or_better': total_good,
|
| 687 |
+
'accuracy_rate_percent': round((total_good / total_tests) * 100, 2) if total_tests else 0
|
| 688 |
+
}
|
| 689 |
+
|
| 690 |
+
self.logger.info(f"Translation test summary: {summary}")
|
| 691 |
+
|
| 692 |
+
return {
|
| 693 |
+
'translation_service_status': 'active',
|
| 694 |
+
'available_methods': {
|
| 695 |
+
'llm': self.openai_api_key is not None,
|
| 696 |
+
'libre_translate': True,
|
| 697 |
+
'mymemory': True,
|
| 698 |
+
'dictionary': True
|
| 699 |
+
},
|
| 700 |
+
'summary': summary,
|
| 701 |
+
'test_results': results
|
| 702 |
+
}
|