TuanMinhajSeedin committed on
Commit
d75ff9c
·
verified ·
1 Parent(s): a1c87f9

Upload 15 files

Browse files
Files changed (15) hide show
  1. .env +14 -0
  2. .gitattributes +14 -35
  3. .gitignore +175 -0
  4. Dockerfile +32 -0
  5. README.md +107 -11
  6. app.py +974 -0
  7. config.py +263 -0
  8. enhanced_nlp_processor.py +904 -0
  9. llm_query_processor.py +351 -0
  10. logger.py +53 -0
  11. neo4j_service.py +222 -0
  12. requirements.txt +10 -0
  13. spell_corrector.py +257 -0
  14. templates/index.html +977 -0
  15. translation_service.py +702 -0
.env ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# SECURITY: never commit real credentials. Every secret that previously
# appeared in this file (GitHub token, Neo4j passwords, OpenAI API key) must be
# treated as compromised and rotated. Supply real values at deploy time
# (e.g. as Space/CI secrets or an untracked local .env).
GIT = your_github_token
NEO4J_URI = bolt://localhost:7687
NEO4J_USER = neo4j
NEO4J_PASSWORD = "your_neo4j_password"

# OpenAI Configuration (for LLM)
OPENAI_API_KEY = your_openai_api_key
OPENAI_MODEL = gpt-3.5-turbo

# Flask Configuration
# Use a long random value, e.g. `python -c "import secrets; print(secrets.token_hex(32))"`
SECRET_KEY = change-me-to-a-random-secret
.gitattributes CHANGED
@@ -1,35 +1,14 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ *.py linguist-language=Python
2
+ *.html linguist-language=HTML
3
+ *.css linguist-language=CSS
4
+ *.js linguist-language=JavaScript
5
+ *.md linguist-language=Markdown
6
+ *.txt linguist-language=Text
7
+ *.json linguist-language=JSON
8
+ *.csv linguist-language=CSV
9
+ *.pdf linguist-documentation
10
+ *.png linguist-documentation
11
+ *.jpg linguist-documentation
12
+ *.jpeg linguist-documentation
13
+ *.gif linguist-documentation
14
+ *.svg linguist-documentation
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.gitignore ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ #uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ #poetry.lock
109
+
110
+ # pdm
111
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
112
+ #pdm.lock
113
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
114
+ # in version control.
115
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
116
+ .pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
121
+ __pypackages__/
122
+
123
+ # Celery stuff
124
+ celerybeat-schedule
125
+ celerybeat.pid
126
+
127
+ # SageMath parsed files
128
+ *.sage.py
129
+
130
+ # Environments
131
+ config.py
132
+ .env
133
+ .venv
134
+ env/
135
+ venv/
136
+ ENV/
137
+ env.bak/
138
+ venv.bak/
139
+
140
+ # Spyder project settings
141
+ .spyderproject
142
+ .spyproject
143
+
144
+ # Rope project settings
145
+ .ropeproject
146
+
147
+ # mkdocs documentation
148
+ /site
149
+
150
+ # mypy
151
+ .mypy_cache/
152
+ .dmypy.json
153
+ dmypy.json
154
+
155
+ # Pyre type checker
156
+ .pyre/
157
+
158
+ # pytype static type analyzer
159
+ .pytype/
160
+
161
+ # Cython debug symbols
162
+ cython_debug/
163
+
164
+ # PyCharm
165
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
166
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
167
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
168
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
169
+ #.idea/
170
+
171
+ # Ruff stuff:
172
+ .ruff_cache/
173
+
174
+ # PyPI configuration file
175
+ .pypirc
Dockerfile ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.9-slim

# Set working directory
WORKDIR /app

# Install system dependencies (compilers for building wheels).
# --no-install-recommends keeps the image small by skipping optional packages.
RUN apt-get update && apt-get install -y --no-install-recommends \
        gcc \
        g++ \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first for better layer caching
COPY requirements.txt .

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
# NOTE(review): this also copies .env into the image unless a .dockerignore
# excludes it -- secrets should be injected as runtime environment variables.
COPY . .

# Create logs directory
RUN mkdir -p logs

# Expose port
EXPOSE 7860

# Set environment variables
ENV FLASK_ENV=production
ENV PORT=7860

# Run the application
CMD ["python", "app.py"]
README.md CHANGED
@@ -1,11 +1,107 @@
1
- ---
2
- title: Transport
3
- emoji: 🏆
4
- colorFrom: blue
5
- colorTo: indigo
6
- sdk: docker
7
- pinned: false
8
- license: mit
9
- ---
10
-
11
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 🚌 Natural Language Transport Query System
2
+
3
+ A sophisticated Flask application that provides natural language querying capabilities for Sri Lankan transport information, featuring Sinhala-English translation and Neo4j integration.
4
+
5
+ ## 🌟 Features
6
+
7
+ - **Natural Language Processing**: Advanced NLP for understanding transport queries
8
+ - **Multilingual Support**: Sinhala-English translation with LLM integration
9
+ - **Spell Correction**: Fuzzy matching and LLM-based location correction
10
+ - **Neo4j Integration**: Graph database for efficient route and fare queries
11
+ - **Enhanced Query Types**: Support for comparisons, ranges, recommendations
12
+ - **RESTful API**: Comprehensive API endpoints for all functionality
13
+
14
+ ## 🚀 Quick Start
15
+
16
+ ### Local Development
17
+ ```bash
18
+ # Install dependencies
19
+ pip install -r requirements.txt
20
+
21
+ # Set environment variables
22
+ export OPENAI_API_KEY="your_openai_key"
23
+ export NEO4J_URI="your_neo4j_uri"
24
+ export NEO4J_USER="your_neo4j_user"
25
+ export NEO4J_PASSWORD="your_neo4j_password"
26
+
27
+ # Run the application
28
+ python app.py
29
+ ```
30
+
31
+ ### Hugging Face Spaces
32
+ This application is packaged as a Docker image for Hugging Face Spaces: the container runs `python app.py` and serves the web UI and API on port 7860.
33
+
34
+ ## 📡 API Endpoints
35
+
36
+ ### Core Query Processing
37
+ - `POST /api/query` - Process natural language transport queries
38
+ - `GET /api/status` - System status and statistics
39
+ - `GET /api/places` - Get all available places
40
+
41
+ ### NLP Capabilities
42
+ - `GET /api/nlp/capabilities` - View enhanced NLP capabilities
43
+ - `GET /api/nlp/demo` - Get comprehensive demo queries
44
+ - `POST /api/nlp/test` - Test queries with detailed analysis
45
+ - `GET /api/nlp/test-all-types` - Test all query types
46
+
47
+ ### Translation Services
48
+ - `POST /api/translation/translate` - Translate text between languages
49
+ - `GET /api/translation/test` - Test translation functionality
50
+ - `GET /api/sinhala/examples` - Get Sinhala example queries
51
+
52
+ ### Utilities
53
+ - `POST /api/suggestions` - Get location suggestions for autocomplete
54
+ - `GET /api/examples` - Get categorized example queries
55
+
56
+ ## 🔧 Configuration
57
+
58
+ The application uses environment variables for configuration:
59
+
60
+ ```bash
61
+ # OpenAI Configuration
62
+ OPENAI_API_KEY=your_openai_api_key
63
+
64
+ # Neo4j Configuration
65
+ NEO4J_URI=bolt://localhost:7687
66
+ NEO4J_USER=neo4j
67
+ NEO4J_PASSWORD=password
68
+
69
+ # Translation Configuration
70
+ FORCE_LLM_TRANSLATION=true
71
+ USE_PATTERN_TRANSLATION=false
72
+
73
+ # Logging Configuration
74
+ LOG_LEVEL=INFO
75
+ LOG_DIR=logs
76
+ ```
77
+
78
+ ## 📊 Query Examples
79
+
80
+ ### English Queries
81
+ - "What is the fare from Colombo to Kandy?"
82
+ - "Show me routes from Galle to Matara"
83
+ - "Compare fares from Colombo to Panadura and Colombo to Galle"
84
+ - "Find routes under 500 LKR"
85
+
86
+ ### Sinhala Queries
87
+ - "කොළඹ සිට මහනුවරට ගාස්තුව කීයද?"
88
+ - "ගාල්ල සිට මාතර දක්වා මාර්ග පෙන්වන්න"
89
+ - "කොළඹ සිට පානදුර සහ කොළඹ සිට ගාල්ල ගාස්තු සසඳා බලන්න"
90
+
91
+ ## 🏗️ Architecture
92
+
93
+ - **Flask**: Web framework
94
+ - **OpenAI GPT**: LLM for translation and query interpretation
95
+ - **Neo4j**: Graph database for transport data
96
+ - **FuzzyWuzzy**: Spell correction and fuzzy matching
97
+ - **Pandas**: Data processing and manipulation
98
+
99
+ ## 📝 License
100
+
101
+ This project is licensed under the MIT License.
102
+
103
+ ## 🤝 Contributing
104
+
105
+ Contributions are welcome! Please feel free to submit a Pull Request.
106
+
107
+
app.py ADDED
@@ -0,0 +1,974 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Main Flask Application for Transport Query System
4
+ """
5
+
6
+ from flask import Flask, render_template, request, jsonify, session
7
+ import os
8
+ from llm_query_processor import LLMQueryProcessor
9
+ from enhanced_nlp_processor import EnhancedNLPProcessor
10
+ from spell_corrector import SpellCorrector
11
+ from neo4j_service import Neo4jService
12
+ from translation_service import TranslationService
13
+ from logger import get_logger
14
+ from config import Config
15
+
16
# Flask application object, configured from the project's Config class.
app = Flask(__name__)
app.config.from_object(Config)

# Named logger shared by the route handlers in this module.
logger = get_logger("FlaskApp")

# Service singletons, created once at import time and reused by every request.
query_processor = LLMQueryProcessor()
enhanced_nlp_processor = EnhancedNLPProcessor()
spell_corrector = SpellCorrector()
neo4j_service = Neo4jService()
translation_service = TranslationService()
26
+
27
@app.route('/')
def index():
    """Render and return the landing page template (``index.html``)."""
    page = render_template('index.html')
    return page
31
+
32
@app.route('/api/query', methods=['POST'])
def process_query():
    """Process a user query with enhanced NLP and translation support.

    Request JSON:
        query (str): the natural-language query (English or Sinhala).
        enhanced_nlp (bool, optional): use the enhanced NLP processor when
            truthy (default ``True``); otherwise use the basic LLM processor.

    Returns:
        A JSON response containing the processor's result dict. When the
        query was detected as Sinhala, the result is translated back and a
        ``translation_info`` entry is attached. On failure the response is
        ``{'success': False, 'message': ...}``.
    """
    try:
        # silent=True returns None for a missing/invalid JSON body instead of
        # raising; non-object bodies (e.g. a JSON array) are treated as empty.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}

        raw_query = data.get('query')
        user_query = raw_query.strip() if isinstance(raw_query, str) else ''
        use_enhanced_nlp = data.get('enhanced_nlp', True)  # Default to enhanced NLP

        if not user_query:
            return jsonify({
                'success': False,
                'message': 'Please enter a query.'
            })

        # Check if query is in Sinhala and translate if needed
        translation_info = translation_service.translate_query(user_query)

        # Use translated query for processing
        query_to_process = translation_info['translated_query']

        # Log translation info
        if translation_info['is_sinhala']:
            logger.info(f"Translation: si->en method={translation_info['translation_method']} original='{translation_info['original_query']}' translated='{translation_info['translated_query']}'")
        else:
            logger.info(f"Processing English Query: '{user_query}'")

        # Process the query with enhanced NLP or fall back to the basic processor
        if use_enhanced_nlp:
            result = enhanced_nlp_processor.process_query(query_to_process)
        else:
            result = query_processor.process_query(query_to_process)

        # If the original query was in Sinhala, translate the response back.
        # Uses the module logger (not print) so output follows the logging config.
        if translation_info['is_sinhala']:
            logger.info(f"English Response: {result.get('message', 'No message')}")
            result = translation_service.translate_response(result)
            result['translation_info'] = translation_info
            logger.info(f"Sinhala Response: {result.get('message', 'No message')}")
            logger.info("Translation Complete")

        logger.info(f"Response success={result.get('success')} type={result.get('query_type','n/a')} message='{result.get('message','')[:120]}'")
        return jsonify(result)

    except Exception as e:
        # Top-level boundary: record the traceback, then report to the client.
        logger.exception("Error processing query")
        return jsonify({
            'success': False,
            'message': f'Error processing query: {str(e)}'
        })
80
+
81
@app.route('/api/suggestions', methods=['POST'])
def get_suggestions():
    """Get location suggestions for autocomplete.

    Request JSON:
        location (str): partial location name typed by the user.

    Returns:
        ``{'suggestions': [{'name': ..., 'confidence': ...}, ...]}``; the list
        is empty when no usable ``location`` was supplied. On failure the
        response is ``{'success': False, 'message': ...}``.
    """
    try:
        # Tolerate a missing, malformed or non-object JSON body.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}

        raw_location = data.get('location')
        partial_location = raw_location.strip() if isinstance(raw_location, str) else ''

        if not partial_location:
            return jsonify({'suggestions': []})

        # The corrector yields (name, confidence) pairs.
        suggestions = spell_corrector.get_suggestions(partial_location)

        return jsonify({
            'suggestions': [{'name': name, 'confidence': conf} for name, conf in suggestions]
        })

    except Exception as e:
        logger.exception("Error getting suggestions")
        return jsonify({
            'success': False,
            'message': f'Error getting suggestions: {str(e)}'
        })
102
+
103
@app.route('/api/status')
def get_status():
    """Report Neo4j connectivity together with place count and route statistics.

    The database is only queried when the connection check succeeds; otherwise
    an empty place list and empty statistics are reported.
    """
    try:
        connected = neo4j_service.is_connected()
        if connected:
            known_places = neo4j_service.get_all_places()
            route_stats = neo4j_service.get_route_statistics()
        else:
            known_places = []
            route_stats = {}

        payload = {
            'neo4j_connected': connected,
            'total_places': len(known_places),
            'statistics': route_stats
        }
        return jsonify(payload)

    except Exception as exc:
        failure = {
            'success': False,
            'message': f'Error getting status: {exc}'
        }
        return jsonify(failure)
122
+
123
@app.route('/api/places')
def get_places():
    """Return every place known to the Neo4j service as JSON."""
    try:
        payload = {
            'success': True,
            'places': neo4j_service.get_all_places()
        }
    except Exception as exc:
        payload = {
            'success': False,
            'message': f'Error getting places: {exc}'
        }
    return jsonify(payload)
138
+
139
@app.route('/api/sinhala/examples')
def get_sinhala_examples():
    """Return the translation service's Sinhala example queries as JSON."""
    try:
        payload = {
            'success': True,
            'examples': translation_service.get_sinhala_examples()
        }
    except Exception as exc:
        payload = {
            'success': False,
            'message': f'Error getting Sinhala examples: {exc}'
        }
    return jsonify(payload)
154
+
155
@app.route('/api/translation/test')
def test_translation():
    """Run the translation service's self-test and return its results as JSON."""
    try:
        payload = {
            'success': True,
            'test_results': translation_service.test_translation()
        }
    except Exception as exc:
        payload = {
            'success': False,
            'message': f'Error testing translation: {exc}'
        }
    return jsonify(payload)
170
+
171
@app.route('/api/translation/translate', methods=['POST'])
def translate_text():
    """Translate text between Sinhala and English.

    Request JSON:
        text (str): text to translate.
        target_lang (str, optional): ``'en'`` or ``'si'`` (default ``'en'``).
        source_lang (str, optional): source language hint (default ``'auto'``).

    Returns:
        JSON with the original and translated text, the detected source
        language, the target language and the translation method. On failure
        the response is ``{'success': False, 'message': ...}``.
    """
    try:
        # Tolerate a missing, malformed or non-object JSON body.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}

        raw_text = data.get('text')
        text = raw_text.strip() if isinstance(raw_text, str) else ''
        target_lang = data.get('target_lang', 'en')  # 'en' or 'si'
        source_lang = data.get('source_lang', 'auto')

        if not text:
            return jsonify({
                'success': False,
                'message': 'Please provide text to translate.'
            })

        translated_text = translation_service.translate_text(text, target_lang, source_lang)
        # Reported source language comes from script detection on the input,
        # not from the caller-supplied source_lang hint.
        is_sinhala = translation_service.is_sinhala_text(text)

        return jsonify({
            'success': True,
            'original_text': text,
            'translated_text': translated_text,
            'source_language': 'si' if is_sinhala else 'en',
            'target_language': target_lang,
            'translation_method': 'google' if translation_service.google_translate_api_key else 'dictionary'
        })

    except Exception as e:
        logger.exception("Error translating text")
        return jsonify({
            'success': False,
            'message': f'Error translating text: {str(e)}'
        })
203
+
204
@app.route('/api/nlp/capabilities')
def get_nlp_capabilities():
    """Describe the NLP capabilities and attach live results for sample queries.

    For each supported query type, the first two example queries are run
    through the enhanced NLP processor and their results (or an error entry)
    are included under ``live_examples``.
    """

    def _run_example(example_query):
        # Execute one sample query; a failure becomes an error-shaped result.
        try:
            outcome = enhanced_nlp_processor.process_query(example_query)
        except Exception as exc:
            outcome = {
                'success': False,
                'message': f'Error: {exc}'
            }
        return {
            'query': example_query,
            'result': outcome
        }

    # (type, description, example queries) for every supported query type
    query_type_specs = (
        (
            'fare_inquiry',
            'Find fare between two specific locations',
            [
                'What is the fare from Colombo to Kandy?',
                'fare of anuradhapura to kandy',
                'price from panadura to galle',
                'Colombo to Kandy fare',
            ],
        ),
        (
            'comparison',
            'Compare fares between different routes',
            [
                'Compare fares from Colombo to Kandy vs Colombo to Galle',
                'Which is cheaper between Colombo to Kandy and Colombo to Anuradapura?',
                'What is the difference in fare between Panadura to Galle and Panadura to Matara?',
            ],
        ),
        (
            'range_search',
            'Find routes within specific price ranges',
            [
                'Find routes under 500 rupees',
                'Show me routes between 200 and 800 rupees',
                'Routes over 1000 rupees',
            ],
        ),
        (
            'recommendation',
            'Get route recommendations based on criteria',
            [
                'Recommend cheap routes',
                'Show me popular destinations',
                'What are the best routes from Colombo?',
            ],
        ),
        (
            'route_inquiry',
            'Find routes from/to specific locations',
            [
                'Routes from Colombo',
                'Routes to Galle',
                'What routes depart from Kandy?',
            ],
        ),
        (
            'statistics',
            'Get database overview and statistics',
            [
                'What is the average fare?',
                'Database statistics',
                'How many routes are there?',
            ],
        ),
    )

    test_queries = [
        {'type': kind, 'description': summary, 'examples': samples}
        for kind, summary, samples in query_type_specs
    ]

    # Only the first two examples of each type are executed live.
    live_examples = [
        {
            'type': spec['type'],
            'description': spec['description'],
            'examples': [_run_example(sample) for sample in spec['examples'][:2]]
        }
        for spec in test_queries
    ]

    nlp_section = {
        'description': 'Advanced NLP for transport queries with enhanced understanding',
        'features': [
            'Multiple query formats (fare, price, cost)',
            'Natural language patterns (from X to Y, X to Y fare, etc.)',
            'Question formats (What is, How much, Show me, etc.)',
            'Compact formats (Colombo to Kandy fare)',
            'Spell correction and fuzzy matching',
            'Automatic location name correction',
            'LLM-powered query interpretation',
            'Fallback keyword-based processing',
            'Advanced intent classification',
            'Entity extraction and normalization',
            'Confidence scoring for query understanding'
        ]
    }

    spell_section = {
        'description': 'Automatic location name correction',
        'methods': [
            'Direct mapping (exact matches)',
            'Fuzzy matching (similar names)',
            'LLM correction (AI-powered)',
            'Partial matching (substring matching)'
        ],
        'examples': [
            'panadra → Panadura',
            'gale → Galle',
            'colmbo → Colombo',
            'kandee → Kandy'
        ]
    }

    llm_section = {
        'description': 'AI-powered query interpretation with LLM Cypher generation',
        'features': [
            'Automatic query type detection',
            'LLM-powered Cypher query generation',
            'Natural language understanding',
            'Fallback to keyword-based processing',
            'Advanced entity extraction',
            'Intent classification with confidence scoring',
            'Real-time database querying'
        ]
    }

    enhanced_section = {
        'description': 'Advanced NLP capabilities',
        'features': [
            'Multi-intent query understanding',
            'Context-aware responses',
            'Query preprocessing and normalization',
            'Advanced pattern matching',
            'Confidence-based result ranking',
            'Comprehensive query analysis',
            'Live database results for all query types'
        ]
    }

    capabilities = {
        'natural_language_processing': nlp_section,
        'query_types': test_queries,
        'live_examples': live_examples,
        'spell_correction': spell_section,
        'llm_integration': llm_section,
        'enhanced_features': enhanced_section
    }

    return jsonify({
        'success': True,
        'capabilities': capabilities
    })
357
+
358
@app.route('/api/nlp/test', methods=['POST'])
def test_nlp_query():
    """Test a natural language query and return a step-by-step analysis.

    Request JSON:
        query (str): the query to analyse.
        enhanced_nlp (bool, optional): use the enhanced NLP processor when
            truthy (default ``True``); otherwise use the basic processor.

    Returns:
        JSON with ``analysis`` (original query plus a list of processing
        steps: location extraction, query processing and, when data was
        returned, a sample of the database results) and the raw ``result``.
        On failure the response is ``{'success': False, 'message': ...}``.
    """
    try:
        # Tolerate a missing, malformed or non-object JSON body.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}

        raw_query = data.get('query')
        user_query = raw_query.strip() if isinstance(raw_query, str) else ''
        use_enhanced_nlp = data.get('enhanced_nlp', True)

        if not user_query:
            return jsonify({
                'success': False,
                'message': 'Please provide a query to test.'
            })

        # Get detailed analysis
        analysis = {
            'original_query': user_query,
            'processing_steps': []
        }

        # Step 1: Extract locations.
        # Each entry is indexed as (original, corrected, confidence, method).
        locations = spell_corrector.extract_locations_from_query(user_query)
        analysis['processing_steps'].append({
            'step': 'Location Extraction',
            'locations_found': len(locations),
            'details': [
                {
                    'original': loc[0],
                    'corrected': loc[1],
                    'confidence': loc[2],
                    'method': loc[3]
                } for loc in locations
            ]
        })

        # Step 2: Process query with the selected processor
        if use_enhanced_nlp:
            result = enhanced_nlp_processor.process_query(user_query)
            # `or {}` also covers an explicit None under 'query_analysis'.
            query_analysis = result.get('query_analysis') or {}
            analysis['processing_steps'].append({
                'step': 'Enhanced NLP Processing',
                'success': result.get('success', False),
                'query_type': result.get('query_type', 'unknown'),
                'message': result.get('message', ''),
                'confidence': query_analysis.get('confidence', 0),
                'intent': query_analysis.get('intent', {}),
                'entities': query_analysis.get('entities', {})
            })
        else:
            result = query_processor.process_query(user_query)
            analysis['processing_steps'].append({
                'step': 'Basic Query Processing',
                'success': result.get('success', False),
                'query_type': result.get('query_type', 'unknown'),
                'message': result.get('message', ''),
                'cypher_query': result.get('cypher_query', ''),
                'corrections': result.get('corrections', [])
            })

        # Step 3: Results
        rows = result.get('data')
        if result.get('success') and rows:
            # Only sequences can be sliced; other shapes (e.g. a dict of
            # statistics) are passed through whole instead of raising.
            sample = rows[:3] if isinstance(rows, (list, tuple)) else rows
            analysis['processing_steps'].append({
                'step': 'Database Results',
                'results_count': len(rows),
                'sample_results': sample  # First 3 results for sequences
            })

        return jsonify({
            'success': True,
            'analysis': analysis,
            'result': result
        })

    except Exception as e:
        logger.exception("Error testing NLP query")
        return jsonify({
            'success': False,
            'message': f'Error testing NLP query: {str(e)}'
        })
435
+
436
@app.route('/api/nlp/demo')
def get_nlp_demo():
    """Return a static catalogue of demo queries grouped by category."""
    # (category title, sample queries) in display order
    catalogue = (
        ('Basic Fare Queries', [
            'What is the fare from Colombo to Kandy?',
            'fare of anuradhapura to kandy',
            'price from panadura to galle',
            'Colombo to Kandy fare',
        ]),
        ('Comparison Queries', [
            'Compare fares from Colombo to Kandy vs Colombo to Galle',
            'Which is cheaper between Colombo to Kandy and Colombo to Anuradapura?',
            'What is the difference in fare between Panadura to Galle and Panadura to Matara?',
        ]),
        ('Range Search Queries', [
            'Find routes under 500 rupees',
            'Show me routes between 200 and 800 rupees',
            'Routes over 1000 rupees',
        ]),
        ('Recommendation Queries', [
            'Recommend cheap routes',
            'Show me popular destinations',
            'What are the best routes from Colombo?',
        ]),
        ('Statistical Queries', [
            'What is the average fare?',
            'Database statistics',
            'How many routes are there?',
        ]),
        ('Route Queries', [
            'Show me the cheapest routes',
            'Routes from Colombo',
            'Routes to Galle',
            'What routes depart from Kandy?',
        ]),
        ('Spell Correction Tests', [
            'price from panadra to gale',
            'fare of colmbo to kandee',
            'cost from anuradapura to kandy',
        ]),
    )

    demo_queries = [
        {'category': title, 'queries': samples}
        for title, samples in catalogue
    ]

    demo = {
        'title': 'Enhanced Natural Language Transport Query Demo',
        'description': 'Advanced NLP capabilities with comparison, range search, and recommendations',
        'categories': demo_queries
    }
    return jsonify({
        'success': True,
        'demo': demo
    })
508
+
509
@app.route('/api/examples')
def get_examples():
    """Return the categorized example queries shown to users.

    The response is ``{'success': True, 'examples': [...]}`` where every
    element has a ``category`` title and an ``examples`` list of
    ``{'query', 'description'}`` dictionaries. The queries themselves are
    written in Sinhala.
    """
    # Each row is (category title, ((query, description), ...)).
    # "formerly" comments record the English example that the Sinhala query
    # replaced in the source; they are not guaranteed to be translations.
    catalogue = (
        # === FARE QUERIES (Various Natural Language Formats) ===
        ('Fare Queries', (
            # formerly: 'What is the fare from Colombo to Kandy?'
            ('කොළඹ සිට මහනුවරට ගාස්තුව කීයද?', 'Standard fare query format'),
            ('පානදුරේ ඉඳන් ගාල්ලට කීයක් යනවද?', 'Alternative way to ask for fare'),
            ('අනුරාධපුර සිට මහනුවර දක්වා ගාස්තුව', 'Natural language format'),
            # formerly: 'price from panadura to galle'
            ('පානදුරේ ඉඳන් ගාල්ලට කීයක් යනවද?', 'Using "price" instead of "fare"'),
            # formerly: 'Colombo to nuwara eliya fare'
            ('බදුල්ල සිට කොළඹට ගාස්තුව කීයද?', 'Compact format'),
            # formerly: 'How much is the fare from matara to kandy?'
            ('මහනුවර සිට මාතරට ගාස්තුව කීයද?', 'Question format'),
        )),
        # === COMPARISON QUERIES ===
        ('Comparison Queries', (
            # formerly: 'Compare fares from Colombo to Kandy vs Colombo to Galle'
            ('කොළඹ සිට මහනුවර දක්වා සහ කොළඹ සිට ගාල්ල දක්වා ගාස්තු සංසන්දනය කරන්න.', 'Compare two different routes'),
            # formerly: 'Which is cheaper between Colombo to Kandy and Colombo to Anuradapura?'
            ('කොළඹ සිට මහනුවර දක්වා සහ කොළඹ සිට අනුරාධපුර දක්වා ලාභදායී වන්නේ කුමක්ද?', 'Find the cheaper option'),
            # formerly: 'What is the difference in fare between Panadura to Galle and Panadura to Matara?'
            ('පානදුර සිට ගාල්ල දක්වා සහ පානදුර සිට මාතර දක්වා ගාස්තුවේ වෙනස කීයද?', 'Calculate fare difference'),
        )),
        # === RANGE SEARCH QUERIES ===
        ('Range Search Queries', (
            # formerly: 'Find routes under 500 rupees'
            ('රුපියල් 500ට අඩු මාර්ග සොයා ගන්න', 'Find affordable routes'),
            # formerly: 'Show me routes between 200 and 800 rupees'
            ('රුපියල් 200 සහ 800 අතර මාර්ග සොයා ගන්න', 'Find routes in price range'),
            # formerly: 'Routes over 1000 rupees'
            ('රුපියල් 1000ට ඉහළ මාර්ග සොයා ගන්න', 'Find expensive routes'),
        )),
        # === RECOMMENDATION QUERIES ===
        ('Recommendation Queries', (
            # formerly: 'Recommend cheap routes'
            ('ලාභ මාර්ග නිර්දේශ කරන්න', 'Get budget-friendly recommendations'),
            # formerly: 'Show me popular destinations'
            ('මට ජනප්‍රිය ගමනාන්ත පෙන්වන්න', 'Find frequently traveled routes'),
            # formerly: 'What are the best routes from Colombo?'
            ('කොළඹ සිට යාමට හොඳම මාර්ග මොනවාද?', 'Get optimal route suggestions'),
        )),
        # === STATISTICAL QUERIES ===
        ('Statistical Queries', (
            # formerly: 'What is the average fare?'
            ('සාමාන්‍ය ගාස්තුව කීයද?', 'Get average fare statistics'),
            # formerly: 'Database statistics'
            ('දත්ත සමුදා සංඛ්යා ලේඛන', 'Get comprehensive database overview'),
            ('මාර්ග කීයක් තිබේද?', 'Count total routes'),
        )),
        # === ROUTE QUERIES ===
        ('Route Queries', (
            # formerly: 'Show me the cheapest routes'
            ('මට ලාභදායී මාර්ග 10ක් පෙන්වන්න', 'Find top 10 cheapest routes'),
            # formerly: 'Routes from Colombo'
            ('කොළඹ සිට යාමට මාර්ග මොනවාද?', 'Find all routes departing from a location'),
            # formerly: 'Routes to Galle'
            ('ගාල්ල යාමට මාර්ග මොනවාද?', 'Find all routes going to a location'),
            # formerly: 'What routes depart from Kandy?'
            ('මහනුවර සිට යාමට මාර්ග මොනවාද?', 'Question format for routes'),
        )),
        # === SPELLING ERROR EXAMPLES ===
        ('Spell Correction Examples', (
            # formerly: 'price from panadra to gale'
            ('පාන්දුරේ ඉඳන් ගාල්ල්ට කීයක් යනවද?', 'Test spell correction (Panadura, Galle)'),
            # formerly: 'fare of colmbo to kandee'
            ('කොළ්බ්හ සිට මහනුවර්ට ගාස්තුව කීයද?', 'Test spell correction (Colombo, Kandy)'),
            # formerly: 'cost from anuradapura to kandy'
            ('අනුරපුර සිට මහනුවර්රට ගාස්තුව කීයද?', 'Natural format with correct spelling'),
        )),
    )

    examples = []
    for title, entries in catalogue:
        examples.append({
            'category': title,
            'examples': [
                {'query': text, 'description': note}
                for text, note in entries
            ],
        })

    payload = {'success': True, 'examples': examples}
    return jsonify(payload)
689
+
690
@app.route('/api/nlp/advanced', methods=['POST'])
def advanced_nlp_query():
    """Run a natural-language query through the enhanced NLP processor.

    Expects a JSON body of the form ``{"query": "<text>"}`` and returns the
    dictionary produced by ``enhanced_nlp_processor.process_query`` as JSON.

    A missing, malformed or non-object body, and a blank or non-string
    ``query`` value, all produce a ``success: False`` payload asking the
    caller to provide a query. Any other failure is reported as a
    ``success: False`` payload carrying the error text.
    """
    try:
        # silent=True returns None for an absent/invalid JSON body instead of
        # raising, so the "please provide a query" branch below handles it.
        data = request.get_json(silent=True)
        raw_query = data.get('query', '') if isinstance(data, dict) else ''
        user_query = raw_query.strip() if isinstance(raw_query, str) else ''

        if not user_query:
            return jsonify({
                'success': False,
                'message': 'Please provide a query to process.'
            })

        # Process with enhanced NLP
        result = enhanced_nlp_processor.process_query(user_query)

        return jsonify(result)

    except Exception as e:
        # Top-level boundary of the endpoint: report the failure as JSON.
        return jsonify({
            'success': False,
            'message': f'Error processing advanced NLP query: {str(e)}'
        })
713
+
714
def _build_route_comparison_query(routes):
    """Build a parameterized Cypher query comparing the fares of ``routes``.

    Args:
        routes: Iterable of ``{'from': <place>, 'to': <place>}`` mappings.
            Entries that are not mappings or lack either endpoint are ignored.

    Returns:
        A ``(query, params, count)`` tuple: the Cypher text, the parameter
        dictionary to pass alongside it, and the number of routes included.
    """
    match_parts = []
    return_parts = []
    params = {}

    for route in routes:
        if not isinstance(route, dict):
            continue
        from_loc = route.get('from')
        to_loc = route.get('to')
        if not (from_loc and to_loc):
            continue

        # Number only the routes actually used so MATCH and RETURN always
        # refer to the same variables.
        i = len(match_parts)
        params[f'from{i}'] = from_loc
        params[f'to{i}'] = to_loc
        match_parts.append(
            f"(a{i}:Place {{name: $from{i}}})-[r{i}:Fare]->(b{i}:Place {{name: $to{i}}})"
        )
        return_parts.append(
            f"a{i}.name + ' to ' + b{i}.name as route{i+1}, r{i}.fare as fare{i+1}"
        )

    query = "MATCH " + ", ".join(match_parts) + " RETURN " + ", ".join(return_parts)
    return query, params, len(match_parts)


@app.route('/api/nlp/compare', methods=['POST'])
def compare_routes():
    """Compare the fares of two or more routes.

    Expects a JSON body ``{"routes": [{"from": ..., "to": ...}, ...]}``.
    Place names are sent to Neo4j as query parameters rather than being
    interpolated into the Cypher text, so client input cannot alter the
    query. At least two routes with both endpoints are required.

    Returns a JSON payload with ``success``, ``message`` and, on success,
    ``data`` (one dictionary per result record).
    """
    try:
        data = request.get_json(silent=True)
        routes = data.get('routes', []) if isinstance(data, dict) else []
        if not isinstance(routes, list):
            routes = []

        comparison_query, params, route_count = _build_route_comparison_query(routes)

        if route_count < 2:
            return jsonify({
                'success': False,
                'message': 'Please provide at least 2 routes to compare.'
            })

        # Execute query
        with neo4j_service.driver.session() as session:
            result = session.run(comparison_query, params)
            results = [dict(record) for record in result]

        return jsonify({
            'success': True,
            'data': results,
            'message': f'Comparison of {route_count} routes completed'
        })

    except Exception as e:
        # Top-level boundary of the endpoint: report the failure as JSON.
        return jsonify({
            'success': False,
            'message': f'Error comparing routes: {str(e)}'
        })
759
+
760
def _coerce_price_bound(value, field):
    """Convert a client-supplied price bound to a finite float.

    Args:
        value: The raw JSON value (number or numeric string).
        field: Name of the field, used in the error message.

    Returns:
        The bound as a ``float``.

    Raises:
        ValueError: If ``value`` is a boolean, not numeric, or not finite.
    """
    import math

    # bool is a subclass of int; treat true/false as invalid rather than 1/0.
    if isinstance(value, bool):
        raise ValueError(f'{field} must be a number.')
    try:
        number = float(value)
    except (TypeError, ValueError):
        raise ValueError(f'{field} must be a number.') from None
    if not math.isfinite(number):
        raise ValueError(f'{field} must be a finite number.')
    return number


@app.route('/api/nlp/range', methods=['POST'])
def search_by_range():
    """Search routes whose fare lies within a price range.

    Expects a JSON body with ``min_price`` and/or ``max_price``. The bounds
    are validated as numbers and passed to Neo4j as query parameters instead
    of being interpolated into the Cypher text.

    Returns a JSON payload with ``success``, ``message`` and, on success,
    ``data``: records of ``from_place``, ``to_place`` and ``fare`` ordered by
    fare.
    """
    try:
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}
        min_price = data.get('min_price')
        max_price = data.get('max_price')

        if min_price is None and max_price is None:
            return jsonify({
                'success': False,
                'message': 'Please provide min_price or max_price or both.'
            })

        # Build range query
        conditions = []
        params = {}
        try:
            if min_price is not None:
                params['min_price'] = _coerce_price_bound(min_price, 'min_price')
                conditions.append("r.fare >= $min_price")
            if max_price is not None:
                params['max_price'] = _coerce_price_bound(max_price, 'max_price')
                conditions.append("r.fare <= $max_price")
        except ValueError as exc:
            return jsonify({
                'success': False,
                'message': str(exc)
            })

        range_query = (
            "MATCH (a:Place)-[r:Fare]->(b:Place) WHERE "
            + " AND ".join(conditions)
            + " RETURN a.name as from_place, b.name as to_place, r.fare as fare ORDER BY r.fare"
        )

        # Execute query
        with neo4j_service.driver.session() as session:
            result = session.run(range_query, params)
            results = [dict(record) for record in result]

        return jsonify({
            'success': True,
            'data': results,
            'message': f'Found {len(results)} routes in the specified range'
        })

    except Exception as e:
        # Top-level boundary of the endpoint: report the failure as JSON.
        return jsonify({
            'success': False,
            'message': f'Error searching by range: {str(e)}'
        })
802
+
803
@app.route('/api/nlp/test-all-types')
def test_all_query_types():
    """Run a fixed set of sample queries per query type and report results.

    Each sample is passed to ``enhanced_nlp_processor.process_query``; a
    failure of one sample is recorded for that sample and does not stop the
    run. The JSON response contains per-type results, a summary with the
    overall success rate, and the current Neo4j connection flag.
    """
    def run_sample(text):
        """Process one sample query and wrap the outcome for the report."""
        try:
            # Process with enhanced NLP (uses LLM for Cypher generation)
            outcome = enhanced_nlp_processor.process_query(text)
            return {
                'query': text,
                'result': outcome,
                'success': outcome.get('success', False)
            }
        except Exception as e:
            return {
                'query': text,
                'result': {
                    'success': False,
                    'message': f'Error processing query: {str(e)}'
                },
                'success': False
            }

    try:
        # Define test queries for each type
        test_queries = {
            'fare_inquiry': [
                'What is the fare from Colombo to Kandy?',
                'fare of anuradhapura to kandy',
                'price from panadura to galle'
            ],
            'comparison': [
                'Compare fares from Colombo to Kandy vs Colombo to Galle',
                'Which is cheaper between Colombo to Kandy and Colombo to Anuradapura?'
            ],
            'range_search': [
                'Find routes under 500 rupees',
                'Show me routes between 200 and 800 rupees',
                'Routes over 1000 rupees'
            ],
            'recommendation': [
                'Recommend cheap routes',
                'Show me popular destinations',
                'What are the best routes from Colombo?'
            ],
            'route_inquiry': [
                'Routes from Colombo',
                'Routes to Galle',
                'What routes depart from Kandy?'
            ],
            'statistics': [
                'What is the average fare?',
                'Database statistics',
                'How many routes are there?'
            ]
        }

        results = {}
        total_queries = 0
        total_successful = 0

        for query_type, samples in test_queries.items():
            report = [run_sample(sample) for sample in samples]
            passed = len([entry for entry in report if entry['success']])

            results[query_type] = {
                'description': f'Test results for {query_type} queries',
                'total_queries': len(samples),
                'successful_queries': passed,
                'examples': report
            }

            # Summary statistics accumulate as each type finishes
            total_queries += len(samples)
            total_successful += passed

        if total_queries > 0:
            success_rate = round((total_successful / total_queries) * 100, 2)
        else:
            success_rate = 0

        return jsonify({
            'success': True,
            'message': f'Tested {total_queries} queries across {len(test_queries)} types. {total_successful} successful.',
            'summary': {
                'total_query_types': len(test_queries),
                'total_queries_tested': total_queries,
                'successful_queries': total_successful,
                'success_rate': success_rate
            },
            'results': results,
            'neo4j_connected': neo4j_service.is_connected()
        })

    except Exception as e:
        return jsonify({
            'success': False,
            'message': f'Error testing query types: {str(e)}',
            'neo4j_connected': neo4j_service.is_connected()
        })
896
+
897
@app.errorhandler(404)
def not_found(error):
    """Answer unknown URLs with a JSON error body and HTTP status 404."""
    body = jsonify(success=False, message='Endpoint not found')
    return body, 404
903
+
904
@app.errorhandler(500)
def internal_error(error):
    """Answer unhandled server errors with a JSON body and HTTP status 500."""
    body = jsonify(success=False, message='Internal server error')
    return body, 500
910
+
911
if __name__ == '__main__':
    # Script entry point: print a start-up report, then start the Flask server.
    # Hugging Face Spaces uses port 7860 by default
    port = int(os.getenv('PORT', 7860))

    capability_lines = (
        " • Multiple query formats (fare, price, cost)",
        " • Natural language patterns (from X to Y, X to Y fare)",
        " • Question formats (What is, How much, Show me)",
        " • Compact formats (Colombo to Kandy fare)",
        " • Spell correction and fuzzy matching",
        " • LLM-powered query interpretation",
        " • Automatic Cypher query generation",
        " • Advanced intent classification",
        " • Entity extraction and normalization",
        " • Comparison queries (vs, versus, compare)",
        " • Range search queries (under, over, between)",
        " • Recommendation queries (recommend, suggest)",
        " • Confidence scoring for query understanding",
        " • Sinhala language support with translation",
        " • Automatic Sinhala-English translation",
        " • Dictionary-based and Google Translate fallback",
    )
    endpoint_lines = (
        " • /api/query - Process natural language queries (enhanced NLP)",
        " • /api/nlp/capabilities - View enhanced NLP capabilities with live examples",
        " • /api/nlp/test-all-types - Test all query types with live results",
        " • /api/nlp/test - Test queries with detailed analysis",
        " • /api/nlp/demo - Get comprehensive demo queries",
        " • /api/examples - Get categorized example queries",
        " • /api/sinhala/examples - Get Sinhala example queries",
        " • /api/translation/test - Test translation functionality",
        " • /api/translation/translate - Translate text between languages",
        " • /api/status - System status and statistics",
        " • /api/suggestions - Get location suggestions",
        " • /api/places - Get all available places",
    )

    print("🚌 Natural Language Transport Query System")
    print("=" * 60)
    print(f"🚀 Starting on port {port}")
    print(f"🌐 Open your browser and go to: http://localhost:{port}")

    # Check Neo4j connection
    if not neo4j_service.is_connected():
        print("⚠️ Neo4j not connected - some features may not work")
    else:
        print("✅ Connected to Neo4j database")
        stats = neo4j_service.get_route_statistics()
        if stats:
            print(f"📊 Database: {stats.get('total_places', 0)} places, {stats.get('total_routes', 0)} routes")

    # Check LLM availability
    llm_status = (
        "🤖 LLM integration available for spell correction"
        if spell_corrector.llm_available
        else "⚠️ LLM not available - using fuzzy matching only"
    )
    print(llm_status)

    print("\n🎯 Enhanced Natural Language Capabilities:")
    for line in capability_lines:
        print(line)

    print("\n🔗 Available API Endpoints:")
    for line in endpoint_lines:
        print(line)

    print("=" * 60)

    try:
        # Set debug=False for production
        app.run(debug=False, port=port, host='0.0.0.0')
    except Exception as e:
        print(f"❌ Error starting application: {e}")
        print("💡 Try running as administrator or check if another application is using the port")
973
+
974
+
config.py ADDED
@@ -0,0 +1,263 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Configuration file for Transport Query Application
4
+ """
5
+
6
+ import os
7
+ from dotenv import load_dotenv
8
+
9
+ # Load environment variables
10
+ load_dotenv()
11
+
12
class Config:
    """Application configuration.

    Connection settings and secrets are read from environment variables
    (the module calls ``load_dotenv()`` before this class is defined), so no
    credential is stored in source code.
    """

    # Neo4j Configuration (all three values come from the environment)
    NEO4J_URI = os.getenv("NEO4J_URI")
    NEO4J_USER = os.getenv("NEO4J_USER")
    NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD")

    # OpenAI Configuration (for LLM)
    OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
    # Honour OPENAI_MODEL from the environment, keeping the previous default.
    OPENAI_MODEL = os.getenv("OPENAI_MODEL", "gpt-3.5-turbo")

    # Flask Configuration
    SECRET_KEY = os.getenv("SECRET_KEY")
    DEBUG = True

    # Spell Correction Configuration
    SIMILARITY_THRESHOLD = 0.8
    MAX_SUGGESTIONS = 5

    # Location Mapping for Common Misspellings.
    # Keys are lower-case spellings (including known misspellings); values are
    # the canonical place names. Each key appears exactly once; order follows
    # the first appearance of each key in the previous revision.
    LOCATION_MAPPING = {
        'colombo': 'Colombo',
        'colmbo': 'Colombo',
        'kandy': 'Kandy',
        'panadura': 'Panadura',
        'panaduwa': 'Panadura',
        'galkissa': 'Mount Lavinia',
        'mount lavinia': 'Mount Lavinia',
        'kalutara': 'Kalutara',
        'aluthgama': 'Aluthgama',
        'balapitiya': 'Balapitiya',
        'ambalangoda': 'Ambalangoda',
        'hikkaduwa': 'Hikkaduwa',
        'galle': 'Galle',
        'koggala': 'Koggala',
        'waligama': 'Waligama',
        'matara': 'Matara',
        'anuradapura': 'Anuradapura',
        'anuradhapura': 'Anuradapura',
        'kurunagala': 'Kurunagala',
        'kurunegala': 'Kurunagala',
        'trincomalee': 'Trincomalee',
        'tricomalee': 'Trincomalee',
        'jaffna': 'Jaffna',
        'vavuniya': 'Vavuniya',
        'vavniyava': 'Vavuniya',
        'vavniyawa': 'Vavuniya',
        'chilaw': 'Chilaw',
        'chillaw': 'Chilaw',
        'puthalama': 'Puththalama',
        'puttalama': 'Puththalama',
        # NOTE(review): the previous revision listed 'thangalle' twice
        # ('Thangalle', then 'Tangalle'); the later entry won at runtime, so
        # 'Tangalle' is kept here. 'thangalla' still maps to 'Thangalle' -
        # confirm which canonical spelling the database uses.
        'thangalle': 'Tangalle',
        'thangalla': 'Thangalle',
        'bandarawela': 'Bandarawela',
        'bandatrawela': 'Bandarawela',
        'nuwaraeliya': 'Nuwaraeliya',
        'nuwara eliya': 'Nuwaraeliya',
        'badulla': 'Badulla',
        'monaragala': 'Monaragala',
        'ratnapura': 'Rathnapura',
        'rathnapura': 'Rathnapura',
        'kegalle': 'Kegalle',
        'mawanella': 'Mawanella',
        'mavanalla': 'Mawanella',
        'awissawella': 'Awissawella',
        'awisswella': 'Awissawella',
        'kaduwela': 'Kaduwela',
        'kaduruwela': 'Kaduwela',
        'maharagama': 'Maharagama',
        'dehiwala': 'Dehiwala',
        'moratuwa': 'Moratuwa',
        'beruwala': 'Beruwala',
        'bentota': 'Bentota',
        'induruwa': 'Induruwa',
        'kosgoda': 'Kosgoda',
        'ahungalla': 'Ahungalla',
        'karandeniya': 'Karandeniya',
        'eladuwa': 'Eladuwa',
        'gintota': 'Gintota',
        'boossa': 'Boossa',
        'katunayake': 'Katunayake',
        'negombo': 'Negombo',
        'seeduwa': 'Seeduwa',
        'ja-ela': 'Ja-ela',
        'wattala': 'Wattala',
        'kelaniya': 'Kelaniya',
        'kiribathgoda': 'Kiribathgoda',
        'kiribathgodas': 'Kiribathgoda',
        'ganemulla': 'Ganemulla',
        'mirigama': 'Mirigama',
        'polgahawela': 'Polgahawela',
        'warakapola': 'Warakapola',
        'galigamuwa': 'Galigamuwa',
        'galgamuwa': 'Galigamuwa',
        'ambepussa': 'Ambepussa',
        'alawwa': 'Alawwa',
        'peradeniya': 'Peradeniya',
        'gampola': 'Gampola',
        'nawalapitiya': 'Nawalapitiya',
        'teldeniya': 'Teldeniya',
        'kundasale': 'Kundasale',
        'katugastota': 'Katugastota',
        'pilimatalawa': 'Pilimatalawa',
        'harispattuwa': 'Harispattuwa',
        'akurana': 'Akurana',
        'matale': 'Matale',
        'dambulla': 'Dambulla',
        'sigiriya': 'Sigiriya',
        'habarana': 'Habarana',
        'polonnaruwa': 'Polonnaruwa',
        'minneriya': 'Minneriya',
        'galoya': 'Galoya',
        'batticaloa': 'Batticaloa',
        'batticolo': 'Batticaloa',
        'ampara': 'Ampara',
        'mahiyanganaya': 'Mahiyanganaya',
        'bibile': 'Bibile',
        'wellawaya': 'Wellawaya',
        'hali-ela': 'Hali-ela',
        'passara': 'Passara',
        'hatton': 'Hatton',
        'talawakele': 'Talawakele',
        'nanuoya': 'Nanuoya',
        'ambewela': 'Ambewela',
        'pattipola': 'Pattipola',
        'oya': 'Oya',
        'ella': 'Ella',
        'demodara': 'Demodara',
        'kataragama': 'Kataragama',
        'tissamaharama': 'Tissamaharama',
        'hambantota': 'Hambantota',
        'tangalle': 'Tangalle',
        'beliatta': 'Beliatta',
        'ambalantota': 'Ambalantota',
        'weligama': 'Weligama',
        'mirissa': 'Mirissa',
        'dikwella': 'Dikwella',
        'kamburupitiya': 'Kamburupitiya',
        'deniyaya': 'Deniyaya',
        'akurassa': 'Akurassa',
        'akuressa': 'Akurassa',
        'ahangama': 'Ahangama',
        'midigama': 'Midigama',
        'talpe': 'Talpe',
        'unawatuna': 'Unawatuna',
    }
enhanced_nlp_processor.py ADDED
@@ -0,0 +1,904 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Enhanced NLP Processor for Transport Query Application
4
+ Advanced natural language understanding and query processing
5
+ """
6
+
7
+ import re
8
+ import json
9
+ from typing import Dict, List, Tuple, Optional, Any
10
+ from datetime import datetime
11
+ from spell_corrector import SpellCorrector
12
+ from neo4j_service import Neo4jService
13
+ from config import Config
14
+ from logger import get_logger
15
+
16
+ class EnhancedNLPProcessor:
17
+ """Advanced NLP processor with sophisticated query understanding"""
18
+
19
+ def __init__(self):
20
+ self.config = Config()
21
+ self.spell_corrector = SpellCorrector()
22
+ self.neo4j_service = Neo4jService()
23
+ self.logger = get_logger(self.__class__.__name__)
24
+
25
+ # Query patterns and templates
26
+ self.query_patterns = {
27
+ 'fare_queries': [
28
+ r'(?:what\s+is\s+)?(?:the\s+)?(?:fare|price|cost)(?:\s+of)?(?:\s+from)?\s+([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)',
29
+ r'(?:what\s+is\s+)?(?:the\s+)?(?:bus\s+)?(?:fare|price|cost)(?:\s+of)?(?:\s+from)?\s+([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)',
30
+ r'(?:how\s+much\s+)?(?:is|does)\s+(?:the\s+)?(?:bus\s+)?(?:fare|price|cost)\s+(?:from\s+)?([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)',
31
+ r'([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)\s+(?:fare|price|cost)',
32
+ r'(?:fare|price|cost)\s+(?:from\s+)?([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)',
33
+ r'(?:travel|transport)\s+(?:cost|price|fare)\s+(?:from\s+)?([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)',
34
+ r'(?:bus|train)\s+(?:fare|price|cost)\s+(?:from\s+)?([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)',
35
+ r'(?:ticket\s+price|ticket\s+fare)\s+(?:from\s+)?([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)'
36
+ ],
37
+ 'comparison_queries': [
38
+ r'(?:compare|difference)\s+(?:between\s+)?(?:fares?|prices?|costs?)\s+(?:from\s+)?([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)\s+(?:and|vs|versus)\s+([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)',
39
+ r'(?:which\s+is\s+)?(?:cheaper|more\s+expensive)\s+(?:between\s+)?([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)\s+(?:and|vs|versus)\s+([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)'
40
+ ],
41
+ 'range_queries': [
42
+ r'(?:routes?|fares?|prices?)\s+(?:between|from)\s+([0-9,]+)\s+(?:and|to)\s+([0-9,]+)\s+(?:rupees?|rs?)',
43
+ r'(?:find|show)\s+(?:routes?|fares?|prices?)\s+(?:under|below|less\s+than)\s+([0-9,]+)\s+(?:rupees?|rs?)',
44
+ r'(?:find|show)\s+(?:routes?|fares?|prices?)\s+(?:over|above|more\s+than)\s+([0-9,]+)\s+(?:rupees?|rs?)'
45
+ ],
46
+ 'route_queries': [
47
+ r'(?:routes?|buses?|trains?)\s+(?:from|departing\s+from)\s+([a-zA-Z\s]+)',
48
+ r'(?:routes?|buses?|trains?)\s+(?:to|arriving\s+at)\s+([a-zA-Z\s]+)',
49
+ r'(?:how\s+many\s+)?(?:routes?|buses?|trains?)\s+(?:connect|go\s+to|from)\s+([a-zA-Z\s]+)',
50
+ r'(?:direct|non-stop)\s+(?:routes?|buses?|trains?)\s+(?:from\s+)?([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)'
51
+ ],
52
+ 'statistical_queries': [
53
+ r'(?:average|mean|median)\s+(?:fare|price|cost)',
54
+ r'(?:total|sum)\s+(?:of\s+)?(?:all\s+)?(?:fares?|prices?|costs?)',
55
+ r'(?:how\s+many\s+)?(?:routes?|places?|locations?)',
56
+ r'(?:database|system)\s+(?:statistics?|stats?|overview)',
57
+ r'(?:summary|overview)\s+(?:of\s+)?(?:transport|fare)\s+(?:data|database)'
58
+ ],
59
+ 'recommendation_queries': [
60
+ r'(?:recommend|suggest)\s+(?:cheap|budget|affordable)\s+(?:routes?|options?)',
61
+ r'(?:best|optimal)\s+(?:route|way)\s+(?:from\s+)?([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)',
62
+ r'(?:popular|frequent)\s+(?:routes?|destinations?)',
63
+ r'(?:hidden|secret|unknown)\s+(?:routes?|destinations?)'
64
+ ]
65
+ }
66
+
67
+ # Query intent classification
68
+ self.intent_keywords = {
69
+ 'fare_inquiry': ['fare', 'price', 'cost', 'how much', 'what is the cost'],
70
+ 'route_inquiry': ['route', 'bus', 'train', 'transport', 'how to get', 'way to'],
71
+ 'comparison': ['compare', 'difference', 'vs', 'versus', 'which is', 'better'],
72
+ 'statistics': ['statistics', 'stats', 'overview', 'summary', 'total', 'average'],
73
+ 'recommendation': ['recommend', 'suggest', 'best', 'optimal', 'popular'],
74
+ 'range_search': ['between', 'under', 'over', 'above', 'below', 'range'],
75
+ 'availability': ['available', 'exist', 'have', 'is there', 'can i']
76
+ }
77
+
78
+ def process_query(self, user_query: str) -> Dict[str, Any]:
79
+ """
80
+ Process natural language query with advanced NLP understanding
81
+
82
+ Args:
83
+ user_query: Natural language query string
84
+
85
+ Returns:
86
+ Dictionary with comprehensive query analysis and results
87
+ """
88
+ try:
89
+ # Step 1: Preprocess query
90
+ processed_query = self._preprocess_query(user_query)
91
+ self.logger.info(f"Processing query: original='{user_query}', preprocessed='{processed_query}'")
92
+
93
+ # Step 2: Extract entities and intent
94
+ entities = self._extract_entities(processed_query)
95
+ intent = self._classify_intent(processed_query, entities)
96
+
97
+ # Step 3: Generate Cypher query
98
+ cypher_query = self._generate_cypher_query(intent, entities, processed_query)
99
+ self.logger.debug(f"Intent: {intent}; Entities: {entities}; Cypher: {str(cypher_query).strip()[:200]}")
100
+
101
+
102
+
103
+ # Step 4: Execute query and format results
104
+ if cypher_query:
105
+ results = self._execute_query(cypher_query)
106
+ self.logger.info(f"Query results count: {len(results)}")
107
+ response = self._format_response(intent, entities, results, processed_query)
108
+ else:
109
+ response = self._handle_unclear_query(processed_query)
110
+
111
+ # Step 5: Add metadata
112
+ response.update({
113
+ 'query_analysis': {
114
+ 'original_query': user_query,
115
+ 'processed_query': processed_query,
116
+ 'intent': intent,
117
+ 'entities': entities,
118
+ 'confidence': self._calculate_confidence(intent, entities)
119
+ }
120
+ })
121
+
122
+ return response
123
+
124
+ except Exception as e:
125
+ return {
126
+ 'success': False,
127
+ 'message': f'Error processing query: {str(e)}',
128
+ 'suggestions': self._get_suggestions()
129
+ }
130
+
131
+ def _preprocess_query(self, query: str) -> str:
132
+ """Preprocess and normalize the query"""
133
+ # Convert to lowercase
134
+ query = query.lower().strip()
135
+
136
+ # Remove extra whitespace
137
+ query = re.sub(r'\s+', ' ', query)
138
+
139
+ # Normalize common variations
140
+ replacements = {
141
+ 'rs.': 'rupees',
142
+ 'rs': 'rupees',
143
+ 'lkr': 'rupees',
144
+ '→': 'to',
145
+ '->': 'to',
146
+ 'vs': 'versus',
147
+ '&': 'and',
148
+ 'w/': 'with',
149
+ 'w/o': 'without'
150
+ }
151
+
152
+ for old, new in replacements.items():
153
+ query = query.replace(old, new)
154
+
155
+ return query
156
+
157
+ def _extract_entities(self, query: str) -> Dict[str, Any]:
158
+ """Extract entities from the query"""
159
+ entities = {
160
+ 'locations': [],
161
+ 'numbers': [],
162
+ 'currencies': [],
163
+ 'comparators': [],
164
+ 'time_expressions': []
165
+ }
166
+
167
+ # Extract locations with priority for different query types
168
+ comparison_patterns = [
169
+ r'(?:which\s+is\s+)?(?:cheaper|more\s+expensive)\s+(?:between\s+)?([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)\s+(?:and|vs|versus)\s+([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)(?:\s|$|\?)',
170
+ r'(?:what\s+is\s+)?(?:the\s+)?(?:difference|compare)\s+(?:in\s+)?(?:fare|price|cost)\s+(?:between\s+)?([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)\s+(?:and|vs|versus)\s+([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)(?:\s|$|\?)',
171
+ r'(?:compare|difference)\s+(?:between\s+)?(?:fares?|prices?|costs?)\s+(?:from\s+)?([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)\s+(?:and|vs|versus)\s+([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)(?:\s|$|\?)',
172
+ # Simpler patterns for comparison
173
+ r'([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)\s+(?:and|vs|versus)\s+([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)(?:\s|$|\?)',
174
+ r'([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)\s+(?:and|vs|versus)\s+([a-zA-Z\s]+?)(?:\s|$|\?)'
175
+ ]
176
+
177
+ fare_patterns = [
178
+ r'(?:fare|price|cost)\s+(?:of|from)?\s+([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)(?:\s|$|\?)',
179
+ r'(?:what\s+is\s+)?(?:the\s+)?(?:fare|price|cost)(?:\s+of)?(?:\s+from)?\s+([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)(?:\s|$|\?)',
180
+ r'(?:how\s+much\s+)?(?:is|does)\s+(?:the\s+)?(?:fare|price|cost)\s+(?:from\s+)?([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)(?:\s|$|\?)'
181
+ ]
182
+
183
+ general_patterns = [
184
+ r'from\s+([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)(?:\s|$|\?)',
185
+ r'([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)(?:\s|$|\?)',
186
+ r'between\s+([a-zA-Z\s]+?)\s+and\s+([a-zA-Z\s]+?)(?:\s|$|\?)'
187
+ ]
188
+
189
+ # Use a set to avoid duplicates
190
+ seen_locations = set()
191
+
192
+ # Try comparison patterns first (highest priority)
193
+ for pattern in comparison_patterns:
194
+ matches = re.finditer(pattern, query, re.IGNORECASE)
195
+ for match in matches:
196
+ locations = [loc.strip() for loc in match.groups() if loc.strip()]
197
+ for loc in locations:
198
+ # Skip if we've already processed this location
199
+ if loc.lower() in seen_locations:
200
+ continue
201
+ seen_locations.add(loc.lower())
202
+
203
+ corrected, confidence, method = self.spell_corrector.correct_location(loc)
204
+ if confidence > 0.5:
205
+ entities['locations'].append({
206
+ 'original': loc,
207
+ 'corrected': corrected,
208
+ 'confidence': confidence,
209
+ 'method': method
210
+ })
211
+
212
+ # If no locations found with comparison patterns, try fare patterns
213
+ if not entities['locations']:
214
+ for pattern in fare_patterns:
215
+ matches = re.finditer(pattern, query, re.IGNORECASE)
216
+ for match in matches:
217
+ locations = [loc.strip() for loc in match.groups() if loc.strip()]
218
+ for loc in locations:
219
+ # Skip if we've already processed this location
220
+ if loc.lower() in seen_locations:
221
+ continue
222
+ seen_locations.add(loc.lower())
223
+
224
+ corrected, confidence, method = self.spell_corrector.correct_location(loc)
225
+ if confidence > 0.5:
226
+ entities['locations'].append({
227
+ 'original': loc,
228
+ 'corrected': corrected,
229
+ 'confidence': confidence,
230
+ 'method': method
231
+ })
232
+
233
+ # If no locations found with fare patterns, try general patterns
234
+ if not entities['locations']:
235
+ for pattern in general_patterns:
236
+ matches = re.finditer(pattern, query, re.IGNORECASE)
237
+ for match in matches:
238
+ locations = [loc.strip() for loc in match.groups() if loc.strip()]
239
+ for loc in locations:
240
+ # Skip if we've already processed this location
241
+ if loc.lower() in seen_locations:
242
+ continue
243
+ seen_locations.add(loc.lower())
244
+
245
+ corrected, confidence, method = self.spell_corrector.correct_location(loc)
246
+ if confidence > 0.5:
247
+ entities['locations'].append({
248
+ 'original': loc,
249
+ 'corrected': corrected,
250
+ 'confidence': confidence,
251
+ 'method': method
252
+ })
253
+
254
+
255
+
256
+
257
+
258
+ # Extract numbers and currencies
259
+ number_patterns = [
260
+ r'(under|below|less\s+than|over|above|more\s+than)\s+(\d+(?:,\d+)*(?:\.\d+)?)\s*(rupees?|rs?|lkr)?',
261
+ r'between\s+(\d+(?:,\d+)*(?:\.\d+)?)\s+and\s+(\d+(?:,\d+)*(?:\.\d+)?)\s*(rupees?|rs?|lkr)?',
262
+ r'(\d+(?:,\d+)*(?:\.\d+)?)\s*(rupees?|rs?|lkr)?'
263
+ ]
264
+
265
+ for pattern in number_patterns:
266
+ matches = re.finditer(pattern, query, re.IGNORECASE)
267
+ for match in matches:
268
+ groups = match.groups()
269
+ if len(groups) >= 2:
270
+ if groups[0] in ['under', 'below', 'less than', 'over', 'above', 'more than']:
271
+ # Pattern: (under|below|less than|over|above|more than) (number) (currency)
272
+ comparator = groups[0]
273
+ number = groups[1]
274
+ currency = groups[2] if len(groups) >= 3 else 'rupees'
275
+
276
+ entities['numbers'].append({
277
+ 'value': float(number.replace(',', '')),
278
+ 'currency': currency,
279
+ 'comparator': comparator
280
+ })
281
+ elif 'between' in pattern:
282
+ # Pattern: between (number1) and (number2) (currency)
283
+ min_number = groups[0]
284
+ max_number = groups[1]
285
+ currency = groups[2] if len(groups) >= 3 else 'rupees'
286
+
287
+ entities['numbers'].append({
288
+ 'value': float(min_number.replace(',', '')),
289
+ 'currency': currency,
290
+ 'comparator': 'between_min'
291
+ })
292
+ entities['numbers'].append({
293
+ 'value': float(max_number.replace(',', '')),
294
+ 'currency': currency,
295
+ 'comparator': 'between_max'
296
+ })
297
+ else:
298
+ # Pattern: (number) (currency)
299
+ number = groups[0]
300
+ currency = groups[1] if len(groups) >= 2 else 'rupees'
301
+
302
+ entities['numbers'].append({
303
+ 'value': float(number.replace(',', '')),
304
+ 'currency': currency,
305
+ 'comparator': None
306
+ })
307
+
308
+ # Extract comparators
309
+ comparator_patterns = [
310
+ r'(cheaper|more\s+expensive|better|worse|faster|slower)',
311
+ r'(compare|difference|vs|versus)',
312
+ r'(under|below|less\s+than|over|above|more\s+than)'
313
+ ]
314
+
315
+ for pattern in comparator_patterns:
316
+ matches = re.finditer(pattern, query, re.IGNORECASE)
317
+ for match in matches:
318
+ entities['comparators'].append(match.group(1).lower())
319
+
320
+ return entities
321
+
322
+ def _classify_intent(self, query: str, entities: Dict = None) -> Dict[str, Any]:
323
+ """Classify the intent of the query"""
324
+ intent_scores = {}
325
+
326
+ for intent, keywords in self.intent_keywords.items():
327
+ score = 0
328
+ for keyword in keywords:
329
+ if keyword in query:
330
+ score += 1
331
+ intent_scores[intent] = score
332
+
333
+ # Get primary intent
334
+ primary_intent = max(intent_scores.items(), key=lambda x: x[1])
335
+
336
+ # Check for specific patterns with priority
337
+ if any(pattern in query for pattern in ['compare', 'difference', 'vs', 'versus', 'cheaper', 'more expensive']):
338
+ primary_intent = ('comparison', 10)
339
+ elif any(pattern in query for pattern in ['recommend', 'suggest', 'best', 'optimal', 'popular']):
340
+ primary_intent = ('recommendation', 10)
341
+ elif any(pattern in query for pattern in ['between', 'under', 'over', 'above', 'below', 'range']):
342
+ primary_intent = ('range_search', 10)
343
+ elif any(pattern in query for pattern in ['fare', 'price', 'cost', 'how much']):
344
+ # Check if we have at least 2 locations
345
+ if entities and len(entities.get('locations', [])) >= 2:
346
+ primary_intent = ('fare_inquiry', 10)
347
+ elif any(pattern in query for pattern in ['route', 'bus', 'train', 'transport']):
348
+ primary_intent = ('route_inquiry', 10)
349
+
350
+ return {
351
+ 'primary': primary_intent[0],
352
+ 'confidence': primary_intent[1] / 10,
353
+ 'all_scores': intent_scores
354
+ }
355
+
356
+ def _generate_cypher_query(self, intent: Dict, entities: Dict, query: str) -> Optional[str]:
357
+ """Generate Cypher query using LLM for better understanding"""
358
+ try:
359
+ # Try LLM-based query generation first
360
+ llm_query = self._generate_cypher_with_llm(query, intent, entities)
361
+ if llm_query:
362
+ return llm_query
363
+ except Exception as e:
364
+ print(f"LLM query generation failed: {e}")
365
+
366
+ # Fallback to rule-based generation
367
+ primary_intent = intent['primary']
368
+
369
+ if primary_intent == 'fare_inquiry':
370
+ return self._generate_fare_query(entities)
371
+ elif primary_intent == 'comparison':
372
+ return self._generate_comparison_query(entities)
373
+ elif primary_intent == 'route_inquiry':
374
+ return self._generate_route_query(entities, query)
375
+ elif primary_intent == 'statistics':
376
+ return self._generate_statistics_query(entities)
377
+ elif primary_intent == 'recommendation':
378
+ return self._generate_recommendation_query(entities, query)
379
+ elif primary_intent == 'range_search':
380
+ return self._generate_range_query(entities)
381
+ else:
382
+ return self._generate_fallback_query(query)
383
+
384
+ def _generate_fare_query(self, entities: Dict) -> Optional[str]:
385
+ """Generate fare inquiry Cypher query"""
386
+ locations = entities.get('locations', [])
387
+
388
+ if len(locations) >= 2:
389
+ from_loc = locations[0]['corrected']
390
+ to_loc = locations[1]['corrected']
391
+
392
+ return f"""
393
+ MATCH (a:Place {{name: '{from_loc}'}})-[r:Fare]->(b:Place {{name: '{to_loc}'}})
394
+ RETURN
395
+ a.name as from_place,
396
+ b.name as to_place,
397
+ r.fare as fare,
398
+ 'Direct route' as route_type
399
+ """
400
+
401
+ return None
402
+
403
+ def _generate_comparison_query(self, entities: Dict) -> Optional[str]:
404
+ """Generate comparison Cypher query"""
405
+ locations = entities.get('locations', [])
406
+
407
+ if len(locations) >= 3:
408
+ # Handle case where we have same origin, different destinations
409
+ if len(locations) == 3:
410
+ # Pattern: "Colombo to Kandy and Colombo to Anuradapura"
411
+ route1_from = locations[0]['corrected']
412
+ route1_to = locations[1]['corrected']
413
+ route2_from = locations[0]['corrected'] # Same origin
414
+ route2_to = locations[2]['corrected']
415
+ elif len(locations) >= 4:
416
+ # Pattern: "Colombo to Kandy and Anuradapura to Galle"
417
+ route1_from = locations[0]['corrected']
418
+ route1_to = locations[1]['corrected']
419
+ route2_from = locations[2]['corrected']
420
+ route2_to = locations[3]['corrected']
421
+ else:
422
+ return None
423
+
424
+ return f"""
425
+ MATCH (a1:Place {{name: '{route1_from}'}})-[r1:Fare]->(b1:Place {{name: '{route1_to}'}})
426
+ MATCH (a2:Place {{name: '{route2_from}'}})-[r2:Fare]->(b2:Place {{name: '{route2_to}'}})
427
+ RETURN
428
+ a1.name + ' to ' + b1.name as route1,
429
+ r1.fare as fare1,
430
+ a2.name + ' to ' + b2.name as route2,
431
+ r2.fare as fare2,
432
+ r1.fare - r2.fare as difference,
433
+ CASE
434
+ WHEN r1.fare < r2.fare THEN 'Route 1 is cheaper'
435
+ WHEN r1.fare > r2.fare THEN 'Route 2 is cheaper'
436
+ ELSE 'Both routes have the same fare'
437
+ END as comparison
438
+ """
439
+
440
+ return None
441
+
442
+ def _generate_route_query(self, entities: Dict, query: str) -> Optional[str]:
443
+ """Generate route inquiry Cypher query"""
444
+ locations = entities.get('locations', [])
445
+
446
+ if 'from' in query and locations:
447
+ location = locations[0]['corrected']
448
+ return f"""
449
+ MATCH (a:Place {{name: '{location}'}})-[r:Fare]->(b:Place)
450
+ RETURN
451
+ a.name as from_place,
452
+ b.name as to_place,
453
+ r.fare as fare
454
+ ORDER BY r.fare
455
+ """
456
+ elif 'to' in query and locations:
457
+ location = locations[0]['corrected']
458
+ return f"""
459
+ MATCH (a:Place)-[r:Fare]->(b:Place {{name: '{location}'}})
460
+ RETURN
461
+ a.name as from_place,
462
+ b.name as to_place,
463
+ r.fare as fare
464
+ ORDER BY r.fare
465
+ """
466
+
467
+ return None
468
+
469
+ def _generate_statistics_query(self, entities: Dict) -> str:
470
+ """Generate statistics Cypher query"""
471
+ return """
472
+ MATCH (p:Place)
473
+ MATCH ()-[r:Fare]->()
474
+ RETURN
475
+ count(DISTINCT p) as total_places,
476
+ count(r) as total_routes,
477
+ round(avg(r.fare), 2) as average_fare,
478
+ min(r.fare) as minimum_fare,
479
+ max(r.fare) as maximum_fare,
480
+ round(stdDev(r.fare), 2) as fare_standard_deviation
481
+ """
482
+
483
+ def _generate_recommendation_query(self, entities: Dict, query: str) -> str:
484
+ """Generate recommendation Cypher query"""
485
+ if 'cheap' in query or 'budget' in query or 'affordable' in query:
486
+ return """
487
+ MATCH (a:Place)-[r:Fare]->(b:Place)
488
+ RETURN
489
+ a.name as from_place,
490
+ b.name as to_place,
491
+ r.fare as fare
492
+ ORDER BY r.fare ASC
493
+ LIMIT 10
494
+ """
495
+ elif 'popular' in query or 'frequent' in query:
496
+ return """
497
+ MATCH (a:Place)-[r:Fare]->(b:Place)
498
+ RETURN
499
+ a.name as from_place,
500
+ b.name as to_place,
501
+ r.fare as fare
502
+ ORDER BY r.fare DESC
503
+ LIMIT 10
504
+ """
505
+ else:
506
+ return """
507
+ MATCH (a:Place)-[r:Fare]->(b:Place)
508
+ RETURN
509
+ a.name as from_place,
510
+ b.name as to_place,
511
+ r.fare as fare
512
+ ORDER BY r.fare ASC
513
+ LIMIT 5
514
+ """
515
+
516
+ def _generate_range_query(self, entities: Dict) -> Optional[str]:
517
+ """Generate range search Cypher query"""
518
+ numbers = entities.get('numbers', [])
519
+
520
+ if numbers:
521
+ # Check for between range
522
+ between_min = None
523
+ between_max = None
524
+ single_value = None
525
+ single_comparator = None
526
+
527
+ for number in numbers:
528
+ comparator = number.get('comparator', '')
529
+ value = number['value']
530
+
531
+ if comparator == 'between_min':
532
+ between_min = value
533
+ elif comparator == 'between_max':
534
+ between_max = value
535
+ elif comparator in ['under', 'below', 'less than', 'over', 'above', 'more than']:
536
+ single_value = value
537
+ single_comparator = comparator
538
+
539
+ # Generate query based on type
540
+ if between_min is not None and between_max is not None:
541
+ return f"""
542
+ MATCH (a:Place)-[r:Fare]->(b:Place)
543
+ WHERE r.fare >= {between_min} AND r.fare <= {between_max}
544
+ RETURN
545
+ a.name as from_place,
546
+ b.name as to_place,
547
+ r.fare as fare
548
+ ORDER BY r.fare ASC
549
+ """
550
+ elif single_value is not None and single_comparator is not None:
551
+ if single_comparator in ['under', 'below', 'less than']:
552
+ return f"""
553
+ MATCH (a:Place)-[r:Fare]->(b:Place)
554
+ WHERE r.fare < {single_value}
555
+ RETURN
556
+ a.name as from_place,
557
+ b.name as to_place,
558
+ r.fare as fare
559
+ ORDER BY r.fare ASC
560
+ """
561
+ elif single_comparator in ['over', 'above', 'more than']:
562
+ return f"""
563
+ MATCH (a:Place)-[r:Fare]->(b:Place)
564
+ WHERE r.fare > {single_value}
565
+ RETURN
566
+ a.name as from_place,
567
+ b.name as to_place,
568
+ r.fare as fare
569
+ ORDER BY r.fare DESC
570
+ """
571
+
572
+ return None
573
+
574
+ def _generate_cypher_with_llm(self, query: str, intent: Dict, entities: Dict) -> Optional[str]:
575
+ """Generate Cypher query using LLM for better understanding"""
576
+ try:
577
+ if not self.config.OPENAI_API_KEY:
578
+ return None
579
+
580
+ # Get available places for context
581
+ available_places = list(self.neo4j_service.get_all_places())
582
+
583
+ # Create comprehensive prompt for Cypher generation
584
+ prompt = f"""
585
+ You are a Neo4j Cypher query generator for a transport database.
586
+
587
+ Database Schema:
588
+ - Nodes: Place (with property 'name')
589
+ - Relationships: Fare (with property 'fare')
590
+
591
+ Available Places: {', '.join(available_places[:50])}... (total: {len(available_places)})
592
+
593
+ User Query: "{query}"
594
+ Detected Intent: {intent.get('primary', 'unknown')}
595
+ Extracted Entities: {entities}
596
+
597
+ Your task is to generate a valid Cypher query that answers the user's question.
598
+
599
+ Query Types and Examples:
600
+
601
+ 1. FARE INQUIRY:
602
+ - "What is the fare from Colombo to Kandy?"
603
+ - Cypher: MATCH (a:Place {{name: 'Colombo'}})-[r:Fare]->(b:Place {{name: 'Kandy'}}) RETURN a.name as from_place, b.name as to_place, r.fare as fare
604
+
605
+ 2. COMPARISON:
606
+ - "Compare fares from Colombo to Kandy vs Colombo to Galle"
607
+ - Cypher: MATCH (a1:Place {{name: 'Colombo'}})-[r1:Fare]->(b1:Place {{name: 'Kandy'}}) MATCH (a2:Place {{name: 'Colombo'}})-[r2:Fare]->(b2:Place {{name: 'Galle'}}) RETURN a1.name + ' to ' + b1.name as route1, r1.fare as fare1, a2.name + ' to ' + b2.name as route2, r2.fare as fare2, r1.fare - r2.fare as difference
608
+
609
+ 3. RANGE SEARCH:
610
+ - "Find routes under 500 rupees"
611
+ - Cypher: MATCH (a:Place)-[r:Fare]->(b:Place) WHERE r.fare < 500 RETURN a.name as from_place, b.name as to_place, r.fare as fare ORDER BY r.fare ASC
612
+
613
+ 4. RECOMMENDATION:
614
+ - "Recommend cheap routes"
615
+ - Cypher: MATCH (a:Place)-[r:Fare]->(b:Place) RETURN a.name as from_place, b.name as to_place, r.fare as fare ORDER BY r.fare ASC LIMIT 10
616
+
617
+ 5. STATISTICS:
618
+ - "What is the average fare?"
619
+ - Cypher: MATCH ()-[r:Fare]->() RETURN round(avg(r.fare), 2) as average_fare, min(r.fare) as min_fare, max(r.fare) as max_fare
620
+
621
+ 6. ROUTE INQUIRY:
622
+ - "Routes from Colombo"
623
+ - Cypher: MATCH (a:Place {{name: 'Colombo'}})-[r:Fare]->(b:Place) RETURN a.name as from_place, b.name as to_place, r.fare as fare ORDER BY r.fare
624
+
625
+ Important Rules:
626
+ 1. Always use proper Cypher syntax
627
+ 2. Use exact place names from the available places list
628
+ 3. For comparisons, use multiple MATCH clauses
629
+ 4. For ranges, use WHERE clauses with appropriate operators
630
+ 5. For statistics, use aggregation functions
631
+ 6. Always include meaningful column aliases
632
+ 7. Use ORDER BY for sorted results
633
+ 8. Use LIMIT for large result sets
634
+
635
+ Return ONLY the Cypher query, nothing else. If you cannot generate a valid query, return "FALLBACK".
636
+ """
637
+
638
+ cypher_query = None
639
+ # Prefer new SDK
640
+ try:
641
+ from openai import OpenAI
642
+ client = OpenAI(api_key=self.config.OPENAI_API_KEY)
643
+ response = client.chat.completions.create(
644
+ model=self.config.OPENAI_MODEL,
645
+ messages=[
646
+ {"role": "system", "content": "You are a Cypher query generator. Return only valid Cypher queries."},
647
+ {"role": "user", "content": prompt}
648
+ ],
649
+ max_tokens=300,
650
+ temperature=0.1
651
+ )
652
+ cypher_query = response.choices[0].message.content.strip()
653
+ except Exception as sdk_err:
654
+ import openai
655
+ try:
656
+ openai.api_key = self.config.OPENAI_API_KEY
657
+ response = openai.ChatCompletion.create(
658
+ model=self.config.OPENAI_MODEL,
659
+ messages=[
660
+ {"role": "system", "content": "You are a Cypher query generator. Return only valid Cypher queries."},
661
+ {"role": "user", "content": prompt}
662
+ ],
663
+ max_tokens=300,
664
+ temperature=0.1
665
+ )
666
+ cypher_query = response.choices[0].message.content.strip()
667
+ except Exception:
668
+ raise sdk_err
669
+
670
+ # Validate the response
671
+ if cypher_query.upper() == "FALLBACK":
672
+ return None
673
+
674
+ # Basic validation - check if it starts with MATCH
675
+ if cypher_query.upper().startswith('MATCH'):
676
+ return cypher_query
677
+
678
+ return None
679
+
680
+ except Exception as e:
681
+ print(f"LLM Cypher generation error: {e}")
682
+ return None
683
+
684
+ def _generate_fallback_query(self, query: str) -> Optional[str]:
685
+ """Generate fallback query when intent is unclear"""
686
+ # Try to extract locations using spell corrector
687
+ locations = self.spell_corrector.extract_locations_from_query(query)
688
+
689
+ if len(locations) >= 2:
690
+ from_loc = locations[0][1]
691
+ to_loc = locations[1][1]
692
+ return f"""
693
+ MATCH (a:Place {{name: '{from_loc}'}})-[r:Fare]->(b:Place {{name: '{to_loc}'}})
694
+ RETURN
695
+ a.name as from_place,
696
+ b.name as to_place,
697
+ r.fare as fare
698
+ """
699
+
700
+ # Additional fallback: direct pattern matching for fare queries
701
+ if 'fare' in query.lower() or 'price' in query.lower() or 'cost' in query.lower():
702
+ import re
703
+ fare_patterns = [
704
+ r'fare\s+(?:of|from)?\s+([a-zA-Z\s]+)\s+to\s+([a-zA-Z\s]+)',
705
+ r'price\s+(?:of|from)?\s+([a-zA-Z\s]+)\s+to\s+([a-zA-Z\s]+)',
706
+ r'cost\s+(?:of|from)?\s+([a-zA-Z\s]+)\s+to\s+([a-zA-Z\s]+)',
707
+ r'(?:what\s+is\s+)?(?:the\s+)?(?:fare|price|cost)(?:\s+of)?(?:\s+from)?\s+([a-zA-Z\s]+)\s+to\s+([a-zA-Z\s]+)',
708
+ r'(?:how\s+much\s+)?(?:is|does)\s+(?:the\s+)?(?:fare|price|cost)\s+(?:from\s+)?([a-zA-Z\s]+)\s+to\s+([a-zA-Z\s]+)'
709
+ ]
710
+
711
+ for pattern in fare_patterns:
712
+ match = re.search(pattern, query.lower())
713
+ if match:
714
+ from_loc = match.group(1).strip()
715
+ to_loc = match.group(2).strip()
716
+
717
+ # Correct locations
718
+ from_corrected, from_conf, _ = self.spell_corrector.correct_location(from_loc)
719
+ to_corrected, to_conf, _ = self.spell_corrector.correct_location(to_loc)
720
+
721
+ if from_conf > 0.5 and to_conf > 0.5:
722
+ return f"""
723
+ MATCH (a:Place {{name: '{from_corrected}'}})-[r:Fare]->(b:Place {{name: '{to_corrected}'}})
724
+ RETURN a.name as from_place, b.name as to_place, r.fare as fare
725
+ """
726
+
727
+ return None
728
+
729
+ def _execute_query(self, cypher_query: str) -> List[Dict]:
730
+ """Execute Cypher query and return results"""
731
+ try:
732
+ with self.neo4j_service.driver.session() as session:
733
+ result = session.run(cypher_query)
734
+ return [dict(record) for record in result]
735
+ except Exception as e:
736
+ print(f"Query execution error: {e}")
737
+ return []
738
+
739
+ def _format_response(self, intent: Dict, entities: Dict, results: List[Dict], query: str) -> Dict[str, Any]:
740
+ """Format the response based on intent and results"""
741
+ primary_intent = intent['primary']
742
+
743
+ if not results:
744
+ return {
745
+ 'success': False,
746
+ 'message': 'No results found for your query.',
747
+ 'suggestions': self._get_suggestions()
748
+ }
749
+
750
+ if primary_intent == 'fare_inquiry':
751
+ return self._format_fare_response(results, entities)
752
+ elif primary_intent == 'comparison':
753
+ return self._format_comparison_response(results, entities)
754
+ elif primary_intent == 'route_inquiry':
755
+ return self._format_route_response(results, entities)
756
+ elif primary_intent == 'statistics':
757
+ return self._format_statistics_response(results)
758
+ elif primary_intent == 'recommendation':
759
+ return self._format_recommendation_response(results, query)
760
+ elif primary_intent == 'range_search':
761
+ return self._format_range_response(results, entities)
762
+ else:
763
+ return self._format_generic_response(results)
764
+
765
+ def _format_fare_response(self, results: List[Dict], entities: Dict) -> Dict[str, Any]:
766
+ """Format fare inquiry response"""
767
+ if results:
768
+ result = results[0]
769
+ return {
770
+ 'success': True,
771
+ 'message': f"The fare from {result['from_place']} to {result['to_place']} is Rs. {result['fare']}",
772
+ 'data': results,
773
+ 'query_type': 'fare_inquiry',
774
+ 'summary': {
775
+ 'from_place': result['from_place'],
776
+ 'to_place': result['to_place'],
777
+ 'fare': result['fare']
778
+ }
779
+ }
780
+ return {'success': False, 'message': 'Fare information not found.'}
781
+
782
+ def _format_comparison_response(self, results: List[Dict], entities: Dict) -> Dict[str, Any]:
783
+ """Format comparison response"""
784
+ if results:
785
+ result = results[0]
786
+ return {
787
+ 'success': True,
788
+ 'message': result.get('comparison', 'Comparison completed'),
789
+ 'data': results,
790
+ 'query_type': 'comparison',
791
+ 'summary': {
792
+ 'route1': result.get('route1'),
793
+ 'route2': result.get('route2'),
794
+ 'difference': result.get('difference')
795
+ }
796
+ }
797
+ return {'success': False, 'message': 'Comparison not possible.'}
798
+
799
+ def _format_route_response(self, results: List[Dict], entities: Dict) -> Dict[str, Any]:
800
+ """Format route inquiry response"""
801
+ return {
802
+ 'success': True,
803
+ 'message': f"Found {len(results)} routes",
804
+ 'data': results,
805
+ 'query_type': 'route_inquiry',
806
+ 'summary': {
807
+ 'total_routes': len(results),
808
+ 'fare_range': f"Rs. {min(r['fare'] for r in results)} - Rs. {max(r['fare'] for r in results)}" if results else "N/A"
809
+ }
810
+ }
811
+
812
+ def _format_statistics_response(self, results: List[Dict]) -> Dict[str, Any]:
813
+ """Format statistics response"""
814
+ if results:
815
+ stats = results[0]
816
+ return {
817
+ 'success': True,
818
+ 'message': f"Database contains {stats['total_places']} places and {stats['total_routes']} routes",
819
+ 'data': results,
820
+ 'query_type': 'statistics',
821
+ 'summary': {
822
+ 'total_places': stats['total_places'],
823
+ 'total_routes': stats['total_routes'],
824
+ 'average_fare': stats['average_fare'],
825
+ 'fare_range': f"Rs. {stats['minimum_fare']} - Rs. {stats['maximum_fare']}"
826
+ }
827
+ }
828
+ return {'success': False, 'message': 'Statistics not available.'}
829
+
830
+ def _format_recommendation_response(self, results: List[Dict], query: str) -> Dict[str, Any]:
831
+ """Format recommendation response"""
832
+ return {
833
+ 'success': True,
834
+ 'message': f"Here are {len(results)} recommended routes",
835
+ 'data': results,
836
+ 'query_type': 'recommendation',
837
+ 'summary': {
838
+ 'recommendations_count': len(results),
839
+ 'fare_range': f"Rs. {min(r['fare'] for r in results)} - Rs. {max(r['fare'] for r in results)}" if results else "N/A"
840
+ }
841
+ }
842
+
843
+ def _format_range_response(self, results: List[Dict], entities: Dict) -> Dict[str, Any]:
844
+ """Format range search response"""
845
+ return {
846
+ 'success': True,
847
+ 'message': f"Found {len(results)} routes in your specified range",
848
+ 'data': results,
849
+ 'query_type': 'range_search',
850
+ 'summary': {
851
+ 'routes_found': len(results),
852
+ 'fare_range': f"Rs. {min(r['fare'] for r in results)} - Rs. {max(r['fare'] for r in results)}" if results else "N/A"
853
+ }
854
+ }
855
+
856
+ def _format_generic_response(self, results: List[Dict]) -> Dict[str, Any]:
857
+ """Format generic response"""
858
+ return {
859
+ 'success': True,
860
+ 'message': f"Found {len(results)} results",
861
+ 'data': results,
862
+ 'query_type': 'generic'
863
+ }
864
+
865
+ def _handle_unclear_query(self, query: str) -> Dict[str, Any]:
866
+ """Handle unclear or ambiguous queries"""
867
+ return {
868
+ 'success': False,
869
+ 'message': 'I could not understand your query. Please try rephrasing it.',
870
+ 'suggestions': self._get_suggestions(),
871
+ 'query_type': 'unclear'
872
+ }
873
+
874
+ def _calculate_confidence(self, intent: Dict, entities: Dict) -> float:
875
+ """Calculate confidence score for the query interpretation"""
876
+ confidence = 0.0
877
+
878
+ # Intent confidence
879
+ confidence += intent.get('confidence', 0) * 0.4
880
+
881
+ # Entity confidence
882
+ locations = entities.get('locations', [])
883
+ if locations:
884
+ avg_location_confidence = sum(loc['confidence'] for loc in locations) / len(locations)
885
+ confidence += avg_location_confidence * 0.4
886
+
887
+ # Query complexity bonus
888
+ if len(locations) >= 2:
889
+ confidence += 0.2
890
+
891
+ return min(confidence, 1.0)
892
+
893
+ def _get_suggestions(self) -> List[str]:
894
+ """Get query suggestions"""
895
+ return [
896
+ "What is the fare from Colombo to Kandy?",
897
+ "Compare fares from Colombo to Kandy vs Colombo to Galle",
898
+ "Show me routes from Panadura",
899
+ "Find routes under 500 rupees",
900
+ "What are the cheapest routes?",
901
+ "Show me popular destinations",
902
+ "Give me database statistics",
903
+ "Recommend affordable routes"
904
+ ]
llm_query_processor.py ADDED
@@ -0,0 +1,351 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ LLM-Based Query Processor for Transport Query Application
4
+ Uses AI to interpret queries and generate Cypher queries
5
+ """
6
+
7
+ import re
8
+ from typing import Dict, List, Tuple, Optional
9
+ from spell_corrector import SpellCorrector
10
+ from neo4j_service import Neo4jService
11
+ from config import Config
12
+
13
+ class LLMQueryProcessor:
14
+ """Process natural language queries using LLM for interpretation and Cypher generation"""
15
+
16
+ def __init__(self):
17
+ self.config = Config()
18
+ self.spell_corrector = SpellCorrector()
19
+ self.neo4j_service = Neo4jService()
20
+
21
+ def process_query(self, user_query: str) -> Dict:
22
+ """
23
+ Process a natural language query using LLM for interpretation
24
+
25
+ Returns:
26
+ Dictionary with query results and metadata
27
+ """
28
+ try:
29
+ # First, extract and correct locations from the query
30
+ locations = self.spell_corrector.extract_locations_from_query(user_query)
31
+
32
+ # Use LLM to interpret the query and generate Cypher
33
+ interpretation = self._interpret_query_with_llm(user_query, locations)
34
+
35
+ if interpretation['success']:
36
+ # Execute the generated Cypher query
37
+ result = self._execute_cypher_query(interpretation['cypher_query'])
38
+
39
+ return {
40
+ 'success': True,
41
+ 'message': interpretation['message'],
42
+ 'cypher_query': interpretation['cypher_query'],
43
+ 'data': result,
44
+ 'corrections': self._format_corrections(locations),
45
+ 'query_type': interpretation['query_type']
46
+ }
47
+ else:
48
+ return {
49
+ 'success': False,
50
+ 'message': interpretation['message'],
51
+ 'suggestions': self._get_query_suggestions()
52
+ }
53
+
54
+ except Exception as e:
55
+ print(f"Query processing error: {e}")
56
+ return {
57
+ 'success': False,
58
+ 'message': 'An error occurred while processing your query.',
59
+ 'suggestions': self._get_query_suggestions()
60
+ }
61
+
62
+ def _interpret_query_with_llm(self, query: str, locations: List[Tuple]) -> Dict:
63
+ """Use LLM to interpret the query and generate appropriate Cypher"""
64
+ try:
65
+ if not self.config.OPENAI_API_KEY:
66
+ return self._fallback_interpretation(query, locations)
67
+
68
+ # Get available places for context
69
+ available_places = list(self.neo4j_service.get_all_places())
70
+
71
+ # Create comprehensive prompt for query interpretation
72
+ prompt = f"""
73
+ You are an intelligent transport query interpreter for a Neo4j database containing Sri Lankan transport data.
74
+
75
+ Database Schema:
76
+ - Nodes: Place (with property 'name')
77
+ - Relationships: Fare (with property 'fare')
78
+
79
+ Available Places: {', '.join(available_places[:50])}... (total: {len(available_places)})
80
+
81
+ User Query: "{query}"
82
+
83
+ Extracted Locations: {[f"{orig}->{corr}" for orig, corr, conf, method in locations]}
84
+
85
+ Your task is to:
86
+ 1. Determine the query type (fare, cheapest, expensive, places, routes_from, routes_to, statistics, lowest_fare)
87
+ 2. Generate the appropriate Cypher query
88
+ 3. Provide a clear response message
89
+
90
+ Query Types:
91
+ - fare: Find fare between two specific locations
92
+ - cheapest: Find cheapest routes (top 10)
93
+ - expensive: Find most expensive routes (top 10)
94
+ - places: List all places
95
+ - routes_from: Find routes departing from a location
96
+ - routes_to: Find routes arriving at a location
97
+ - statistics: Get database statistics
98
+ - lowest_fare: Find the single lowest fare with route details
99
+
100
+ Return your response in this exact JSON format:
101
+ {{
102
+ "query_type": "fare|cheapest|expensive|places|routes_from|routes_to|statistics|lowest_fare",
103
+ "cypher_query": "MATCH ... RETURN ...",
104
+ "message": "Clear response message for the user"
105
+ }}
106
+
107
+ Examples:
108
+ - "What is the fare from Colombo to Kandy?" → fare query: MATCH (a:Place {name: 'Colombo'})-[r:Fare]->(b:Place {name: 'Kandy'}) RETURN a.name as from_place, b.name as to_place, r.fare as fare
109
+ - "fare of anuradhapura to kandy?" → fare query: MATCH (a:Place {name: 'Anuradapura'})-[r:Fare]->(b:Place {name: 'Kandy'}) RETURN a.name as from_place, b.name as to_place, r.fare as fare
110
+ - "Show me the cheapest routes" → cheapest query: MATCH (a:Place)-[r:Fare]->(b:Place) RETURN a.name as from_place, b.name as to_place, r.fare as fare ORDER BY r.fare ASC LIMIT 10
111
+ - "What is the lowest fare?" → lowest_fare query: MATCH (a:Place)-[r:Fare]->(b:Place) RETURN a.name as from_place, b.name as to_place, r.fare as fare ORDER BY r.fare ASC LIMIT 1
112
+ - "List all places" → places query: MATCH (p:Place) RETURN DISTINCT p.name as place ORDER BY p.name
113
+ - "Routes from Colombo" → routes_from query: MATCH (a:Place {name: 'Colombo'})-[r:Fare]->(b:Place) RETURN a.name as from_place, b.name as to_place, r.fare as fare ORDER BY r.fare
114
+ - "Database statistics" → statistics query: MATCH (p:Place) MATCH ()-[r:Fare]->() RETURN count(DISTINCT p) as total_places, count(r) as total_routes, avg(r.fare) as average_fare, min(r.fare) as min_fare, max(r.fare) as max_fare
115
+
116
+ Keep Cypher queries simple and avoid complex functions like shortestPath. Use direct relationships only.
117
+
118
+ For fare queries, recognize various formats like "fare of X to Y", "fare from X to Y", "price from X to Y", etc.
119
+ """
120
+
121
+ # Call LLM using new SDK first, legacy as fallback
122
+ import json
123
+ interpretation = None
124
+ try:
125
+ from openai import OpenAI
126
+ client = OpenAI(api_key=self.config.OPENAI_API_KEY)
127
+ response = client.chat.completions.create(
128
+ model=self.config.OPENAI_MODEL,
129
+ messages=[
130
+ {"role": "system", "content": "You are a transport query interpreter. Return only valid JSON."},
131
+ {"role": "user", "content": prompt}
132
+ ],
133
+ max_tokens=500,
134
+ temperature=0.1
135
+ )
136
+ interpretation = json.loads(response.choices[0].message.content.strip())
137
+ except Exception as sdk_err:
138
+ try:
139
+ import openai
140
+ openai.api_key = self.config.OPENAI_API_KEY
141
+ response = openai.ChatCompletion.create(
142
+ model=self.config.OPENAI_MODEL,
143
+ messages=[
144
+ {"role": "system", "content": "You are a transport query interpreter. Return only valid JSON."},
145
+ {"role": "user", "content": prompt}
146
+ ],
147
+ max_tokens=500,
148
+ temperature=0.1
149
+ )
150
+ interpretation = json.loads(response.choices[0].message.content.strip())
151
+ except Exception:
152
+ raise sdk_err
153
+
154
+ # Validate the response
155
+ if interpretation and 'query_type' in interpretation and 'cypher_query' in interpretation and 'message' in interpretation:
156
+ return {
157
+ 'success': True,
158
+ 'query_type': interpretation['query_type'],
159
+ 'cypher_query': interpretation['cypher_query'],
160
+ 'message': interpretation['message']
161
+ }
162
+ else:
163
+ return self._fallback_interpretation(query, locations)
164
+
165
+ except Exception as e:
166
+ print(f"LLM interpretation error: {e}")
167
+ return self._fallback_interpretation(query, locations)
168
+
169
def _fallback_interpretation(self, query: str, locations: List[Tuple]) -> Dict:
    """Interpret *query* with keyword rules when the LLM is not available.

    Rules are tried in order and the first one that produces a result wins:

    1. "lowest" / "minimum" / "cheapest"   -> ``lowest_fare`` or ``cheapest``
    2. "expensive" / "highest" / "maximum" -> ``expensive``
    3. "places" / "locations" / "list all" -> ``places``
    4. "statistics" / "stats"              -> ``statistics``
    5. two or more extracted locations     -> ``fare`` between the first two
    6. "fare" and "to" in the text         -> regex extraction, then ``fare``
    7. exactly one extracted location      -> ``routes_from`` / ``routes_to``

    Args:
        query: Raw user query text.
        locations: Extracted locations as
            ``(original, corrected, confidence, method)`` tuples; only the
            corrected name (index 1) is read here.

    Returns:
        A dict with ``success`` True plus ``query_type``, ``cypher_query``
        and ``message``; or ``success`` False plus ``message`` when no rule
        applies. A dict is always returned (never ``None``).
    """
    import re

    query_lower = query.lower()

    def mentions(*keywords):
        """Return True if any keyword occurs as a substring of the query."""
        return any(keyword in query_lower for keyword in keywords)

    def quote(value):
        """Escape a value for embedding in a single-quoted Cypher literal."""
        return str(value).replace("\\", "\\\\").replace("'", "\\'")

    def interpreted(query_type, cypher_query, message):
        """Build the success payload shared by every rule."""
        return {
            'success': True,
            'query_type': query_type,
            'cypher_query': cypher_query,
            'message': message
        }

    def fare_between(origin, destination):
        """Build the fare lookup for one origin/destination pair."""
        return interpreted(
            'fare',
            f"""
            MATCH (a:Place {{name: '{quote(origin)}'}})-[r:Fare]->(b:Place {{name: '{quote(destination)}'}})
            RETURN a.name as from_place, b.name as to_place, r.fare as fare
            """,
            f'Finding fare from {origin} to {destination}...'
        )

    # Rule 1: cheapest-style questions.
    if mentions('lowest', 'minimum', 'cheapest'):
        if mentions('lowest fare', 'minimum fare'):
            return interpreted(
                'lowest_fare',
                """
                MATCH (a:Place)-[r:Fare]->(b:Place)
                WITH a, b, r, r.fare as fare
                ORDER BY r.fare ASC
                LIMIT 1
                RETURN a.name as from_place, b.name as to_place, fare
                """,
                'Finding the lowest fare in the database...'
            )
        return interpreted(
            'cheapest',
            """
            MATCH (a:Place)-[r:Fare]->(b:Place)
            RETURN a.name as from_place, b.name as to_place, r.fare as fare
            ORDER BY r.fare ASC
            LIMIT 10
            """,
            'Finding the cheapest routes...'
        )

    # Rule 2: most expensive routes.
    if mentions('expensive', 'highest', 'maximum'):
        return interpreted(
            'expensive',
            """
            MATCH (a:Place)-[r:Fare]->(b:Place)
            RETURN a.name as from_place, b.name as to_place, r.fare as fare
            ORDER BY r.fare DESC
            LIMIT 10
            """,
            'Finding the most expensive routes...'
        )

    # Rule 3: list of places.
    if mentions('places', 'locations', 'list all'):
        return interpreted(
            'places',
            """
            MATCH (p:Place)
            RETURN DISTINCT p.name as place
            ORDER BY p.name
            """,
            'Listing all places...'
        )

    # Rule 4: database statistics.
    if mentions('statistics', 'stats'):
        return interpreted(
            'statistics',
            """
            MATCH (p:Place)
            MATCH ()-[r:Fare]->()
            RETURN
            count(DISTINCT p) as total_places,
            count(r) as total_routes,
            avg(r.fare) as average_fare,
            min(r.fare) as min_fare,
            max(r.fare) as max_fare
            """,
            'Getting database statistics...'
        )

    # Rule 5: the extractor already found both ends of the route.
    if len(locations) >= 2:
        return fare_between(locations[0][1], locations[1][1])

    # Rule 6: "fare of X to Y"-style text the extractor could not resolve.
    if 'fare' in query_lower and 'to' in query_lower:
        place = r'([a-zA-Z]+(?:\s+[a-zA-Z]+)*)'
        route = place + r'\s+to\s+' + place
        fare_patterns = [
            r'fare\s+(?:of|from)?\s+' + route,
            r'price\s+(?:of|from)?\s+' + route,
            r'cost\s+(?:of|from)?\s+' + route,
            r'how\s+much\s+(?:is|does)\s+(?:the\s+)?(?:fare|price|cost)\s+(?:from\s+)?' + route,
            r'what\s+(?:is|are)\s+(?:the\s+)?(?:fare|price|cost)s?\s+(?:from\s+)?' + route,
            route + r'\s+(?:fare|price|cost)',
            r'(?:fare|price|cost)\s+' + route
        ]

        for pattern in fare_patterns:
            match = re.search(pattern, query_lower)
            if not match:
                continue

            from_corrected, from_conf, _ = self.spell_corrector.correct_location(match.group(1).strip())
            to_corrected, to_conf, _ = self.spell_corrector.correct_location(match.group(2).strip())

            if from_conf > 0.5 and to_conf > 0.5:
                return fare_between(from_corrected, to_corrected)
        # No pattern produced two confident names: keep going so the
        # remaining rules run and the caller always receives a dict.

    # Rule 7: a single known location -> routes touching it.
    if len(locations) == 1:
        location = locations[0][1]
        if 'from' in query_lower:
            return interpreted(
                'routes_from',
                f"""
                MATCH (a:Place {{name: '{quote(location)}'}})-[r:Fare]->(b:Place)
                RETURN a.name as from_place, b.name as to_place, r.fare as fare
                ORDER BY r.fare
                """,
                f'Finding routes from {location}...'
            )
        return interpreted(
            'routes_to',
            f"""
            MATCH (a:Place)-[r:Fare]->(b:Place {{name: '{quote(location)}'}})
            RETURN a.name as from_place, b.name as to_place, r.fare as fare
            ORDER BY r.fare
            """,
            f'Finding routes to {location}...'
        )

    return {
        'success': False,
        'message': 'I could not understand your query. Please try rephrasing it.'
    }
316
+
317
def _execute_cypher_query(self, cypher_query: str) -> List[Dict]:
    """Run a Cypher statement and return every record as a plain dict.

    Any exception raised while opening the session, running the statement
    or reading the records is printed, and an empty list is returned.
    """
    rows = []
    try:
        with self.neo4j_service.driver.session() as db_session:
            for record in db_session.run(cypher_query):
                rows.append(dict(record))
    except Exception as e:
        print(f"Cypher execution error: {e}")
        return []
    return rows
326
+
327
def _format_corrections(self, locations: List[Tuple]) -> List[Dict]:
    """Describe the locations whose corrected name differs from the input.

    Each entry of *locations* is ``(original, corrected, confidence,
    method)``. Entries whose original and corrected names are equal
    ignoring case are left out.
    """
    return [
        {
            'original': typed,
            'corrected': resolved,
            'confidence': score,
            'method': how
        }
        for typed, resolved, score, how in locations
        if typed.lower() != resolved.lower()
    ]
339
+
340
def _get_query_suggestions(self) -> List[str]:
    """Return example queries, one per supported query style."""
    examples = (
        "What is the fare from Colombo to Kandy?",
        "What is the lowest fare price?",
        "Show me the cheapest routes",
        "Show me the most expensive routes",
        "List all places",
        "Routes from Panadura",
        "Routes to Galle",
        "Database statistics",
    )
    # Hand back a fresh list on every call so callers may mutate it freely.
    return list(examples)
logger.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Centralized logging setup for the Transport Query Application.
4
+ Provides a rotating file handler and console output.
5
+ """
6
+
7
+ import logging
8
+ import os
9
+ from logging.handlers import RotatingFileHandler
10
+
11
+
12
def get_logger(name: str) -> logging.Logger:
    """Create or retrieve a configured logger with file and console handlers.

    Configuration is read from the environment:

    * ``LOG_LEVEL`` - name of a ``logging`` level (case-insensitive);
      unknown or invalid names fall back to ``INFO``.
    * ``LOG_DIR`` - directory for ``app.log``; defaults to ``../logs``
      relative to this module and falls back to the current working
      directory when it cannot be created.

    A logger is configured only once; later calls with the same *name*
    return it unchanged. If the log file cannot be opened, the logger is
    still returned with console output only.
    """
    logger = logging.getLogger(name)

    if getattr(logger, "_configured", False):
        return logger

    log_level_str = os.getenv("LOG_LEVEL", "INFO").upper()
    level = getattr(logging, log_level_str, None)
    # getattr can hit non-level attributes of the logging module (functions,
    # classes, boolean flags); only genuine integer levels are accepted.
    if isinstance(level, bool) or not isinstance(level, int):
        level = logging.INFO

    log_dir = os.getenv("LOG_DIR", os.path.join(os.path.dirname(__file__), "..", "logs"))
    try:
        log_dir = os.path.abspath(log_dir)
        os.makedirs(log_dir, exist_ok=True)
    except (OSError, ValueError):
        # Fallback to current directory if path invalid
        log_dir = os.getcwd()

    log_path = os.path.join(log_dir, "app.log")

    formatter = logging.Formatter(
        fmt="%(asctime)s %(levelname)s [%(name)s] %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )

    handlers = []
    file_error = None
    try:
        # File handler with rotation (1 MB, keep 5 backups)
        handlers.append(
            RotatingFileHandler(log_path, maxBytes=1_000_000, backupCount=5, encoding="utf-8")
        )
    except OSError as exc:
        # An unwritable location must not prevent the application from
        # logging at all; continue with the console handler only.
        file_error = exc

    # Console handler
    handlers.append(logging.StreamHandler())

    # Configure logger
    logger.setLevel(level)
    for handler in handlers:
        handler.setFormatter(formatter)
        logger.addHandler(handler)
    logger.propagate = False

    logger._configured = True  # type: ignore[attr-defined]
    if file_error is not None:
        logger.warning(f"File logging disabled, could not open {log_path}: {file_error}")
    logger.debug(f"Logger initialized. Level={logging.getLevelName(level)}, File={log_path}")
    return logger
52
+
53
+
neo4j_service.py ADDED
@@ -0,0 +1,222 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Neo4j Service for Transport Query Application
4
+ Handles all database operations
5
+ """
6
+
7
+ from neo4j import GraphDatabase
8
+ from typing import List, Dict, Optional, Tuple
9
+ from config import Config
10
+
11
class Neo4jService:
    """Data-access layer over the Neo4j transport graph.

    The queries here read ``Place`` nodes (property ``name``) joined by
    directed ``Fare`` relationships (property ``fare``). Every public
    method is best-effort: when the service is not connected, or a query
    raises, the error is printed and an empty value (``None``, ``[]`` or
    ``{}``) is returned instead of raising.
    """

    def __init__(self):
        self.config = Config()
        self.driver = None
        self._connect()

    def _connect(self):
        """Open a driver and keep it only if a trivial query succeeds."""
        driver = None
        try:
            driver = GraphDatabase.driver(
                self.config.NEO4J_URI,
                auth=(self.config.NEO4J_USER, self.config.NEO4J_PASSWORD)
            )
            # Test connection
            with driver.session() as session:
                session.run("RETURN 1")
            self.driver = driver
            print("✅ Connected to Neo4j database")
        except Exception as e:
            print(f"❌ Failed to connect to Neo4j: {e}")
            self.driver = None
            if driver is not None:
                # The driver object was created but the probe failed:
                # release it instead of just dropping the reference.
                try:
                    driver.close()
                except Exception as close_err:
                    print(f"Error closing Neo4j driver: {close_err}")

    def is_connected(self) -> bool:
        """Return True when a driver is currently held."""
        return self.driver is not None

    def _fetch_rows(self, cypher: str, failure_label: str, **params) -> List[Dict]:
        """Run *cypher* with *params* and return the records as dicts.

        Returns ``[]`` when not connected or when the query raises; in the
        latter case ``"Error <failure_label>: <exception>"`` is printed.
        """
        if not self.is_connected():
            return []

        try:
            with self.driver.session() as session:
                return [dict(record) for record in session.run(cypher, **params)]
        except Exception as e:
            print(f"Error {failure_label}: {e}")
            return []

    def get_fare(self, from_location: str, to_location: str) -> Optional[Dict]:
        """Get the fare for the direct route between two places.

        Returns a dict with ``from_place``, ``to_place`` and ``fare``, or
        ``None`` when there is no such route, no connection, or an error.
        """
        if not self.is_connected():
            return None

        try:
            with self.driver.session() as session:
                result = session.run("""
                    MATCH (a:Place {name: $from_location})-[r:Fare]->(b:Place {name: $to_location})
                    RETURN a.name as from_place, b.name as to_place, r.fare as fare
                """, from_location=from_location, to_location=to_location)

                record = result.single()
                if record:
                    return {
                        'from_place': record['from_place'],
                        'to_place': record['to_place'],
                        'fare': record['fare']
                    }
                return None

        except Exception as e:
            print(f"Error getting fare: {e}")
            return None

    def get_all_places(self) -> List[str]:
        """Get the distinct place names, ordered by name."""
        rows = self._fetch_rows("""
            MATCH (p:Place)
            RETURN DISTINCT p.name as place
            ORDER BY p.name
        """, "getting places")
        return [row['place'] for row in rows]

    def get_routes_from_location(self, from_location: str) -> List[Dict]:
        """Get all routes leaving *from_location*, ordered by fare."""
        return self._fetch_rows("""
            MATCH (a:Place {name: $from_location})-[r:Fare]->(b:Place)
            RETURN a.name as from_place, b.name as to_place, r.fare as fare
            ORDER BY r.fare
        """, "getting routes from location", from_location=from_location)

    def get_routes_to_location(self, to_location: str) -> List[Dict]:
        """Get all routes arriving at *to_location*, ordered by fare."""
        return self._fetch_rows("""
            MATCH (a:Place)-[r:Fare]->(b:Place {name: $to_location})
            RETURN a.name as from_place, b.name as to_place, r.fare as fare
            ORDER BY r.fare
        """, "getting routes to location", to_location=to_location)

    def get_cheapest_routes(self, limit: int = 10) -> List[Dict]:
        """Get the *limit* routes with the lowest fares."""
        return self._fetch_rows("""
            MATCH (a:Place)-[r:Fare]->(b:Place)
            RETURN a.name as from_place, b.name as to_place, r.fare as fare
            ORDER BY r.fare ASC
            LIMIT $limit
        """, "getting cheapest routes", limit=limit)

    def get_most_expensive_routes(self, limit: int = 10) -> List[Dict]:
        """Get the *limit* routes with the highest fares."""
        return self._fetch_rows("""
            MATCH (a:Place)-[r:Fare]->(b:Place)
            RETURN a.name as from_place, b.name as to_place, r.fare as fare
            ORDER BY r.fare DESC
            LIMIT $limit
        """, "getting most expensive routes", limit=limit)

    def search_routes_by_fare_range(self, min_fare: float, max_fare: float) -> List[Dict]:
        """Get routes whose fare lies in ``[min_fare, max_fare]``, by fare."""
        return self._fetch_rows("""
            MATCH (a:Place)-[r:Fare]->(b:Place)
            WHERE r.fare >= $min_fare AND r.fare <= $max_fare
            RETURN a.name as from_place, b.name as to_place, r.fare as fare
            ORDER BY r.fare
        """, "searching routes by fare range", min_fare=min_fare, max_fare=max_fare)

    def get_route_statistics(self) -> Dict:
        """Get place/route counts and the average, minimum and maximum fare.

        Returns ``{}`` when not connected or when any query fails. The
        average is rounded to two decimals and reported as 0 when the
        database returns no (or a zero) average.
        """
        if not self.is_connected():
            return {}

        try:
            with self.driver.session() as session:
                # Count places
                places_result = session.run("MATCH (p:Place) RETURN count(p) as place_count")
                place_count = places_result.single()['place_count']

                # Count routes
                routes_result = session.run("MATCH ()-[r:Fare]->() RETURN count(r) as route_count")
                route_count = routes_result.single()['route_count']

                # Average fare
                avg_result = session.run("MATCH ()-[r:Fare]->() RETURN avg(r.fare) as avg_fare")
                avg_fare = avg_result.single()['avg_fare']

                # Min and max fares
                fare_range_result = session.run("""
                    MATCH ()-[r:Fare]->()
                    RETURN min(r.fare) as min_fare, max(r.fare) as max_fare
                """)
                fare_range = fare_range_result.single()

                return {
                    'total_places': place_count,
                    'total_routes': route_count,
                    'average_fare': round(avg_fare, 2) if avg_fare else 0,
                    'min_fare': fare_range['min_fare'],
                    'max_fare': fare_range['max_fare']
                }

        except Exception as e:
            print(f"Error getting statistics: {e}")
            return {}

    def close(self):
        """Close the driver, if any, and mark the service as disconnected."""
        if self.driver:
            self.driver.close()
            self.driver = None
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ Flask==2.3.3
2
+ neo4j==5.14.1
3
+ requests==2.31.0
4
+ openai==1.3.0
5
+ python-dotenv==1.0.0
6
+ fuzzywuzzy==0.18.0
7
+ python-Levenshtein==0.23.0
8
+ pandas==2.1.3
9
+ numpy==1.24.3
10
+ Werkzeug==2.3.7
spell_corrector.py ADDED
@@ -0,0 +1,257 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Spell Correction Module for Transport Query Application
4
+ Handles location name corrections using fuzzy matching and LLM
5
+ """
6
+
7
+ import re
8
+ from fuzzywuzzy import fuzz
9
+ from typing import List, Tuple, Optional
10
+ import openai
11
+ from config import Config
12
+
13
+ class SpellCorrector:
14
+ """Spell correction for location names"""
15
+
16
+ def __init__(self):
17
+ self.config = Config()
18
+ self.location_mapping = self.config.LOCATION_MAPPING
19
+ self.available_locations = set(self.location_mapping.values())
20
+
21
+ # Initialize OpenAI if API key is available
22
+ if self.config.OPENAI_API_KEY:
23
+ try:
24
+ # Prefer new SDK client if installed; otherwise set legacy api key
25
+ try:
26
+ from openai import OpenAI # noqa: F401
27
+ self.llm_available = True
28
+ except Exception:
29
+ openai.api_key = self.config.OPENAI_API_KEY
30
+ self.llm_available = True
31
+ except Exception:
32
+ self.llm_available = False
33
+ else:
34
+ self.llm_available = False
35
+
36
+ def correct_location(self, location: str) -> Tuple[str, float, str]:
37
+ """
38
+ Correct a location name using multiple methods
39
+
40
+ Returns:
41
+ Tuple of (corrected_name, confidence_score, correction_method)
42
+ """
43
+ location = location.strip().lower()
44
+
45
+ # Method 1: Direct mapping
46
+ if location in self.location_mapping:
47
+ corrected = self.location_mapping[location]
48
+ return corrected, 1.0, "direct_mapping"
49
+
50
+ # Method 2: Fuzzy matching
51
+ best_match, confidence = self._fuzzy_match(location)
52
+ if confidence >= self.config.SIMILARITY_THRESHOLD:
53
+ return best_match, confidence, "fuzzy_matching"
54
+
55
+ # Method 3: LLM correction (if available)
56
+ if self.llm_available:
57
+ llm_corrected = self._llm_correct(location)
58
+ if llm_corrected:
59
+ # Verify LLM suggestion with fuzzy matching
60
+ llm_confidence = fuzz.ratio(location.lower(), llm_corrected.lower()) / 100
61
+ if llm_confidence >= 0.6: # Lower threshold for LLM suggestions
62
+ return llm_corrected, llm_confidence, "llm_correction"
63
+
64
+ # Method 4: Partial matching
65
+ partial_match = self._partial_match(location)
66
+ if partial_match:
67
+ return partial_match, 0.7, "partial_matching"
68
+
69
+ # No correction found
70
+ return location.title(), 0.0, "no_correction"
71
+
72
+ def _fuzzy_match(self, location: str) -> Tuple[str, float]:
73
+ """Find best fuzzy match for location"""
74
+ best_match = None
75
+ best_score = 0
76
+
77
+ for available_location in self.available_locations:
78
+ score = fuzz.ratio(location.lower(), available_location.lower()) / 100
79
+ if score > best_score:
80
+ best_score = score
81
+ best_match = available_location
82
+
83
+ return best_match, best_score
84
+
85
+ def _partial_match(self, location: str) -> Optional[str]:
86
+ """Find partial matches (substring matching)"""
87
+ location_lower = location.lower()
88
+
89
+ for available_location in self.available_locations:
90
+ available_lower = available_location.lower()
91
+
92
+ # Check if location is contained in available location
93
+ if location_lower in available_lower or available_lower in location_lower:
94
+ return available_location
95
+
96
+ return None
97
+
98
+ def _llm_correct(self, location: str) -> Optional[str]:
99
+ """Use LLM to correct location name"""
100
+ try:
101
+ prompt = f"""
102
+ You are a location name correction system for Sri Lankan cities and towns.
103
+ Given a potentially misspelled location name, return the correct spelling.
104
+
105
+ Available locations include: {', '.join(sorted(self.available_locations))}
106
+
107
+ Input location: "{location}"
108
+
109
+ Return only the corrected location name, nothing else. If no correction is possible, return "UNKNOWN".
110
+ """
111
+
112
+ corrected = None
113
+ # Try new SDK first
114
+ try:
115
+ from openai import OpenAI
116
+ client = OpenAI(api_key=self.config.OPENAI_API_KEY)
117
+ response = client.chat.completions.create(
118
+ model=self.config.OPENAI_MODEL,
119
+ messages=[
120
+ {"role": "system", "content": "You are a helpful assistant that corrects location names."},
121
+ {"role": "user", "content": prompt}
122
+ ],
123
+ max_tokens=50,
124
+ temperature=0.1
125
+ )
126
+ corrected = response.choices[0].message.content.strip()
127
+ except Exception as sdk_err:
128
+ # Fallback to legacy API if present
129
+ import openai
130
+ try:
131
+ openai.api_key = self.config.OPENAI_API_KEY
132
+ response = openai.ChatCompletion.create(
133
+ model=self.config.OPENAI_MODEL,
134
+ messages=[
135
+ {"role": "system", "content": "You are a helpful assistant that corrects location names."},
136
+ {"role": "user", "content": prompt}
137
+ ],
138
+ max_tokens=50,
139
+ temperature=0.1
140
+ )
141
+ corrected = response.choices[0].message.content.strip()
142
+ except Exception:
143
+ raise sdk_err
144
+
145
+ # Validate LLM response
146
+ if corrected.upper() == "UNKNOWN":
147
+ return None
148
+
149
+ # Check if corrected location exists in our database
150
+ if corrected in self.available_locations:
151
+ return corrected
152
+
153
+ # Try fuzzy matching on LLM response
154
+ llm_fuzzy_match, confidence = self._fuzzy_match(corrected)
155
+ if confidence >= 0.8:
156
+ return llm_fuzzy_match
157
+
158
+ return None
159
+
160
+ except Exception as e:
161
+ print(f"LLM correction error: {e}")
162
+ return None
163
+
164
+ def extract_locations_from_query(self, query: str) -> List[Tuple[str, str, float, str]]:
165
+ """
166
+ Extract and correct locations from a natural language query
167
+
168
+ Returns:
169
+ List of tuples: (original, corrected, confidence, method)
170
+ """
171
+ # Common patterns for location extraction
172
+ patterns = [
173
+ r'from\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)(?:\s|$|\?)',
174
+ r'([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)(?:\s|$|\?)',
175
+ r'between\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+and\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)(?:\s|$|\?)',
176
+ r'fare\s+(?:of|from\s+)?([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)(?:\s|$|\?)',
177
+ r'price\s+(?:of|from\s+)?([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)(?:\s|$|\?)',
178
+ r'cost\s+(?:of|from\s+)?([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)(?:\s|$|\?)',
179
+ r'how\s+much\s+(?:is|does)\s+(?:the\s+)?(?:fare|price|cost)\s+(?:from\s+)?([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)(?:\s|$|\?)',
180
+ r'what\s+(?:is|are)\s+(?:the\s+)?(?:fare|price|cost)s?\s+(?:from\s+)?([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)(?:\s|$|\?)',
181
+ r'([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+(?:fare|price|cost)(?:\s|$|\?)',
182
+ r'(?:fare|price|cost)\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)(?:\s|$|\?)'
183
+ ]
184
+
185
+ locations = []
186
+
187
+ # Try all patterns to find locations
188
+ for pattern in patterns:
189
+ match = re.search(pattern, query, re.IGNORECASE)
190
+ if match:
191
+ # Extract locations from the match
192
+ groups = match.groups()
193
+ if len(groups) >= 2:
194
+ from_location = groups[0].strip()
195
+ to_location = groups[1].strip()
196
+
197
+ # Skip if locations are too short or common words
198
+ if len(from_location) >= 2 and from_location.lower() not in ['to', 'from', 'and', 'the', 'a', 'an']:
199
+ from_corrected, from_confidence, from_method = self.correct_location(from_location)
200
+ if from_confidence > 0.5:
201
+ locations.append((
202
+ from_location,
203
+ from_corrected,
204
+ from_confidence,
205
+ from_method
206
+ ))
207
+
208
+ if len(to_location) >= 2 and to_location.lower() not in ['to', 'from', 'and', 'the', 'a', 'an']:
209
+ to_corrected, to_confidence, to_method = self.correct_location(to_location)
210
+ if to_confidence > 0.5:
211
+ locations.append((
212
+ to_location,
213
+ to_corrected,
214
+ to_confidence,
215
+ to_method
216
+ ))
217
+
218
+ # If we found locations, break to avoid duplicates
219
+ if len(locations) >= 2:
220
+ break
221
+
222
+ return locations
223
+
224
+ def get_suggestions(self, partial_location: str) -> List[Tuple[str, float]]:
225
+ """Get location suggestions for autocomplete"""
226
+ suggestions = []
227
+ partial_lower = partial_location.lower()
228
+
229
+ for location in self.available_locations:
230
+ location_lower = location.lower()
231
+
232
+ # Check if partial location is a prefix
233
+ if location_lower.startswith(partial_lower):
234
+ suggestions.append((location, 1.0))
235
+ # Check fuzzy similarity
236
+ elif fuzz.ratio(partial_lower, location_lower) / 100 >= 0.6:
237
+ suggestions.append((location, fuzz.ratio(partial_lower, location_lower) / 100))
238
+
239
+ # Sort by confidence and return top suggestions
240
+ suggestions.sort(key=lambda x: x[1], reverse=True)
241
+ return suggestions[:self.config.MAX_SUGGESTIONS]
242
+
243
+ def validate_route(self, from_location: str, to_location: str) -> Tuple[bool, str]:
244
+ """Validate if a route exists in the database"""
245
+ from_corrected, from_confidence, _ = self.correct_location(from_location)
246
+ to_corrected, to_confidence, _ = self.correct_location(to_location)
247
+
248
+ if from_confidence < 0.5:
249
+ return False, f"Could not identify departure location: '{from_location}'"
250
+
251
+ if to_confidence < 0.5:
252
+ return False, f"Could not identify destination location: '{to_location}'"
253
+
254
+ if from_corrected == to_corrected:
255
+ return False, f"Departure and destination cannot be the same: '{from_corrected}'"
256
+
257
+ return True, f"Route: {from_corrected} → {to_corrected}"
templates/index.html ADDED
@@ -0,0 +1,977 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>🚌 Transport Query System</title>
7
+ <link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css" rel="stylesheet">
8
+ <style>
9
+ * {
10
+ margin: 0;
11
+ padding: 0;
12
+ box-sizing: border-box;
13
+ }
14
+
15
+ body {
16
+ font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
17
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
18
+ min-height: 100vh;
19
+ color: #333;
20
+ }
21
+
22
+ .container {
23
+ max-width: 1200px;
24
+ margin: 0 auto;
25
+ padding: 20px;
26
+ }
27
+
28
+ .header {
29
+ text-align: center;
30
+ margin-bottom: 30px;
31
+ color: white;
32
+ }
33
+
34
+ .header h1 {
35
+ font-size: 2.5rem;
36
+ margin-bottom: 10px;
37
+ text-shadow: 2px 2px 4px rgba(0,0,0,0.3);
38
+ }
39
+
40
+ .header p {
41
+ font-size: 1.1rem;
42
+ opacity: 0.9;
43
+ }
44
+
45
+ .main-content {
46
+ display: grid;
47
+ grid-template-columns: 1fr 1fr;
48
+ gap: 30px;
49
+ margin-bottom: 30px;
50
+ }
51
+
52
+ .query-section {
53
+ background: white;
54
+ border-radius: 15px;
55
+ padding: 30px;
56
+ box-shadow: 0 10px 30px rgba(0,0,0,0.2);
57
+ }
58
+
59
+ .query-section h2 {
60
+ color: #667eea;
61
+ margin-bottom: 20px;
62
+ font-size: 1.5rem;
63
+ }
64
+
65
+ .query-input {
66
+ position: relative;
67
+ margin-bottom: 20px;
68
+ }
69
+
70
+ .query-input input {
71
+ width: 100%;
72
+ padding: 15px 20px;
73
+ border: 2px solid #e1e5e9;
74
+ border-radius: 10px;
75
+ font-size: 1rem;
76
+ transition: all 0.3s ease;
77
+ }
78
+
79
+ .query-input input:focus {
80
+ outline: none;
81
+ border-color: #667eea;
82
+ box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1);
83
+ }
84
+
85
+ .query-button {
86
+ width: 100%;
87
+ padding: 15px;
88
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
89
+ color: white;
90
+ border: none;
91
+ border-radius: 10px;
92
+ font-size: 1.1rem;
93
+ font-weight: 600;
94
+ cursor: pointer;
95
+ transition: all 0.3s ease;
96
+ }
97
+
98
+ .query-button:hover {
99
+ transform: translateY(-2px);
100
+ box-shadow: 0 5px 15px rgba(0,0,0,0.2);
101
+ }
102
+
103
+ .query-button:disabled {
104
+ opacity: 0.6;
105
+ cursor: not-allowed;
106
+ transform: none;
107
+ }
108
+
109
+ .results-section {
110
+ background: white;
111
+ border-radius: 15px;
112
+ padding: 30px;
113
+ box-shadow: 0 10px 30px rgba(0,0,0,0.2);
114
+ max-height: 600px;
115
+ overflow-y: auto;
116
+ }
117
+
118
+ .results-section h2 {
119
+ color: #667eea;
120
+ margin-bottom: 20px;
121
+ font-size: 1.5rem;
122
+ }
123
+
124
+ .result-item {
125
+ background: #f8f9fa;
126
+ border-radius: 10px;
127
+ padding: 20px;
128
+ margin-bottom: 15px;
129
+ border-left: 4px solid #667eea;
130
+ }
131
+
132
+ .result-item.success {
133
+ border-left-color: #28a745;
134
+ }
135
+
136
+ .result-item.error {
137
+ border-left-color: #dc3545;
138
+ }
139
+
140
+ .result-message {
141
+ font-size: 1.1rem;
142
+ margin-bottom: 10px;
143
+ font-weight: 500;
144
+ }
145
+
146
+ .result-data {
147
+ background: #e9ecef;
148
+ border-radius: 8px;
149
+ padding: 15px;
150
+ margin: 10px 0;
151
+ font-family: 'Courier New', monospace;
152
+ font-size: 0.9rem;
153
+ overflow-x: auto;
154
+ }
155
+
156
+ .corrections {
157
+ margin-top: 15px;
158
+ padding: 10px;
159
+ background: #fff3cd;
160
+ border-radius: 8px;
161
+ border: 1px solid #ffeaa7;
162
+ }
163
+
164
+ .correction-item {
165
+ margin: 5px 0;
166
+ font-size: 0.9rem;
167
+ }
168
+
169
+ .suggestions {
170
+ margin-top: 15px;
171
+ }
172
+
173
+ .suggestion-item {
174
+ background: #e3f2fd;
175
+ border-radius: 5px;
176
+ padding: 8px 12px;
177
+ margin: 5px 0;
178
+ cursor: pointer;
179
+ transition: background 0.3s ease;
180
+ }
181
+
182
+ .suggestion-item:hover {
183
+ background: #bbdefb;
184
+ }
185
+
186
+ .chat-container {
187
+ background: white;
188
+ border-radius: 15px;
189
+ box-shadow: 0 10px 30px rgba(0,0,0,0.2);
190
+ margin-bottom: 30px;
191
+ overflow: hidden;
192
+ }
193
+
194
+ .chat-header {
195
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
196
+ color: white;
197
+ padding: 20px 30px;
198
+ display: flex;
199
+ justify-content: space-between;
200
+ align-items: center;
201
+ }
202
+
203
+ .chat-header h2 {
204
+ margin: 0;
205
+ font-size: 1.5rem;
206
+ }
207
+
208
+ .chat-status {
209
+ font-size: 0.9rem;
210
+ opacity: 0.9;
211
+ }
212
+
213
+ .chat-body {
214
+ display: grid;
215
+ grid-template-columns: 1fr 300px;
216
+ min-height: 500px;
217
+ }
218
+
219
+ .chat-messages {
220
+ padding: 20px;
221
+ max-height: 400px;
222
+ overflow-y: auto;
223
+ border-right: 1px solid #e9ecef;
224
+ }
225
+
226
+ .message {
227
+ margin-bottom: 20px;
228
+ display: flex;
229
+ align-items: flex-start;
230
+ gap: 10px;
231
+ }
232
+
233
+ .message.user {
234
+ flex-direction: row-reverse;
235
+ }
236
+
237
+ .message-avatar {
238
+ width: 40px;
239
+ height: 40px;
240
+ border-radius: 50%;
241
+ display: flex;
242
+ align-items: center;
243
+ justify-content: center;
244
+ font-size: 1.2rem;
245
+ flex-shrink: 0;
246
+ }
247
+
248
+ .message.user .message-avatar {
249
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
250
+ color: white;
251
+ }
252
+
253
+ .message.bot .message-avatar {
254
+ background: #f8f9fa;
255
+ color: #667eea;
256
+ border: 2px solid #667eea;
257
+ }
258
+
259
+ .message-content {
260
+ flex: 1;
261
+ max-width: 70%;
262
+ }
263
+
264
+ .message.user .message-content {
265
+ text-align: right;
266
+ }
267
+
268
+ .message-bubble {
269
+ background: #f8f9fa;
270
+ border-radius: 15px;
271
+ padding: 15px;
272
+ display: inline-block;
273
+ max-width: 100%;
274
+ word-wrap: break-word;
275
+ }
276
+
277
+ .message.user .message-bubble {
278
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
279
+ color: white;
280
+ }
281
+
282
+ .message-time {
283
+ font-size: 0.8rem;
284
+ color: #6c757d;
285
+ margin-top: 5px;
286
+ }
287
+
288
+ .message.user .message-time {
289
+ text-align: right;
290
+ }
291
+
292
+ .chat-input-section {
293
+ padding: 20px;
294
+ border-top: 1px solid #e9ecef;
295
+ background: #f8f9fa;
296
+ }
297
+
298
+ .chat-input-container {
299
+ display: flex;
300
+ gap: 10px;
301
+ align-items: center;
302
+ }
303
+
304
+ .chat-input {
305
+ flex: 1;
306
+ padding: 12px 15px;
307
+ border: 2px solid #e1e5e9;
308
+ border-radius: 25px;
309
+ font-size: 1rem;
310
+ transition: all 0.3s ease;
311
+ }
312
+
313
+ .chat-input:focus {
314
+ outline: none;
315
+ border-color: #667eea;
316
+ box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1);
317
+ }
318
+
319
+ .chat-send-btn {
320
+ width: 45px;
321
+ height: 45px;
322
+ border-radius: 50%;
323
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
324
+ color: white;
325
+ border: none;
326
+ cursor: pointer;
327
+ transition: all 0.3s ease;
328
+ display: flex;
329
+ align-items: center;
330
+ justify-content: center;
331
+ }
332
+
333
+ .chat-send-btn:hover {
334
+ transform: scale(1.1);
335
+ }
336
+
337
+ .chat-send-btn:disabled {
338
+ opacity: 0.6;
339
+ cursor: not-allowed;
340
+ transform: none;
341
+ }
342
+
343
+ .examples-sidebar {
344
+ background: #f8f9fa;
345
+ padding: 20px;
346
+ border-left: 1px solid #e9ecef;
347
+ }
348
+
349
+ .examples-sidebar h3 {
350
+ color: #667eea;
351
+ margin-bottom: 15px;
352
+ font-size: 1.2rem;
353
+ }
354
+
355
+ .example-categories {
356
+ display: flex;
357
+ flex-direction: column;
358
+ gap: 10px;
359
+ }
360
+
361
+ .example-category {
362
+ background: white;
363
+ border-radius: 10px;
364
+ padding: 15px;
365
+ border: 2px solid transparent;
366
+ transition: all 0.3s ease;
367
+ }
368
+
369
+ .example-category:hover {
370
+ border-color: #667eea;
371
+ transform: translateX(5px);
372
+ }
373
+
374
+ .category-title {
375
+ font-weight: 600;
376
+ color: #667eea;
377
+ margin-bottom: 10px;
378
+ font-size: 0.9rem;
379
+ }
380
+
381
+ .example-queries {
382
+ display: flex;
383
+ flex-direction: column;
384
+ gap: 8px;
385
+ }
386
+
387
+ .example-query-btn {
388
+ background: #e3f2fd;
389
+ border: none;
390
+ border-radius: 8px;
391
+ padding: 8px 12px;
392
+ text-align: left;
393
+ cursor: pointer;
394
+ transition: all 0.3s ease;
395
+ font-size: 0.85rem;
396
+ color: #333;
397
+ }
398
+
399
+ .example-query-btn:hover {
400
+ background: #bbdefb;
401
+ transform: translateX(3px);
402
+ }
403
+
404
+ .example-query-btn i {
405
+ margin-right: 5px;
406
+ color: #667eea;
407
+ }
408
+
409
+ .data-display {
410
+ background: #f8f9fa;
411
+ border-radius: 10px;
412
+ padding: 15px;
413
+ margin-top: 10px;
414
+ }
415
+
416
+ .data-display h4 {
417
+ color: #667eea;
418
+ margin-bottom: 10px;
419
+ font-size: 1rem;
420
+ }
421
+
422
+ .data-table {
423
+ width: 100%;
424
+ border-collapse: collapse;
425
+ background: white;
426
+ border-radius: 8px;
427
+ overflow: hidden;
428
+ font-size: 0.85rem;
429
+ }
430
+
431
+ .data-table th,
432
+ .data-table td {
433
+ padding: 8px 12px;
434
+ text-align: left;
435
+ border-bottom: 1px solid #e9ecef;
436
+ }
437
+
438
+ .data-table th {
439
+ background: #667eea;
440
+ color: white;
441
+ font-weight: 600;
442
+ }
443
+
444
+ .data-table tr:hover {
445
+ background: #f8f9fa;
446
+ }
447
+
448
+ .corrections {
449
+ background: #fff3cd;
450
+ border: 1px solid #ffeaa7;
451
+ border-radius: 8px;
452
+ padding: 10px;
453
+ margin-top: 10px;
454
+ font-size: 0.85rem;
455
+ }
456
+
457
+ .corrections strong {
458
+ color: #856404;
459
+ }
460
+
461
+ .correction-item {
462
+ margin: 3px 0;
463
+ color: #856404;
464
+ }
465
+
466
+ .suggestions {
467
+ margin-top: 10px;
468
+ }
469
+
470
+ .suggestion-item {
471
+ background: #e3f2fd;
472
+ border-radius: 5px;
473
+ padding: 6px 10px;
474
+ margin: 3px 0;
475
+ cursor: pointer;
476
+ transition: background 0.3s ease;
477
+ font-size: 0.85rem;
478
+ }
479
+
480
+ .suggestion-item:hover {
481
+ background: #bbdefb;
482
+ }
483
+
484
+ @media (max-width: 768px) {
485
+ .chat-body {
486
+ grid-template-columns: 1fr;
487
+ }
488
+
489
+ .examples-sidebar {
490
+ border-left: none;
491
+ border-top: 1px solid #e9ecef;
492
+ }
493
+
494
+ .message-content {
495
+ max-width: 85%;
496
+ }
497
+ }
498
+
499
+ .status-bar {
500
+ background: white;
501
+ border-radius: 15px;
502
+ padding: 20px;
503
+ box-shadow: 0 10px 30px rgba(0,0,0,0.2);
504
+ margin-bottom: 20px;
505
+ }
506
+
507
+ .status-grid {
508
+ display: grid;
509
+ grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
510
+ gap: 20px;
511
+ }
512
+
513
+ .status-item {
514
+ text-align: center;
515
+ padding: 15px;
516
+ background: #f8f9fa;
517
+ border-radius: 10px;
518
+ }
519
+
520
+ .status-label {
521
+ font-size: 0.9rem;
522
+ color: #6c757d;
523
+ margin-bottom: 5px;
524
+ }
525
+
526
+ .status-value {
527
+ font-size: 1.5rem;
528
+ font-weight: 600;
529
+ color: #667eea;
530
+ }
531
+
532
+ .status-value.success {
533
+ color: #28a745;
534
+ }
535
+
536
+ .status-value.error {
537
+ color: #dc3545;
538
+ }
539
+
540
+ .loading {
541
+ display: none;
542
+ text-align: center;
543
+ padding: 20px;
544
+ }
545
+
546
+ .loading i {
547
+ font-size: 2rem;
548
+ color: #667eea;
549
+ animation: spin 1s linear infinite;
550
+ }
551
+
552
+ @keyframes spin {
553
+ 0% { transform: rotate(0deg); }
554
+ 100% { transform: rotate(360deg); }
555
+ }
556
+
557
+ .table-container {
558
+ overflow-x: auto;
559
+ margin-top: 15px;
560
+ }
561
+
562
+ .data-table {
563
+ width: 100%;
564
+ border-collapse: collapse;
565
+ background: white;
566
+ border-radius: 8px;
567
+ overflow: hidden;
568
+ }
569
+
570
+ .data-table th,
571
+ .data-table td {
572
+ padding: 12px;
573
+ text-align: left;
574
+ border-bottom: 1px solid #e9ecef;
575
+ }
576
+
577
+ .data-table th {
578
+ background: #667eea;
579
+ color: white;
580
+ font-weight: 600;
581
+ }
582
+
583
+ .data-table tr:hover {
584
+ background: #f8f9fa;
585
+ }
586
+
587
+ @media (max-width: 768px) {
588
+ .main-content {
589
+ grid-template-columns: 1fr;
590
+ }
591
+
592
+ .header h1 {
593
+ font-size: 2rem;
594
+ }
595
+
596
+ .container {
597
+ padding: 10px;
598
+ }
599
+ }
600
+ </style>
601
+ </head>
602
+ <body>
603
+ <div class="container">
604
+ <div class="header">
605
+ <h1><i class="fas fa-bus"></i> Transport Query System</h1>
606
+ <p>Ask questions about transport fares and routes with intelligent spell correction</p>
607
+ </div>
608
+
609
+ <div class="status-bar">
610
+ <div class="status-grid">
611
+ <div class="status-item">
612
+ <div class="status-label">Neo4j Status</div>
613
+ <div class="status-value" id="neo4j-status">Checking...</div>
614
+ </div>
615
+ <div class="status-item">
616
+ <div class="status-label">Total Places</div>
617
+ <div class="status-value" id="total-places">-</div>
618
+ </div>
619
+ <div class="status-item">
620
+ <div class="status-label">Total Routes</div>
621
+ <div class="status-value" id="total-routes">-</div>
622
+ </div>
623
+ <div class="status-item">
624
+ <div class="status-label">Average Fare</div>
625
+ <div class="status-value" id="avg-fare">-</div>
626
+ </div>
627
+ </div>
628
+ </div>
629
+
630
+ <div class="chat-container">
631
+ <div class="chat-header">
632
+ <h2><i class="fas fa-comments"></i> Transport Query Chat</h2>
633
+ <div class="chat-status">
634
+ <i class="fas fa-circle" id="status-indicator"></i>
635
+ <span id="status-text">Ready</span>
636
+ </div>
637
+ </div>
638
+
639
+ <div class="chat-body">
640
+ <div class="chat-messages" id="chat-messages">
641
+ <div class="message bot">
642
+ <div class="message-avatar">
643
+ <i class="fas fa-robot"></i>
644
+ </div>
645
+ <div class="message-content">
646
+ <div class="message-bubble">
647
+ Hello! I'm your transport assistant. I can help you find fares, compare routes, and get transport information. Try clicking on an example query or type your own question!
648
+ </div>
649
+ <div class="message-time" id="welcome-time"></div>
650
+ </div>
651
+ </div>
652
+ </div>
653
+
654
+ <div class="examples-sidebar">
655
+ <h3><i class="fas fa-lightbulb"></i> Example Queries</h3>
656
+ <div class="example-categories" id="example-categories">
657
+ <!-- Example categories will be loaded here -->
658
+ </div>
659
+ </div>
660
+ </div>
661
+
662
+ <div class="chat-input-section">
663
+ <div class="chat-input-container">
664
+ <input type="text" class="chat-input" id="chat-input" placeholder="Type your transport question here..." autocomplete="off">
665
+ <button class="chat-send-btn" id="chat-send-btn" onclick="sendChatMessage()">
666
+ <i class="fas fa-paper-plane"></i>
667
+ </button>
668
+ </div>
669
+ </div>
670
+ </div>
671
+ </div>
672
+
673
+ <script>
674
+ // Global variables
675
+ let currentQuery = '';
676
+
677
+ // Initialize the application
678
+ document.addEventListener('DOMContentLoaded', function() {
679
+ loadStatus();
680
+ loadExampleCategories();
681
+ setupChatEventListeners();
682
+ setWelcomeTime();
683
+ });
684
+
685
+ function setupChatEventListeners() {
686
+ const chatInput = document.getElementById('chat-input');
687
+ const chatSendBtn = document.getElementById('chat-send-btn');
688
+
689
+ // Enter key to send message
690
+ chatInput.addEventListener('keypress', function(e) {
691
+ if (e.key === 'Enter') {
692
+ sendChatMessage();
693
+ }
694
+ });
695
+
696
+ // Input validation
697
+ chatInput.addEventListener('input', function() {
698
+ chatSendBtn.disabled = !this.value.trim();
699
+ });
700
+ }
701
+
702
+ function setWelcomeTime() {
703
+ const now = new Date();
704
+ const timeString = now.toLocaleTimeString();
705
+ document.getElementById('welcome-time').textContent = timeString;
706
+ }
707
+
708
+ async function loadStatus() {
709
+ try {
710
+ const response = await fetch('/api/status');
711
+ const data = await response.json();
712
+
713
+ document.getElementById('neo4j-status').textContent = data.neo4j_connected ? 'Connected' : 'Disconnected';
714
+ document.getElementById('neo4j-status').className = data.neo4j_connected ? 'status-value success' : 'status-value error';
715
+
716
+ if (data.statistics) {
717
+ document.getElementById('total-places').textContent = data.statistics.total_places || 0;
718
+ document.getElementById('total-routes').textContent = data.statistics.total_routes || 0;
719
+ document.getElementById('avg-fare').textContent = `Rs. ${data.statistics.average_fare || 0}`;
720
+ }
721
+
722
+ // Update chat status
723
+ const statusIndicator = document.getElementById('status-indicator');
724
+ const statusText = document.getElementById('status-text');
725
+
726
+ if (data.neo4j_connected) {
727
+ statusIndicator.style.color = '#28a745';
728
+ statusText.textContent = 'Connected to Database';
729
+ } else {
730
+ statusIndicator.style.color = '#dc3545';
731
+ statusText.textContent = 'Database Disconnected';
732
+ }
733
+ } catch (error) {
734
+ console.error('Error loading status:', error);
735
+ document.getElementById('status-indicator').style.color = '#dc3545';
736
+ document.getElementById('status-text').textContent = 'Connection Error';
737
+ }
738
+ }
739
+
740
+ async function loadExampleCategories() {
741
+ try {
742
+ const response = await fetch('/api/examples');
743
+ const data = await response.json();
744
+
745
+ const categoriesContainer = document.getElementById('example-categories');
746
+ categoriesContainer.innerHTML = '';
747
+
748
+ data.examples.forEach(category => {
749
+ const categoryDiv = document.createElement('div');
750
+ categoryDiv.className = 'example-category';
751
+
752
+ let categoryHTML = `<div class="category-title">${category.category}</div>`;
753
+ categoryHTML += '<div class="example-queries">';
754
+
755
+ category.examples.forEach(example => {
756
+ categoryHTML += `
757
+ <button class="example-query-btn" onclick="useExampleQuery('${example.query.replace(/'/g, "\\'")}')">
758
+ <i class="fas fa-arrow-right"></i>
759
+ ${example.query}
760
+ </button>
761
+ `;
762
+ });
763
+
764
+ categoryHTML += '</div>';
765
+ categoryDiv.innerHTML = categoryHTML;
766
+ categoriesContainer.appendChild(categoryDiv);
767
+ });
768
+ } catch (error) {
769
+ console.error('Error loading example categories:', error);
770
+ }
771
+ }
772
+
773
+ function useExampleQuery(query) {
774
+ document.getElementById('chat-input').value = query;
775
+ document.getElementById('chat-send-btn').disabled = false;
776
+ sendChatMessage();
777
+ }
778
+
779
+ async function sendChatMessage() {
780
+ const chatInput = document.getElementById('chat-input');
781
+ const query = chatInput.value.trim();
782
+
783
+ if (!query) return;
784
+
785
+ currentQuery = query;
786
+
787
+ // Add user message to chat
788
+ addChatMessage('user', query);
789
+
790
+ // Clear input
791
+ chatInput.value = '';
792
+ document.getElementById('chat-send-btn').disabled = true;
793
+
794
+ // Show typing indicator
795
+ showTypingIndicator();
796
+
797
+ try {
798
+ const response = await fetch('/api/query', {
799
+ method: 'POST',
800
+ headers: {
801
+ 'Content-Type': 'application/json',
802
+ },
803
+ body: JSON.stringify({ query: query })
804
+ });
805
+
806
+ const result = await response.json();
807
+
808
+ // Remove typing indicator
809
+ removeTypingIndicator();
810
+
811
+ // Add bot response
812
+ addBotResponse(result);
813
+
814
+ } catch (error) {
815
+ removeTypingIndicator();
816
+ addChatMessage('bot', 'Sorry, I encountered an error processing your query. Please try again.');
817
+ }
818
+ }
819
+
820
+ function addChatMessage(sender, message) {
821
+ const chatMessages = document.getElementById('chat-messages');
822
+ const now = new Date();
823
+ const timeString = now.toLocaleTimeString();
824
+
825
+ const messageDiv = document.createElement('div');
826
+ messageDiv.className = `message ${sender}`;
827
+
828
+ const avatarIcon = sender === 'user' ? 'fas fa-user' : 'fas fa-robot';
829
+
830
+ messageDiv.innerHTML = `
831
+ <div class="message-avatar">
832
+ <i class="${avatarIcon}"></i>
833
+ </div>
834
+ <div class="message-content">
835
+ <div class="message-bubble">${message}</div>
836
+ <div class="message-time">${timeString}</div>
837
+ </div>
838
+ `;
839
+
840
+ chatMessages.appendChild(messageDiv);
841
+ chatMessages.scrollTop = chatMessages.scrollHeight;
842
+ }
843
+
844
+ function addBotResponse(result) {
845
+ const chatMessages = document.getElementById('chat-messages');
846
+ const now = new Date();
847
+ const timeString = now.toLocaleTimeString();
848
+
849
+ const messageDiv = document.createElement('div');
850
+ messageDiv.className = 'message bot';
851
+
852
+ let responseContent = '';
853
+
854
+ if (result.success) {
855
+ responseContent = `<div class="message-bubble">${result.message}</div>`;
856
+
857
+ // Add data display if available
858
+ if (result.data && Array.isArray(result.data) && result.data.length > 0) {
859
+ responseContent += createChatDataDisplay(result.data);
860
+ }
861
+
862
+ // Add corrections if any
863
+ if (result.corrections && result.corrections.length > 0) {
864
+ responseContent += createCorrectionsDisplay(result.corrections);
865
+ }
866
+
867
+ // Add suggestions if any
868
+ if (result.suggestions && result.suggestions.length > 0) {
869
+ responseContent += createSuggestionsDisplay(result.suggestions);
870
+ }
871
+ } else {
872
+ responseContent = `<div class="message-bubble">Sorry, I couldn't process your query: ${result.message}</div>`;
873
+ }
874
+
875
+ messageDiv.innerHTML = `
876
+ <div class="message-avatar">
877
+ <i class="fas fa-robot"></i>
878
+ </div>
879
+ <div class="message-content">
880
+ ${responseContent}
881
+ <div class="message-time">${timeString}</div>
882
+ </div>
883
+ `;
884
+
885
+ chatMessages.appendChild(messageDiv);
886
+ chatMessages.scrollTop = chatMessages.scrollHeight;
887
+ }
888
+
889
+ function createChatDataDisplay(data) {
890
+ if (!data || data.length === 0) return '';
891
+
892
+ const keys = Object.keys(data[0]);
893
+ let html = '<div class="data-display"><h4>Results:</h4><table class="data-table"><thead><tr>';
894
+
895
+ // Headers
896
+ keys.forEach(key => {
897
+ html += `<th>${key.replace(/_/g, ' ').toUpperCase()}</th>`;
898
+ });
899
+ html += '</tr></thead><tbody>';
900
+
901
+ // Rows (limit to first 5 for chat)
902
+ data.slice(0, 5).forEach(row => {
903
+ html += '<tr>';
904
+ keys.forEach(key => {
905
+ html += `<td>${row[key]}</td>`;
906
+ });
907
+ html += '</tr>';
908
+ });
909
+
910
+ html += '</tbody></table>';
911
+
912
+ if (data.length > 5) {
913
+ html += `<p style="font-size: 0.8rem; color: #6c757d; margin-top: 5px;">Showing first 5 of ${data.length} results</p>`;
914
+ }
915
+
916
+ html += '</div>';
917
+ return html;
918
+ }
919
+
920
+ function createCorrectionsDisplay(corrections) {
921
+ let html = '<div class="corrections"><strong>Spell Corrections:</strong>';
922
+ corrections.forEach(correction => {
923
+ html += `<div class="correction-item">"${correction.original}" → "${correction.corrected}" (${correction.method})</div>`;
924
+ });
925
+ html += '</div>';
926
+ return html;
927
+ }
928
+
929
+ function createSuggestionsDisplay(suggestions) {
930
+ let html = '<div class="suggestions"><strong>Suggestions:</strong>';
931
+ suggestions.forEach(suggestion => {
932
+ html += `<div class="suggestion-item" onclick="useSuggestion('${suggestion}')">${suggestion}</div>`;
933
+ });
934
+ html += '</div>';
935
+ return html;
936
+ }
937
+
938
+ function showTypingIndicator() {
939
+ const chatMessages = document.getElementById('chat-messages');
940
+ const typingDiv = document.createElement('div');
941
+ typingDiv.className = 'message bot';
942
+ typingDiv.id = 'typing-indicator';
943
+
944
+ typingDiv.innerHTML = `
945
+ <div class="message-avatar">
946
+ <i class="fas fa-robot"></i>
947
+ </div>
948
+ <div class="message-content">
949
+ <div class="message-bubble">
950
+ <i class="fas fa-spinner fa-spin"></i> Processing...
951
+ </div>
952
+ </div>
953
+ `;
954
+
955
+ chatMessages.appendChild(typingDiv);
956
+ chatMessages.scrollTop = chatMessages.scrollHeight;
957
+ }
958
+
959
+ function removeTypingIndicator() {
960
+ const typingIndicator = document.getElementById('typing-indicator');
961
+ if (typingIndicator) {
962
+ typingIndicator.remove();
963
+ }
964
+ }
965
+
966
+ function useSuggestion(suggestion) {
967
+ document.getElementById('chat-input').value = suggestion;
968
+ document.getElementById('chat-send-btn').disabled = false;
969
+ sendChatMessage();
970
+ }
971
+
972
+ // Auto-refresh status every 30 seconds
973
+ setInterval(loadStatus, 30000);
974
+ </script>
975
+ </body>
976
+ </html>
977
+
translation_service.py ADDED
@@ -0,0 +1,702 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Translation Service for Sinhala-English Translation
4
+ Handles translation of queries and responses with multiple free alternatives
5
+ """
6
+
7
+ import requests
8
+ import json
9
+ import re
10
+ import openai
11
+ from typing import Dict, Any, Optional
12
+ from config import Config
13
+ from logger import get_logger
14
+
15
+ class TranslationService:
16
def __init__(self):
    """Set up configuration, logging, endpoints and the Sinhala glossary.

    Attributes initialised here:
      - ``config``: a fresh ``Config`` instance.
      - ``openai_api_key``: ``Config.OPENAI_API_KEY`` or ``None`` when absent.
      - ``logger``: logger named after the concrete class.
      - ``use_pattern_translation`` / ``force_llm_translation``: booleans read
        from the ``USE_PATTERN_TRANSLATION`` / ``FORCE_LLM_TRANSLATION``
        environment variables (true only for the case-insensitive value
        ``"true"``; default false).
      - ``last_translation_method``: starts as ``None``.
      - ``libre_translate_url`` / ``mymemory_url``: translation API endpoints.
      - ``transport_terms``: Sinhala -> English glossary of transport words.
      - ``sinhala_pattern``: regex matching any character in U+0D80-U+0DFF.
    """
    import os  # function-scope import, as in the original implementation

    self.config = Config()
    self.openai_api_key = getattr(self.config, 'OPENAI_API_KEY', None)
    self.logger = get_logger(self.__class__.__name__)

    # Controls
    self.use_pattern_translation = os.getenv('USE_PATTERN_TRANSLATION', 'false').lower() == 'true'
    self.force_llm_translation = os.getenv('FORCE_LLM_TRANSLATION', 'false').lower() == 'true'
    self.last_translation_method: Optional[str] = None

    # Free translation APIs
    self.libre_translate_url = "https://libretranslate.de/translate"  # Free public instance
    self.mymemory_url = "https://api.mymemory.translated.net/get"

    # Common transport terms in Sinhala and their English equivalents
    self.transport_terms = {
        # Fare related
        'කීයද': 'how much',
        'මිල': 'price',
        'වාරික': 'fare',
        'වාරිකය': 'fare',
        'වාරිකව': 'fare',
        'ගාස්තු': 'fare',
        'ගාස්තුව': 'fare',
        'ප්‍රවාහන ගාස්තු': 'transport fare',
        'බස් ගාස්තු': 'bus fare',
        'බස් ගාස්තුව': 'bus fare',
        'රේල් ගාස්තු': 'train fare',
        'රේල් ගාස්තුව': 'train fare',

        # Locations
        'කොළඹ': 'Colombo',
        'මහනුවර': 'Kandy',
        'මහනුවරට': 'Kandy',
        'ගාල්ල': 'Galle',
        'ගාල්ලට': 'Galle',
        'මාතර': 'Matara',
        'මාතරට': 'Matara',
        'අනුරාධපුර': 'Anuradhapura',
        'අනුරාධපුරට': 'Anuradhapura',
        'පානදුර': 'Panadura',
        'පානදුරට': 'Panadura',
        'අලුත්ගම': 'Aluthgama',
        'අලුත්ගමට': 'Aluthgama',
        'නුගේගොඩ': 'Nugegoda',
        'නුගේගොඩට': 'Nugegoda',
        'දෙහිවල': 'Dehiwala',
        'දෙහිවලට': 'Dehiwala',
        'මොරටුව': 'Moratuwa',
        'මොරටුවට': 'Moratuwa',

        # Direction words
        'වලින්': 'from',
        'වල': 'from',
        'ට': 'to',
        'වෙත': 'to',
        'සිට': 'from',
        'දක්වා': 'to',
        'සි': 'from',

        # Question words
        'කොහෙද': 'where',
        'කවදාද': 'when',
        'කොහොමද': 'how',
        'මොනවාද': 'what',
        'කවුද': 'who',

        # Comparison words
        'සමඟ': 'with',
        'සහ': 'and',
        'හෝ': 'or',
        'වඩා': 'more',
        'අඩු': 'less',
        'සමාන': 'same',
        'වෙනස': 'different',
        'සසඳන්න': 'compare',
        'සසඳන': 'compare',

        # Time words
        'දැන්': 'now',
        'අද': 'today',
        'හෙට': 'tomorrow',
        'ඊයේ': 'yesterday',

        # Common verbs
        'යන්න': 'go',
        'යන': 'go',
        'එන්න': 'come',
        'බලන්න': 'see',
        'දැනගන්න': 'know',
        'සොයන්න': 'find',
        'සොයන': 'find',
        'ඉගෙනගන්න': 'learn',
        'නිර්දේශ': 'recommend',
        'නිර්දේශ කරන්න': 'recommend',
        'පෙන්වන්න': 'show',
        'පෙන්වන': 'show',

        # Numbers and currency
        'රුපියල්': 'rupees',
        'රු': 'rupees',
        'රුපියල': 'rupees',

        # Common phrases
        'අතර': 'between',
        'සහිත': 'with',
        'මාර්ග': 'routes',
        'මාර්ගවල': 'routes',
        'ගමනාන්ත': 'destinations',
        'ප්‍රසිද්ධ': 'popular',
        'සාමාන්‍ය': 'average',
        'සාමාන්‍යය': 'average',
        'දත්ත': 'data',
        # This key previously contained U+FFFD replacement characters (a
        # mis-decoded letter), so it could never match real input.
        'සංඛ්‍යාලේඛන': 'statistics',
    }

    # Sinhala script detection pattern
    self.sinhala_pattern = re.compile(r'[\u0D80-\u0DFF]')
134
+
135
def is_sinhala_text(self, text: str) -> bool:
    """Return True when *text* has at least one match of ``self.sinhala_pattern``.

    The outcome and the inspected text are also written to the debug log.
    """
    first_match = self.sinhala_pattern.search(text)
    detected = first_match is not None
    self.logger.debug(f"Sinhala detection: detected={detected}, text='{text}'")
    return detected
140
+
141
def _map_sinhala_place(self, text: str) -> str:
    """Map a Sinhala place token to its English name via ``self.transport_terms``.

    Lookup order:
      1. the whitespace-trimmed token as given;
      2. the token with one trailing case particle removed
         (ට, වෙත, දක්වා, වලින්, වල, සිට), whether the particle is attached to
         the word ("මහනුවරට") or written as a separate word ("ගාල්ල දක්වා").

    Returns the English term when found, otherwise the trimmed input unchanged.
    """
    candidate = text.strip()
    terms = self.transport_terms

    # Direct map
    if candidate in terms:
        return terms[candidate]

    # Strip one trailing particle together with any whitespace before it.
    # Without the leading \s* a detached particle left a trailing space on the
    # base ("ගාල්ල "), which then missed the glossary lookup.
    base = re.sub(r'\s*(ට|වෙත|දක්වා|වලින්|වල|සිට)$', '', candidate)
    if base in terms:
        return terms[base]

    return candidate
152
+
153
def _parse_sinhala_fare_query(self, query: str) -> Optional[str]:
    """Detect a simple Sinhala fare query and build a clean English query.

    Example: "කොළඹ සිට මහනුවරට ගාස්තුව කීයද?" ->
             "What is the fare from Colombo to Kandy?"

    Returns ``None`` when the text has no fare-related word, when no
    "<source> සිට <destination>(ට|වෙත|දක්වා)" structure is found, or when
    anything goes wrong while parsing (this is a best-effort shortcut, so
    errors are deliberately turned into "no match").
    """
    try:
        # Quick check for fare-related tokens to avoid false positives.
        # Substring test, so 'ගාස්තු' also covers 'ගාස්තුව' and 'වාරික' covers 'වාරිකය'.
        if not any(tok in query for tok in ('ගාස්තු', 'වාරික', 'මිල')):
            return None

        place = r'[\u0D80-\u0DFF\s]+?'
        # Preferred form: the "to" particle must END a word, i.e. must not be
        # followed by another Sinhala character. Otherwise a destination that
        # merely contains the letter ට is cut short ("මොරටුවට" -> "මොර").
        strict = rf'({place})\s*සිට\s*({place})(?:ට|වෙත|දක්වා)(?![\u0D80-\u0DFF\u200d])'
        # Fallback: the original, more permissive pattern, so every query that
        # used to be recognised is still recognised.
        lenient = rf'({place})\s*සිට\s*({place})(?:ට|වෙත|දක්වා)'

        m = re.search(strict, query) or re.search(lenient, query)
        if not m:
            return None

        src_en = self._map_sinhala_place(m.group(1).strip())
        dst_en = self._map_sinhala_place(m.group(2).strip())
        return f"What is the fare from {src_en} to {dst_en}?"
    except Exception:
        # Best effort by design: callers treat None as "pattern not applicable".
        return None
172
+
173
def translate_with_llm(self, text: str, target_lang: str, source_lang: str = 'auto') -> Optional[str]:
    """Translate ``text`` between Sinhala and English using the OpenAI chat API.

    Args:
        text: Text to translate.
        target_lang: ``'en'`` or ``'si'``.
        source_lang: ``'si'``, ``'en'`` or ``'auto'``. With ``'auto'`` the
            source is decided by ``self.is_sinhala_text(text)``.

    Returns:
        The translated text with one pair of surrounding double quotes
        removed, or ``None`` when no API key is configured, the language
        pair is not Sinhala<->English, the model returns nothing, or the
        request fails (failures are logged as warnings, never raised).

    Side effects:
        Sets ``self.last_translation_method`` to ``'llm'`` once a completion
        has been received.
    """
    if not self.openai_api_key:
        return None

    try:
        # Determine source language
        if source_lang == 'auto':
            source_lang = 'si' if self.is_sinhala_text(text) else 'en'

        # Only Sinhala -> English and English -> Sinhala are supported.
        if (source_lang, target_lang) not in (('si', 'en'), ('en', 'si')):
            return None
        to_english = target_lang == 'en'

        # Few-shot pairs (Sinhala, English) that pin comparison/imperative structure.
        examples = [
            (
                "කොළඹ සිට මහනුවරට යන බස් ගාස්තුව කීයද?",
                "What is the bus fare from Colombo to Kandy?"
            ),
            (
                "කොළඹ සිට ගාල්ල දක්වා ටිකට් මිල කීයද?",
                "What is the ticket price from Colombo to Galle?"
            ),
            (
                "කොළඹ සිට පානදුර දක්වා සහ කොළඹ සිට ගාල්ල දක්වා ගාස්තු සසඳා බලන්න.",
                "Compare fares from Colombo to Panadura and from Colombo to Galle."
            ),
            (
                "රුපියල් 500 ට අඩු ගාස්තු සහිත මාර්ග පෙන්වන්න.",
                "Show routes with fares under 500 rupees."
            ),
            (
                "අඩු මිලේ මාර්ග නිර්දේශ කරන්න.",
                "Recommend cheap routes."
            ),
        ]

        def build_messages(txt: str):
            """Compose the chat messages for the requested translation direction."""
            if to_english:
                system_content = (
                    "You are a professional translator. Translate accurately and naturally. "
                    "Preserve imperative/comparative intent and list structure. Do not paraphrase. "
                    "Return only the English translation without quotes. "
                    "Canonical phrasing rules (use exactly): \n"
                    "- Use 'Compare' for comparison requests.\n"
                    "- Use 'Show' for requests like 'පෙන්වන්න' (do not use Provide/List).\n"
                    "- Use 'How much is the' for 'කීයද' fare/price questions.\n"
                    "- Use 'cheap' (not 'affordable').\n"
                    "- Use 'under' (not 'below') for '< value'.\n"
                )
                instructions = (
                    "Instructions: Preserve structure. Use 'Compare' for 'සසඳ', use 'from' for 'සිට' and 'to' for 'ට/වෙත/දක්වා'.\n"
                    "Use exact place names: මහනුවර=Kandy, කොළඹ=Colombo, ගාල්ල=Galle, මාතර=Matara, අනුරාධපුර=Anuradhapura."
                )
                src_label, dst_label = "Sinhala", "English"
                shots = examples
            else:
                # The original prompt always asked for an English translation,
                # which is wrong when the caller wants Sinhala output.
                system_content = (
                    "You are a professional translator. Translate accurately and naturally. "
                    "Preserve imperative/comparative intent and list structure. Do not paraphrase. "
                    "Return only the Sinhala translation without quotes."
                )
                instructions = (
                    "Instructions: Preserve structure, numbers and currency amounts.\n"
                    "Use exact place names: Kandy=මහනුවර, Colombo=කොළඹ, Galle=ගාල්ල, Matara=මාතර, Anuradhapura=අනුරාධපුර."
                )
                src_label, dst_label = "English", "Sinhala"
                shots = [(en, si) for si, en in examples]

            msgs = [
                {"role": "system", "content": system_content},
                {"role": "user", "content": instructions},
            ]
            for src, dst in shots:
                msgs.append({"role": "user", "content": f"{src_label}: {src}\n{dst_label}:"})
                msgs.append({"role": "assistant", "content": dst})
            msgs.append({"role": "user", "content": f"{src_label}: {txt}\n{dst_label}:"})
            return msgs

        request_args = {
            "model": "gpt-3.5-turbo",
            "max_tokens": 150,
            "temperature": 0.3,
            "messages": build_messages(text),
        }

        # Use new OpenAI SDK
        try:
            from openai import OpenAI
            client = OpenAI(api_key=self.openai_api_key)
            response = client.chat.completions.create(**request_args)
            translated = response.choices[0].message.content.strip()
            self.last_translation_method = 'llm'
        except Exception as sdk_err:
            # Fallback to legacy API if available
            import openai
            try:
                openai.api_key = self.openai_api_key
                response = openai.ChatCompletion.create(**request_args)
                translated = response.choices[0].message.content.strip()
                self.last_translation_method = 'llm'
            except Exception:
                # Report the error from the current SDK, not the legacy retry.
                raise sdk_err

        if translated.startswith('"') and translated.endswith('"'):
            translated = translated[1:-1]
        return translated if translated else None
    except Exception as e:
        self.logger.warning(f"LLM translation error: {e}")
        return None
299
+
300
def translate_with_libre_translate(self, text: str, target_lang: str, source_lang: str = 'auto') -> Optional[str]:
    """Translate ``text`` by POSTing it to the LibreTranslate endpoint.

    Args:
        text: Text to translate.
        target_lang: Target language code; unknown codes fall back to ``'en'``.
        source_lang: Source language code; unknown codes fall back to ``'auto'``.

    Returns:
        The ``translatedText`` value from the JSON reply, or ``None`` when the
        HTTP status is not 200, the reply carries no usable translation, or
        the request raises (the error is logged as a warning).

    Side effects:
        Sets ``self.last_translation_method`` to ``'libretranslate'`` only
        when a non-empty translation is actually returned, so a failed call
        cannot be credited for a later fallback's result.
    """
    try:
        # Map language codes
        lang_map = {
            'si': 'si',  # Sinhala
            'en': 'en',  # English
            'auto': 'auto',
        }

        payload = {
            'q': text,
            'source': lang_map.get(source_lang, 'auto'),
            'target': lang_map.get(target_lang, 'en'),
            'format': 'text',
        }

        response = requests.post(
            self.libre_translate_url,
            json=payload,
            headers={'Content-Type': 'application/json'},
            timeout=10,
        )
        if response.status_code != 200:
            return None

        translated = response.json().get('translatedText')
        if not translated or not translated.strip():
            # A 200 reply without text is a failure for our purposes.
            return None

        self.logger.debug(f"LibreTranslate success: '{text}' -> '{translated}'")
        self.last_translation_method = 'libretranslate'
        return translated

    except Exception as e:
        self.logger.warning(f"LibreTranslate error: {e}")
        return None
343
+
344
def translate_with_mymemory(self, text: str, target_lang: str, source_lang: str = 'auto') -> Optional[str]:
    """Translate ``text`` with a GET request to the MyMemory endpoint.

    Args:
        text: Text to translate.
        target_lang: Target language code, used verbatim in ``langpair``.
        source_lang: Source language code; unknown codes fall back to ``'auto'``.

    Returns:
        ``responseData.translatedText`` from the JSON reply, or ``None`` when
        the HTTP status is not 200, the reply carries no usable translation,
        or the request raises (the error is logged as a warning).

    Side effects:
        Sets ``self.last_translation_method`` to ``'mymemory'`` only when a
        non-empty translation is actually returned, so a failed call cannot
        be credited for a later fallback's result.
    """
    try:
        # Map language codes
        lang_map = {
            'si': 'si',  # Sinhala
            'en': 'en',  # English
            'auto': 'auto',
        }

        source = lang_map.get(source_lang, 'auto')
        params = {
            'q': text,
            'langpair': f"{source}|{target_lang}",
        }

        response = requests.get(
            self.mymemory_url,
            params=params,
            timeout=10,
        )
        if response.status_code != 200:
            return None

        translated = response.json().get('responseData', {}).get('translatedText')
        if not translated or not translated.strip():
            # A 200 reply without text is a failure for our purposes.
            return None

        self.logger.debug(f"MyMemory success: '{text}' -> '{translated}'")
        self.last_translation_method = 'mymemory'
        return translated

    except Exception as e:
        self.logger.warning(f"MyMemory translation error: {e}")
        return None
380
+
381
+
382
+
383
def translate_with_dictionary(self, text: str, target_lang: str) -> str:
    """Translate by substituting entries of ``self.transport_terms``.

    ``self.transport_terms`` maps Sinhala terms to English terms. For
    ``target_lang == 'en'`` each Sinhala term is replaced by its English
    value; for ``'si'`` the mapping is applied in reverse. Any other target
    returns ``text`` unchanged.

    Terms are applied longest-first so that a short term which is a prefix
    or substring of a longer one cannot break the longer match. Empty search
    terms are skipped, because ``str.replace('', x)`` would insert ``x``
    between every character.

    Args:
        text: Text to translate.
        target_lang: ``'en'`` or ``'si'``.

    Returns:
        The text with all known terms substituted.
    """
    if target_lang == 'en':
        # Sinhala to English
        pairs = list(self.transport_terms.items())
    elif target_lang == 'si':
        # English to Sinhala
        pairs = [(english, sinhala) for sinhala, english in self.transport_terms.items()]
    else:
        return text

    translated = text
    # sorted() is stable, so equally long terms keep their dictionary order.
    for old, new in sorted(pairs, key=lambda pair: len(pair[0]), reverse=True):
        if old:
            translated = translated.replace(old, new)
    return translated
399
+
400
def translate_text(self, text: str, target_lang: str, source_lang: str = 'auto') -> str:
    """Translate ``text`` by trying each backend in turn until one succeeds.

    When ``self.force_llm_translation`` is set only the LLM is tried;
    otherwise the order is LLM, MyMemory, LibreTranslate, dictionary. If
    every backend fails or returns blank output, the dictionary translation
    is used as the final fallback.

    Args:
        text: Text to translate. Empty or whitespace-only input is returned
            unchanged without touching any backend.
        target_lang: Target language code.
        source_lang: Source language code, or ``'auto'``.

    Returns:
        The stripped translation from the first backend that produced
        non-blank output, or the dictionary fallback result.

    Side effects:
        ``self.last_translation_method`` is cleared on entry and then set to
        the lowercase name of the backend whose result is returned. This
        stops a value left over from a previous call, or from a backend that
        failed part-way, from being reported for this translation.
    """
    if not text or not text.strip():
        return text

    # Forget whatever an earlier call recorded.
    self.last_translation_method = None

    translation_methods = [
        ('LLM', lambda: self.translate_with_llm(text, target_lang, source_lang)),
    ]
    if not self.force_llm_translation:
        translation_methods += [
            ('MyMemory', lambda: self.translate_with_mymemory(text, target_lang, source_lang)),
            ('LibreTranslate', lambda: self.translate_with_libre_translate(text, target_lang, source_lang)),
            ('Dictionary', lambda: self.translate_with_dictionary(text, target_lang)),
        ]

    for method_name, method_func in translation_methods:
        try:
            result = method_func()
        except Exception as e:
            self.logger.warning(f"{method_name} translation failed: {e}")
            continue
        if result and result.strip():
            self.logger.info(f"Translation successful using {method_name}")
            # Credit the backend that actually produced the returned text.
            self.last_translation_method = method_name.lower()
            return result.strip()

    # Final fallback
    result = self.translate_with_dictionary(text, target_lang)
    self.last_translation_method = 'dictionary'
    return result
434
+
435
def translate_query(self, query: str) -> Dict[str, Any]:
    """Turn a possibly-Sinhala user query into English and report how.

    Non-Sinhala input is passed through with method ``'none'``. When
    ``self.use_pattern_translation`` is enabled, the Sinhala fare-pattern
    parser gets the first try (method ``'pattern'``). Otherwise the query
    goes through ``translate_text`` followed by
    ``_normalize_english_query``.

    Returns:
        A dict with ``is_sinhala``, ``original_query``, ``translated_query``
        and ``translation_method``.
    """
    def outcome(is_si, english, how):
        return {
            'is_sinhala': is_si,
            'original_query': query,
            'translated_query': english,
            'translation_method': how,
        }

    if not self.is_sinhala_text(query):
        return outcome(False, query, 'none')

    # Optional Sinhala-specific fare parsing, only when explicitly enabled.
    pattern_hit = self._parse_sinhala_fare_query(query) if self.use_pattern_translation else None
    if pattern_hit:
        self.logger.info(f"Pattern-based Sinhala fare parse: '{query}' -> '{pattern_hit}'")
        return outcome(True, pattern_hit, 'pattern')

    # General translation, then map English synonyms onto the NLP vocabulary.
    english = self._normalize_english_query(self.translate_text(query, 'en', 'si'))

    how = self.last_translation_method
    if not how:
        how = 'llm' if self.openai_api_key else 'dictionary'
    self.logger.info(f"Translated Sinhala query ({how}): '{query}' -> '{english}'")

    return outcome(True, english, how)
470
+
471
+ def _normalize_english_query(self, text: str) -> str:
472
+ """Normalize English synonyms to match NLP patterns (fare/price/cost)."""
473
+ if not text:
474
+ return text
475
+ normalized = text
476
+ replacements = {
477
+ 'fees': 'fare',
478
+ 'fee': 'fare',
479
+ 'charges': 'cost',
480
+ 'charge': 'cost',
481
+ 'ticket price': 'fare',
482
+ 'ticket fare': 'fare',
483
+ 'bus ticket': 'bus fare',
484
+ }
485
+ # Lowercase operate, then restore original casing minimally by returning lowercase; downstream lowercases anyway
486
+ lower = normalized.lower()
487
+ for old, new in replacements.items():
488
+ lower = lower.replace(old, new)
489
+ return lower
490
+
491
def translate_response(self, response: Dict[str, Any]) -> Dict[str, Any]:
    """Build a Sinhala copy of an English response dict.

    Works on a shallow copy of ``response``. The ``message``, each entry of
    ``suggestions``, and the ``original``/``corrected`` fields of each entry
    in ``corrections`` are passed through ``translate_text`` (English to
    Sinhala). A ``translation_info`` entry is always added.

    Returns:
        The translated copy; ``response`` itself is not reassigned.
    """
    def to_sinhala(value):
        return self.translate_text(value, 'si', 'en')

    out = response.copy()

    # Main message
    if 'message' in response:
        out['message'] = to_sinhala(response['message'])

    # Suggestions, if any
    suggestions = response.get('suggestions')
    if suggestions:
        out['suggestions'] = [to_sinhala(item) for item in suggestions]

    # Corrections, if any: translate the two text fields, keep the rest
    corrections = response.get('corrections')
    if corrections:
        converted = []
        for entry in corrections:
            entry_copy = entry.copy()
            for field in ('original', 'corrected'):
                if field in entry:
                    entry_copy[field] = to_sinhala(entry[field])
            converted.append(entry_copy)
        out['corrections'] = converted

    # Translation metadata
    method = 'llm' if self.openai_api_key else 'dictionary'
    out['translation_info'] = {
        'translated': True,
        'translation_method': method,
    }
    return out
531
+
532
def get_sinhala_examples(self) -> Dict[str, Any]:
    """Return the built-in sample Sinhala queries, grouped by query category.

    Returns:
        A dict keyed by category name. Each value is a list of dicts with a
        ``query`` (the sample question) and a ``description`` of it.
    """
    # (category, ((query, description), ...)) in display order.
    catalogue = (
        ('fare_queries', (
            ('කොළඹ සිට මහනුවරට යන බස් ගාස්තුව කීයද?',
             'කොළඹ සිට මහනුවරට යන බස් ගාස්තුව සොයන්න'),
            ('මාතර සිට ගාල්ලට යන මිල කීයද?',
             'මාතර සිට ගාල්ලට යන මිල සොයන්න'),
            ('අනුරාධපුර සිට කොළඹට යන වාරිකය',
             'අනුරාධපුර සිට කොළඹට යන වාරිකය සොයන්න'),
        )),
        ('comparison_queries', (
            ('කොළඹ සිට මහනුවරට සහ කොළඹ සිට ගාල්ලට යන ගාස්තු සසඳන්න',
             'විවිධ මාර්ගවල ගාස්තු සසඳන්න'),
            ('කොළඹ සිට මහනුවරට සහ කොළඹ සිට අනුරාධපුරට යන ගාස්තුවල වෙනස කීයද?',
             'මාර්ග දෙකක ගාස්තු වෙනස සොයන්න'),
        )),
        ('range_queries', (
            ('රුපියල් 500 ට අඩු ගාස්තු සහිත මාර්ග සොයන්න',
             'රුපියල් 500 ට අඩු ගාස්තු සහිත මාර්ග සොයන්න'),
            ('රුපියල් 200 සහ 800 අතර ගාස්තු සහිත මාර්ග පෙන්වන්න',
             'රුපියල් 200 සහ 800 අතර ගාස්තු සහිත මාර්ග සොයන්න'),
        )),
        ('recommendation_queries', (
            ('අඩු මිලේ මාර්ග නිර්දේශ කරන්න',
             'අඩු මිලේ මාර්ග නිර්දේශ කරන්න'),
            ('ප්‍රසිද්ධ ගමනාන්ත පෙන්වන්න',
             'ප්‍රසිද්ධ ගමනාන්ත සොයන්න'),
        )),
        ('statistical_queries', (
            ('සාමාන්‍ය ගාස්තුව කීයද?',
             'සාමාන්‍ය ගාස්තුව සොයන්න'),
            ('දත්ත ගබඩා සංඛ්‍යාලේඛන',
             'දත්ත ගබඩා සංඛ්‍යාලේඛන සොයන්න'),
        )),
    )

    return {
        category: [
            {'query': question, 'description': summary}
            for question, summary in entries
        ]
        for category, entries in catalogue
    }
592
+
593
def test_translation(self) -> Dict[str, Any]:
    """Run the built-in Sinhala->English self-test and summarize the outcome.

    Every sample query is translated with ``translate_text`` and compared
    case-insensitively against its expected English text:

    * ``'exact'``   - identical to the expected text;
    * ``'good'``    - non-empty, and one string contains the other;
    * ``'partial'`` - anything else, including an empty translation.

    For comparison queries (those containing the stem ``සසඳ``) the result is
    also checked for the word "compare" and reported as
    ``intent_preserved``.

    Returns:
        A dict with ``translation_service_status``, ``available_methods``,
        a ``summary`` of the counts, and the per-query ``test_results``.
    """
    test_cases = [
        {
            'sinhala': 'කොළඹ සිට මහනුවරට යන බස් ගාස්තුව කීයද?',
            'expected_english': 'What is the bus fare from Colombo to Kandy?'
        },
        {
            'sinhala': 'මාතර සිට ගාල්ලට යන මිල කීයද?',
            'expected_english': 'How much is the price from Matara to Galle?'
        },
        {
            'sinhala': 'කොළඹ සිට පානදුර දක්වා සහ කොළඹ සිට ගාල්ල දක්වා ගාස්තු සසඳා බලන්න.',
            'expected_english': 'Compare fares from Colombo to Panadura and from Colombo to Galle.'
        },
        {
            'sinhala': 'රුපියල් 500 ට අඩු ගාස්තු සහිත මාර්ග පෙන්වන්න.',
            'expected_english': 'Show routes with fares under 500 rupees.'
        },
        {
            'sinhala': 'අඩු මිලේ මාර්ග නිර්දේශ කරන්න.',
            'expected_english': 'Recommend cheap routes.'
        },
        {
            'sinhala': 'කොළඹ සිට යන මාර්ග මොනවාද?',
            'expected_english': 'What routes depart from Colombo?'
        },
        {
            'sinhala': 'සාමාන්‍ය ගාස්තුව කීයද?',
            'expected_english': 'What is the average fare?'
        },
        {
            'sinhala': 'කඩුවෙල සිට මාතර දක්වා සහ ගාල්ල දක්වා බස් ගාස්තු සසඳන්න.',
            'expected_english': 'Compare bus fares from Kaduwela to Matara and to Galle.'
        },
        {
            'sinhala': 'කොළඹ සිට ගාල්ල දක්වා ටිකට් මිල කීයද?',
            'expected_english': 'What is the ticket price from Colombo to Galle?'
        },
        {
            'sinhala': 'රුපියල් 1000 ට වැඩි ගාස්තු සහිත මාර්ග සදහන් කරන්න.',
            'expected_english': 'List routes with fares over 1000 rupees.'
        }
    ]

    results = []
    total_exact = 0
    total_good = 0
    total_tests = len(test_cases)

    for test_case in test_cases:
        sinhala = test_case['sinhala']
        expected = test_case['expected_english']
        is_sinhala = self.is_sinhala_text(sinhala)

        # Reset method tracker and translate
        self.last_translation_method = None
        translated = self.translate_text(sinhala, 'en', 'si') or ''

        tr = translated.strip()
        ex = expected.strip()
        tr_low = tr.lower()
        ex_low = ex.lower()

        # Accuracy heuristic. The emptiness guard matters: '' is a substring
        # of every string, so a blank translation must not score as 'good'.
        if tr_low == ex_low:
            accuracy = 'exact'
            total_exact += 1
            total_good += 1
        elif tr_low and (tr_low in ex_low or ex_low in tr_low):
            accuracy = 'good'
            total_good += 1
        else:
            accuracy = 'partial'

        # Intent preservation check for comparisons. The stem 'සසඳ' covers
        # both 'සසඳන්න' and 'සසඳා බලන්න'.
        intent_preserved = True
        if 'සසඳ' in sinhala:
            intent_preserved = ('compare' in tr_low)

        results.append({
            'sinhala_query': sinhala,
            'is_sinhala_detected': is_sinhala,
            'translated_english': tr,
            'expected_english': ex,
            'translation_accuracy': accuracy,
            'intent_preserved': intent_preserved,
            'method_used': self.last_translation_method or ('llm' if self.openai_api_key else 'dictionary')
        })

    summary = {
        'total_tests': total_tests,
        'exact_matches': total_exact,
        'good_or_better': total_good,
        'accuracy_rate_percent': round((total_good / total_tests) * 100, 2) if total_tests else 0
    }

    self.logger.info(f"Translation test summary: {summary}")

    return {
        'translation_service_status': 'active',
        'available_methods': {
            'llm': self.openai_api_key is not None,
            'libre_translate': True,
            'mymemory': True,
            'dictionary': True
        },
        'summary': summary,
        'test_results': results
    }