Spaces:

TuanMinhajSeedin
/

Transport

Sleeping

App Files Files Community

TuanMinhajSeedin committed on Aug 22

Commit

d75ff9c

verified ·

1 Parent(s): a1c87f9

Upload 15 files

Browse files

Files changed (15) hide show

.env +14 -0
.gitattributes +14 -35
.gitignore +175 -0
Dockerfile +32 -0
README.md +107 -11
app.py +974 -0
config.py +263 -0
enhanced_nlp_processor.py +904 -0
llm_query_processor.py +351 -0
logger.py +53 -0
neo4j_service.py +222 -0
requirements.txt +10 -0
spell_corrector.py +257 -0
templates/index.html +977 -0
translation_service.py +702 -0

.env ADDED Viewed

	@@ -0,0 +1,14 @@

+# NOTE: real credentials must never be committed. Every value that was pushed here must be treated as leaked: rotate it and supply the replacement through the host's secret store.
+GIT = <redacted-github-token>
+NEO4J_URI = bolt://44.201.107.35:7687
+NEO4J_USER = neo4j
+# NEO4J_PASSWORD = "<redacted-previous-password>"
+NEO4J_PASSWORD = "<redacted-neo4j-password>"
+# OpenAI Configuration (for LLM)
+OPENAI_API_KEY = <redacted-openai-api-key>
+OPENAI_MODEL = gpt-3.5-turbo
+# Flask Configuration
+SECRET_KEY = <set-a-long-random-secret>

.gitattributes CHANGED Viewed

@@ -1,35 +1,14 @@
-*.7z filter=lfs diff=lfs merge=lfs -text
-*.arrow filter=lfs diff=lfs merge=lfs -text
-*.bin filter=lfs diff=lfs merge=lfs -text
-*.bz2 filter=lfs diff=lfs merge=lfs -text
-*.ckpt filter=lfs diff=lfs merge=lfs -text
-*.ftz filter=lfs diff=lfs merge=lfs -text
-*.gz filter=lfs diff=lfs merge=lfs -text
-*.h5 filter=lfs diff=lfs merge=lfs -text
-*.joblib filter=lfs diff=lfs merge=lfs -text
-*.lfs.* filter=lfs diff=lfs merge=lfs -text
-*.mlmodel filter=lfs diff=lfs merge=lfs -text
-*.model filter=lfs diff=lfs merge=lfs -text
-*.msgpack filter=lfs diff=lfs merge=lfs -text
-*.npy filter=lfs diff=lfs merge=lfs -text
-*.npz filter=lfs diff=lfs merge=lfs -text
-*.onnx filter=lfs diff=lfs merge=lfs -text
-*.ot filter=lfs diff=lfs merge=lfs -text
-*.parquet filter=lfs diff=lfs merge=lfs -text
-*.pb filter=lfs diff=lfs merge=lfs -text
-*.pickle filter=lfs diff=lfs merge=lfs -text
-*.pkl filter=lfs diff=lfs merge=lfs -text
-*.pt filter=lfs diff=lfs merge=lfs -text
-*.pth filter=lfs diff=lfs merge=lfs -text
-*.rar filter=lfs diff=lfs merge=lfs -text
-*.safetensors filter=lfs diff=lfs merge=lfs -text
-saved_model/**/* filter=lfs diff=lfs merge=lfs -text
-*.tar.* filter=lfs diff=lfs merge=lfs -text
-*.tar filter=lfs diff=lfs merge=lfs -text
-*.tflite filter=lfs diff=lfs merge=lfs -text
-*.tgz filter=lfs diff=lfs merge=lfs -text
-*.wasm filter=lfs diff=lfs merge=lfs -text
-*.xz filter=lfs diff=lfs merge=lfs -text
-*.zip filter=lfs diff=lfs merge=lfs -text
-*.zst filter=lfs diff=lfs merge=lfs -text
-*tfevents* filter=lfs diff=lfs merge=lfs -text

+*.py linguist-language=Python
+*.html linguist-language=HTML
+*.css linguist-language=CSS
+*.js linguist-language=JavaScript
+*.md linguist-language=Markdown
+*.txt linguist-language=Text
+*.json linguist-language=JSON
+*.csv linguist-language=CSV
+*.pdf linguist-documentation
+*.png linguist-documentation
+*.jpg linguist-documentation
+*.jpeg linguist-documentation
+*.gif linguist-documentation
+*.svg linguist-documentation

.gitignore ADDED Viewed

	@@ -0,0 +1,175 @@

+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# UV
+#   Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#uv.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
+.pdm.toml
+.pdm-python
+.pdm-build/
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+config.py
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+# Ruff stuff:
+.ruff_cache/
+# PyPI configuration file
+.pypirc

Dockerfile ADDED Viewed

	@@ -0,0 +1,32 @@

+FROM python:3.9-slim
+# All following instructions (and the running container) operate from /app
+WORKDIR /app
+# Install the C and C++ compilers, then drop the apt package lists in the
+# same layer so they do not end up in the image
+RUN apt-get update && apt-get install -y \
+    gcc \
+    g++ \
+    && rm -rf /var/lib/apt/lists/*
+# Copy requirements.txt on its own first so the dependency layer below is
+# only rebuilt when the requirements change, not on every source edit
+COPY requirements.txt .
+# Install Python dependencies without keeping pip's download cache
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy the rest of the build context into /app
+# NOTE(review): no .dockerignore is visible here, so this would also copy
+# a local .env file into the image - confirm
+COPY . .
+# Create logs directory
+RUN mkdir -p logs
+# Document the port the container listens on
+EXPOSE 7860
+# Runtime environment variables
+# NOTE(review): PORT is presumably read by app.py's startup code, which is
+# not shown in this view - confirm
+ENV FLASK_ENV=production
+ENV PORT=7860
+# Start the application by running app.py directly with the Python interpreter
+CMD ["python", "app.py"]

README.md CHANGED Viewed

@@ -1,11 +1,107 @@
----
-title: Transport
-emoji: 🏆
-colorFrom: blue
-colorTo: indigo
-sdk: docker
-pinned: false
-license: mit
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

+# 🚌 Natural Language Transport Query System
+A sophisticated Flask application that provides natural language querying capabilities for Sri Lankan transport information, featuring Sinhala-English translation and Neo4j integration.
+## 🌟 Features
+- **Natural Language Processing**: Advanced NLP for understanding transport queries
+- **Multilingual Support**: Sinhala-English translation with LLM integration
+- **Spell Correction**: Fuzzy matching and LLM-based location correction
+- **Neo4j Integration**: Graph database for efficient route and fare queries
+- **Enhanced Query Types**: Support for comparisons, ranges, recommendations
+- **RESTful API**: Comprehensive API endpoints for all functionality
+## 🚀 Quick Start
+### Local Development
+```bash
+# Install dependencies
+pip install -r requirements.txt
+# Set environment variables
+export OPENAI_API_KEY="your_openai_key"
+export NEO4J_URI="your_neo4j_uri"
+export NEO4J_USER="your_neo4j_user"
+export NEO4J_PASSWORD="your_neo4j_password"
+# Run the application
+python app.py
+```
+### Hugging Face Spaces
+This application is deployed on Hugging Face Spaces and is accessible via the provided URL.
+## 📡 API Endpoints
+### Core Query Processing
+- `POST /api/query` - Process natural language transport queries
+- `GET /api/status` - System status and statistics
+- `GET /api/places` - Get all available places
+### NLP Capabilities
+- `GET /api/nlp/capabilities` - View enhanced NLP capabilities
+- `GET /api/nlp/demo` - Get comprehensive demo queries
+- `POST /api/nlp/test` - Test queries with detailed analysis
+- `GET /api/nlp/test-all-types` - Test all query types
+### Translation Services
+- `POST /api/translation/translate` - Translate text between languages
+- `GET /api/translation/test` - Test translation functionality
+- `GET /api/sinhala/examples` - Get Sinhala example queries
+### Utilities
+- `POST /api/suggestions` - Get location suggestions for autocomplete
+- `GET /api/examples` - Get categorized example queries
+## 🔧 Configuration
+The application uses environment variables for configuration:
+```bash
+# OpenAI Configuration
+OPENAI_API_KEY=your_openai_api_key
+# Neo4j Configuration
+NEO4J_URI=bolt://localhost:7687
+NEO4J_USER=neo4j
+NEO4J_PASSWORD=password
+# Translation Configuration
+FORCE_LLM_TRANSLATION=true
+USE_PATTERN_TRANSLATION=false
+# Logging Configuration
+LOG_LEVEL=INFO
+LOG_DIR=logs
+```
+## 📊 Query Examples
+### English Queries
+- "What is the fare from Colombo to Kandy?"
+- "Show me routes from Galle to Matara"
+- "Compare fares from Colombo to Panadura and Colombo to Galle"
+- "Find routes under 500 LKR"
+### Sinhala Queries
+- "කොළඹ සිට මහනුවරට ගාස්තුව කීයද?"
+- "ගාල්ල සිට මාතර දක්වා මාර්ග පෙන්වන්න"
+- "කොළඹ සිට පානදුර සහ කොළඹ සිට ගාල්ල ගාස්තු සසඳා බලන්න"
+## 🏗️ Architecture
+- **Flask**: Web framework
+- **OpenAI GPT**: LLM for translation and query interpretation
+- **Neo4j**: Graph database for transport data
+- **FuzzyWuzzy**: Spell correction and fuzzy matching
+- **Pandas**: Data processing and manipulation
+## 📝 License
+This project is licensed under the MIT License.
+## 🤝 Contributing
+Contributions are welcome! Please feel free to submit a Pull Request.

app.py ADDED Viewed

	@@ -0,0 +1,974 @@

+#!/usr/bin/env python3
+"""
+Main Flask Application for Transport Query System
+"""
+from flask import Flask, render_template, request, jsonify, session
+import os
+from llm_query_processor import LLMQueryProcessor
+from enhanced_nlp_processor import EnhancedNLPProcessor
+from spell_corrector import SpellCorrector
+from neo4j_service import Neo4jService
+from translation_service import TranslationService
+from logger import get_logger
+from config import Config
+# Flask application object; its settings are loaded from the Config class.
+app = Flask(__name__)
+app.config.from_object(Config)
+# Logger used by the route handlers in this module.
+logger = get_logger("FlaskApp")
+# Initialize services
+# Each service is constructed once, at import time, and the resulting
+# module-level objects are shared by the route handlers below.
+query_processor = LLMQueryProcessor()
+enhanced_nlp_processor = EnhancedNLPProcessor()
+spell_corrector = SpellCorrector()
+neo4j_service = Neo4jService()
+translation_service = TranslationService()
+@app.route('/')
+def index():
+    """Main page"""
+    return render_template('index.html')
+@app.route('/api/query', methods=['POST'])
+def process_query():
+    """Process user query with enhanced NLP and translation support"""
+    try:
+        data = request.get_json()
+        user_query = data.get('query', '').strip()
+        use_enhanced_nlp = data.get('enhanced_nlp', True)  # Default to enhanced NLP
+        if not user_query:
+            return jsonify({
+                'success': False,
+                'message': 'Please enter a query.'
+            })
+        # Check if query is in Sinhala and translate if needed
+        translation_info = translation_service.translate_query(user_query)
+        # Use translated query for processing
+        query_to_process = translation_info['translated_query']
+        # Log translation info to console
+        if translation_info['is_sinhala']:
+            logger.info(f"Translation: si->en method={translation_info['translation_method']} original='{translation_info['original_query']}' translated='{translation_info['translated_query']}'")
+        else:
+            logger.info(f"Processing English Query: '{user_query}'")
+        # Process the query with enhanced NLP or fallback to basic processor
+        if use_enhanced_nlp:
+            result = enhanced_nlp_processor.process_query(query_to_process)
+        else:
+            result = query_processor.process_query(query_to_process)
+        # If original query was in Sinhala, translate the response back
+        if translation_info['is_sinhala']:
+            print(f"   English Response: {result.get('message', 'No message')}")
+            result = translation_service.translate_response(result)
+            result['translation_info'] = translation_info
+            print(f"   Sinhala Response: {result.get('message', 'No message')}")
+            print(f"   Translation Complete ✅")
+        logger.info(f"Response success={result.get('success')} type={result.get('query_type','n/a')} message='{result.get('message','')[:120]}'")
+        return jsonify(result)
+    except Exception as e:
+        return jsonify({
+            'success': False,
+            'message': f'Error processing query: {str(e)}'
+        })
+@app.route('/api/suggestions', methods=['POST'])
+def get_suggestions():
+    """Get location suggestions for autocomplete"""
+    try:
+        data = request.get_json()
+        partial_location = data.get('location', '').strip()
+        if not partial_location:
+            return jsonify({'suggestions': []})
+        suggestions = spell_corrector.get_suggestions(partial_location)
+        return jsonify({
+            'suggestions': [{'name': name, 'confidence': conf} for name, conf in suggestions]
+        })
+    except Exception as e:
+        return jsonify({
+            'success': False,
+            'message': f'Error getting suggestions: {str(e)}'
+        })
+@app.route('/api/status')
+def get_status():
+    """Get system status"""
+    try:
+        neo4j_connected = neo4j_service.is_connected()
+        places = neo4j_service.get_all_places() if neo4j_connected else []
+        stats = neo4j_service.get_route_statistics() if neo4j_connected else {}
+        return jsonify({
+            'neo4j_connected': neo4j_connected,
+            'total_places': len(places),
+            'statistics': stats
+        })
+    except Exception as e:
+        return jsonify({
+            'success': False,
+            'message': f'Error getting status: {str(e)}'
+        })
+@app.route('/api/places')
+def get_places():
+    """Get all available places"""
+    try:
+        places = neo4j_service.get_all_places()
+        return jsonify({
+            'success': True,
+            'places': places
+        })
+    except Exception as e:
+        return jsonify({
+            'success': False,
+            'message': f'Error getting places: {str(e)}'
+        })
+@app.route('/api/sinhala/examples')
+def get_sinhala_examples():
+    """Get example queries in Sinhala"""
+    try:
+        sinhala_examples = translation_service.get_sinhala_examples()
+        return jsonify({
+            'success': True,
+            'examples': sinhala_examples
+        })
+    except Exception as e:
+        return jsonify({
+            'success': False,
+            'message': f'Error getting Sinhala examples: {str(e)}'
+        })
+@app.route('/api/translation/test')
+def test_translation():
+    """Test translation functionality"""
+    try:
+        test_results = translation_service.test_translation()
+        return jsonify({
+            'success': True,
+            'test_results': test_results
+        })
+    except Exception as e:
+        return jsonify({
+            'success': False,
+            'message': f'Error testing translation: {str(e)}'
+        })
+@app.route('/api/translation/translate', methods=['POST'])
+def translate_text():
+    """Translate text between Sinhala and English"""
+    try:
+        data = request.get_json()
+        text = data.get('text', '').strip()
+        target_lang = data.get('target_lang', 'en')  # 'en' or 'si'
+        source_lang = data.get('source_lang', 'auto')
+        if not text:
+            return jsonify({
+                'success': False,
+                'message': 'Please provide text to translate.'
+            })
+        translated_text = translation_service.translate_text(text, target_lang, source_lang)
+        is_sinhala = translation_service.is_sinhala_text(text)
+        return jsonify({
+            'success': True,
+            'original_text': text,
+            'translated_text': translated_text,
+            'source_language': 'si' if is_sinhala else 'en',
+            'target_language': target_lang,
+            'translation_method': 'google' if translation_service.google_translate_api_key else 'dictionary'
+        })
+    except Exception as e:
+        return jsonify({
+            'success': False,
+            'message': f'Error translating text: {str(e)}'
+        })
+@app.route('/api/nlp/capabilities')
+def get_nlp_capabilities():
+    """Get information about natural language processing capabilities with live examples.
+
+    Builds a static catalogue of query types, runs the first two example
+    queries of each type through ``enhanced_nlp_processor.process_query`` and
+    returns both the catalogue and the live results, together with static
+    feature descriptions, as ``{'success': True, 'capabilities': {...}}``.
+
+    A failure while processing one example is recorded in that example's
+    ``result`` rather than aborting the request. Unlike the other handlers in
+    this module there is no outer try/except around the function body.
+    """
+    # Static catalogue of supported query types; also returned verbatim to the
+    # client under 'query_types'.
+    test_queries = [
+        {
+            'type': 'fare_inquiry',
+            'description': 'Find fare between two specific locations',
+            'examples': [
+                'What is the fare from Colombo to Kandy?',
+                'fare of anuradhapura to kandy',
+                'price from panadura to galle',
+                'Colombo to Kandy fare'
+            ]
+        },
+        {
+            'type': 'comparison',
+            'description': 'Compare fares between different routes',
+            'examples': [
+                'Compare fares from Colombo to Kandy vs Colombo to Galle',
+                'Which is cheaper between Colombo to Kandy and Colombo to Anuradapura?',
+                'What is the difference in fare between Panadura to Galle and Panadura to Matara?'
+            ]
+        },
+        {
+            'type': 'range_search',
+            'description': 'Find routes within specific price ranges',
+            'examples': [
+                'Find routes under 500 rupees',
+                'Show me routes between 200 and 800 rupees',
+                'Routes over 1000 rupees'
+            ]
+        },
+        {
+            'type': 'recommendation',
+            'description': 'Get route recommendations based on criteria',
+            'examples': [
+                'Recommend cheap routes',
+                'Show me popular destinations',
+                'What are the best routes from Colombo?'
+            ]
+        },
+        {
+            'type': 'route_inquiry',
+            'description': 'Find routes from/to specific locations',
+            'examples': [
+                'Routes from Colombo',
+                'Routes to Galle',
+                'What routes depart from Kandy?'
+            ]
+        },
+        {
+            'type': 'statistics',
+            'description': 'Get database overview and statistics',
+            'examples': [
+                'What is the average fare?',
+                'Database statistics',
+                'How many routes are there?'
+            ]
+        }
+    ]
+    # Process each test query to get actual results
+    live_examples = []
+    for query_type in test_queries:
+        type_examples = []
+        for example_query in query_type['examples'][:2]:  # Test first 2 examples
+            try:
+                result = enhanced_nlp_processor.process_query(example_query)
+                type_examples.append({
+                    'query': example_query,
+                    'result': result
+                })
+            except Exception as e:
+                # Record the failure for this example only and keep going.
+                type_examples.append({
+                    'query': example_query,
+                    'result': {
+                        'success': False,
+                        'message': f'Error: {str(e)}'
+                    }
+                })
+        live_examples.append({
+            'type': query_type['type'],
+            'description': query_type['description'],
+            'examples': type_examples
+        })
+    # Response payload: static feature descriptions plus the catalogue and the
+    # live results gathered above.
+    capabilities = {
+        'natural_language_processing': {
+            'description': 'Advanced NLP for transport queries with enhanced understanding',
+            'features': [
+                'Multiple query formats (fare, price, cost)',
+                'Natural language patterns (from X to Y, X to Y fare, etc.)',
+                'Question formats (What is, How much, Show me, etc.)',
+                'Compact formats (Colombo to Kandy fare)',
+                'Spell correction and fuzzy matching',
+                'Automatic location name correction',
+                'LLM-powered query interpretation',
+                'Fallback keyword-based processing',
+                'Advanced intent classification',
+                'Entity extraction and normalization',
+                'Confidence scoring for query understanding'
+            ]
+        },
+        'query_types': test_queries,
+        'live_examples': live_examples,
+        'spell_correction': {
+            'description': 'Automatic location name correction',
+            'methods': [
+                'Direct mapping (exact matches)',
+                'Fuzzy matching (similar names)',
+                'LLM correction (AI-powered)',
+                'Partial matching (substring matching)'
+            ],
+            'examples': [
+                'panadra → Panadura',
+                'gale → Galle',
+                'colmbo → Colombo',
+                'kandee → Kandy'
+            ]
+        },
+        'llm_integration': {
+            'description': 'AI-powered query interpretation with LLM Cypher generation',
+            'features': [
+                'Automatic query type detection',
+                'LLM-powered Cypher query generation',
+                'Natural language understanding',
+                'Fallback to keyword-based processing',
+                'Advanced entity extraction',
+                'Intent classification with confidence scoring',
+                'Real-time database querying'
+            ]
+        },
+        'enhanced_features': {
+            'description': 'Advanced NLP capabilities',
+            'features': [
+                'Multi-intent query understanding',
+                'Context-aware responses',
+                'Query preprocessing and normalization',
+                'Advanced pattern matching',
+                'Confidence-based result ranking',
+                'Comprehensive query analysis',
+                'Live database results for all query types'
+            ]
+        }
+    }
+    return jsonify({
+        'success': True,
+        'capabilities': capabilities
+    })
+@app.route('/api/nlp/test', methods=['POST'])
+def test_nlp_query():
+    """Test a natural language query and return detailed analysis"""
+    try:
+        data = request.get_json()
+        user_query = data.get('query', '').strip()
+        use_enhanced_nlp = data.get('enhanced_nlp', True)
+        if not user_query:
+            return jsonify({
+                'success': False,
+                'message': 'Please provide a query to test.'
+            })
+        # Get detailed analysis
+        analysis = {
+            'original_query': user_query,
+            'processing_steps': []
+        }
+        # Step 1: Extract locations
+        locations = spell_corrector.extract_locations_from_query(user_query)
+        analysis['processing_steps'].append({
+            'step': 'Location Extraction',
+            'locations_found': len(locations),
+            'details': [
+                {
+                    'original': loc[0],
+                    'corrected': loc[1],
+                    'confidence': loc[2],
+                    'method': loc[3]
+                } for loc in locations
+            ]
+        })
+        # Step 2: Process query with enhanced NLP
+        if use_enhanced_nlp:
+            result = enhanced_nlp_processor.process_query(user_query)
+            analysis['processing_steps'].append({
+                'step': 'Enhanced NLP Processing',
+                'success': result.get('success', False),
+                'query_type': result.get('query_type', 'unknown'),
+                'message': result.get('message', ''),
+                'confidence': result.get('query_analysis', {}).get('confidence', 0),
+                'intent': result.get('query_analysis', {}).get('intent', {}),
+                'entities': result.get('query_analysis', {}).get('entities', {})
+            })
+        else:
+            result = query_processor.process_query(user_query)
+            analysis['processing_steps'].append({
+                'step': 'Basic Query Processing',
+                'success': result.get('success', False),
+                'query_type': result.get('query_type', 'unknown'),
+                'message': result.get('message', ''),
+                'cypher_query': result.get('cypher_query', ''),
+                'corrections': result.get('corrections', [])
+            })
+        # Step 3: Results
+        if result.get('success') and result.get('data'):
+            analysis['processing_steps'].append({
+                'step': 'Database Results',
+                'results_count': len(result['data']),
+                'sample_results': result['data'][:3]  # Show first 3 results
+            })
+        return jsonify({
+            'success': True,
+            'analysis': analysis,
+            'result': result
+        })
+    except Exception as e:
+        return jsonify({
+            'success': False,
+            'message': f'Error testing NLP query: {str(e)}'
+        })
+@app.route('/api/nlp/demo')
+def get_nlp_demo():
+    """Get a comprehensive demo of natural language capabilities.
+
+    Returns a fixed, hard-coded set of demo queries grouped by category as
+    ``{'success': True, 'demo': {'title', 'description', 'categories'}}``.
+    None of the queries is executed here.
+    """
+    # Static demo catalogue: one entry per category, each with its queries.
+    demo_queries = [
+        {
+            'category': 'Basic Fare Queries',
+            'queries': [
+                'What is the fare from Colombo to Kandy?',
+                'fare of anuradhapura to kandy',
+                'price from panadura to galle',
+                'Colombo to Kandy fare'
+            ]
+        },
+        {
+            'category': 'Comparison Queries',
+            'queries': [
+                'Compare fares from Colombo to Kandy vs Colombo to Galle',
+                'Which is cheaper between Colombo to Kandy and Colombo to Anuradapura?',
+                'What is the difference in fare between Panadura to Galle and Panadura to Matara?'
+            ]
+        },
+        {
+            'category': 'Range Search Queries',
+            'queries': [
+                'Find routes under 500 rupees',
+                'Show me routes between 200 and 800 rupees',
+                'Routes over 1000 rupees'
+            ]
+        },
+        {
+            'category': 'Recommendation Queries',
+            'queries': [
+                'Recommend cheap routes',
+                'Show me popular destinations',
+                'What are the best routes from Colombo?'
+            ]
+        },
+        {
+            'category': 'Statistical Queries',
+            'queries': [
+                'What is the average fare?',
+                'Database statistics',
+                'How many routes are there?'
+            ]
+        },
+        {
+            'category': 'Route Queries',
+            'queries': [
+                'Show me the cheapest routes',
+                'Routes from Colombo',
+                'Routes to Galle',
+                'What routes depart from Kandy?'
+            ]
+        },
+        {
+            # Place names in this category are misspelled on purpose.
+            'category': 'Spell Correction Tests',
+            'queries': [
+                'price from panadra to gale',
+                'fare of colmbo to kandee',
+                'cost from anuradapura to kandy'
+            ]
+        }
+    ]
+    return jsonify({
+        'success': True,
+        'demo': {
+            'title': 'Enhanced Natural Language Transport Query Demo',
+            'description': 'Advanced NLP capabilities with comparison, range search, and recommendations',
+            'categories': demo_queries
+        }
+    })
+@app.route('/api/examples')
+def get_examples():
+    """Get comprehensive example queries showcasing natural language capabilities"""
+    examples = [
+        # === FARE QUERIES (Various Natural Language Formats) ===
+        {
+            'category': 'Fare Queries',
+            'examples': [
+                {
+                    # 'query': 'What is the fare from Colombo to Kandy?',
+                    'query': 'කොළඹ සිට මහනුවරට ගාස්තුව කීයද?',
+                    'description': 'Standard fare query format'
+                },
+                {
+                    'query': 'පානදුරේ ඉඳන් ගාල්ලට කීයක් යනවද?',
+                    'description': 'Alternative way to ask for fare'
+                },
+                {
+                    'query': 'අනුරාධපුර සිට මහනුවර දක්වා ගාස්තුව',
+                    'description': 'Natural language format'
+                },
+                {
+                    # 'query': 'price from panadura to galle',
+                    'query': 'පානදුරේ ඉඳන් ගාල්ලට කීයක් යනවද?',
+                    'description': 'Using "price" instead of "fare"'
+                },
+                {
+                    # 'query': 'Colombo to nuwara eliya fare',
+                    'query': 'බදුල්ල සිට කොළඹට ගාස්තුව කීයද?',
+                    'description': 'Compact format'
+                },
+                {
+                    # 'query': 'How much is the fare from matara to kandy?',
+                    'query': 'මහනුවර සිට මාතරට ගාස්තුව කීයද?',
+                    'description': 'Question format'
+                }
+            ]
+        },
+        # === COMPARISON QUERIES ===
+        {
+            'category': 'Comparison Queries',
+            'examples': [
+                {
+                    # 'query': 'Compare fares from Colombo to Kandy vs Colombo to Galle',
+                    'query': 'කොළඹ සිට මහනුවර දක්වා සහ කොළඹ සිට ගාල්ල දක්වා ගාස්තු සංසන්දනය කරන්න.',
+                    'description': 'Compare two different routes'
+                },
+                {
+                    # 'query': 'Which is cheaper between Colombo to Kandy and Colombo to Anuradapura?',
+                    'query': 'කොළඹ සිට මහනුවර දක්වා සහ කොළඹ සිට අනුරාධපුර දක්වා ලාභදායී වන්නේ කුමක්ද?',
+                    'description': 'Find the cheaper option'
+                },
+                {
+                    # 'query': 'What is the difference in fare between Panadura to Galle and Panadura to Matara?',
+                    'query': 'පානදුර සිට ගාල්ල දක්වා සහ පානදුර සිට මාතර දක්වා ගාස්තුවේ වෙනස කීයද?',
+                    'description': 'Calculate fare difference'
+                }
+            ]
+        },
+        # === RANGE SEARCH QUERIES ===
+        {
+            'category': 'Range Search Queries',
+            'examples': [
+                {
+                    # 'query': 'Find routes under 500 rupees',
+                    'query': 'රුපියල් 500ට අඩු මාර්ග සොයා ගන්න',
+                    'description': 'Find affordable routes'
+                },
+                {
+                    # 'query': 'Show me routes between 200 and 800 rupees',
+                    'query': 'රුපියල් 200 සහ 800 අතර මාර්ග සොයා ගන්න',
+                    'description': 'Find routes in price range'
+                },
+                {
+                    # 'query': 'Routes over 1000 rupees',
+                    'query': 'රුපියල් 1000ට ඉහළ මාර්ග සොයා ගන්න',
+                    'description': 'Find expensive routes'
+                }
+            ]
+        },
+        # === RECOMMENDATION QUERIES ===
+        {
+            'category': 'Recommendation Queries',
+            'examples': [
+                {
+                    # 'query': 'Recommend cheap routes',
+                    'query': 'ලාභ මාර්ග නිර්දේශ කරන්න',
+                    'description': 'Get budget-friendly recommendations'
+                },
+                {
+                    # 'query': 'Show me popular destinations',
+                    'query': 'මට ජනප්‍රිය ගමනාන්ත පෙන්වන්න',
+                    'description': 'Find frequently traveled routes'
+                },
+                {
+                    # 'query': 'What are the best routes from Colombo?',
+                    'query': 'කොළඹ සිට යාමට හොඳම මාර්ග මොනවාද?',
+                    'description': 'Get optimal route suggestions'
+                }
+            ]
+        },
+        # === STATISTICAL QUERIES ===
+        {
+            'category': 'Statistical Queries',
+            'examples': [
+                {
+                    # 'query': 'What is the average fare?',
+                    'query': 'සාමාන්‍ය ගාස්තුව කීයද?',
+                    'description': 'Get average fare statistics'
+                },
+                {
+                    # 'query': 'Database statistics',
+                    'query': 'දත්ත සමුදා සංඛ්යා ලේඛන',
+                    'description': 'Get comprehensive database overview'
+                },
+                {
+                    'query': 'මාර්ග කීයක් තිබේද?',
+                    'description': 'Count total routes'
+                }
+            ]
+        },
+        # === ROUTE QUERIES ===
+        {
+            'category': 'Route Queries',
+            'examples': [
+                {
+                    # 'query': 'Show me the cheapest routes',
+                    'query': 'මට ලාභදායී  මාර්ග 10ක්  පෙන්වන්න',
+                    'description': 'Find top 10 cheapest routes'
+                },
+                {
+                    # 'query': 'Routes from Colombo',
+                    'query': 'කොළඹ සිට යාමට මාර්ග මොනවාද?',
+                    'description': 'Find all routes departing from a location'
+                },
+                {
+                    # 'query': 'Routes to Galle',
+                    'query': 'ගාල්ල යාමට මාර්ග මොනවාද?',
+                    'description': 'Find all routes going to a location'
+                },
+                {
+                    # 'query': 'What routes depart from Kandy?',
+                    'query': 'මහනුවර සිට යාමට මාර්ග මොනවාද?',
+                    'description': 'Question format for routes'
+                }
+            ]
+        },
+        # === SPELLING ERROR EXAMPLES ===
+        {
+            'category': 'Spell Correction Examples',
+            'examples': [
+                {
+                    # 'query': 'price from panadra to gale',
+                    'query': 'පාන්දුරේ ඉඳන් ගාල්ල්ට කීයක් යනවද?',
+                    'description': 'Test spell correction (Panadura, Galle)'
+                },
+                {
+                    # 'query': 'fare of colmbo to kandee',
+                    'query': 'කොළ්බ්හ  සිට මහනුවර්ට ගාස්තුව කීයද?',
+                    'description': 'Test spell correction (Colombo, Kandy)'
+                },
+                {
+                    # 'query': 'cost from anuradapura to kandy',
+                    'query': 'අනුරපුර සිට මහනුවර්රට ගාස්තුව කීයද?',
+                    'description': 'Natural format with correct spelling'
+                }
+            ]
+        }
+    ]
+    return jsonify({
+        'success': True,
+        'examples': examples
+    })
+@app.route('/api/nlp/advanced', methods=['POST'])
+def advanced_nlp_query():
+    """Advanced NLP query processing with detailed analysis"""
+    try:
+        data = request.get_json()
+        user_query = data.get('query', '').strip()
+        if not user_query:
+            return jsonify({
+                'success': False,
+                'message': 'Please provide a query to process.'
+            })
+        # Process with enhanced NLP
+        result = enhanced_nlp_processor.process_query(user_query)
+        return jsonify(result)
+    except Exception as e:
+        return jsonify({
+            'success': False,
+            'message': f'Error processing advanced NLP query: {str(e)}'
+        })
+@app.route('/api/nlp/compare', methods=['POST'])
+def compare_routes():
+    """Compare multiple routes"""
+    try:
+        data = request.get_json()
+        routes = data.get('routes', [])
+        if len(routes) < 2:
+            return jsonify({
+                'success': False,
+                'message': 'Please provide at least 2 routes to compare.'
+            })
+        # Build comparison query
+        comparison_query = "MATCH "
+        for i, route in enumerate(routes):
+            from_loc = route.get('from')
+            to_loc = route.get('to')
+            if from_loc and to_loc:
+                if i > 0:
+                    comparison_query += ", "
+                comparison_query += f"(a{i}:Place {{name: '{from_loc}'}})-[r{i}:Fare]->(b{i}:Place {{name: '{to_loc}'}})"
+        comparison_query += " RETURN "
+        for i, route in enumerate(routes):
+            if i > 0:
+                comparison_query += ", "
+            comparison_query += f"a{i}.name + ' to ' + b{i}.name as route{i+1}, r{i}.fare as fare{i+1}"
+        # Execute query
+        with neo4j_service.driver.session() as session:
+            result = session.run(comparison_query)
+            results = [dict(record) for record in result]
+        return jsonify({
+            'success': True,
+            'data': results,
+            'message': f'Comparison of {len(routes)} routes completed'
+        })
+    except Exception as e:
+        return jsonify({
+            'success': False,
+            'message': f'Error comparing routes: {str(e)}'
+        })
+@app.route('/api/nlp/range', methods=['POST'])
+def search_by_range():
+    """Search routes by price range"""
+    try:
+        data = request.get_json()
+        min_price = data.get('min_price')
+        max_price = data.get('max_price')
+        if min_price is None and max_price is None:
+            return jsonify({
+                'success': False,
+                'message': 'Please provide min_price or max_price or both.'
+            })
+        # Build range query
+        range_query = "MATCH (a:Place)-[r:Fare]->(b:Place) WHERE "
+        conditions = []
+        if min_price is not None:
+            conditions.append(f"r.fare >= {min_price}")
+        if max_price is not None:
+            conditions.append(f"r.fare <= {max_price}")
+        range_query += " AND ".join(conditions)
+        range_query += " RETURN a.name as from_place, b.name as to_place, r.fare as fare ORDER BY r.fare"
+        # Execute query
+        with neo4j_service.driver.session() as session:
+            result = session.run(range_query)
+            results = [dict(record) for record in result]
+        return jsonify({
+            'success': True,
+            'data': results,
+            'message': f'Found {len(results)} routes in the specified range'
+        })
+    except Exception as e:
+        return jsonify({
+            'success': False,
+            'message': f'Error searching by range: {str(e)}'
+        })
+@app.route('/api/nlp/test-all-types')
+def test_all_query_types():
+    """Test all query types with live results from Neo4j database"""
+    try:
+        # Define test queries for each type
+        test_queries = {
+            'fare_inquiry': [
+                'What is the fare from Colombo to Kandy?',
+                'fare of anuradhapura to kandy',
+                'price from panadura to galle'
+            ],
+            'comparison': [
+                'Compare fares from Colombo to Kandy vs Colombo to Galle',
+                'Which is cheaper between Colombo to Kandy and Colombo to Anuradapura?'
+            ],
+            'range_search': [
+                'Find routes under 500 rupees',
+                'Show me routes between 200 and 800 rupees',
+                'Routes over 1000 rupees'
+            ],
+            'recommendation': [
+                'Recommend cheap routes',
+                'Show me popular destinations',
+                'What are the best routes from Colombo?'
+            ],
+            'route_inquiry': [
+                'Routes from Colombo',
+                'Routes to Galle',
+                'What routes depart from Kandy?'
+            ],
+            'statistics': [
+                'What is the average fare?',
+                'Database statistics',
+                'How many routes are there?'
+            ]
+        }
+        results = {}
+        for query_type, queries in test_queries.items():
+            type_results = []
+            for query in queries:
+                try:
+                    # Process with enhanced NLP (uses LLM for Cypher generation)
+                    result = enhanced_nlp_processor.process_query(query)
+                    type_results.append({
+                        'query': query,
+                        'result': result,
+                        'success': result.get('success', False)
+                    })
+                except Exception as e:
+                    type_results.append({
+                        'query': query,
+                        'result': {
+                            'success': False,
+                            'message': f'Error processing query: {str(e)}'
+                        },
+                        'success': False
+                    })
+            results[query_type] = {
+                'description': f'Test results for {query_type} queries',
+                'total_queries': len(queries),
+                'successful_queries': sum(1 for r in type_results if r['success']),
+                'examples': type_results
+            }
+        # Summary statistics
+        total_queries = sum(len(queries) for queries in test_queries.values())
+        total_successful = sum(
+            results[query_type]['successful_queries']
+            for query_type in results
+        )
+        return jsonify({
+            'success': True,
+            'message': f'Tested {total_queries} queries across {len(test_queries)} types. {total_successful} successful.',
+            'summary': {
+                'total_query_types': len(test_queries),
+                'total_queries_tested': total_queries,
+                'successful_queries': total_successful,
+                'success_rate': round((total_successful / total_queries) * 100, 2) if total_queries > 0 else 0
+            },
+            'results': results,
+            'neo4j_connected': neo4j_service.is_connected()
+        })
+    except Exception as e:
+        return jsonify({
+            'success': False,
+            'message': f'Error testing query types: {str(e)}',
+            'neo4j_connected': neo4j_service.is_connected()
+        })
+@app.errorhandler(404)
+def not_found(error):
+    return jsonify({
+        'success': False,
+        'message': 'Endpoint not found'
+    }), 404
+@app.errorhandler(500)
+def internal_error(error):
+    return jsonify({
+        'success': False,
+        'message': 'Internal server error'
+    }), 500
+if __name__ == '__main__':
+    port = int(os.getenv('PORT', 7860))  # Hugging Face Spaces uses port 7860 by default
+    print("🚌 Natural Language Transport Query System")
+    print("=" * 60)
+    print(f"🚀 Starting on port {port}")
+    print(f"🌐 Open your browser and go to: http://localhost:{port}")
+    # Check Neo4j connection
+    if neo4j_service.is_connected():
+        print("✅ Connected to Neo4j database")
+        stats = neo4j_service.get_route_statistics()
+        if stats:
+            print(f"📊 Database: {stats.get('total_places', 0)} places, {stats.get('total_routes', 0)} routes")
+    else:
+        print("⚠️  Neo4j not connected - some features may not work")
+    # Check LLM availability
+    if spell_corrector.llm_available:
+        print("🤖 LLM integration available for spell correction")
+    else:
+        print("⚠️  LLM not available - using fuzzy matching only")
+    print("\n🎯 Enhanced Natural Language Capabilities:")
+    print("   • Multiple query formats (fare, price, cost)")
+    print("   • Natural language patterns (from X to Y, X to Y fare)")
+    print("   • Question formats (What is, How much, Show me)")
+    print("   • Compact formats (Colombo to Kandy fare)")
+    print("   • Spell correction and fuzzy matching")
+    print("   • LLM-powered query interpretation")
+    print("   • Automatic Cypher query generation")
+    print("   • Advanced intent classification")
+    print("   • Entity extraction and normalization")
+    print("   • Comparison queries (vs, versus, compare)")
+    print("   • Range search queries (under, over, between)")
+    print("   • Recommendation queries (recommend, suggest)")
+    print("   • Confidence scoring for query understanding")
+    print("   • Sinhala language support with translation")
+    print("   • Automatic Sinhala-English translation")
+    print("   • Dictionary-based and Google Translate fallback")
+    print("\n🔗 Available API Endpoints:")
+    print("   • /api/query - Process natural language queries (enhanced NLP)")
+    print("   • /api/nlp/capabilities - View enhanced NLP capabilities with live examples")
+    print("   • /api/nlp/test-all-types - Test all query types with live results")
+    print("   • /api/nlp/test - Test queries with detailed analysis")
+    print("   • /api/nlp/demo - Get comprehensive demo queries")
+    print("   • /api/examples - Get categorized example queries")
+    print("   • /api/sinhala/examples - Get Sinhala example queries")
+    print("   • /api/translation/test - Test translation functionality")
+    print("   • /api/translation/translate - Translate text between languages")
+    print("   • /api/status - System status and statistics")
+    print("   • /api/suggestions - Get location suggestions")
+    print("   • /api/places - Get all available places")
+    print("=" * 60)
+    try:
+        app.run(debug=False, port=port, host='0.0.0.0')  # Set debug=False for production
+    except Exception as e:
+        print(f"❌ Error starting application: {e}")
+        print("💡 Try running as administrator or check if another application is using the port")

config.py ADDED Viewed

	@@ -0,0 +1,263 @@

+#!/usr/bin/env python3
+"""
+Configuration file for Transport Query Application
+"""
+import os
+from dotenv import load_dotenv
+# Load environment variables
+load_dotenv()
+class Config:
+    """Application configuration"""
+    # Neo4j Configuration
+    # NEO4J_URI = "bolt://localhost:7687"
+    # NEO4J_URI = "bolt://44.201.107.35:7687"
+    NEO4J_URI = os.getenv("NEO4J_URI")
+    # NEO4J_USER = "neo4j"
+    NEO4J_USER = os.getenv("NEO4J_USER")
+    # NEO4J_PASSWORD = "20665130@mM"
+    # NEO4J_PASSWORD = "securities-arrays-entrapments"
+    NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD")
+    # OpenAI Configuration (for LLM)
+    OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+    OPENAI_MODEL = "gpt-3.5-turbo"
+    # Flask Configuration
+    # SECRET_KEY = os.getenv("SECRET_KEY", "transport-query-app-secret-key")
+    SECRET_KEY = os.getenv("SECRET_KEY")
+    DEBUG = True
+    # Spell Correction Configuration
+    SIMILARITY_THRESHOLD = 0.8
+    MAX_SUGGESTIONS = 5
+    # Location Mapping for Common Misspellings
+    LOCATION_MAPPING = {
+        'colombo': 'Colombo',
+        'colmbo': 'Colombo',
+        'kandy': 'Kandy',
+        'panadura': 'Panadura',
+        'panaduwa': 'Panadura',
+        'galkissa': 'Mount Lavinia',
+        'mount lavinia': 'Mount Lavinia',
+        'kalutara': 'Kalutara',
+        'aluthgama': 'Aluthgama',
+        'balapitiya': 'Balapitiya',
+        'ambalangoda': 'Ambalangoda',
+        'hikkaduwa': 'Hikkaduwa',
+        'galle': 'Galle',
+        'koggala': 'Koggala',
+        'waligama': 'Waligama',
+        'matara': 'Matara',
+        'anuradapura': 'Anuradapura',
+        'anuradhapura': 'Anuradapura',
+        'kurunagala': 'Kurunagala',
+        'kurunegala': 'Kurunagala',
+        'trincomalee': 'Trincomalee',
+        'tricomalee': 'Trincomalee',
+        'jaffna': 'Jaffna',
+        'vavuniya': 'Vavuniya',
+        'vavniyava': 'Vavuniya',
+        'vavniyawa': 'Vavuniya',
+        'chilaw': 'Chilaw',
+        'chillaw': 'Chilaw',
+        'puthalama': 'Puththalama',
+        'puttalama': 'Puththalama',
+        'thangalle': 'Thangalle',
+        'thangalla': 'Thangalle',
+        'bandarawela': 'Bandarawela',
+        'bandatrawela': 'Bandarawela',
+        'nuwaraeliya': 'Nuwaraeliya',
+        'nuwara eliya': 'Nuwaraeliya',
+        'badulla': 'Badulla',
+        'monaragala': 'Monaragala',
+        'ratnapura': 'Rathnapura',
+        'rathnapura': 'Rathnapura',
+        'kegalle': 'Kegalle',
+        'mawanella': 'Mawanella',
+        'mavanalla': 'Mawanella',
+        'awissawella': 'Awissawella',
+        'awisswella': 'Awissawella',
+        'kaduwela': 'Kaduwela',
+        'kaduruwela': 'Kaduwela',
+        'maharagama': 'Maharagama',
+        'dehiwala': 'Dehiwala',
+        'moratuwa': 'Moratuwa',
+        'kalutara': 'Kalutara',
+        'beruwala': 'Beruwala',
+        'bentota': 'Bentota',
+        'induruwa': 'Induruwa',
+        'kosgoda': 'Kosgoda',
+        'ahungalla': 'Ahungalla',
+        'karandeniya': 'Karandeniya',
+        'eladuwa': 'Eladuwa',
+        'gintota': 'Gintota',
+        'boossa': 'Boossa',
+        'katunayake': 'Katunayake',
+        'negombo': 'Negombo',
+        'seeduwa': 'Seeduwa',
+        'ja-ela': 'Ja-ela',
+        'wattala': 'Wattala',
+        'kelaniya': 'Kelaniya',
+        'kiribathgoda': 'Kiribathgoda',
+        'kiribathgodas': 'Kiribathgoda',
+        'ganemulla': 'Ganemulla',
+        'mirigama': 'Mirigama',
+        'polgahawela': 'Polgahawela',
+        'warakapola': 'Warakapola',
+        'galigamuwa': 'Galigamuwa',
+        'galgamuwa': 'Galigamuwa',
+        'ambepussa': 'Ambepussa',
+        'alawwa': 'Alawwa',
+        'kandy': 'Kandy',
+        'peradeniya': 'Peradeniya',
+        'gampola': 'Gampola',
+        'nawalapitiya': 'Nawalapitiya',
+        'teldeniya': 'Teldeniya',
+        'kundasale': 'Kundasale',
+        'katugastota': 'Katugastota',
+        'pilimatalawa': 'Pilimatalawa',
+        'harispattuwa': 'Harispattuwa',
+        'akurana': 'Akurana',
+        'matale': 'Matale',
+        'dambulla': 'Dambulla',
+        'sigiriya': 'Sigiriya',
+        'habarana': 'Habarana',
+        'polonnaruwa': 'Polonnaruwa',
+        'minneriya': 'Minneriya',
+        'galoya': 'Galoya',
+        'batticaloa': 'Batticaloa',
+        'batticolo': 'Batticaloa',
+        'ampara': 'Ampara',
+        'mahiyanganaya': 'Mahiyanganaya',
+        'bibile': 'Bibile',
+        'monaragala': 'Monaragala',
+        'wellawaya': 'Wellawaya',
+        'bandarawela': 'Bandarawela',
+        'hali-ela': 'Hali-ela',
+        'passara': 'Passara',
+        'badulla': 'Badulla',
+        'mahiyanganaya': 'Mahiyanganaya',
+        'kandy': 'Kandy',
+        'nuwaraeliya': 'Nuwaraeliya',
+        'hatton': 'Hatton',
+        'talawakele': 'Talawakele',
+        'nanuoya': 'Nanuoya',
+        'ambewela': 'Ambewela',
+        'pattipola': 'Pattipola',
+        'oya': 'Oya',
+        'ella': 'Ella',
+        'demodara': 'Demodara',
+        'hali-ela': 'Hali-ela',
+        'badulla': 'Badulla',
+        'mahiyanganaya': 'Mahiyanganaya',
+        'bibile': 'Bibile',
+        'monaragala': 'Monaragala',
+        'wellawaya': 'Wellawaya',
+        'kataragama': 'Kataragama',
+        'tissamaharama': 'Tissamaharama',
+        'hambantota': 'Hambantota',
+        'tangalle': 'Tangalle',
+        'thangalle': 'Tangalle',
+        'beliatta': 'Beliatta',
+        'ambalantota': 'Ambalantota',
+        'matara': 'Matara',
+        'weligama': 'Weligama',
+        'mirissa': 'Mirissa',
+        'dikwella': 'Dikwella',
+        'kamburupitiya': 'Kamburupitiya',
+        'deniyaya': 'Deniyaya',
+        'akurassa': 'Akurassa',
+        'akuressa': 'Akurassa',
+        'galle': 'Galle',
+        'hikkaduwa': 'Hikkaduwa',
+        'koggala': 'Koggala',
+        'ahangama': 'Ahangama',
+        'midigama': 'Midigama',
+        'talpe': 'Talpe',
+        'unawatuna': 'Unawatuna',
+        'gintota': 'Gintota',
+        'boossa': 'Boossa',
+        'karandeniya': 'Karandeniya',
+        'eladuwa': 'Eladuwa',
+        'bentota': 'Bentota',
+        'induruwa': 'Induruwa',
+        'kosgoda': 'Kosgoda',
+        'ahungalla': 'Ahungalla',
+        'beruwala': 'Beruwala',
+        'kalutara': 'Kalutara',
+        'panadura': 'Panadura',
+        'moratuwa': 'Moratuwa',
+        'dehiwala': 'Dehiwala',
+        'maharagama': 'Maharagama',
+        'kaduwela': 'Kaduwela',
+        'awissawella': 'Awissawella',
+        'kegalle': 'Kegalle',
+        'mawanella': 'Mawanella',
+        'peradeniya': 'Peradeniya',
+        'gampola': 'Gampola',
+        'nawalapitiya': 'Nawalapitiya',
+        'teldeniya': 'Teldeniya',
+        'kundasale': 'Kundasale',
+        'katugastota': 'Katugastota',
+        'pilimatalawa': 'Pilimatalawa',
+        'harispattuwa': 'Harispattuwa',
+        'akurana': 'Akurana',
+        'dambulla': 'Dambulla',
+        'sigiriya': 'Sigiriya',
+        'habarana': 'Habarana',
+        'polonnaruwa': 'Polonnaruwa',
+        'minneriya': 'Minneriya',
+        'galoya': 'Galoya',
+        'batticaloa': 'Batticaloa',
+        'ampara': 'Ampara',
+        'mahiyanganaya': 'Mahiyanganaya',
+        'bibile': 'Bibile',
+        'monaragala': 'Monaragala',
+        'wellawaya': 'Wellawaya',
+        'bandarawela': 'Bandarawela',
+        'hali-ela': 'Hali-ela',
+        'passara': 'Passara',
+        'badulla': 'Badulla',
+        'hatton': 'Hatton',
+        'talawakele': 'Talawakele',
+        'nanuoya': 'Nanuoya',
+        'ambewela': 'Ambewela',
+        'pattipola': 'Pattipola',
+        'oya': 'Oya',
+        'ella': 'Ella',
+        'demodara': 'Demodara',
+        'kataragama': 'Kataragama',
+        'tissamaharama': 'Tissamaharama',
+        'hambantota': 'Hambantota',
+        'tangalle': 'Tangalle',
+        'beliatta': 'Beliatta',
+        'ambalantota': 'Ambalantota',
+        'weligama': 'Weligama',
+        'kamburupitiya': 'Kamburupitiya',
+        'deniyaya': 'Deniyaya',
+        'akurassa': 'Akurassa',
+        'ahangama': 'Ahangama',
+        'seeduwa': 'Seeduwa',
+        'ja-ela': 'Ja-ela',
+        'wattala': 'Wattala',
+        'kelaniya': 'Kelaniya',
+        'kiribathgoda': 'Kiribathgoda',
+        'ganemulla': 'Ganemulla',
+        'mirigama': 'Mirigama',
+        'polgahawela': 'Polgahawela',
+        'warakapola': 'Warakapola',
+        'galigamuwa': 'Galigamuwa',
+        'ambepussa': 'Ambepussa',
+        'alawwa': 'Alawwa',
+        'peradeniya': 'Peradeniya',
+        'gampola': 'Gampola',
+        'matale': 'Matale',
+        'polonnaruwa': 'Polonnaruwa'
+    }

enhanced_nlp_processor.py ADDED Viewed

	@@ -0,0 +1,904 @@

+#!/usr/bin/env python3
+"""
+Enhanced NLP Processor for Transport Query Application
+Advanced natural language understanding and query processing
+"""
+import re
+import json
+from typing import Dict, List, Tuple, Optional, Any
+from datetime import datetime
+from spell_corrector import SpellCorrector
+from neo4j_service import Neo4jService
+from config import Config
+from logger import get_logger
+class EnhancedNLPProcessor:
+    """Advanced NLP processor with sophisticated query understanding"""
+    def __init__(self) -> None:
+        """Create the processor's collaborators and its pattern/keyword tables.
+        Instantiates its own ``Config``, ``SpellCorrector`` and
+        ``Neo4jService`` objects and a logger named after this class, then
+        defines the regex patterns and intent keywords as instance attributes.
+        """
+        self.config = Config()
+        self.spell_corrector = SpellCorrector()
+        self.neo4j_service = Neo4jService()
+        self.logger = get_logger(self.__class__.__name__)
+        # Regex patterns grouped by query category. Capture groups hold the
+        # place names (letters and whitespace only) or, for range queries, the
+        # amounts (digits and commas). Presumably consumed by matching helpers
+        # elsewhere in the class - not visible in this part of the file.
+        self.query_patterns = {
+            # Fare between two places: groups are (origin, destination).
+            # NOTE(review): the second pattern equals the first plus an
+            # optional "bus " prefix, so it also matches everything the first does.
+            'fare_queries': [
+                r'(?:what\s+is\s+)?(?:the\s+)?(?:fare|price|cost)(?:\s+of)?(?:\s+from)?\s+([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)',
+                r'(?:what\s+is\s+)?(?:the\s+)?(?:bus\s+)?(?:fare|price|cost)(?:\s+of)?(?:\s+from)?\s+([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)',
+                r'(?:how\s+much\s+)?(?:is|does)\s+(?:the\s+)?(?:bus\s+)?(?:fare|price|cost)\s+(?:from\s+)?([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)',
+                r'([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)\s+(?:fare|price|cost)',
+                r'(?:fare|price|cost)\s+(?:from\s+)?([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)',
+                r'(?:travel|transport)\s+(?:cost|price|fare)\s+(?:from\s+)?([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)',
+                r'(?:bus|train)\s+(?:fare|price|cost)\s+(?:from\s+)?([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)',
+                r'(?:ticket\s+price|ticket\s+fare)\s+(?:from\s+)?([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)'
+            ],
+            # Two routes side by side: groups are (from1, to1, from2, to2).
+            'comparison_queries': [
+                r'(?:compare|difference)\s+(?:between\s+)?(?:fares?|prices?|costs?)\s+(?:from\s+)?([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)\s+(?:and|vs|versus)\s+([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)',
+                r'(?:which\s+is\s+)?(?:cheaper|more\s+expensive)\s+(?:between\s+)?([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)\s+(?:and|vs|versus)\s+([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)'
+            ],
+            # Price ranges: one amount (under/over) or two (between ... and ...).
+            'range_queries': [
+                r'(?:routes?|fares?|prices?)\s+(?:between|from)\s+([0-9,]+)\s+(?:and|to)\s+([0-9,]+)\s+(?:rupees?|rs?)',
+                r'(?:find|show)\s+(?:routes?|fares?|prices?)\s+(?:under|below|less\s+than)\s+([0-9,]+)\s+(?:rupees?|rs?)',
+                r'(?:find|show)\s+(?:routes?|fares?|prices?)\s+(?:over|above|more\s+than)\s+([0-9,]+)\s+(?:rupees?|rs?)'
+            ],
+            # Routes from/to a place; the last pattern captures both endpoints.
+            'route_queries': [
+                r'(?:routes?|buses?|trains?)\s+(?:from|departing\s+from)\s+([a-zA-Z\s]+)',
+                r'(?:routes?|buses?|trains?)\s+(?:to|arriving\s+at)\s+([a-zA-Z\s]+)',
+                r'(?:how\s+many\s+)?(?:routes?|buses?|trains?)\s+(?:connect|go\s+to|from)\s+([a-zA-Z\s]+)',
+                r'(?:direct|non-stop)\s+(?:routes?|buses?|trains?)\s+(?:from\s+)?([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)'
+            ],
+            # Aggregate questions; these patterns have no capture groups.
+            'statistical_queries': [
+                r'(?:average|mean|median)\s+(?:fare|price|cost)',
+                r'(?:total|sum)\s+(?:of\s+)?(?:all\s+)?(?:fares?|prices?|costs?)',
+                r'(?:how\s+many\s+)?(?:routes?|places?|locations?)',
+                r'(?:database|system)\s+(?:statistics?|stats?|overview)',
+                r'(?:summary|overview)\s+(?:of\s+)?(?:transport|fare)\s+(?:data|database)'
+            ],
+            # Suggestions; only the "best route" pattern captures (origin, destination).
+            'recommendation_queries': [
+                r'(?:recommend|suggest)\s+(?:cheap|budget|affordable)\s+(?:routes?|options?)',
+                r'(?:best|optimal)\s+(?:route|way)\s+(?:from\s+)?([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)',
+                r'(?:popular|frequent)\s+(?:routes?|destinations?)',
+                r'(?:hidden|secret|unknown)\s+(?:routes?|destinations?)'
+            ]
+        }
+        # Intent name -> keywords/phrases associated with that intent.
+        # Presumably used by the intent classifier - TODO confirm against
+        # _classify_intent, which is outside this part of the file.
+        self.intent_keywords = {
+            'fare_inquiry': ['fare', 'price', 'cost', 'how much', 'what is the cost'],
+            'route_inquiry': ['route', 'bus', 'train', 'transport', 'how to get', 'way to'],
+            'comparison': ['compare', 'difference', 'vs', 'versus', 'which is', 'better'],
+            'statistics': ['statistics', 'stats', 'overview', 'summary', 'total', 'average'],
+            'recommendation': ['recommend', 'suggest', 'best', 'optimal', 'popular'],
+            'range_search': ['between', 'under', 'over', 'above', 'below', 'range'],
+            'availability': ['available', 'exist', 'have', 'is there', 'can i']
+        }
+    def process_query(self, user_query: str) -> Dict[str, Any]:
+        """
+        Process natural language query with advanced NLP understanding
+        Args:
+            user_query: Natural language query string
+        Returns:
+            Dictionary with comprehensive query analysis and results
+        """
+        try:
+            # Step 1: Preprocess query
+            processed_query = self._preprocess_query(user_query)
+            self.logger.info(f"Processing query: original='{user_query}', preprocessed='{processed_query}'")
+            # Step 2: Extract entities and intent
+            entities = self._extract_entities(processed_query)
+            intent = self._classify_intent(processed_query, entities)
+            # Step 3: Generate Cypher query
+            cypher_query = self._generate_cypher_query(intent, entities, processed_query)
+            self.logger.debug(f"Intent: {intent}; Entities: {entities}; Cypher: {str(cypher_query).strip()[:200]}")
+            # Step 4: Execute query and format results
+            if cypher_query:
+                results = self._execute_query(cypher_query)
+                self.logger.info(f"Query results count: {len(results)}")
+                response = self._format_response(intent, entities, results, processed_query)
+            else:
+                response = self._handle_unclear_query(processed_query)
+            # Step 5: Add metadata
+            response.update({
+                'query_analysis': {
+                    'original_query': user_query,
+                    'processed_query': processed_query,
+                    'intent': intent,
+                    'entities': entities,
+                    'confidence': self._calculate_confidence(intent, entities)
+                }
+            })
+            return response
+        except Exception as e:
+            return {
+                'success': False,
+                'message': f'Error processing query: {str(e)}',
+                'suggestions': self._get_suggestions()
+            }
+    def _preprocess_query(self, query: str) -> str:
+        """Preprocess and normalize the query"""
+        # Convert to lowercase
+        query = query.lower().strip()
+        # Remove extra whitespace
+        query = re.sub(r'\s+', ' ', query)
+        # Normalize common variations
+        replacements = {
+            'rs.': 'rupees',
+            'rs': 'rupees',
+            'lkr': 'rupees',
+            '→': 'to',
+            '->': 'to',
+            'vs': 'versus',
+            '&': 'and',
+            'w/': 'with',
+            'w/o': 'without'
+        }
+        for old, new in replacements.items():
+            query = query.replace(old, new)
+        return query
+    def _extract_entities(self, query: str) -> Dict[str, Any]:
+        """Extract entities from the query"""
+        entities = {
+            'locations': [],
+            'numbers': [],
+            'currencies': [],
+            'comparators': [],
+            'time_expressions': []
+        }
+        # Extract locations with priority for different query types
+        comparison_patterns = [
+            r'(?:which\s+is\s+)?(?:cheaper|more\s+expensive)\s+(?:between\s+)?([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)\s+(?:and|vs|versus)\s+([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)(?:\s|$|\?)',
+            r'(?:what\s+is\s+)?(?:the\s+)?(?:difference|compare)\s+(?:in\s+)?(?:fare|price|cost)\s+(?:between\s+)?([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)\s+(?:and|vs|versus)\s+([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)(?:\s|$|\?)',
+            r'(?:compare|difference)\s+(?:between\s+)?(?:fares?|prices?|costs?)\s+(?:from\s+)?([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)\s+(?:and|vs|versus)\s+([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)(?:\s|$|\?)',
+            # Simpler patterns for comparison
+            r'([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)\s+(?:and|vs|versus)\s+([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)(?:\s|$|\?)',
+            r'([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)\s+(?:and|vs|versus)\s+([a-zA-Z\s]+?)(?:\s|$|\?)'
+        ]
+        fare_patterns = [
+            r'(?:fare|price|cost)\s+(?:of|from)?\s+([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)(?:\s|$|\?)',
+            r'(?:what\s+is\s+)?(?:the\s+)?(?:fare|price|cost)(?:\s+of)?(?:\s+from)?\s+([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)(?:\s|$|\?)',
+            r'(?:how\s+much\s+)?(?:is|does)\s+(?:the\s+)?(?:fare|price|cost)\s+(?:from\s+)?([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)(?:\s|$|\?)'
+        ]
+        general_patterns = [
+            r'from\s+([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)(?:\s|$|\?)',
+            r'([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)(?:\s|$|\?)',
+            r'between\s+([a-zA-Z\s]+?)\s+and\s+([a-zA-Z\s]+?)(?:\s|$|\?)'
+        ]
+        # Use a set to avoid duplicates
+        seen_locations = set()
+        # Try comparison patterns first (highest priority)
+        for pattern in comparison_patterns:
+            matches = re.finditer(pattern, query, re.IGNORECASE)
+            for match in matches:
+                locations = [loc.strip() for loc in match.groups() if loc.strip()]
+                for loc in locations:
+                    # Skip if we've already processed this location
+                    if loc.lower() in seen_locations:
+                        continue
+                    seen_locations.add(loc.lower())
+                    corrected, confidence, method = self.spell_corrector.correct_location(loc)
+                    if confidence > 0.5:
+                        entities['locations'].append({
+                            'original': loc,
+                            'corrected': corrected,
+                            'confidence': confidence,
+                            'method': method
+                        })
+        # If no locations found with comparison patterns, try fare patterns
+        if not entities['locations']:
+            for pattern in fare_patterns:
+                matches = re.finditer(pattern, query, re.IGNORECASE)
+                for match in matches:
+                    locations = [loc.strip() for loc in match.groups() if loc.strip()]
+                    for loc in locations:
+                        # Skip if we've already processed this location
+                        if loc.lower() in seen_locations:
+                            continue
+                        seen_locations.add(loc.lower())
+                        corrected, confidence, method = self.spell_corrector.correct_location(loc)
+                        if confidence > 0.5:
+                            entities['locations'].append({
+                                'original': loc,
+                                'corrected': corrected,
+                                'confidence': confidence,
+                                'method': method
+                            })
+        # If no locations found with fare patterns, try general patterns
+        if not entities['locations']:
+            for pattern in general_patterns:
+                matches = re.finditer(pattern, query, re.IGNORECASE)
+                for match in matches:
+                    locations = [loc.strip() for loc in match.groups() if loc.strip()]
+                    for loc in locations:
+                        # Skip if we've already processed this location
+                        if loc.lower() in seen_locations:
+                            continue
+                        seen_locations.add(loc.lower())
+                        corrected, confidence, method = self.spell_corrector.correct_location(loc)
+                        if confidence > 0.5:
+                            entities['locations'].append({
+                                'original': loc,
+                                'corrected': corrected,
+                                'confidence': confidence,
+                                'method': method
+                            })
+        # Extract numbers and currencies
+        number_patterns = [
+            r'(under|below|less\s+than|over|above|more\s+than)\s+(\d+(?:,\d+)*(?:\.\d+)?)\s*(rupees?|rs?|lkr)?',
+            r'between\s+(\d+(?:,\d+)*(?:\.\d+)?)\s+and\s+(\d+(?:,\d+)*(?:\.\d+)?)\s*(rupees?|rs?|lkr)?',
+            r'(\d+(?:,\d+)*(?:\.\d+)?)\s*(rupees?|rs?|lkr)?'
+        ]
+        for pattern in number_patterns:
+            matches = re.finditer(pattern, query, re.IGNORECASE)
+            for match in matches:
+                groups = match.groups()
+                if len(groups) >= 2:
+                    if groups[0] in ['under', 'below', 'less than', 'over', 'above', 'more than']:
+                        # Pattern: (under|below|less than|over|above|more than) (number) (currency)
+                        comparator = groups[0]
+                        number = groups[1]
+                        currency = groups[2] if len(groups) >= 3 else 'rupees'
+                        entities['numbers'].append({
+                            'value': float(number.replace(',', '')),
+                            'currency': currency,
+                            'comparator': comparator
+                        })
+                    elif 'between' in pattern:
+                        # Pattern: between (number1) and (number2) (currency)
+                        min_number = groups[0]
+                        max_number = groups[1]
+                        currency = groups[2] if len(groups) >= 3 else 'rupees'
+                        entities['numbers'].append({
+                            'value': float(min_number.replace(',', '')),
+                            'currency': currency,
+                            'comparator': 'between_min'
+                        })
+                        entities['numbers'].append({
+                            'value': float(max_number.replace(',', '')),
+                            'currency': currency,
+                            'comparator': 'between_max'
+                        })
+                    else:
+                        # Pattern: (number) (currency)
+                        number = groups[0]
+                        currency = groups[1] if len(groups) >= 2 else 'rupees'
+                        entities['numbers'].append({
+                            'value': float(number.replace(',', '')),
+                            'currency': currency,
+                            'comparator': None
+                        })
+        # Extract comparators
+        comparator_patterns = [
+            r'(cheaper|more\s+expensive|better|worse|faster|slower)',
+            r'(compare|difference|vs|versus)',
+            r'(under|below|less\s+than|over|above|more\s+than)'
+        ]
+        for pattern in comparator_patterns:
+            matches = re.finditer(pattern, query, re.IGNORECASE)
+            for match in matches:
+                entities['comparators'].append(match.group(1).lower())
+        return entities
+    def _classify_intent(self, query: str, entities: Dict = None) -> Dict[str, Any]:
+        """Classify the intent of the query"""
+        intent_scores = {}
+        for intent, keywords in self.intent_keywords.items():
+            score = 0
+            for keyword in keywords:
+                if keyword in query:
+                    score += 1
+            intent_scores[intent] = score
+        # Get primary intent
+        primary_intent = max(intent_scores.items(), key=lambda x: x[1])
+        # Check for specific patterns with priority
+        if any(pattern in query for pattern in ['compare', 'difference', 'vs', 'versus', 'cheaper', 'more expensive']):
+            primary_intent = ('comparison', 10)
+        elif any(pattern in query for pattern in ['recommend', 'suggest', 'best', 'optimal', 'popular']):
+            primary_intent = ('recommendation', 10)
+        elif any(pattern in query for pattern in ['between', 'under', 'over', 'above', 'below', 'range']):
+            primary_intent = ('range_search', 10)
+        elif any(pattern in query for pattern in ['fare', 'price', 'cost', 'how much']):
+            # Check if we have at least 2 locations
+            if entities and len(entities.get('locations', [])) >= 2:
+                primary_intent = ('fare_inquiry', 10)
+        elif any(pattern in query for pattern in ['route', 'bus', 'train', 'transport']):
+            primary_intent = ('route_inquiry', 10)
+        return {
+            'primary': primary_intent[0],
+            'confidence': primary_intent[1] / 10,
+            'all_scores': intent_scores
+        }
+    def _generate_cypher_query(self, intent: Dict, entities: Dict, query: str) -> Optional[str]:
+        """Generate Cypher query using LLM for better understanding"""
+        try:
+            # Try LLM-based query generation first
+            llm_query = self._generate_cypher_with_llm(query, intent, entities)
+            if llm_query:
+                return llm_query
+        except Exception as e:
+            print(f"LLM query generation failed: {e}")
+        # Fallback to rule-based generation
+        primary_intent = intent['primary']
+        if primary_intent == 'fare_inquiry':
+            return self._generate_fare_query(entities)
+        elif primary_intent == 'comparison':
+            return self._generate_comparison_query(entities)
+        elif primary_intent == 'route_inquiry':
+            return self._generate_route_query(entities, query)
+        elif primary_intent == 'statistics':
+            return self._generate_statistics_query(entities)
+        elif primary_intent == 'recommendation':
+            return self._generate_recommendation_query(entities, query)
+        elif primary_intent == 'range_search':
+            return self._generate_range_query(entities)
+        else:
+            return self._generate_fallback_query(query)
+    def _generate_fare_query(self, entities: Dict) -> Optional[str]:
+        """Generate fare inquiry Cypher query"""
+        locations = entities.get('locations', [])
+        if len(locations) >= 2:
+            from_loc = locations[0]['corrected']
+            to_loc = locations[1]['corrected']
+            return f"""
+            MATCH (a:Place {{name: '{from_loc}'}})-[r:Fare]->(b:Place {{name: '{to_loc}'}})
+            RETURN
+                a.name as from_place,
+                b.name as to_place,
+                r.fare as fare,
+                'Direct route' as route_type
+            """
+        return None
+    def _generate_comparison_query(self, entities: Dict) -> Optional[str]:
+        """Generate comparison Cypher query"""
+        locations = entities.get('locations', [])
+        if len(locations) >= 3:
+            # Handle case where we have same origin, different destinations
+            if len(locations) == 3:
+                # Pattern: "Colombo to Kandy and Colombo to Anuradapura"
+                route1_from = locations[0]['corrected']
+                route1_to = locations[1]['corrected']
+                route2_from = locations[0]['corrected']  # Same origin
+                route2_to = locations[2]['corrected']
+            elif len(locations) >= 4:
+                # Pattern: "Colombo to Kandy and Anuradapura to Galle"
+                route1_from = locations[0]['corrected']
+                route1_to = locations[1]['corrected']
+                route2_from = locations[2]['corrected']
+                route2_to = locations[3]['corrected']
+            else:
+                return None
+            return f"""
+            MATCH (a1:Place {{name: '{route1_from}'}})-[r1:Fare]->(b1:Place {{name: '{route1_to}'}})
+            MATCH (a2:Place {{name: '{route2_from}'}})-[r2:Fare]->(b2:Place {{name: '{route2_to}'}})
+            RETURN
+                a1.name + ' to ' + b1.name as route1,
+                r1.fare as fare1,
+                a2.name + ' to ' + b2.name as route2,
+                r2.fare as fare2,
+                r1.fare - r2.fare as difference,
+                CASE
+                    WHEN r1.fare < r2.fare THEN 'Route 1 is cheaper'
+                    WHEN r1.fare > r2.fare THEN 'Route 2 is cheaper'
+                    ELSE 'Both routes have the same fare'
+                END as comparison
+            """
+        return None
+    def _generate_route_query(self, entities: Dict, query: str) -> Optional[str]:
+        """Generate route inquiry Cypher query"""
+        locations = entities.get('locations', [])
+        if 'from' in query and locations:
+            location = locations[0]['corrected']
+            return f"""
+            MATCH (a:Place {{name: '{location}'}})-[r:Fare]->(b:Place)
+            RETURN
+                a.name as from_place,
+                b.name as to_place,
+                r.fare as fare
+            ORDER BY r.fare
+            """
+        elif 'to' in query and locations:
+            location = locations[0]['corrected']
+            return f"""
+            MATCH (a:Place)-[r:Fare]->(b:Place {{name: '{location}'}})
+            RETURN
+                a.name as from_place,
+                b.name as to_place,
+                r.fare as fare
+            ORDER BY r.fare
+            """
+        return None
+    def _generate_statistics_query(self, entities: Dict) -> str:
+        """Generate statistics Cypher query"""
+        return """
+        MATCH (p:Place)
+        MATCH ()-[r:Fare]->()
+        RETURN
+            count(DISTINCT p) as total_places,
+            count(r) as total_routes,
+            round(avg(r.fare), 2) as average_fare,
+            min(r.fare) as minimum_fare,
+            max(r.fare) as maximum_fare,
+            round(stdDev(r.fare), 2) as fare_standard_deviation
+        """
+    def _generate_recommendation_query(self, entities: Dict, query: str) -> str:
+        """Generate recommendation Cypher query"""
+        if 'cheap' in query or 'budget' in query or 'affordable' in query:
+            return """
+            MATCH (a:Place)-[r:Fare]->(b:Place)
+            RETURN
+                a.name as from_place,
+                b.name as to_place,
+                r.fare as fare
+            ORDER BY r.fare ASC
+            LIMIT 10
+            """
+        elif 'popular' in query or 'frequent' in query:
+            return """
+            MATCH (a:Place)-[r:Fare]->(b:Place)
+            RETURN
+                a.name as from_place,
+                b.name as to_place,
+                r.fare as fare
+            ORDER BY r.fare DESC
+            LIMIT 10
+            """
+        else:
+            return """
+            MATCH (a:Place)-[r:Fare]->(b:Place)
+            RETURN
+                a.name as from_place,
+                b.name as to_place,
+                r.fare as fare
+            ORDER BY r.fare ASC
+            LIMIT 5
+            """
+    def _generate_range_query(self, entities: Dict) -> Optional[str]:
+        """Generate range search Cypher query"""
+        numbers = entities.get('numbers', [])
+        if numbers:
+            # Check for between range
+            between_min = None
+            between_max = None
+            single_value = None
+            single_comparator = None
+            for number in numbers:
+                comparator = number.get('comparator', '')
+                value = number['value']
+                if comparator == 'between_min':
+                    between_min = value
+                elif comparator == 'between_max':
+                    between_max = value
+                elif comparator in ['under', 'below', 'less than', 'over', 'above', 'more than']:
+                    single_value = value
+                    single_comparator = comparator
+            # Generate query based on type
+            if between_min is not None and between_max is not None:
+                return f"""
+                MATCH (a:Place)-[r:Fare]->(b:Place)
+                WHERE r.fare >= {between_min} AND r.fare <= {between_max}
+                RETURN
+                    a.name as from_place,
+                    b.name as to_place,
+                    r.fare as fare
+                ORDER BY r.fare ASC
+                """
+            elif single_value is not None and single_comparator is not None:
+                if single_comparator in ['under', 'below', 'less than']:
+                    return f"""
+                    MATCH (a:Place)-[r:Fare]->(b:Place)
+                    WHERE r.fare < {single_value}
+                    RETURN
+                        a.name as from_place,
+                        b.name as to_place,
+                        r.fare as fare
+                    ORDER BY r.fare ASC
+                    """
+                elif single_comparator in ['over', 'above', 'more than']:
+                    return f"""
+                    MATCH (a:Place)-[r:Fare]->(b:Place)
+                    WHERE r.fare > {single_value}
+                    RETURN
+                        a.name as from_place,
+                        b.name as to_place,
+                        r.fare as fare
+                    ORDER BY r.fare DESC
+                    """
+        return None
+    def _generate_cypher_with_llm(self, query: str, intent: Dict, entities: Dict) -> Optional[str]:
+        """Generate Cypher query using LLM for better understanding"""
+        try:
+            if not self.config.OPENAI_API_KEY:
+                return None
+            # Get available places for context
+            available_places = list(self.neo4j_service.get_all_places())
+            # Create comprehensive prompt for Cypher generation
+            prompt = f"""
+            You are a Neo4j Cypher query generator for a transport database.
+            Database Schema:
+            - Nodes: Place (with property 'name')
+            - Relationships: Fare (with property 'fare')
+            Available Places: {', '.join(available_places[:50])}... (total: {len(available_places)})
+            User Query: "{query}"
+            Detected Intent: {intent.get('primary', 'unknown')}
+            Extracted Entities: {entities}
+            Your task is to generate a valid Cypher query that answers the user's question.
+            Query Types and Examples:
+            1. FARE INQUIRY:
+               - "What is the fare from Colombo to Kandy?"
+               - Cypher: MATCH (a:Place {{name: 'Colombo'}})-[r:Fare]->(b:Place {{name: 'Kandy'}}) RETURN a.name as from_place, b.name as to_place, r.fare as fare
+            2. COMPARISON:
+               - "Compare fares from Colombo to Kandy vs Colombo to Galle"
+               - Cypher: MATCH (a1:Place {{name: 'Colombo'}})-[r1:Fare]->(b1:Place {{name: 'Kandy'}}) MATCH (a2:Place {{name: 'Colombo'}})-[r2:Fare]->(b2:Place {{name: 'Galle'}}) RETURN a1.name + ' to ' + b1.name as route1, r1.fare as fare1, a2.name + ' to ' + b2.name as route2, r2.fare as fare2, r1.fare - r2.fare as difference
+            3. RANGE SEARCH:
+               - "Find routes under 500 rupees"
+               - Cypher: MATCH (a:Place)-[r:Fare]->(b:Place) WHERE r.fare < 500 RETURN a.name as from_place, b.name as to_place, r.fare as fare ORDER BY r.fare ASC
+            4. RECOMMENDATION:
+               - "Recommend cheap routes"
+               - Cypher: MATCH (a:Place)-[r:Fare]->(b:Place) RETURN a.name as from_place, b.name as to_place, r.fare as fare ORDER BY r.fare ASC LIMIT 10
+            5. STATISTICS:
+               - "What is the average fare?"
+               - Cypher: MATCH ()-[r:Fare]->() RETURN round(avg(r.fare), 2) as average_fare, min(r.fare) as min_fare, max(r.fare) as max_fare
+            6. ROUTE INQUIRY:
+               - "Routes from Colombo"
+               - Cypher: MATCH (a:Place {{name: 'Colombo'}})-[r:Fare]->(b:Place) RETURN a.name as from_place, b.name as to_place, r.fare as fare ORDER BY r.fare
+            Important Rules:
+            1. Always use proper Cypher syntax
+            2. Use exact place names from the available places list
+            3. For comparisons, use multiple MATCH clauses
+            4. For ranges, use WHERE clauses with appropriate operators
+            5. For statistics, use aggregation functions
+            6. Always include meaningful column aliases
+            7. Use ORDER BY for sorted results
+            8. Use LIMIT for large result sets
+            Return ONLY the Cypher query, nothing else. If you cannot generate a valid query, return "FALLBACK".
+            """
+            cypher_query = None
+            # Prefer new SDK
+            try:
+                from openai import OpenAI
+                client = OpenAI(api_key=self.config.OPENAI_API_KEY)
+                response = client.chat.completions.create(
+                    model=self.config.OPENAI_MODEL,
+                    messages=[
+                        {"role": "system", "content": "You are a Cypher query generator. Return only valid Cypher queries."},
+                        {"role": "user", "content": prompt}
+                    ],
+                    max_tokens=300,
+                    temperature=0.1
+                )
+                cypher_query = response.choices[0].message.content.strip()
+            except Exception as sdk_err:
+                import openai
+                try:
+                    openai.api_key = self.config.OPENAI_API_KEY
+                    response = openai.ChatCompletion.create(
+                        model=self.config.OPENAI_MODEL,
+                        messages=[
+                            {"role": "system", "content": "You are a Cypher query generator. Return only valid Cypher queries."},
+                            {"role": "user", "content": prompt}
+                        ],
+                        max_tokens=300,
+                        temperature=0.1
+                    )
+                    cypher_query = response.choices[0].message.content.strip()
+                except Exception:
+                    raise sdk_err
+            # Validate the response
+            if cypher_query.upper() == "FALLBACK":
+                return None
+            # Basic validation - check if it starts with MATCH
+            if cypher_query.upper().startswith('MATCH'):
+                return cypher_query
+            return None
+        except Exception as e:
+            print(f"LLM Cypher generation error: {e}")
+            return None
+    def _generate_fallback_query(self, query: str) -> Optional[str]:
+        """Generate fallback query when intent is unclear"""
+        # Try to extract locations using spell corrector
+        locations = self.spell_corrector.extract_locations_from_query(query)
+        if len(locations) >= 2:
+            from_loc = locations[0][1]
+            to_loc = locations[1][1]
+            return f"""
+            MATCH (a:Place {{name: '{from_loc}'}})-[r:Fare]->(b:Place {{name: '{to_loc}'}})
+            RETURN
+                a.name as from_place,
+                b.name as to_place,
+                r.fare as fare
+            """
+        # Additional fallback: direct pattern matching for fare queries
+        if 'fare' in query.lower() or 'price' in query.lower() or 'cost' in query.lower():
+            import re
+            fare_patterns = [
+                r'fare\s+(?:of|from)?\s+([a-zA-Z\s]+)\s+to\s+([a-zA-Z\s]+)',
+                r'price\s+(?:of|from)?\s+([a-zA-Z\s]+)\s+to\s+([a-zA-Z\s]+)',
+                r'cost\s+(?:of|from)?\s+([a-zA-Z\s]+)\s+to\s+([a-zA-Z\s]+)',
+                r'(?:what\s+is\s+)?(?:the\s+)?(?:fare|price|cost)(?:\s+of)?(?:\s+from)?\s+([a-zA-Z\s]+)\s+to\s+([a-zA-Z\s]+)',
+                r'(?:how\s+much\s+)?(?:is|does)\s+(?:the\s+)?(?:fare|price|cost)\s+(?:from\s+)?([a-zA-Z\s]+)\s+to\s+([a-zA-Z\s]+)'
+            ]
+            for pattern in fare_patterns:
+                match = re.search(pattern, query.lower())
+                if match:
+                    from_loc = match.group(1).strip()
+                    to_loc = match.group(2).strip()
+                    # Correct locations
+                    from_corrected, from_conf, _ = self.spell_corrector.correct_location(from_loc)
+                    to_corrected, to_conf, _ = self.spell_corrector.correct_location(to_loc)
+                    if from_conf > 0.5 and to_conf > 0.5:
+                        return f"""
+                        MATCH (a:Place {{name: '{from_corrected}'}})-[r:Fare]->(b:Place {{name: '{to_corrected}'}})
+                        RETURN a.name as from_place, b.name as to_place, r.fare as fare
+                        """
+        return None
+    def _execute_query(self, cypher_query: str) -> List[Dict]:
+        """Execute Cypher query and return results"""
+        try:
+            with self.neo4j_service.driver.session() as session:
+                result = session.run(cypher_query)
+                return [dict(record) for record in result]
+        except Exception as e:
+            print(f"Query execution error: {e}")
+            return []
+    def _format_response(self, intent: Dict, entities: Dict, results: List[Dict], query: str) -> Dict[str, Any]:
+        """Format the response based on intent and results"""
+        primary_intent = intent['primary']
+        if not results:
+            return {
+                'success': False,
+                'message': 'No results found for your query.',
+                'suggestions': self._get_suggestions()
+            }
+        if primary_intent == 'fare_inquiry':
+            return self._format_fare_response(results, entities)
+        elif primary_intent == 'comparison':
+            return self._format_comparison_response(results, entities)
+        elif primary_intent == 'route_inquiry':
+            return self._format_route_response(results, entities)
+        elif primary_intent == 'statistics':
+            return self._format_statistics_response(results)
+        elif primary_intent == 'recommendation':
+            return self._format_recommendation_response(results, query)
+        elif primary_intent == 'range_search':
+            return self._format_range_response(results, entities)
+        else:
+            return self._format_generic_response(results)
+    def _format_fare_response(self, results: List[Dict], entities: Dict) -> Dict[str, Any]:
+        """Format fare inquiry response"""
+        if results:
+            result = results[0]
+            return {
+                'success': True,
+                'message': f"The fare from {result['from_place']} to {result['to_place']} is Rs. {result['fare']}",
+                'data': results,
+                'query_type': 'fare_inquiry',
+                'summary': {
+                    'from_place': result['from_place'],
+                    'to_place': result['to_place'],
+                    'fare': result['fare']
+                }
+            }
+        return {'success': False, 'message': 'Fare information not found.'}
+    def _format_comparison_response(self, results: List[Dict], entities: Dict) -> Dict[str, Any]:
+        """Format comparison response"""
+        if results:
+            result = results[0]
+            return {
+                'success': True,
+                'message': result.get('comparison', 'Comparison completed'),
+                'data': results,
+                'query_type': 'comparison',
+                'summary': {
+                    'route1': result.get('route1'),
+                    'route2': result.get('route2'),
+                    'difference': result.get('difference')
+                }
+            }
+        return {'success': False, 'message': 'Comparison not possible.'}
+    def _format_route_response(self, results: List[Dict], entities: Dict) -> Dict[str, Any]:
+        """Format route inquiry response"""
+        return {
+            'success': True,
+            'message': f"Found {len(results)} routes",
+            'data': results,
+            'query_type': 'route_inquiry',
+            'summary': {
+                'total_routes': len(results),
+                'fare_range': f"Rs. {min(r['fare'] for r in results)} - Rs. {max(r['fare'] for r in results)}" if results else "N/A"
+            }
+        }
+    def _format_statistics_response(self, results: List[Dict]) -> Dict[str, Any]:
+        """Format statistics response"""
+        if results:
+            stats = results[0]
+            return {
+                'success': True,
+                'message': f"Database contains {stats['total_places']} places and {stats['total_routes']} routes",
+                'data': results,
+                'query_type': 'statistics',
+                'summary': {
+                    'total_places': stats['total_places'],
+                    'total_routes': stats['total_routes'],
+                    'average_fare': stats['average_fare'],
+                    'fare_range': f"Rs. {stats['minimum_fare']} - Rs. {stats['maximum_fare']}"
+                }
+            }
+        return {'success': False, 'message': 'Statistics not available.'}
+    def _format_recommendation_response(self, results: List[Dict], query: str) -> Dict[str, Any]:
+        """Format recommendation response"""
+        return {
+            'success': True,
+            'message': f"Here are {len(results)} recommended routes",
+            'data': results,
+            'query_type': 'recommendation',
+            'summary': {
+                'recommendations_count': len(results),
+                'fare_range': f"Rs. {min(r['fare'] for r in results)} - Rs. {max(r['fare'] for r in results)}" if results else "N/A"
+            }
+        }
+    def _format_range_response(self, results: List[Dict], entities: Dict) -> Dict[str, Any]:
+        """Format range search response"""
+        return {
+            'success': True,
+            'message': f"Found {len(results)} routes in your specified range",
+            'data': results,
+            'query_type': 'range_search',
+            'summary': {
+                'routes_found': len(results),
+                'fare_range': f"Rs. {min(r['fare'] for r in results)} - Rs. {max(r['fare'] for r in results)}" if results else "N/A"
+            }
+        }
+    def _format_generic_response(self, results: List[Dict]) -> Dict[str, Any]:
+        """Format generic response"""
+        return {
+            'success': True,
+            'message': f"Found {len(results)} results",
+            'data': results,
+            'query_type': 'generic'
+        }
+    def _handle_unclear_query(self, query: str) -> Dict[str, Any]:
+        """Handle unclear or ambiguous queries"""
+        return {
+            'success': False,
+            'message': 'I could not understand your query. Please try rephrasing it.',
+            'suggestions': self._get_suggestions(),
+            'query_type': 'unclear'
+        }
+    def _calculate_confidence(self, intent: Dict, entities: Dict) -> float:
+        """Calculate confidence score for the query interpretation"""
+        confidence = 0.0
+        # Intent confidence
+        confidence += intent.get('confidence', 0) * 0.4
+        # Entity confidence
+        locations = entities.get('locations', [])
+        if locations:
+            avg_location_confidence = sum(loc['confidence'] for loc in locations) / len(locations)
+            confidence += avg_location_confidence * 0.4
+        # Query complexity bonus
+        if len(locations) >= 2:
+            confidence += 0.2
+        return min(confidence, 1.0)
+    def _get_suggestions(self) -> List[str]:
+        """Get query suggestions"""
+        return [
+            "What is the fare from Colombo to Kandy?",
+            "Compare fares from Colombo to Kandy vs Colombo to Galle",
+            "Show me routes from Panadura",
+            "Find routes under 500 rupees",
+            "What are the cheapest routes?",
+            "Show me popular destinations",
+            "Give me database statistics",
+            "Recommend affordable routes"
+        ]

llm_query_processor.py ADDED Viewed

	@@ -0,0 +1,351 @@

+#!/usr/bin/env python3
+"""
+LLM-Based Query Processor for Transport Query Application
+Uses AI to interpret queries and generate Cypher queries
+"""
+import re
+from typing import Dict, List, Tuple, Optional
+from spell_corrector import SpellCorrector
+from neo4j_service import Neo4jService
+from config import Config
+class LLMQueryProcessor:
+    """Process natural language queries using LLM for interpretation and Cypher generation"""
+    def __init__(self):
+        self.config = Config()
+        self.spell_corrector = SpellCorrector()
+        self.neo4j_service = Neo4jService()
+    def process_query(self, user_query: str) -> Dict:
+        """
+        Process a natural language query using LLM for interpretation
+        Returns:
+            Dictionary with query results and metadata
+        """
+        try:
+            # First, extract and correct locations from the query
+            locations = self.spell_corrector.extract_locations_from_query(user_query)
+            # Use LLM to interpret the query and generate Cypher
+            interpretation = self._interpret_query_with_llm(user_query, locations)
+            if interpretation['success']:
+                # Execute the generated Cypher query
+                result = self._execute_cypher_query(interpretation['cypher_query'])
+                return {
+                    'success': True,
+                    'message': interpretation['message'],
+                    'cypher_query': interpretation['cypher_query'],
+                    'data': result,
+                    'corrections': self._format_corrections(locations),
+                    'query_type': interpretation['query_type']
+                }
+            else:
+                return {
+                    'success': False,
+                    'message': interpretation['message'],
+                    'suggestions': self._get_query_suggestions()
+                }
+        except Exception as e:
+            print(f"Query processing error: {e}")
+            return {
+                'success': False,
+                'message': 'An error occurred while processing your query.',
+                'suggestions': self._get_query_suggestions()
+            }
+    def _interpret_query_with_llm(self, query: str, locations: List[Tuple]) -> Dict:
+        """Use LLM to interpret the query and generate appropriate Cypher"""
+        try:
+            if not self.config.OPENAI_API_KEY:
+                return self._fallback_interpretation(query, locations)
+            # Get available places for context
+            available_places = list(self.neo4j_service.get_all_places())
+            # Create comprehensive prompt for query interpretation
+            prompt = f"""
+            You are an intelligent transport query interpreter for a Neo4j database containing Sri Lankan transport data.
+            Database Schema:
+            - Nodes: Place (with property 'name')
+            - Relationships: Fare (with property 'fare')
+            Available Places: {', '.join(available_places[:50])}... (total: {len(available_places)})
+            User Query: "{query}"
+            Extracted Locations: {[f"{orig}->{corr}" for orig, corr, conf, method in locations]}
+            Your task is to:
+            1. Determine the query type (fare, cheapest, expensive, places, routes_from, routes_to, statistics, lowest_fare)
+            2. Generate the appropriate Cypher query
+            3. Provide a clear response message
+                         Query Types:
+             - fare: Find fare between two specific locations
+             - cheapest: Find cheapest routes (top 10)
+             - expensive: Find most expensive routes (top 10)
+             - places: List all places
+             - routes_from: Find routes departing from a location
+             - routes_to: Find routes arriving at a location
+             - statistics: Get database statistics
+             - lowest_fare: Find the single lowest fare with route details
+            Return your response in this exact JSON format:
+            {{
+                "query_type": "fare|cheapest|expensive|places|routes_from|routes_to|statistics|lowest_fare",
+                "cypher_query": "MATCH ... RETURN ...",
+                "message": "Clear response message for the user"
+            }}
+                         Examples:
+             - "What is the fare from Colombo to Kandy?" → fare query: MATCH (a:Place {name: 'Colombo'})-[r:Fare]->(b:Place {name: 'Kandy'}) RETURN a.name as from_place, b.name as to_place, r.fare as fare
+             - "fare of anuradhapura to kandy?" → fare query: MATCH (a:Place {name: 'Anuradapura'})-[r:Fare]->(b:Place {name: 'Kandy'}) RETURN a.name as from_place, b.name as to_place, r.fare as fare
+             - "Show me the cheapest routes" → cheapest query: MATCH (a:Place)-[r:Fare]->(b:Place) RETURN a.name as from_place, b.name as to_place, r.fare as fare ORDER BY r.fare ASC LIMIT 10
+             - "What is the lowest fare?" → lowest_fare query: MATCH (a:Place)-[r:Fare]->(b:Place) RETURN a.name as from_place, b.name as to_place, r.fare as fare ORDER BY r.fare ASC LIMIT 1
+             - "List all places" → places query: MATCH (p:Place) RETURN DISTINCT p.name as place ORDER BY p.name
+             - "Routes from Colombo" → routes_from query: MATCH (a:Place {name: 'Colombo'})-[r:Fare]->(b:Place) RETURN a.name as from_place, b.name as to_place, r.fare as fare ORDER BY r.fare
+             - "Database statistics" → statistics query: MATCH (p:Place) MATCH ()-[r:Fare]->() RETURN count(DISTINCT p) as total_places, count(r) as total_routes, avg(r.fare) as average_fare, min(r.fare) as min_fare, max(r.fare) as max_fare
+             Keep Cypher queries simple and avoid complex functions like shortestPath. Use direct relationships only.
+             For fare queries, recognize various formats like "fare of X to Y", "fare from X to Y", "price from X to Y", etc.
+            """
+            # Call LLM using new SDK first, legacy as fallback
+            import json
+            interpretation = None
+            try:
+                from openai import OpenAI
+                client = OpenAI(api_key=self.config.OPENAI_API_KEY)
+                response = client.chat.completions.create(
+                    model=self.config.OPENAI_MODEL,
+                    messages=[
+                        {"role": "system", "content": "You are a transport query interpreter. Return only valid JSON."},
+                        {"role": "user", "content": prompt}
+                    ],
+                    max_tokens=500,
+                    temperature=0.1
+                )
+                interpretation = json.loads(response.choices[0].message.content.strip())
+            except Exception as sdk_err:
+                try:
+                    import openai
+                    openai.api_key = self.config.OPENAI_API_KEY
+                    response = openai.ChatCompletion.create(
+                        model=self.config.OPENAI_MODEL,
+                        messages=[
+                            {"role": "system", "content": "You are a transport query interpreter. Return only valid JSON."},
+                            {"role": "user", "content": prompt}
+                        ],
+                        max_tokens=500,
+                        temperature=0.1
+                    )
+                    interpretation = json.loads(response.choices[0].message.content.strip())
+                except Exception:
+                    raise sdk_err
+            # Validate the response
+            if interpretation and 'query_type' in interpretation and 'cypher_query' in interpretation and 'message' in interpretation:
+                return {
+                    'success': True,
+                    'query_type': interpretation['query_type'],
+                    'cypher_query': interpretation['cypher_query'],
+                    'message': interpretation['message']
+                }
+            else:
+                return self._fallback_interpretation(query, locations)
+        except Exception as e:
+            print(f"LLM interpretation error: {e}")
+            return self._fallback_interpretation(query, locations)
+    def _fallback_interpretation(self, query: str, locations: List[Tuple]) -> Dict:
+        """Fallback interpretation when LLM is not available"""
+        query_lower = query.lower()
+        # Simple keyword-based interpretation
+        if 'lowest' in query_lower or 'minimum' in query_lower or 'cheapest' in query_lower:
+            if 'lowest fare' in query_lower or 'minimum fare' in query_lower:
+                                 return {
+                     'success': True,
+                     'query_type': 'lowest_fare',
+                     'cypher_query': """
+                     MATCH (a:Place)-[r:Fare]->(b:Place)
+                     WITH a, b, r, r.fare as fare
+                     ORDER BY r.fare ASC
+                     LIMIT 1
+                     RETURN a.name as from_place, b.name as to_place, fare
+                     """,
+                     'message': 'Finding the lowest fare in the database...'
+                 }
+            else:
+                return {
+                    'success': True,
+                    'query_type': 'cheapest',
+                    'cypher_query': """
+                    MATCH (a:Place)-[r:Fare]->(b:Place)
+                    RETURN a.name as from_place, b.name as to_place, r.fare as fare
+                    ORDER BY r.fare ASC
+                    LIMIT 10
+                    """,
+                    'message': 'Finding the cheapest routes...'
+                }
+        elif 'expensive' in query_lower or 'highest' in query_lower or 'maximum' in query_lower:
+            return {
+                'success': True,
+                'query_type': 'expensive',
+                'cypher_query': """
+                MATCH (a:Place)-[r:Fare]->(b:Place)
+                RETURN a.name as from_place, b.name as to_place, r.fare as fare
+                ORDER BY r.fare DESC
+                LIMIT 10
+                """,
+                'message': 'Finding the most expensive routes...'
+            }
+        elif 'places' in query_lower or 'locations' in query_lower or 'list all' in query_lower:
+            return {
+                'success': True,
+                'query_type': 'places',
+                'cypher_query': """
+                MATCH (p:Place)
+                RETURN DISTINCT p.name as place
+                ORDER BY p.name
+                """,
+                'message': 'Listing all places...'
+            }
+        elif 'statistics' in query_lower or 'stats' in query_lower:
+            return {
+                'success': True,
+                'query_type': 'statistics',
+                'cypher_query': """
+                MATCH (p:Place)
+                MATCH ()-[r:Fare]->()
+                RETURN
+                    count(DISTINCT p) as total_places,
+                    count(r) as total_routes,
+                    avg(r.fare) as average_fare,
+                    min(r.fare) as min_fare,
+                    max(r.fare) as max_fare
+                """,
+                'message': 'Getting database statistics...'
+            }
+        elif len(locations) >= 2:
+            # Fare query between two locations
+            from_location = locations[0][1]
+            to_location = locations[1][1]
+            return {
+                'success': True,
+                'query_type': 'fare',
+                'cypher_query': f"""
+                MATCH (a:Place {{name: '{from_location}'}})-[r:Fare]->(b:Place {{name: '{to_location}'}})
+                RETURN a.name as from_place, b.name as to_place, r.fare as fare
+                """,
+                'message': f'Finding fare from {from_location} to {to_location}...'
+            }
+        elif 'fare' in query_lower and 'to' in query_lower:
+             # Handle queries like "fare of X to Y" where locations might not be extracted properly
+             # Try to extract locations using a simpler pattern
+             import re
+             fare_patterns = [
+                 r'fare\s+(?:of|from)?\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)',
+                 r'price\s+(?:of|from)?\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)',
+                 r'cost\s+(?:of|from)?\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)',
+                 r'how\s+much\s+(?:is|does)\s+(?:the\s+)?(?:fare|price|cost)\s+(?:from\s+)?([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)',
+                 r'what\s+(?:is|are)\s+(?:the\s+)?(?:fare|price|cost)s?\s+(?:from\s+)?([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)',
+                 r'([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+(?:fare|price|cost)',
+                 r'(?:fare|price|cost)\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)'
+             ]
+             for pattern in fare_patterns:
+                 match = re.search(pattern, query_lower)
+                 if match:
+                     from_loc = match.group(1).strip()
+                     to_loc = match.group(2).strip()
+                     # Correct the locations
+                     from_corrected, from_conf, _ = self.spell_corrector.correct_location(from_loc)
+                     to_corrected, to_conf, _ = self.spell_corrector.correct_location(to_loc)
+                     if from_conf > 0.5 and to_conf > 0.5:
+                         return {
+                             'success': True,
+                             'query_type': 'fare',
+                             'cypher_query': f"""
+                             MATCH (a:Place {{name: '{from_corrected}'}})-[r:Fare]->(b:Place {{name: '{to_corrected}'}})
+                             RETURN a.name as from_place, b.name as to_place, r.fare as fare
+                             """,
+                             'message': f'Finding fare from {from_corrected} to {to_corrected}...'
+                         }
+        elif len(locations) == 1:
+            # Routes from/to a single location
+            location = locations[0][1]
+            if 'from' in query_lower:
+                return {
+                    'success': True,
+                    'query_type': 'routes_from',
+                    'cypher_query': f"""
+                    MATCH (a:Place {{name: '{location}'}})-[r:Fare]->(b:Place)
+                    RETURN a.name as from_place, b.name as to_place, r.fare as fare
+                    ORDER BY r.fare
+                    """,
+                    'message': f'Finding routes from {location}...'
+                }
+            else:
+                return {
+                    'success': True,
+                    'query_type': 'routes_to',
+                    'cypher_query': f"""
+                    MATCH (a:Place)-[r:Fare]->(b:Place {{name: '{location}'}})
+                    RETURN a.name as from_place, b.name as to_place, r.fare as fare
+                    ORDER BY r.fare
+                    """,
+                    'message': f'Finding routes to {location}...'
+                }
+        else:
+            return {
+                'success': False,
+                'message': 'I could not understand your query. Please try rephrasing it.'
+            }
+    def _execute_cypher_query(self, cypher_query: str) -> List[Dict]:
+        """Execute the generated Cypher query"""
+        try:
+            with self.neo4j_service.driver.session() as session:
+                result = session.run(cypher_query)
+                return [dict(record) for record in result]
+        except Exception as e:
+            print(f"Cypher execution error: {e}")
+            return []
+    def _format_corrections(self, locations: List[Tuple]) -> List[Dict]:
+        """Format location corrections for display"""
+        corrections = []
+        for original, corrected, confidence, method in locations:
+            if original.lower() != corrected.lower():
+                corrections.append({
+                    'original': original,
+                    'corrected': corrected,
+                    'confidence': confidence,
+                    'method': method
+                })
+        return corrections
+    def _get_query_suggestions(self) -> List[str]:
+        """Get query suggestions"""
+        return [
+            "What is the fare from Colombo to Kandy?",
+            "What is the lowest fare price?",
+            "Show me the cheapest routes",
+            "Show me the most expensive routes",
+            "List all places",
+            "Routes from Panadura",
+            "Routes to Galle",
+            "Database statistics"
+        ]

logger.py ADDED Viewed

	@@ -0,0 +1,53 @@

+#!/usr/bin/env python3
+"""
+Centralized logging setup for the Transport Query Application.
+Provides a rotating file handler and console output.
+"""
+import logging
+import os
+from logging.handlers import RotatingFileHandler
+def get_logger(name: str) -> logging.Logger:
+    """Create or retrieve a configured logger with file and console handlers."""
+    logger = logging.getLogger(name)
+    if getattr(logger, "_configured", False):
+        return logger
+    log_level_str = os.getenv("LOG_LEVEL", "INFO").upper()
+    log_dir = os.getenv("LOG_DIR", os.path.join(os.path.dirname(__file__), "..", "logs"))
+    try:
+        log_dir = os.path.abspath(log_dir)
+        os.makedirs(log_dir, exist_ok=True)
+    except Exception:
+        # Fallback to current directory if path invalid
+        log_dir = os.getcwd()
+    log_path = os.path.join(log_dir, "app.log")
+    formatter = logging.Formatter(
+        fmt="%(asctime)s %(levelname)s [%(name)s] %(message)s",
+        datefmt="%Y-%m-%d %H:%M:%S",
+    )
+    # File handler with rotation (1 MB, keep 5 backups)
+    file_handler = RotatingFileHandler(log_path, maxBytes=1_000_000, backupCount=5, encoding="utf-8")
+    file_handler.setFormatter(formatter)
+    # Console handler
+    console_handler = logging.StreamHandler()
+    console_handler.setFormatter(formatter)
+    # Configure logger
+    logger.setLevel(getattr(logging, log_level_str, logging.INFO))
+    logger.addHandler(file_handler)
+    logger.addHandler(console_handler)
+    logger.propagate = False
+    logger._configured = True  # type: ignore[attr-defined]
+    logger.debug(f"Logger initialized. Level={log_level_str}, File={log_path}")
+    return logger

neo4j_service.py ADDED Viewed

	@@ -0,0 +1,222 @@

+#!/usr/bin/env python3
+"""
+Neo4j Service for Transport Query Application
+Handles all database operations
+"""
+from neo4j import GraphDatabase
+from typing import List, Dict, Optional, Tuple
+from config import Config
+class Neo4jService:
+    """Neo4j database service"""
+    def __init__(self):
+        self.config = Config()
+        self.driver = None
+        self._connect()
+    def _connect(self):
+        """Connect to Neo4j database"""
+        try:
+            self.driver = GraphDatabase.driver(
+                self.config.NEO4J_URI,
+                auth=(self.config.NEO4J_USER, self.config.NEO4J_PASSWORD)
+            )
+            # Test connection
+            with self.driver.session() as session:
+                session.run("RETURN 1")
+            print("✅ Connected to Neo4j database")
+        except Exception as e:
+            print(f"❌ Failed to connect to Neo4j: {e}")
+            self.driver = None
+    def is_connected(self) -> bool:
+        """Check if connected to Neo4j"""
+        return self.driver is not None
+    def get_fare(self, from_location: str, to_location: str) -> Optional[Dict]:
+        """Get fare between two locations"""
+        if not self.is_connected():
+            return None
+        try:
+            with self.driver.session() as session:
+                result = session.run("""
+                    MATCH (a:Place {name: $from_location})-[r:Fare]->(b:Place {name: $to_location})
+                    RETURN a.name as from_place, b.name as to_place, r.fare as fare
+                """, from_location=from_location, to_location=to_location)
+                record = result.single()
+                if record:
+                    return {
+                        'from_place': record['from_place'],
+                        'to_place': record['to_place'],
+                        'fare': record['fare']
+                    }
+                return None
+        except Exception as e:
+            print(f"Error getting fare: {e}")
+            return None
+    def get_all_places(self) -> List[str]:
+        """Get all available places"""
+        if not self.is_connected():
+            return []
+        try:
+            with self.driver.session() as session:
+                result = session.run("""
+                    MATCH (p:Place)
+                    RETURN DISTINCT p.name as place
+                    ORDER BY p.name
+                """)
+                return [record['place'] for record in result]
+        except Exception as e:
+            print(f"Error getting places: {e}")
+            return []
+    def get_routes_from_location(self, from_location: str) -> List[Dict]:
+        """Get all routes from a specific location"""
+        if not self.is_connected():
+            return []
+        try:
+            with self.driver.session() as session:
+                result = session.run("""
+                    MATCH (a:Place {name: $from_location})-[r:Fare]->(b:Place)
+                    RETURN a.name as from_place, b.name as to_place, r.fare as fare
+                    ORDER BY r.fare
+                """, from_location=from_location)
+                return [dict(record) for record in result]
+        except Exception as e:
+            print(f"Error getting routes from location: {e}")
+            return []
+    def get_routes_to_location(self, to_location: str) -> List[Dict]:
+        """Get all routes to a specific location"""
+        if not self.is_connected():
+            return []
+        try:
+            with self.driver.session() as session:
+                result = session.run("""
+                    MATCH (a:Place)-[r:Fare]->(b:Place {name: $to_location})
+                    RETURN a.name as from_place, b.name as to_place, r.fare as fare
+                    ORDER BY r.fare
+                """, to_location=to_location)
+                return [dict(record) for record in result]
+        except Exception as e:
+            print(f"Error getting routes to location: {e}")
+            return []
+    def get_cheapest_routes(self, limit: int = 10) -> List[Dict]:
+        """Get cheapest routes"""
+        if not self.is_connected():
+            return []
+        try:
+            with self.driver.session() as session:
+                result = session.run("""
+                    MATCH (a:Place)-[r:Fare]->(b:Place)
+                    RETURN a.name as from_place, b.name as to_place, r.fare as fare
+                    ORDER BY r.fare ASC
+                    LIMIT $limit
+                """, limit=limit)
+                return [dict(record) for record in result]
+        except Exception as e:
+            print(f"Error getting cheapest routes: {e}")
+            return []
+    def get_most_expensive_routes(self, limit: int = 10) -> List[Dict]:
+        """Get most expensive routes"""
+        if not self.is_connected():
+            return []
+        try:
+            with self.driver.session() as session:
+                result = session.run("""
+                    MATCH (a:Place)-[r:Fare]->(b:Place)
+                    RETURN a.name as from_place, b.name as to_place, r.fare as fare
+                    ORDER BY r.fare DESC
+                    LIMIT $limit
+                """, limit=limit)
+                return [dict(record) for record in result]
+        except Exception as e:
+            print(f"Error getting most expensive routes: {e}")
+            return []
+    def search_routes_by_fare_range(self, min_fare: float, max_fare: float) -> List[Dict]:
+        """Search routes within a fare range"""
+        if not self.is_connected():
+            return []
+        try:
+            with self.driver.session() as session:
+                result = session.run("""
+                    MATCH (a:Place)-[r:Fare]->(b:Place)
+                    WHERE r.fare >= $min_fare AND r.fare <= $max_fare
+                    RETURN a.name as from_place, b.name as to_place, r.fare as fare
+                    ORDER BY r.fare
+                """, min_fare=min_fare, max_fare=max_fare)
+                return [dict(record) for record in result]
+        except Exception as e:
+            print(f"Error searching routes by fare range: {e}")
+            return []
+    def get_route_statistics(self) -> Dict:
+        """Get database statistics"""
+        if not self.is_connected():
+            return {}
+        try:
+            with self.driver.session() as session:
+                # Count places
+                places_result = session.run("MATCH (p:Place) RETURN count(p) as place_count")
+                place_count = places_result.single()['place_count']
+                # Count routes
+                routes_result = session.run("MATCH ()-[r:Fare]->() RETURN count(r) as route_count")
+                route_count = routes_result.single()['route_count']
+                # Average fare
+                avg_result = session.run("MATCH ()-[r:Fare]->() RETURN avg(r.fare) as avg_fare")
+                avg_fare = avg_result.single()['avg_fare']
+                # Min and max fares
+                fare_range_result = session.run("""
+                    MATCH ()-[r:Fare]->()
+                    RETURN min(r.fare) as min_fare, max(r.fare) as max_fare
+                """)
+                fare_range = fare_range_result.single()
+                return {
+                    'total_places': place_count,
+                    'total_routes': route_count,
+                    'average_fare': round(avg_fare, 2) if avg_fare else 0,
+                    'min_fare': fare_range['min_fare'],
+                    'max_fare': fare_range['max_fare']
+                }
+        except Exception as e:
+            print(f"Error getting statistics: {e}")
+            return {}
+    def close(self):
+        """Close database connection"""
+        if self.driver:
+            self.driver.close()
+            self.driver = None

requirements.txt ADDED Viewed

	@@ -0,0 +1,10 @@

+Flask==2.3.3
+neo4j==5.14.1
+requests==2.31.0
+openai==1.3.0
+python-dotenv==1.0.0
+fuzzywuzzy==0.18.0
+python-Levenshtein==0.23.0
+pandas==2.1.3
+numpy==1.24.3
+Werkzeug==2.3.7

spell_corrector.py ADDED Viewed

	@@ -0,0 +1,257 @@

+#!/usr/bin/env python3
+"""
+Spell Correction Module for Transport Query Application
+Handles location name corrections using fuzzy matching and LLM
+"""
+import re
+from fuzzywuzzy import fuzz
+from typing import List, Tuple, Optional
+import openai
+from config import Config
+class SpellCorrector:
+    """Spell correction for location names"""
+    def __init__(self):
+        self.config = Config()
+        self.location_mapping = self.config.LOCATION_MAPPING
+        self.available_locations = set(self.location_mapping.values())
+        # Initialize OpenAI if API key is available
+        if self.config.OPENAI_API_KEY:
+            try:
+                # Prefer new SDK client if installed; otherwise set legacy api key
+                try:
+                    from openai import OpenAI  # noqa: F401
+                    self.llm_available = True
+                except Exception:
+                    openai.api_key = self.config.OPENAI_API_KEY
+                    self.llm_available = True
+            except Exception:
+                self.llm_available = False
+        else:
+            self.llm_available = False
+    def correct_location(self, location: str) -> Tuple[str, float, str]:
+        """
+        Correct a location name using multiple methods
+        Returns:
+            Tuple of (corrected_name, confidence_score, correction_method)
+        """
+        location = location.strip().lower()
+        # Method 1: Direct mapping
+        if location in self.location_mapping:
+            corrected = self.location_mapping[location]
+            return corrected, 1.0, "direct_mapping"
+        # Method 2: Fuzzy matching
+        best_match, confidence = self._fuzzy_match(location)
+        if confidence >= self.config.SIMILARITY_THRESHOLD:
+            return best_match, confidence, "fuzzy_matching"
+        # Method 3: LLM correction (if available)
+        if self.llm_available:
+            llm_corrected = self._llm_correct(location)
+            if llm_corrected:
+                # Verify LLM suggestion with fuzzy matching
+                llm_confidence = fuzz.ratio(location.lower(), llm_corrected.lower()) / 100
+                if llm_confidence >= 0.6:  # Lower threshold for LLM suggestions
+                    return llm_corrected, llm_confidence, "llm_correction"
+        # Method 4: Partial matching
+        partial_match = self._partial_match(location)
+        if partial_match:
+            return partial_match, 0.7, "partial_matching"
+        # No correction found
+        return location.title(), 0.0, "no_correction"
+    def _fuzzy_match(self, location: str) -> Tuple[str, float]:
+        """Find best fuzzy match for location"""
+        best_match = None
+        best_score = 0
+        for available_location in self.available_locations:
+            score = fuzz.ratio(location.lower(), available_location.lower()) / 100
+            if score > best_score:
+                best_score = score
+                best_match = available_location
+        return best_match, best_score
+    def _partial_match(self, location: str) -> Optional[str]:
+        """Find partial matches (substring matching)"""
+        location_lower = location.lower()
+        for available_location in self.available_locations:
+            available_lower = available_location.lower()
+            # Check if location is contained in available location
+            if location_lower in available_lower or available_lower in location_lower:
+                return available_location
+        return None
+    def _llm_correct(self, location: str) -> Optional[str]:
+        """Use LLM to correct location name"""
+        try:
+            prompt = f"""
+            You are a location name correction system for Sri Lankan cities and towns.
+            Given a potentially misspelled location name, return the correct spelling.
+            Available locations include: {', '.join(sorted(self.available_locations))}
+            Input location: "{location}"
+            Return only the corrected location name, nothing else. If no correction is possible, return "UNKNOWN".
+            """
+            corrected = None
+            # Try new SDK first
+            try:
+                from openai import OpenAI
+                client = OpenAI(api_key=self.config.OPENAI_API_KEY)
+                response = client.chat.completions.create(
+                    model=self.config.OPENAI_MODEL,
+                    messages=[
+                        {"role": "system", "content": "You are a helpful assistant that corrects location names."},
+                        {"role": "user", "content": prompt}
+                    ],
+                    max_tokens=50,
+                    temperature=0.1
+                )
+                corrected = response.choices[0].message.content.strip()
+            except Exception as sdk_err:
+                # Fallback to legacy API if present
+                import openai
+                try:
+                    openai.api_key = self.config.OPENAI_API_KEY
+                    response = openai.ChatCompletion.create(
+                        model=self.config.OPENAI_MODEL,
+                        messages=[
+                            {"role": "system", "content": "You are a helpful assistant that corrects location names."},
+                            {"role": "user", "content": prompt}
+                        ],
+                        max_tokens=50,
+                        temperature=0.1
+                    )
+                    corrected = response.choices[0].message.content.strip()
+                except Exception:
+                    raise sdk_err
+            # Validate LLM response
+            if corrected.upper() == "UNKNOWN":
+                return None
+            # Check if corrected location exists in our database
+            if corrected in self.available_locations:
+                return corrected
+            # Try fuzzy matching on LLM response
+            llm_fuzzy_match, confidence = self._fuzzy_match(corrected)
+            if confidence >= 0.8:
+                return llm_fuzzy_match
+            return None
+        except Exception as e:
+            print(f"LLM correction error: {e}")
+            return None
+    def extract_locations_from_query(self, query: str) -> List[Tuple[str, str, float, str]]:
+        """
+        Extract and correct locations from a natural language query
+        Returns:
+            List of tuples: (original, corrected, confidence, method)
+        """
+        # Common patterns for location extraction
+        patterns = [
+             r'from\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)(?:\s|$|\?)',
+             r'([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)(?:\s|$|\?)',
+             r'between\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+and\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)(?:\s|$|\?)',
+             r'fare\s+(?:of|from\s+)?([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)(?:\s|$|\?)',
+             r'price\s+(?:of|from\s+)?([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)(?:\s|$|\?)',
+             r'cost\s+(?:of|from\s+)?([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)(?:\s|$|\?)',
+             r'how\s+much\s+(?:is|does)\s+(?:the\s+)?(?:fare|price|cost)\s+(?:from\s+)?([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)(?:\s|$|\?)',
+             r'what\s+(?:is|are)\s+(?:the\s+)?(?:fare|price|cost)s?\s+(?:from\s+)?([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)(?:\s|$|\?)',
+             r'([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+(?:fare|price|cost)(?:\s|$|\?)',
+             r'(?:fare|price|cost)\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)(?:\s|$|\?)'
+         ]
+        locations = []
+        # Try all patterns to find locations
+        for pattern in patterns:
+            match = re.search(pattern, query, re.IGNORECASE)
+            if match:
+                # Extract locations from the match
+                groups = match.groups()
+                if len(groups) >= 2:
+                    from_location = groups[0].strip()
+                    to_location = groups[1].strip()
+                    # Skip if locations are too short or common words
+                    if len(from_location) >= 2 and from_location.lower() not in ['to', 'from', 'and', 'the', 'a', 'an']:
+                        from_corrected, from_confidence, from_method = self.correct_location(from_location)
+                        if from_confidence > 0.5:
+                            locations.append((
+                                from_location,
+                                from_corrected,
+                                from_confidence,
+                                from_method
+                            ))
+                    if len(to_location) >= 2 and to_location.lower() not in ['to', 'from', 'and', 'the', 'a', 'an']:
+                        to_corrected, to_confidence, to_method = self.correct_location(to_location)
+                        if to_confidence > 0.5:
+                            locations.append((
+                                to_location,
+                                to_corrected,
+                                to_confidence,
+                                to_method
+                            ))
+                    # If we found locations, break to avoid duplicates
+                    if len(locations) >= 2:
+                        break
+        return locations
+    def get_suggestions(self, partial_location: str) -> List[Tuple[str, float]]:
+        """Get location suggestions for autocomplete"""
+        suggestions = []
+        partial_lower = partial_location.lower()
+        for location in self.available_locations:
+            location_lower = location.lower()
+            # Check if partial location is a prefix
+            if location_lower.startswith(partial_lower):
+                suggestions.append((location, 1.0))
+            # Check fuzzy similarity
+            elif fuzz.ratio(partial_lower, location_lower) / 100 >= 0.6:
+                suggestions.append((location, fuzz.ratio(partial_lower, location_lower) / 100))
+        # Sort by confidence and return top suggestions
+        suggestions.sort(key=lambda x: x[1], reverse=True)
+        return suggestions[:self.config.MAX_SUGGESTIONS]
+    def validate_route(self, from_location: str, to_location: str) -> Tuple[bool, str]:
+        """Validate if a route exists in the database"""
+        from_corrected, from_confidence, _ = self.correct_location(from_location)
+        to_corrected, to_confidence, _ = self.correct_location(to_location)
+        if from_confidence < 0.5:
+            return False, f"Could not identify departure location: '{from_location}'"
+        if to_confidence < 0.5:
+            return False, f"Could not identify destination location: '{to_location}'"
+        if from_corrected == to_corrected:
+            return False, f"Departure and destination cannot be the same: '{from_corrected}'"
+        return True, f"Route: {from_corrected} → {to_corrected}"

templates/index.html ADDED Viewed

	@@ -0,0 +1,977 @@

+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>🚌 Transport Query System</title>
+    <link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css" rel="stylesheet">
+    <style>
+        * {
+            margin: 0;
+            padding: 0;
+            box-sizing: border-box;
+        }
+        body {
+            font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
+            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+            min-height: 100vh;
+            color: #333;
+        }
+        .container {
+            max-width: 1200px;
+            margin: 0 auto;
+            padding: 20px;
+        }
+        .header {
+            text-align: center;
+            margin-bottom: 30px;
+            color: white;
+        }
+        .header h1 {
+            font-size: 2.5rem;
+            margin-bottom: 10px;
+            text-shadow: 2px 2px 4px rgba(0,0,0,0.3);
+        }
+        .header p {
+            font-size: 1.1rem;
+            opacity: 0.9;
+        }
+        .main-content {
+            display: grid;
+            grid-template-columns: 1fr 1fr;
+            gap: 30px;
+            margin-bottom: 30px;
+        }
+        .query-section {
+            background: white;
+            border-radius: 15px;
+            padding: 30px;
+            box-shadow: 0 10px 30px rgba(0,0,0,0.2);
+        }
+        .query-section h2 {
+            color: #667eea;
+            margin-bottom: 20px;
+            font-size: 1.5rem;
+        }
+        .query-input {
+            position: relative;
+            margin-bottom: 20px;
+        }
+        .query-input input {
+            width: 100%;
+            padding: 15px 20px;
+            border: 2px solid #e1e5e9;
+            border-radius: 10px;
+            font-size: 1rem;
+            transition: all 0.3s ease;
+        }
+        .query-input input:focus {
+            outline: none;
+            border-color: #667eea;
+            box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1);
+        }
+        .query-button {
+            width: 100%;
+            padding: 15px;
+            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+            color: white;
+            border: none;
+            border-radius: 10px;
+            font-size: 1.1rem;
+            font-weight: 600;
+            cursor: pointer;
+            transition: all 0.3s ease;
+        }
+        .query-button:hover {
+            transform: translateY(-2px);
+            box-shadow: 0 5px 15px rgba(0,0,0,0.2);
+        }
+        .query-button:disabled {
+            opacity: 0.6;
+            cursor: not-allowed;
+            transform: none;
+        }
+        .results-section {
+            background: white;
+            border-radius: 15px;
+            padding: 30px;
+            box-shadow: 0 10px 30px rgba(0,0,0,0.2);
+            max-height: 600px;
+            overflow-y: auto;
+        }
+        .results-section h2 {
+            color: #667eea;
+            margin-bottom: 20px;
+            font-size: 1.5rem;
+        }
+        .result-item {
+            background: #f8f9fa;
+            border-radius: 10px;
+            padding: 20px;
+            margin-bottom: 15px;
+            border-left: 4px solid #667eea;
+        }
+        .result-item.success {
+            border-left-color: #28a745;
+        }
+        .result-item.error {
+            border-left-color: #dc3545;
+        }
+        .result-message {
+            font-size: 1.1rem;
+            margin-bottom: 10px;
+            font-weight: 500;
+        }
+        .result-data {
+            background: #e9ecef;
+            border-radius: 8px;
+            padding: 15px;
+            margin: 10px 0;
+            font-family: 'Courier New', monospace;
+            font-size: 0.9rem;
+            overflow-x: auto;
+        }
+        .corrections {
+            margin-top: 15px;
+            padding: 10px;
+            background: #fff3cd;
+            border-radius: 8px;
+            border: 1px solid #ffeaa7;
+        }
+        .correction-item {
+            margin: 5px 0;
+            font-size: 0.9rem;
+        }
+        .suggestions {
+            margin-top: 15px;
+        }
+        .suggestion-item {
+            background: #e3f2fd;
+            border-radius: 5px;
+            padding: 8px 12px;
+            margin: 5px 0;
+            cursor: pointer;
+            transition: background 0.3s ease;
+        }
+        .suggestion-item:hover {
+            background: #bbdefb;
+        }
+        .chat-container {
+            background: white;
+            border-radius: 15px;
+            box-shadow: 0 10px 30px rgba(0,0,0,0.2);
+            margin-bottom: 30px;
+            overflow: hidden;
+        }
+        .chat-header {
+            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+            color: white;
+            padding: 20px 30px;
+            display: flex;
+            justify-content: space-between;
+            align-items: center;
+        }
+        .chat-header h2 {
+            margin: 0;
+            font-size: 1.5rem;
+        }
+        .chat-status {
+            font-size: 0.9rem;
+            opacity: 0.9;
+        }
+        .chat-body {
+            display: grid;
+            grid-template-columns: 1fr 300px;
+            min-height: 500px;
+        }
+        .chat-messages {
+            padding: 20px;
+            max-height: 400px;
+            overflow-y: auto;
+            border-right: 1px solid #e9ecef;
+        }
+        .message {
+            margin-bottom: 20px;
+            display: flex;
+            align-items: flex-start;
+            gap: 10px;
+        }
+        .message.user {
+            flex-direction: row-reverse;
+        }
+        .message-avatar {
+            width: 40px;
+            height: 40px;
+            border-radius: 50%;
+            display: flex;
+            align-items: center;
+            justify-content: center;
+            font-size: 1.2rem;
+            flex-shrink: 0;
+        }
+        .message.user .message-avatar {
+            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+            color: white;
+        }
+        .message.bot .message-avatar {
+            background: #f8f9fa;
+            color: #667eea;
+            border: 2px solid #667eea;
+        }
+        .message-content {
+            flex: 1;
+            max-width: 70%;
+        }
+        .message.user .message-content {
+            text-align: right;
+        }
+        .message-bubble {
+            background: #f8f9fa;
+            border-radius: 15px;
+            padding: 15px;
+            display: inline-block;
+            max-width: 100%;
+            word-wrap: break-word;
+        }
+        .message.user .message-bubble {
+            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+            color: white;
+        }
+        .message-time {
+            font-size: 0.8rem;
+            color: #6c757d;
+            margin-top: 5px;
+        }
+        .message.user .message-time {
+            text-align: right;
+        }
+        .chat-input-section {
+            padding: 20px;
+            border-top: 1px solid #e9ecef;
+            background: #f8f9fa;
+        }
+        .chat-input-container {
+            display: flex;
+            gap: 10px;
+            align-items: center;
+        }
+        .chat-input {
+            flex: 1;
+            padding: 12px 15px;
+            border: 2px solid #e1e5e9;
+            border-radius: 25px;
+            font-size: 1rem;
+            transition: all 0.3s ease;
+        }
+        .chat-input:focus {
+            outline: none;
+            border-color: #667eea;
+            box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1);
+        }
+        .chat-send-btn {
+            width: 45px;
+            height: 45px;
+            border-radius: 50%;
+            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+            color: white;
+            border: none;
+            cursor: pointer;
+            transition: all 0.3s ease;
+            display: flex;
+            align-items: center;
+            justify-content: center;
+        }
+        .chat-send-btn:hover {
+            transform: scale(1.1);
+        }
+        .chat-send-btn:disabled {
+            opacity: 0.6;
+            cursor: not-allowed;
+            transform: none;
+        }
+        .examples-sidebar {
+            background: #f8f9fa;
+            padding: 20px;
+            border-left: 1px solid #e9ecef;
+        }
+        .examples-sidebar h3 {
+            color: #667eea;
+            margin-bottom: 15px;
+            font-size: 1.2rem;
+        }
+        .example-categories {
+            display: flex;
+            flex-direction: column;
+            gap: 10px;
+        }
+        .example-category {
+            background: white;
+            border-radius: 10px;
+            padding: 15px;
+            border: 2px solid transparent;
+            transition: all 0.3s ease;
+        }
+        .example-category:hover {
+            border-color: #667eea;
+            transform: translateX(5px);
+        }
+        .category-title {
+            font-weight: 600;
+            color: #667eea;
+            margin-bottom: 10px;
+            font-size: 0.9rem;
+        }
+        .example-queries {
+            display: flex;
+            flex-direction: column;
+            gap: 8px;
+        }
+        .example-query-btn {
+            background: #e3f2fd;
+            border: none;
+            border-radius: 8px;
+            padding: 8px 12px;
+            text-align: left;
+            cursor: pointer;
+            transition: all 0.3s ease;
+            font-size: 0.85rem;
+            color: #333;
+        }
+        .example-query-btn:hover {
+            background: #bbdefb;
+            transform: translateX(3px);
+        }
+        .example-query-btn i {
+            margin-right: 5px;
+            color: #667eea;
+        }
+        .data-display {
+            background: #f8f9fa;
+            border-radius: 10px;
+            padding: 15px;
+            margin-top: 10px;
+        }
+        .data-display h4 {
+            color: #667eea;
+            margin-bottom: 10px;
+            font-size: 1rem;
+        }
+        .data-table {
+            width: 100%;
+            border-collapse: collapse;
+            background: white;
+            border-radius: 8px;
+            overflow: hidden;
+            font-size: 0.85rem;
+        }
+        .data-table th,
+        .data-table td {
+            padding: 8px 12px;
+            text-align: left;
+            border-bottom: 1px solid #e9ecef;
+        }
+        .data-table th {
+            background: #667eea;
+            color: white;
+            font-weight: 600;
+        }
+        .data-table tr:hover {
+            background: #f8f9fa;
+        }
+        .corrections {
+            background: #fff3cd;
+            border: 1px solid #ffeaa7;
+            border-radius: 8px;
+            padding: 10px;
+            margin-top: 10px;
+            font-size: 0.85rem;
+        }
+        .corrections strong {
+            color: #856404;
+        }
+        .correction-item {
+            margin: 3px 0;
+            color: #856404;
+        }
+        .suggestions {
+            margin-top: 10px;
+        }
+        .suggestion-item {
+            background: #e3f2fd;
+            border-radius: 5px;
+            padding: 6px 10px;
+            margin: 3px 0;
+            cursor: pointer;
+            transition: background 0.3s ease;
+            font-size: 0.85rem;
+        }
+        .suggestion-item:hover {
+            background: #bbdefb;
+        }
+        @media (max-width: 768px) {
+            .chat-body {
+                grid-template-columns: 1fr;
+            }
+            .examples-sidebar {
+                border-left: none;
+                border-top: 1px solid #e9ecef;
+            }
+            .message-content {
+                max-width: 85%;
+            }
+        }
+        .status-bar {
+            background: white;
+            border-radius: 15px;
+            padding: 20px;
+            box-shadow: 0 10px 30px rgba(0,0,0,0.2);
+            margin-bottom: 20px;
+        }
+        .status-grid {
+            display: grid;
+            grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
+            gap: 20px;
+        }
+        .status-item {
+            text-align: center;
+            padding: 15px;
+            background: #f8f9fa;
+            border-radius: 10px;
+        }
+        .status-label {
+            font-size: 0.9rem;
+            color: #6c757d;
+            margin-bottom: 5px;
+        }
+        .status-value {
+            font-size: 1.5rem;
+            font-weight: 600;
+            color: #667eea;
+        }
+        .status-value.success {
+            color: #28a745;
+        }
+        .status-value.error {
+            color: #dc3545;
+        }
+        .loading {
+            display: none;
+            text-align: center;
+            padding: 20px;
+        }
+        .loading i {
+            font-size: 2rem;
+            color: #667eea;
+            animation: spin 1s linear infinite;
+        }
+        @keyframes spin {
+            0% { transform: rotate(0deg); }
+            100% { transform: rotate(360deg); }
+        }
+        .table-container {
+            overflow-x: auto;
+            margin-top: 15px;
+        }
+        .data-table {
+            width: 100%;
+            border-collapse: collapse;
+            background: white;
+            border-radius: 8px;
+            overflow: hidden;
+        }
+        .data-table th,
+        .data-table td {
+            padding: 12px;
+            text-align: left;
+            border-bottom: 1px solid #e9ecef;
+        }
+        .data-table th {
+            background: #667eea;
+            color: white;
+            font-weight: 600;
+        }
+        .data-table tr:hover {
+            background: #f8f9fa;
+        }
+        @media (max-width: 768px) {
+            .main-content {
+                grid-template-columns: 1fr;
+            }
+            .header h1 {
+                font-size: 2rem;
+            }
+            .container {
+                padding: 10px;
+            }
+        }
+    </style>
+</head>
+<body>
+    <div class="container">
+        <div class="header">
+            <h1><i class="fas fa-bus"></i> Transport Query System</h1>
+            <p>Ask questions about transport fares and routes with intelligent spell correction</p>
+        </div>
+        <div class="status-bar">
+            <div class="status-grid">
+                <div class="status-item">
+                    <div class="status-label">Neo4j Status</div>
+                    <div class="status-value" id="neo4j-status">Checking...</div>
+                </div>
+                <div class="status-item">
+                    <div class="status-label">Total Places</div>
+                    <div class="status-value" id="total-places">-</div>
+                </div>
+                <div class="status-item">
+                    <div class="status-label">Total Routes</div>
+                    <div class="status-value" id="total-routes">-</div>
+                </div>
+                <div class="status-item">
+                    <div class="status-label">Average Fare</div>
+                    <div class="status-value" id="avg-fare">-</div>
+                </div>
+            </div>
+        </div>
+        <div class="chat-container">
+            <div class="chat-header">
+                <h2><i class="fas fa-comments"></i> Transport Query Chat</h2>
+                <div class="chat-status">
+                    <i class="fas fa-circle" id="status-indicator"></i>
+                    <span id="status-text">Ready</span>
+                </div>
+            </div>
+            <div class="chat-body">
+                <div class="chat-messages" id="chat-messages">
+                    <div class="message bot">
+                        <div class="message-avatar">
+                            <i class="fas fa-robot"></i>
+                        </div>
+                        <div class="message-content">
+                            <div class="message-bubble">
+                                Hello! I'm your transport assistant. I can help you find fares, compare routes, and get transport information. Try clicking on an example query or type your own question!
+                            </div>
+                            <div class="message-time" id="welcome-time"></div>
+                        </div>
+                    </div>
+                </div>
+                <div class="examples-sidebar">
+                    <h3><i class="fas fa-lightbulb"></i> Example Queries</h3>
+                    <div class="example-categories" id="example-categories">
+                        <!-- Example categories will be loaded here -->
+                    </div>
+                </div>
+            </div>
+            <div class="chat-input-section">
+                <div class="chat-input-container">
+                    <input type="text" class="chat-input" id="chat-input" placeholder="Type your transport question here..." autocomplete="off">
+                    <button class="chat-send-btn" id="chat-send-btn" onclick="sendChatMessage()">
+                        <i class="fas fa-paper-plane"></i>
+                    </button>
+                </div>
+            </div>
+        </div>
+    </div>
+    <script>
+        // Global variables
+        let currentQuery = '';
+        // Initialize the application
+        document.addEventListener('DOMContentLoaded', function() {
+            loadStatus();
+            loadExampleCategories();
+            setupChatEventListeners();
+            setWelcomeTime();
+        });
+        function setupChatEventListeners() {
+            const chatInput = document.getElementById('chat-input');
+            const chatSendBtn = document.getElementById('chat-send-btn');
+            // Enter key to send message
+            chatInput.addEventListener('keypress', function(e) {
+                if (e.key === 'Enter') {
+                    sendChatMessage();
+                }
+            });
+            // Input validation
+            chatInput.addEventListener('input', function() {
+                chatSendBtn.disabled = !this.value.trim();
+            });
+        }
+        function setWelcomeTime() {
+            const now = new Date();
+            const timeString = now.toLocaleTimeString();
+            document.getElementById('welcome-time').textContent = timeString;
+        }
+        async function loadStatus() {
+            try {
+                const response = await fetch('/api/status');
+                const data = await response.json();
+                document.getElementById('neo4j-status').textContent = data.neo4j_connected ? 'Connected' : 'Disconnected';
+                document.getElementById('neo4j-status').className = data.neo4j_connected ? 'status-value success' : 'status-value error';
+                if (data.statistics) {
+                    document.getElementById('total-places').textContent = data.statistics.total_places || 0;
+                    document.getElementById('total-routes').textContent = data.statistics.total_routes || 0;
+                    document.getElementById('avg-fare').textContent = `Rs. ${data.statistics.average_fare || 0}`;
+                }
+                // Update chat status
+                const statusIndicator = document.getElementById('status-indicator');
+                const statusText = document.getElementById('status-text');
+                if (data.neo4j_connected) {
+                    statusIndicator.style.color = '#28a745';
+                    statusText.textContent = 'Connected to Database';
+                } else {
+                    statusIndicator.style.color = '#dc3545';
+                    statusText.textContent = 'Database Disconnected';
+                }
+            } catch (error) {
+                console.error('Error loading status:', error);
+                document.getElementById('status-indicator').style.color = '#dc3545';
+                document.getElementById('status-text').textContent = 'Connection Error';
+            }
+        }
+        async function loadExampleCategories() {
+            try {
+                const response = await fetch('/api/examples');
+                const data = await response.json();
+                const categoriesContainer = document.getElementById('example-categories');
+                categoriesContainer.innerHTML = '';
+                data.examples.forEach(category => {
+                    const categoryDiv = document.createElement('div');
+                    categoryDiv.className = 'example-category';
+                    let categoryHTML = `<div class="category-title">${category.category}</div>`;
+                    categoryHTML += '<div class="example-queries">';
+                    category.examples.forEach(example => {
+                        categoryHTML += `
+                            <button class="example-query-btn" onclick="useExampleQuery('${example.query.replace(/'/g, "\\'")}')">
+                                <i class="fas fa-arrow-right"></i>
+                                ${example.query}
+                            </button>
+                        `;
+                    });
+                    categoryHTML += '</div>';
+                    categoryDiv.innerHTML = categoryHTML;
+                    categoriesContainer.appendChild(categoryDiv);
+                });
+            } catch (error) {
+                console.error('Error loading example categories:', error);
+            }
+        }
+        function useExampleQuery(query) {
+            document.getElementById('chat-input').value = query;
+            document.getElementById('chat-send-btn').disabled = false;
+            sendChatMessage();
+        }
+        async function sendChatMessage() {
+            const chatInput = document.getElementById('chat-input');
+            const query = chatInput.value.trim();
+            if (!query) return;
+            currentQuery = query;
+            // Add user message to chat
+            addChatMessage('user', query);
+            // Clear input
+            chatInput.value = '';
+            document.getElementById('chat-send-btn').disabled = true;
+            // Show typing indicator
+            showTypingIndicator();
+            try {
+                const response = await fetch('/api/query', {
+                    method: 'POST',
+                    headers: {
+                        'Content-Type': 'application/json',
+                    },
+                    body: JSON.stringify({ query: query })
+                });
+                const result = await response.json();
+                // Remove typing indicator
+                removeTypingIndicator();
+                // Add bot response
+                addBotResponse(result);
+            } catch (error) {
+                removeTypingIndicator();
+                addChatMessage('bot', 'Sorry, I encountered an error processing your query. Please try again.');
+            }
+        }
+        function addChatMessage(sender, message) {
+            const chatMessages = document.getElementById('chat-messages');
+            const now = new Date();
+            const timeString = now.toLocaleTimeString();
+            const messageDiv = document.createElement('div');
+            messageDiv.className = `message ${sender}`;
+            const avatarIcon = sender === 'user' ? 'fas fa-user' : 'fas fa-robot';
+            messageDiv.innerHTML = `
+                <div class="message-avatar">
+                    <i class="${avatarIcon}"></i>
+                </div>
+                <div class="message-content">
+                    <div class="message-bubble">${message}</div>
+                    <div class="message-time">${timeString}</div>
+                </div>
+            `;
+            chatMessages.appendChild(messageDiv);
+            chatMessages.scrollTop = chatMessages.scrollHeight;
+        }
+        function addBotResponse(result) {
+            const chatMessages = document.getElementById('chat-messages');
+            const now = new Date();
+            const timeString = now.toLocaleTimeString();
+            const messageDiv = document.createElement('div');
+            messageDiv.className = 'message bot';
+            let responseContent = '';
+            if (result.success) {
+                responseContent = `<div class="message-bubble">${result.message}</div>`;
+                // Add data display if available
+                if (result.data && Array.isArray(result.data) && result.data.length > 0) {
+                    responseContent += createChatDataDisplay(result.data);
+                }
+                // Add corrections if any
+                if (result.corrections && result.corrections.length > 0) {
+                    responseContent += createCorrectionsDisplay(result.corrections);
+                }
+                // Add suggestions if any
+                if (result.suggestions && result.suggestions.length > 0) {
+                    responseContent += createSuggestionsDisplay(result.suggestions);
+                }
+            } else {
+                responseContent = `<div class="message-bubble">Sorry, I couldn't process your query: ${result.message}</div>`;
+            }
+            messageDiv.innerHTML = `
+                <div class="message-avatar">
+                    <i class="fas fa-robot"></i>
+                </div>
+                <div class="message-content">
+                    ${responseContent}
+                    <div class="message-time">${timeString}</div>
+                </div>
+            `;
+            chatMessages.appendChild(messageDiv);
+            chatMessages.scrollTop = chatMessages.scrollHeight;
+        }
+        function createChatDataDisplay(data) {
+            if (!data || data.length === 0) return '';
+            const keys = Object.keys(data[0]);
+            let html = '<div class="data-display"><h4>Results:</h4><table class="data-table"><thead><tr>';
+            // Headers
+            keys.forEach(key => {
+                html += `<th>${key.replace(/_/g, ' ').toUpperCase()}</th>`;
+            });
+            html += '</tr></thead><tbody>';
+            // Rows (limit to first 5 for chat)
+            data.slice(0, 5).forEach(row => {
+                html += '<tr>';
+                keys.forEach(key => {
+                    html += `<td>${row[key]}</td>`;
+                });
+                html += '</tr>';
+            });
+            html += '</tbody></table>';
+            if (data.length > 5) {
+                html += `<p style="font-size: 0.8rem; color: #6c757d; margin-top: 5px;">Showing first 5 of ${data.length} results</p>`;
+            }
+            html += '</div>';
+            return html;
+        }
+        function createCorrectionsDisplay(corrections) {
+            let html = '<div class="corrections"><strong>Spell Corrections:</strong>';
+            corrections.forEach(correction => {
+                html += `<div class="correction-item">"${correction.original}" → "${correction.corrected}" (${correction.method})</div>`;
+            });
+            html += '</div>';
+            return html;
+        }
+        function createSuggestionsDisplay(suggestions) {
+            let html = '<div class="suggestions"><strong>Suggestions:</strong>';
+            suggestions.forEach(suggestion => {
+                html += `<div class="suggestion-item" onclick="useSuggestion('${suggestion}')">${suggestion}</div>`;
+            });
+            html += '</div>';
+            return html;
+        }
+        function showTypingIndicator() {
+            const chatMessages = document.getElementById('chat-messages');
+            const typingDiv = document.createElement('div');
+            typingDiv.className = 'message bot';
+            typingDiv.id = 'typing-indicator';
+            typingDiv.innerHTML = `
+                <div class="message-avatar">
+                    <i class="fas fa-robot"></i>
+                </div>
+                <div class="message-content">
+                    <div class="message-bubble">
+                        <i class="fas fa-spinner fa-spin"></i> Processing...
+                    </div>
+                </div>
+            `;
+            chatMessages.appendChild(typingDiv);
+            chatMessages.scrollTop = chatMessages.scrollHeight;
+        }
+        function removeTypingIndicator() {
+            const typingIndicator = document.getElementById('typing-indicator');
+            if (typingIndicator) {
+                typingIndicator.remove();
+            }
+        }
+        function useSuggestion(suggestion) {
+            document.getElementById('chat-input').value = suggestion;
+            document.getElementById('chat-send-btn').disabled = false;
+            sendChatMessage();
+        }
+        // Auto-refresh status: call loadStatus every 30 seconds (30000 ms)
+        setInterval(loadStatus, 30000);
+    </script>
+</body>
+</html>

translation_service.py ADDED Viewed

	@@ -0,0 +1,702 @@

+#!/usr/bin/env python3
+"""
+Translation Service for Sinhala-English Translation
+Handles translation of queries and responses with multiple free alternatives
+"""
+import requests
+import json
+import re
+import openai
+from typing import Dict, Any, Optional
+from config import Config
+from logger import get_logger
+class TranslationService:
+    def __init__(self):
+        self.config = Config()
+        self.openai_api_key = getattr(self.config, 'OPENAI_API_KEY', None)
+        self.logger = get_logger(self.__class__.__name__)
+        # Controls
+        import os
+        self.use_pattern_translation = os.getenv('USE_PATTERN_TRANSLATION', 'false').lower() == 'true'
+        self.force_llm_translation = os.getenv('FORCE_LLM_TRANSLATION', 'false').lower() == 'true'
+        self.last_translation_method: Optional[str] = None
+        # Free translation APIs
+        self.libre_translate_url = "https://libretranslate.de/translate"  # Free public instance
+        self.mymemory_url = "https://api.mymemory.translated.net/get"
+        # Common transport terms in Sinhala and their English equivalents
+        self.transport_terms = {
+            # Fare related
+            'කීයද': 'how much',
+            'මිල': 'price',
+            'වාරික': 'fare',
+            'වාරිකය': 'fare',
+            'වාරිකව': 'fare',
+            'ගාස්තු': 'fare',
+            'ගාස්තුව': 'fare',
+            'ප්‍රවාහන ගාස්තු': 'transport fare',
+            'බස් ගාස්තු': 'bus fare',
+            'බස් ගාස්තුව': 'bus fare',
+            'රේල් ගාස්තු': 'train fare',
+            'රේල් ගාස්තුව': 'train fare',
+            # Locations
+            'කොළඹ': 'Colombo',
+            'මහනුවර': 'Kandy',
+            'මහනුවරට': 'Kandy',
+            'ගාල්ල': 'Galle',
+            'ගාල්ලට': 'Galle',
+            'මාතර': 'Matara',
+            'මාතරට': 'Matara',
+            'අනුරාධපුර': 'Anuradhapura',
+            'අනුරාධපුරට': 'Anuradhapura',
+            'පානදුර': 'Panadura',
+            'පානදුරට': 'Panadura',
+            'අලුත්ගම': 'Aluthgama',
+            'අලුත්ගමට': 'Aluthgama',
+            'නුගේගොඩ': 'Nugegoda',
+            'නුගේගොඩට': 'Nugegoda',
+            'දෙහිවල': 'Dehiwala',
+            'දෙහිවලට': 'Dehiwala',
+            'මොරටුව': 'Moratuwa',
+            'මොරටුවට': 'Moratuwa',
+            # Direction words
+            'වලින්': 'from',
+            'වල': 'from',
+            'ට': 'to',
+            'වෙත': 'to',
+            'සිට': 'from',
+            'දක්වා': 'to',
+            'සි': 'from',
+            # Question words
+            'කොහෙද': 'where',
+            'කවදාද': 'when',
+            'කොහොමද': 'how',
+            'මොනවාද': 'what',
+            'කවුද': 'who',
+            # Comparison words
+            'සමඟ': 'with',
+            'සහ': 'and',
+            'හෝ': 'or',
+            'වඩා': 'more',
+            'අඩු': 'less',
+            'සමාන': 'same',
+            'වෙනස': 'different',
+            'සසඳන්න': 'compare',
+            'සසඳන': 'compare',
+            # Time words
+            'දැන්': 'now',
+            'අද': 'today',
+            'හෙට': 'tomorrow',
+            'ඊයේ': 'yesterday',
+            # Common verbs
+            'යන්න': 'go',
+            'යන': 'go',
+            'එන්න': 'come',
+            'බලන්න': 'see',
+            'දැනගන්න': 'know',
+            'සොයන්න': 'find',
+            'සොයන': 'find',
+            'ඉගෙනගන්න': 'learn',
+            'නිර්දේශ': 'recommend',
+            'නිර්දේශ කරන්න': 'recommend',
+            'පෙන්වන්න': 'show',
+            'පෙන්වන': 'show',
+            # Numbers and currency
+            'රුපියල්': 'rupees',
+            'රු': 'rupees',
+            'රුපියල': 'rupees',
+            # Common phrases
+            'අතර': 'between',
+            'සහිත': 'with',
+            'මාර්ග': 'routes',
+            'මාර්ගවල': 'routes',
+            'ගමනාන්ත': 'destinations',
+            'ප්‍රසිද්ධ': 'popular',
+            'සාමාන්‍ය': 'average',
+            'සාමාන්‍යය': 'average',
+            'දත්ත': 'data',
+            'සංඛ්‍යාලේ���න': 'statistics'
+        }
+        # Sinhala script detection pattern
+        self.sinhala_pattern = re.compile(r'[\u0D80-\u0DFF]')
+    def is_sinhala_text(self, text: str) -> bool:
+        """Check if text contains Sinhala characters"""
+        detected = bool(self.sinhala_pattern.search(text))
+        self.logger.debug(f"Sinhala detection: detected={detected}, text='{text}'")
+        return detected
+    def _map_sinhala_place(self, text: str) -> str:
+        """Map a Sinhala place token to its English equivalent using known terms and suffix stripping."""
+        candidate = text.strip()
+        # Direct map
+        if candidate in self.transport_terms:
+            return self.transport_terms[candidate]
+        # Strip common Sinhala case particles/suffixes and try again
+        base = re.sub(r'(ට|වෙත|දක්වා|වලින්|වල|සිට)$', '', candidate)
+        if base in self.transport_terms:
+            return self.transport_terms[base]
+        return candidate
+    def _parse_sinhala_fare_query(self, query: str) -> Optional[str]:
+        """Detect simple Sinhala fare queries and build a clean English query.
+        Example handled: "කොළඹ සිට මහනුවරට ගාස්තුව කීයද?" -> "What is the fare from Colombo to Kandy?"
+        """
+        try:
+            # Quick check for fare-related tokens to avoid false positives
+            if not any(tok in query for tok in ['ගාස්තු', 'ගාස්තුව', 'වාරික', 'වාරිකය', 'මිල']):
+                return None
+            # Extract source and destination around Sinhala "from" and "to" particles
+            m = re.search(r'([\u0D80-\u0DFF\s]+?)\s*සිට\s*([\u0D80-\u0DFF\s]+?)(?:ට|වෙත|දක්වා)', query)
+            if not m:
+                return None
+            src_si = m.group(1).strip()
+            dst_si = m.group(2).strip()
+            src_en = self._map_sinhala_place(src_si)
+            dst_en = self._map_sinhala_place(dst_si)
+            return f"What is the fare from {src_en} to {dst_en}?"
+        except Exception:
+            return None
+    def translate_with_llm(self, text: str, target_lang: str, source_lang: str = 'auto') -> Optional[str]:
+        """Translate using OpenAI LLM (new SDK). Preserve original intent (comparison, lists, conjunctions)."""
+        if not self.openai_api_key:
+            return None
+        try:
+            # Determine source language
+            if source_lang == 'auto':
+                source_lang = 'si' if self.is_sinhala_text(text) else 'en'
+            # Create language mapping
+            lang_map = {
+                ('si', 'en'): 'Sinhala to English',
+                ('en', 'si'): 'English to Sinhala'
+            }
+            direction = lang_map.get((source_lang, target_lang))
+            if not direction:
+                return None
+            prompt = f"""
+            Translate the following text from {direction}.
+            Output only the translated text without quotes or extra commentary.
+            Critically: Preserve the original intent and structure. Do not simplify.
+            - If it is a comparison (e.g., includes "සසඳා බලන්න"/"සසඳන්න"), translate as a comparison (e.g., "Compare ...").
+            - Preserve conjunctions like "සහ" as "and" and keep all mentioned routes.
+            - Keep direction words ("සිට" = from, "ට/වෙත/දක්වා" = to) and render routes fully.
+            Use standard English city names:
+            - මහනුවර = Kandy (not Mahanuwara)
+            - කොළඹ = Colombo
+            - ගාල්ල = Galle
+            - මාතර = Matara
+            - අනුරාධපුර = Anuradhapura
+            Text to translate: {text}
+            """
+            # Build few-shot examples to preserve comparison/imperative structure
+            examples = [
+                (
+                    "කොළඹ සිට මහනුවරට යන බස් ගාස්තුව කීයද?",
+                    "What is the bus fare from Colombo to Kandy?"
+                ),
+                (
+                    "කොළඹ සිට ගාල්ල දක්වා ටිකට් මිල කීයද?",
+                    "What is the ticket price from Colombo to Galle?"
+                ),
+                (
+                    "කොළඹ සිට පානදුර දක්වා සහ කොළඹ සිට ගාල්ල දක්වා ගාස්තු සසඳා බලන්න.",
+                    "Compare fares from Colombo to Panadura and from Colombo to Galle."
+                ),
+                (
+                    "රුපියල් 500 ට අඩු ගාස්තු සහිත මාර්ග පෙන්වන්න.",
+                    "Show routes with fares under 500 rupees."
+                ),
+                (
+                    "අඩු මිලේ මාර්ග නිර්දේශ කරන්න.",
+                    "Recommend cheap routes."
+                ),
+            ]
+            # Compose messages with few-shot conditioning
+            def build_messages(txt: str):
+                msgs = [
+                    {
+                        "role": "system",
+                        "content": (
+                            "You are a professional translator. Translate accurately and naturally. "
+                            "Preserve imperative/comparative intent and list structure. Do not paraphrase. "
+                            "Return only the English translation without quotes. "
+                            "Canonical phrasing rules (use exactly): \n"
+                            "- Use 'Compare' for comparison requests.\n"
+                            "- Use 'Show' for requests like 'පෙන්වන්න' (do not use Provide/List).\n"
+                            "- Use 'How much is the' for 'කීයද' fare/price questions.\n"
+                            "- Use 'cheap' (not 'affordable').\n"
+                            "- Use 'under' (not 'below') for '< value'.\n"
+                        ),
+                    },
+                    {
+                        "role": "user",
+                        "content": (
+                            "Instructions: Preserve structure. Use 'Compare' for 'සසඳ', use 'from' for 'සිට' and 'to' for 'ට/වෙත/දක්වා'.\n"
+                            "Use exact place names: මහනුවර=Kandy, කොළඹ=Colombo, ගාල්ල=Galle, මාතර=Matara, අනුරාධපුර=Anuradhapura."
+                        ),
+                    },
+                ]
+                for si, en in examples:
+                    msgs.append({"role": "user", "content": f"Sinhala: {si}\nEnglish:"})
+                    msgs.append({"role": "assistant", "content": en})
+                msgs.append({"role": "user", "content": f"Sinhala: {txt}\nEnglish:"})
+                return msgs
+            # Use new OpenAI SDK
+            try:
+                from openai import OpenAI
+                client = OpenAI(api_key=self.openai_api_key)
+                response = client.chat.completions.create(
+                    model="gpt-3.5-turbo",
+                    max_tokens=150,
+                    temperature=0.3,
+                    messages=build_messages(text)
+                )
+                translated = response.choices[0].message.content.strip()
+                self.last_translation_method = 'llm'
+            except Exception as sdk_err:
+                # Fallback to legacy API if available
+                import openai
+                try:
+                    openai.api_key = self.openai_api_key
+                    response = openai.ChatCompletion.create(
+                        model="gpt-3.5-turbo",
+                        max_tokens=150,
+                        temperature=0.3,
+                        messages=build_messages(text)
+                    )
+                    translated = response.choices[0].message.content.strip()
+                    self.last_translation_method = 'llm'
+                except Exception:
+                    raise sdk_err
+            if translated.startswith('"') and translated.endswith('"'):
+                translated = translated[1:-1]
+            return translated if translated else None
+        except Exception as e:
+            self.logger.warning(f"LLM translation error: {e}")
+            return None
+    def translate_with_libre_translate(self, text: str, target_lang: str, source_lang: str = 'auto') -> Optional[str]:
+        """Translate using LibreTranslate (free public API)"""
+        try:
+            # Map language codes
+            lang_map = {
+                'si': 'si',  # Sinhala
+                'en': 'en',  # English
+                'auto': 'auto'
+            }
+            source = lang_map.get(source_lang, 'auto')
+            target = lang_map.get(target_lang, 'en')
+            payload = {
+                'q': text,
+                'source': source,
+                'target': target,
+                'format': 'text'
+            }
+            headers = {
+                'Content-Type': 'application/json'
+            }
+            response = requests.post(
+                self.libre_translate_url,
+                json=payload,
+                headers=headers,
+                timeout=10
+            )
+            if response.status_code == 200:
+                result = response.json()
+                translated = result.get('translatedText')
+                self.logger.debug(f"LibreTranslate success: '{text}' -> '{translated}'")
+                self.last_translation_method = 'libretranslate'
+                return translated
+            return None
+        except Exception as e:
+            self.logger.warning(f"LibreTranslate error: {e}")
+            return None
+    def translate_with_mymemory(self, text: str, target_lang: str, source_lang: str = 'auto') -> Optional[str]:
+        """Translate using MyMemory (free API)"""
+        try:
+            # Map language codes
+            lang_map = {
+                'si': 'si',  # Sinhala
+                'en': 'en',  # English
+                'auto': 'auto'
+            }
+            source = lang_map.get(source_lang, 'auto')
+            langpair = f"{source}|{target_lang}"
+            params = {
+                'q': text,
+                'langpair': langpair
+            }
+            response = requests.get(
+                self.mymemory_url,
+                params=params,
+                timeout=10
+            )
+            if response.status_code == 200:
+                result = response.json()
+                translated = result.get('responseData', {}).get('translatedText')
+                self.logger.debug(f"MyMemory success: '{text}' -> '{translated}'")
+                self.last_translation_method = 'mymemory'
+                return translated
+            return None
+        except Exception as e:
+            self.logger.warning(f"MyMemory translation error: {e}")
+            return None
+    def translate_with_dictionary(self, text: str, target_lang: str) -> str:
+        """Translate using dictionary-based approach"""
+        if target_lang == 'en':
+            # Sinhala to English
+            translated = text
+            for sinhala, english in self.transport_terms.items():
+                translated = translated.replace(sinhala, english)
+            return translated
+        elif target_lang == 'si':
+            # English to Sinhala
+            translated = text
+            for sinhala, english in self.transport_terms.items():
+                translated = translated.replace(english, sinhala)
+            return translated
+        return text
+    def translate_text(self, text: str, target_lang: str, source_lang: str = 'auto') -> str:
+        """Main translation method with multiple fallbacks"""
+        if not text or not text.strip():
+            return text
+        # Try translation methods
+        if self.force_llm_translation:
+            translation_methods = [
+                ('LLM', lambda: self.translate_with_llm(text, target_lang, source_lang))
+            ]
+        else:
+            translation_methods = [
+                ('LLM', lambda: self.translate_with_llm(text, target_lang, source_lang)),
+                ('MyMemory', lambda: self.translate_with_mymemory(text, target_lang, source_lang)),
+                ('LibreTranslate', lambda: self.translate_with_libre_translate(text, target_lang, source_lang)),
+                ('Dictionary', lambda: self.translate_with_dictionary(text, target_lang))
+            ]
+        for method_name, method_func in translation_methods:
+            try:
+                result = method_func()
+                if result and result.strip():
+                    self.logger.info(f"Translation successful using {method_name}")
+                    if not self.last_translation_method:
+                        self.last_translation_method = method_name.lower()
+                    return result.strip()
+            except Exception as e:
+                self.logger.warning(f"{method_name} translation failed: {e}")
+                continue
+        # Final fallback
+        result = self.translate_with_dictionary(text, target_lang)
+        self.last_translation_method = 'dictionary'
+        return result
+    def translate_query(self, query: str) -> Dict[str, Any]:
+        """Translate a user query from Sinhala to English"""
+        if not self.is_sinhala_text(query):
+            return {
+                'is_sinhala': False,
+                'original_query': query,
+                'translated_query': query,
+                'translation_method': 'none'
+            }
+        # Optional: Sinhala-specific fare parsing (disabled by default unless USE_PATTERN_TRANSLATION=true)
+        if self.use_pattern_translation:
+            parsed = self._parse_sinhala_fare_query(query)
+            if parsed:
+                self.logger.info(f"Pattern-based Sinhala fare parse: '{query}' -> '{parsed}'")
+                return {
+                    'is_sinhala': True,
+                    'original_query': query,
+                    'translated_query': parsed,
+                    'translation_method': 'pattern'
+                }
+        # Fallback: general translation to English
+        translated = self.translate_text(query, 'en', 'si')
+        # Normalize English synonyms to expected NLP vocabulary
+        translated = self._normalize_english_query(translated)
+        method = self.last_translation_method or ('llm' if self.openai_api_key else 'dictionary')
+        self.logger.info(f"Translated Sinhala query ({method}): '{query}' -> '{translated}'")
+        return {
+            'is_sinhala': True,
+            'original_query': query,
+            'translated_query': translated,
+            'translation_method': method
+        }
+    def _normalize_english_query(self, text: str) -> str:
+        """Normalize English synonyms to match NLP patterns (fare/price/cost)."""
+        if not text:
+            return text
+        normalized = text
+        replacements = {
+            'fees': 'fare',
+            'fee': 'fare',
+            'charges': 'cost',
+            'charge': 'cost',
+            'ticket price': 'fare',
+            'ticket fare': 'fare',
+            'bus ticket': 'bus fare',
+        }
+        # Lowercase operate, then restore original casing minimally by returning lowercase; downstream lowercases anyway
+        lower = normalized.lower()
+        for old, new in replacements.items():
+            lower = lower.replace(old, new)
+        return lower
+    def translate_response(self, response: Dict[str, Any]) -> Dict[str, Any]:
+        """Translate response back to Sinhala"""
+        translated_response = response.copy()
+        # Translate the main message
+        if 'message' in response:
+            translated_response['message'] = self.translate_text(
+                response['message'], 'si', 'en'
+            )
+        # Translate suggestions if any
+        if 'suggestions' in response and response['suggestions']:
+            translated_response['suggestions'] = [
+                self.translate_text(suggestion, 'si', 'en')
+                for suggestion in response['suggestions']
+            ]
+        # Translate corrections if any
+        if 'corrections' in response and response['corrections']:
+            translated_corrections = []
+            for correction in response['corrections']:
+                translated_correction = correction.copy()
+                if 'original' in correction:
+                    translated_correction['original'] = self.translate_text(
+                        correction['original'], 'si', 'en'
+                    )
+                if 'corrected' in correction:
+                    translated_correction['corrected'] = self.translate_text(
+                        correction['corrected'], 'si', 'en'
+                    )
+                translated_corrections.append(translated_correction)
+            translated_response['corrections'] = translated_corrections
+        # Add translation metadata
+        translated_response['translation_info'] = {
+            'translated': True,
+            'translation_method': 'llm' if self.openai_api_key else 'dictionary'
+        }
+        return translated_response
+    def get_sinhala_examples(self) -> Dict[str, Any]:
+        """Get example queries in Sinhala"""
+        sinhala_examples = {
+            'fare_queries': [
+                {
+                    'query': 'කොළඹ සිට මහනුවරට යන බස් ගාස්තුව කීයද?',
+                    'description': 'කොළඹ සිට මහනුවරට යන බස් ගාස්තුව සොයන්න'
+                },
+                {
+                    'query': 'මාතර සිට ගාල්ලට යන මිල කීයද?',
+                    'description': 'මාතර සිට ගාල්ලට යන මිල සොයන්න'
+                },
+                {
+                    'query': 'අනුරාධපුර සිට කොළඹට යන වාරිකය',
+                    'description': 'අනුරාධපුර සිට කොළඹට යන වාරිකය සොයන්න'
+                }
+            ],
+            'comparison_queries': [
+                {
+                    'query': 'කොළඹ සිට මහනුවරට සහ කොළඹ සිට ගාල්ලට යන ගාස්තු සසඳන්න',
+                    'description': 'විවිධ මාර්ගවල ගාස්තු සසඳන්න'
+                },
+                {
+                    'query': 'කොළඹ සිට මහනුවරට සහ කොළඹ සිට අනුරාධපුරට යන ගාස්තුවල වෙනස කීයද?',
+                    'description': 'මාර්ග දෙකක ගාස්තු වෙනස සොයන්න'
+                }
+            ],
+            'range_queries': [
+                {
+                    'query': 'රුපියල් 500 ට අඩු ගාස්තු සහිත මාර්ග සොයන්න',
+                    'description': 'රුපියල් 500 ට අඩු ගාස්තු සහිත මාර්ග සොයන්න'
+                },
+                {
+                    'query': 'රුපියල් 200 සහ 800 අතර ගාස්තු සහිත මාර්ග පෙන්වන්න',
+                    'description': 'රුපියල් 200 සහ 800 අතර ගාස්තු සහිත මාර්ග සොයන්න'
+                }
+            ],
+            'recommendation_queries': [
+                {
+                    'query': 'අඩු මිලේ මාර්ග නිර්දේශ කරන්න',
+                    'description': 'අඩු මිලේ මාර්ග නිර්දේශ කරන්න'
+                },
+                {
+                    'query': 'ප්‍රසිද්ධ ගමනාන්ත පෙන්වන්න',
+                    'description': 'ප්‍රසිද්ධ ගමනාන්ත සොයන්න'
+                }
+            ],
+            'statistical_queries': [
+                {
+                    'query': 'සාමාන්‍ය ගාස්තුව කීයද?',
+                    'description': 'සාමාන්‍ය ගාස්තුව සොයන්න'
+                },
+                {
+                    'query': 'දත්ත ගබඩා සංඛ්‍යාලේඛන',
+                    'description': 'දත්ත ගබඩා සංඛ්‍යාලේඛන සොයන්න'
+                }
+            ]
+        }
+        return sinhala_examples
+    def test_translation(self) -> Dict[str, Any]:
+        """Test translation functionality on transportation-related Sinhala queries."""
+        test_cases = [
+            {
+                'sinhala': 'කොළඹ සිට මහනුවරට යන බස් ගාස්තුව කීයද?',
+                'expected_english': 'What is the bus fare from Colombo to Kandy?'
+            },
+            {
+                'sinhala': 'මාතර සිට ගාල්ලට යන මිල කීයද?',
+                'expected_english': 'How much is the price from Matara to Galle?'
+            },
+            {
+                'sinhala': 'කොළඹ සිට පානදුර දක්වා සහ කොළඹ සිට ගාල්ල දක්වා ගාස්තු සසඳා බලන්න.',
+                'expected_english': 'Compare fares from Colombo to Panadura and from Colombo to Galle.'
+            },
+            {
+                'sinhala': 'රුපියල් 500 ට අඩු ගාස්තු සහිත මාර්ග පෙන්වන්න.',
+                'expected_english': 'Show routes with fares under 500 rupees.'
+            },
+            {
+                'sinhala': 'අඩු මිලේ මාර්ග නිර්දේශ කරන්න.',
+                'expected_english': 'Recommend cheap routes.'
+            },
+            {
+                'sinhala': 'කොළඹ සිට යන මාර්ග මොනවාද?',
+                'expected_english': 'What routes depart from Colombo?'
+            },
+            {
+                'sinhala': 'සාමාන්‍ය ගාස්තුව කීයද?',
+                'expected_english': 'What is the average fare?'
+            },
+            {
+                'sinhala': 'කඩුවෙල සිට මාතර දක්වා සහ ගාල්ල දක්වා බස් ගාස්තු සසඳන්න.',
+                'expected_english': 'Compare bus fares from Kaduwela to Matara and to Galle.'
+            },
+            {
+                'sinhala': 'කොළඹ සිට ගාල්ල දක්වා ටිකට් මිල කීයද?',
+                'expected_english': 'What is the ticket price from Colombo to Galle?'
+            },
+            {
+                'sinhala': 'රුපියල් 1000 ට වැඩි ගාස්තු සහිත මාර්ග සදහන් කරන්න.',
+                'expected_english': 'List routes with fares over 1000 rupees.'
+            }
+        ]
+        results = []
+        total_exact = 0
+        total_good = 0
+        total_tests = len(test_cases)
+        for test_case in test_cases:
+            sinhala = test_case['sinhala']
+            expected = test_case['expected_english']
+            is_sinhala = self.is_sinhala_text(sinhala)
+            # Reset method tracker and translate
+            self.last_translation_method = None
+            translated = self.translate_text(sinhala, 'en', 'si') or ''
+            tr = translated.strip()
+            ex = expected.strip()
+            tr_low = tr.lower()
+            ex_low = ex.lower()
+            # Accuracy heuristic
+            if tr_low == ex_low:
+                accuracy = 'exact'
+                total_exact += 1
+                total_good += 1
+            elif tr_low in ex_low or ex_low in tr_low:
+                accuracy = 'good'
+                total_good += 1
+            else:
+                accuracy = 'partial'
+            # Intent preservation check for comparisons
+            intent_preserved = True
+            if 'ස��ඳ' in sinhala or 'සසඳා' in sinhala:
+                intent_preserved = ('compare' in tr_low)
+            results.append({
+                'sinhala_query': sinhala,
+                'is_sinhala_detected': is_sinhala,
+                'translated_english': tr,
+                'expected_english': ex,
+                'translation_accuracy': accuracy,
+                'intent_preserved': intent_preserved,
+                'method_used': self.last_translation_method or ('llm' if self.openai_api_key else 'dictionary')
+            })
+        summary = {
+            'total_tests': total_tests,
+            'exact_matches': total_exact,
+            'good_or_better': total_good,
+            'accuracy_rate_percent': round((total_good / total_tests) * 100, 2) if total_tests else 0
+        }
+        self.logger.info(f"Translation test summary: {summary}")
+        return {
+            'translation_service_status': 'active',
+            'available_methods': {
+                'llm': self.openai_api_key is not None,
+                'libre_translate': True,
+                'mymemory': True,
+                'dictionary': True
+            },
+            'summary': summary,
+            'test_results': results
+        }