Spaces:

TuanMinhajSeedin
/

Transport

Sleeping

App Files Community

TuanMinhajSeedin committed on Sep 8

Commit

eaa41b7

verified ·

1 Parent(s): e19f1a0

Upload 10 files

Browse files

Files changed (9) hide show

.gitignore +151 -175
app.py +1215 -974
enhanced_nlp_processor.py +904 -904
language_detector.py +251 -0
llm_query_processor.py +384 -351
logger.py +61 -61
neo4j_service.py +222 -222
spell_corrector.py +257 -257
translation_service.py +1057 -702

.gitignore CHANGED Viewed

@@ -1,175 +1,151 @@
-# Byte-compiled / optimized / DLL files
-__pycache__/
-*.py[cod]
-*$py.class
-# C extensions
-*.so
-# Distribution / packaging
-.Python
-build/
-develop-eggs/
-dist/
-downloads/
-eggs/
-.eggs/
-lib/
-lib64/
-parts/
-sdist/
-var/
-wheels/
-share/python-wheels/
-*.egg-info/
-.installed.cfg
-*.egg
-MANIFEST
-# PyInstaller
-#  Usually these files are written by a python script from a template
-#  before PyInstaller builds the exe, so as to inject date/other infos into it.
-*.manifest
-*.spec
-# Installer logs
-pip-log.txt
-pip-delete-this-directory.txt
-# Unit test / coverage reports
-htmlcov/
-.tox/
-.nox/
-.coverage
-.coverage.*
-.cache
-nosetests.xml
-coverage.xml
-*.cover
-*.py,cover
-.hypothesis/
-.pytest_cache/
-cover/
-# Translations
-*.mo
-*.pot
-# Django stuff:
-*.log
-local_settings.py
-db.sqlite3
-db.sqlite3-journal
-# Flask stuff:
-instance/
-.webassets-cache
-# Scrapy stuff:
-.scrapy
-# Sphinx documentation
-docs/_build/
-# PyBuilder
-.pybuilder/
-target/
-# Jupyter Notebook
-.ipynb_checkpoints
-# IPython
-profile_default/
-ipython_config.py
-# pyenv
-#   For a library or package, you might want to ignore these files since the code is
-#   intended to run in multiple environments; otherwise, check them in:
-# .python-version
-# pipenv
-#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
-#   However, in case of collaboration, if having platform-specific dependencies or dependencies
-#   having no cross-platform support, pipenv may install dependencies that don't work, or not
-#   install all needed dependencies.
-#Pipfile.lock
-# UV
-#   Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
-#   This is especially recommended for binary packages to ensure reproducibility, and is more
-#   commonly ignored for libraries.
-#uv.lock
-# poetry
-#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
-#   This is especially recommended for binary packages to ensure reproducibility, and is more
-#   commonly ignored for libraries.
-#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
-#poetry.lock
-# pdm
-#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
-#pdm.lock
-#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
-#   in version control.
-#   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
-.pdm.toml
-.pdm-python
-.pdm-build/
-# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
-__pypackages__/
-# Celery stuff
-celerybeat-schedule
-celerybeat.pid
-# SageMath parsed files
-*.sage.py
-# Environments
-config.py
-.env
-.venv
-env/
-venv/
-ENV/
-env.bak/
-venv.bak/
-# Spyder project settings
-.spyderproject
-.spyproject
-# Rope project settings
-.ropeproject
-# mkdocs documentation
-/site
-# mypy
-.mypy_cache/
-.dmypy.json
-dmypy.json
-# Pyre type checker
-.pyre/
-# pytype static type analyzer
-.pytype/
-# Cython debug symbols
-cython_debug/
-# PyCharm
-#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
-#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
-#  and can be added to the global gitignore or merged into this file.  For a more nuclear
-#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
-#.idea/
-# Ruff stuff:
-.ruff_cache/
-# PyPI configuration file
-.pypirc

+# Environment variables
+.env
+.env.local
+.env.*.local
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+.python-version
+# pipenv
+Pipfile.lock
+# PEP 582
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# IDE
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+# OS
+.DS_Store
+.DS_Store?
+._*
+.Spotlight-V100
+.Trashes
+ehthumbs.db
+Thumbs.db
+# Logs
+logs/
+*.log
+# Database
+*.db
+*.sqlite
+# Temporary files
+*.tmp
+*.temp

app.py CHANGED Viewed

@@ -1,974 +1,1215 @@
-#!/usr/bin/env python3
-"""
-Main Flask Application for Transport Query System
-"""
-from flask import Flask, render_template, request, jsonify, session
-import os
-from llm_query_processor import LLMQueryProcessor
-from enhanced_nlp_processor import EnhancedNLPProcessor
-from spell_corrector import SpellCorrector
-from neo4j_service import Neo4jService
-from translation_service import TranslationService
-from logger import get_logger
-from config import Config
-app = Flask(__name__)
-app.config.from_object(Config)
-logger = get_logger("FlaskApp")
-# Initialize services
-query_processor = LLMQueryProcessor()
-enhanced_nlp_processor = EnhancedNLPProcessor()
-spell_corrector = SpellCorrector()
-neo4j_service = Neo4jService()
-translation_service = TranslationService()
-@app.route('/')
-def index():
-    """Main page"""
-    return render_template('index.html')
-@app.route('/api/query', methods=['POST'])
-def process_query():
-    """Process user query with enhanced NLP and translation support"""
-    try:
-        data = request.get_json()
-        user_query = data.get('query', '').strip()
-        use_enhanced_nlp = data.get('enhanced_nlp', True)  # Default to enhanced NLP
-        if not user_query:
-            return jsonify({
-                'success': False,
-                'message': 'Please enter a query.'
-            })
-        # Check if query is in Sinhala and translate if needed
-        translation_info = translation_service.translate_query(user_query)
-        # Use translated query for processing
-        query_to_process = translation_info['translated_query']
-        # Log translation info to console
-        if translation_info['is_sinhala']:
-            logger.info(f"Translation: si->en method={translation_info['translation_method']} original='{translation_info['original_query']}' translated='{translation_info['translated_query']}'")
-        else:
-            logger.info(f"Processing English Query: '{user_query}'")
-        # Process the query with enhanced NLP or fallback to basic processor
-        if use_enhanced_nlp:
-            result = enhanced_nlp_processor.process_query(query_to_process)
-        else:
-            result = query_processor.process_query(query_to_process)
-        # If original query was in Sinhala, translate the response back
-        if translation_info['is_sinhala']:
-            print(f"   English Response: {result.get('message', 'No message')}")
-            result = translation_service.translate_response(result)
-            result['translation_info'] = translation_info
-            print(f"   Sinhala Response: {result.get('message', 'No message')}")
-            print(f"   Translation Complete ✅")
-        logger.info(f"Response success={result.get('success')} type={result.get('query_type','n/a')} message='{result.get('message','')[:120]}'")
-        return jsonify(result)
-    except Exception as e:
-        return jsonify({
-            'success': False,
-            'message': f'Error processing query: {str(e)}'
-        })
-@app.route('/api/suggestions', methods=['POST'])
-def get_suggestions():
-    """Get location suggestions for autocomplete"""
-    try:
-        data = request.get_json()
-        partial_location = data.get('location', '').strip()
-        if not partial_location:
-            return jsonify({'suggestions': []})
-        suggestions = spell_corrector.get_suggestions(partial_location)
-        return jsonify({
-            'suggestions': [{'name': name, 'confidence': conf} for name, conf in suggestions]
-        })
-    except Exception as e:
-        return jsonify({
-            'success': False,
-            'message': f'Error getting suggestions: {str(e)}'
-        })
-@app.route('/api/status')
-def get_status():
-    """Get system status"""
-    try:
-        neo4j_connected = neo4j_service.is_connected()
-        places = neo4j_service.get_all_places() if neo4j_connected else []
-        stats = neo4j_service.get_route_statistics() if neo4j_connected else {}
-        return jsonify({
-            'neo4j_connected': neo4j_connected,
-            'total_places': len(places),
-            'statistics': stats
-        })
-    except Exception as e:
-        return jsonify({
-            'success': False,
-            'message': f'Error getting status: {str(e)}'
-        })
-@app.route('/api/places')
-def get_places():
-    """Get all available places"""
-    try:
-        places = neo4j_service.get_all_places()
-        return jsonify({
-            'success': True,
-            'places': places
-        })
-    except Exception as e:
-        return jsonify({
-            'success': False,
-            'message': f'Error getting places: {str(e)}'
-        })
-@app.route('/api/sinhala/examples')
-def get_sinhala_examples():
-    """Get example queries in Sinhala"""
-    try:
-        sinhala_examples = translation_service.get_sinhala_examples()
-        return jsonify({
-            'success': True,
-            'examples': sinhala_examples
-        })
-    except Exception as e:
-        return jsonify({
-            'success': False,
-            'message': f'Error getting Sinhala examples: {str(e)}'
-        })
-@app.route('/api/translation/test')
-def test_translation():
-    """Test translation functionality"""
-    try:
-        test_results = translation_service.test_translation()
-        return jsonify({
-            'success': True,
-            'test_results': test_results
-        })
-    except Exception as e:
-        return jsonify({
-            'success': False,
-            'message': f'Error testing translation: {str(e)}'
-        })
-@app.route('/api/translation/translate', methods=['POST'])
-def translate_text():
-    """Translate text between Sinhala and English"""
-    try:
-        data = request.get_json()
-        text = data.get('text', '').strip()
-        target_lang = data.get('target_lang', 'en')  # 'en' or 'si'
-        source_lang = data.get('source_lang', 'auto')
-        if not text:
-            return jsonify({
-                'success': False,
-                'message': 'Please provide text to translate.'
-            })
-        translated_text = translation_service.translate_text(text, target_lang, source_lang)
-        is_sinhala = translation_service.is_sinhala_text(text)
-        return jsonify({
-            'success': True,
-            'original_text': text,
-            'translated_text': translated_text,
-            'source_language': 'si' if is_sinhala else 'en',
-            'target_language': target_lang,
-            'translation_method': 'google' if translation_service.google_translate_api_key else 'dictionary'
-        })
-    except Exception as e:
-        return jsonify({
-            'success': False,
-            'message': f'Error translating text: {str(e)}'
-        })
-@app.route('/api/nlp/capabilities')
-def get_nlp_capabilities():
-    """Get information about natural language processing capabilities with live examples"""
-    # Test queries for each type to demonstrate actual results
-    test_queries = [
-        {
-            'type': 'fare_inquiry',
-            'description': 'Find fare between two specific locations',
-            'examples': [
-                'What is the fare from Colombo to Kandy?',
-                'fare of anuradhapura to kandy',
-                'price from panadura to galle',
-                'Colombo to Kandy fare'
-            ]
-        },
-        {
-            'type': 'comparison',
-            'description': 'Compare fares between different routes',
-            'examples': [
-                'Compare fares from Colombo to Kandy vs Colombo to Galle',
-                'Which is cheaper between Colombo to Kandy and Colombo to Anuradapura?',
-                'What is the difference in fare between Panadura to Galle and Panadura to Matara?'
-            ]
-        },
-        {
-            'type': 'range_search',
-            'description': 'Find routes within specific price ranges',
-            'examples': [
-                'Find routes under 500 rupees',
-                'Show me routes between 200 and 800 rupees',
-                'Routes over 1000 rupees'
-            ]
-        },
-        {
-            'type': 'recommendation',
-            'description': 'Get route recommendations based on criteria',
-            'examples': [
-                'Recommend cheap routes',
-                'Show me popular destinations',
-                'What are the best routes from Colombo?'
-            ]
-        },
-        {
-            'type': 'route_inquiry',
-            'description': 'Find routes from/to specific locations',
-            'examples': [
-                'Routes from Colombo',
-                'Routes to Galle',
-                'What routes depart from Kandy?'
-            ]
-        },
-        {
-            'type': 'statistics',
-            'description': 'Get database overview and statistics',
-            'examples': [
-                'What is the average fare?',
-                'Database statistics',
-                'How many routes are there?'
-            ]
-        }
-    ]
-    # Process each test query to get actual results
-    live_examples = []
-    for query_type in test_queries:
-        type_examples = []
-        for example_query in query_type['examples'][:2]:  # Test first 2 examples
-            try:
-                result = enhanced_nlp_processor.process_query(example_query)
-                type_examples.append({
-                    'query': example_query,
-                    'result': result
-                })
-            except Exception as e:
-                type_examples.append({
-                    'query': example_query,
-                    'result': {
-                        'success': False,
-                        'message': f'Error: {str(e)}'
-                    }
-                })
-        live_examples.append({
-            'type': query_type['type'],
-            'description': query_type['description'],
-            'examples': type_examples
-        })
-    capabilities = {
-        'natural_language_processing': {
-            'description': 'Advanced NLP for transport queries with enhanced understanding',
-            'features': [
-                'Multiple query formats (fare, price, cost)',
-                'Natural language patterns (from X to Y, X to Y fare, etc.)',
-                'Question formats (What is, How much, Show me, etc.)',
-                'Compact formats (Colombo to Kandy fare)',
-                'Spell correction and fuzzy matching',
-                'Automatic location name correction',
-                'LLM-powered query interpretation',
-                'Fallback keyword-based processing',
-                'Advanced intent classification',
-                'Entity extraction and normalization',
-                'Confidence scoring for query understanding'
-            ]
-        },
-        'query_types': test_queries,
-        'live_examples': live_examples,
-        'spell_correction': {
-            'description': 'Automatic location name correction',
-            'methods': [
-                'Direct mapping (exact matches)',
-                'Fuzzy matching (similar names)',
-                'LLM correction (AI-powered)',
-                'Partial matching (substring matching)'
-            ],
-            'examples': [
-                'panadra → Panadura',
-                'gale → Galle',
-                'colmbo → Colombo',
-                'kandee → Kandy'
-            ]
-        },
-        'llm_integration': {
-            'description': 'AI-powered query interpretation with LLM Cypher generation',
-            'features': [
-                'Automatic query type detection',
-                'LLM-powered Cypher query generation',
-                'Natural language understanding',
-                'Fallback to keyword-based processing',
-                'Advanced entity extraction',
-                'Intent classification with confidence scoring',
-                'Real-time database querying'
-            ]
-        },
-        'enhanced_features': {
-            'description': 'Advanced NLP capabilities',
-            'features': [
-                'Multi-intent query understanding',
-                'Context-aware responses',
-                'Query preprocessing and normalization',
-                'Advanced pattern matching',
-                'Confidence-based result ranking',
-                'Comprehensive query analysis',
-                'Live database results for all query types'
-            ]
-        }
-    }
-    return jsonify({
-        'success': True,
-        'capabilities': capabilities
-    })
-@app.route('/api/nlp/test', methods=['POST'])
-def test_nlp_query():
-    """Test a natural language query and return detailed analysis"""
-    try:
-        data = request.get_json()
-        user_query = data.get('query', '').strip()
-        use_enhanced_nlp = data.get('enhanced_nlp', True)
-        if not user_query:
-            return jsonify({
-                'success': False,
-                'message': 'Please provide a query to test.'
-            })
-        # Get detailed analysis
-        analysis = {
-            'original_query': user_query,
-            'processing_steps': []
-        }
-        # Step 1: Extract locations
-        locations = spell_corrector.extract_locations_from_query(user_query)
-        analysis['processing_steps'].append({
-            'step': 'Location Extraction',
-            'locations_found': len(locations),
-            'details': [
-                {
-                    'original': loc[0],
-                    'corrected': loc[1],
-                    'confidence': loc[2],
-                    'method': loc[3]
-                } for loc in locations
-            ]
-        })
-        # Step 2: Process query with enhanced NLP
-        if use_enhanced_nlp:
-            result = enhanced_nlp_processor.process_query(user_query)
-            analysis['processing_steps'].append({
-                'step': 'Enhanced NLP Processing',
-                'success': result.get('success', False),
-                'query_type': result.get('query_type', 'unknown'),
-                'message': result.get('message', ''),
-                'confidence': result.get('query_analysis', {}).get('confidence', 0),
-                'intent': result.get('query_analysis', {}).get('intent', {}),
-                'entities': result.get('query_analysis', {}).get('entities', {})
-            })
-        else:
-            result = query_processor.process_query(user_query)
-            analysis['processing_steps'].append({
-                'step': 'Basic Query Processing',
-                'success': result.get('success', False),
-                'query_type': result.get('query_type', 'unknown'),
-                'message': result.get('message', ''),
-                'cypher_query': result.get('cypher_query', ''),
-                'corrections': result.get('corrections', [])
-            })
-        # Step 3: Results
-        if result.get('success') and result.get('data'):
-            analysis['processing_steps'].append({
-                'step': 'Database Results',
-                'results_count': len(result['data']),
-                'sample_results': result['data'][:3]  # Show first 3 results
-            })
-        return jsonify({
-            'success': True,
-            'analysis': analysis,
-            'result': result
-        })
-    except Exception as e:
-        return jsonify({
-            'success': False,
-            'message': f'Error testing NLP query: {str(e)}'
-        })
-@app.route('/api/nlp/demo')
-def get_nlp_demo():
-    """Get a comprehensive demo of natural language capabilities"""
-    demo_queries = [
-        {
-            'category': 'Basic Fare Queries',
-            'queries': [
-                'What is the fare from Colombo to Kandy?',
-                'fare of anuradhapura to kandy',
-                'price from panadura to galle',
-                'Colombo to Kandy fare'
-            ]
-        },
-        {
-            'category': 'Comparison Queries',
-            'queries': [
-                'Compare fares from Colombo to Kandy vs Colombo to Galle',
-                'Which is cheaper between Colombo to Kandy and Colombo to Anuradapura?',
-                'What is the difference in fare between Panadura to Galle and Panadura to Matara?'
-            ]
-        },
-        {
-            'category': 'Range Search Queries',
-            'queries': [
-                'Find routes under 500 rupees',
-                'Show me routes between 200 and 800 rupees',
-                'Routes over 1000 rupees'
-            ]
-        },
-        {
-            'category': 'Recommendation Queries',
-            'queries': [
-                'Recommend cheap routes',
-                'Show me popular destinations',
-                'What are the best routes from Colombo?'
-            ]
-        },
-        {
-            'category': 'Statistical Queries',
-            'queries': [
-                'What is the average fare?',
-                'Database statistics',
-                'How many routes are there?'
-            ]
-        },
-        {
-            'category': 'Route Queries',
-            'queries': [
-                'Show me the cheapest routes',
-                'Routes from Colombo',
-                'Routes to Galle',
-                'What routes depart from Kandy?'
-            ]
-        },
-        {
-            'category': 'Spell Correction Tests',
-            'queries': [
-                'price from panadra to gale',
-                'fare of colmbo to kandee',
-                'cost from anuradapura to kandy'
-            ]
-        }
-    ]
-    return jsonify({
-        'success': True,
-        'demo': {
-            'title': 'Enhanced Natural Language Transport Query Demo',
-            'description': 'Advanced NLP capabilities with comparison, range search, and recommendations',
-            'categories': demo_queries
-        }
-    })
-@app.route('/api/examples')
-def get_examples():
-    """Get comprehensive example queries showcasing natural language capabilities"""
-    examples = [
-        # === FARE QUERIES (Various Natural Language Formats) ===
-        {
-            'category': 'Fare Queries',
-            'examples': [
-                {
-                    # 'query': 'What is the fare from Colombo to Kandy?',
-                    'query': 'කොළඹ සිට මහනුවරට ගාස්තුව කීයද?',
-                    'description': 'Standard fare query format'
-                },
-                {
-                    'query': 'පානදුරේ ඉඳන් ගාල්ලට කීයක් යනවද?',
-                    'description': 'Alternative way to ask for fare'
-                },
-                {
-                    'query': 'අනුරාධපුර සිට මහනුවර දක්වා ගාස්තුව',
-                    'description': 'Natural language format'
-                },
-                {
-                    # 'query': 'price from panadura to galle',
-                    'query': 'පානදුරේ ඉඳන් ගාල්ලට කීයක් යනවද?',
-                    'description': 'Using "price" instead of "fare"'
-                },
-                {
-                    # 'query': 'Colombo to nuwara eliya fare',
-                    'query': 'බදුල්ල සිට කොළඹට ගාස්තුව කීයද?',
-                    'description': 'Compact format'
-                },
-                {
-                    # 'query': 'How much is the fare from matara to kandy?',
-                    'query': 'මහනුවර සිට මාතරට ගාස්තුව කීයද?',
-                    'description': 'Question format'
-                }
-            ]
-        },
-        # === COMPARISON QUERIES ===
-        {
-            'category': 'Comparison Queries',
-            'examples': [
-                {
-                    # 'query': 'Compare fares from Colombo to Kandy vs Colombo to Galle',
-                    'query': 'කොළඹ සිට මහනුවර දක්වා සහ කොළඹ සිට ගාල්ල දක්වා ගාස්තු සංසන්දනය කරන්න.',
-                    'description': 'Compare two different routes'
-                },
-                {
-                    # 'query': 'Which is cheaper between Colombo to Kandy and Colombo to Anuradapura?',
-                    'query': 'කොළඹ සිට මහනුවර දක්වා සහ කොළඹ සිට අනුරාධපුර දක්වා ලාභදායී වන්නේ කුමක්ද?',
-                    'description': 'Find the cheaper option'
-                },
-                {
-                    # 'query': 'What is the difference in fare between Panadura to Galle and Panadura to Matara?',
-                    'query': 'පානදුර සිට ගාල්ල දක්වා සහ පානදුර සිට මාතර දක්වා ගාස්තුවේ වෙනස කීයද?',
-                    'description': 'Calculate fare difference'
-                }
-            ]
-        },
-        # === RANGE SEARCH QUERIES ===
-        {
-            'category': 'Range Search Queries',
-            'examples': [
-                {
-                    # 'query': 'Find routes under 500 rupees',
-                    'query': 'රුපියල් 500ට අඩු මාර්ග සොයා ගන්න',
-                    'description': 'Find affordable routes'
-                },
-                {
-                    # 'query': 'Show me routes between 200 and 800 rupees',
-                    'query': 'රුපියල් 200 සහ 800 අතර මාර්ග සොයා ගන්න',
-                    'description': 'Find routes in price range'
-                },
-                {
-                    # 'query': 'Routes over 1000 rupees',
-                    'query': 'රුපියල් 1000ට ඉහළ මාර්ග සොයා ගන්න',
-                    'description': 'Find expensive routes'
-                }
-            ]
-        },
-        # === RECOMMENDATION QUERIES ===
-        {
-            'category': 'Recommendation Queries',
-            'examples': [
-                {
-                    # 'query': 'Recommend cheap routes',
-                    'query': 'ලාභ මාර්ග නිර්දේශ කරන්න',
-                    'description': 'Get budget-friendly recommendations'
-                },
-                {
-                    # 'query': 'Show me popular destinations',
-                    'query': 'මට ජනප්‍රිය ගමනාන්ත පෙන්වන්න',
-                    'description': 'Find frequently traveled routes'
-                },
-                {
-                    # 'query': 'What are the best routes from Colombo?',
-                    'query': 'කොළඹ සිට යාමට හොඳම මාර්ග මොනවාද?',
-                    'description': 'Get optimal route suggestions'
-                }
-            ]
-        },
-        # === STATISTICAL QUERIES ===
-        {
-            'category': 'Statistical Queries',
-            'examples': [
-                {
-                    # 'query': 'What is the average fare?',
-                    'query': 'සාමාන්‍ය ගාස්තුව කීයද?',
-                    'description': 'Get average fare statistics'
-                },
-                {
-                    # 'query': 'Database statistics',
-                    'query': 'දත්ත සමුදා සංඛ්යා ලේඛන',
-                    'description': 'Get comprehensive database overview'
-                },
-                {
-                    'query': 'මාර්ග කීයක් තිබේද?',
-                    'description': 'Count total routes'
-                }
-            ]
-        },
-        # === ROUTE QUERIES ===
-        {
-            'category': 'Route Queries',
-            'examples': [
-                {
-                    # 'query': 'Show me the cheapest routes',
-                    'query': 'මට ලාභදායී  මාර්ග 10ක්  පෙන්වන්න',
-                    'description': 'Find top 10 cheapest routes'
-                },
-                {
-                    # 'query': 'Routes from Colombo',
-                    'query': 'කොළඹ සිට යාමට මාර්ග මොනවාද?',
-                    'description': 'Find all routes departing from a location'
-                },
-                {
-                    # 'query': 'Routes to Galle',
-                    'query': 'ගාල්ල යාමට මාර්ග මොනවාද?',
-                    'description': 'Find all routes going to a location'
-                },
-                {
-                    # 'query': 'What routes depart from Kandy?',
-                    'query': 'මහනුවර සිට යාමට මාර්ග මොනවාද?',
-                    'description': 'Question format for routes'
-                }
-            ]
-        },
-        # === SPELLING ERROR EXAMPLES ===
-        {
-            'category': 'Spell Correction Examples',
-            'examples': [
-                {
-                    # 'query': 'price from panadra to gale',
-                    'query': 'පාන්දුරේ ඉඳන් ගාල්ල්ට කීයක් යනවද?',
-                    'description': 'Test spell correction (Panadura, Galle)'
-                },
-                {
-                    # 'query': 'fare of colmbo to kandee',
-                    'query': 'කොළ්බ්හ  සිට මහනුවර්ට ගාස්තුව කීයද?',
-                    'description': 'Test spell correction (Colombo, Kandy)'
-                },
-                {
-                    # 'query': 'cost from anuradapura to kandy',
-                    'query': 'අනුරපුර සිට මහනුවර්රට ගාස්තුව කීයද?',
-                    'description': 'Natural format with correct spelling'
-                }
-            ]
-        }
-    ]
-    return jsonify({
-        'success': True,
-        'examples': examples
-    })
-@app.route('/api/nlp/advanced', methods=['POST'])
-def advanced_nlp_query():
-    """Advanced NLP query processing with detailed analysis"""
-    try:
-        data = request.get_json()
-        user_query = data.get('query', '').strip()
-        if not user_query:
-            return jsonify({
-                'success': False,
-                'message': 'Please provide a query to process.'
-            })
-        # Process with enhanced NLP
-        result = enhanced_nlp_processor.process_query(user_query)
-        return jsonify(result)
-    except Exception as e:
-        return jsonify({
-            'success': False,
-            'message': f'Error processing advanced NLP query: {str(e)}'
-        })
-@app.route('/api/nlp/compare', methods=['POST'])
-def compare_routes():
-    """Compare the fares of two or more routes.
-
-    Expects a JSON body shaped like
-    ``{"routes": [{"from": ..., "to": ...}, ...]}``. Entries missing either
-    endpoint are ignored. Place names are handed to Neo4j as query
-    parameters instead of being spliced into the Cypher text, so a quote
-    character in a name can neither break nor alter the query.
-
-    Returns a JSON payload with ``success`` and ``message``; on success it
-    also carries ``data``, one dict per result record.
-    """
-    try:
-        data = request.get_json()
-        routes = data.get('routes', [])
-        # Only entries with both endpoints can be matched. Dropping the rest
-        # up front keeps the MATCH and RETURN clauses referring to the same
-        # set of variables and avoids a dangling comma in the query.
-        usable_routes = [
-            route for route in routes
-            if route.get('from') and route.get('to')
-        ]
-        if len(usable_routes) < 2:
-            return jsonify({
-                'success': False,
-                'message': 'Please provide at least 2 routes to compare.'
-            })
-        # Build comparison query: one pattern and one pair of output columns
-        # per route, with the place names bound as $from<i> / $to<i>.
-        match_patterns = []
-        return_items = []
-        params = {}
-        for i, route in enumerate(usable_routes):
-            params[f'from{i}'] = route['from']
-            params[f'to{i}'] = route['to']
-            match_patterns.append(
-                f"(a{i}:Place {{name: $from{i}}})-[r{i}:Fare]->(b{i}:Place {{name: $to{i}}})"
-            )
-            return_items.append(
-                f"a{i}.name + ' to ' + b{i}.name as route{i+1}, r{i}.fare as fare{i+1}"
-            )
-        comparison_query = (
-            "MATCH " + ", ".join(match_patterns)
-            + " RETURN " + ", ".join(return_items)
-        )
-        # Execute query
-        with neo4j_service.driver.session() as session:
-            result = session.run(comparison_query, params)
-            results = [dict(record) for record in result]
-        return jsonify({
-            'success': True,
-            'data': results,
-            'message': f'Comparison of {len(usable_routes)} routes completed'
-        })
-    except Exception as e:
-        return jsonify({
-            'success': False,
-            'message': f'Error comparing routes: {str(e)}'
-        })
-@app.route('/api/nlp/range', methods=['POST'])
-def search_by_range():
-    """Search routes by price range.
-
-    Request JSON may carry ``min_price``, ``max_price`` or both; at least one
-    is required. Each bound must be a number or a numeric string. The bounds
-    are passed to Neo4j as query parameters, never interpolated into the
-    Cypher text, so request data cannot change the shape of the query.
-
-    Returns a JSON payload with ``success`` and ``message``; on success it
-    also carries ``data``, the matching routes ordered by fare.
-    """
-    try:
-        data = request.get_json()
-        min_price = data.get('min_price')
-        max_price = data.get('max_price')
-        if min_price is None and max_price is None:
-            return jsonify({
-                'success': False,
-                'message': 'Please provide min_price or max_price or both.'
-            })
-        # Build range query
-        conditions = []
-        params = {}
-        bounds = (
-            ('min_price', '>=', min_price),
-            ('max_price', '<=', max_price),
-        )
-        for name, operator, bound in bounds:
-            if bound is None:
-                continue
-            # Numbers pass through untouched; anything else must parse as a
-            # number. A non-numeric value raises here and is reported by the
-            # handler below instead of reaching the database.
-            params[name] = bound if isinstance(bound, (int, float)) else float(bound)
-            conditions.append(f"r.fare {operator} ${name}")
-        range_query = (
-            "MATCH (a:Place)-[r:Fare]->(b:Place) WHERE "
-            + " AND ".join(conditions)
-            + " RETURN a.name as from_place, b.name as to_place, r.fare as fare ORDER BY r.fare"
-        )
-        # Execute query
-        with neo4j_service.driver.session() as session:
-            result = session.run(range_query, params)
-            results = [dict(record) for record in result]
-        return jsonify({
-            'success': True,
-            'data': results,
-            'message': f'Found {len(results)} routes in the specified range'
-        })
-    except Exception as e:
-        return jsonify({
-            'success': False,
-            'message': f'Error searching by range: {str(e)}'
-        })
-@app.route('/api/nlp/test-all-types')
-def test_all_query_types():
-    """Run a fixed battery of sample queries through the enhanced NLP processor.
-
-    Each sample is processed on its own; a sample that raises is recorded as
-    a failed result rather than aborting the run. The JSON response holds the
-    per-type results, an overall summary including a success rate, and the
-    current Neo4j connection flag.
-    """
-    try:
-        # Sample queries, keyed by the query type they are meant to exercise
-        test_queries = {
-            'fare_inquiry': [
-                'What is the fare from Colombo to Kandy?',
-                'fare of anuradhapura to kandy',
-                'price from panadura to galle'
-            ],
-            'comparison': [
-                'Compare fares from Colombo to Kandy vs Colombo to Galle',
-                'Which is cheaper between Colombo to Kandy and Colombo to Anuradapura?'
-            ],
-            'range_search': [
-                'Find routes under 500 rupees',
-                'Show me routes between 200 and 800 rupees',
-                'Routes over 1000 rupees'
-            ],
-            'recommendation': [
-                'Recommend cheap routes',
-                'Show me popular destinations',
-                'What are the best routes from Colombo?'
-            ],
-            'route_inquiry': [
-                'Routes from Colombo',
-                'Routes to Galle',
-                'What routes depart from Kandy?'
-            ],
-            'statistics': [
-                'What is the average fare?',
-                'Database statistics',
-                'How many routes are there?'
-            ]
-        }
-        results = {}
-        total_successful = 0
-        for query_type, queries in test_queries.items():
-            outcomes = []
-            for query in queries:
-                try:
-                    # Process with enhanced NLP (uses LLM for Cypher generation)
-                    outcome = enhanced_nlp_processor.process_query(query)
-                    succeeded = outcome.get('success', False)
-                except Exception as e:
-                    outcome = {
-                        'success': False,
-                        'message': f'Error processing query: {str(e)}'
-                    }
-                    succeeded = False
-                outcomes.append({
-                    'query': query,
-                    'result': outcome,
-                    'success': succeeded
-                })
-            passed = len([entry for entry in outcomes if entry['success']])
-            total_successful += passed
-            results[query_type] = {
-                'description': f'Test results for {query_type} queries',
-                'total_queries': len(queries),
-                'successful_queries': passed,
-                'examples': outcomes
-            }
-        # Summary statistics
-        total_queries = sum(len(queries) for queries in test_queries.values())
-        if total_queries > 0:
-            success_rate = round((total_successful / total_queries) * 100, 2)
-        else:
-            success_rate = 0
-        summary = {
-            'total_query_types': len(test_queries),
-            'total_queries_tested': total_queries,
-            'successful_queries': total_successful,
-            'success_rate': success_rate
-        }
-        return jsonify({
-            'success': True,
-            'message': f'Tested {total_queries} queries across {len(test_queries)} types. {total_successful} successful.',
-            'summary': summary,
-            'results': results,
-            'neo4j_connected': neo4j_service.is_connected()
-        })
-    except Exception as e:
-        return jsonify({
-            'success': False,
-            'message': f'Error testing query types: {str(e)}',
-            'neo4j_connected': neo4j_service.is_connected()
-        })
-@app.errorhandler(404)
-def not_found(error):
-    """Answer requests for unknown URLs with a JSON error body and status 404."""
-    body = jsonify({'success': False, 'message': 'Endpoint not found'})
-    return body, 404
-@app.errorhandler(500)
-def internal_error(error):
-    """Answer unhandled server failures with a JSON error body and status 500."""
-    body = jsonify({'success': False, 'message': 'Internal server error'})
-    return body, 500
-if __name__ == '__main__':
-    # Script entry point: print a startup banner, report which backing
-    # services are reachable, list features and endpoints, then start Flask.
-    # Hugging Face Spaces uses port 7860 by default
-    port = int(os.getenv('PORT', 7860))
-    for line in (
-        "🚌 Natural Language Transport Query System",
-        "=" * 60,
-        f"🚀 Starting on port {port}",
-        f"🌐 Open your browser and go to: http://localhost:{port}",
-    ):
-        print(line)
-    # Check Neo4j connection
-    if not neo4j_service.is_connected():
-        print("⚠️  Neo4j not connected - some features may not work")
-    else:
-        print("✅ Connected to Neo4j database")
-        stats = neo4j_service.get_route_statistics()
-        if stats:
-            print(f"📊 Database: {stats.get('total_places', 0)} places, {stats.get('total_routes', 0)} routes")
-    # Check LLM availability
-    print(
-        "🤖 LLM integration available for spell correction"
-        if spell_corrector.llm_available
-        else "⚠️  LLM not available - using fuzzy matching only"
-    )
-    for line in (
-        "\n🎯 Enhanced Natural Language Capabilities:",
-        "   • Multiple query formats (fare, price, cost)",
-        "   • Natural language patterns (from X to Y, X to Y fare)",
-        "   • Question formats (What is, How much, Show me)",
-        "   • Compact formats (Colombo to Kandy fare)",
-        "   • Spell correction and fuzzy matching",
-        "   • LLM-powered query interpretation",
-        "   • Automatic Cypher query generation",
-        "   • Advanced intent classification",
-        "   • Entity extraction and normalization",
-        "   • Comparison queries (vs, versus, compare)",
-        "   • Range search queries (under, over, between)",
-        "   • Recommendation queries (recommend, suggest)",
-        "   • Confidence scoring for query understanding",
-        "   • Sinhala language support with translation",
-        "   • Automatic Sinhala-English translation",
-        "   • Dictionary-based and Google Translate fallback",
-        "\n🔗 Available API Endpoints:",
-        "   • /api/query - Process natural language queries (enhanced NLP)",
-        "   • /api/nlp/capabilities - View enhanced NLP capabilities with live examples",
-        "   • /api/nlp/test-all-types - Test all query types with live results",
-        "   • /api/nlp/test - Test queries with detailed analysis",
-        "   • /api/nlp/demo - Get comprehensive demo queries",
-        "   • /api/examples - Get categorized example queries",
-        "   • /api/sinhala/examples - Get Sinhala example queries",
-        "   • /api/translation/test - Test translation functionality",
-        "   • /api/translation/translate - Translate text between languages",
-        "   • /api/status - System status and statistics",
-        "   • /api/suggestions - Get location suggestions",
-        "   • /api/places - Get all available places",
-        "=" * 60,
-    ):
-        print(line)
-    try:
-        # Set debug=False for production
-        app.run(debug=False, port=port, host='0.0.0.0')
-    except Exception as e:
-        print(f"❌ Error starting application: {e}")
-        print("💡 Try running as administrator or check if another application is using the port")

+#!/usr/bin/env python3
+"""
+Main Flask Application for Transport Query System
+"""
+from flask import Flask, render_template, request, jsonify, session
+import os
+from llm_query_processor import LLMQueryProcessor
+from enhanced_nlp_processor import EnhancedNLPProcessor
+from spell_corrector import SpellCorrector
+from neo4j_service import Neo4jService
+from translation_service import TranslationService
+from logger import get_logger
+from config import Config
+# Flask application object; its settings are loaded from the project's Config class.
+app = Flask(__name__)
+app.config.from_object(Config)
+# Logger tagged "FlaskApp", used by the route handlers in this module.
+logger = get_logger("FlaskApp")
+# Initialize services
+# Each service is constructed once, when this module is imported, and the
+# route handlers below all share these module-level instances.
+# query_processor: basic processor, used when a request turns enhanced NLP off.
+query_processor = LLMQueryProcessor()
+# enhanced_nlp_processor: default processor for incoming queries.
+enhanced_nlp_processor = EnhancedNLPProcessor()
+# spell_corrector: location suggestions and location extraction from queries.
+spell_corrector = SpellCorrector()
+# neo4j_service: connection status, place list and route statistics.
+neo4j_service = Neo4jService()
+# translation_service: language detection plus query/response translation.
+translation_service = TranslationService()
+@app.route('/')
+def index():
+    """Serve the application's landing page, rendered from ``index.html``."""
+    landing_page = render_template('index.html')
+    return landing_page
+@app.route('/api/query', methods=['POST'])
+def process_query():
+    """Answer a natural-language transport query, translating when needed.
+
+    Request JSON:
+        query: the user's question (required, must not be blank).
+        enhanced_nlp: when true (the default) the enhanced NLP processor is
+            used; otherwise the basic LLM query processor.
+
+    The query first goes through ``translation_service.translate_query``.
+    When the detected language is not English, the processor's result is
+    translated back with ``translate_response`` and the translation details
+    are attached under ``translation_info``.
+
+    Returns the processor's result as JSON, or a ``success: False`` payload
+    for a blank query or any raised exception.
+    """
+    try:
+        data = request.get_json()
+        user_query = data.get('query', '').strip()
+        use_enhanced_nlp = data.get('enhanced_nlp', True)  # Default to enhanced NLP
+        if not user_query:
+            return jsonify({
+                'success': False,
+                'message': 'Please enter a query.'
+            })
+        # Auto-detect language and translate if needed
+        translation_info = translation_service.translate_query(user_query)
+        # Use translated query for processing
+        query_to_process = translation_info['translated_query']
+        # The detected language is read once and drives both the logging
+        # below and the decision to translate the response back.
+        detected_lang = translation_info.get('detected_language', 'english')
+        needs_translation = detected_lang != 'english'
+        if needs_translation:
+            logger.info(f"Translation: {detected_lang}->en method={translation_info['translation_method']} original='{translation_info['original_query']}' translated='{translation_info['translated_query']}'")
+        else:
+            logger.info(f"Processing English Query: '{user_query}'")
+        # Process the query with enhanced NLP or fallback to basic processor
+        if use_enhanced_nlp:
+            result = enhanced_nlp_processor.process_query(query_to_process)
+        else:
+            result = query_processor.process_query(query_to_process)
+        # If original query was not in English, translate the response back
+        if needs_translation:
+            # These go through the module logger, like the other messages in
+            # this handler, rather than being printed to stdout.
+            logger.info(f"English Response: {result.get('message', 'No message')}")
+            result = translation_service.translate_response(result, detected_lang)
+            result['translation_info'] = translation_info
+            logger.info(f"{detected_lang.title()} Response: {result.get('message', 'No message')}")
+            logger.info("Translation complete")
+        # 'message' can be absent or None; normalise it to a string before
+        # slicing so that writing the log line can never fail the request.
+        message_preview = str(result.get('message') or '')[:120]
+        logger.info(f"Response success={result.get('success')} type={result.get('query_type','n/a')} message='{message_preview}'")
+        return jsonify(result)
+    except Exception as e:
+        return jsonify({
+            'success': False,
+            'message': f'Error processing query: {str(e)}'
+        })
+@app.route('/api/suggestions', methods=['POST'])
+def get_suggestions():
+    """Return autocomplete candidates for a partially typed location.
+
+    Reads ``location`` from the JSON body. A blank value gives an empty
+    suggestion list; otherwise every (name, confidence) pair produced by the
+    spell corrector is returned as a small dict.
+    """
+    try:
+        payload = request.get_json()
+        fragment = payload.get('location', '').strip()
+        matches = spell_corrector.get_suggestions(fragment) if fragment else []
+        return jsonify({
+            'suggestions': [{'name': name, 'confidence': conf} for name, conf in matches]
+        })
+    except Exception as e:
+        failure = {
+            'success': False,
+            'message': f'Error getting suggestions: {str(e)}'
+        }
+        return jsonify(failure)
+@app.route('/api/status')
+def get_status():
+    """Report whether Neo4j is reachable, with the place count and route statistics.
+
+    The database is only queried when the connection check succeeds;
+    otherwise the place count is 0 and the statistics are empty.
+    """
+    try:
+        neo4j_connected = neo4j_service.is_connected()
+        if neo4j_connected:
+            places = neo4j_service.get_all_places()
+            stats = neo4j_service.get_route_statistics()
+        else:
+            places, stats = [], {}
+        status = {
+            'neo4j_connected': neo4j_connected,
+            'total_places': len(places),
+            'statistics': stats
+        }
+        return jsonify(status)
+    except Exception as e:
+        return jsonify({
+            'success': False,
+            'message': f'Error getting status: {str(e)}'
+        })
+@app.route('/api/places')
+def get_places():
+    """Return every place known to the Neo4j service."""
+    try:
+        return jsonify({
+            'success': True,
+            'places': neo4j_service.get_all_places()
+        })
+    except Exception as e:
+        failure = {
+            'success': False,
+            'message': f'Error getting places: {str(e)}'
+        }
+        return jsonify(failure)
+@app.route('/api/sinhala/examples')
+def get_sinhala_examples():
+    """Return the translation service's sample queries written in Sinhala."""
+    try:
+        return jsonify({
+            'success': True,
+            'examples': translation_service.get_sinhala_examples()
+        })
+    except Exception as e:
+        failure = {
+            'success': False,
+            'message': f'Error getting Sinhala examples: {str(e)}'
+        }
+        return jsonify(failure)
+@app.route('/api/tamil/examples')
+def get_tamil_examples():
+    """Return the translation service's sample queries written in Tamil."""
+    try:
+        return jsonify({
+            'success': True,
+            'examples': translation_service.get_tamil_examples()
+        })
+    except Exception as e:
+        failure = {
+            'success': False,
+            'message': f'Error getting Tamil examples: {str(e)}'
+        }
+        return jsonify(failure)
+@app.route('/api/language/detect', methods=['POST'])
+def detect_language():
+    """Identify the language of the ``text`` field in the JSON body.
+
+    Responds with the language detector's result under ``detection_result``,
+    or with a ``success: False`` payload when the text is blank or an
+    exception is raised.
+    """
+    try:
+        payload = request.get_json()
+        sample = payload.get('text', '').strip()
+        if sample:
+            detector = translation_service.language_detector
+            return jsonify({
+                'success': True,
+                'detection_result': detector.detect_language(sample)
+            })
+        return jsonify({
+            'success': False,
+            'message': 'Please provide text to detect language.'
+        })
+    except Exception as e:
+        failure = {
+            'success': False,
+            'message': f'Error detecting language: {str(e)}'
+        }
+        return jsonify(failure)
+@app.route('/api/translation/test')
+def test_translation():
+    """Run the translation service's self-test and return its results."""
+    try:
+        return jsonify({
+            'success': True,
+            'test_results': translation_service.test_translation()
+        })
+    except Exception as e:
+        failure = {
+            'success': False,
+            'message': f'Error testing translation: {str(e)}'
+        }
+        return jsonify(failure)
+@app.route('/api/translation/translate', methods=['POST'])
+def translate_text():
+    """Translate ``text`` into ``target_lang`` (Sinhala, Tamil, Singlish, English).
+
+    Request JSON:
+        text: the text to translate (required, must not be blank).
+        target_lang: 'en', 'si' or 'ta'; defaults to 'en'.
+        source_lang: forwarded to the translation service; defaults to 'auto'.
+
+    Language detection always runs on the text. The ``detected_language``,
+    ``source_language`` and ``detection_confidence`` fields of the response
+    come from that detection.
+    """
+    try:
+        payload = request.get_json()
+        text = payload.get('text', '').strip()
+        target_lang = payload.get('target_lang', 'en')  # 'en', 'si', 'ta'
+        source_lang = payload.get('source_lang', 'auto')
+        if not text:
+            return jsonify({
+                'success': False,
+                'message': 'Please provide text to translate.'
+            })
+        detection_result = translation_service.language_detector.detect_language(text)
+        detected_language = detection_result['language']
+        # Map the detected language name to a language code.
+        # Singlish is treated as Sinhala for translation.
+        if detected_language in ('sinhala', 'singlish'):
+            detected_lang_code = 'si'
+        elif detected_language == 'tamil':
+            detected_lang_code = 'ta'
+        else:
+            detected_lang_code = 'en'
+        translated_text = translation_service.translate_text(text, target_lang, source_lang)
+        # Read the method only after translating, so it describes this call
+        method_used = translation_service.last_translation_method or 'dictionary'
+        return jsonify({
+            'success': True,
+            'original_text': text,
+            'translated_text': translated_text,
+            'detected_language': detected_language,
+            'source_language': detected_lang_code,
+            'target_language': target_lang,
+            'translation_method': method_used,
+            'detection_confidence': detection_result['confidence']
+        })
+    except Exception as e:
+        return jsonify({
+            'success': False,
+            'message': f'Error translating text: {str(e)}'
+        })
+@app.route('/api/nlp/capabilities')
+def get_nlp_capabilities():
+    """Describe the NLP features and attach live results for sample queries.
+
+    For every query type, the first two sample queries are run through the
+    enhanced NLP processor. A sample that raises is reported as a failed
+    result instead of aborting the request.
+    """
+    # Sample queries for each supported query type
+    test_queries = [
+        {
+            'type': 'fare_inquiry',
+            'description': 'Find fare between two specific locations',
+            'examples': [
+                'What is the fare from Colombo to Kandy?',
+                'fare of anuradhapura to kandy',
+                'price from panadura to galle',
+                'Colombo to Kandy fare'
+            ]
+        },
+        {
+            'type': 'comparison',
+            'description': 'Compare fares between different routes',
+            'examples': [
+                'Compare fares from Colombo to Kandy vs Colombo to Galle',
+                'Which is cheaper between Colombo to Kandy and Colombo to Anuradapura?',
+                'What is the difference in fare between Panadura to Galle and Panadura to Matara?'
+            ]
+        },
+        {
+            'type': 'range_search',
+            'description': 'Find routes within specific price ranges',
+            'examples': [
+                'Find routes under 500 rupees',
+                'Show me routes between 200 and 800 rupees',
+                'Routes over 1000 rupees'
+            ]
+        },
+        {
+            'type': 'recommendation',
+            'description': 'Get route recommendations based on criteria',
+            'examples': [
+                'Recommend cheap routes',
+                'Show me popular destinations',
+                'What are the best routes from Colombo?'
+            ]
+        },
+        {
+            'type': 'route_inquiry',
+            'description': 'Find routes from/to specific locations',
+            'examples': [
+                'Routes from Colombo',
+                'Routes to Galle',
+                'What routes depart from Kandy?'
+            ]
+        },
+        {
+            'type': 'statistics',
+            'description': 'Get database overview and statistics',
+            'examples': [
+                'What is the average fare?',
+                'Database statistics',
+                'How many routes are there?'
+            ]
+        }
+    ]
+    # Run the first two samples of each type to obtain real results
+    live_examples = []
+    for entry in test_queries:
+        probes = []
+        for example_query in entry['examples'][:2]:
+            try:
+                outcome = enhanced_nlp_processor.process_query(example_query)
+            except Exception as e:
+                outcome = {
+                    'success': False,
+                    'message': f'Error: {str(e)}'
+                }
+            probes.append({
+                'query': example_query,
+                'result': outcome
+            })
+        live_examples.append({
+            'type': entry['type'],
+            'description': entry['description'],
+            'examples': probes
+        })
+    # Static feature descriptions, assembled section by section
+    nlp_section = {
+        'description': 'Advanced NLP for transport queries with enhanced understanding',
+        'features': [
+            'Multiple query formats (fare, price, cost)',
+            'Natural language patterns (from X to Y, X to Y fare, etc.)',
+            'Question formats (What is, How much, Show me, etc.)',
+            'Compact formats (Colombo to Kandy fare)',
+            'Spell correction and fuzzy matching',
+            'Automatic location name correction',
+            'LLM-powered query interpretation',
+            'Fallback keyword-based processing',
+            'Advanced intent classification',
+            'Entity extraction and normalization',
+            'Confidence scoring for query understanding'
+        ]
+    }
+    spell_section = {
+        'description': 'Automatic location name correction',
+        'methods': [
+            'Direct mapping (exact matches)',
+            'Fuzzy matching (similar names)',
+            'LLM correction (AI-powered)',
+            'Partial matching (substring matching)'
+        ],
+        'examples': [
+            'panadra → Panadura',
+            'gale → Galle',
+            'colmbo → Colombo',
+            'kandee → Kandy'
+        ]
+    }
+    llm_section = {
+        'description': 'AI-powered query interpretation with LLM Cypher generation',
+        'features': [
+            'Automatic query type detection',
+            'LLM-powered Cypher query generation',
+            'Natural language understanding',
+            'Fallback to keyword-based processing',
+            'Advanced entity extraction',
+            'Intent classification with confidence scoring',
+            'Real-time database querying'
+        ]
+    }
+    enhanced_section = {
+        'description': 'Advanced NLP capabilities',
+        'features': [
+            'Multi-intent query understanding',
+            'Context-aware responses',
+            'Query preprocessing and normalization',
+            'Advanced pattern matching',
+            'Confidence-based result ranking',
+            'Comprehensive query analysis',
+            'Live database results for all query types'
+        ]
+    }
+    capabilities = {
+        'natural_language_processing': nlp_section,
+        'query_types': test_queries,
+        'live_examples': live_examples,
+        'spell_correction': spell_section,
+        'llm_integration': llm_section,
+        'enhanced_features': enhanced_section
+    }
+    return jsonify({
+        'success': True,
+        'capabilities': capabilities
+    })
+@app.route('/api/nlp/test', methods=['POST'])
+def test_nlp_query():
+    """Process a query and report each processing step alongside the result.
+
+    Request JSON:
+        query: the text to analyse (required, must not be blank).
+        enhanced_nlp: selects the enhanced NLP processor (default) or the
+            basic query processor.
+
+    The response carries ``analysis`` (the original query plus a list of
+    processing steps) and ``result`` (the processor's own output).
+    """
+    try:
+        data = request.get_json()
+        user_query = data.get('query', '').strip()
+        use_enhanced_nlp = data.get('enhanced_nlp', True)
+        if not user_query:
+            return jsonify({
+                'success': False,
+                'message': 'Please provide a query to test.'
+            })
+        steps = []
+        # Step 1: Extract locations
+        locations = spell_corrector.extract_locations_from_query(user_query)
+        location_count = len(locations)
+        location_details = []
+        for found in locations:
+            location_details.append({
+                'original': found[0],
+                'corrected': found[1],
+                'confidence': found[2],
+                'method': found[3]
+            })
+        steps.append({
+            'step': 'Location Extraction',
+            'locations_found': location_count,
+            'details': location_details
+        })
+        # Step 2: Process query with the selected processor
+        if use_enhanced_nlp:
+            result = enhanced_nlp_processor.process_query(user_query)
+            processing_step = {
+                'step': 'Enhanced NLP Processing',
+                'success': result.get('success', False),
+                'query_type': result.get('query_type', 'unknown'),
+                'message': result.get('message', '')
+            }
+            query_analysis = result.get('query_analysis', {})
+            processing_step['confidence'] = query_analysis.get('confidence', 0)
+            processing_step['intent'] = query_analysis.get('intent', {})
+            processing_step['entities'] = query_analysis.get('entities', {})
+        else:
+            result = query_processor.process_query(user_query)
+            processing_step = {
+                'step': 'Basic Query Processing',
+                'success': result.get('success', False),
+                'query_type': result.get('query_type', 'unknown'),
+                'message': result.get('message', ''),
+                'cypher_query': result.get('cypher_query', ''),
+                'corrections': result.get('corrections', [])
+            }
+        steps.append(processing_step)
+        # Step 3: Results (only for a successful run that returned data)
+        rows = result.get('data') if result.get('success') else None
+        if rows:
+            steps.append({
+                'step': 'Database Results',
+                'results_count': len(rows),
+                'sample_results': rows[:3]  # Show first 3 results
+            })
+        analysis = {
+            'original_query': user_query,
+            'processing_steps': steps
+        }
+        return jsonify({
+            'success': True,
+            'analysis': analysis,
+            'result': result
+        })
+    except Exception as e:
+        return jsonify({
+            'success': False,
+            'message': f'Error testing NLP query: {str(e)}'
+        })
+@app.route('/api/nlp/demo')
+def get_nlp_demo():
+    """Return a fixed catalogue of demo queries, grouped by category."""
+    # (category title, sample queries) pairs, in display order
+    catalogue = (
+        ('Basic Fare Queries', [
+            'What is the fare from Colombo to Kandy?',
+            'fare of anuradhapura to kandy',
+            'price from panadura to galle',
+            'Colombo to Kandy fare'
+        ]),
+        ('Comparison Queries', [
+            'Compare fares from Colombo to Kandy vs Colombo to Galle',
+            'Which is cheaper between Colombo to Kandy and Colombo to Anuradapura?',
+            'What is the difference in fare between Panadura to Galle and Panadura to Matara?'
+        ]),
+        ('Range Search Queries', [
+            'Find routes under 500 rupees',
+            'Show me routes between 200 and 800 rupees',
+            'Routes over 1000 rupees'
+        ]),
+        ('Recommendation Queries', [
+            'Recommend cheap routes',
+            'Show me popular destinations',
+            'What are the best routes from Colombo?'
+        ]),
+        ('Statistical Queries', [
+            'What is the average fare?',
+            'Database statistics',
+            'How many routes are there?'
+        ]),
+        ('Route Queries', [
+            'Show me the cheapest routes',
+            'Routes from Colombo',
+            'Routes to Galle',
+            'What routes depart from Kandy?'
+        ]),
+        ('Spell Correction Tests', [
+            'price from panadra to gale',
+            'fare of colmbo to kandee',
+            'cost from anuradapura to kandy'
+        ]),
+    )
+    demo_queries = [
+        {'category': title, 'queries': samples}
+        for title, samples in catalogue
+    ]
+    demo = {
+        'title': 'Enhanced Natural Language Transport Query Demo',
+        'description': 'Advanced NLP capabilities with comparison, range search, and recommendations',
+        'categories': demo_queries
+    }
+    return jsonify({
+        'success': True,
+        'demo': demo
+    })
+@app.route('/api/examples')
+def get_examples():
+    """Get comprehensive example queries showcasing natural language capabilities"""
+    examples = [
+        # === SINHALA FARE QUERIES ===
+        {
+            'category': 'Sinhala Fare Queries (සිංහල)',
+            'examples': [
+                {
+                    'query': 'කොළඹ සිට මහනුවරට ගාස්තුව කීයද?',
+                    'description': 'Standard fare query format'
+                },
+                {
+                    'query': 'පානදුරේ ඉඳන් ගාල්ලට කීයක් යනවද?',
+                    'description': 'Alternative way to ask for fare'
+                },
+                {
+                    'query': 'අනුරාධපුර සිට මහනුවර දක්වා ගාස්තුව',
+                    'description': 'Natural language format'
+                },
+                {
+                    'query': 'මහනුවර සිට මාතරට ගාස්තුව කීයද?',
+                    'description': 'Question format'
+                }
+            ]
+        },
+        # === TAMIL FARE QUERIES ===
+        {
+            'category': 'Tamil Fare Queries (தமிழ்)',
+            'examples': [
+                {
+                    'query': 'கொழும்பு இருந்து கண்டி வரை பேருந்து கட்டணம் எவ்வளவு?',
+                    'description': 'Standard Tamil fare query'
+                },
+                {
+                    'query': 'மாத்தறை இருந்து காலி வரை விலை எவ்வளவு?',
+                    'description': 'Alternative Tamil fare query'
+                },
+                {
+                    'query': 'அனுராதபுரம் இருந்து கொழும்பு வரை கட்டணம்',
+                    'description': 'Tamil natural language format'
+                },
+                {
+                    'query': 'பனதுரை இருந்து காலி வரை பேருந்து கட்டணம் எவ்வளவு?',
+                    'description': 'Tamil question format'
+                }
+            ]
+        },
+        # === SINGLISH FARE QUERIES ===
+        {
+            'category': 'Singlish Fare Queries (Mixed)',
+            'examples': [
+                {
+                    'query': 'කොළඹ සිට Kandy ගාස්තුව කීයද?',
+                    'description': 'Sinhala-English mixed query'
+                },
+                {
+                    'query': 'Colombo සිට ගාල්ලට bus fare කීයද?',
+                    'description': 'English-Sinhala mixed query'
+                },
+                {
+                    'query': 'කොළඹ සිට Panadura දක්වා price කීයද?',
+                    'description': 'Mixed language with English terms'
+                },
+                {
+                    'query': 'Galle සිට මාතරට ticket cost කීයද?',
+                    'description': 'Mixed language fare query'
+                }
+            ]
+        },
+        # === ENGLISH FARE QUERIES ===
+        {
+            'category': 'English Fare Queries',
+            'examples': [
+                {
+                    'query': 'What is the fare from Colombo to Kandy?',
+                    'description': 'Standard English fare query'
+                },
+                {
+                    'query': 'How much is the bus fare from Panadura to Galle?',
+                    'description': 'English question format'
+                },
+                {
+                    'query': 'Price from Anuradhapura to Kandy',
+                    'description': 'Compact English format'
+                },
+                {
+                    'query': 'Show me the cost from Matara to Colombo',
+                    'description': 'English request format'
+                }
+            ]
+        },
+        # === COMPARISON QUERIES ===
+        {
+            'category': 'Sinhala Comparison Queries (සිංහල)',
+            'examples': [
+                {
+                    'query': 'කොළඹ සිට මහනුවර දක්වා සහ කොළඹ සිට ගාල්ල දක්වා ගාස්තු සංසන්දනය කරන්න.',
+                    'description': 'Compare two different routes'
+                },
+                {
+                    'query': 'කොළඹ සිට මහනුවර දක්වා සහ කොළඹ සිට අනුරාධපුර දක්වා ලාභදායී වන්නේ කුමක්ද?',
+                    'description': 'Find the cheaper option'
+                }
+            ]
+        },
+        {
+            'category': 'Tamil Comparison Queries (தமிழ்)',
+            'examples': [
+                {
+                    'query': 'கொழும்பு இருந்து கண்டி வரை மற்றும் கொழும்பு இருந்து காலி வரை கட்டணம் ஒப்பிடு.',
+                    'description': 'Compare two different routes in Tamil'
+                },
+                {
+                    'query': 'கொழும்பு இருந்து கண்டி வரை மற்றும் கொழும்பு இருந்து அனுராதபுரம் வரை கட்டணத்தின் வித்தியாசம் எவ்வளவு?',
+                    'description': 'Calculate fare difference in Tamil'
+                }
+            ]
+        },
+        {
+            'category': 'Singlish Comparison Queries (Mixed)',
+            'examples': [
+                {
+                    'query': 'කොළඹ සිට Kandy සහ Colombo සිට Galle fares compare කරන්න.',
+                    'description': 'Mixed language comparison'
+                },
+                {
+                    'query': 'Colombo සිට මහනුවර සහ Colombo සිට අනුරාධපුර cheaper කුමක්ද?',
+                    'description': 'Mixed language cheaper option'
+                }
+            ]
+        },
+        # === RANGE SEARCH QUERIES ===
+        {
+            'category': 'Sinhala Range Queries (සිංහල)',
+            'examples': [
+                {
+                    'query': 'රුපියල් 500ට අඩු මාර්ග සොයා ගන්න',
+                    'description': 'Find affordable routes'
+                },
+                {
+                    'query': 'රුපියල් 200 සහ 800 අතර මාර්ග සොයා ගන්න',
+                    'description': 'Find routes in price range'
+                }
+            ]
+        },
+        {
+            'category': 'Tamil Range Queries (தமிழ்)',
+            'examples': [
+                {
+                    'query': 'ரூபாய் 500 க்கு குறைவான கட்டணம் உள்ள பாதைகளை கண்டுபிடி',
+                    'description': 'Find affordable routes in Tamil'
+                },
+                {
+                    'query': 'ரூபாய் 200 மற்றும் 800 இடையில் கட்டணம் உள்ள பாதைகளை காட்டு',
+                    'description': 'Find routes in price range in Tamil'
+                }
+            ]
+        },
+        {
+            'category': 'Singlish Range Queries (Mixed)',
+            'examples': [
+                {
+                    'query': 'රුපියල් 500ට අඩු routes find කරන්න',
+                    'description': 'Mixed language range search'
+                },
+                {
+                    'query': 'Rs. 200 සහ 800 අතර මාර්ග show කරන්න',
+                    'description': 'Mixed language price range'
+                }
+            ]
+        },
+        # === RECOMMENDATION QUERIES ===
+        {
+            'category': 'Sinhala Recommendation Queries (සිංහල)',
+            'examples': [
+                {
+                    'query': 'ලාභ මාර්ග නිර්දේශ කරන්න',
+                    'description': 'Get budget-friendly recommendations'
+                },
+                {
+                    'query': 'මට ජනප්‍රිය ගමනාන්ත පෙන්වන්න',
+                    'description': 'Find frequently traveled routes'
+                }
+            ]
+        },
+        {
+            'category': 'Tamil Recommendation Queries (தமிழ்)',
+            'examples': [
+                {
+                    'query': 'குறைந்த விலையில் பாதைகளை பரிந்துரை',
+                    'description': 'Get budget-friendly recommendations in Tamil'
+                },
+                {
+                    'query': 'பிரபலமான இலக்குகளை காட்டு',
+                    'description': 'Find frequently traveled routes in Tamil'
+                }
+            ]
+        },
+        {
+            'category': 'Singlish Recommendation Queries (Mixed)',
+            'examples': [
+                {
+                    'query': 'ලාභ routes recommend කරන්න',
+                    'description': 'Mixed language recommendations'
+                },
+                {
+                    'query': 'Popular destinations show කරන්න',
+                    'description': 'Mixed language popular routes'
+                }
+            ]
+        },
+        # === STATISTICAL QUERIES ===
+        {
+            'category': 'Sinhala Statistical Queries (සිංහල)',
+            'examples': [
+                {
+                    'query': 'සාමාන්‍ය ගාස්තුව කීයද?',
+                    'description': 'Get average fare statistics'
+                },
+                {
+                    'query': 'දත්ත සමුදා සංඛ්යා ලේඛන',
+                    'description': 'Get comprehensive database overview'
+                }
+            ]
+        },
+        {
+            'category': 'Tamil Statistical Queries (தமிழ்)',
+            'examples': [
+                {
+                    'query': 'சராசரி கட்டணம் எவ்வளவு?',
+                    'description': 'Get average fare statistics in Tamil'
+                },
+                {
+                    'query': 'தரவு சேமிப்பக புள்ளிவிவரங்கள்',
+                    'description': 'Get comprehensive database overview in Tamil'
+                }
+            ]
+        },
+        {
+            'category': 'Singlish Statistical Queries (Mixed)',
+            'examples': [
+                {
+                    'query': 'Average fare කීයද?',
+                    'description': 'Mixed language statistics'
+                },
+                {
+                    'query': 'Database statistics show කරන්න',
+                    'description': 'Mixed language database overview'
+                }
+            ]
+        },
+        # === ROUTE QUERIES ===
+        {
+            'category': 'Sinhala Route Queries (සිංහල)',
+            'examples': [
+                {
+                    'query': 'මට ලාභදායී මාර්ග 10ක් පෙන්වන්න',
+                    'description': 'Find top 10 cheapest routes'
+                },
+                {
+                    'query': 'කොළඹ සිට යාමට මාර්ග මොනවාද?',
+                    'description': 'Find all routes departing from a location'
+                }
+            ]
+        },
+        {
+            'category': 'Tamil Route Queries (தமிழ்)',
+            'examples': [
+                {
+                    'query': 'குறைந்த விலையில் பாதைகள் 10 காட்டு',
+                    'description': 'Find top 10 cheapest routes in Tamil'
+                },
+                {
+                    'query': 'கொழும்பு இருந்து போகும் பாதைகள் என்ன?',
+                    'description': 'Find all routes departing from a location in Tamil'
+                }
+            ]
+        },
+        {
+            'category': 'Singlish Route Queries (Mixed)',
+            'examples': [
+                {
+                    'query': 'ලාභදායී routes 10ක් show කරන්න',
+                    'description': 'Mixed language cheapest routes'
+                },
+                {
+                    'query': 'Colombo සිට යන මාර්ග මොනවාද?',
+                    'description': 'Mixed language route queries'
+                }
+            ]
+        },
+        # === SPELLING ERROR EXAMPLES ===
+        {
+            'category': 'Sinhala Spell Correction (සිංහල)',
+            'examples': [
+                {
+                    'query': 'පාන්දුරේ ඉඳන් ගාල්ල්ට කීයක් යනවද?',
+                    'description': 'Test spell correction (Panadura, Galle)'
+                },
+                {
+                    'query': 'කොළ්බ්හ සිට මහනුවර්ට ගාස්තුව කීයද?',
+                    'description': 'Test spell correction (Colombo, Kandy)'
+                }
+            ]
+        },
+        {
+            'category': 'Tamil Spell Correction (தமிழ்)',
+            'examples': [
+                {
+                    'query': 'கொழும்பு இருந்து கண்டி வரை பேருந்து கட்டணம் எவ்வளவு?',
+                    'description': 'Test Tamil spell correction'
+                },
+                {
+                    'query': 'பனதுரை இருந்து காலி வரை விலை எவ்வளவு?',
+                    'description': 'Test Tamil with common variations'
+                }
+            ]
+        },
+        {
+            'category': 'Singlish Spell Correction (Mixed)',
+            'examples': [
+                {
+                    'query': 'කොළඹ සිට Kandy ගාස්තුව කීයද?',
+                    'description': 'Test mixed language spell correction'
+                },
+                {
+                    'query': 'Colombo සිට ගාල්ලට bus fare කීයද?',
+                    'description': 'Test Singlish with English terms'
+                }
+            ]
+        }
+    ]
+    return jsonify({
+        'success': True,
+        'examples': examples
+    })
+@app.route('/api/nlp/advanced', methods=['POST'])
+def advanced_nlp_query():
+    """Advanced NLP query processing with detailed analysis.
+
+    Expects a JSON body with a 'query' string and returns whatever
+    enhanced_nlp_processor.process_query produces for it. A missing,
+    malformed, or non-object body, or a non-string 'query', is treated as an
+    empty query so the caller receives the validation message rather than an
+    internal error string.
+    """
+    try:
+        # silent=True yields None instead of raising on a missing/invalid body.
+        data = request.get_json(silent=True)
+        if not isinstance(data, dict):
+            data = {}
+        raw_query = data.get('query')
+        user_query = raw_query.strip() if isinstance(raw_query, str) else ''
+        if not user_query:
+            return jsonify({
+                'success': False,
+                'message': 'Please provide a query to process.'
+            })
+        # Process with enhanced NLP
+        result = enhanced_nlp_processor.process_query(user_query)
+        return jsonify(result)
+    except Exception as e:
+        return jsonify({
+            'success': False,
+            'message': f'Error processing advanced NLP query: {str(e)}'
+        })
+@app.route('/api/nlp/compare', methods=['POST'])
+def compare_routes():
+    """Compare multiple routes.
+
+    Expects a JSON body {'routes': [{'from': ..., 'to': ...}, ...]} with at
+    least two usable entries. Entries missing either endpoint are ignored.
+    Place names are sent to Neo4j as query parameters, never spliced into the
+    Cypher text, so request data cannot alter the query.
+    """
+    try:
+        data = request.get_json(silent=True)
+        if not isinstance(data, dict):
+            data = {}
+        routes = data.get('routes')
+        if not isinstance(routes, list):
+            routes = []
+        # Keep only entries naming both endpoints, so the MATCH and RETURN
+        # clauses are always built from the same set of routes.
+        valid_routes = [
+            (route.get('from'), route.get('to'))
+            for route in routes
+            if isinstance(route, dict) and route.get('from') and route.get('to')
+        ]
+        if len(valid_routes) < 2:
+            return jsonify({
+                'success': False,
+                'message': 'Please provide at least 2 routes to compare.'
+            })
+        # Build comparison query
+        match_parts = []
+        return_parts = []
+        params = {}
+        for i, (from_loc, to_loc) in enumerate(valid_routes):
+            params[f'from{i}'] = from_loc
+            params[f'to{i}'] = to_loc
+            match_parts.append(
+                f"(a{i}:Place {{name: $from{i}}})-[r{i}:Fare]->(b{i}:Place {{name: $to{i}}})"
+            )
+            return_parts.append(
+                f"a{i}.name + ' to ' + b{i}.name as route{i+1}, r{i}.fare as fare{i+1}"
+            )
+        comparison_query = (
+            "MATCH " + ", ".join(match_parts) + " RETURN " + ", ".join(return_parts)
+        )
+        # Execute query
+        with neo4j_service.driver.session() as session:
+            result = session.run(comparison_query, params)
+            results = [dict(record) for record in result]
+        return jsonify({
+            'success': True,
+            'data': results,
+            'message': f'Comparison of {len(valid_routes)} routes completed'
+        })
+    except Exception as e:
+        return jsonify({
+            'success': False,
+            'message': f'Error comparing routes: {str(e)}'
+        })
+@app.route('/api/nlp/range', methods=['POST'])
+def search_by_range():
+    """Search routes by price range.
+
+    Expects a JSON body with 'min_price' and/or 'max_price'. Both bounds are
+    inclusive. Each supplied bound must be numeric; it is converted to a
+    float and passed to Neo4j as a query parameter instead of being spliced
+    into the Cypher text.
+    """
+    try:
+        data = request.get_json(silent=True)
+        if not isinstance(data, dict):
+            data = {}
+        # Build range query
+        conditions = []
+        params = {}
+        for name, operator in (('min_price', '>='), ('max_price', '<=')):
+            value = data.get(name)
+            if value is None:
+                continue
+            try:
+                # bool is an int subclass; reject it rather than read True as 1.
+                if isinstance(value, bool):
+                    raise ValueError(name)
+                params[name] = float(value)
+            except (TypeError, ValueError):
+                return jsonify({
+                    'success': False,
+                    'message': f'{name} must be a number.'
+                })
+            conditions.append(f"r.fare {operator} ${name}")
+        if not conditions:
+            return jsonify({
+                'success': False,
+                'message': 'Please provide min_price or max_price or both.'
+            })
+        range_query = (
+            "MATCH (a:Place)-[r:Fare]->(b:Place) WHERE "
+            + " AND ".join(conditions)
+            + " RETURN a.name as from_place, b.name as to_place, r.fare as fare ORDER BY r.fare"
+        )
+        # Execute query
+        with neo4j_service.driver.session() as session:
+            result = session.run(range_query, params)
+            results = [dict(record) for record in result]
+        return jsonify({
+            'success': True,
+            'data': results,
+            'message': f'Found {len(results)} routes in the specified range'
+        })
+    except Exception as e:
+        return jsonify({
+            'success': False,
+            'message': f'Error searching by range: {str(e)}'
+        })
+@app.route('/api/nlp/test-all-types')
+def test_all_query_types():
+    """Run a fixed set of sample queries per query type and report the outcomes.
+
+    Every sample is passed to enhanced_nlp_processor.process_query. The
+    response groups the per-query results by type and adds an overall summary
+    (counts and success rate) plus the current Neo4j connection flag.
+    """
+    try:
+        # Sample queries, keyed by the query type they are meant to exercise
+        test_queries = {
+            'fare_inquiry': [
+                'What is the fare from Colombo to Kandy?',
+                'fare of anuradhapura to kandy',
+                'price from panadura to galle'
+            ],
+            'comparison': [
+                'Compare fares from Colombo to Kandy vs Colombo to Galle',
+                'Which is cheaper between Colombo to Kandy and Colombo to Anuradapura?'
+            ],
+            'range_search': [
+                'Find routes under 500 rupees',
+                'Show me routes between 200 and 800 rupees',
+                'Routes over 1000 rupees'
+            ],
+            'recommendation': [
+                'Recommend cheap routes',
+                'Show me popular destinations',
+                'What are the best routes from Colombo?'
+            ],
+            'route_inquiry': [
+                'Routes from Colombo',
+                'Routes to Galle',
+                'What routes depart from Kandy?'
+            ],
+            'statistics': [
+                'What is the average fare?',
+                'Database statistics',
+                'How many routes are there?'
+            ]
+        }
+
+        def run_single(sample):
+            # A failing sample is recorded as an unsuccessful result so the
+            # remaining samples still run.
+            try:
+                outcome = enhanced_nlp_processor.process_query(sample)
+                return {
+                    'query': sample,
+                    'result': outcome,
+                    'success': outcome.get('success', False)
+                }
+            except Exception as e:
+                return {
+                    'query': sample,
+                    'result': {
+                        'success': False,
+                        'message': f'Error processing query: {str(e)}'
+                    },
+                    'success': False
+                }
+
+        results = {}
+        for query_type, queries in test_queries.items():
+            type_results = [run_single(sample) for sample in queries]
+            passed = [entry for entry in type_results if entry['success']]
+            results[query_type] = {
+                'description': f'Test results for {query_type} queries',
+                'total_queries': len(queries),
+                'successful_queries': len(passed),
+                'examples': type_results
+            }
+        # Summary statistics
+        total_queries = sum(map(len, test_queries.values()))
+        total_successful = sum(
+            entry['successful_queries'] for entry in results.values()
+        )
+        if total_queries > 0:
+            success_rate = round((total_successful / total_queries) * 100, 2)
+        else:
+            success_rate = 0
+        summary = {
+            'total_query_types': len(test_queries),
+            'total_queries_tested': total_queries,
+            'successful_queries': total_successful,
+            'success_rate': success_rate
+        }
+        return jsonify({
+            'success': True,
+            'message': f'Tested {total_queries} queries across {len(test_queries)} types. {total_successful} successful.',
+            'summary': summary,
+            'results': results,
+            'neo4j_connected': neo4j_service.is_connected()
+        })
+    except Exception as e:
+        return jsonify({
+            'success': False,
+            'message': f'Error testing query types: {str(e)}',
+            'neo4j_connected': neo4j_service.is_connected()
+        })
+@app.errorhandler(404)
+def not_found(error):
+    """Answer requests for unknown endpoints with a JSON body and HTTP 404."""
+    payload = {'success': False, 'message': 'Endpoint not found'}
+    return jsonify(payload), 404
+@app.errorhandler(500)
+def internal_error(error):
+    """Answer unhandled server errors with a JSON body and HTTP 500."""
+    payload = {'success': False, 'message': 'Internal server error'}
+    return jsonify(payload), 500
+if __name__ == '__main__':
+    # Hugging Face Spaces uses port 7860 by default
+    port = int(os.getenv('PORT', 7860))
+    rule = "=" * 60
+    # Startup banner
+    for line in (
+        "🚌 Natural Language Transport Query System",
+        rule,
+        f"🚀 Starting on port {port}",
+        f"🌐 Open your browser and go to: http://localhost:{port}",
+    ):
+        print(line)
+    # Report the Neo4j connection state, with database size when available
+    if not neo4j_service.is_connected():
+        print("⚠️  Neo4j not connected - some features may not work")
+    else:
+        print("✅ Connected to Neo4j database")
+        stats = neo4j_service.get_route_statistics()
+        if stats:
+            print(f"📊 Database: {stats.get('total_places', 0)} places, {stats.get('total_routes', 0)} routes")
+    # Report whether the spell corrector can use the LLM
+    print(
+        "🤖 LLM integration available for spell correction"
+        if spell_corrector.llm_available
+        else "⚠️  LLM not available - using fuzzy matching only"
+    )
+    capability_lines = (
+        "\n🎯 Enhanced Natural Language Capabilities:",
+        "   • Multiple query formats (fare, price, cost)",
+        "   • Natural language patterns (from X to Y, X to Y fare)",
+        "   • Question formats (What is, How much, Show me)",
+        "   • Compact formats (Colombo to Kandy fare)",
+        "   • Spell correction and fuzzy matching",
+        "   • LLM-powered query interpretation",
+        "   • Automatic Cypher query generation",
+        "   • Advanced intent classification",
+        "   • Entity extraction and normalization",
+        "   • Comparison queries (vs, versus, compare)",
+        "   • Range search queries (under, over, between)",
+        "   • Recommendation queries (recommend, suggest)",
+        "   • Confidence scoring for query understanding",
+        "   • Multi-language support: Sinhala, Tamil, Singlish, English",
+        "   • Automatic language detection and translation",
+        "   • Dictionary-based, LLM, and API translation methods",
+        "   • Response translation back to detected language",
+    )
+    endpoint_lines = (
+        "\n🔗 Available API Endpoints:",
+        "   • /api/query - Process natural language queries (enhanced NLP)",
+        "   • /api/nlp/capabilities - View enhanced NLP capabilities with live examples",
+        "   • /api/nlp/test-all-types - Test all query types with live results",
+        "   • /api/nlp/test - Test queries with detailed analysis",
+        "   • /api/nlp/demo - Get comprehensive demo queries",
+        "   • /api/examples - Get categorized example queries",
+        "   • /api/sinhala/examples - Get Sinhala example queries",
+        "   • /api/tamil/examples - Get Tamil example queries",
+        "   • /api/language/detect - Detect language of input text",
+        "   • /api/translation/test - Test translation functionality",
+        "   • /api/translation/translate - Translate text between languages",
+        "   • /api/status - System status and statistics",
+        "   • /api/suggestions - Get location suggestions",
+        "   • /api/places - Get all available places",
+    )
+    for line in capability_lines + endpoint_lines:
+        print(line)
+    print(rule)
+    try:
+        # debug stays off for production
+        app.run(debug=False, port=port, host='0.0.0.0')
+    except Exception as e:
+        print(f"❌ Error starting application: {e}")
+        print("💡 Try running as administrator or check if another application is using the port")

enhanced_nlp_processor.py CHANGED Viewed

@@ -1,904 +1,904 @@
-#!/usr/bin/env python3
-"""
-Enhanced NLP Processor for Transport Query Application
-Advanced natural language understanding and query processing
-"""
-import re
-import json
-from typing import Dict, List, Tuple, Optional, Any
-from datetime import datetime
-from spell_corrector import SpellCorrector
-from neo4j_service import Neo4jService
-from config import Config
-from logger import get_logger
-class EnhancedNLPProcessor:
-    """Advanced NLP processor with sophisticated query understanding"""
-    def __init__(self):
-        """Create the collaborating services and define the pattern/keyword tables.
-
-        Instantiates Config, SpellCorrector, and Neo4jService, obtains a logger
-        named after this class, and stores two lookup tables on the instance:
-        query_patterns (regex strings grouped by query category) and
-        intent_keywords (keyword lists grouped by intent name).
-        """
-        self.config = Config()
-        self.spell_corrector = SpellCorrector()
-        self.neo4j_service = Neo4jService()
-        self.logger = get_logger(self.__class__.__name__)
-        # Query patterns and templates
-        # The location capture groups accept ASCII letters and whitespace only.
-        self.query_patterns = {
-            # Two capture groups: origin, destination
-            'fare_queries': [
-                r'(?:what\s+is\s+)?(?:the\s+)?(?:fare|price|cost)(?:\s+of)?(?:\s+from)?\s+([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)',
-                r'(?:what\s+is\s+)?(?:the\s+)?(?:bus\s+)?(?:fare|price|cost)(?:\s+of)?(?:\s+from)?\s+([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)',
-                r'(?:how\s+much\s+)?(?:is|does)\s+(?:the\s+)?(?:bus\s+)?(?:fare|price|cost)\s+(?:from\s+)?([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)',
-                r'([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)\s+(?:fare|price|cost)',
-                r'(?:fare|price|cost)\s+(?:from\s+)?([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)',
-                r'(?:travel|transport)\s+(?:cost|price|fare)\s+(?:from\s+)?([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)',
-                r'(?:bus|train)\s+(?:fare|price|cost)\s+(?:from\s+)?([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)',
-                r'(?:ticket\s+price|ticket\s+fare)\s+(?:from\s+)?([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)'
-            ],
-            # Four capture groups: origin and destination of each of the two routes
-            'comparison_queries': [
-                r'(?:compare|difference)\s+(?:between\s+)?(?:fares?|prices?|costs?)\s+(?:from\s+)?([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)\s+(?:and|vs|versus)\s+([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)',
-                r'(?:which\s+is\s+)?(?:cheaper|more\s+expensive)\s+(?:between\s+)?([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)\s+(?:and|vs|versus)\s+([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)'
-            ],
-            # Capture groups hold amounts written with digits and commas
-            'range_queries': [
-                r'(?:routes?|fares?|prices?)\s+(?:between|from)\s+([0-9,]+)\s+(?:and|to)\s+([0-9,]+)\s+(?:rupees?|rs?)',
-                r'(?:find|show)\s+(?:routes?|fares?|prices?)\s+(?:under|below|less\s+than)\s+([0-9,]+)\s+(?:rupees?|rs?)',
-                r'(?:find|show)\s+(?:routes?|fares?|prices?)\s+(?:over|above|more\s+than)\s+([0-9,]+)\s+(?:rupees?|rs?)'
-            ],
-            # One location per pattern, except the last which captures origin and destination
-            'route_queries': [
-                r'(?:routes?|buses?|trains?)\s+(?:from|departing\s+from)\s+([a-zA-Z\s]+)',
-                r'(?:routes?|buses?|trains?)\s+(?:to|arriving\s+at)\s+([a-zA-Z\s]+)',
-                r'(?:how\s+many\s+)?(?:routes?|buses?|trains?)\s+(?:connect|go\s+to|from)\s+([a-zA-Z\s]+)',
-                r'(?:direct|non-stop)\s+(?:routes?|buses?|trains?)\s+(?:from\s+)?([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)'
-            ],
-            # No capture groups: these only detect the kind of question
-            'statistical_queries': [
-                r'(?:average|mean|median)\s+(?:fare|price|cost)',
-                r'(?:total|sum)\s+(?:of\s+)?(?:all\s+)?(?:fares?|prices?|costs?)',
-                r'(?:how\s+many\s+)?(?:routes?|places?|locations?)',
-                r'(?:database|system)\s+(?:statistics?|stats?|overview)',
-                r'(?:summary|overview)\s+(?:of\s+)?(?:transport|fare)\s+(?:data|database)'
-            ],
-            # Only the "best/optimal route" pattern captures locations
-            'recommendation_queries': [
-                r'(?:recommend|suggest)\s+(?:cheap|budget|affordable)\s+(?:routes?|options?)',
-                r'(?:best|optimal)\s+(?:route|way)\s+(?:from\s+)?([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)',
-                r'(?:popular|frequent)\s+(?:routes?|destinations?)',
-                r'(?:hidden|secret|unknown)\s+(?:routes?|destinations?)'
-            ]
-        }
-        # Query intent classification
-        # Maps each intent name to the words/phrases associated with it.
-        self.intent_keywords = {
-            'fare_inquiry': ['fare', 'price', 'cost', 'how much', 'what is the cost'],
-            'route_inquiry': ['route', 'bus', 'train', 'transport', 'how to get', 'way to'],
-            'comparison': ['compare', 'difference', 'vs', 'versus', 'which is', 'better'],
-            'statistics': ['statistics', 'stats', 'overview', 'summary', 'total', 'average'],
-            'recommendation': ['recommend', 'suggest', 'best', 'optimal', 'popular'],
-            'range_search': ['between', 'under', 'over', 'above', 'below', 'range'],
-            'availability': ['available', 'exist', 'have', 'is there', 'can i']
-        }
-    def process_query(self, user_query: str) -> Dict[str, Any]:
-        """
-        Turn a natural language query into a database lookup and a response.
-        The query is preprocessed, entities and intent are derived from it, and
-        a Cypher query is generated. When a Cypher query is produced it is
-        executed and the rows are formatted; otherwise the unclear-query
-        handler builds the response. A 'query_analysis' section is then added.
-        Args:
-            user_query: Natural language query string
-        Returns:
-            The response dictionary, or a failure dictionary with 'success',
-            'message' and 'suggestions' keys if any step raises
-        """
-        try:
-            # Normalize the text first; every later step works on this form
-            normalized = self._preprocess_query(user_query)
-            self.logger.info(f"Processing query: original='{user_query}', preprocessed='{normalized}'")
-            # Derive entities, then the intent that depends on them
-            found_entities = self._extract_entities(normalized)
-            detected_intent = self._classify_intent(normalized, found_entities)
-            # Build the Cypher statement for this intent
-            cypher = self._generate_cypher_query(detected_intent, found_entities, normalized)
-            self.logger.debug(f"Intent: {detected_intent}; Entities: {found_entities}; Cypher: {str(cypher).strip()[:200]}")
-            if not cypher:
-                # Nothing to run: fall back to the unclear-query response
-                response = self._handle_unclear_query(normalized)
-            else:
-                rows = self._execute_query(cypher)
-                self.logger.info(f"Query results count: {len(rows)}")
-                response = self._format_response(detected_intent, found_entities, rows, normalized)
-            # Attach the analysis metadata to whichever response was built
-            analysis = {
-                'original_query': user_query,
-                'processed_query': normalized,
-                'intent': detected_intent,
-                'entities': found_entities,
-                'confidence': self._calculate_confidence(detected_intent, found_entities)
-            }
-            response.update({'query_analysis': analysis})
-            return response
-        except Exception as e:
-            failure = {
-                'success': False,
-                'message': f'Error processing query: {str(e)}',
-                'suggestions': self._get_suggestions()
-            }
-            return failure
-    def _preprocess_query(self, query: str) -> str:
-        """Preprocess and normalize the query.
-
-        Lower-cases the text, trims it, collapses runs of whitespace to a
-        single space, and expands common abbreviations and symbols
-        (rs./rs/lkr -> rupees, vs -> versus, w/o -> without, w/ -> with,
-        arrows -> to, & -> and).
-        Args:
-            query: Raw query string
-        Returns:
-            The normalized query string
-        """
-        # Convert to lowercase and remove extra whitespace
-        query = re.sub(r'\s+', ' ', query.lower().strip())
-        # Letter abbreviations are expanded only when they are not embedded in
-        # a longer word; plain substring replacement turned "versus" into
-        # "verupeesus". Order matters: "rs." before "rs", and "w/o" before
-        # "w/" (otherwise "w/o" becomes "witho").
-        abbreviation_patterns = (
-            (r'(?<![a-z])rs\.', 'rupees'),
-            (r'(?<![a-z])rs(?![a-z])', 'rupees'),
-            (r'(?<![a-z])lkr(?![a-z])', 'rupees'),
-            (r'(?<![a-z])vs(?![a-z])', 'versus'),
-            (r'(?<![a-z])w/o(?![a-z])', 'without'),
-            (r'(?<![a-z])w/', 'with'),
-        )
-        for pattern, replacement in abbreviation_patterns:
-            query = re.sub(pattern, replacement, query)
-        # Symbols cannot occur inside a word, so substring replacement is safe
-        for symbol, replacement in (('→', 'to'), ('->', 'to'), ('&', 'and')):
-            query = query.replace(symbol, replacement)
-        return query
-    def _extract_entities(self, query: str) -> Dict[str, Any]:
-        """Extract entities from the query"""
-        entities = {
-            'locations': [],
-            'numbers': [],
-            'currencies': [],
-            'comparators': [],
-            'time_expressions': []
-        }
-        # Extract locations with priority for different query types
-        comparison_patterns = [
-            r'(?:which\s+is\s+)?(?:cheaper|more\s+expensive)\s+(?:between\s+)?([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)\s+(?:and|vs|versus)\s+([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)(?:\s|$|\?)',
-            r'(?:what\s+is\s+)?(?:the\s+)?(?:difference|compare)\s+(?:in\s+)?(?:fare|price|cost)\s+(?:between\s+)?([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)\s+(?:and|vs|versus)\s+([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)(?:\s|$|\?)',
-            r'(?:compare|difference)\s+(?:between\s+)?(?:fares?|prices?|costs?)\s+(?:from\s+)?([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)\s+(?:and|vs|versus)\s+([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)(?:\s|$|\?)',
-            # Simpler patterns for comparison
-            r'([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)\s+(?:and|vs|versus)\s+([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)(?:\s|$|\?)',
-            r'([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)\s+(?:and|vs|versus)\s+([a-zA-Z\s]+?)(?:\s|$|\?)'
-        ]
-        fare_patterns = [
-            r'(?:fare|price|cost)\s+(?:of|from)?\s+([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)(?:\s|$|\?)',
-            r'(?:what\s+is\s+)?(?:the\s+)?(?:fare|price|cost)(?:\s+of)?(?:\s+from)?\s+([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)(?:\s|$|\?)',
-            r'(?:how\s+much\s+)?(?:is|does)\s+(?:the\s+)?(?:fare|price|cost)\s+(?:from\s+)?([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)(?:\s|$|\?)'
-        ]
-        general_patterns = [
-            r'from\s+([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)(?:\s|$|\?)',
-            r'([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)(?:\s|$|\?)',
-            r'between\s+([a-zA-Z\s]+?)\s+and\s+([a-zA-Z\s]+?)(?:\s|$|\?)'
-        ]
-        # Use a set to avoid duplicates
-        seen_locations = set()
-        # Try comparison patterns first (highest priority)
-        for pattern in comparison_patterns:
-            matches = re.finditer(pattern, query, re.IGNORECASE)
-            for match in matches:
-                locations = [loc.strip() for loc in match.groups() if loc.strip()]
-                for loc in locations:
-                    # Skip if we've already processed this location
-                    if loc.lower() in seen_locations:
-                        continue
-                    seen_locations.add(loc.lower())
-                    corrected, confidence, method = self.spell_corrector.correct_location(loc)
-                    if confidence > 0.5:
-                        entities['locations'].append({
-                            'original': loc,
-                            'corrected': corrected,
-                            'confidence': confidence,
-                            'method': method
-                        })
-        # If no locations found with comparison patterns, try fare patterns
-        if not entities['locations']:
-            for pattern in fare_patterns:
-                matches = re.finditer(pattern, query, re.IGNORECASE)
-                for match in matches:
-                    locations = [loc.strip() for loc in match.groups() if loc.strip()]
-                    for loc in locations:
-                        # Skip if we've already processed this location
-                        if loc.lower() in seen_locations:
-                            continue
-                        seen_locations.add(loc.lower())
-                        corrected, confidence, method = self.spell_corrector.correct_location(loc)
-                        if confidence > 0.5:
-                            entities['locations'].append({
-                                'original': loc,
-                                'corrected': corrected,
-                                'confidence': confidence,
-                                'method': method
-                            })
-        # If no locations found with fare patterns, try general patterns
-        if not entities['locations']:
-            for pattern in general_patterns:
-                matches = re.finditer(pattern, query, re.IGNORECASE)
-                for match in matches:
-                    locations = [loc.strip() for loc in match.groups() if loc.strip()]
-                    for loc in locations:
-                        # Skip if we've already processed this location
-                        if loc.lower() in seen_locations:
-                            continue
-                        seen_locations.add(loc.lower())
-                        corrected, confidence, method = self.spell_corrector.correct_location(loc)
-                        if confidence > 0.5:
-                            entities['locations'].append({
-                                'original': loc,
-                                'corrected': corrected,
-                                'confidence': confidence,
-                                'method': method
-                            })
-        # Extract numbers and currencies
-        number_patterns = [
-            r'(under|below|less\s+than|over|above|more\s+than)\s+(\d+(?:,\d+)*(?:\.\d+)?)\s*(rupees?|rs?|lkr)?',
-            r'between\s+(\d+(?:,\d+)*(?:\.\d+)?)\s+and\s+(\d+(?:,\d+)*(?:\.\d+)?)\s*(rupees?|rs?|lkr)?',
-            r'(\d+(?:,\d+)*(?:\.\d+)?)\s*(rupees?|rs?|lkr)?'
-        ]
-        for pattern in number_patterns:
-            matches = re.finditer(pattern, query, re.IGNORECASE)
-            for match in matches:
-                groups = match.groups()
-                if len(groups) >= 2:
-                    if groups[0] in ['under', 'below', 'less than', 'over', 'above', 'more than']:
-                        # Pattern: (under|below|less than|over|above|more than) (number) (currency)
-                        comparator = groups[0]
-                        number = groups[1]
-                        currency = groups[2] if len(groups) >= 3 else 'rupees'
-                        entities['numbers'].append({
-                            'value': float(number.replace(',', '')),
-                            'currency': currency,
-                            'comparator': comparator
-                        })
-                    elif 'between' in pattern:
-                        # Pattern: between (number1) and (number2) (currency)
-                        min_number = groups[0]
-                        max_number = groups[1]
-                        currency = groups[2] if len(groups) >= 3 else 'rupees'
-                        entities['numbers'].append({
-                            'value': float(min_number.replace(',', '')),
-                            'currency': currency,
-                            'comparator': 'between_min'
-                        })
-                        entities['numbers'].append({
-                            'value': float(max_number.replace(',', '')),
-                            'currency': currency,
-                            'comparator': 'between_max'
-                        })
-                    else:
-                        # Pattern: (number) (currency)
-                        number = groups[0]
-                        currency = groups[1] if len(groups) >= 2 else 'rupees'
-                        entities['numbers'].append({
-                            'value': float(number.replace(',', '')),
-                            'currency': currency,
-                            'comparator': None
-                        })
-        # Extract comparators
-        comparator_patterns = [
-            r'(cheaper|more\s+expensive|better|worse|faster|slower)',
-            r'(compare|difference|vs|versus)',
-            r'(under|below|less\s+than|over|above|more\s+than)'
-        ]
-        for pattern in comparator_patterns:
-            matches = re.finditer(pattern, query, re.IGNORECASE)
-            for match in matches:
-                entities['comparators'].append(match.group(1).lower())
-        return entities
-    def _classify_intent(self, query: str, entities: Dict = None) -> Dict[str, Any]:
-        """Classify the intent of the query"""
-        intent_scores = {}
-        for intent, keywords in self.intent_keywords.items():
-            score = 0
-            for keyword in keywords:
-                if keyword in query:
-                    score += 1
-            intent_scores[intent] = score
-        # Get primary intent
-        primary_intent = max(intent_scores.items(), key=lambda x: x[1])
-        # Check for specific patterns with priority
-        if any(pattern in query for pattern in ['compare', 'difference', 'vs', 'versus', 'cheaper', 'more expensive']):
-            primary_intent = ('comparison', 10)
-        elif any(pattern in query for pattern in ['recommend', 'suggest', 'best', 'optimal', 'popular']):
-            primary_intent = ('recommendation', 10)
-        elif any(pattern in query for pattern in ['between', 'under', 'over', 'above', 'below', 'range']):
-            primary_intent = ('range_search', 10)
-        elif any(pattern in query for pattern in ['fare', 'price', 'cost', 'how much']):
-            # Check if we have at least 2 locations
-            if entities and len(entities.get('locations', [])) >= 2:
-                primary_intent = ('fare_inquiry', 10)
-        elif any(pattern in query for pattern in ['route', 'bus', 'train', 'transport']):
-            primary_intent = ('route_inquiry', 10)
-        return {
-            'primary': primary_intent[0],
-            'confidence': primary_intent[1] / 10,
-            'all_scores': intent_scores
-        }
-    def _generate_cypher_query(self, intent: Dict, entities: Dict, query: str) -> Optional[str]:
-        """Generate Cypher query using LLM for better understanding"""
-        try:
-            # Try LLM-based query generation first
-            llm_query = self._generate_cypher_with_llm(query, intent, entities)
-            if llm_query:
-                return llm_query
-        except Exception as e:
-            print(f"LLM query generation failed: {e}")
-        # Fallback to rule-based generation
-        primary_intent = intent['primary']
-        if primary_intent == 'fare_inquiry':
-            return self._generate_fare_query(entities)
-        elif primary_intent == 'comparison':
-            return self._generate_comparison_query(entities)
-        elif primary_intent == 'route_inquiry':
-            return self._generate_route_query(entities, query)
-        elif primary_intent == 'statistics':
-            return self._generate_statistics_query(entities)
-        elif primary_intent == 'recommendation':
-            return self._generate_recommendation_query(entities, query)
-        elif primary_intent == 'range_search':
-            return self._generate_range_query(entities)
-        else:
-            return self._generate_fallback_query(query)
-    def _generate_fare_query(self, entities: Dict) -> Optional[str]:
-        """Generate fare inquiry Cypher query"""
-        locations = entities.get('locations', [])
-        if len(locations) >= 2:
-            from_loc = locations[0]['corrected']
-            to_loc = locations[1]['corrected']
-            return f"""
-            MATCH (a:Place {{name: '{from_loc}'}})-[r:Fare]->(b:Place {{name: '{to_loc}'}})
-            RETURN
-                a.name as from_place,
-                b.name as to_place,
-                r.fare as fare,
-                'Direct route' as route_type
-            """
-        return None
-    def _generate_comparison_query(self, entities: Dict) -> Optional[str]:
-        """Generate comparison Cypher query"""
-        locations = entities.get('locations', [])
-        if len(locations) >= 3:
-            # Handle case where we have same origin, different destinations
-            if len(locations) == 3:
-                # Pattern: "Colombo to Kandy and Colombo to Anuradapura"
-                route1_from = locations[0]['corrected']
-                route1_to = locations[1]['corrected']
-                route2_from = locations[0]['corrected']  # Same origin
-                route2_to = locations[2]['corrected']
-            elif len(locations) >= 4:
-                # Pattern: "Colombo to Kandy and Anuradapura to Galle"
-                route1_from = locations[0]['corrected']
-                route1_to = locations[1]['corrected']
-                route2_from = locations[2]['corrected']
-                route2_to = locations[3]['corrected']
-            else:
-                return None
-            return f"""
-            MATCH (a1:Place {{name: '{route1_from}'}})-[r1:Fare]->(b1:Place {{name: '{route1_to}'}})
-            MATCH (a2:Place {{name: '{route2_from}'}})-[r2:Fare]->(b2:Place {{name: '{route2_to}'}})
-            RETURN
-                a1.name + ' to ' + b1.name as route1,
-                r1.fare as fare1,
-                a2.name + ' to ' + b2.name as route2,
-                r2.fare as fare2,
-                r1.fare - r2.fare as difference,
-                CASE
-                    WHEN r1.fare < r2.fare THEN 'Route 1 is cheaper'
-                    WHEN r1.fare > r2.fare THEN 'Route 2 is cheaper'
-                    ELSE 'Both routes have the same fare'
-                END as comparison
-            """
-        return None
-    def _generate_route_query(self, entities: Dict, query: str) -> Optional[str]:
-        """Generate route inquiry Cypher query"""
-        locations = entities.get('locations', [])
-        if 'from' in query and locations:
-            location = locations[0]['corrected']
-            return f"""
-            MATCH (a:Place {{name: '{location}'}})-[r:Fare]->(b:Place)
-            RETURN
-                a.name as from_place,
-                b.name as to_place,
-                r.fare as fare
-            ORDER BY r.fare
-            """
-        elif 'to' in query and locations:
-            location = locations[0]['corrected']
-            return f"""
-            MATCH (a:Place)-[r:Fare]->(b:Place {{name: '{location}'}})
-            RETURN
-                a.name as from_place,
-                b.name as to_place,
-                r.fare as fare
-            ORDER BY r.fare
-            """
-        return None
-    def _generate_statistics_query(self, entities: Dict) -> str:
-        """Generate statistics Cypher query"""
-        return """
-        MATCH (p:Place)
-        MATCH ()-[r:Fare]->()
-        RETURN
-            count(DISTINCT p) as total_places,
-            count(r) as total_routes,
-            round(avg(r.fare), 2) as average_fare,
-            min(r.fare) as minimum_fare,
-            max(r.fare) as maximum_fare,
-            round(stdDev(r.fare), 2) as fare_standard_deviation
-        """
-    def _generate_recommendation_query(self, entities: Dict, query: str) -> str:
-        """Generate recommendation Cypher query"""
-        if 'cheap' in query or 'budget' in query or 'affordable' in query:
-            return """
-            MATCH (a:Place)-[r:Fare]->(b:Place)
-            RETURN
-                a.name as from_place,
-                b.name as to_place,
-                r.fare as fare
-            ORDER BY r.fare ASC
-            LIMIT 10
-            """
-        elif 'popular' in query or 'frequent' in query:
-            return """
-            MATCH (a:Place)-[r:Fare]->(b:Place)
-            RETURN
-                a.name as from_place,
-                b.name as to_place,
-                r.fare as fare
-            ORDER BY r.fare DESC
-            LIMIT 10
-            """
-        else:
-            return """
-            MATCH (a:Place)-[r:Fare]->(b:Place)
-            RETURN
-                a.name as from_place,
-                b.name as to_place,
-                r.fare as fare
-            ORDER BY r.fare ASC
-            LIMIT 5
-            """
-    def _generate_range_query(self, entities: Dict) -> Optional[str]:
-        """Generate range search Cypher query"""
-        numbers = entities.get('numbers', [])
-        if numbers:
-            # Check for between range
-            between_min = None
-            between_max = None
-            single_value = None
-            single_comparator = None
-            for number in numbers:
-                comparator = number.get('comparator', '')
-                value = number['value']
-                if comparator == 'between_min':
-                    between_min = value
-                elif comparator == 'between_max':
-                    between_max = value
-                elif comparator in ['under', 'below', 'less than', 'over', 'above', 'more than']:
-                    single_value = value
-                    single_comparator = comparator
-            # Generate query based on type
-            if between_min is not None and between_max is not None:
-                return f"""
-                MATCH (a:Place)-[r:Fare]->(b:Place)
-                WHERE r.fare >= {between_min} AND r.fare <= {between_max}
-                RETURN
-                    a.name as from_place,
-                    b.name as to_place,
-                    r.fare as fare
-                ORDER BY r.fare ASC
-                """
-            elif single_value is not None and single_comparator is not None:
-                if single_comparator in ['under', 'below', 'less than']:
-                    return f"""
-                    MATCH (a:Place)-[r:Fare]->(b:Place)
-                    WHERE r.fare < {single_value}
-                    RETURN
-                        a.name as from_place,
-                        b.name as to_place,
-                        r.fare as fare
-                    ORDER BY r.fare ASC
-                    """
-                elif single_comparator in ['over', 'above', 'more than']:
-                    return f"""
-                    MATCH (a:Place)-[r:Fare]->(b:Place)
-                    WHERE r.fare > {single_value}
-                    RETURN
-                        a.name as from_place,
-                        b.name as to_place,
-                        r.fare as fare
-                    ORDER BY r.fare DESC
-                    """
-        return None
-    def _generate_cypher_with_llm(self, query: str, intent: Dict, entities: Dict) -> Optional[str]:
-        """Generate Cypher query using LLM for better understanding"""
-        try:
-            if not self.config.OPENAI_API_KEY:
-                return None
-            # Get available places for context
-            available_places = list(self.neo4j_service.get_all_places())
-            # Create comprehensive prompt for Cypher generation
-            prompt = f"""
-            You are a Neo4j Cypher query generator for a transport database.
-            Database Schema:
-            - Nodes: Place (with property 'name')
-            - Relationships: Fare (with property 'fare')
-            Available Places: {', '.join(available_places[:50])}... (total: {len(available_places)})
-            User Query: "{query}"
-            Detected Intent: {intent.get('primary', 'unknown')}
-            Extracted Entities: {entities}
-            Your task is to generate a valid Cypher query that answers the user's question.
-            Query Types and Examples:
-            1. FARE INQUIRY:
-               - "What is the fare from Colombo to Kandy?"
-               - Cypher: MATCH (a:Place {{name: 'Colombo'}})-[r:Fare]->(b:Place {{name: 'Kandy'}}) RETURN a.name as from_place, b.name as to_place, r.fare as fare
-            2. COMPARISON:
-               - "Compare fares from Colombo to Kandy vs Colombo to Galle"
-               - Cypher: MATCH (a1:Place {{name: 'Colombo'}})-[r1:Fare]->(b1:Place {{name: 'Kandy'}}) MATCH (a2:Place {{name: 'Colombo'}})-[r2:Fare]->(b2:Place {{name: 'Galle'}}) RETURN a1.name + ' to ' + b1.name as route1, r1.fare as fare1, a2.name + ' to ' + b2.name as route2, r2.fare as fare2, r1.fare - r2.fare as difference
-            3. RANGE SEARCH:
-               - "Find routes under 500 rupees"
-               - Cypher: MATCH (a:Place)-[r:Fare]->(b:Place) WHERE r.fare < 500 RETURN a.name as from_place, b.name as to_place, r.fare as fare ORDER BY r.fare ASC
-            4. RECOMMENDATION:
-               - "Recommend cheap routes"
-               - Cypher: MATCH (a:Place)-[r:Fare]->(b:Place) RETURN a.name as from_place, b.name as to_place, r.fare as fare ORDER BY r.fare ASC LIMIT 10
-            5. STATISTICS:
-               - "What is the average fare?"
-               - Cypher: MATCH ()-[r:Fare]->() RETURN round(avg(r.fare), 2) as average_fare, min(r.fare) as min_fare, max(r.fare) as max_fare
-            6. ROUTE INQUIRY:
-               - "Routes from Colombo"
-               - Cypher: MATCH (a:Place {{name: 'Colombo'}})-[r:Fare]->(b:Place) RETURN a.name as from_place, b.name as to_place, r.fare as fare ORDER BY r.fare
-            Important Rules:
-            1. Always use proper Cypher syntax
-            2. Use exact place names from the available places list
-            3. For comparisons, use multiple MATCH clauses
-            4. For ranges, use WHERE clauses with appropriate operators
-            5. For statistics, use aggregation functions
-            6. Always include meaningful column aliases
-            7. Use ORDER BY for sorted results
-            8. Use LIMIT for large result sets
-            Return ONLY the Cypher query, nothing else. If you cannot generate a valid query, return "FALLBACK".
-            """
-            cypher_query = None
-            # Prefer new SDK
-            try:
-                from openai import OpenAI
-                client = OpenAI(api_key=self.config.OPENAI_API_KEY)
-                response = client.chat.completions.create(
-                    model=self.config.OPENAI_MODEL,
-                    messages=[
-                        {"role": "system", "content": "You are a Cypher query generator. Return only valid Cypher queries."},
-                        {"role": "user", "content": prompt}
-                    ],
-                    max_tokens=300,
-                    temperature=0.1
-                )
-                cypher_query = response.choices[0].message.content.strip()
-            except Exception as sdk_err:
-                import openai
-                try:
-                    openai.api_key = self.config.OPENAI_API_KEY
-                    response = openai.ChatCompletion.create(
-                        model=self.config.OPENAI_MODEL,
-                        messages=[
-                            {"role": "system", "content": "You are a Cypher query generator. Return only valid Cypher queries."},
-                            {"role": "user", "content": prompt}
-                        ],
-                        max_tokens=300,
-                        temperature=0.1
-                    )
-                    cypher_query = response.choices[0].message.content.strip()
-                except Exception:
-                    raise sdk_err
-            # Validate the response
-            if cypher_query.upper() == "FALLBACK":
-                return None
-            # Basic validation - check if it starts with MATCH
-            if cypher_query.upper().startswith('MATCH'):
-                return cypher_query
-            return None
-        except Exception as e:
-            print(f"LLM Cypher generation error: {e}")
-            return None
-    def _generate_fallback_query(self, query: str) -> Optional[str]:
-        """Generate fallback query when intent is unclear"""
-        # Try to extract locations using spell corrector
-        locations = self.spell_corrector.extract_locations_from_query(query)
-        if len(locations) >= 2:
-            from_loc = locations[0][1]
-            to_loc = locations[1][1]
-            return f"""
-            MATCH (a:Place {{name: '{from_loc}'}})-[r:Fare]->(b:Place {{name: '{to_loc}'}})
-            RETURN
-                a.name as from_place,
-                b.name as to_place,
-                r.fare as fare
-            """
-        # Additional fallback: direct pattern matching for fare queries
-        if 'fare' in query.lower() or 'price' in query.lower() or 'cost' in query.lower():
-            import re
-            fare_patterns = [
-                r'fare\s+(?:of|from)?\s+([a-zA-Z\s]+)\s+to\s+([a-zA-Z\s]+)',
-                r'price\s+(?:of|from)?\s+([a-zA-Z\s]+)\s+to\s+([a-zA-Z\s]+)',
-                r'cost\s+(?:of|from)?\s+([a-zA-Z\s]+)\s+to\s+([a-zA-Z\s]+)',
-                r'(?:what\s+is\s+)?(?:the\s+)?(?:fare|price|cost)(?:\s+of)?(?:\s+from)?\s+([a-zA-Z\s]+)\s+to\s+([a-zA-Z\s]+)',
-                r'(?:how\s+much\s+)?(?:is|does)\s+(?:the\s+)?(?:fare|price|cost)\s+(?:from\s+)?([a-zA-Z\s]+)\s+to\s+([a-zA-Z\s]+)'
-            ]
-            for pattern in fare_patterns:
-                match = re.search(pattern, query.lower())
-                if match:
-                    from_loc = match.group(1).strip()
-                    to_loc = match.group(2).strip()
-                    # Correct locations
-                    from_corrected, from_conf, _ = self.spell_corrector.correct_location(from_loc)
-                    to_corrected, to_conf, _ = self.spell_corrector.correct_location(to_loc)
-                    if from_conf > 0.5 and to_conf > 0.5:
-                        return f"""
-                        MATCH (a:Place {{name: '{from_corrected}'}})-[r:Fare]->(b:Place {{name: '{to_corrected}'}})
-                        RETURN a.name as from_place, b.name as to_place, r.fare as fare
-                        """
-        return None
-    def _execute_query(self, cypher_query: str) -> List[Dict]:
-        """Execute Cypher query and return results"""
-        try:
-            with self.neo4j_service.driver.session() as session:
-                result = session.run(cypher_query)
-                return [dict(record) for record in result]
-        except Exception as e:
-            print(f"Query execution error: {e}")
-            return []
-    def _format_response(self, intent: Dict, entities: Dict, results: List[Dict], query: str) -> Dict[str, Any]:
-        """Format the response based on intent and results"""
-        primary_intent = intent['primary']
-        if not results:
-            return {
-                'success': False,
-                'message': 'No results found for your query.',
-                'suggestions': self._get_suggestions()
-            }
-        if primary_intent == 'fare_inquiry':
-            return self._format_fare_response(results, entities)
-        elif primary_intent == 'comparison':
-            return self._format_comparison_response(results, entities)
-        elif primary_intent == 'route_inquiry':
-            return self._format_route_response(results, entities)
-        elif primary_intent == 'statistics':
-            return self._format_statistics_response(results)
-        elif primary_intent == 'recommendation':
-            return self._format_recommendation_response(results, query)
-        elif primary_intent == 'range_search':
-            return self._format_range_response(results, entities)
-        else:
-            return self._format_generic_response(results)
-    def _format_fare_response(self, results: List[Dict], entities: Dict) -> Dict[str, Any]:
-        """Format fare inquiry response"""
-        if results:
-            result = results[0]
-            return {
-                'success': True,
-                'message': f"The fare from {result['from_place']} to {result['to_place']} is Rs. {result['fare']}",
-                'data': results,
-                'query_type': 'fare_inquiry',
-                'summary': {
-                    'from_place': result['from_place'],
-                    'to_place': result['to_place'],
-                    'fare': result['fare']
-                }
-            }
-        return {'success': False, 'message': 'Fare information not found.'}
-    def _format_comparison_response(self, results: List[Dict], entities: Dict) -> Dict[str, Any]:
-        """Format comparison response"""
-        if results:
-            result = results[0]
-            return {
-                'success': True,
-                'message': result.get('comparison', 'Comparison completed'),
-                'data': results,
-                'query_type': 'comparison',
-                'summary': {
-                    'route1': result.get('route1'),
-                    'route2': result.get('route2'),
-                    'difference': result.get('difference')
-                }
-            }
-        return {'success': False, 'message': 'Comparison not possible.'}
-    def _format_route_response(self, results: List[Dict], entities: Dict) -> Dict[str, Any]:
-        """Format route inquiry response"""
-        return {
-            'success': True,
-            'message': f"Found {len(results)} routes",
-            'data': results,
-            'query_type': 'route_inquiry',
-            'summary': {
-                'total_routes': len(results),
-                'fare_range': f"Rs. {min(r['fare'] for r in results)} - Rs. {max(r['fare'] for r in results)}" if results else "N/A"
-            }
-        }
-    def _format_statistics_response(self, results: List[Dict]) -> Dict[str, Any]:
-        """Format statistics response"""
-        if results:
-            stats = results[0]
-            return {
-                'success': True,
-                'message': f"Database contains {stats['total_places']} places and {stats['total_routes']} routes",
-                'data': results,
-                'query_type': 'statistics',
-                'summary': {
-                    'total_places': stats['total_places'],
-                    'total_routes': stats['total_routes'],
-                    'average_fare': stats['average_fare'],
-                    'fare_range': f"Rs. {stats['minimum_fare']} - Rs. {stats['maximum_fare']}"
-                }
-            }
-        return {'success': False, 'message': 'Statistics not available.'}
-    def _format_recommendation_response(self, results: List[Dict], query: str) -> Dict[str, Any]:
-        """Format recommendation response"""
-        return {
-            'success': True,
-            'message': f"Here are {len(results)} recommended routes",
-            'data': results,
-            'query_type': 'recommendation',
-            'summary': {
-                'recommendations_count': len(results),
-                'fare_range': f"Rs. {min(r['fare'] for r in results)} - Rs. {max(r['fare'] for r in results)}" if results else "N/A"
-            }
-        }
-    def _format_range_response(self, results: List[Dict], entities: Dict) -> Dict[str, Any]:
-        """Format range search response"""
-        return {
-            'success': True,
-            'message': f"Found {len(results)} routes in your specified range",
-            'data': results,
-            'query_type': 'range_search',
-            'summary': {
-                'routes_found': len(results),
-                'fare_range': f"Rs. {min(r['fare'] for r in results)} - Rs. {max(r['fare'] for r in results)}" if results else "N/A"
-            }
-        }
-    def _format_generic_response(self, results: List[Dict]) -> Dict[str, Any]:
-        """Format generic response"""
-        return {
-            'success': True,
-            'message': f"Found {len(results)} results",
-            'data': results,
-            'query_type': 'generic'
-        }
-    def _handle_unclear_query(self, query: str) -> Dict[str, Any]:
-        """Handle unclear or ambiguous queries"""
-        return {
-            'success': False,
-            'message': 'I could not understand your query. Please try rephrasing it.',
-            'suggestions': self._get_suggestions(),
-            'query_type': 'unclear'
-        }
-    def _calculate_confidence(self, intent: Dict, entities: Dict) -> float:
-        """Calculate confidence score for the query interpretation"""
-        confidence = 0.0
-        # Intent confidence
-        confidence += intent.get('confidence', 0) * 0.4
-        # Entity confidence
-        locations = entities.get('locations', [])
-        if locations:
-            avg_location_confidence = sum(loc['confidence'] for loc in locations) / len(locations)
-            confidence += avg_location_confidence * 0.4
-        # Query complexity bonus
-        if len(locations) >= 2:
-            confidence += 0.2
-        return min(confidence, 1.0)
-    def _get_suggestions(self) -> List[str]:
-        """Get query suggestions"""
-        return [
-            "What is the fare from Colombo to Kandy?",
-            "Compare fares from Colombo to Kandy vs Colombo to Galle",
-            "Show me routes from Panadura",
-            "Find routes under 500 rupees",
-            "What are the cheapest routes?",
-            "Show me popular destinations",
-            "Give me database statistics",
-            "Recommend affordable routes"
-        ]

+#!/usr/bin/env python3
+"""
+Enhanced NLP Processor for Transport Query Application
+Advanced natural language understanding and query processing
+"""
+import re
+import json
+from typing import Dict, List, Tuple, Optional, Any
+from datetime import datetime
+from spell_corrector import SpellCorrector
+from neo4j_service import Neo4jService
+from config import Config
+from logger import get_logger
+class EnhancedNLPProcessor:
+    """Advanced NLP processor with sophisticated query understanding"""
+    def __init__(self):
+        """Create the config, spell-corrector, Neo4j and logger helpers and define the pattern/keyword tables."""
+        self.config = Config()
+        self.spell_corrector = SpellCorrector()
+        self.neo4j_service = Neo4jService()
+        # Logger is named after the concrete class so subclasses get their own logger name.
+        self.logger = get_logger(self.__class__.__name__)
+        # Query patterns and templates
+        # Regular expressions grouped by query category. Location captures use
+        # [a-zA-Z\s]+ and '→' / '->' are accepted as alternatives to the word 'to'.
+        # NOTE(review): no method visible in this part of the file reads
+        # self.query_patterns - confirm it is still used elsewhere.
+        self.query_patterns = {
+            # Two captures: origin and destination of a single route.
+            'fare_queries': [
+                r'(?:what\s+is\s+)?(?:the\s+)?(?:fare|price|cost)(?:\s+of)?(?:\s+from)?\s+([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)',
+                r'(?:what\s+is\s+)?(?:the\s+)?(?:bus\s+)?(?:fare|price|cost)(?:\s+of)?(?:\s+from)?\s+([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)',
+                r'(?:how\s+much\s+)?(?:is|does)\s+(?:the\s+)?(?:bus\s+)?(?:fare|price|cost)\s+(?:from\s+)?([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)',
+                r'([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)\s+(?:fare|price|cost)',
+                r'(?:fare|price|cost)\s+(?:from\s+)?([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)',
+                r'(?:travel|transport)\s+(?:cost|price|fare)\s+(?:from\s+)?([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)',
+                r'(?:bus|train)\s+(?:fare|price|cost)\s+(?:from\s+)?([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)',
+                r'(?:ticket\s+price|ticket\s+fare)\s+(?:from\s+)?([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)'
+            ],
+            # Four captures: origin/destination of the first and of the second route.
+            'comparison_queries': [
+                r'(?:compare|difference)\s+(?:between\s+)?(?:fares?|prices?|costs?)\s+(?:from\s+)?([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)\s+(?:and|vs|versus)\s+([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)',
+                r'(?:which\s+is\s+)?(?:cheaper|more\s+expensive)\s+(?:between\s+)?([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)\s+(?:and|vs|versus)\s+([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)'
+            ],
+            # Captures are amounts written with digits and optional thousands commas.
+            'range_queries': [
+                r'(?:routes?|fares?|prices?)\s+(?:between|from)\s+([0-9,]+)\s+(?:and|to)\s+([0-9,]+)\s+(?:rupees?|rs?)',
+                r'(?:find|show)\s+(?:routes?|fares?|prices?)\s+(?:under|below|less\s+than)\s+([0-9,]+)\s+(?:rupees?|rs?)',
+                r'(?:find|show)\s+(?:routes?|fares?|prices?)\s+(?:over|above|more\s+than)\s+([0-9,]+)\s+(?:rupees?|rs?)'
+            ],
+            # One capture (a single place), except the last pattern which captures a pair.
+            'route_queries': [
+                r'(?:routes?|buses?|trains?)\s+(?:from|departing\s+from)\s+([a-zA-Z\s]+)',
+                r'(?:routes?|buses?|trains?)\s+(?:to|arriving\s+at)\s+([a-zA-Z\s]+)',
+                r'(?:how\s+many\s+)?(?:routes?|buses?|trains?)\s+(?:connect|go\s+to|from)\s+([a-zA-Z\s]+)',
+                r'(?:direct|non-stop)\s+(?:routes?|buses?|trains?)\s+(?:from\s+)?([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)'
+            ],
+            # No capture groups: these only recognise the kind of question.
+            'statistical_queries': [
+                r'(?:average|mean|median)\s+(?:fare|price|cost)',
+                r'(?:total|sum)\s+(?:of\s+)?(?:all\s+)?(?:fares?|prices?|costs?)',
+                r'(?:how\s+many\s+)?(?:routes?|places?|locations?)',
+                r'(?:database|system)\s+(?:statistics?|stats?|overview)',
+                r'(?:summary|overview)\s+(?:of\s+)?(?:transport|fare)\s+(?:data|database)'
+            ],
+            # Only the 'best/optimal route' pattern captures an origin/destination pair.
+            'recommendation_queries': [
+                r'(?:recommend|suggest)\s+(?:cheap|budget|affordable)\s+(?:routes?|options?)',
+                r'(?:best|optimal)\s+(?:route|way)\s+(?:from\s+)?([a-zA-Z\s]+)\s+(?:to|→|->)\s+([a-zA-Z\s]+)',
+                r'(?:popular|frequent)\s+(?:routes?|destinations?)',
+                r'(?:hidden|secret|unknown)\s+(?:routes?|destinations?)'
+            ]
+        }
+        # Query intent classification
+        # Keyword lists read by _classify_intent, which scores each intent by counting
+        # how many of its keywords occur as substrings of the query.
+        self.intent_keywords = {
+            'fare_inquiry': ['fare', 'price', 'cost', 'how much', 'what is the cost'],
+            'route_inquiry': ['route', 'bus', 'train', 'transport', 'how to get', 'way to'],
+            'comparison': ['compare', 'difference', 'vs', 'versus', 'which is', 'better'],
+            'statistics': ['statistics', 'stats', 'overview', 'summary', 'total', 'average'],
+            'recommendation': ['recommend', 'suggest', 'best', 'optimal', 'popular'],
+            'range_search': ['between', 'under', 'over', 'above', 'below', 'range'],
+            'availability': ['available', 'exist', 'have', 'is there', 'can i']
+        }
+    def process_query(self, user_query: str) -> Dict[str, Any]:
+        """
+        Answer one natural-language transport query end to end.
+        The query is normalised, entities and intent are extracted, a Cypher
+        statement is generated and executed, and the formatted answer is
+        annotated with a 'query_analysis' section.
+        Args:
+            user_query: Natural language query string
+        Returns:
+            Response dictionary. On the normal path it carries a 'query_analysis'
+            entry (original query, processed query, intent, entities, confidence).
+            If any step raises, a dictionary with 'success': False, an error
+            'message' and 'suggestions' is returned instead of propagating.
+        """
+        try:
+            # Step 1: Preprocess query
+            processed_query = self._preprocess_query(user_query)
+            self.logger.info(f"Processing query: original='{user_query}', preprocessed='{processed_query}'")
+            # Step 2: Extract entities and intent
+            entities = self._extract_entities(processed_query)
+            intent = self._classify_intent(processed_query, entities)
+            # Step 3: Generate Cypher query
+            cypher_query = self._generate_cypher_query(intent, entities, processed_query)
+            # Only the first 200 characters of the statement are logged to keep log lines short.
+            self.logger.debug(f"Intent: {intent}; Entities: {entities}; Cypher: {str(cypher_query).strip()[:200]}")
+            # Step 4: Execute query and format results
+            if cypher_query:
+                results = self._execute_query(cypher_query)
+                self.logger.info(f"Query results count: {len(results)}")
+                response = self._format_response(intent, entities, results, processed_query)
+            else:
+                response = self._handle_unclear_query(processed_query)
+            # Step 5: Add metadata
+            response.update({
+                'query_analysis': {
+                    'original_query': user_query,
+                    'processed_query': processed_query,
+                    'intent': intent,
+                    'entities': entities,
+                    'confidence': self._calculate_confidence(intent, entities)
+                }
+            })
+            return response
+        except Exception as e:
+            # Top-level boundary: the caller always gets a response dictionary, but the
+            # traceback is recorded so failures are diagnosable from the logs.
+            self.logger.exception(f"Error processing query: {e}")
+            return {
+                'success': False,
+                'message': f'Error processing query: {str(e)}',
+                'suggestions': self._get_suggestions()
+            }
+    def _preprocess_query(self, query: str) -> str:
+        """
+        Normalise a raw user query before entity and intent extraction.
+        The text is lower-cased, shorthands are spelled out and whitespace is
+        collapsed. Letter abbreviations ('rs', 'lkr', 'vs', 'w/o', 'w/') are only
+        expanded when they are not part of a longer word, so words that merely
+        contain them (for example 'versus' or 'first') are left intact.
+        Args:
+            query: Raw query text
+        Returns:
+            The normalised, single-spaced, lower-case query
+        """
+        query = query.lower().strip()
+        # Order matters: 'w/o' has to be handled before its prefix 'w/'.
+        substitutions = (
+            (r'(?<![a-z])rs\.?(?![a-z])', 'rupees'),
+            (r'(?<![a-z])lkr(?![a-z])', 'rupees'),
+            # Arrows and '&' are padded so 'a->b' becomes 'a to b' rather than 'atob'.
+            (r'→|->', ' to '),
+            (r'(?<![a-z])vs(?![a-z])', 'versus'),
+            (r'&', ' and '),
+            (r'(?<![a-z])w/o(?![a-z])', 'without'),
+            (r'(?<![a-z])w/', 'with '),
+        )
+        for pattern, replacement in substitutions:
+            query = re.sub(pattern, replacement, query)
+        # Collapse whitespace last so the padding added above never leaves double spaces.
+        return re.sub(r'\s+', ' ', query).strip()
+    def _extract_entities(self, query: str) -> Dict[str, Any]:
+        """
+        Pull locations, amounts and comparison words out of a preprocessed query.
+        Locations are searched in three tiers (comparison, fare, general phrasing);
+        a later tier is consulted only when the earlier ones produced no accepted
+        location. Each candidate is passed to the spell corrector and kept only
+        when the returned confidence is above 0.5.
+        Args:
+            query: Query text, normally the output of _preprocess_query
+        Returns:
+            Dictionary with the keys 'locations', 'numbers', 'currencies',
+            'comparators' and 'time_expressions'. Only 'locations', 'numbers'
+            and 'comparators' are populated by this method.
+        """
+        entities = {
+            'locations': [],
+            'numbers': [],
+            'currencies': [],
+            'comparators': [],
+            'time_expressions': []
+        }
+        # Extract locations with priority for different query types
+        comparison_patterns = [
+            r'(?:which\s+is\s+)?(?:cheaper|more\s+expensive)\s+(?:between\s+)?([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)\s+(?:and|vs|versus)\s+([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)(?:\s|$|\?)',
+            r'(?:what\s+is\s+)?(?:the\s+)?(?:difference|compare)\s+(?:in\s+)?(?:fare|price|cost)\s+(?:between\s+)?([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)\s+(?:and|vs|versus)\s+([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)(?:\s|$|\?)',
+            r'(?:compare|difference)\s+(?:between\s+)?(?:fares?|prices?|costs?)\s+(?:from\s+)?([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)\s+(?:and|vs|versus)\s+([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)(?:\s|$|\?)',
+            # Simpler patterns for comparison
+            r'([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)\s+(?:and|vs|versus)\s+([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)(?:\s|$|\?)',
+            r'([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)\s+(?:and|vs|versus)\s+([a-zA-Z\s]+?)(?:\s|$|\?)'
+        ]
+        fare_patterns = [
+            r'(?:fare|price|cost)\s+(?:of|from)?\s+([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)(?:\s|$|\?)',
+            r'(?:what\s+is\s+)?(?:the\s+)?(?:fare|price|cost)(?:\s+of)?(?:\s+from)?\s+([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)(?:\s|$|\?)',
+            r'(?:how\s+much\s+)?(?:is|does)\s+(?:the\s+)?(?:fare|price|cost)\s+(?:from\s+)?([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)(?:\s|$|\?)'
+        ]
+        general_patterns = [
+            r'from\s+([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)(?:\s|$|\?)',
+            r'([a-zA-Z\s]+?)\s+to\s+([a-zA-Z\s]+?)(?:\s|$|\?)',
+            r'between\s+([a-zA-Z\s]+?)\s+and\s+([a-zA-Z\s]+?)(?:\s|$|\?)'
+        ]
+        # Lower-cased candidates already sent to the spell corrector, shared by all tiers.
+        seen_locations = set()
+        def collect_locations(patterns):
+            """Append every accepted, not yet seen location matched by `patterns`."""
+            for pattern in patterns:
+                for match in re.finditer(pattern, query, re.IGNORECASE):
+                    for loc in (group.strip() for group in match.groups()):
+                        if not loc or loc.lower() in seen_locations:
+                            continue
+                        seen_locations.add(loc.lower())
+                        corrected, confidence, method = self.spell_corrector.correct_location(loc)
+                        if confidence > 0.5:
+                            entities['locations'].append({
+                                'original': loc,
+                                'corrected': corrected,
+                                'confidence': confidence,
+                                'method': method
+                            })
+        # Highest-priority tier first; stop at the first tier that yields a location.
+        for tier in (comparison_patterns, fare_patterns, general_patterns):
+            collect_locations(tier)
+            if entities['locations']:
+                break
+        # Extract numbers and currencies
+        bounded_pattern = r'(under|below|less\s+than|over|above|more\s+than)\s+(\d+(?:,\d+)*(?:\.\d+)?)\s*(rupees?|rs?|lkr)?'
+        between_pattern = r'between\s+(\d+(?:,\d+)*(?:\.\d+)?)\s+and\s+(\d+(?:,\d+)*(?:\.\d+)?)\s*(rupees?|rs?|lkr)?'
+        plain_pattern = r'(\d+(?:,\d+)*(?:\.\d+)?)\s*(rupees?|rs?|lkr)?'
+        def add_number(text, currency, comparator):
+            """Record one amount; a missing currency suffix defaults to 'rupees'."""
+            entities['numbers'].append({
+                'value': float(text.replace(',', '')),
+                'currency': currency or 'rupees',
+                'comparator': comparator
+            })
+        # "<comparator> <number> [currency]"
+        for match in re.finditer(bounded_pattern, query, re.IGNORECASE):
+            comparator, number, currency = match.groups()
+            # Normalise case and inner spacing so 'Less  Than' is stored as 'less than'.
+            add_number(number, currency, ' '.join(comparator.lower().split()))
+        # "between <number> and <number> [currency]"
+        for match in re.finditer(between_pattern, query, re.IGNORECASE):
+            min_number, max_number, currency = match.groups()
+            add_number(min_number, currency, 'between_min')
+            add_number(max_number, currency, 'between_max')
+        # Every bare "<number> [currency]"; amounts matched above are recorded here
+        # a second time with comparator None.
+        for match in re.finditer(plain_pattern, query, re.IGNORECASE):
+            number, currency = match.groups()
+            add_number(number, currency, None)
+        # Extract comparators
+        comparator_patterns = [
+            r'(cheaper|more\s+expensive|better|worse|faster|slower)',
+            r'(compare|difference|vs|versus)',
+            r'(under|below|less\s+than|over|above|more\s+than)'
+        ]
+        for pattern in comparator_patterns:
+            for match in re.finditer(pattern, query, re.IGNORECASE):
+                entities['comparators'].append(match.group(1).lower())
+        return entities
+    def _classify_intent(self, query: str, entities: Dict = None) -> Dict[str, Any]:
+        """
+        Decide which kind of request the query is.
+        Every intent in self.intent_keywords is scored by the number of its
+        keywords contained in the query. The best score is then overridden by a
+        fixed priority chain (comparison, recommendation, range search, fare
+        inquiry, route inquiry), whose hits are reported with score 10.
+        Args:
+            query: Preprocessed (lower-case) query text
+            entities: Optional result of _extract_entities; a fare inquiry is only
+                forced when it contains at least two locations
+        Returns:
+            Dictionary with 'primary' (intent name), 'confidence' (score / 10)
+            and 'all_scores' (keyword hit count per intent)
+        """
+        intent_scores = {
+            intent: sum(1 for keyword in keywords if keyword in query)
+            for intent, keywords in self.intent_keywords.items()
+        }
+        # Get primary intent (first intent with the highest score wins ties)
+        primary_intent = max(intent_scores.items(), key=lambda x: x[1])
+        def mentions(terms):
+            """True when any of `terms` occurs in the query as a substring."""
+            return any(term in query for term in terms)
+        # Check for specific patterns with priority
+        if mentions(['compare', 'difference', 'vs', 'versus', 'cheaper', 'more expensive']):
+            primary_intent = ('comparison', 10)
+        elif mentions(['recommend', 'suggest', 'best', 'optimal', 'popular']):
+            primary_intent = ('recommendation', 10)
+        # Range words must be whole words: a plain substring test would turn
+        # "database overview" ('over') or "understand" ('under') into a range search.
+        elif re.search(r'\b(?:between|under|over|above|below|range)\b', query):
+            primary_intent = ('range_search', 10)
+        elif mentions(['fare', 'price', 'cost', 'how much']):
+            # Check if we have at least 2 locations; otherwise the keyword score stands
+            if entities and len(entities.get('locations', [])) >= 2:
+                primary_intent = ('fare_inquiry', 10)
+        elif mentions(['route', 'bus', 'train', 'transport']):
+            primary_intent = ('route_inquiry', 10)
+        return {
+            'primary': primary_intent[0],
+            'confidence': primary_intent[1] / 10,
+            'all_scores': intent_scores
+        }
+    def _generate_cypher_query(self, intent: Dict, entities: Dict, query: str) -> Optional[str]:
+        """
+        Produce the Cypher statement for a classified query.
+        LLM-based generation is attempted first; when it yields nothing or
+        raises, the rule-based generator matching intent['primary'] is used.
+        Args:
+            intent: Result of _classify_intent (must contain 'primary')
+            entities: Result of _extract_entities
+            query: Preprocessed query text
+        Returns:
+            A Cypher string, or None when no statement could be built
+        """
+        try:
+            # Try LLM-based query generation first
+            llm_query = self._generate_cypher_with_llm(query, intent, entities)
+            if llm_query:
+                return llm_query
+        except Exception as e:
+            # Reported through the class logger (not stdout), like the rest of the pipeline.
+            self.logger.warning(f"LLM query generation failed: {e}")
+        # Fallback to rule-based generation
+        generators = {
+            'fare_inquiry': lambda: self._generate_fare_query(entities),
+            'comparison': lambda: self._generate_comparison_query(entities),
+            'route_inquiry': lambda: self._generate_route_query(entities, query),
+            'statistics': lambda: self._generate_statistics_query(entities),
+            'recommendation': lambda: self._generate_recommendation_query(entities, query),
+            'range_search': lambda: self._generate_range_query(entities),
+        }
+        generate = generators.get(intent['primary'])
+        if generate is None:
+            # Unknown intent: try to recover a plain origin/destination lookup.
+            return self._generate_fallback_query(query)
+        return generate()
+    def _generate_fare_query(self, entities: Dict) -> Optional[str]:
+        """
+        Build the Cypher lookup for the fare between the first two locations.
+        Args:
+            entities: Extracted entities; entities['locations'][i]['corrected'] is
+                used as the place name
+        Returns:
+            Cypher string, or None when fewer than two locations are available
+        """
+        locations = entities.get('locations', [])
+        if len(locations) < 2:
+            return None
+        def cypher_text(value):
+            """Escape backslashes and single quotes for a single-quoted Cypher literal."""
+            return str(value).replace('\\', '\\\\').replace("'", "\\'")
+        # Names are spliced into the statement text, so a name such as "Adam's Peak"
+        # must be escaped or it would terminate the literal early.
+        from_loc = cypher_text(locations[0]['corrected'])
+        to_loc = cypher_text(locations[1]['corrected'])
+        return f"""
+            MATCH (a:Place {{name: '{from_loc}'}})-[r:Fare]->(b:Place {{name: '{to_loc}'}})
+            RETURN
+                a.name as from_place,
+                b.name as to_place,
+                r.fare as fare,
+                'Direct route' as route_type
+            """
+    def _generate_comparison_query(self, entities: Dict) -> Optional[str]:
+        """
+        Build the Cypher statement that compares the fares of two routes.
+        With exactly three locations the first one is the shared origin
+        ("Colombo to Kandy and Colombo to Galle"); with four or more the first
+        four are read as two origin/destination pairs.
+        Args:
+            entities: Extracted entities; the 'corrected' name of each location is used
+        Returns:
+            Cypher string, or None when fewer than three locations are available
+        """
+        locations = entities.get('locations', [])
+        if len(locations) < 3:
+            return None
+        def cypher_text(value):
+            """Escape backslashes and single quotes for a single-quoted Cypher literal."""
+            return str(value).replace('\\', '\\\\').replace("'", "\\'")
+        names = [cypher_text(loc['corrected']) for loc in locations[:4]]
+        route1_from, route1_to = names[0], names[1]
+        if len(names) == 3:
+            # Same origin, different destinations
+            route2_from, route2_to = names[0], names[2]
+        else:
+            # Two independent routes
+            route2_from, route2_to = names[2], names[3]
+        return f"""
+            MATCH (a1:Place {{name: '{route1_from}'}})-[r1:Fare]->(b1:Place {{name: '{route1_to}'}})
+            MATCH (a2:Place {{name: '{route2_from}'}})-[r2:Fare]->(b2:Place {{name: '{route2_to}'}})
+            RETURN
+                a1.name + ' to ' + b1.name as route1,
+                r1.fare as fare1,
+                a2.name + ' to ' + b2.name as route2,
+                r2.fare as fare2,
+                r1.fare - r2.fare as difference,
+                CASE
+                    WHEN r1.fare < r2.fare THEN 'Route 1 is cheaper'
+                    WHEN r1.fare > r2.fare THEN 'Route 2 is cheaper'
+                    ELSE 'Both routes have the same fare'
+                END as comparison
+            """
+    def _generate_route_query(self, entities: Dict, query: str) -> Optional[str]:
+        """
+        Build the Cypher statement listing routes that leave or reach a place.
+        The first extracted location is used. A query containing 'from' lists
+        outgoing routes; otherwise a query containing 'to' lists incoming routes.
+        Args:
+            entities: Extracted entities; the 'corrected' name of the first location is used
+            query: Preprocessed query text
+        Returns:
+            Cypher string ordered by fare, or None when there is no location or
+            neither direction word occurs in the query
+        """
+        locations = entities.get('locations', [])
+        if not locations:
+            return None
+        # 'from' takes precedence over 'to', as both often appear together.
+        if 'from' in query:
+            outgoing = True
+        elif 'to' in query:
+            outgoing = False
+        else:
+            return None
+        # Escape backslashes and single quotes so names such as "Adam's Peak"
+        # stay inside the single-quoted Cypher literal.
+        location = str(locations[0]['corrected']).replace('\\', '\\\\').replace("'", "\\'")
+        if outgoing:
+            return f"""
+            MATCH (a:Place {{name: '{location}'}})-[r:Fare]->(b:Place)
+            RETURN
+                a.name as from_place,
+                b.name as to_place,
+                r.fare as fare
+            ORDER BY r.fare
+            """
+        return f"""
+            MATCH (a:Place)-[r:Fare]->(b:Place {{name: '{location}'}})
+            RETURN
+                a.name as from_place,
+                b.name as to_place,
+                r.fare as fare
+            ORDER BY r.fare
+            """
+    def _generate_statistics_query(self, entities: Dict) -> str:
+        """
+        Return the Cypher statement for database-wide statistics.
+        Places are counted first and carried forward with WITH, so the Fare
+        relationships are aggregated once. Matching both patterns side by side
+        would pair every relationship with every place and inflate
+        total_routes (and skew the standard deviation).
+        Args:
+            entities: Unused; accepted for signature parity with the other generators
+        Returns:
+            Cypher string producing total_places, total_routes, average_fare,
+            minimum_fare, maximum_fare and fare_standard_deviation
+        """
+        # OPTIONAL MATCH keeps the single result row even when no Fare relationship exists.
+        return """
+        MATCH (p:Place)
+        WITH count(p) AS total_places
+        OPTIONAL MATCH ()-[r:Fare]->()
+        RETURN
+            total_places,
+            count(r) as total_routes,
+            round(avg(r.fare), 2) as average_fare,
+            min(r.fare) as minimum_fare,
+            max(r.fare) as maximum_fare,
+            round(stdDev(r.fare), 2) as fare_standard_deviation
+        """
+    def _generate_recommendation_query(self, entities: Dict, query: str) -> str:
+        """
+        Return the Cypher statement for a recommendation request.
+        'cheap', 'budget' or 'affordable' list the ten lowest fares; otherwise
+        'popular' or 'frequent' list the ten highest fares; any other wording
+        lists the five lowest fares.
+        """
+        if any(word in query for word in ('cheap', 'budget', 'affordable')):
+            direction, limit = 'ASC', 10
+        elif any(word in query for word in ('popular', 'frequent')):
+            direction, limit = 'DESC', 10
+        else:
+            direction, limit = 'ASC', 5
+        # One template serves all three variants; only sort direction and row limit differ.
+        return f"""
+            MATCH (a:Place)-[r:Fare]->(b:Place)
+            RETURN
+                a.name as from_place,
+                b.name as to_place,
+                r.fare as fare
+            ORDER BY r.fare {direction}
+            LIMIT {limit}
+            """
+    def _generate_range_query(self, entities: Dict) -> Optional[str]:
+        """
+        Return the Cypher statement for a fare-range search.
+        A complete 'between' pair takes precedence over a single threshold.
+        When several thresholds are present the last one is used. Numbers
+        without a recognised comparator are ignored.
+        """
+        numbers = entities.get('numbers', [])
+        if not numbers:
+            return None
+        below_words = ('under', 'below', 'less than')
+        above_words = ('over', 'above', 'more than')
+        low = high = None
+        threshold = threshold_word = None
+        for entry in numbers:
+            word = entry.get('comparator', '')
+            amount = entry['value']
+            if word == 'between_min':
+                low = amount
+            elif word == 'between_max':
+                high = amount
+            elif word in below_words or word in above_words:
+                threshold, threshold_word = amount, word
+        # Closed interval
+        if low is not None and high is not None:
+            return f"""
+                MATCH (a:Place)-[r:Fare]->(b:Place)
+                WHERE r.fare >= {low} AND r.fare <= {high}
+                RETURN
+                    a.name as from_place,
+                    b.name as to_place,
+                    r.fare as fare
+                ORDER BY r.fare ASC
+                """
+        if threshold is None or threshold_word is None:
+            return None
+        # Upper bound: cheapest first
+        if threshold_word in below_words:
+            return f"""
+                    MATCH (a:Place)-[r:Fare]->(b:Place)
+                    WHERE r.fare < {threshold}
+                    RETURN
+                        a.name as from_place,
+                        b.name as to_place,
+                        r.fare as fare
+                    ORDER BY r.fare ASC
+                    """
+        # Lower bound: most expensive first
+        return f"""
+                    MATCH (a:Place)-[r:Fare]->(b:Place)
+                    WHERE r.fare > {threshold}
+                    RETURN
+                        a.name as from_place,
+                        b.name as to_place,
+                        r.fare as fare
+                    ORDER BY r.fare DESC
+                    """
+    def _generate_cypher_with_llm(self, query: str, intent: Dict, entities: Dict) -> Optional[str]:
+        """
+        Ask the configured OpenAI model for a Cypher statement answering the query.
+        The answer is only accepted when it is a read-only statement that starts
+        with MATCH. The prompt contains the raw user query, so the model output
+        is treated as untrusted text: statements containing write or procedure
+        clauses are rejected.
+        Args:
+            query: Preprocessed query text
+            intent: Result of _classify_intent
+            entities: Result of _extract_entities
+        Returns:
+            The generated Cypher string, or None when no API key is configured,
+            the model answers "FALLBACK", the answer fails validation, or any
+            error occurs (errors are logged, never raised)
+        """
+        try:
+            if not self.config.OPENAI_API_KEY:
+                return None
+            # Get available places for context
+            available_places = list(self.neo4j_service.get_all_places())
+            # Create comprehensive prompt for Cypher generation
+            prompt = f"""
+            You are a Neo4j Cypher query generator for a transport database.
+            Database Schema:
+            - Nodes: Place (with property 'name')
+            - Relationships: Fare (with property 'fare')
+            Available Places: {', '.join(available_places[:50])}... (total: {len(available_places)})
+            User Query: "{query}"
+            Detected Intent: {intent.get('primary', 'unknown')}
+            Extracted Entities: {entities}
+            Your task is to generate a valid Cypher query that answers the user's question.
+            Query Types and Examples:
+            1. FARE INQUIRY:
+               - "What is the fare from Colombo to Kandy?"
+               - Cypher: MATCH (a:Place {{name: 'Colombo'}})-[r:Fare]->(b:Place {{name: 'Kandy'}}) RETURN a.name as from_place, b.name as to_place, r.fare as fare
+            2. COMPARISON:
+               - "Compare fares from Colombo to Kandy vs Colombo to Galle"
+               - Cypher: MATCH (a1:Place {{name: 'Colombo'}})-[r1:Fare]->(b1:Place {{name: 'Kandy'}}) MATCH (a2:Place {{name: 'Colombo'}})-[r2:Fare]->(b2:Place {{name: 'Galle'}}) RETURN a1.name + ' to ' + b1.name as route1, r1.fare as fare1, a2.name + ' to ' + b2.name as route2, r2.fare as fare2, r1.fare - r2.fare as difference
+            3. RANGE SEARCH:
+               - "Find routes under 500 rupees"
+               - Cypher: MATCH (a:Place)-[r:Fare]->(b:Place) WHERE r.fare < 500 RETURN a.name as from_place, b.name as to_place, r.fare as fare ORDER BY r.fare ASC
+            4. RECOMMENDATION:
+               - "Recommend cheap routes"
+               - Cypher: MATCH (a:Place)-[r:Fare]->(b:Place) RETURN a.name as from_place, b.name as to_place, r.fare as fare ORDER BY r.fare ASC LIMIT 10
+            5. STATISTICS:
+               - "What is the average fare?"
+               - Cypher: MATCH ()-[r:Fare]->() RETURN round(avg(r.fare), 2) as average_fare, min(r.fare) as min_fare, max(r.fare) as max_fare
+            6. ROUTE INQUIRY:
+               - "Routes from Colombo"
+               - Cypher: MATCH (a:Place {{name: 'Colombo'}})-[r:Fare]->(b:Place) RETURN a.name as from_place, b.name as to_place, r.fare as fare ORDER BY r.fare
+            Important Rules:
+            1. Always use proper Cypher syntax
+            2. Use exact place names from the available places list
+            3. For comparisons, use multiple MATCH clauses
+            4. For ranges, use WHERE clauses with appropriate operators
+            5. For statistics, use aggregation functions
+            6. Always include meaningful column aliases
+            7. Use ORDER BY for sorted results
+            8. Use LIMIT for large result sets
+            Return ONLY the Cypher query, nothing else. If you cannot generate a valid query, return "FALLBACK".
+            """
+            messages = [
+                {"role": "system", "content": "You are a Cypher query generator. Return only valid Cypher queries."},
+                {"role": "user", "content": prompt}
+            ]
+            cypher_query = None
+            # Prefer new SDK
+            try:
+                from openai import OpenAI
+                client = OpenAI(api_key=self.config.OPENAI_API_KEY)
+                response = client.chat.completions.create(
+                    model=self.config.OPENAI_MODEL,
+                    messages=messages,
+                    max_tokens=300,
+                    temperature=0.1
+                )
+                cypher_query = response.choices[0].message.content
+            except Exception as sdk_err:
+                # Legacy (pre-1.0) SDK interface; if that fails too, report the first error.
+                import openai
+                try:
+                    openai.api_key = self.config.OPENAI_API_KEY
+                    response = openai.ChatCompletion.create(
+                        model=self.config.OPENAI_MODEL,
+                        messages=messages,
+                        max_tokens=300,
+                        temperature=0.1
+                    )
+                    cypher_query = response.choices[0].message.content
+                except Exception:
+                    raise sdk_err
+            # Validate the response (the content may be None or empty)
+            cypher_query = (cypher_query or '').strip()
+            # Models often wrap the statement in a Markdown code fence; unwrap it.
+            fenced = re.match(r'^```[a-zA-Z]*\s*(.*?)\s*```$', cypher_query, re.DOTALL)
+            if fenced:
+                cypher_query = fenced.group(1).strip()
+            if not cypher_query or cypher_query.upper() == "FALLBACK":
+                return None
+            # Basic validation - check if it starts with MATCH
+            if not cypher_query.upper().startswith('MATCH'):
+                return None
+            # Read-only guard: refuse anything that could modify data or call procedures.
+            # Deliberately conservative - a false positive only means the rule-based
+            # generator is used instead.
+            if re.search(r'\b(?:CREATE|MERGE|DELETE|DETACH|SET|REMOVE|DROP|FOREACH|CALL|LOAD\s+CSV)\b',
+                         cypher_query, re.IGNORECASE):
+                self.logger.warning(f"Rejected non read-only LLM Cypher: {cypher_query[:200]}")
+                return None
+            return cypher_query
+        except Exception as e:
+            self.logger.error(f"LLM Cypher generation error: {e}")
+            return None
+    def _generate_fallback_query(self, query: str) -> Optional[str]:
+        """
+        Last-resort Cypher generation when the intent is unclear.
+        First the spell corrector is asked for the locations in the query; if
+        it finds at least two, a direct fare lookup between the first two is
+        returned. Otherwise, for queries mentioning fare/price/cost, simple
+        "... X to Y" patterns are tried and both names must be corrected with a
+        confidence above 0.5.
+        Args:
+            query: Query text
+        Returns:
+            Cypher string, or None when no origin/destination pair was found
+        """
+        def cypher_text(value):
+            """Escape backslashes and single quotes for a single-quoted Cypher literal."""
+            return str(value).replace('\\', '\\\\').replace("'", "\\'")
+        # Try to extract locations using spell corrector
+        locations = self.spell_corrector.extract_locations_from_query(query)
+        if len(locations) >= 2:
+            # Index 1 of each entry is read as the place name to query.
+            from_loc = cypher_text(locations[0][1])
+            to_loc = cypher_text(locations[1][1])
+            return f"""
+            MATCH (a:Place {{name: '{from_loc}'}})-[r:Fare]->(b:Place {{name: '{to_loc}'}})
+            RETURN
+                a.name as from_place,
+                b.name as to_place,
+                r.fare as fare
+            """
+        # Additional fallback: direct pattern matching for fare queries
+        lowered = query.lower()
+        if 'fare' in lowered or 'price' in lowered or 'cost' in lowered:
+            fare_patterns = [
+                r'fare\s+(?:of|from)?\s+([a-zA-Z\s]+)\s+to\s+([a-zA-Z\s]+)',
+                r'price\s+(?:of|from)?\s+([a-zA-Z\s]+)\s+to\s+([a-zA-Z\s]+)',
+                r'cost\s+(?:of|from)?\s+([a-zA-Z\s]+)\s+to\s+([a-zA-Z\s]+)',
+                r'(?:what\s+is\s+)?(?:the\s+)?(?:fare|price|cost)(?:\s+of)?(?:\s+from)?\s+([a-zA-Z\s]+)\s+to\s+([a-zA-Z\s]+)',
+                r'(?:how\s+much\s+)?(?:is|does)\s+(?:the\s+)?(?:fare|price|cost)\s+(?:from\s+)?([a-zA-Z\s]+)\s+to\s+([a-zA-Z\s]+)'
+            ]
+            for pattern in fare_patterns:
+                match = re.search(pattern, lowered)
+                if match:
+                    from_loc = match.group(1).strip()
+                    to_loc = match.group(2).strip()
+                    # Correct locations
+                    from_corrected, from_conf, _ = self.spell_corrector.correct_location(from_loc)
+                    to_corrected, to_conf, _ = self.spell_corrector.correct_location(to_loc)
+                    if from_conf > 0.5 and to_conf > 0.5:
+                        return f"""
+                        MATCH (a:Place {{name: '{cypher_text(from_corrected)}'}})-[r:Fare]->(b:Place {{name: '{cypher_text(to_corrected)}'}})
+                        RETURN a.name as from_place, b.name as to_place, r.fare as fare
+                        """
+        return None
+    def _execute_query(self, cypher_query: str) -> List[Dict]:
+        """
+        Run a Cypher statement and return its records as plain dictionaries.
+        Args:
+            cypher_query: Complete Cypher statement text
+        Returns:
+            One dictionary per result record; an empty list when the statement
+            returns nothing or when execution fails (the error is logged)
+        """
+        try:
+            with self.neo4j_service.driver.session() as session:
+                result = session.run(cypher_query)
+                # Materialise inside the session: records are consumed before it closes.
+                return [dict(record) for record in result]
+        except Exception as e:
+            # Best effort by design: callers treat a failure like "no results".
+            # Reported through the class logger instead of stdout.
+            self.logger.error(f"Query execution error: {e}")
+            return []
+    def _format_response(self, intent: Dict, entities: Dict, results: List[Dict], query: str) -> Dict[str, Any]:
+        """Format the response based on intent and results"""
+        primary_intent = intent['primary']
+        if not results:
+            return {
+                'success': False,
+                'message': 'No results found for your query.',
+                'suggestions': self._get_suggestions()
+            }
+        if primary_intent == 'fare_inquiry':
+            return self._format_fare_response(results, entities)
+        elif primary_intent == 'comparison':
+            return self._format_comparison_response(results, entities)
+        elif primary_intent == 'route_inquiry':
+            return self._format_route_response(results, entities)
+        elif primary_intent == 'statistics':
+            return self._format_statistics_response(results)
+        elif primary_intent == 'recommendation':
+            return self._format_recommendation_response(results, query)
+        elif primary_intent == 'range_search':
+            return self._format_range_response(results, entities)
+        else:
+            return self._format_generic_response(results)
+    def _format_fare_response(self, results: List[Dict], entities: Dict) -> Dict[str, Any]:
+        """Format fare inquiry response"""
+        if results:
+            result = results[0]
+            return {
+                'success': True,
+                'message': f"The fare from {result['from_place']} to {result['to_place']} is Rs. {result['fare']}",
+                'data': results,
+                'query_type': 'fare_inquiry',
+                'summary': {
+                    'from_place': result['from_place'],
+                    'to_place': result['to_place'],
+                    'fare': result['fare']
+                }
+            }
+        return {'success': False, 'message': 'Fare information not found.'}
+    def _format_comparison_response(self, results: List[Dict], entities: Dict) -> Dict[str, Any]:
+        """Format comparison response"""
+        if results:
+            result = results[0]
+            return {
+                'success': True,
+                'message': result.get('comparison', 'Comparison completed'),
+                'data': results,
+                'query_type': 'comparison',
+                'summary': {
+                    'route1': result.get('route1'),
+                    'route2': result.get('route2'),
+                    'difference': result.get('difference')
+                }
+            }
+        return {'success': False, 'message': 'Comparison not possible.'}
+    def _format_route_response(self, results: List[Dict], entities: Dict) -> Dict[str, Any]:
+        """Format route inquiry response"""
+        return {
+            'success': True,
+            'message': f"Found {len(results)} routes",
+            'data': results,
+            'query_type': 'route_inquiry',
+            'summary': {
+                'total_routes': len(results),
+                'fare_range': f"Rs. {min(r['fare'] for r in results)} - Rs. {max(r['fare'] for r in results)}" if results else "N/A"
+            }
+        }
+    def _format_statistics_response(self, results: List[Dict]) -> Dict[str, Any]:
+        """Format statistics response"""
+        if results:
+            stats = results[0]
+            return {
+                'success': True,
+                'message': f"Database contains {stats['total_places']} places and {stats['total_routes']} routes",
+                'data': results,
+                'query_type': 'statistics',
+                'summary': {
+                    'total_places': stats['total_places'],
+                    'total_routes': stats['total_routes'],
+                    'average_fare': stats['average_fare'],
+                    'fare_range': f"Rs. {stats['minimum_fare']} - Rs. {stats['maximum_fare']}"
+                }
+            }
+        return {'success': False, 'message': 'Statistics not available.'}
+    def _format_recommendation_response(self, results: List[Dict], query: str) -> Dict[str, Any]:
+        """Format recommendation response"""
+        return {
+            'success': True,
+            'message': f"Here are {len(results)} recommended routes",
+            'data': results,
+            'query_type': 'recommendation',
+            'summary': {
+                'recommendations_count': len(results),
+                'fare_range': f"Rs. {min(r['fare'] for r in results)} - Rs. {max(r['fare'] for r in results)}" if results else "N/A"
+            }
+        }
+    def _format_range_response(self, results: List[Dict], entities: Dict) -> Dict[str, Any]:
+        """Format range search response"""
+        return {
+            'success': True,
+            'message': f"Found {len(results)} routes in your specified range",
+            'data': results,
+            'query_type': 'range_search',
+            'summary': {
+                'routes_found': len(results),
+                'fare_range': f"Rs. {min(r['fare'] for r in results)} - Rs. {max(r['fare'] for r in results)}" if results else "N/A"
+            }
+        }
+    def _format_generic_response(self, results: List[Dict]) -> Dict[str, Any]:
+        """Format generic response"""
+        return {
+            'success': True,
+            'message': f"Found {len(results)} results",
+            'data': results,
+            'query_type': 'generic'
+        }
+    def _handle_unclear_query(self, query: str) -> Dict[str, Any]:
+        """Handle unclear or ambiguous queries"""
+        return {
+            'success': False,
+            'message': 'I could not understand your query. Please try rephrasing it.',
+            'suggestions': self._get_suggestions(),
+            'query_type': 'unclear'
+        }
+    def _calculate_confidence(self, intent: Dict, entities: Dict) -> float:
+        """Calculate confidence score for the query interpretation"""
+        confidence = 0.0
+        # Intent confidence
+        confidence += intent.get('confidence', 0) * 0.4
+        # Entity confidence
+        locations = entities.get('locations', [])
+        if locations:
+            avg_location_confidence = sum(loc['confidence'] for loc in locations) / len(locations)
+            confidence += avg_location_confidence * 0.4
+        # Query complexity bonus
+        if len(locations) >= 2:
+            confidence += 0.2
+        return min(confidence, 1.0)
+    def _get_suggestions(self) -> List[str]:
+        """Get query suggestions"""
+        return [
+            "What is the fare from Colombo to Kandy?",
+            "Compare fares from Colombo to Kandy vs Colombo to Galle",
+            "Show me routes from Panadura",
+            "Find routes under 500 rupees",
+            "What are the cheapest routes?",
+            "Show me popular destinations",
+            "Give me database statistics",
+            "Recommend affordable routes"
+        ]

language_detector.py ADDED Viewed

	@@ -0,0 +1,251 @@

+#!/usr/bin/env python3
+"""
+Language Detection Service
+Auto-detects user input language: Sinhala, Singlish, English, Tamil
+"""
+import re
+from typing import Dict, Any, Optional, Tuple
+from logger import get_logger
+class LanguageDetector:
+    """Detects language of user input with support for Sinhala, Singlish, English, and Tamil"""
+    def __init__(self):
+        self.logger = get_logger(self.__class__.__name__)
+        # Unicode ranges for different scripts
+        self.script_ranges = {
+            'sinhala': re.compile(r'[\u0D80-\u0DFF]'),  # Sinhala script
+            'tamil': re.compile(r'[\u0B80-\u0BFF]'),    # Tamil script
+            'english': re.compile(r'[a-zA-Z]'),         # Latin script
+            'numbers': re.compile(r'[0-9]'),            # Numbers
+            'punctuation': re.compile(r'[^\w\s]')       # Punctuation
+        }
+        # Common Singlish patterns (Sinhala + English mixed)
+        self.singlish_patterns = [
+            r'[\u0D80-\u0DFF]+[a-zA-Z]+',  # Sinhala followed by English
+            r'[a-zA-Z]+[\u0D80-\u0DFF]+',  # English followed by Sinhala
+            r'[\u0D80-\u0DFF]+\s+[a-zA-Z]+',  # Sinhala word followed by English word
+            r'[a-zA-Z]+\s+[\u0D80-\u0DFF]+',  # English word followed by Sinhala word
+        ]
+        # Common Singlish words/phrases
+        self.singlish_indicators = [
+            'bus', 'fare', 'price', 'cost', 'route', 'ticket', 'station',
+            'colombo', 'kandy', 'galle', 'matara', 'anuradhapura', 'panadura',
+            'rupees', 'rs', 'lkr', 'how much', 'what is', 'show me', 'find',
+            'from', 'to', 'and', 'or', 'the', 'a', 'an', 'is', 'are', 'was', 'were'
+        ]
+        # Tamil transport terms (for detection)
+        self.tamil_transport_terms = [
+            'பேருந்து', 'கட்டணம்', 'விலை', 'செலவு', 'பாதை', 'டிக்கெட்', 'நிலையம்',
+            'கொழும்பு', 'கண்டி', 'காலி', 'மாத்தறை', 'அனுராதபுரம்', 'பனதுரை',
+            'ரூபாய்', 'எவ்வளவு', 'என்ன', 'காட்டு', 'கண்டுபிடி', 'இருந்து', 'வரை',
+            'மற்றும்', 'அல்லது', 'இது', 'அது', 'உள்ளது', 'இருக்கிறது'
+        ]
+        # Sinhala transport terms (for detection)
+        self.sinhala_transport_terms = [
+            'බස්', 'ගාස්තු', 'මිල', 'වාරික', 'මාර්ග', 'ටිකට්', 'නිලය',
+            'කොළඹ', 'මහනුවර', 'ගාල්ල', 'මාතර', 'අනුරාධපුර', 'පානදුර',
+            'රුපියල්', 'කීයද', 'මොනවාද', 'පෙන්වන්න', 'සොයන්න', 'සිට', 'ට',
+            'සහ', 'හෝ', 'මේ', 'ඒ', 'කියලා', 'ඉන්නවා'
+        ]
+    def detect_language(self, text: str) -> Dict[str, Any]:
+        """
+        Detect the language of the input text
+        Args:
+            text: Input text to analyze
+        Returns:
+            Dictionary with language detection results
+        """
+        if not text or not text.strip():
+            return {
+                'language': 'unknown',
+                'confidence': 0.0,
+                'details': {
+                    'script_analysis': {},
+                    'pattern_matches': [],
+                    'reasoning': 'Empty or whitespace-only text'
+                }
+            }
+        # Clean and normalize text
+        clean_text = text.strip()
+        # Analyze script composition
+        script_analysis = self._analyze_scripts(clean_text)
+        # Check for Singlish patterns
+        singlish_matches = self._detect_singlish(clean_text)
+        # Determine primary language
+        language, confidence, reasoning = self._determine_language(script_analysis, singlish_matches, clean_text)
+        return {
+            'language': language,
+            'confidence': confidence,
+            'details': {
+                'script_analysis': script_analysis,
+                'singlish_matches': singlish_matches,
+                'reasoning': reasoning,
+                'original_text': text,
+                'clean_text': clean_text
+            }
+        }
+    def _analyze_scripts(self, text: str) -> Dict[str, Any]:
+        """Analyze the script composition of the text"""
+        analysis = {
+            'sinhala_chars': 0,
+            'tamil_chars': 0,
+            'english_chars': 0,
+            'number_chars': 0,
+            'punctuation_chars': 0,
+            'total_chars': len(text),
+            'sinhala_ratio': 0.0,
+            'tamil_ratio': 0.0,
+            'english_ratio': 0.0,
+            'mixed_script': False
+        }
+        for char in text:
+            if self.script_ranges['sinhala'].match(char):
+                analysis['sinhala_chars'] += 1
+            elif self.script_ranges['tamil'].match(char):
+                analysis['tamil_chars'] += 1
+            elif self.script_ranges['english'].match(char):
+                analysis['english_chars'] += 1
+            elif self.script_ranges['numbers'].match(char):
+                analysis['number_chars'] += 1
+            elif self.script_ranges['punctuation'].match(char):
+                analysis['punctuation_chars'] += 1
+        # Calculate ratios
+        if analysis['total_chars'] > 0:
+            analysis['sinhala_ratio'] = analysis['sinhala_chars'] / analysis['total_chars']
+            analysis['tamil_ratio'] = analysis['tamil_chars'] / analysis['total_chars']
+            analysis['english_ratio'] = analysis['english_chars'] / analysis['total_chars']
+        # Check for mixed script
+        script_count = sum([
+            analysis['sinhala_chars'] > 0,
+            analysis['tamil_chars'] > 0,
+            analysis['english_chars'] > 0
+        ])
+        analysis['mixed_script'] = script_count > 1
+        return analysis
+    def _detect_singlish(self, text: str) -> Dict[str, Any]:
+        """Detect Singlish patterns in the text"""
+        matches = {
+            'pattern_matches': [],
+            'indicator_words': [],
+            'is_singlish': False,
+            'confidence': 0.0
+        }
+        # Check for Singlish patterns
+        for pattern in self.singlish_patterns:
+            pattern_matches = re.findall(pattern, text)
+            if pattern_matches:
+                matches['pattern_matches'].extend(pattern_matches)
+        # Check for Singlish indicator words
+        text_lower = text.lower()
+        for indicator in self.singlish_indicators:
+            if indicator in text_lower:
+                matches['indicator_words'].append(indicator)
+        # Determine if it's Singlish
+        if matches['pattern_matches'] or len(matches['indicator_words']) >= 2:
+            matches['is_singlish'] = True
+            matches['confidence'] = min(0.9, 0.3 + (len(matches['pattern_matches']) * 0.2) + (len(matches['indicator_words']) * 0.1))
+        return matches
+    def _determine_language(self, script_analysis: Dict, singlish_matches: Dict, text: str) -> Tuple[str, float, str]:
+        """Determine the primary language based on analysis"""
+        # High confidence cases
+        if singlish_matches['is_singlish'] and singlish_matches['confidence'] > 0.6:
+            return 'singlish', singlish_matches['confidence'], 'Detected Singlish patterns and indicator words'
+        # Pure script cases
+        if script_analysis['sinhala_ratio'] > 0.7 and script_analysis['tamil_ratio'] == 0:
+            return 'sinhala', script_analysis['sinhala_ratio'], 'High Sinhala script ratio'
+        if script_analysis['tamil_ratio'] > 0.7 and script_analysis['sinhala_ratio'] == 0:
+            return 'tamil', script_analysis['tamil_ratio'], 'High Tamil script ratio'
+        if script_analysis['english_ratio'] > 0.7 and script_analysis['sinhala_ratio'] == 0 and script_analysis['tamil_ratio'] == 0:
+            return 'english', script_analysis['english_ratio'], 'High English script ratio'
+        # Mixed cases with dominant script
+        if script_analysis['sinhala_ratio'] > 0.4:
+            confidence = script_analysis['sinhala_ratio']
+            if script_analysis['english_ratio'] > 0.2:
+                return 'singlish', confidence, 'Sinhala-dominant mixed text with English'
+            return 'sinhala', confidence, 'Sinhala-dominant text'
+        if script_analysis['tamil_ratio'] > 0.4:
+            confidence = script_analysis['tamil_ratio']
+            if script_analysis['english_ratio'] > 0.2:
+                return 'tamil_english', confidence, 'Tamil-dominant mixed text with English'
+            return 'tamil', confidence, 'Tamil-dominant text'
+        if script_analysis['english_ratio'] > 0.4:
+            confidence = script_analysis['english_ratio']
+            if script_analysis['sinhala_ratio'] > 0.1 or script_analysis['tamil_ratio'] > 0.1:
+                return 'singlish', confidence, 'English-dominant mixed text'
+            return 'english', confidence, 'English-dominant text'
+        # Fallback: check for specific terms
+        text_lower = text.lower()
+        sinhala_terms_found = sum(1 for term in self.sinhala_transport_terms if term in text_lower)
+        tamil_terms_found = sum(1 for term in self.tamil_transport_terms if term in text_lower)
+        if sinhala_terms_found > tamil_terms_found and sinhala_terms_found > 0:
+            return 'sinhala', 0.6, f'Found {sinhala_terms_found} Sinhala transport terms'
+        if tamil_terms_found > sinhala_terms_found and tamil_terms_found > 0:
+            return 'tamil', 0.6, f'Found {tamil_terms_found} Tamil transport terms'
+        # Check if it's pure English (no non-Latin characters)
+        if script_analysis['english_ratio'] > 0.8 and script_analysis['sinhala_ratio'] == 0 and script_analysis['tamil_ratio'] == 0:
+            return 'english', 0.8, 'Pure English text detected'
+        # Default to English if no clear indicators
+        return 'english', 0.5, 'Default to English - no clear language indicators'
+    def is_sinhala(self, text: str) -> bool:
+        """Quick check if text is Sinhala"""
+        result = self.detect_language(text)
+        return result['language'] in ['sinhala', 'singlish']
+    def is_tamil(self, text: str) -> bool:
+        """Quick check if text is Tamil"""
+        result = self.detect_language(text)
+        return result['language'] in ['tamil', 'tamil_english']
+    def is_english(self, text: str) -> bool:
+        """Quick check if text is English"""
+        result = self.detect_language(text)
+        return result['language'] == 'english'
+    def is_singlish(self, text: str) -> bool:
+        """Quick check if text is Singlish"""
+        result = self.detect_language(text)
+        return result['language'] == 'singlish'
+    def get_detection_summary(self, text: str) -> str:
+        """Get a human-readable summary of language detection"""
+        result = self.detect_language(text)
+        return f"Language: {result['language']} (confidence: {result['confidence']:.2f}) - {result['details']['reasoning']}"

llm_query_processor.py CHANGED Viewed

@@ -1,351 +1,384 @@
-#!/usr/bin/env python3
-"""
-LLM-Based Query Processor for Transport Query Application
-Uses AI to interpret queries and generate Cypher queries
-"""
-import re
-from typing import Dict, List, Tuple, Optional
-from spell_corrector import SpellCorrector
-from neo4j_service import Neo4jService
-from config import Config
-class LLMQueryProcessor:
-    """Process natural language queries using LLM for interpretation and Cypher generation"""
-    def __init__(self):
-        self.config = Config()
-        self.spell_corrector = SpellCorrector()
-        self.neo4j_service = Neo4jService()
-    def process_query(self, user_query: str) -> Dict:
-        """
-        Process a natural language query using LLM for interpretation
-        Returns:
-            Dictionary with query results and metadata
-        """
-        try:
-            # First, extract and correct locations from the query
-            locations = self.spell_corrector.extract_locations_from_query(user_query)
-            # Use LLM to interpret the query and generate Cypher
-            interpretation = self._interpret_query_with_llm(user_query, locations)
-            if interpretation['success']:
-                # Execute the generated Cypher query
-                result = self._execute_cypher_query(interpretation['cypher_query'])
-                return {
-                    'success': True,
-                    'message': interpretation['message'],
-                    'cypher_query': interpretation['cypher_query'],
-                    'data': result,
-                    'corrections': self._format_corrections(locations),
-                    'query_type': interpretation['query_type']
-                }
-            else:
-                return {
-                    'success': False,
-                    'message': interpretation['message'],
-                    'suggestions': self._get_query_suggestions()
-                }
-        except Exception as e:
-            print(f"Query processing error: {e}")
-            return {
-                'success': False,
-                'message': 'An error occurred while processing your query.',
-                'suggestions': self._get_query_suggestions()
-            }
-    def _interpret_query_with_llm(self, query: str, locations: List[Tuple]) -> Dict:
-        """Use LLM to interpret the query and generate appropriate Cypher"""
-        try:
-            if not self.config.OPENAI_API_KEY:
-                return self._fallback_interpretation(query, locations)
-            # Get available places for context
-            available_places = list(self.neo4j_service.get_all_places())
-            # Create comprehensive prompt for query interpretation
-            prompt = f"""
-            You are an intelligent transport query interpreter for a Neo4j database containing Sri Lankan transport data.
-            Database Schema:
-            - Nodes: Place (with property 'name')
-            - Relationships: Fare (with property 'fare')
-            Available Places: {', '.join(available_places[:50])}... (total: {len(available_places)})
-            User Query: "{query}"
-            Extracted Locations: {[f"{orig}->{corr}" for orig, corr, conf, method in locations]}
-            Your task is to:
-            1. Determine the query type (fare, cheapest, expensive, places, routes_from, routes_to, statistics, lowest_fare)
-            2. Generate the appropriate Cypher query
-            3. Provide a clear response message
-                         Query Types:
-             - fare: Find fare between two specific locations
-             - cheapest: Find cheapest routes (top 10)
-             - expensive: Find most expensive routes (top 10)
-             - places: List all places
-             - routes_from: Find routes departing from a location
-             - routes_to: Find routes arriving at a location
-             - statistics: Get database statistics
-             - lowest_fare: Find the single lowest fare with route details
-            Return your response in this exact JSON format:
-            {{
-                "query_type": "fare|cheapest|expensive|places|routes_from|routes_to|statistics|lowest_fare",
-                "cypher_query": "MATCH ... RETURN ...",
-                "message": "Clear response message for the user"
-            }}
-                         Examples:
-             - "What is the fare from Colombo to Kandy?" → fare query: MATCH (a:Place {name: 'Colombo'})-[r:Fare]->(b:Place {name: 'Kandy'}) RETURN a.name as from_place, b.name as to_place, r.fare as fare
-             - "fare of anuradhapura to kandy?" → fare query: MATCH (a:Place {name: 'Anuradapura'})-[r:Fare]->(b:Place {name: 'Kandy'}) RETURN a.name as from_place, b.name as to_place, r.fare as fare
-             - "Show me the cheapest routes" → cheapest query: MATCH (a:Place)-[r:Fare]->(b:Place) RETURN a.name as from_place, b.name as to_place, r.fare as fare ORDER BY r.fare ASC LIMIT 10
-             - "What is the lowest fare?" → lowest_fare query: MATCH (a:Place)-[r:Fare]->(b:Place) RETURN a.name as from_place, b.name as to_place, r.fare as fare ORDER BY r.fare ASC LIMIT 1
-             - "List all places" → places query: MATCH (p:Place) RETURN DISTINCT p.name as place ORDER BY p.name
-             - "Routes from Colombo" → routes_from query: MATCH (a:Place {name: 'Colombo'})-[r:Fare]->(b:Place) RETURN a.name as from_place, b.name as to_place, r.fare as fare ORDER BY r.fare
-             - "Database statistics" → statistics query: MATCH (p:Place) MATCH ()-[r:Fare]->() RETURN count(DISTINCT p) as total_places, count(r) as total_routes, avg(r.fare) as average_fare, min(r.fare) as min_fare, max(r.fare) as max_fare
-             Keep Cypher queries simple and avoid complex functions like shortestPath. Use direct relationships only.
-             For fare queries, recognize various formats like "fare of X to Y", "fare from X to Y", "price from X to Y", etc.
-            """
-            # Call LLM using new SDK first, legacy as fallback
-            import json
-            interpretation = None
-            try:
-                from openai import OpenAI
-                client = OpenAI(api_key=self.config.OPENAI_API_KEY)
-                response = client.chat.completions.create(
-                    model=self.config.OPENAI_MODEL,
-                    messages=[
-                        {"role": "system", "content": "You are a transport query interpreter. Return only valid JSON."},
-                        {"role": "user", "content": prompt}
-                    ],
-                    max_tokens=500,
-                    temperature=0.1
-                )
-                interpretation = json.loads(response.choices[0].message.content.strip())
-            except Exception as sdk_err:
-                try:
-                    import openai
-                    openai.api_key = self.config.OPENAI_API_KEY
-                    response = openai.ChatCompletion.create(
-                        model=self.config.OPENAI_MODEL,
-                        messages=[
-                            {"role": "system", "content": "You are a transport query interpreter. Return only valid JSON."},
-                            {"role": "user", "content": prompt}
-                        ],
-                        max_tokens=500,
-                        temperature=0.1
-                    )
-                    interpretation = json.loads(response.choices[0].message.content.strip())
-                except Exception:
-                    raise sdk_err
-            # Validate the response
-            if interpretation and 'query_type' in interpretation and 'cypher_query' in interpretation and 'message' in interpretation:
-                return {
-                    'success': True,
-                    'query_type': interpretation['query_type'],
-                    'cypher_query': interpretation['cypher_query'],
-                    'message': interpretation['message']
-                }
-            else:
-                return self._fallback_interpretation(query, locations)
-        except Exception as e:
-            print(f"LLM interpretation error: {e}")
-            return self._fallback_interpretation(query, locations)
-    def _fallback_interpretation(self, query: str, locations: List[Tuple]) -> Dict:
-        """Fallback interpretation when LLM is not available"""
-        query_lower = query.lower()
-        # Simple keyword-based interpretation
-        if 'lowest' in query_lower or 'minimum' in query_lower or 'cheapest' in query_lower:
-            if 'lowest fare' in query_lower or 'minimum fare' in query_lower:
-                                 return {
-                     'success': True,
-                     'query_type': 'lowest_fare',
-                     'cypher_query': """
-                     MATCH (a:Place)-[r:Fare]->(b:Place)
-                     WITH a, b, r, r.fare as fare
-                     ORDER BY r.fare ASC
-                     LIMIT 1
-                     RETURN a.name as from_place, b.name as to_place, fare
-                     """,
-                     'message': 'Finding the lowest fare in the database...'
-                 }
-            else:
-                return {
-                    'success': True,
-                    'query_type': 'cheapest',
-                    'cypher_query': """
-                    MATCH (a:Place)-[r:Fare]->(b:Place)
-                    RETURN a.name as from_place, b.name as to_place, r.fare as fare
-                    ORDER BY r.fare ASC
-                    LIMIT 10
-                    """,
-                    'message': 'Finding the cheapest routes...'
-                }
-        elif 'expensive' in query_lower or 'highest' in query_lower or 'maximum' in query_lower:
-            return {
-                'success': True,
-                'query_type': 'expensive',
-                'cypher_query': """
-                MATCH (a:Place)-[r:Fare]->(b:Place)
-                RETURN a.name as from_place, b.name as to_place, r.fare as fare
-                ORDER BY r.fare DESC
-                LIMIT 10
-                """,
-                'message': 'Finding the most expensive routes...'
-            }
-        elif 'places' in query_lower or 'locations' in query_lower or 'list all' in query_lower:
-            return {
-                'success': True,
-                'query_type': 'places',
-                'cypher_query': """
-                MATCH (p:Place)
-                RETURN DISTINCT p.name as place
-                ORDER BY p.name
-                """,
-                'message': 'Listing all places...'
-            }
-        elif 'statistics' in query_lower or 'stats' in query_lower:
-            return {
-                'success': True,
-                'query_type': 'statistics',
-                'cypher_query': """
-                MATCH (p:Place)
-                MATCH ()-[r:Fare]->()
-                RETURN
-                    count(DISTINCT p) as total_places,
-                    count(r) as total_routes,
-                    avg(r.fare) as average_fare,
-                    min(r.fare) as min_fare,
-                    max(r.fare) as max_fare
-                """,
-                'message': 'Getting database statistics...'
-            }
-        elif len(locations) >= 2:
-            # Fare query between two locations
-            from_location = locations[0][1]
-            to_location = locations[1][1]
-            return {
-                'success': True,
-                'query_type': 'fare',
-                'cypher_query': f"""
-                MATCH (a:Place {{name: '{from_location}'}})-[r:Fare]->(b:Place {{name: '{to_location}'}})
-                RETURN a.name as from_place, b.name as to_place, r.fare as fare
-                """,
-                'message': f'Finding fare from {from_location} to {to_location}...'
-            }
-        elif 'fare' in query_lower and 'to' in query_lower:
-             # Handle queries like "fare of X to Y" where locations might not be extracted properly
-             # Try to extract locations using a simpler pattern
-             import re
-             fare_patterns = [
-                 r'fare\s+(?:of|from)?\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)',
-                 r'price\s+(?:of|from)?\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)',
-                 r'cost\s+(?:of|from)?\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)',
-                 r'how\s+much\s+(?:is|does)\s+(?:the\s+)?(?:fare|price|cost)\s+(?:from\s+)?([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)',
-                 r'what\s+(?:is|are)\s+(?:the\s+)?(?:fare|price|cost)s?\s+(?:from\s+)?([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)',
-                 r'([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+(?:fare|price|cost)',
-                 r'(?:fare|price|cost)\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)'
-             ]
-             for pattern in fare_patterns:
-                 match = re.search(pattern, query_lower)
-                 if match:
-                     from_loc = match.group(1).strip()
-                     to_loc = match.group(2).strip()
-                     # Correct the locations
-                     from_corrected, from_conf, _ = self.spell_corrector.correct_location(from_loc)
-                     to_corrected, to_conf, _ = self.spell_corrector.correct_location(to_loc)
-                     if from_conf > 0.5 and to_conf > 0.5:
-                         return {
-                             'success': True,
-                             'query_type': 'fare',
-                             'cypher_query': f"""
-                             MATCH (a:Place {{name: '{from_corrected}'}})-[r:Fare]->(b:Place {{name: '{to_corrected}'}})
-                             RETURN a.name as from_place, b.name as to_place, r.fare as fare
-                             """,
-                             'message': f'Finding fare from {from_corrected} to {to_corrected}...'
-                         }
-        elif len(locations) == 1:
-            # Routes from/to a single location
-            location = locations[0][1]
-            if 'from' in query_lower:
-                return {
-                    'success': True,
-                    'query_type': 'routes_from',
-                    'cypher_query': f"""
-                    MATCH (a:Place {{name: '{location}'}})-[r:Fare]->(b:Place)
-                    RETURN a.name as from_place, b.name as to_place, r.fare as fare
-                    ORDER BY r.fare
-                    """,
-                    'message': f'Finding routes from {location}...'
-                }
-            else:
-                return {
-                    'success': True,
-                    'query_type': 'routes_to',
-                    'cypher_query': f"""
-                    MATCH (a:Place)-[r:Fare]->(b:Place {{name: '{location}'}})
-                    RETURN a.name as from_place, b.name as to_place, r.fare as fare
-                    ORDER BY r.fare
-                    """,
-                    'message': f'Finding routes to {location}...'
-                }
-        else:
-            return {
-                'success': False,
-                'message': 'I could not understand your query. Please try rephrasing it.'
-            }
-    def _execute_cypher_query(self, cypher_query: str) -> List[Dict]:
-        """Execute the generated Cypher query"""
-        try:
-            with self.neo4j_service.driver.session() as session:
-                result = session.run(cypher_query)
-                return [dict(record) for record in result]
-        except Exception as e:
-            print(f"Cypher execution error: {e}")
-            return []
-    def _format_corrections(self, locations: List[Tuple]) -> List[Dict]:
-        """Format location corrections for display"""
-        corrections = []
-        for original, corrected, confidence, method in locations:
-            if original.lower() != corrected.lower():
-                corrections.append({
-                    'original': original,
-                    'corrected': corrected,
-                    'confidence': confidence,
-                    'method': method
-                })
-        return corrections
-    def _get_query_suggestions(self) -> List[str]:
-        """Get query suggestions"""
-        return [
-            "What is the fare from Colombo to Kandy?",
-            "What is the lowest fare price?",
-            "Show me the cheapest routes",
-            "Show me the most expensive routes",
-            "List all places",
-            "Routes from Panadura",
-            "Routes to Galle",
-            "Database statistics"
-        ]

+#!/usr/bin/env python3
+"""
+LLM-Based Query Processor for Transport Query Application
+Uses Google Gemini AI to interpret queries and generate Cypher queries
+"""
+import re
+import json
+from typing import Dict, List, Tuple, Optional
+from spell_corrector import SpellCorrector
+from neo4j_service import Neo4jService
+from config import Config
+import google.generativeai as genai
+import os
+from dotenv import load_dotenv
+load_dotenv()
+class LLMQueryProcessor:
+    """Process natural language queries using LLM for interpretation and Cypher generation"""
+    def __init__(self):
+        """
+        Create the helper services and configure Google Generative AI.
+        The API key is taken from ``Config.GOOGLE_API_KEY`` when that attribute
+        exists and is non-empty, otherwise from the ``GOOGLE_API_KEY``
+        environment variable. ``google_api_available`` records whether a key
+        was found, so ``_interpret_query_with_llm`` can skip the LLM call and
+        use the rule-based fallback when there is none.
+        """
+        self.config = Config()
+        self.spell_corrector = SpellCorrector()
+        self.neo4j_service = Neo4jService()
+        # Prefer the configured key; fall back to the environment variable
+        api_key = getattr(self.config, 'GOOGLE_API_KEY', None) or os.getenv("GOOGLE_API_KEY")
+        # Previously this flag was always True, even with no key at all, which
+        # made the availability check in _interpret_query_with_llm dead code.
+        self.google_api_available = bool(api_key)
+        if self.google_api_available:
+            genai.configure(api_key=api_key)
+        else:
+            print("⚠️ GOOGLE_API_KEY is not set, using rule-based interpretation only")
+    def process_query(self, user_query: str) -> Dict:
+        """
+        Answer a natural language transport question.
+        Steps: extract and correct place names, obtain a Cypher query from the
+        interpreter, run it, and bundle the outcome for the caller.
+        Returns:
+            On success a dict with 'success', 'message', 'cypher_query', 'data',
+            'corrections' and 'query_type'. When the interpreter reports failure,
+            or any exception is raised, a dict with 'success' False, 'message'
+            and 'suggestions'.
+        """
+        try:
+            found_locations = self.spell_corrector.extract_locations_from_query(user_query)
+            parsed = self._interpret_query_with_llm(user_query, found_locations)
+            if not parsed['success']:
+                # Interpreter gave up: relay its message together with example queries
+                return {
+                    'success': False,
+                    'message': parsed['message'],
+                    'suggestions': self._get_query_suggestions()
+                }
+            cypher = parsed['cypher_query']
+            rows = self._execute_cypher_query(cypher)
+            return {
+                'success': True,
+                'message': parsed['message'],
+                'cypher_query': cypher,
+                'data': rows,
+                'corrections': self._format_corrections(found_locations),
+                'query_type': parsed['query_type']
+            }
+        except Exception as e:
+            # Top-level boundary: report the problem and answer with a generic failure
+            print(f"Query processing error: {e}")
+            return {
+                'success': False,
+                'message': 'An error occurred while processing your query.',
+                'suggestions': self._get_query_suggestions()
+            }
+    def _interpret_query_with_llm(self, query: str, locations: List[Tuple]) -> Dict:
+        """
+        Use Google Gemini AI to interpret the query and generate appropriate Cypher.
+        Args:
+            query: The user's question as typed.
+            locations: (original, corrected, confidence, method) tuples for the
+                place names found in the query.
+        Returns:
+            Dict with 'success' True, 'query_type', 'cypher_query' and 'message'
+            when the model answers with all three fields. In every other case
+            (no API key, API error, unparsable or incomplete answer) the result
+            of _fallback_interpretation for the same arguments.
+        """
+        try:
+            if not self.google_api_available:
+                return self._fallback_interpretation(query, locations)
+            # Get available places for context
+            available_places = list(self.neo4j_service.get_all_places())
+            # NOTE: the prompt is an f-string, so every literal brace (the JSON
+            # template and the Cypher property maps in the examples) must be
+            # doubled. The examples used single braces before, which raised
+            # NameError while building the prompt and silently sent every query
+            # to the rule-based fallback.
+            prompt = f"""
+            You are an intelligent transport query interpreter for a Neo4j database containing Sri Lankan transport data.
+            Database Schema:
+            - Nodes: Place (with property 'name')
+            - Relationships: Fare (with property 'fare')
+            Available Places: {', '.join(available_places[:50])}... (total: {len(available_places)})
+            User Query: "{query}"
+            Extracted Locations: {[f"{orig}->{corr}" for orig, corr, conf, method in locations]}
+            Your task is to:
+            1. Determine the query type (fare, cheapest, expensive, places, routes_from, routes_to, statistics, lowest_fare)
+            2. Generate the appropriate Cypher query
+            3. Provide a clear response message
+                         Query Types:
+             - fare: Find fare between two specific locations
+             - cheapest: Find cheapest routes (top 10)
+             - expensive: Find most expensive routes (top 10)
+             - places: List all places
+             - routes_from: Find routes departing from a location
+             - routes_to: Find routes arriving at a location
+             - statistics: Get database statistics
+             - lowest_fare: Find the single lowest fare with route details
+            Return your response in this exact JSON format:
+            {{
+                "query_type": "fare|cheapest|expensive|places|routes_from|routes_to|statistics|lowest_fare",
+                "cypher_query": "MATCH ... RETURN ...",
+                "message": "Clear response message for the user"
+            }}
+                         Examples:
+             - "What is the fare from Colombo to Kandy?" → fare query: MATCH (a:Place {{name: 'Colombo'}})-[r:Fare]->(b:Place {{name: 'Kandy'}}) RETURN a.name as from_place, b.name as to_place, r.fare as fare
+             - "fare of anuradhapura to kandy?" → fare query: MATCH (a:Place {{name: 'Anuradapura'}})-[r:Fare]->(b:Place {{name: 'Kandy'}}) RETURN a.name as from_place, b.name as to_place, r.fare as fare
+             - "Show me the cheapest routes" → cheapest query: MATCH (a:Place)-[r:Fare]->(b:Place) RETURN a.name as from_place, b.name as to_place, r.fare as fare ORDER BY r.fare ASC LIMIT 10
+             - "What is the lowest fare?" → lowest_fare query: MATCH (a:Place)-[r:Fare]->(b:Place) RETURN a.name as from_place, b.name as to_place, r.fare as fare ORDER BY r.fare ASC LIMIT 1
+             - "List all places" → places query: MATCH (p:Place) RETURN DISTINCT p.name as place ORDER BY p.name
+             - "Routes from Colombo" → routes_from query: MATCH (a:Place {{name: 'Colombo'}})-[r:Fare]->(b:Place) RETURN a.name as from_place, b.name as to_place, r.fare as fare ORDER BY r.fare
+             - "Database statistics" → statistics query: MATCH (p:Place) MATCH ()-[r:Fare]->() RETURN count(DISTINCT p) as total_places, count(r) as total_routes, avg(r.fare) as average_fare, min(r.fare) as min_fare, max(r.fare) as max_fare
+             Keep Cypher queries simple and avoid complex functions like shortestPath. Use direct relationships only.
+             For fare queries, recognize various formats like "fare of X to Y", "fare from X to Y", "price from X to Y", etc.
+            """
+            # Initialize the Gemini model
+            model = genai.GenerativeModel('gemini-1.5-flash')
+            # Create the full prompt with system instructions
+            full_prompt = f"""You are a transport query interpreter for a Neo4j database. Return only valid JSON.
+{prompt}"""
+            # Generate content using Gemini
+            response = model.generate_content(
+                full_prompt,
+                generation_config=genai.types.GenerationConfig(
+                    max_output_tokens=500,
+                    temperature=0.1,
+                    response_mime_type="application/json"
+                )
+            )
+            response_text = response.text.strip()
+            # Parse the JSON response
+            try:
+                interpretation = json.loads(response_text)
+            except json.JSONDecodeError as json_err:
+                print(f"JSON parsing error: {json_err}")
+                # The answer may be JSON wrapped in extra text: retry on the
+                # span from the first '{' to the last '}'
+                json_start = response_text.find('{')
+                json_end = response_text.rfind('}') + 1
+                if json_start == -1 or json_end <= json_start:
+                    print(f"Could not parse response: {response.text}")
+                    raise
+                try:
+                    interpretation = json.loads(response_text[json_start:json_end])
+                except json.JSONDecodeError:
+                    print(f"Could not parse response: {response.text}")
+                    raise json_err
+            # Validate the response: it must be an object carrying all three fields
+            required_keys = ('query_type', 'cypher_query', 'message')
+            if isinstance(interpretation, dict) and all(key in interpretation for key in required_keys):
+                return {
+                    'success': True,
+                    'query_type': interpretation['query_type'],
+                    'cypher_query': interpretation['cypher_query'],
+                    'message': interpretation['message']
+                }
+            return self._fallback_interpretation(query, locations)
+        except Exception as e:
+            # Any failure above (API, parsing, database lookup) degrades to the
+            # rule-based interpreter instead of failing the whole request
+            error_message = str(e)
+            print(f"Google Gemini AI interpretation error: {e}")
+            # Handle specific Google API errors
+            if "quota" in error_message.lower() or "limit" in error_message.lower():
+                print("⚠️ Google API quota exceeded, falling back to rule-based interpretation")
+            elif "API_KEY_INVALID" in error_message or "authentication" in error_message.lower():
+                print("⚠️ Google API authentication failed, falling back to rule-based interpretation")
+            elif "models/gemini" in error_message.lower():
+                print("⚠️ Gemini model not available, falling back to rule-based interpretation")
+            return self._fallback_interpretation(query, locations)
+    def _fallback_interpretation(self, query: str, locations: List[Tuple]) -> Dict:
+        """
+        Rule-based interpretation used when the LLM is not available.
+        Rules are tried in this order and the first one that applies wins:
+        lowest/cheapest, expensive, places, statistics, two extracted locations
+        (fare), a "fare ... X to Y" phrase, one extracted location (routes
+        from/to).
+        Args:
+            query: The user's question as typed.
+            locations: (original, corrected, confidence, method) tuples; only
+                the corrected name at index 1 is used here.
+        Returns:
+            Dict with 'success' True plus 'query_type', 'cypher_query' and
+            'message', or 'success' False plus 'message' when no rule applies.
+            Never None: a "fare ... to" query whose places cannot be resolved
+            used to fall off the end of the function.
+        """
+        query_lower = query.lower()
+        route_columns = 'RETURN a.name as from_place, b.name as to_place, r.fare as fare'
+        def interpreted(query_type: str, cypher_query: str, message: str) -> Dict:
+            return {
+                'success': True,
+                'query_type': query_type,
+                'cypher_query': cypher_query,
+                'message': message
+            }
+        def quoted(place) -> str:
+            # Escape backslashes and single quotes so a place name cannot end
+            # the Cypher string literal early
+            return "'" + str(place).replace('\\', '\\\\').replace("'", "\\'") + "'"
+        def fare_between(origin, destination) -> Dict:
+            return interpreted(
+                'fare',
+                f"""
+                MATCH (a:Place {{name: {quoted(origin)}}})-[r:Fare]->(b:Place {{name: {quoted(destination)}}})
+                {route_columns}
+                """,
+                f'Finding fare from {origin} to {destination}...'
+            )
+        # Simple keyword-based interpretation
+        if 'lowest' in query_lower or 'minimum' in query_lower or 'cheapest' in query_lower:
+            if 'lowest fare' in query_lower or 'minimum fare' in query_lower:
+                return interpreted(
+                    'lowest_fare',
+                    """
+                    MATCH (a:Place)-[r:Fare]->(b:Place)
+                    WITH a, b, r, r.fare as fare
+                    ORDER BY r.fare ASC
+                    LIMIT 1
+                    RETURN a.name as from_place, b.name as to_place, fare
+                    """,
+                    'Finding the lowest fare in the database...'
+                )
+            return interpreted(
+                'cheapest',
+                f"""
+                MATCH (a:Place)-[r:Fare]->(b:Place)
+                {route_columns}
+                ORDER BY r.fare ASC
+                LIMIT 10
+                """,
+                'Finding the cheapest routes...'
+            )
+        if 'expensive' in query_lower or 'highest' in query_lower or 'maximum' in query_lower:
+            return interpreted(
+                'expensive',
+                f"""
+                MATCH (a:Place)-[r:Fare]->(b:Place)
+                {route_columns}
+                ORDER BY r.fare DESC
+                LIMIT 10
+                """,
+                'Finding the most expensive routes...'
+            )
+        if 'places' in query_lower or 'locations' in query_lower or 'list all' in query_lower:
+            return interpreted(
+                'places',
+                """
+                MATCH (p:Place)
+                RETURN DISTINCT p.name as place
+                ORDER BY p.name
+                """,
+                'Listing all places...'
+            )
+        if 'statistics' in query_lower or 'stats' in query_lower:
+            return interpreted(
+                'statistics',
+                """
+                MATCH (p:Place)
+                MATCH ()-[r:Fare]->()
+                RETURN
+                    count(DISTINCT p) as total_places,
+                    count(r) as total_routes,
+                    avg(r.fare) as average_fare,
+                    min(r.fare) as min_fare,
+                    max(r.fare) as max_fare
+                """,
+                'Getting database statistics...'
+            )
+        if len(locations) >= 2:
+            # Fare query between two locations
+            return fare_between(locations[0][1], locations[1][1])
+        if 'fare' in query_lower and 'to' in query_lower:
+            # Handle queries like "fare of X to Y" where locations might not be extracted properly
+            place = r'([a-zA-Z]+(?:\s+[a-zA-Z]+)*)'
+            span = rf'{place}\s+to\s+{place}'
+            fare_patterns = [
+                rf'fare\s+(?:of|from)?\s+{span}',
+                rf'price\s+(?:of|from)?\s+{span}',
+                rf'cost\s+(?:of|from)?\s+{span}',
+                rf'how\s+much\s+(?:is|does)\s+(?:the\s+)?(?:fare|price|cost)\s+(?:from\s+)?{span}',
+                rf'what\s+(?:is|are)\s+(?:the\s+)?(?:fare|price|cost)s?\s+(?:from\s+)?{span}',
+                rf'{span}\s+(?:fare|price|cost)',
+                rf'(?:fare|price|cost)\s+{span}'
+            ]
+            for pattern in fare_patterns:
+                match = re.search(pattern, query_lower)
+                if not match:
+                    continue
+                # Correct the locations
+                from_corrected, from_conf, _ = self.spell_corrector.correct_location(match.group(1).strip())
+                to_corrected, to_conf, _ = self.spell_corrector.correct_location(match.group(2).strip())
+                if from_conf > 0.5 and to_conf > 0.5:
+                    return fare_between(from_corrected, to_corrected)
+            # No usable pair found: keep going so the remaining rules still get a chance
+        if len(locations) == 1:
+            # Routes from/to a single location
+            location = locations[0][1]
+            if 'from' in query_lower:
+                return interpreted(
+                    'routes_from',
+                    f"""
+                    MATCH (a:Place {{name: {quoted(location)}}})-[r:Fare]->(b:Place)
+                    {route_columns}
+                    ORDER BY r.fare
+                    """,
+                    f'Finding routes from {location}...'
+                )
+            return interpreted(
+                'routes_to',
+                f"""
+                MATCH (a:Place)-[r:Fare]->(b:Place {{name: {quoted(location)}}})
+                {route_columns}
+                ORDER BY r.fare
+                """,
+                f'Finding routes to {location}...'
+            )
+        return {
+            'success': False,
+            'message': 'I could not understand your query. Please try rephrasing it.'
+        }
+    def _execute_cypher_query(self, cypher_query: str) -> List[Dict]:
+        """
+        Execute the generated Cypher query and return its rows as dicts.
+        The query text may come from the LLM, which builds it from user input,
+        so only read-only statements are run: a query that contains a write or
+        procedure clause outside of quoted literals is refused.
+        Returns:
+            One dict per result record; an empty list when the query is empty,
+            refused, the database driver is missing, or execution fails.
+        """
+        if not isinstance(cypher_query, str) or not cypher_query.strip():
+            print("Cypher execution error: empty query")
+            return []
+        # Blank out quoted literals first so a place name such as 'Set' is not
+        # mistaken for a clause keyword
+        without_literals = re.sub(r"'(?:\\.|[^'\\])*'|\"(?:\\.|[^\"\\])*\"", "''", cypher_query)
+        write_clause = re.search(
+            r"\b(?:CREATE|MERGE|DELETE|DETACH|SET|REMOVE|DROP|FOREACH|CALL|LOAD\s+CSV)\b",
+            without_literals,
+            re.IGNORECASE
+        )
+        if write_clause:
+            print(f"Cypher execution error: refusing non read-only clause '{write_clause.group(0)}'")
+            return []
+        driver = getattr(self.neo4j_service, 'driver', None)
+        if driver is None:
+            print("Cypher execution error: no database connection")
+            return []
+        try:
+            with driver.session() as session:
+                result = session.run(cypher_query)
+                return [dict(record) for record in result]
+        except Exception as e:
+            # Best effort: report the problem and let the caller show an empty result
+            print(f"Cypher execution error: {e}")
+            return []
+    def _format_corrections(self, locations: List[Tuple]) -> List[Dict]:
+        """
+        Describe the place names that were changed by correction.
+        Each entry of ``locations`` is an (original, corrected, confidence,
+        method) tuple. Entries whose original and corrected names differ only
+        by letter case are left out.
+        """
+        return [
+            {
+                'original': typed,
+                'corrected': fixed,
+                'confidence': score,
+                'method': how
+            }
+            for typed, fixed, score, how in locations
+            if typed.lower() != fixed.lower()
+        ]
+    def _get_query_suggestions(self) -> List[str]:
+        """Return example queries offered to the user when a query cannot be answered."""
+        examples = (
+            "What is the fare from Colombo to Kandy?",
+            "What is the lowest fare price?",
+            "Show me the cheapest routes",
+            "Show me the most expensive routes",
+            "List all places",
+            "Routes from Panadura",
+            "Routes to Galle",
+            "Database statistics",
+        )
+        # A fresh list on every call, so callers may modify what they receive
+        return list(examples)

logger.py CHANGED Viewed

@@ -1,61 +1,61 @@
-#!/usr/bin/env python3
-"""
-Centralized logging setup for the Transport Query Application.
-Provides a rotating file handler and console output.
-"""
-import logging
-import os
-from logging.handlers import RotatingFileHandler
-def get_logger(name: str) -> logging.Logger:
-    """Create or retrieve a configured logger with file and console handlers."""
-    logger = logging.getLogger(name)
-    if getattr(logger, "_configured", False):
-        return logger
-    log_level_str = os.getenv("LOG_LEVEL", "INFO").upper()
-    log_dir = os.getenv("LOG_DIR", os.path.join(os.path.dirname(__file__), "..", "logs"))
-    # Try to create log directory, fallback to current directory if it fails
-    try:
-        log_dir = os.path.abspath(log_dir)
-        os.makedirs(log_dir, exist_ok=True)
-    except Exception:
-        # Fallback to current directory if path invalid
-        log_dir = os.getcwd()
-    log_path = os.path.join(log_dir, "app.log")
-    formatter = logging.Formatter(
-        fmt="%(asctime)s %(levelname)s [%(name)s] %(message)s",
-        datefmt="%Y-%m-%d %H:%M:%S",
-    )
-    # Console handler (always available)
-    console_handler = logging.StreamHandler()
-    console_handler.setFormatter(formatter)
-    logger.addHandler(console_handler)
-    # Try to add file handler, but don't fail if it doesn't work
-    try:
-        file_handler = RotatingFileHandler(log_path, maxBytes=1_000_000, backupCount=5, encoding="utf-8")
-        file_handler.setFormatter(formatter)
-        logger.addHandler(file_handler)
-        logger.debug(f"File logging enabled: {log_path}")
-    except (PermissionError, OSError) as e:
-        # If file logging fails, just log to console
-        logger.warning(f"File logging disabled due to permission error: {e}")
-        logger.warning("Logging to console only")
-    # Configure logger
-    logger.setLevel(getattr(logging, log_level_str, logging.INFO))
-    logger.propagate = False
-    logger._configured = True  # type: ignore[attr-defined]
-    logger.debug(f"Logger initialized. Level={log_level_str}")
-    return logger

+#!/usr/bin/env python3
+"""
+Centralized logging setup for the Transport Query Application.
+Provides a rotating file handler and console output.
+"""
+import logging
+import os
+from logging.handlers import RotatingFileHandler
+def get_logger(name: str) -> logging.Logger:
+    """
+    Create or retrieve a configured logger with file and console handlers.
+    Configuration is read from the environment: LOG_LEVEL (default "INFO";
+    unknown names fall back to INFO) and LOG_DIR (default: a "logs" directory
+    next to this module's parent directory). The log file is "app.log" inside
+    that directory, rotated at 1,000,000 bytes with 5 backups. If the file
+    handler cannot be created, the logger writes to the console only.
+    A logger is configured once; later calls with the same name return it as is.
+    """
+    logger = logging.getLogger(name)
+    if getattr(logger, "_configured", False):
+        return logger
+    log_level_str = os.getenv("LOG_LEVEL", "INFO").upper()
+    log_level = getattr(logging, log_level_str, logging.INFO)
+    if not isinstance(log_level, int):
+        # The name matched some other attribute of the logging module, not a level
+        log_level = logging.INFO
+    # Configure the level before anything is logged: the setup messages below
+    # used to be emitted while the logger still had no level of its own, so
+    # the debug ones were dropped even with LOG_LEVEL=DEBUG.
+    logger.setLevel(log_level)
+    logger.propagate = False
+    log_dir = os.getenv("LOG_DIR", os.path.join(os.path.dirname(__file__), "..", "logs"))
+    # Try to create log directory, fallback to current directory if it fails
+    try:
+        log_dir = os.path.abspath(log_dir)
+        os.makedirs(log_dir, exist_ok=True)
+    except Exception:
+        # Fallback to current directory if path invalid
+        log_dir = os.getcwd()
+    log_path = os.path.join(log_dir, "app.log")
+    formatter = logging.Formatter(
+        fmt="%(asctime)s %(levelname)s [%(name)s] %(message)s",
+        datefmt="%Y-%m-%d %H:%M:%S",
+    )
+    # Console handler (always available)
+    console_handler = logging.StreamHandler()
+    console_handler.setFormatter(formatter)
+    logger.addHandler(console_handler)
+    # Try to add file handler, but don't fail if it doesn't work
+    try:
+        file_handler = RotatingFileHandler(log_path, maxBytes=1_000_000, backupCount=5, encoding="utf-8")
+        file_handler.setFormatter(formatter)
+        logger.addHandler(file_handler)
+        logger.debug(f"File logging enabled: {log_path}")
+    except OSError as e:
+        # PermissionError is an OSError too; if file logging fails, just log to console
+        logger.warning(f"File logging disabled due to permission error: {e}")
+        logger.warning("Logging to console only")
+    # Mark as configured so repeated calls do not attach duplicate handlers
+    logger._configured = True  # type: ignore[attr-defined]
+    logger.debug(f"Logger initialized. Level={log_level_str}")
+    return logger

neo4j_service.py CHANGED Viewed

@@ -1,222 +1,222 @@
-#!/usr/bin/env python3
-"""
-Neo4j Service for Transport Query Application
-Handles all database operations
-"""
-from neo4j import GraphDatabase
-from typing import List, Dict, Optional, Tuple
-from config import Config
-class Neo4jService:
-    """Neo4j database service"""
-    def __init__(self):
-        self.config = Config()
-        self.driver = None
-        self._connect()
-    def _connect(self):
-        """Connect to Neo4j database"""
-        try:
-            self.driver = GraphDatabase.driver(
-                self.config.NEO4J_URI,
-                auth=(self.config.NEO4J_USER, self.config.NEO4J_PASSWORD)
-            )
-            # Test connection
-            with self.driver.session() as session:
-                session.run("RETURN 1")
-            print("✅ Connected to Neo4j database")
-        except Exception as e:
-            print(f"❌ Failed to connect to Neo4j: {e}")
-            self.driver = None
-    def is_connected(self) -> bool:
-        """Check if connected to Neo4j"""
-        return self.driver is not None
-    def get_fare(self, from_location: str, to_location: str) -> Optional[Dict]:
-        """Get fare between two locations"""
-        if not self.is_connected():
-            return None
-        try:
-            with self.driver.session() as session:
-                result = session.run("""
-                    MATCH (a:Place {name: $from_location})-[r:Fare]->(b:Place {name: $to_location})
-                    RETURN a.name as from_place, b.name as to_place, r.fare as fare
-                """, from_location=from_location, to_location=to_location)
-                record = result.single()
-                if record:
-                    return {
-                        'from_place': record['from_place'],
-                        'to_place': record['to_place'],
-                        'fare': record['fare']
-                    }
-                return None
-        except Exception as e:
-            print(f"Error getting fare: {e}")
-            return None
-    def get_all_places(self) -> List[str]:
-        """Get all available places"""
-        if not self.is_connected():
-            return []
-        try:
-            with self.driver.session() as session:
-                result = session.run("""
-                    MATCH (p:Place)
-                    RETURN DISTINCT p.name as place
-                    ORDER BY p.name
-                """)
-                return [record['place'] for record in result]
-        except Exception as e:
-            print(f"Error getting places: {e}")
-            return []
-    def get_routes_from_location(self, from_location: str) -> List[Dict]:
-        """Get all routes from a specific location"""
-        if not self.is_connected():
-            return []
-        try:
-            with self.driver.session() as session:
-                result = session.run("""
-                    MATCH (a:Place {name: $from_location})-[r:Fare]->(b:Place)
-                    RETURN a.name as from_place, b.name as to_place, r.fare as fare
-                    ORDER BY r.fare
-                """, from_location=from_location)
-                return [dict(record) for record in result]
-        except Exception as e:
-            print(f"Error getting routes from location: {e}")
-            return []
-    def get_routes_to_location(self, to_location: str) -> List[Dict]:
-        """Get all routes to a specific location"""
-        if not self.is_connected():
-            return []
-        try:
-            with self.driver.session() as session:
-                result = session.run("""
-                    MATCH (a:Place)-[r:Fare]->(b:Place {name: $to_location})
-                    RETURN a.name as from_place, b.name as to_place, r.fare as fare
-                    ORDER BY r.fare
-                """, to_location=to_location)
-                return [dict(record) for record in result]
-        except Exception as e:
-            print(f"Error getting routes to location: {e}")
-            return []
-    def get_cheapest_routes(self, limit: int = 10) -> List[Dict]:
-        """Get cheapest routes"""
-        if not self.is_connected():
-            return []
-        try:
-            with self.driver.session() as session:
-                result = session.run("""
-                    MATCH (a:Place)-[r:Fare]->(b:Place)
-                    RETURN a.name as from_place, b.name as to_place, r.fare as fare
-                    ORDER BY r.fare ASC
-                    LIMIT $limit
-                """, limit=limit)
-                return [dict(record) for record in result]
-        except Exception as e:
-            print(f"Error getting cheapest routes: {e}")
-            return []
-    def get_most_expensive_routes(self, limit: int = 10) -> List[Dict]:
-        """Get most expensive routes"""
-        if not self.is_connected():
-            return []
-        try:
-            with self.driver.session() as session:
-                result = session.run("""
-                    MATCH (a:Place)-[r:Fare]->(b:Place)
-                    RETURN a.name as from_place, b.name as to_place, r.fare as fare
-                    ORDER BY r.fare DESC
-                    LIMIT $limit
-                """, limit=limit)
-                return [dict(record) for record in result]
-        except Exception as e:
-            print(f"Error getting most expensive routes: {e}")
-            return []
-    def search_routes_by_fare_range(self, min_fare: float, max_fare: float) -> List[Dict]:
-        """Search routes within a fare range"""
-        if not self.is_connected():
-            return []
-        try:
-            with self.driver.session() as session:
-                result = session.run("""
-                    MATCH (a:Place)-[r:Fare]->(b:Place)
-                    WHERE r.fare >= $min_fare AND r.fare <= $max_fare
-                    RETURN a.name as from_place, b.name as to_place, r.fare as fare
-                    ORDER BY r.fare
-                """, min_fare=min_fare, max_fare=max_fare)
-                return [dict(record) for record in result]
-        except Exception as e:
-            print(f"Error searching routes by fare range: {e}")
-            return []
-    def get_route_statistics(self) -> Dict:
-        """Get database statistics"""
-        if not self.is_connected():
-            return {}
-        try:
-            with self.driver.session() as session:
-                # Count places
-                places_result = session.run("MATCH (p:Place) RETURN count(p) as place_count")
-                place_count = places_result.single()['place_count']
-                # Count routes
-                routes_result = session.run("MATCH ()-[r:Fare]->() RETURN count(r) as route_count")
-                route_count = routes_result.single()['route_count']
-                # Average fare
-                avg_result = session.run("MATCH ()-[r:Fare]->() RETURN avg(r.fare) as avg_fare")
-                avg_fare = avg_result.single()['avg_fare']
-                # Min and max fares
-                fare_range_result = session.run("""
-                    MATCH ()-[r:Fare]->()
-                    RETURN min(r.fare) as min_fare, max(r.fare) as max_fare
-                """)
-                fare_range = fare_range_result.single()
-                return {
-                    'total_places': place_count,
-                    'total_routes': route_count,
-                    'average_fare': round(avg_fare, 2) if avg_fare else 0,
-                    'min_fare': fare_range['min_fare'],
-                    'max_fare': fare_range['max_fare']
-                }
-        except Exception as e:
-            print(f"Error getting statistics: {e}")
-            return {}
-    def close(self):
-        """Close database connection"""
-        if self.driver:
-            self.driver.close()
-            self.driver = None

+#!/usr/bin/env python3
+"""
+Neo4j Service for Transport Query Application
+Handles all database operations
+"""
+from neo4j import GraphDatabase
+from typing import List, Dict, Optional, Tuple
+from config import Config
+class Neo4jService:
+    """Neo4j database service for fare lookups between ``Place`` nodes.
+    Every query method is best-effort: when the service is not connected, or
+    when the query raises, the error is printed and an empty value
+    (``None``, ``[]`` or ``{}``) is returned instead of propagating.
+    """
+    def __init__(self):
+        self.config = Config()
+        self.driver = None
+        self._connect()
+    def _connect(self):
+        """Open a driver and verify it with a trivial query.
+        On any failure ``self.driver`` is left as ``None`` and the partially
+        initialised driver (if one was created) is closed.
+        """
+        driver = None
+        try:
+            driver = GraphDatabase.driver(
+                self.config.NEO4J_URI,
+                auth=(self.config.NEO4J_USER, self.config.NEO4J_PASSWORD)
+            )
+            # Test connection
+            with driver.session() as session:
+                session.run("RETURN 1")
+            self.driver = driver
+            print("✅ Connected to Neo4j database")
+        except Exception as e:
+            print(f"❌ Failed to connect to Neo4j: {e}")
+            self.driver = None
+            # The driver object can exist even though the probe query failed;
+            # close it so its resources are not leaked.
+            if driver is not None:
+                try:
+                    driver.close()
+                except Exception:
+                    # Best-effort cleanup: the connection failure was already reported.
+                    pass
+    def is_connected(self) -> bool:
+        """Return True when a driver is held (no round-trip to the server is made)."""
+        return self.driver is not None
+    def _fetch_rows(self, query: str, params: Dict, error_prefix: str) -> List[Dict]:
+        """Run ``query`` with ``params`` and return every record as a plain dict.
+        Returns ``[]`` when not connected or when the query raises; in the
+        latter case ``"<error_prefix>: <exception>"`` is printed.
+        """
+        if not self.is_connected():
+            return []
+        try:
+            with self.driver.session() as session:
+                # Materialise inside the session: records are consumed while it is open.
+                return [dict(record) for record in session.run(query, params)]
+        except Exception as e:
+            print(f"{error_prefix}: {e}")
+            return []
+    def get_fare(self, from_location: str, to_location: str) -> Optional[Dict]:
+        """Get the fare between two locations.
+        Returns a dict with ``from_place``, ``to_place`` and ``fare`` keys, or
+        ``None`` when no direct ``Fare`` relationship exists, the service is
+        not connected, or the query fails.
+        """
+        if not self.is_connected():
+            return None
+        try:
+            with self.driver.session() as session:
+                result = session.run("""
+                    MATCH (a:Place {name: $from_location})-[r:Fare]->(b:Place {name: $to_location})
+                    RETURN a.name as from_place, b.name as to_place, r.fare as fare
+                """, from_location=from_location, to_location=to_location)
+                record = result.single()
+                if record:
+                    return {
+                        'from_place': record['from_place'],
+                        'to_place': record['to_place'],
+                        'fare': record['fare']
+                    }
+                return None
+        except Exception as e:
+            print(f"Error getting fare: {e}")
+            return None
+    def get_all_places(self) -> List[str]:
+        """Get the distinct names of all ``Place`` nodes, ordered by name."""
+        rows = self._fetch_rows("""
+                    MATCH (p:Place)
+                    RETURN DISTINCT p.name as place
+                    ORDER BY p.name
+                """, {}, "Error getting places")
+        return [row['place'] for row in rows]
+    def get_routes_from_location(self, from_location: str) -> List[Dict]:
+        """Get all routes leaving ``from_location``, ordered by ascending fare."""
+        return self._fetch_rows("""
+                    MATCH (a:Place {name: $from_location})-[r:Fare]->(b:Place)
+                    RETURN a.name as from_place, b.name as to_place, r.fare as fare
+                    ORDER BY r.fare
+                """, {'from_location': from_location}, "Error getting routes from location")
+    def get_routes_to_location(self, to_location: str) -> List[Dict]:
+        """Get all routes arriving at ``to_location``, ordered by ascending fare."""
+        return self._fetch_rows("""
+                    MATCH (a:Place)-[r:Fare]->(b:Place {name: $to_location})
+                    RETURN a.name as from_place, b.name as to_place, r.fare as fare
+                    ORDER BY r.fare
+                """, {'to_location': to_location}, "Error getting routes to location")
+    def get_cheapest_routes(self, limit: int = 10) -> List[Dict]:
+        """Get up to ``limit`` routes with the lowest fares."""
+        return self._fetch_rows("""
+                    MATCH (a:Place)-[r:Fare]->(b:Place)
+                    RETURN a.name as from_place, b.name as to_place, r.fare as fare
+                    ORDER BY r.fare ASC
+                    LIMIT $limit
+                """, {'limit': limit}, "Error getting cheapest routes")
+    def get_most_expensive_routes(self, limit: int = 10) -> List[Dict]:
+        """Get up to ``limit`` routes with the highest fares."""
+        return self._fetch_rows("""
+                    MATCH (a:Place)-[r:Fare]->(b:Place)
+                    RETURN a.name as from_place, b.name as to_place, r.fare as fare
+                    ORDER BY r.fare DESC
+                    LIMIT $limit
+                """, {'limit': limit}, "Error getting most expensive routes")
+    def search_routes_by_fare_range(self, min_fare: float, max_fare: float) -> List[Dict]:
+        """Get routes whose fare lies in ``[min_fare, max_fare]`` (inclusive), cheapest first."""
+        return self._fetch_rows("""
+                    MATCH (a:Place)-[r:Fare]->(b:Place)
+                    WHERE r.fare >= $min_fare AND r.fare <= $max_fare
+                    RETURN a.name as from_place, b.name as to_place, r.fare as fare
+                    ORDER BY r.fare
+                """, {'min_fare': min_fare, 'max_fare': max_fare}, "Error searching routes by fare range")
+    def get_route_statistics(self) -> Dict:
+        """Get database statistics.
+        Returns a dict with ``total_places``, ``total_routes``,
+        ``average_fare`` (rounded to 2 decimals, 0 when there are no fares),
+        ``min_fare`` and ``max_fare``; ``{}`` when not connected or on error.
+        """
+        if not self.is_connected():
+            return {}
+        try:
+            with self.driver.session() as session:
+                # Count places
+                places_result = session.run("MATCH (p:Place) RETURN count(p) as place_count")
+                place_count = places_result.single()['place_count']
+                # Count routes
+                routes_result = session.run("MATCH ()-[r:Fare]->() RETURN count(r) as route_count")
+                route_count = routes_result.single()['route_count']
+                # Average fare
+                avg_result = session.run("MATCH ()-[r:Fare]->() RETURN avg(r.fare) as avg_fare")
+                avg_fare = avg_result.single()['avg_fare']
+                # Min and max fares
+                fare_range_result = session.run("""
+                    MATCH ()-[r:Fare]->()
+                    RETURN min(r.fare) as min_fare, max(r.fare) as max_fare
+                """)
+                fare_range = fare_range_result.single()
+                return {
+                    'total_places': place_count,
+                    'total_routes': route_count,
+                    'average_fare': round(avg_fare, 2) if avg_fare else 0,
+                    'min_fare': fare_range['min_fare'],
+                    'max_fare': fare_range['max_fare']
+                }
+        except Exception as e:
+            print(f"Error getting statistics: {e}")
+            return {}
+    def close(self):
+        """Close the database connection; safe to call more than once."""
+        if self.driver:
+            self.driver.close()
+            self.driver = None

spell_corrector.py CHANGED Viewed

@@ -1,257 +1,257 @@
-#!/usr/bin/env python3
-"""
-Spell Correction Module for Transport Query Application
-Handles location name corrections using fuzzy matching and LLM
-"""
-import re
-from fuzzywuzzy import fuzz
-from typing import List, Tuple, Optional
-import openai
-from config import Config
-class SpellCorrector:
-    """Spell correction for location names"""
-    def __init__(self):
-        self.config = Config()
-        self.location_mapping = self.config.LOCATION_MAPPING
-        self.available_locations = set(self.location_mapping.values())
-        # Initialize OpenAI if API key is available
-        if self.config.OPENAI_API_KEY:
-            try:
-                # Prefer new SDK client if installed; otherwise set legacy api key
-                try:
-                    from openai import OpenAI  # noqa: F401
-                    self.llm_available = True
-                except Exception:
-                    openai.api_key = self.config.OPENAI_API_KEY
-                    self.llm_available = True
-            except Exception:
-                self.llm_available = False
-        else:
-            self.llm_available = False
-    def correct_location(self, location: str) -> Tuple[str, float, str]:
-        """
-        Correct a location name using multiple methods
-        Returns:
-            Tuple of (corrected_name, confidence_score, correction_method)
-        """
-        location = location.strip().lower()
-        # Method 1: Direct mapping
-        if location in self.location_mapping:
-            corrected = self.location_mapping[location]
-            return corrected, 1.0, "direct_mapping"
-        # Method 2: Fuzzy matching
-        best_match, confidence = self._fuzzy_match(location)
-        if confidence >= self.config.SIMILARITY_THRESHOLD:
-            return best_match, confidence, "fuzzy_matching"
-        # Method 3: LLM correction (if available)
-        if self.llm_available:
-            llm_corrected = self._llm_correct(location)
-            if llm_corrected:
-                # Verify LLM suggestion with fuzzy matching
-                llm_confidence = fuzz.ratio(location.lower(), llm_corrected.lower()) / 100
-                if llm_confidence >= 0.6:  # Lower threshold for LLM suggestions
-                    return llm_corrected, llm_confidence, "llm_correction"
-        # Method 4: Partial matching
-        partial_match = self._partial_match(location)
-        if partial_match:
-            return partial_match, 0.7, "partial_matching"
-        # No correction found
-        return location.title(), 0.0, "no_correction"
-    def _fuzzy_match(self, location: str) -> Tuple[str, float]:
-        """Find best fuzzy match for location"""
-        best_match = None
-        best_score = 0
-        for available_location in self.available_locations:
-            score = fuzz.ratio(location.lower(), available_location.lower()) / 100
-            if score > best_score:
-                best_score = score
-                best_match = available_location
-        return best_match, best_score
-    def _partial_match(self, location: str) -> Optional[str]:
-        """Find partial matches (substring matching)"""
-        location_lower = location.lower()
-        for available_location in self.available_locations:
-            available_lower = available_location.lower()
-            # Check if location is contained in available location
-            if location_lower in available_lower or available_lower in location_lower:
-                return available_location
-        return None
-    def _llm_correct(self, location: str) -> Optional[str]:
-        """Use LLM to correct location name"""
-        try:
-            prompt = f"""
-            You are a location name correction system for Sri Lankan cities and towns.
-            Given a potentially misspelled location name, return the correct spelling.
-            Available locations include: {', '.join(sorted(self.available_locations))}
-            Input location: "{location}"
-            Return only the corrected location name, nothing else. If no correction is possible, return "UNKNOWN".
-            """
-            corrected = None
-            # Try new SDK first
-            try:
-                from openai import OpenAI
-                client = OpenAI(api_key=self.config.OPENAI_API_KEY)
-                response = client.chat.completions.create(
-                    model=self.config.OPENAI_MODEL,
-                    messages=[
-                        {"role": "system", "content": "You are a helpful assistant that corrects location names."},
-                        {"role": "user", "content": prompt}
-                    ],
-                    max_tokens=50,
-                    temperature=0.1
-                )
-                corrected = response.choices[0].message.content.strip()
-            except Exception as sdk_err:
-                # Fallback to legacy API if present
-                import openai
-                try:
-                    openai.api_key = self.config.OPENAI_API_KEY
-                    response = openai.ChatCompletion.create(
-                        model=self.config.OPENAI_MODEL,
-                        messages=[
-                            {"role": "system", "content": "You are a helpful assistant that corrects location names."},
-                            {"role": "user", "content": prompt}
-                        ],
-                        max_tokens=50,
-                        temperature=0.1
-                    )
-                    corrected = response.choices[0].message.content.strip()
-                except Exception:
-                    raise sdk_err
-            # Validate LLM response
-            if corrected.upper() == "UNKNOWN":
-                return None
-            # Check if corrected location exists in our database
-            if corrected in self.available_locations:
-                return corrected
-            # Try fuzzy matching on LLM response
-            llm_fuzzy_match, confidence = self._fuzzy_match(corrected)
-            if confidence >= 0.8:
-                return llm_fuzzy_match
-            return None
-        except Exception as e:
-            print(f"LLM correction error: {e}")
-            return None
-    def extract_locations_from_query(self, query: str) -> List[Tuple[str, str, float, str]]:
-        """
-        Extract and correct locations from a natural language query
-        Returns:
-            List of tuples: (original, corrected, confidence, method)
-        """
-        # Common patterns for location extraction
-        patterns = [
-             r'from\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)(?:\s|$|\?)',
-             r'([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)(?:\s|$|\?)',
-             r'between\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+and\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)(?:\s|$|\?)',
-             r'fare\s+(?:of|from\s+)?([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)(?:\s|$|\?)',
-             r'price\s+(?:of|from\s+)?([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)(?:\s|$|\?)',
-             r'cost\s+(?:of|from\s+)?([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)(?:\s|$|\?)',
-             r'how\s+much\s+(?:is|does)\s+(?:the\s+)?(?:fare|price|cost)\s+(?:from\s+)?([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)(?:\s|$|\?)',
-             r'what\s+(?:is|are)\s+(?:the\s+)?(?:fare|price|cost)s?\s+(?:from\s+)?([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)(?:\s|$|\?)',
-             r'([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+(?:fare|price|cost)(?:\s|$|\?)',
-             r'(?:fare|price|cost)\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)(?:\s|$|\?)'
-         ]
-        locations = []
-        # Try all patterns to find locations
-        for pattern in patterns:
-            match = re.search(pattern, query, re.IGNORECASE)
-            if match:
-                # Extract locations from the match
-                groups = match.groups()
-                if len(groups) >= 2:
-                    from_location = groups[0].strip()
-                    to_location = groups[1].strip()
-                    # Skip if locations are too short or common words
-                    if len(from_location) >= 2 and from_location.lower() not in ['to', 'from', 'and', 'the', 'a', 'an']:
-                        from_corrected, from_confidence, from_method = self.correct_location(from_location)
-                        if from_confidence > 0.5:
-                            locations.append((
-                                from_location,
-                                from_corrected,
-                                from_confidence,
-                                from_method
-                            ))
-                    if len(to_location) >= 2 and to_location.lower() not in ['to', 'from', 'and', 'the', 'a', 'an']:
-                        to_corrected, to_confidence, to_method = self.correct_location(to_location)
-                        if to_confidence > 0.5:
-                            locations.append((
-                                to_location,
-                                to_corrected,
-                                to_confidence,
-                                to_method
-                            ))
-                    # If we found locations, break to avoid duplicates
-                    if len(locations) >= 2:
-                        break
-        return locations
-    def get_suggestions(self, partial_location: str) -> List[Tuple[str, float]]:
-        """Get location suggestions for autocomplete"""
-        suggestions = []
-        partial_lower = partial_location.lower()
-        for location in self.available_locations:
-            location_lower = location.lower()
-            # Check if partial location is a prefix
-            if location_lower.startswith(partial_lower):
-                suggestions.append((location, 1.0))
-            # Check fuzzy similarity
-            elif fuzz.ratio(partial_lower, location_lower) / 100 >= 0.6:
-                suggestions.append((location, fuzz.ratio(partial_lower, location_lower) / 100))
-        # Sort by confidence and return top suggestions
-        suggestions.sort(key=lambda x: x[1], reverse=True)
-        return suggestions[:self.config.MAX_SUGGESTIONS]
-    def validate_route(self, from_location: str, to_location: str) -> Tuple[bool, str]:
-        """Validate if a route exists in the database"""
-        from_corrected, from_confidence, _ = self.correct_location(from_location)
-        to_corrected, to_confidence, _ = self.correct_location(to_location)
-        if from_confidence < 0.5:
-            return False, f"Could not identify departure location: '{from_location}'"
-        if to_confidence < 0.5:
-            return False, f"Could not identify destination location: '{to_location}'"
-        if from_corrected == to_corrected:
-            return False, f"Departure and destination cannot be the same: '{from_corrected}'"
-        return True, f"Route: {from_corrected} → {to_corrected}"

+#!/usr/bin/env python3
+"""
+Spell Correction Module for Transport Query Application
+Handles location name corrections using fuzzy matching and LLM
+"""
+import re
+from fuzzywuzzy import fuzz
+from typing import List, Tuple, Optional
+import openai
+from config import Config
+class SpellCorrector:
+    """Spell correction for location names.
+    A name is resolved by trying, in order: exact alias lookup, fuzzy
+    matching, an optional LLM suggestion, and finally substring matching.
+    """
+    # Words the extraction regexes can capture that are never place names.
+    _STOP_WORDS = frozenset({'to', 'from', 'and', 'the', 'a', 'an'})
+    # Compiled once; tried in this order by extract_locations_from_query.
+    # Each pattern has exactly two groups: (from, to).
+    _LOCATION_PATTERNS = tuple(
+        re.compile(pattern, re.IGNORECASE) for pattern in (
+             r'from\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)(?:\s|$|\?)',
+             r'([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)(?:\s|$|\?)',
+             r'between\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+and\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)(?:\s|$|\?)',
+             r'fare\s+(?:of|from\s+)?([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)(?:\s|$|\?)',
+             r'price\s+(?:of|from\s+)?([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)(?:\s|$|\?)',
+             r'cost\s+(?:of|from\s+)?([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)(?:\s|$|\?)',
+             r'how\s+much\s+(?:is|does)\s+(?:the\s+)?(?:fare|price|cost)\s+(?:from\s+)?([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)(?:\s|$|\?)',
+             r'what\s+(?:is|are)\s+(?:the\s+)?(?:fare|price|cost)s?\s+(?:from\s+)?([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)(?:\s|$|\?)',
+             r'([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+(?:fare|price|cost)(?:\s|$|\?)',
+             r'(?:fare|price|cost)\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)\s+to\s+([a-zA-Z]+(?:\s+[a-zA-Z]+)*)(?:\s|$|\?)',
+        )
+    )
+    def __init__(self):
+        self.config = Config()
+        self.location_mapping = self.config.LOCATION_MAPPING
+        self.available_locations = set(self.location_mapping.values())
+        # Initialize OpenAI if API key is available
+        self.llm_available = False
+        if self.config.OPENAI_API_KEY:
+            try:
+                # Prefer new SDK client if installed; otherwise set legacy api key
+                try:
+                    from openai import OpenAI  # noqa: F401
+                except Exception:
+                    openai.api_key = self.config.OPENAI_API_KEY
+                self.llm_available = True
+            except Exception:
+                self.llm_available = False
+    def correct_location(self, location: str) -> Tuple[str, float, str]:
+        """
+        Correct a location name using multiple methods
+        Returns:
+            Tuple of (corrected_name, confidence_score, correction_method).
+            When nothing matches, the title-cased input is returned with
+            confidence 0.0 and method "no_correction".
+        """
+        location = location.strip().lower()
+        # Method 1: Direct mapping
+        if location in self.location_mapping:
+            corrected = self.location_mapping[location]
+            return corrected, 1.0, "direct_mapping"
+        # Blank input cannot be corrected. Without this guard the substring
+        # step below would "match" it against an arbitrary location.
+        if not location:
+            return "", 0.0, "no_correction"
+        # Method 2: Fuzzy matching
+        best_match, confidence = self._fuzzy_match(location)
+        if confidence >= self.config.SIMILARITY_THRESHOLD:
+            return best_match, confidence, "fuzzy_matching"
+        # Method 3: LLM correction (if available)
+        if self.llm_available:
+            llm_corrected = self._llm_correct(location)
+            if llm_corrected:
+                # Verify LLM suggestion with fuzzy matching
+                llm_confidence = fuzz.ratio(location, llm_corrected.lower()) / 100
+                if llm_confidence >= 0.6:  # Lower threshold for LLM suggestions
+                    return llm_corrected, llm_confidence, "llm_correction"
+        # Method 4: Partial matching
+        partial_match = self._partial_match(location)
+        if partial_match:
+            return partial_match, 0.7, "partial_matching"
+        # No correction found
+        return location.title(), 0.0, "no_correction"
+    def _fuzzy_match(self, location: str) -> Tuple[Optional[str], float]:
+        """Find the best fuzzy match for ``location``.
+        Returns ``(best_location, score)`` with score in [0, 1];
+        ``(None, 0.0)`` when nothing scores above zero.
+        """
+        best_match = None
+        best_score = 0.0
+        needle = location.lower()
+        # Sorted iteration makes tie-breaking deterministic (set order is not).
+        for available_location in sorted(self.available_locations):
+            score = fuzz.ratio(needle, available_location.lower()) / 100
+            if score > best_score:
+                best_score = score
+                best_match = available_location
+        return best_match, best_score
+    def _partial_match(self, location: str) -> Optional[str]:
+        """Find a known location that contains, or is contained in, ``location``."""
+        location_lower = location.lower()
+        # An empty string is a substring of everything; treat it as no match.
+        if not location_lower:
+            return None
+        # Sorted iteration so the same input always yields the same match.
+        for available_location in sorted(self.available_locations):
+            available_lower = available_location.lower()
+            if location_lower in available_lower or available_lower in location_lower:
+                return available_location
+        return None
+    def _llm_correct(self, location: str) -> Optional[str]:
+        """Use the LLM to correct a location name.
+        Returns a member of ``available_locations`` or ``None``. Any API or
+        SDK error is printed and results in ``None``.
+        """
+        try:
+            prompt = f"""
+            You are a location name correction system for Sri Lankan cities and towns.
+            Given a potentially misspelled location name, return the correct spelling.
+            Available locations include: {', '.join(sorted(self.available_locations))}
+            Input location: "{location}"
+            Return only the corrected location name, nothing else. If no correction is possible, return "UNKNOWN".
+            """
+            messages = [
+                {"role": "system", "content": "You are a helpful assistant that corrects location names."},
+                {"role": "user", "content": prompt}
+            ]
+            # Try new SDK first
+            try:
+                from openai import OpenAI
+                client = OpenAI(api_key=self.config.OPENAI_API_KEY)
+                response = client.chat.completions.create(
+                    model=self.config.OPENAI_MODEL,
+                    messages=messages,
+                    max_tokens=50,
+                    temperature=0.1
+                )
+            except Exception as sdk_err:
+                # Fallback to legacy API if present
+                import openai
+                try:
+                    openai.api_key = self.config.OPENAI_API_KEY
+                    response = openai.ChatCompletion.create(
+                        model=self.config.OPENAI_MODEL,
+                        messages=messages,
+                        max_tokens=50,
+                        temperature=0.1
+                    )
+                except Exception:
+                    # Report the original failure, not the fallback's.
+                    raise sdk_err
+            # The message content may be None; treat that as "no answer".
+            corrected = (response.choices[0].message.content or "").strip()
+            # Validate LLM response
+            if not corrected or corrected.upper() == "UNKNOWN":
+                return None
+            # Check if corrected location exists in our database
+            if corrected in self.available_locations:
+                return corrected
+            # Try fuzzy matching on LLM response
+            llm_fuzzy_match, confidence = self._fuzzy_match(corrected)
+            if confidence >= 0.8:
+                return llm_fuzzy_match
+            return None
+        except Exception as e:
+            print(f"LLM correction error: {e}")
+            return None
+    def _resolve_endpoint(self, raw: str) -> Optional[Tuple[str, str, float, str]]:
+        """Correct one captured endpoint; None if it is a stop word, too short, or low-confidence."""
+        raw = raw.strip()
+        if len(raw) < 2 or raw.lower() in self._STOP_WORDS:
+            return None
+        corrected, confidence, method = self.correct_location(raw)
+        if confidence <= 0.5:
+            return None
+        return raw, corrected, confidence, method
+    def extract_locations_from_query(self, query: str) -> List[Tuple[str, str, float, str]]:
+        """
+        Extract and correct locations from a natural language query
+        Returns:
+            List of tuples: (original, corrected, confidence, method),
+            ordered (from, to). All entries come from a single pattern
+            match: the first pattern that resolves both endpoints, otherwise
+            the first pattern that resolved one, otherwise an empty list.
+        """
+        first_partial: List[Tuple[str, str, float, str]] = []
+        for pattern in self._LOCATION_PATTERNS:
+            match = pattern.search(query)
+            if not match:
+                continue
+            # Results are kept per pattern: mixing hits from different
+            # patterns can pair the same place with itself.
+            found = [
+                hit for hit in map(self._resolve_endpoint, match.groups()[:2])
+                if hit is not None
+            ]
+            if len(found) >= 2:
+                return found
+            if found and not first_partial:
+                first_partial = found
+        return first_partial
+    def get_suggestions(self, partial_location: str) -> List[Tuple[str, float]]:
+        """Get location suggestions for autocomplete.
+        Prefix matches score 1.0; other candidates are kept when their fuzzy
+        ratio is at least 0.6. At most ``config.MAX_SUGGESTIONS`` are returned,
+        best first.
+        """
+        suggestions = []
+        partial_lower = partial_location.lower()
+        for location in self.available_locations:
+            location_lower = location.lower()
+            # Check if partial location is a prefix
+            if location_lower.startswith(partial_lower):
+                suggestions.append((location, 1.0))
+                continue
+            # Check fuzzy similarity (computed once per candidate)
+            similarity = fuzz.ratio(partial_lower, location_lower) / 100
+            if similarity >= 0.6:
+                suggestions.append((location, similarity))
+        # Sort by confidence and return top suggestions
+        suggestions.sort(key=lambda x: x[1], reverse=True)
+        return suggestions[:self.config.MAX_SUGGESTIONS]
+    def validate_route(self, from_location: str, to_location: str) -> Tuple[bool, str]:
+        """Validate that both endpoints resolve to known, distinct locations.
+        This checks name resolution only; it does not look up whether a fare
+        exists between the two places.
+        Returns:
+            Tuple of (is_valid, message).
+        """
+        from_corrected, from_confidence, _ = self.correct_location(from_location)
+        to_corrected, to_confidence, _ = self.correct_location(to_location)
+        if from_confidence < 0.5:
+            return False, f"Could not identify departure location: '{from_location}'"
+        if to_confidence < 0.5:
+            return False, f"Could not identify destination location: '{to_location}'"
+        if from_corrected == to_corrected:
+            return False, f"Departure and destination cannot be the same: '{from_corrected}'"
+        return True, f"Route: {from_corrected} → {to_corrected}"

translation_service.py CHANGED Viewed

@@ -1,702 +1,1057 @@
-#!/usr/bin/env python3
-"""
-Translation Service for Sinhala-English Translation
-Handles translation of queries and responses with multiple free alternatives
-"""
-import requests
-import json
-import re
-import openai
-from typing import Dict, Any, Optional
-from config import Config
-from logger import get_logger
-class TranslationService:
-    def __init__(self):
-        self.config = Config()
-        self.openai_api_key = getattr(self.config, 'OPENAI_API_KEY', None)
-        self.logger = get_logger(self.__class__.__name__)
-        # Controls
-        import os
-        self.use_pattern_translation = os.getenv('USE_PATTERN_TRANSLATION', 'false').lower() == 'true'
-        self.force_llm_translation = os.getenv('FORCE_LLM_TRANSLATION', 'false').lower() == 'true'
-        self.last_translation_method: Optional[str] = None
-        # Free translation APIs
-        self.libre_translate_url = "https://libretranslate.de/translate"  # Free public instance
-        self.mymemory_url = "https://api.mymemory.translated.net/get"
-        # Common transport terms in Sinhala and their English equivalents
-        self.transport_terms = {
-            # Fare related
-            'කීයද': 'how much',
-            'මිල': 'price',
-            'වාරික': 'fare',
-            'වාරිකය': 'fare',
-            'වාරිකව': 'fare',
-            'ගාස්තු': 'fare',
-            'ගාස්තුව': 'fare',
-            'ප්‍රවාහන ගාස්තු': 'transport fare',
-            'බස් ගාස්තු': 'bus fare',
-            'බස් ගාස්තුව': 'bus fare',
-            'රේල් ගාස්තු': 'train fare',
-            'රේල් ගාස්තුව': 'train fare',
-            # Locations
-            'කොළඹ': 'Colombo',
-            'මහනුවර': 'Kandy',
-            'මහනුවරට': 'Kandy',
-            'ගාල්ල': 'Galle',
-            'ගාල්ලට': 'Galle',
-            'මාතර': 'Matara',
-            'මාතරට': 'Matara',
-            'අනුරාධපුර': 'Anuradhapura',
-            'අනුරාධපුරට': 'Anuradhapura',
-            'පානදුර': 'Panadura',
-            'පානදුරට': 'Panadura',
-            'අලුත්ගම': 'Aluthgama',
-            'අලුත්ගමට': 'Aluthgama',
-            'නුගේගොඩ': 'Nugegoda',
-            'නුගේගොඩට': 'Nugegoda',
-            'දෙහිවල': 'Dehiwala',
-            'දෙහිවලට': 'Dehiwala',
-            'මොරටුව': 'Moratuwa',
-            'මොරටුවට': 'Moratuwa',
-            # Direction words
-            'වලින්': 'from',
-            'වල': 'from',
-            'ට': 'to',
-            'වෙත': 'to',
-            'සිට': 'from',
-            'දක්වා': 'to',
-            'සි': 'from',
-            # Question words
-            'කොහෙද': 'where',
-            'කවදාද': 'when',
-            'කොහොමද': 'how',
-            'මොනවාද': 'what',
-            'කවුද': 'who',
-            # Comparison words
-            'සමඟ': 'with',
-            'සහ': 'and',
-            'හෝ': 'or',
-            'වඩා': 'more',
-            'අඩු': 'less',
-            'සමාන': 'same',
-            'වෙනස': 'different',
-            'සසඳන්න': 'compare',
-            'සසඳන': 'compare',
-            # Time words
-            'දැන්': 'now',
-            'අද': 'today',
-            'හෙට': 'tomorrow',
-            'ඊයේ': 'yesterday',
-            # Common verbs
-            'යන්න': 'go',
-            'යන': 'go',
-            'එන්න': 'come',
-            'බලන්න': 'see',
-            'දැනගන්න': 'know',
-            'සොයන්න': 'find',
-            'සොයන': 'find',
-            'ඉගෙනගන්න': 'learn',
-            'නිර්දේශ': 'recommend',
-            'නිර්දේශ කරන්න': 'recommend',
-            'පෙන්වන්න': 'show',
-            'පෙන්වන': 'show',
-            # Numbers and currency
-            'රුපියල්': 'rupees',
-            'රු': 'rupees',
-            'රුපියල': 'rupees',
-            # Common phrases
-            'අතර': 'between',
-            'සහිත': 'with',
-            'මාර්ග': 'routes',
-            'මාර්ගවල': 'routes',
-            'ගමනාන්ත': 'destinations',
-            'ප්‍රසිද්ධ': 'popular',
-            'සාමාන්‍ය': 'average',
-            'සාමාන්‍යය': 'average',
-            'දත්ත': 'data',
-            'සංඛ්‍යාලේඛන': 'statistics'
-        }
-        # Sinhala script detection pattern
-        self.sinhala_pattern = re.compile(r'[\u0D80-\u0DFF]')
-    def is_sinhala_text(self, text: str) -> bool:
-        """Check if text contains Sinhala characters"""
-        detected = bool(self.sinhala_pattern.search(text))
-        self.logger.debug(f"Sinhala detection: detected={detected}, text='{text}'")
-        return detected
-    def _map_sinhala_place(self, text: str) -> str:
-        """Map a Sinhala place token to its English equivalent using known terms and suffix stripping."""
-        candidate = text.strip()
-        # Direct map
-        if candidate in self.transport_terms:
-            return self.transport_terms[candidate]
-        # Strip common Sinhala case particles/suffixes and try again
-        base = re.sub(r'(ට|වෙත|දක්වා|වලින්|වල|සිට)$', '', candidate)
-        if base in self.transport_terms:
-            return self.transport_terms[base]
-        return candidate
-    def _parse_sinhala_fare_query(self, query: str) -> Optional[str]:
-        """Detect simple Sinhala fare queries and build a clean English query.
-        Example handled: "කොළඹ සිට මහනුවරට ගාස්තුව කීයද?" -> "What is the fare from Colombo to Kandy?"
-        """
-        try:
-            # Quick check for fare-related tokens to avoid false positives
-            if not any(tok in query for tok in ['ගාස්තු', 'ගාස්තුව', 'වාරික', 'වාරිකය', 'මිල']):
-                return None
-            # Extract source and destination around Sinhala "from" and "to" particles
-            m = re.search(r'([\u0D80-\u0DFF\s]+?)\s*සිට\s*([\u0D80-\u0DFF\s]+?)(?:ට|වෙත|දක්වා)', query)
-            if not m:
-                return None
-            src_si = m.group(1).strip()
-            dst_si = m.group(2).strip()
-            src_en = self._map_sinhala_place(src_si)
-            dst_en = self._map_sinhala_place(dst_si)
-            return f"What is the fare from {src_en} to {dst_en}?"
-        except Exception:
-            return None
-    def translate_with_llm(self, text: str, target_lang: str, source_lang: str = 'auto') -> Optional[str]:
-        """Translate using OpenAI LLM (new SDK). Preserve original intent (comparison, lists, conjunctions)."""
-        if not self.openai_api_key:
-            return None
-        try:
-            # Determine source language
-            if source_lang == 'auto':
-                source_lang = 'si' if self.is_sinhala_text(text) else 'en'
-            # Create language mapping
-            lang_map = {
-                ('si', 'en'): 'Sinhala to English',
-                ('en', 'si'): 'English to Sinhala'
-            }
-            direction = lang_map.get((source_lang, target_lang))
-            if not direction:
-                return None
-            prompt = f"""
-            Translate the following text from {direction}.
-            Output only the translated text without quotes or extra commentary.
-            Critically: Preserve the original intent and structure. Do not simplify.
-            - If it is a comparison (e.g., includes "සසඳා බලන්න"/"සසඳන්න"), translate as a comparison (e.g., "Compare ...").
-            - Preserve conjunctions like "සහ" as "and" and keep all mentioned routes.
-            - Keep direction words ("සිට" = from, "ට/වෙත/දක්වා" = to) and render routes fully.
-            Use standard English city names:
-            - මහනුවර = Kandy (not Mahanuwara)
-            - කොළඹ = Colombo
-            - ගාල්ල = Galle
-            - මාතර = Matara
-            - අනුරාධපුර = Anuradhapura
-            Text to translate: {text}
-            """
-            # Build few-shot examples to preserve comparison/imperative structure
-            examples = [
-                (
-                    "කොළඹ සිට මහනුවරට යන බස් ගාස්තුව කීයද?",
-                    "What is the bus fare from Colombo to Kandy?"
-                ),
-                (
-                    "කොළඹ සිට ගාල්ල දක්වා ටිකට් මිල කීයද?",
-                    "What is the ticket price from Colombo to Galle?"
-                ),
-                (
-                    "කොළඹ සිට පානදුර දක්වා සහ කොළඹ සිට ගාල්ල දක්වා ගාස්තු සසඳා බලන්න.",
-                    "Compare fares from Colombo to Panadura and from Colombo to Galle."
-                ),
-                (
-                    "රුපියල් 500 ට අඩු ගාස්තු සහිත මාර්ග පෙන්වන්න.",
-                    "Show routes with fares under 500 rupees."
-                ),
-                (
-                    "අඩු මිලේ මාර්ග නිර්දේශ කරන්න.",
-                    "Recommend cheap routes."
-                ),
-            ]
-            # Compose messages with few-shot conditioning
-            def build_messages(txt: str):
-                msgs = [
-                    {
-                        "role": "system",
-                        "content": (
-                            "You are a professional translator. Translate accurately and naturally. "
-                            "Preserve imperative/comparative intent and list structure. Do not paraphrase. "
-                            "Return only the English translation without quotes. "
-                            "Canonical phrasing rules (use exactly): \n"
-                            "- Use 'Compare' for comparison requests.\n"
-                            "- Use 'Show' for requests like 'පෙන්වන්න' (do not use Provide/List).\n"
-                            "- Use 'How much is the' for 'කීයද' fare/price questions.\n"
-                            "- Use 'cheap' (not 'affordable').\n"
-                            "- Use 'under' (not 'below') for '< value'.\n"
-                        ),
-                    },
-                    {
-                        "role": "user",
-                        "content": (
-                            "Instructions: Preserve structure. Use 'Compare' for 'සසඳ', use 'from' for 'සිට' and 'to' for 'ට/වෙත/දක්වා'.\n"
-                            "Use exact place names: මහනුවර=Kandy, කොළඹ=Colombo, ගාල්ල=Galle, මාතර=Matara, අනුරාධපුර=Anuradhapura."
-                        ),
-                    },
-                ]
-                for si, en in examples:
-                    msgs.append({"role": "user", "content": f"Sinhala: {si}\nEnglish:"})
-                    msgs.append({"role": "assistant", "content": en})
-                msgs.append({"role": "user", "content": f"Sinhala: {txt}\nEnglish:"})
-                return msgs
-            # Use new OpenAI SDK
-            try:
-                from openai import OpenAI
-                client = OpenAI(api_key=self.openai_api_key)
-                response = client.chat.completions.create(
-                    model="gpt-3.5-turbo",
-                    max_tokens=150,
-                    temperature=0.3,
-                    messages=build_messages(text)
-                )
-                translated = response.choices[0].message.content.strip()
-                self.last_translation_method = 'llm'
-            except Exception as sdk_err:
-                # Fallback to legacy API if available
-                import openai
-                try:
-                    openai.api_key = self.openai_api_key
-                    response = openai.ChatCompletion.create(
-                        model="gpt-3.5-turbo",
-                        max_tokens=150,
-                        temperature=0.3,
-                        messages=build_messages(text)
-                    )
-                    translated = response.choices[0].message.content.strip()
-                    self.last_translation_method = 'llm'
-                except Exception:
-                    raise sdk_err
-            if translated.startswith('"') and translated.endswith('"'):
-                translated = translated[1:-1]
-            return translated if translated else None
-        except Exception as e:
-            self.logger.warning(f"LLM translation error: {e}")
-            return None
-    def translate_with_libre_translate(self, text: str, target_lang: str, source_lang: str = 'auto') -> Optional[str]:
-        """Translate using LibreTranslate (free public API)"""
-        try:
-            # Map language codes
-            lang_map = {
-                'si': 'si',  # Sinhala
-                'en': 'en',  # English
-                'auto': 'auto'
-            }
-            source = lang_map.get(source_lang, 'auto')
-            target = lang_map.get(target_lang, 'en')
-            payload = {
-                'q': text,
-                'source': source,
-                'target': target,
-                'format': 'text'
-            }
-            headers = {
-                'Content-Type': 'application/json'
-            }
-            response = requests.post(
-                self.libre_translate_url,
-                json=payload,
-                headers=headers,
-                timeout=10
-            )
-            if response.status_code == 200:
-                result = response.json()
-                translated = result.get('translatedText')
-                self.logger.debug(f"LibreTranslate success: '{text}' -> '{translated}'")
-                self.last_translation_method = 'libretranslate'
-                return translated
-            return None
-        except Exception as e:
-            self.logger.warning(f"LibreTranslate error: {e}")
-            return None
-    def translate_with_mymemory(self, text: str, target_lang: str, source_lang: str = 'auto') -> Optional[str]:
-        """Translate using MyMemory (free API)"""
-        try:
-            # Map language codes
-            lang_map = {
-                'si': 'si',  # Sinhala
-                'en': 'en',  # English
-                'auto': 'auto'
-            }
-            source = lang_map.get(source_lang, 'auto')
-            langpair = f"{source}|{target_lang}"
-            params = {
-                'q': text,
-                'langpair': langpair
-            }
-            response = requests.get(
-                self.mymemory_url,
-                params=params,
-                timeout=10
-            )
-            if response.status_code == 200:
-                result = response.json()
-                translated = result.get('responseData', {}).get('translatedText')
-                self.logger.debug(f"MyMemory success: '{text}' -> '{translated}'")
-                self.last_translation_method = 'mymemory'
-                return translated
-            return None
-        except Exception as e:
-            self.logger.warning(f"MyMemory translation error: {e}")
-            return None
-    def translate_with_dictionary(self, text: str, target_lang: str) -> str:
-        """Translate using dictionary-based approach"""
-        if target_lang == 'en':
-            # Sinhala to English
-            translated = text
-            for sinhala, english in self.transport_terms.items():
-                translated = translated.replace(sinhala, english)
-            return translated
-        elif target_lang == 'si':
-            # English to Sinhala
-            translated = text
-            for sinhala, english in self.transport_terms.items():
-                translated = translated.replace(english, sinhala)
-            return translated
-        return text
-    def translate_text(self, text: str, target_lang: str, source_lang: str = 'auto') -> str:
-        """Main translation method with multiple fallbacks"""
-        if not text or not text.strip():
-            return text
-        # Try translation methods
-        if self.force_llm_translation:
-            translation_methods = [
-                ('LLM', lambda: self.translate_with_llm(text, target_lang, source_lang))
-            ]
-        else:
-            translation_methods = [
-                ('LLM', lambda: self.translate_with_llm(text, target_lang, source_lang)),
-                ('MyMemory', lambda: self.translate_with_mymemory(text, target_lang, source_lang)),
-                ('LibreTranslate', lambda: self.translate_with_libre_translate(text, target_lang, source_lang)),
-                ('Dictionary', lambda: self.translate_with_dictionary(text, target_lang))
-            ]
-        for method_name, method_func in translation_methods:
-            try:
-                result = method_func()
-                if result and result.strip():
-                    self.logger.info(f"Translation successful using {method_name}")
-                    if not self.last_translation_method:
-                        self.last_translation_method = method_name.lower()
-                    return result.strip()
-            except Exception as e:
-                self.logger.warning(f"{method_name} translation failed: {e}")
-                continue
-        # Final fallback
-        result = self.translate_with_dictionary(text, target_lang)
-        self.last_translation_method = 'dictionary'
-        return result
-    def translate_query(self, query: str) -> Dict[str, Any]:
-        """Translate a user query from Sinhala to English"""
-        if not self.is_sinhala_text(query):
-            return {
-                'is_sinhala': False,
-                'original_query': query,
-                'translated_query': query,
-                'translation_method': 'none'
-            }
-        # Optional: Sinhala-specific fare parsing (disabled by default unless USE_PATTERN_TRANSLATION=true)
-        if self.use_pattern_translation:
-            parsed = self._parse_sinhala_fare_query(query)
-            if parsed:
-                self.logger.info(f"Pattern-based Sinhala fare parse: '{query}' -> '{parsed}'")
-                return {
-                    'is_sinhala': True,
-                    'original_query': query,
-                    'translated_query': parsed,
-                    'translation_method': 'pattern'
-                }
-        # Fallback: general translation to English
-        translated = self.translate_text(query, 'en', 'si')
-        # Normalize English synonyms to expected NLP vocabulary
-        translated = self._normalize_english_query(translated)
-        method = self.last_translation_method or ('llm' if self.openai_api_key else 'dictionary')
-        self.logger.info(f"Translated Sinhala query ({method}): '{query}' -> '{translated}'")
-        return {
-            'is_sinhala': True,
-            'original_query': query,
-            'translated_query': translated,
-            'translation_method': method
-        }
-    def _normalize_english_query(self, text: str) -> str:
-        """Normalize English synonyms to match NLP patterns (fare/price/cost)."""
-        if not text:
-            return text
-        normalized = text
-        replacements = {
-            'fees': 'fare',
-            'fee': 'fare',
-            'charges': 'cost',
-            'charge': 'cost',
-            'ticket price': 'fare',
-            'ticket fare': 'fare',
-            'bus ticket': 'bus fare',
-        }
-        # Lowercase operate, then restore original casing minimally by returning lowercase; downstream lowercases anyway
-        lower = normalized.lower()
-        for old, new in replacements.items():
-            lower = lower.replace(old, new)
-        return lower
-    def translate_response(self, response: Dict[str, Any]) -> Dict[str, Any]:
-        """Translate response back to Sinhala"""
-        translated_response = response.copy()
-        # Translate the main message
-        if 'message' in response:
-            translated_response['message'] = self.translate_text(
-                response['message'], 'si', 'en'
-            )
-        # Translate suggestions if any
-        if 'suggestions' in response and response['suggestions']:
-            translated_response['suggestions'] = [
-                self.translate_text(suggestion, 'si', 'en')
-                for suggestion in response['suggestions']
-            ]
-        # Translate corrections if any
-        if 'corrections' in response and response['corrections']:
-            translated_corrections = []
-            for correction in response['corrections']:
-                translated_correction = correction.copy()
-                if 'original' in correction:
-                    translated_correction['original'] = self.translate_text(
-                        correction['original'], 'si', 'en'
-                    )
-                if 'corrected' in correction:
-                    translated_correction['corrected'] = self.translate_text(
-                        correction['corrected'], 'si', 'en'
-                    )
-                translated_corrections.append(translated_correction)
-            translated_response['corrections'] = translated_corrections
-        # Add translation metadata
-        translated_response['translation_info'] = {
-            'translated': True,
-            'translation_method': 'llm' if self.openai_api_key else 'dictionary'
-        }
-        return translated_response
-    def get_sinhala_examples(self) -> Dict[str, Any]:
-        """Get example queries in Sinhala"""
-        sinhala_examples = {
-            'fare_queries': [
-                {
-                    'query': 'කොළඹ සිට මහනුවරට යන බස් ගාස්තුව කීයද?',
-                    'description': 'කොළඹ සිට මහනුවරට යන බස් ගාස්තුව සොයන්න'
-                },
-                {
-                    'query': 'මාතර සිට ගාල්ලට යන මිල කීයද?',
-                    'description': 'මාතර සිට ගාල්ලට යන මිල සොයන්න'
-                },
-                {
-                    'query': 'අනුරාධපුර සිට කොළඹට යන වාරිකය',
-                    'description': 'අනුරාධපුර සිට කොළඹට යන වාරිකය සොයන්න'
-                }
-            ],
-            'comparison_queries': [
-                {
-                    'query': 'කොළඹ සිට මහනුවරට සහ කොළඹ සිට ගාල්ලට යන ගාස්තු සසඳන්න',
-                    'description': 'විවිධ මාර්ගවල ගාස්තු සසඳන්න'
-                },
-                {
-                    'query': 'කොළඹ සිට මහනුවරට සහ කොළඹ සිට අනුරාධපුරට යන ගාස්තුවල වෙනස කීයද?',
-                    'description': 'මාර්ග දෙකක ගාස්තු වෙනස සොයන්න'
-                }
-            ],
-            'range_queries': [
-                {
-                    'query': 'රුපියල් 500 ට අඩු ගාස්තු සහිත මාර්ග සොයන්න',
-                    'description': 'රුපියල් 500 ට අඩු ගාස්තු සහිත මාර්ග සොයන්න'
-                },
-                {
-                    'query': 'රුපියල් 200 සහ 800 අතර ගාස්තු සහිත මාර්ග පෙන්වන්න',
-                    'description': 'රුපියල් 200 සහ 800 අතර ගාස්තු සහිත මාර්ග සොයන්න'
-                }
-            ],
-            'recommendation_queries': [
-                {
-                    'query': 'අඩු මිලේ මාර්ග නිර්දේශ කරන්න',
-                    'description': 'අඩු මිලේ මාර්ග නිර්දේශ කරන්න'
-                },
-                {
-                    'query': 'ප්‍රසිද්ධ ගමනාන්ත පෙන්වන්න',
-                    'description': 'ප්‍රසිද්ධ ගමනාන්ත සොයන්න'
-                }
-            ],
-            'statistical_queries': [
-                {
-                    'query': 'සාමාන්‍ය ගාස්තුව කීයද?',
-                    'description': 'සාමාන්‍ය ගාස්තුව සොයන්න'
-                },
-                {
-                    'query': 'දත්ත ගබඩා සංඛ්‍යාලේඛන',
-                    'description': 'දත්ත ගබඩා සංඛ්‍යාලේඛන සොයන්න'
-                }
-            ]
-        }
-        return sinhala_examples
-    def test_translation(self) -> Dict[str, Any]:
-        """Test translation functionality on transportation-related Sinhala queries."""
-        test_cases = [
-            {
-                'sinhala': 'කොළඹ සිට මහනුවරට යන බස් ගාස්තුව කීයද?',
-                'expected_english': 'What is the bus fare from Colombo to Kandy?'
-            },
-            {
-                'sinhala': 'මාතර සිට ගාල්ලට යන මිල කීයද?',
-                'expected_english': 'How much is the price from Matara to Galle?'
-            },
-            {
-                'sinhala': 'කොළඹ සිට පානදුර දක්වා සහ කොළඹ සිට ගාල්ල දක්වා ගාස්තු සසඳා බලන්න.',
-                'expected_english': 'Compare fares from Colombo to Panadura and from Colombo to Galle.'
-            },
-            {
-                'sinhala': 'රුපියල් 500 ට අඩු ගාස්තු සහිත මාර්ග පෙන්වන්න.',
-                'expected_english': 'Show routes with fares under 500 rupees.'
-            },
-            {
-                'sinhala': 'අඩු මිලේ මාර්ග නිර්දේශ කරන්න.',
-                'expected_english': 'Recommend cheap routes.'
-            },
-            {
-                'sinhala': 'කොළඹ සිට යන මාර්ග මොනවාද?',
-                'expected_english': 'What routes depart from Colombo?'
-            },
-            {
-                'sinhala': 'සාමාන්‍ය ගාස්තුව කීයද?',
-                'expected_english': 'What is the average fare?'
-            },
-            {
-                'sinhala': 'කඩුවෙල සිට මාතර දක්වා සහ ගාල්ල දක්වා බස් ගාස්තු සසඳන්න.',
-                'expected_english': 'Compare bus fares from Kaduwela to Matara and to Galle.'
-            },
-            {
-                'sinhala': 'කොළඹ සිට ගාල්ල දක්වා ටිකට් මිල කීයද?',
-                'expected_english': 'What is the ticket price from Colombo to Galle?'
-            },
-            {
-                'sinhala': 'රුපියල් 1000 ට වැඩි ගාස්තු සහිත මාර්ග සදහන් කරන්න.',
-                'expected_english': 'List routes with fares over 1000 rupees.'
-            }
-        ]
-        results = []
-        total_exact = 0
-        total_good = 0
-        total_tests = len(test_cases)
-        for test_case in test_cases:
-            sinhala = test_case['sinhala']
-            expected = test_case['expected_english']
-            is_sinhala = self.is_sinhala_text(sinhala)
-            # Reset method tracker and translate
-            self.last_translation_method = None
-            translated = self.translate_text(sinhala, 'en', 'si') or ''
-            tr = translated.strip()
-            ex = expected.strip()
-            tr_low = tr.lower()
-            ex_low = ex.lower()
-            # Accuracy heuristic
-            if tr_low == ex_low:
-                accuracy = 'exact'
-                total_exact += 1
-                total_good += 1
-            elif tr_low in ex_low or ex_low in tr_low:
-                accuracy = 'good'
-                total_good += 1
-            else:
-                accuracy = 'partial'
-            # Intent preservation check for comparisons
-            intent_preserved = True
-            if 'සසඳ' in sinhala or 'සසඳා' in sinhala:
-                intent_preserved = ('compare' in tr_low)
-            results.append({
-                'sinhala_query': sinhala,
-                'is_sinhala_detected': is_sinhala,
-                'translated_english': tr,
-                'expected_english': ex,
-                'translation_accuracy': accuracy,
-                'intent_preserved': intent_preserved,
-                'method_used': self.last_translation_method or ('llm' if self.openai_api_key else 'dictionary')
-            })
-        summary = {
-            'total_tests': total_tests,
-            'exact_matches': total_exact,
-            'good_or_better': total_good,
-            'accuracy_rate_percent': round((total_good / total_tests) * 100, 2) if total_tests else 0
-        }
-        self.logger.info(f"Translation test summary: {summary}")
-        return {
-            'translation_service_status': 'active',
-            'available_methods': {
-                'llm': self.openai_api_key is not None,
-                'libre_translate': True,
-                'mymemory': True,
-                'dictionary': True
-            },
-            'summary': summary,
-            'test_results': results
-        }

+#!/usr/bin/env python3
+"""
+Enhanced Translation Service for Multi-Language Support
+Handles translation of queries and responses for Sinhala, Tamil, Singlish, and English
+"""
+import requests
+import json
+import re
+import openai
+from typing import Dict, Any, Optional
+from config import Config
+from logger import get_logger
+from language_detector import LanguageDetector
+class TranslationService:
+    def __init__(self):
+        self.config = Config()
+        self.openai_api_key = getattr(self.config, 'OPENAI_API_KEY', None)
+        self.logger = get_logger(self.__class__.__name__)
+        self.language_detector = LanguageDetector()
+        # Controls
+        import os
+        self.use_pattern_translation = os.getenv('USE_PATTERN_TRANSLATION', 'false').lower() == 'true'
+        self.force_llm_translation = os.getenv('FORCE_LLM_TRANSLATION', 'false').lower() == 'true'
+        self.last_translation_method: Optional[str] = None
+        # Free translation APIs
+        self.libre_translate_url = "https://libretranslate.de/translate"  # Free public instance
+        self.mymemory_url = "https://api.mymemory.translated.net/get"
+        # Tamil transport terms and their English equivalents
+        self.tamil_transport_terms = {
+            # Fare related
+            'எவ்வளவு': 'how much',
+            'விலை': 'price',
+            'கட்டணம்': 'fare',
+            'செலவு': 'cost',
+            'பேருந்து கட்டணம்': 'bus fare',
+            'ரயில் கட்டணம்': 'train fare',
+            'டிக்கெட் விலை': 'ticket price',
+            # Locations
+            'கொழும்பு': 'Colombo',
+            'கண்டி': 'Kandy',
+            'காலி': 'Galle',
+            'மாத்தறை': 'Matara',
+            'அனுராதபுரம்': 'Anuradhapura',
+            'பனதுரை': 'Panadura',
+            'அலுத்துகமா': 'Aluthgama',
+            'நுகேகோடா': 'Nugegoda',
+            'தெஹிவாலா': 'Dehiwala',
+            'மொரட்டுவா': 'Moratuwa',
+            # Direction words
+            'இருந்து': 'from',
+            'வரை': 'to',
+            'வழியாக': 'via',
+            'மூலம்': 'through',
+            # Question words
+            'எங்கே': 'where',
+            'எப்போது': 'when',
+            'எப்படி': 'how',
+            'என்ன': 'what',
+            'யார்': 'who',
+            # Comparison words
+            'உடன்': 'with',
+            'மற்றும்': 'and',
+            'அல்லது': 'or',
+            'அதிகம்': 'more',
+            'குறைவு': 'less',
+            'ஒரே': 'same',
+            'வேறு': 'different',
+            'ஒப்பிடு': 'compare',
+            'வித்தியாசம்': 'difference',
+            # Time words
+            'இப்போது': 'now',
+            'இன்று': 'today',
+            'நாளை': 'tomorrow',
+            'நேற்று': 'yesterday',
+            # Common verbs
+            'போ': 'go',
+            'வா': 'come',
+            'பார்': 'see',
+            'தெரிந்து கொள்': 'know',
+            'கண்டுபிடி': 'find',
+            'கற்றுக்கொள்': 'learn',
+            'பரிந்துரை': 'recommend',
+            'காட்டு': 'show',
+            # Numbers and currency
+            'ரூபாய்': 'rupees',
+            'ரூ': 'rupees',
+            # Common phrases
+            'இடையில்': 'between',
+            'உடன்': 'with',
+            'பாதைகள்': 'routes',
+            'பிரபலமான': 'popular',
+            'சராசரி': 'average',
+            'தரவு': 'data',
+            'புள்ளிவிவரங்கள்': 'statistics'
+        }
+        # Common transport terms in Sinhala and their English equivalents
+        self.transport_terms = {
+            # Fare related
+            'කීයද': 'how much',
+            'මිල': 'price',
+            'වාරික': 'fare',
+            'වාරිකය': 'fare',
+            'වාරිකව': 'fare',
+            'ගාස්තු': 'fare',
+            'ගාස්තුව': 'fare',
+            'ප්‍රවාහන ගාස්තු': 'transport fare',
+            'බස් ගාස්තු': 'bus fare',
+            'බස් ගාස්තුව': 'bus fare',
+            'රේල් ගාස්තු': 'train fare',
+            'රේල් ගාස්තුව': 'train fare',
+            # Locations
+            'කොළඹ': 'Colombo',
+            'මහනුවර': 'Kandy',
+            'මහනුවරට': 'Kandy',
+            'ගාල්ල': 'Galle',
+            'ගාල්ලට': 'Galle',
+            'මාතර': 'Matara',
+            'මාතරට': 'Matara',
+            'අනුරාධපුර': 'Anuradhapura',
+            'අනුරාධපුරට': 'Anuradhapura',
+            'පානදුර': 'Panadura',
+            'පානදුරට': 'Panadura',
+            'අලුත්ගම': 'Aluthgama',
+            'අලුත්ගමට': 'Aluthgama',
+            'නුගේගොඩ': 'Nugegoda',
+            'නුගේගොඩට': 'Nugegoda',
+            'දෙහිවල': 'Dehiwala',
+            'දෙහිවලට': 'Dehiwala',
+            'මොරටුව': 'Moratuwa',
+            'මොරටුවට': 'Moratuwa',
+            # Direction words
+            'වලින්': 'from',
+            'වල': 'from',
+            'ට': 'to',
+            'වෙත': 'to',
+            'සිට': 'from',
+            'දක්වා': 'to',
+            'සි': 'from',
+            # Question words
+            'කොහෙද': 'where',
+            'කවදාද': 'when',
+            'කොහොමද': 'how',
+            'මොනවාද': 'what',
+            'කවුද': 'who',
+            # Comparison words
+            'සමඟ': 'with',
+            'සහ': 'and',
+            'හෝ': 'or',
+            'වඩා': 'more',
+            'අඩු': 'less',
+            'සමාන': 'same',
+            'වෙනස': 'different',
+            'සසඳන්න': 'compare',
+            'සසඳන': 'compare',
+            # Time words
+            'දැන්': 'now',
+            'අද': 'today',
+            'හෙට': 'tomorrow',
+            'ඊයේ': 'yesterday',
+            # Common verbs
+            'යන්න': 'go',
+            'යන': 'go',
+            'එන්න': 'come',
+            'බලන්න': 'see',
+            'දැනගන්න': 'know',
+            'සොයන්න': 'find',
+            'සොයන': 'find',
+            'ඉගෙනගන්න': 'learn',
+            'නිර්දේශ': 'recommend',
+            'නිර්දේශ කරන්න': 'recommend',
+            'පෙන්වන්න': 'show',
+            'පෙන්වන': 'show',
+            # Numbers and currency
+            'රුපියල්': 'rupees',
+            'රු': 'rupees',
+            'රුපියල': 'rupees',
+            # Common phrases
+            'අතර': 'between',
+            'සහිත': 'with',
+            'මාර්ග': 'routes',
+            'මාර්ගවල': 'routes',
+            'ගමනාන්ත': 'destinations',
+            'ප්‍රසිද්ධ': 'popular',
+            'සාමාන්‍ය': 'average',
+            'සාමාන්‍යය': 'average',
+            'දත්ත': 'data',
+            'සංඛ්‍යාලේඛන': 'statistics'
+        }
+        # Sinhala script detection pattern
+        self.sinhala_pattern = re.compile(r'[\u0D80-\u0DFF]')
+    def is_sinhala_text(self, text: str) -> bool:
+        """Check if text contains Sinhala characters"""
+        detected = bool(self.sinhala_pattern.search(text))
+        self.logger.debug(f"Sinhala detection: detected={detected}, text='{text}'")
+        return detected
+    def is_tamil_text(self, text: str) -> bool:
+        """Check if text contains Tamil characters"""
+        tamil_pattern = re.compile(r'[\u0B80-\u0BFF]')
+        detected = bool(tamil_pattern.search(text))
+        self.logger.debug(f"Tamil detection: detected={detected}, text='{text}'")
+        return detected
+    def is_singlish_text(self, text: str) -> bool:
+        """Check if text is Singlish (Sinhala-English mixed)"""
+        detection_result = self.language_detector.detect_language(text)
+        return detection_result['language'] == 'singlish'
+    def _map_sinhala_place(self, text: str) -> str:
+        """Map a Sinhala place token to its English equivalent using known terms and suffix stripping."""
+        candidate = text.strip()
+        # Direct map
+        if candidate in self.transport_terms:
+            return self.transport_terms[candidate]
+        # Strip common Sinhala case particles/suffixes and try again
+        base = re.sub(r'(ට|වෙත|දක්වා|වලින්|වල|සිට)$', '', candidate)
+        if base in self.transport_terms:
+            return self.transport_terms[base]
+        return candidate
+    def _map_tamil_place(self, text: str) -> str:
+        """Map a Tamil place token to its English equivalent using known terms and suffix stripping."""
+        candidate = text.strip()
+        # Direct map
+        if candidate in self.tamil_transport_terms:
+            return self.tamil_transport_terms[candidate]
+        # Strip common Tamil case particles/suffixes and try again
+        base = re.sub(r'(இருந்து|வரை|வழியாக|மூலம்)$', '', candidate)
+        if base in self.tamil_transport_terms:
+            return self.tamil_transport_terms[base]
+        return candidate
+    def _parse_sinhala_fare_query(self, query: str) -> Optional[str]:
+        """Detect simple Sinhala fare queries and build a clean English query.
+        Example handled: "කොළඹ සිට මහනුවරට ගාස්තුව කීයද?" -> "What is the fare from Colombo to Kandy?"
+        """
+        try:
+            # Quick check for fare-related tokens to avoid false positives
+            if not any(tok in query for tok in ['ගාස්තු', 'ගාස්තුව', 'වාරික', 'වාරිකය', 'මිල']):
+                return None
+            # Extract source and destination around Sinhala "from" and "to" particles
+            m = re.search(r'([\u0D80-\u0DFF\s]+?)\s*සිට\s*([\u0D80-\u0DFF\s]+?)(?:ට|වෙත|දක්වා)', query)
+            if not m:
+                return None
+            src_si = m.group(1).strip()
+            dst_si = m.group(2).strip()
+            src_en = self._map_sinhala_place(src_si)
+            dst_en = self._map_sinhala_place(dst_si)
+            return f"What is the fare from {src_en} to {dst_en}?"
+        except Exception:
+            return None
+    def _parse_tamil_fare_query(self, query: str) -> Optional[str]:
+        """Detect simple Tamil fare queries and build a clean English query.
+        Example handled: "கொழும்பு இருந்து கண்டி வரை கட்டணம் எவ்வளவு?" -> "What is the fare from Colombo to Kandy?"
+        """
+        try:
+            # Quick check for fare-related tokens to avoid false positives
+            if not any(tok in query for tok in ['கட்டணம்', 'விலை', 'செலவு', 'எவ்வளவு']):
+                return None
+            # Extract source and destination around Tamil "from" and "to" particles
+            m = re.search(r'([\u0B80-\u0BFF\s]+?)\s*இருந்து\s*([\u0B80-\u0BFF\s]+?)(?:வரை|வழியாக)', query)
+            if not m:
+                return None
+            src_ta = m.group(1).strip()
+            dst_ta = m.group(2).strip()
+            src_en = self._map_tamil_place(src_ta)
+            dst_en = self._map_tamil_place(dst_ta)
+            return f"What is the fare from {src_en} to {dst_en}?"
+        except Exception:
+            return None
+    def translate_with_llm(self, text: str, target_lang: str, source_lang: str = 'auto') -> Optional[str]:
+        """Translate using OpenAI LLM (new SDK). Preserve original intent (comparison, lists, conjunctions)."""
+        if not self.openai_api_key:
+            return None
+        try:
+            # Determine source language
+            if source_lang == 'auto':
+                detection_result = self.language_detector.detect_language(text)
+                detected_lang = detection_result['language']
+                if detected_lang == 'sinhala':
+                    source_lang = 'si'
+                elif detected_lang == 'tamil':
+                    source_lang = 'ta'
+                elif detected_lang == 'singlish':
+                    source_lang = 'singlish'
+                else:
+                    source_lang = 'en'
+            # Create language mapping
+            lang_map = {
+                ('si', 'en'): 'Sinhala to English',
+                ('en', 'si'): 'English to Sinhala',
+                ('ta', 'en'): 'Tamil to English',
+                ('en', 'ta'): 'English to Tamil',
+                ('singlish', 'en'): 'Singlish to English'
+            }
+            direction = lang_map.get((source_lang, target_lang))
+            if not direction:
+                return None
+            prompt = f"""
+            Translate the following text from {direction}.
+            Output only the translated text without quotes or extra commentary.
+            Critically: Preserve the original intent and structure. Do not simplify.
+            - If it is a comparison (e.g., includes "සසඳා බලන්න"/"සසඳන්න"), translate as a comparison (e.g., "Compare ...").
+            - Preserve conjunctions like "සහ" as "and" and keep all mentioned routes.
+            - Keep direction words ("සිට" = from, "ට/වෙත/දක්වා" = to) and render routes fully.
+            Use standard English city names:
+            - මහනුවර = Kandy (not Mahanuwara)
+            - කොළඹ = Colombo
+            - ගාල්ල = Galle
+            - මාතර = Matara
+            - අනුරාධපුර = Anuradhapura
+            Text to translate: {text}
+            """
+            # Build few-shot examples to preserve comparison/imperative structure
+            examples = []
+            # Add examples based on source language
+            if source_lang == 'si':
+                examples = [
+                    (
+                        "කොළඹ සිට මහනුවරට යන බස් ගාස්තුව කීයද?",
+                        "What is the bus fare from Colombo to Kandy?"
+                    ),
+                    (
+                        "කොළඹ සිට ගාල්ල දක්වා ටිකට් මිල කීයද?",
+                        "What is the ticket price from Colombo to Galle?"
+                    ),
+                    (
+                        "කොළඹ සිට පානදුර දක්වා සහ කොළඹ සිට ගාල்ල දක්වා ගාස්තු සසඳා බලන්න.",
+                        "Compare fares from Colombo to Panadura and from Colombo to Galle."
+                    ),
+                    (
+                        "රුපියල් 500 ට අඩු ගාස්තු සහිත මාර්ග පෙන්වන්න.",
+                        "Show routes with fares under 500 rupees."
+                    ),
+                    (
+                        "අඩු මිලේ මාර්ග නිර්දේශ කරන්න.",
+                        "Recommend cheap routes."
+                    ),
+                ]
+            elif source_lang == 'ta':
+                examples = [
+                    (
+                        "கொழும்பு இருந்து கண்டி வரை பேருந்து கட்டணம் எவ்வளவு?",
+                        "What is the bus fare from Colombo to Kandy?"
+                    ),
+                    (
+                        "கொழும்பு இருந்து காலி வரை டிக்கெட் விலை எவ்வளவு?",
+                        "What is the ticket price from Colombo to Galle?"
+                    ),
+                    (
+                        "கொழும்பு இருந்து பனதுரை வரை மற்றும் கொழும்பு இருந்து காலி வரை கட்டணம் ஒப்பிடு.",
+                        "Compare fares from Colombo to Panadura and from Colombo to Galle."
+                    ),
+                    (
+                        "ரூபாய் 500 க்கு குறைவான கட்டணம் உள்ள பாதைகளை காட்டு.",
+                        "Show routes with fares under 500 rupees."
+                    ),
+                    (
+                        "குறைந்த விலையில் பாதைகளை பரிந்துரை.",
+                        "Recommend cheap routes."
+                    ),
+                ]
+            elif source_lang == 'singlish':
+                examples = [
+                    (
+                        "කොළඹ සිට Kandy ගාස්තුව කීයද?",
+                        "What is the fare from Colombo to Kandy?"
+                    ),
+                    (
+                        "Colombo සිට ගාල්ල�� bus fare කීයද?",
+                        "What is the bus fare from Colombo to Galle?"
+                    ),
+                    (
+                        "කොළඹ සිට Panadura සහ Colombo සිට Galle fares compare කරන්න.",
+                        "Compare fares from Colombo to Panadura and from Colombo to Galle."
+                    ),
+                ]
+            # Compose messages with few-shot conditioning
+            def build_messages(txt: str):
+                msgs = [
+                    {
+                        "role": "system",
+                        "content": (
+                            "You are a professional translator. Translate accurately and naturally. "
+                            "Preserve imperative/comparative intent and list structure. Do not paraphrase. "
+                            "Return only the English translation without quotes. "
+                            "Canonical phrasing rules (use exactly): \n"
+                            "- Use 'Compare' for comparison requests.\n"
+                            "- Use 'Show' for requests like 'පෙන්වන්න' (do not use Provide/List).\n"
+                            "- Use 'How much is the' for 'කීයද' fare/price questions.\n"
+                            "- Use 'cheap' (not 'affordable').\n"
+                            "- Use 'under' (not 'below') for '< value'.\n"
+                        ),
+                    },
+                    {
+                        "role": "user",
+                        "content": (
+                            "Instructions: Preserve structure. Use 'Compare' for 'සසඳ', use 'from' for 'සිට' and 'to' for 'ට/වෙත/දක්වා'.\n"
+                            "Use exact place names: මහනුවර=Kandy, කොළඹ=Colombo, ගාල්ල=Galle, මාතර=Matara, අනුරාධපුර=Anuradhapura."
+                        ),
+                    },
+                ]
+                for si, en in examples:
+                    msgs.append({"role": "user", "content": f"Sinhala: {si}\nEnglish:"})
+                    msgs.append({"role": "assistant", "content": en})
+                msgs.append({"role": "user", "content": f"Sinhala: {txt}\nEnglish:"})
+                return msgs
+            # Use new OpenAI SDK
+            try:
+                from openai import OpenAI
+                client = OpenAI(api_key=self.openai_api_key)
+                response = client.chat.completions.create(
+                    model="gpt-3.5-turbo",
+                    max_tokens=150,
+                    temperature=0.3,
+                    messages=build_messages(text)
+                )
+                translated = response.choices[0].message.content.strip()
+                self.last_translation_method = 'llm'
+            except Exception as sdk_err:
+                # Fallback to legacy API if available
+                import openai
+                try:
+                    openai.api_key = self.openai_api_key
+                    response = openai.ChatCompletion.create(
+                        model="gpt-3.5-turbo",
+                        max_tokens=150,
+                        temperature=0.3,
+                        messages=build_messages(text)
+                    )
+                    translated = response.choices[0].message.content.strip()
+                    self.last_translation_method = 'llm'
+                except Exception:
+                    raise sdk_err
+            if translated.startswith('"') and translated.endswith('"'):
+                translated = translated[1:-1]
+            return translated if translated else None
+        except Exception as e:
+            self.logger.warning(f"LLM translation error: {e}")
+            return None
+    def translate_with_libre_translate(self, text: str, target_lang: str, source_lang: str = 'auto') -> Optional[str]:
+        """Translate using LibreTranslate (free public API)"""
+        try:
+            # Map language codes
+            lang_map = {
+                'si': 'si',  # Sinhala
+                'en': 'en',  # English
+                'auto': 'auto'
+            }
+            source = lang_map.get(source_lang, 'auto')
+            target = lang_map.get(target_lang, 'en')
+            payload = {
+                'q': text,
+                'source': source,
+                'target': target,
+                'format': 'text'
+            }
+            headers = {
+                'Content-Type': 'application/json'
+            }
+            response = requests.post(
+                self.libre_translate_url,
+                json=payload,
+                headers=headers,
+                timeout=10
+            )
+            if response.status_code == 200:
+                result = response.json()
+                translated = result.get('translatedText')
+                self.logger.debug(f"LibreTranslate success: '{text}' -> '{translated}'")
+                self.last_translation_method = 'libretranslate'
+                return translated
+            return None
+        except Exception as e:
+            self.logger.warning(f"LibreTranslate error: {e}")
+            return None
+    def translate_with_mymemory(self, text: str, target_lang: str, source_lang: str = 'auto') -> Optional[str]:
+        """Translate using MyMemory (free API)"""
+        try:
+            # Map language codes
+            lang_map = {
+                'si': 'si',  # Sinhala
+                'en': 'en',  # English
+                'auto': 'auto'
+            }
+            source = lang_map.get(source_lang, 'auto')
+            langpair = f"{source}|{target_lang}"
+            params = {
+                'q': text,
+                'langpair': langpair
+            }
+            response = requests.get(
+                self.mymemory_url,
+                params=params,
+                timeout=10
+            )
+            if response.status_code == 200:
+                result = response.json()
+                translated = result.get('responseData', {}).get('translatedText')
+                self.logger.debug(f"MyMemory success: '{text}' -> '{translated}'")
+                self.last_translation_method = 'mymemory'
+                return translated
+            return None
+        except Exception as e:
+            self.logger.warning(f"MyMemory translation error: {e}")
+            return None
+    def translate_with_dictionary(self, text: str, target_lang: str, source_lang: str = 'auto') -> str:
+        """Translate using dictionary-based approach"""
+        if target_lang == 'en':
+            # Determine source language if auto
+            if source_lang == 'auto':
+                detection_result = self.language_detector.detect_language(text)
+                detected_lang = detection_result['language']
+                if detected_lang == 'sinhala':
+                    source_lang = 'si'
+                elif detected_lang == 'tamil':
+                    source_lang = 'ta'
+                else:
+                    source_lang = 'si'  # Default to Sinhala
+            translated = text
+            if source_lang == 'si':
+                # Sinhala to English
+                for sinhala, english in self.transport_terms.items():
+                    translated = translated.replace(sinhala, english)
+            elif source_lang == 'ta':
+                # Tamil to English
+                for tamil, english in self.tamil_transport_terms.items():
+                    translated = translated.replace(tamil, english)
+            return translated
+        elif target_lang == 'si':
+            # English to Sinhala
+            translated = text
+            for sinhala, english in self.transport_terms.items():
+                translated = translated.replace(english, sinhala)
+            return translated
+        elif target_lang == 'ta':
+            # English to Tamil
+            translated = text
+            for tamil, english in self.tamil_transport_terms.items():
+                translated = translated.replace(english, tamil)
+            return translated
+        return text
+    def translate_text(self, text: str, target_lang: str, source_lang: str = 'auto') -> str:
+        """Main translation method with multiple fallbacks"""
+        if not text or not text.strip():
+            return text
+        # Try translation methods
+        if self.force_llm_translation:
+            translation_methods = [
+                ('LLM', lambda: self.translate_with_llm(text, target_lang, source_lang))
+            ]
+        else:
+            translation_methods = [
+                ('LLM', lambda: self.translate_with_llm(text, target_lang, source_lang)),
+                ('MyMemory', lambda: self.translate_with_mymemory(text, target_lang, source_lang)),
+                ('LibreTranslate', lambda: self.translate_with_libre_translate(text, target_lang, source_lang)),
+                ('Dictionary', lambda: self.translate_with_dictionary(text, target_lang))
+            ]
+        for method_name, method_func in translation_methods:
+            try:
+                result = method_func()
+                if result and result.strip():
+                    self.logger.info(f"Translation successful using {method_name}")
+                    if not self.last_translation_method:
+                        self.last_translation_method = method_name.lower()
+                    return result.strip()
+            except Exception as e:
+                self.logger.warning(f"{method_name} translation failed: {e}")
+                continue
+        # Final fallback
+        result = self.translate_with_dictionary(text, target_lang, source_lang)
+        self.last_translation_method = 'dictionary'
+        return result
+    def translate_query(self, query: str) -> Dict[str, Any]:
+        """Translate a user query from any supported language to English"""
+        # Detect the language of the input
+        detection_result = self.language_detector.detect_language(query)
+        detected_language = detection_result['language']
+        # If it's already English, return as is
+        if detected_language == 'english':
+            return {
+                'is_sinhala': False,
+                'is_tamil': False,
+                'is_singlish': False,
+                'detected_language': 'english',
+                'original_query': query,
+                'translated_query': query,
+                'translation_method': 'none',
+                'detection_confidence': detection_result['confidence']
+            }
+        # Handle pattern-based parsing for specific languages
+        if self.use_pattern_translation:
+            parsed = None
+            if detected_language == 'sinhala':
+                parsed = self._parse_sinhala_fare_query(query)
+            elif detected_language == 'tamil':
+                parsed = self._parse_tamil_fare_query(query)
+            if parsed:
+                self.logger.info(f"Pattern-based {detected_language} fare parse: '{query}' -> '{parsed}'")
+                return {
+                    'is_sinhala': detected_language == 'sinhala',
+                    'is_tamil': detected_language == 'tamil',
+                    'is_singlish': detected_language == 'singlish',
+                    'detected_language': detected_language,
+                    'original_query': query,
+                    'translated_query': parsed,
+                    'translation_method': 'pattern',
+                    'detection_confidence': detection_result['confidence']
+                }
+        # Determine source language code for translation
+        source_lang = 'si' if detected_language == 'sinhala' else 'ta' if detected_language == 'tamil' else 'si'
+        # Translate to English
+        translated = self.translate_text(query, 'en', source_lang)
+        # Normalize English synonyms to expected NLP vocabulary
+        translated = self._normalize_english_query(translated)
+        method = self.last_translation_method or ('llm' if self.openai_api_key else 'dictionary')
+        self.logger.info(f"Translated {detected_language} query ({method}): '{query}' -> '{translated}'")
+        return {
+            'is_sinhala': detected_language == 'sinhala',
+            'is_tamil': detected_language == 'tamil',
+            'is_singlish': detected_language == 'singlish',
+            'detected_language': detected_language,
+            'original_query': query,
+            'translated_query': translated,
+            'translation_method': method,
+            'detection_confidence': detection_result['confidence']
+        }
+    def _normalize_english_query(self, text: str) -> str:
+        """Normalize English synonyms to match NLP patterns (fare/price/cost)."""
+        if not text:
+            return text
+        normalized = text
+        replacements = {
+            'fees': 'fare',
+            'fee': 'fare',
+            'charges': 'cost',
+            'charge': 'cost',
+            'ticket price': 'fare',
+            'ticket fare': 'fare',
+            'bus ticket': 'bus fare',
+        }
+        # Lowercase operate, then restore original casing minimally by returning lowercase; downstream lowercases anyway
+        lower = normalized.lower()
+        for old, new in replacements.items():
+            lower = lower.replace(old, new)
+        return lower
+    def translate_response(self, response: Dict[str, Any], target_language: str = None) -> Dict[str, Any]:
+        """Translate response back to the detected language"""
+        translated_response = response.copy()
+        # Determine target language from translation_info if not provided
+        if target_language is None and 'translation_info' in response:
+            translation_info = response['translation_info']
+            if translation_info.get('detected_language'):
+                detected_lang = translation_info['detected_language']
+                if detected_lang == 'sinhala':
+                    target_language = 'si'
+                elif detected_lang == 'tamil':
+                    target_language = 'ta'
+                else:
+                    target_language = 'si'  # Default to Sinhala
+            else:
+                target_language = 'si'  # Default to Sinhala
+        elif target_language is None:
+            target_language = 'si'  # Default to Sinhala
+        # Translate the main message
+        if 'message' in response:
+            translated_response['message'] = self.translate_text(
+                response['message'], target_language, 'en'
+            )
+        # Translate suggestions if any
+        if 'suggestions' in response and response['suggestions']:
+            translated_response['suggestions'] = [
+                self.translate_text(suggestion, target_language, 'en')
+                for suggestion in response['suggestions']
+            ]
+        # Translate corrections if any
+        if 'corrections' in response and response['corrections']:
+            translated_corrections = []
+            for correction in response['corrections']:
+                translated_correction = correction.copy()
+                if 'original' in correction:
+                    translated_correction['original'] = self.translate_text(
+                        correction['original'], target_language, 'en'
+                    )
+                if 'corrected' in correction:
+                    translated_correction['corrected'] = self.translate_text(
+                        correction['corrected'], target_language, 'en'
+                    )
+                translated_corrections.append(translated_correction)
+            translated_response['corrections'] = translated_corrections
+        # Add translation metadata
+        translated_response['translation_info'] = {
+            'translated': True,
+            'target_language': target_language,
+            'translation_method': 'llm' if self.openai_api_key else 'dictionary'
+        }
+        return translated_response
+    def get_sinhala_examples(self) -> Dict[str, Any]:
+        """Get example queries in Sinhala"""
+        sinhala_examples = {
+            'fare_queries': [
+                {
+                    'query': 'කොළඹ සිට මහනුවරට යන බස් ගාස්තුව කීයද?',
+                    'description': 'කොළඹ සිට මහනුවරට යන බස් ගාස්තුව සොයන්න'
+                },
+                {
+                    'query': 'මාතර සිට ගාල්ලට යන මිල කීයද?',
+                    'description': 'මාතර සිට ගාල්ලට යන මිල සොයන්න'
+                },
+                {
+                    'query': 'අනුරාධපුර සිට කොළඹට යන වාරිකය',
+                    'description': 'අනුරාධපුර සිට කොළඹට යන වාරිකය සොයන්න'
+                }
+            ],
+            'comparison_queries': [
+                {
+                    'query': 'කොළඹ සිට මහනුවරට සහ කොළඹ සිට ගාල්ලට යන ගාස්තු සසඳන්න',
+                    'description': 'විවිධ මාර්ගවල ගාස්තු සසඳන්න'
+                },
+                {
+                    'query': 'කොළඹ සිට මහනුවරට සහ කොළඹ සිට අනුරාධපුරට යන ගාස්තුවල වෙනස කීයද?',
+                    'description': 'මාර්ග දෙකක ගාස්තු වෙනස සොයන්න'
+                }
+            ],
+            'range_queries': [
+                {
+                    'query': 'රුපියල් 500 ට අඩු ගාස්තු සහිත මාර්ග සොයන්න',
+                    'description': 'රුපියල් 500 ට අඩු ගාස්තු සහිත මාර්ග සොයන්න'
+                },
+                {
+                    'query': 'රුපියල් 200 සහ 800 අතර ගාස්තු සහිත මාර්ග පෙන්වන්න',
+                    'description': 'රුපියල් 200 සහ 800 අතර ගාස්තු සහිත මාර්ග සොයන්න'
+                }
+            ],
+            'recommendation_queries': [
+                {
+                    'query': 'අඩු මිලේ මාර්ග නිර්දේශ කරන්න',
+                    'description': 'අඩු මිලේ මාර්ග නිර්දේශ කරන්න'
+                },
+                {
+                    'query': 'ප්‍රසිද්ධ ගමනාන්ත පෙන්වන්න',
+                    'description': 'ප්‍රසිද්ධ ගමනාන්ත සොයන්න'
+                }
+            ],
+            'statistical_queries': [
+                {
+                    'query': 'සාමාන්‍ය ගාස්තුව කීයද?',
+                    'description': 'සාමාන්‍ය ගාස්තුව සොයන්න'
+                },
+                {
+                    'query': 'දත්ත ගබඩා සංඛ්‍යාලේඛන',
+                    'description': 'දත්ත ගබඩා සංඛ්‍යාලේඛන සොයන්න'
+                }
+            ]
+        }
+        return sinhala_examples
+    def get_tamil_examples(self) -> Dict[str, Any]:
+        """Get example queries in Tamil"""
+        tamil_examples = {
+            'fare_queries': [
+                {
+                    'query': 'கொழும்பு இருந்து கண்டி வரை பேருந்து கட்டணம் எவ்வளவு?',
+                    'description': 'கொழும்பு இருந்து கண்டி வரை பேருந்து கட்டணம் கண்டுபிடி'
+                },
+                {
+                    'query': 'மாத்தறை இருந்து காலி வரை விலை எவ்வளவு?',
+                    'description': 'மாத்தறை இருந்து காலி வரை விலை கண்டுபிடி'
+                },
+                {
+                    'query': 'அனுராதபுரம் இருந்து கொழும்பு வரை கட்டணம்',
+                    'description': 'அனுராதபுரம் இருந்து கொழும்பு வரை கட்டணம் கண்டுபிடி'
+                }
+            ],
+            'comparison_queries': [
+                {
+                    'query': 'கொழும்பு இருந்து கண்டி வரை மற்றும் கொழும்பு இருந்து காலி வரை கட்டணம் ஒப்பிடு',
+                    'description': 'வெவ்வேறு பாதைகளின் கட்டணம் ஒப்பிடு'
+                },
+                {
+                    'query': 'கொழும்பு இருந்து கண்டி வரை மற்றும் கொழும்பு இருந்து அனுராதபுரம் வரை கட்டணத்தின் வித்தியாசம் எவ்வளவு?',
+                    'description': 'இரண்டு பாதைகளின் கட்டண வித்தியாசம் கண்டுபிடி'
+                }
+            ],
+            'range_queries': [
+                {
+                    'query': 'ரூபாய் 500 க்கு குறைவான கட்டணம் உள்ள பாதைகளை கண்டுபிடி',
+                    'description': 'ரூபாய் 500 க்கு குறைவான கட்டணம் உள்ள பாதைகளை கண்டுபிடி'
+                },
+                {
+                    'query': 'ரூபாய் 200 மற்றும் 800 இடையில் கட்டணம் உள்ள பாதைகளை காட்டு',
+                    'description': 'ரூபாய் 200 மற்றும் 800 இடையில் கட்டணம் உள்ள பாதைகளை கண்டுபிடி'
+                }
+            ],
+            'recommendation_queries': [
+                {
+                    'query': 'குறைந்த விலையில் பாதைகளை பரிந்துரை',
+                    'description': 'குறைந்த விலையில் பாதைகளை பரிந்துரை'
+                },
+                {
+                    'query': 'பிரபலமான இலக்குகளை காட்டு',
+                    'description': 'பிரபலமான இலக்குகளை கண்டுபிடி'
+                }
+            ],
+            'statistical_queries': [
+                {
+                    'query': 'சராசரி கட்டணம் எவ்வளவு?',
+                    'description': 'சராசரி கட்டணம் கண்டுபிடி'
+                },
+                {
+                    'query': 'தரவு சேமிப்பக புள்ளிவிவரங்கள்',
+                    'description': 'தரவு சேமிப்பக புள்ளிவிவரங்கள் கண்டுபிடி'
+                }
+            ]
+        }
+        return tamil_examples
+    def test_translation(self) -> Dict[str, Any]:
+        """Test translation functionality on transportation-related queries in multiple languages."""
+        test_cases = [
+            # Sinhala test cases
+            {
+                'language': 'sinhala',
+                'original': 'කොළඹ සිට මහනුවරට යන බස් ගාස්තුව කීයද?',
+                'expected_english': 'What is the bus fare from Colombo to Kandy?'
+            },
+            {
+                'language': 'sinhala',
+                'original': 'මාතර සිට ගාල්ලට යන මිල කීයද?',
+                'expected_english': 'How much is the price from Matara to Galle?'
+            },
+            {
+                'language': 'sinhala',
+                'original': 'කොළඹ සිට පානදුර දක්වා සහ කොළඹ සිට ගාල්ල දක්වා ගාස්තු සසඳා බලන්න.',
+                'expected_english': 'Compare fares from Colombo to Panadura and from Colombo to Galle.'
+            },
+            {
+                'language': 'sinhala',
+                'original': 'රුපියල් 500 ට අඩු ගාස්තු සහිත මාර්ග පෙන්වන්න.',
+                'expected_english': 'Show routes with fares under 500 rupees.'
+            },
+            {
+                'language': 'sinhala',
+                'original': 'අඩු මිලේ මාර්ග නිර්දේශ කරන්න.',
+                'expected_english': 'Recommend cheap routes.'
+            },
+            # Tamil test cases
+            {
+                'language': 'tamil',
+                'original': 'கொழும்பு இருந்து கண்டி வரை பேருந்து கட்டணம் எவ்வளவு?',
+                'expected_english': 'What is the bus fare from Colombo to Kandy?'
+            },
+            {
+                'language': 'tamil',
+                'original': 'மாத்தறை இருந்து காலி வரை விலை எவ்வளவு?',
+                'expected_english': 'How much is the price from Matara to Galle?'
+            },
+            {
+                'language': 'tamil',
+                'original': 'கொழும்பு இருந்து பனதுரை வரை மற்றும் கொழும்பு இருந்து காலி வரை கட்டணம் ஒப்பிடு.',
+                'expected_english': 'Compare fares from Colombo to Panadura and from Colombo to Galle.'
+            },
+            {
+                'language': 'tamil',
+                'original': 'ரூபாய் 500 க்கு குறைவான கட்டணம் உள்ள பாதைகளை காட்டு.',
+                'expected_english': 'Show routes with fares under 500 rupees.'
+            },
+            {
+                'language': 'tamil',
+                'original': 'குறைந்த விலையில் பாதைகளை பரிந்துரை.',
+                'expected_english': 'Recommend cheap routes.'
+            },
+            # Singlish test cases
+            {
+                'language': 'singlish',
+                'original': 'කොළඹ සිට Kandy ගාස්තුව කීයද?',
+                'expected_english': 'What is the fare from Colombo to Kandy?'
+            },
+            {
+                'language': 'singlish',
+                'original': 'Colombo සිට ගාල්ලට bus fare කීයද?',
+                'expected_english': 'What is the bus fare from Colombo to Galle?'
+            },
+            {
+                'language': 'singlish',
+                'original': 'කොළඹ සිට Panadura සහ Colombo සිට Galle fares compare කරන්න.',
+                'expected_english': 'Compare fares from Colombo to Panadura and from Colombo to Galle.'
+            },
+            # English test cases
+            {
+                'language': 'english',
+                'original': 'What is the fare from Colombo to Kandy?',
+                'expected_english': 'What is the fare from Colombo to Kandy?'
+            },
+            {
+                'language': 'english',
+                'original': 'Show me routes from Panadura',
+                'expected_english': 'Show me routes from Panadura'
+            }
+        ]
+        results = []
+        total_exact = 0
+        total_good = 0
+        total_tests = len(test_cases)
+        for test_case in test_cases:
+            original = test_case['original']
+            expected = test_case['expected_english']
+            language = test_case['language']
+            # Detect language
+            detection_result = self.language_detector.detect_language(original)
+            detected_language = detection_result['language']
+            # Reset method tracker and translate
+            self.last_translation_method = None
+            translated = self.translate_text(original, 'en', 'auto') or ''
+            tr = translated.strip()
+            ex = expected.strip()
+            tr_low = tr.lower()
+            ex_low = ex.lower()
+            # Accuracy heuristic
+            if tr_low == ex_low:
+                accuracy = 'exact'
+                total_exact += 1
+                total_good += 1
+            elif tr_low in ex_low or ex_low in tr_low:
+                accuracy = 'good'
+                total_good += 1
+            else:
+                accuracy = 'partial'
+            # Intent preservation check for comparisons
+            intent_preserved = True
+            if language in ['sinhala', 'tamil'] and ('සසඳ' in original or 'ஒப்பிடு' in original):
+                intent_preserved = ('compare' in tr_low)
+            results.append({
+                'original_query': original,
+                'language': language,
+                'detected_language': detected_language,
+                'translated_english': tr,
+                'expected_english': ex,
+                'translation_accuracy': accuracy,
+                'intent_preserved': intent_preserved,
+                'method_used': self.last_translation_method or ('llm' if self.openai_api_key else 'dictionary'),
+                'detection_confidence': detection_result['confidence']
+            })
+        summary = {
+            'total_tests': total_tests,
+            'exact_matches': total_exact,
+            'good_or_better': total_good,
+            'accuracy_rate_percent': round((total_good / total_tests) * 100, 2) if total_tests else 0
+        }
+        self.logger.info(f"Translation test summary: {summary}")
+        return {
+            'translation_service_status': 'active',
+            'supported_languages': ['sinhala', 'tamil', 'singlish', 'english'],
+            'available_methods': {
+                'llm': self.openai_api_key is not None,
+                'libre_translate': True,
+                'mymemory': True,
+                'dictionary': True
+            },
+            'summary': summary,
+            'test_results': results
+        }