| |
| """ |
| Inference script for Pulse Core 1 - Vietnamese Sentiment Analysis System. |
| Loads trained sentiment models from local files and performs predictions. |
| Supports both VLSP2016 general sentiment and UTS2017_Bank aspect sentiment models. |
| """ |
|
|
| import argparse |
| import joblib |
| import os |
| import glob |
|
|
|
|
def _newest_first(paths):
    """Return ``paths`` ordered by modification time, most recent first.

    The path itself is used as a secondary sort key so that files sharing
    the same timestamp are still ordered deterministically.
    """
    return sorted(paths, key=lambda p: (os.path.getmtime(p), p), reverse=True)


def _pick_run_model(paths):
    """Pick the newest path containing 'SVC', or the newest path overall."""
    ordered = _newest_first(paths)
    return next((p for p in ordered if 'SVC' in p), ordered[0])


def find_local_models():
    """Find all available local sentiment model files.

    Two locations are scanned, both relative to the current working
    directory:

    * ``exported`` -- ``*.joblib`` files directly in the working directory
      whose names start with ``vlsp2016_sentiment_`` or
      ``uts2017_sentiment_``.
    * ``runs`` -- files matching ``runs/*/models/VLSP2016_Sentiment_*.joblib``
      or ``runs/*/models/UTS2017_Bank_AspectSentiment_*.joblib``.

    When several files match the same model type, the most recently modified
    one is chosen; for ``runs`` a path containing ``'SVC'`` is preferred over
    any other.

    Returns:
        dict: ``{'exported': {...}, 'runs': {...}}`` where each inner dict
        maps ``'vlsp2016_sentiment'`` / ``'uts2017_sentiment'`` to a file
        path. A model type with no matching file is simply absent.
    """
    models = {
        'exported': {},
        'runs': {},
    }

    # Exported models live next to the script and use lowercase prefixes.
    exported_prefixes = {
        'vlsp2016_sentiment': 'vlsp2016_sentiment_',
        'uts2017_sentiment': 'uts2017_sentiment_',
    }
    joblib_files = [name for name in os.listdir('.') if name.endswith('.joblib')]
    for model_key, prefix in exported_prefixes.items():
        matches = [name for name in joblib_files if name.startswith(prefix)]
        if matches:
            # os.listdir order is arbitrary, so choose explicitly by mtime
            # instead of keeping whichever file happened to be listed last.
            models['exported'][model_key] = _newest_first(matches)[0]

    # Training runs keep their models under runs/<run>/models/.
    run_patterns = {
        'vlsp2016_sentiment': 'runs/*/models/VLSP2016_Sentiment_*.joblib',
        'uts2017_sentiment': 'runs/*/models/UTS2017_Bank_AspectSentiment_*.joblib',
    }
    for model_key, pattern in run_patterns.items():
        matches = glob.glob(pattern)
        if matches:
            models['runs'][model_key] = _pick_run_model(matches)

    return models
|
|
|
|
def load_model(model_path):
    """Deserialize a model from ``model_path`` with joblib.

    Progress and failures are reported on stdout.

    Args:
        model_path: Path of the ``.joblib`` file to load.

    Returns:
        The loaded model object, or ``None`` when loading it (or reading its
        ``classes_`` attribute for the status message) raises an exception.
    """
    # NOTE(review): joblib.load unpickles the file and can therefore execute
    # arbitrary code; only point this at model files you trust.
    try:
        print(f"Loading model from: {model_path}")
        loaded = joblib.load(model_path)
        class_count = len(loaded.classes_)
        print(f"Model loaded successfully. Classes: {class_count}")
    except Exception as e:
        print(f"Error loading model: {e}")
        return None
    else:
        return loaded
|
|
|
|
def predict_text(model, text):
    """Classify a single text and report its most probable classes.

    Args:
        model: Object exposing ``predict_proba`` and ``classes_``.
        text: The text to classify.

    Returns:
        tuple: ``(prediction, confidence, top_predictions)`` where
        ``top_predictions`` is a list of up to three ``(class, probability)``
        pairs ordered from most to least probable, and ``prediction`` /
        ``confidence`` are the first pair. If anything raises, the error is
        printed and ``(None, 0, [])`` is returned.
    """
    try:
        scores = model.predict_proba([text])[0]

        # Class indices from most to least probable, truncated to three.
        ranked = scores.argsort()[::-1][:3]
        top_predictions = [(model.classes_[pos], scores[pos]) for pos in ranked]

        # The best-scoring pair doubles as the headline prediction.
        prediction, confidence = top_predictions[0]
        return prediction, confidence, top_predictions
    except Exception as e:
        print(f"Error making prediction: {e}")
        return None, 0, []
|
|
|
|
def interactive_mode(model, dataset_name):
    """Run a read-predict-print loop on standard input.

    Prints a banner chosen by ``dataset_name``, then repeatedly prompts for a
    line of text, classifies it with ``predict_text`` and prints the
    prediction, its confidence and the top predictions.

    The loop ends when the user types ``quit``, ``exit`` or ``q``, presses
    Ctrl-C, or when standard input reaches end-of-file. Blank lines are
    ignored.

    Args:
        model: Model object passed through to ``predict_text``.
        dataset_name: ``'vlsp2016_sentiment'`` selects the general-sentiment
            banner; any other value selects the banking banner.
    """
    print(f"\n{'='*60}")
    if dataset_name == 'vlsp2016_sentiment':
        print("INTERACTIVE MODE - VIETNAMESE GENERAL SENTIMENT ANALYSIS")
        print(f"{'='*60}")
        print("Enter Vietnamese text to analyze sentiment (type 'quit' to exit):")
    else:
        print("INTERACTIVE MODE - VIETNAMESE BANKING ASPECT SENTIMENT ANALYSIS")
        print(f"{'='*60}")
        print("Enter Vietnamese banking text to analyze aspect and sentiment (type 'quit' to exit):")

    while True:
        try:
            user_input = input("\nText: ").strip()

            if user_input.lower() in ['quit', 'exit', 'q']:
                break

            if not user_input:
                continue

            prediction, confidence, top_predictions = predict_text(model, user_input)

            # predict_text signals failure with None; compare explicitly so a
            # falsy class label (e.g. 0 or "") is still reported.
            if prediction is not None:
                print(f"Predicted category: {prediction}")
                print(f"Confidence: {confidence:.3f}")
                print("Top 3 predictions:")
                for i, (category, prob) in enumerate(top_predictions, 1):
                    print(f" {i}. {category}: {prob:.3f}")

        except (KeyboardInterrupt, EOFError):
            # EOF (Ctrl-D or exhausted piped stdin) must end the loop: every
            # later input() call would raise again, looping forever.
            print("\nExiting...")
            break
        except Exception as e:
            print(f"Error: {e}")
|
|
|
|
def test_examples(model, dataset_name):
    """Run the model on a fixed set of example sentences and print results.

    Args:
        model: Model object passed through to ``predict_text``.
        dataset_name: ``'vlsp2016_sentiment'`` selects the general-sentiment
            examples; any other value selects the banking examples.

    For each example whose prediction succeeds, the text, predicted class and
    confidence are printed. When the confidence is below 0.7 the top
    predictions are listed as well.
    """
    if dataset_name == 'vlsp2016_sentiment':
        examples = [
            "Sản phẩm này rất tốt, tôi rất hài lòng",
            "Chất lượng dịch vụ tệ quá",
            "Giá cả hợp lý, có thể chấp nhận được",
            "Nhân viên phục vụ rất nhiệt tình",
            "Đồ ăn không ngon, sẽ không quay lại",
            "Giao hàng nhanh chóng, đóng gói cẩn thận",
            "Sản phẩm bình thường, không có gì đặc biệt",
            "Rất đáng tiền, chất lượng tuyệt vời",
            "Không như mong đợi, khá thất vọng",
            "Dịch vụ khách hàng tốt, giải quyết nhanh chóng",
        ]
        print("\n" + "="*60)
        print("TESTING VIETNAMESE GENERAL SENTIMENT ANALYSIS")
        print("="*60)
    else:
        examples = [
            "Tôi muốn mở tài khoản tiết kiệm mới",
            "Lãi suất vay mua nhà hiện tại quá cao",
            "Làm thế nào để đăng ký internet banking?",
            "Chi phí chuyển tiền ra nước ngoài rất đắt",
            "Ngân hàng ACB có uy tín không?",
            "Tôi cần hỗ trợ về dịch vụ ngân hàng",
            "Thẻ tín dụng bị khóa không rõ lý do",
            "Dịch vụ chăm sóc khách hàng rất tệ",
            "Khuyến mãi tháng này rất hấp dẫn",
            "Bảo mật tài khoản có được đảm bảo không?",
        ]
        print("\n" + "="*60)
        print("TESTING VIETNAMESE BANKING ASPECT SENTIMENT ANALYSIS")
        print("="*60)

    for text in examples:
        prediction, confidence, top_predictions = predict_text(model, text)

        # predict_text signals failure with None; compare explicitly so a
        # falsy class label (e.g. 0 or "") is not silently skipped.
        if prediction is not None:
            print(f"\nText: {text}")
            print(f"Prediction: {prediction}")
            print(f"Confidence: {confidence:.3f}")

            # Low-confidence results also show the runner-up classes.
            if confidence < 0.7:
                print("Alternative predictions:")
                for i, (category, prob) in enumerate(top_predictions[:3], 1):
                    print(f" {i}. {category}: {prob:.3f}")
            print("-" * 60)
|
|
|
|
def list_available_models():
    """Print every locally discovered sentiment model with its size.

    Models come from ``find_local_models()``. Exported models are listed
    first, then models found under the runs directory; each line shows the
    model type, its path, its size in megabytes and a dataset description.
    When nothing is found, the training commands are printed as a hint.
    """
    models = find_local_models()

    print("Available Vietnamese Sentiment Models:")
    print("=" * 50)

    bytes_per_mb = 1024 * 1024
    sections = (
        ('exported', "\nExported Models (Project Root):"),
        ('runs', "\nRuns Models (Training Directory):"),
    )
    for source, heading in sections:
        entries = models[source]
        if not entries:
            continue
        print(heading)
        for model_type, location in entries.items():
            size_mb = os.path.getsize(location) / bytes_per_mb
            if "vlsp2016" in model_type:
                dataset_type = "General Sentiment"
            else:
                dataset_type = "Banking Aspect Sentiment"
            print(f" {model_type}: {location} ({size_mb:.1f}MB) - {dataset_type}")

    if not (models['exported'] or models['runs']):
        print("No local sentiment models found!")
        print("Train a model first using:")
        print(" VLSP2016: python train.py --dataset vlsp2016 --export-model")
        print(" UTS2017: python train.py --dataset uts2017 --export-model")
|
|
|
|
def _infer_dataset_name(model_path):
    """Guess the dataset key from a model path, ignoring letter case."""
    lowered = model_path.lower()
    if 'vlsp2016' in lowered:
        return 'vlsp2016_sentiment'
    if 'uts2017' in lowered:
        return 'uts2017_sentiment'
    return 'unknown'


def main():
    """Command-line entry point.

    Parses the arguments, picks a model file (an explicit ``--model-path`` or
    one discovered by ``find_local_models`` in the requested ``--source``),
    loads it and then either:

    * classifies ``--text`` and prints the result,
    * runs the predefined examples (``--test-examples``), or
    * with neither option, runs the examples and offers interactive mode.

    ``--list-models`` only prints the available models and returns.
    """
    parser = argparse.ArgumentParser(
        description="Inference with local Pulse Core 1 Vietnamese sentiment models"
    )
    parser.add_argument(
        "--dataset",
        type=str,
        choices=["vlsp2016", "uts2017", "auto"],
        default="auto",
        help="Dataset type to use (default: auto-detect)"
    )
    parser.add_argument(
        "--model-path",
        type=str,
        help="Path to specific sentiment model file"
    )
    parser.add_argument(
        "--text",
        type=str,
        help="Vietnamese text to analyze (if not provided, enters interactive mode)"
    )
    parser.add_argument(
        "--test-examples",
        action="store_true",
        help="Test with predefined examples"
    )
    parser.add_argument(
        "--list-models",
        action="store_true",
        help="List all available local sentiment models"
    )
    parser.add_argument(
        "--source",
        type=str,
        choices=["exported", "runs"],
        default="runs",
        help="Model source: exported files or runs directory (default: runs)"
    )

    args = parser.parse_args()

    if args.list_models:
        list_available_models()
        return

    if args.model_path:
        model_path = args.model_path
        # Match case-insensitively: models under runs/ are named
        # "VLSP2016_..." / "UTS2017_..." while exported ones are lowercase.
        dataset_name = _infer_dataset_name(model_path)
    else:
        available = find_local_models()[args.source]
        descriptions = {
            'vlsp2016_sentiment': "VLSP2016 general sentiment model",
            'uts2017_sentiment': "UTS2017 banking aspect sentiment model",
        }
        if args.dataset == 'auto':
            # Auto-detection tries VLSP2016 first, then UTS2017.
            candidates = ['vlsp2016_sentiment', 'uts2017_sentiment']
            verb = "Auto-selected"
            missing_message = "No sentiment models found!"
        else:
            candidates = [f"{args.dataset}_sentiment"]
            verb = "Selected"
            missing_message = f"No {args.dataset.upper()} models found!"

        dataset_name = next((key for key in candidates if key in available), None)
        if dataset_name is None:
            print(missing_message)
            list_available_models()
            return
        model_path = available[dataset_name]
        print(f"{verb} {descriptions[dataset_name]}")

    if not model_path or not os.path.exists(model_path):
        print(f"Model file not found: {model_path}")
        list_available_models()
        return

    model = load_model(model_path)
    # load_model returns None on failure; do not rely on the truthiness of a
    # loaded estimator, which may define __len__.
    if model is None:
        return

    if args.text:
        prediction, confidence, top_predictions = predict_text(model, args.text)
        # Explicit None check so a falsy class label is still printed.
        if prediction is not None:
            print(f"\nText: {args.text}")
            print(f"Prediction: {prediction}")
            print(f"Confidence: {confidence:.3f}")
            print("Top 3 predictions:")
            for i, (category, prob) in enumerate(top_predictions, 1):
                print(f" {i}. {category}: {prob:.3f}")

    elif args.test_examples:
        test_examples(model, dataset_name)

    else:
        # Default: show the examples, then offer the interactive prompt.
        model_type = "General Sentiment" if dataset_name == 'vlsp2016_sentiment' else "Banking Aspect Sentiment"
        print(f"Loaded {model_type} model: {os.path.basename(model_path)}")
        test_examples(model, dataset_name)

        try:
            response = input("\nEnter interactive mode? (y/n): ").strip().lower()
            if response in ['y', 'yes']:
                interactive_mode(model, dataset_name)
        except (KeyboardInterrupt, EOFError):
            # EOF happens when stdin is closed or piped; exit quietly
            # instead of crashing with a traceback.
            print("\nExiting...")
|
|
|
|
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()