""" Comprehensive testing suite for rmtariq/multilingual-emotion-classifier This script provides various testing capabilities for the emotion classification model. Usage: python test_model.py --test-type [quick|comprehensive|interactive|benchmark] Author: rmtariq Repository: https://huggingface.co/rmtariq/multilingual-emotion-classifier """ import argparse import time from transformers import pipeline import torch class EmotionModelTester: """Comprehensive testing suite for the multilingual emotion classifier""" def __init__(self, model_name="rmtariq/multilingual-emotion-classifier"): self.model_name = model_name self.classifier = None self.load_model() def load_model(self): """Load the emotion classification model""" print(f"šŸ“„ Loading model: {self.model_name}") try: self.classifier = pipeline( "text-classification", model=self.model_name, device=0 if torch.cuda.is_available() else -1 ) device = "GPU" if torch.cuda.is_available() else "CPU" print(f"āœ… Model loaded successfully on {device}") except Exception as e: print(f"āŒ Error loading model: {e}") raise def quick_test(self): """Quick test with essential examples""" print("\nšŸš€ QUICK TEST") print("=" * 50) test_cases = [ # English examples ("I am so happy today!", "happy", "šŸ‡¬šŸ‡§"), ("This makes me really angry!", "anger", "šŸ‡¬šŸ‡§"), ("I love you so much!", "love", "šŸ‡¬šŸ‡§"), ("I'm scared of spiders", "fear", "šŸ‡¬šŸ‡§"), ("This news makes me sad", "sadness", "šŸ‡¬šŸ‡§"), ("What a surprise!", "surprise", "šŸ‡¬šŸ‡§"), # Malay examples ("Saya sangat gembira!", "happy", "šŸ‡²šŸ‡¾"), ("Aku marah dengan keadaan ini", "anger", "šŸ‡²šŸ‡¾"), ("Aku sayang kamu", "love", "šŸ‡²šŸ‡¾"), ("Saya takut dengan ini", "fear", "šŸ‡²šŸ‡¾"), # Previously problematic cases (now fixed) ("Ini adalah hari jadi terbaik", "happy", "šŸ‡²šŸ‡¾"), ("Terbaik!", "happy", "šŸ‡²šŸ‡¾"), ("Ini adalah hari yang baik", "happy", "šŸ‡²šŸ‡¾") ] correct = 0 total = len(test_cases) for i, (text, expected, flag) in enumerate(test_cases, 1): result = self.classifier(text) predicted = result[0]['label'].lower() confidence = result[0]['score'] is_correct = predicted == expected if is_correct: correct += 1 status = "āœ…" if is_correct else "āŒ" print(f"{i:2d}. {status} {flag} '{text[:40]}...'") print(f" → {predicted} ({confidence:.1%}) [Expected: {expected}]") accuracy = correct / total print(f"\nšŸ“Š Quick Test Results: {accuracy:.1%} ({correct}/{total})") if accuracy >= 0.9: print("šŸŽ‰ EXCELLENT! Model performing at high level!") elif accuracy >= 0.8: print("šŸ‘ GOOD! Model performing well!") else: print("āš ļø NEEDS ATTENTION. Some issues detected.") return accuracy def comprehensive_test(self): """Comprehensive test covering all aspects""" print("\nšŸ”¬ COMPREHENSIVE TEST") print("=" * 50) # Test categories test_categories = { "English Basic": [ ("I feel fantastic today!", "happy"), ("I'm furious about this!", "anger"), ("I adore this place!", "love"), ("I'm terrified of heights", "fear"), ("I'm heartbroken", "sadness"), ("I can't believe it!", "surprise") ], "Malay Basic": [ ("Gembira sangat hari ini", "happy"), ("Marah betul dengan dia", "anger"), ("Sayang sangat kat kamu", "love"), ("Takut gila dengan benda tu", "fear"), ("Sedih betul dengar berita", "sadness"), ("Terkejut dengan kejadian", "surprise") ], "Malay Fixed Issues": [ ("Ini adalah hari jadi terbaik", "happy"), ("Hari jadi terbaik saya", "happy"), ("Terbaik!", "happy"), ("Hari yang baik", "happy"), ("Pengalaman terbaik", "happy"), ("Masa terbaik", "happy") ], "Edge Cases": [ ("Happy birthday!", "happy"), ("Best day ever!", "happy"), ("Good news!", "happy"), ("Selamat hari jadi", "happy"), ("Berita baik", "happy"), ("Hasil terbaik", "happy") ] } overall_correct = 0 overall_total = 0 for category, cases in test_categories.items(): print(f"\nšŸ“‹ {category}:") print("-" * 30) category_correct = 0 for text, expected in cases: result = self.classifier(text) predicted = result[0]['label'].lower() confidence = result[0]['score'] is_correct = predicted == expected if is_correct: category_correct += 1 overall_correct += 1 overall_total += 1 status = "āœ…" if is_correct else "āŒ" print(f" {status} '{text[:35]}...' → {predicted} ({confidence:.1%})") category_accuracy = category_correct / len(cases) print(f" šŸ“Š {category} Accuracy: {category_accuracy:.1%}") overall_accuracy = overall_correct / overall_total print(f"\nšŸ“Š COMPREHENSIVE TEST RESULTS:") print(f"āœ… Overall Accuracy: {overall_accuracy:.1%} ({overall_correct}/{overall_total})") return overall_accuracy def interactive_test(self): """Interactive testing mode""" print("\nšŸŽ® INTERACTIVE TEST MODE") print("=" * 50) print("Enter text to classify emotions (type 'quit' to exit)") print("Supported emotions: anger, fear, happy, love, sadness, surprise") print() while True: try: text = input("šŸ’¬ Your text: ").strip() if text.lower() in ['quit', 'exit', 'q']: print("šŸ‘‹ Goodbye!") break if not text: continue result = self.classifier(text) predicted = result[0]['label'].lower() confidence = result[0]['score'] # Get emoji for emotion emotion_emojis = { 'anger': '😠', 'fear': '😨', 'happy': '😊', 'love': 'ā¤ļø', 'sadness': '😢', 'surprise': '😲' } emoji = emotion_emojis.get(predicted, 'šŸ¤”') confidence_level = "šŸ’Ŗ High" if confidence > 0.9 else "šŸ‘ Good" if confidence > 0.7 else "āš ļø Low" print(f"šŸŽ­ Result: {emoji} {predicted}") print(f"šŸ“Š Confidence: {confidence:.1%}") print(f"šŸ’Ŗ {confidence_level} confidence!") print() except KeyboardInterrupt: print("\nšŸ‘‹ Goodbye!") break except Exception as e: print(f"āŒ Error: {e}") def benchmark_test(self): """Performance benchmark test""" print("\n⚔ BENCHMARK TEST") print("=" * 50) # Test texts for benchmarking benchmark_texts = [ "I am so happy today!", "This makes me angry!", "I love this!", "I'm scared!", "This is sad news", "What a surprise!", "Saya gembira!", "Aku marah!", "Sayang betul!", "Takut sangat!" ] * 10 # 100 predictions total print(f"šŸ”„ Running {len(benchmark_texts)} predictions...") start_time = time.time() for text in benchmark_texts: _ = self.classifier(text) end_time = time.time() total_time = end_time - start_time avg_time = total_time / len(benchmark_texts) predictions_per_second = len(benchmark_texts) / total_time print(f"šŸ“Š BENCHMARK RESULTS:") print(f"ā±ļø Total time: {total_time:.2f} seconds") print(f"⚔ Average per prediction: {avg_time*1000:.1f} ms") print(f"šŸš€ Predictions per second: {predictions_per_second:.1f}") if predictions_per_second > 10: print("šŸŽ‰ EXCELLENT! Very fast performance!") elif predictions_per_second > 5: print("šŸ‘ GOOD! Acceptable performance!") else: print("āš ļø SLOW. Consider optimization.") return predictions_per_second def main(): """Main testing function""" parser = argparse.ArgumentParser(description="Test the multilingual emotion classifier") parser.add_argument( "--test-type", choices=["quick", "comprehensive", "interactive", "benchmark", "all"], default="quick", help="Type of test to run" ) parser.add_argument( "--model", default="rmtariq/multilingual-emotion-classifier", help="Model name or path" ) args = parser.parse_args() print("šŸŽ­ MULTILINGUAL EMOTION CLASSIFIER TESTING SUITE") print("=" * 60) print(f"Model: {args.model}") print(f"Test Type: {args.test_type}") try: tester = EmotionModelTester(args.model) if args.test_type == "quick": tester.quick_test() elif args.test_type == "comprehensive": tester.comprehensive_test() elif args.test_type == "interactive": tester.interactive_test() elif args.test_type == "benchmark": tester.benchmark_test() elif args.test_type == "all": print("šŸ”„ Running all tests...") tester.quick_test() tester.comprehensive_test() tester.benchmark_test() print("\nšŸŽ® Starting interactive mode...") tester.interactive_test() except Exception as e: print(f"āŒ Testing failed: {e}") return 1 return 0 if __name__ == "__main__": exit(main())