# Template for model evaluation script for {{phase_name}}
from transformers import AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments
from datasets import load_dataset  # Example datasets library
from sklearn.metrics import accuracy_score, classification_report  # Example metrics
import torch  # Example PyTorch
# Add other necessary imports

def evaluate_model(model_path, dataset_path, model_name="bert-base-uncased"):
    """
    Evaluates a trained model on a dataset.
    """
    try:
        # Load dataset for evaluation (replace with your actual dataset loading).
        # Map the CSV to a "validation" split so it can be passed to the Trainer below.
        dataset = load_dataset("csv", data_files={"validation": dataset_path})  # Example: CSV loading, replace with your dataset format
        print("Evaluation dataset loaded. Loading model and tokenizer...")

        tokenizer = AutoTokenizer.from_pretrained(model_name)  # Use the base model tokenizer (or the fine-tuned tokenizer if saved separately)
        model = AutoModelForSequenceClassification.from_pretrained(model_path)

        def tokenize_function(examples):
            # Example: tokenize the 'text_column' field; the dataset also needs a 'label' column for metrics.
            return tokenizer(examples["text_column"], padding="max_length", truncation=True)

        tokenized_datasets = dataset.map(tokenize_function, batched=True)

        def compute_metrics(eval_pred):
            predictions, labels = eval_pred
            predictions = predictions.argmax(axis=-1)
            accuracy = accuracy_score(labels, predictions)
            report = classification_report(labels, predictions, output_dict=True)  # Detailed per-class report
            return {"accuracy": accuracy, "classification_report": report}

        training_args = TrainingArguments(
            output_dir="./evaluation_results",
            per_device_eval_batch_size=64,
            logging_dir="./eval_logs",
        )
        trainer = Trainer(
            model=model,
            args=training_args,
            eval_dataset=tokenized_datasets["validation"],  # The 'validation' split created above
            compute_metrics=compute_metrics,
            tokenizer=tokenizer,
        )

        evaluation_results = trainer.evaluate()
        print("Model evaluation completed.")
        print("Evaluation Results:")
        print(f"Accuracy: {evaluation_results['eval_accuracy']}")
        print("Classification Report:\n", evaluation_results['eval_classification_report'])
    except FileNotFoundError:
        print("Error: Dataset file or model files not found.")
    except Exception as e:
        print(f"Error during model evaluation: {e}")

if __name__ == "__main__":
    model_filepath = "models/fine_tuned_model"  # Replace with your model path
    evaluation_data_filepath = "data/evaluation_dataset.csv"  # Replace with your evaluation data path
    base_model_name = "bert-base-uncased"  # Replace with your base model name
    evaluate_model(model_filepath, evaluation_data_filepath, model_name=base_model_name)
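
# Note: a minimal sketch of the evaluation CSV layout this template assumes; the
# column names are assumptions, so rename 'text_column' (input text) and 'label'
# (integer class id) to match your own data:
#
#   text_column,label
#   "example sentence to classify",0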