"""Evaluate a saved fake-news classifier on the combined True/Fake dataset."""

import torch
import pandas as pd
from pathlib import Path

from preprocessing import preprocess_text, load_tokenizer, prepare_data
from data_loader import create_data_loader
from inference import load_model, evaluate_model

# All artifacts (model weights, tokenizer pickle, cached cleaned CSV) are
# versioned together under ./output/version_{version}/.
version = 7


def run_evaluation(model_path, tokenizer_path, device):
    """Load (or build) the cleaned dataset, run the model, and score it.

    Args:
        model_path: Path to the saved model weights (.pth).
        tokenizer_path: Path to the pickled tokenizer (must expose
            ``word_index`` — presumably a Keras-style tokenizer; confirm
            against ``load_tokenizer``).
        device: torch.device on which evaluation runs.

    Returns:
        Tuple ``(accuracy, f1, auc_roc)`` as produced by ``evaluate_model``.
    """
    cleaned_path = f'./output/version_{version}/cleaned_inference_data_{version}.csv'

    # Reuse the cached cleaned CSV when it exists; otherwise rebuild it from
    # the raw datasets. Catch only FileNotFoundError — the original bare
    # `except:` also hid parse errors and KeyboardInterrupt.
    try:
        df = pd.read_csv(cleaned_path)
        df.dropna(inplace=True)
        print("Cleaned data found.")
    except FileNotFoundError:
        print("No cleaned data found. Cleaning data now...")

        # Load the raw datasets and attach labels (1 = true, 0 = fake).
        true_news = pd.read_csv('data_1/True.csv')
        fake_news = pd.read_csv('data_1/Fake.csv')
        true_news['label'] = 1
        fake_news['label'] = 0

        # Combine and drop columns not used by the model.
        df = pd.concat([true_news, fake_news], ignore_index=True)
        df.drop(columns=['subject', 'date'], inplace=True)

        df['title'] = df['title'].apply(preprocess_text)
        df['text'] = df['text'].apply(preprocess_text)

        # Ensure the versioned output directory exists before caching —
        # to_csv does not create missing parent directories.
        Path(cleaned_path).parent.mkdir(parents=True, exist_ok=True)
        df.to_csv(cleaned_path, index=False)
        df.dropna(inplace=True)
        print("Cleaned data saved.")

    labels = df['label'].values

    # Load tokenizer and model; vocab size is word-index size + 1 (padding id 0).
    tokenizer = load_tokenizer(tokenizer_path)
    model = load_model(model_path, len(tokenizer.word_index) + 1)

    # Tokenize both text fields and batch them (no shuffling for evaluation,
    # so batches stay aligned with `labels`).
    titles = prepare_data(df['title'], tokenizer)
    texts = prepare_data(df['text'], tokenizer)
    data_loader = create_data_loader(
        titles, texts, batch_size=32, shuffle=False)

    # Evaluate and propagate the metric tuple unchanged.
    accuracy, f1, auc_roc = evaluate_model(model, data_loader, device, labels)
    return accuracy, f1, auc_roc


if __name__ == "__main__":
    model_path = f'./output/version_{version}/best_model_{version}.pth'
    tokenizer_path = f'./output/version_{version}/tokenizer_{version}.pickle'
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Device: {device}")
    accuracy, f1, auc_roc = run_evaluation(model_path, tokenizer_path, device)
    print(
        f'Accuracy: {accuracy:.4f}, F1 Score: {f1:.4f}, AUC-ROC: {auc_roc:.4f}')