|
import os

import pandas as pd
import torch

from data_loader import create_data_loader
from inference import load_model, evaluate_model
from preprocessing import preprocess_text, load_tokenizer, prepare_data
|
|
|
version = 7 |
|
|
|
|
|
def run_evaluation(model_path, tokenizer_path, device):
    """Evaluate a saved model on the combined True/Fake news dataset.

    Loads a previously cleaned inference CSV if one exists for the current
    ``version``; otherwise rebuilds it from the raw ``data_1`` CSVs,
    preprocesses the text columns, and caches the result for next time.

    Args:
        model_path: Path to the saved model weights (.pth).
        tokenizer_path: Path to the pickled tokenizer.
        device: torch.device on which inference runs.

    Returns:
        Tuple ``(accuracy, f1, auc_roc)`` as produced by ``evaluate_model``.
    """
    cleaned_path = f'./output/version_{version}/cleaned_inference_data_{version}.csv'

    try:
        df = pd.read_csv(cleaned_path)
        df.dropna(inplace=True)
        print("Cleaned data found.")
    # Catch only "cache missing/unusable" errors; a bare except here would
    # also swallow KeyboardInterrupt and real parse bugs.
    except (FileNotFoundError, pd.errors.EmptyDataError):
        print("No cleaned data found. Cleaning data now...")

        true_news = pd.read_csv('data_1/True.csv')
        fake_news = pd.read_csv('data_1/Fake.csv')

        # Binary labels: 1 = real article, 0 = fake article.
        true_news['label'] = 1
        fake_news['label'] = 0

        df = pd.concat([true_news, fake_news], ignore_index=True)

        # Only title/text/label are consumed downstream.
        df.drop(columns=['subject', 'date'], inplace=True)

        df['title'] = df['title'].apply(preprocess_text)
        df['text'] = df['text'].apply(preprocess_text)

        # Drop NaN rows BEFORE caching so the saved file matches what the
        # cache-hit branch (read + dropna) yields on later runs. The
        # original saved first and dropped after, leaving NaNs in the cache.
        df.dropna(inplace=True)

        # Ensure the output directory exists; to_csv does not create it.
        os.makedirs(os.path.dirname(cleaned_path), exist_ok=True)
        df.to_csv(cleaned_path, index=False)
        print("Cleaned data saved.")

    labels = df['label'].values

    tokenizer = load_tokenizer(tokenizer_path)
    # Vocab size = tokenizer entries + 1 to reserve index 0 for padding.
    model = load_model(model_path, len(tokenizer.word_index) + 1)

    titles = prepare_data(df['title'], tokenizer)
    texts = prepare_data(df['text'], tokenizer)

    # shuffle=False keeps batch order aligned with `labels` for scoring.
    data_loader = create_data_loader(
        titles, texts, batch_size=32, shuffle=False)

    accuracy, f1, auc_roc = evaluate_model(model, data_loader, device, labels)
    return accuracy, f1, auc_roc
|
|
|
|
|
if __name__ == "__main__":
    # Resolve artifact paths for the configured experiment version.
    artifact_dir = f'./output/version_{version}'
    model_path = f'{artifact_dir}/best_model_{version}.pth'
    tokenizer_path = f'{artifact_dir}/tokenizer_{version}.pickle'

    # Prefer GPU when one is available.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Device: {device}")

    metrics = run_evaluation(model_path, tokenizer_path, device)
    accuracy, f1, auc_roc = metrics
    print(
        f'Accuracy: {accuracy:.4f}, F1 Score: {f1:.4f}, AUC-ROC: {auc_roc:.4f}')
|
|