# fake-news-detector-LSTM / inference_main.py
# Source: Hugging Face file view, "Initial commit" (c5cd586) by kimic, 2.19 kB.
import os

import pandas as pd
import torch

from preprocessing import preprocess_text, load_tokenizer, prepare_data
from data_loader import create_data_loader
from inference import load_model, evaluate_model
# Experiment version: selects the ./output/version_<n>/ directory and is
# embedded in the names of the cached CSV, model checkpoint, and tokenizer.
version = 7
def run_evaluation(model_path, tokenizer_path, device):
    """Evaluate a saved model on the combined True/Fake news dataset.

    The cleaned dataset for the current ``version`` is read from the cache
    CSV when possible. Otherwise it is rebuilt from ``data_1/True.csv`` and
    ``data_1/Fake.csv`` (label 1 for true news, 0 for fake), the ``title``
    and ``text`` columns are run through ``preprocess_text``, and the result
    is written to the cache for later runs.

    Args:
        model_path: Path handed to ``load_model``.
        tokenizer_path: Path handed to ``load_tokenizer``.
        device: Device forwarded to ``evaluate_model``.

    Returns:
        The ``(accuracy, f1, auc_roc)`` tuple returned by ``evaluate_model``.
    """
    cleaned_path = f'./output/version_{version}/cleaned_inference_data_{version}.csv'

    # Load data. Only a missing, unreadable, empty, or unparsable cache
    # triggers a rebuild; anything else (e.g. KeyboardInterrupt) propagates.
    try:
        df = pd.read_csv(cleaned_path)
    except (OSError, pd.errors.EmptyDataError, pd.errors.ParserError):
        print("No cleaned data found. Cleaning data now...")

        # Load the datasets
        true_news = pd.read_csv('data_1/True.csv')
        fake_news = pd.read_csv('data_1/Fake.csv')

        # Add labels
        true_news['label'] = 1
        fake_news['label'] = 0

        # Combine the datasets
        df = pd.concat([true_news, fake_news], ignore_index=True)

        # Drop unnecessary columns
        df.drop(columns=['subject', 'date'], inplace=True)

        df['title'] = df['title'].apply(preprocess_text)
        df['text'] = df['text'].apply(preprocess_text)

        # The versioned output directory may not exist yet on a fresh run.
        os.makedirs(os.path.dirname(cleaned_path), exist_ok=True)
        df.to_csv(cleaned_path, index=False)
        print("Cleaned data saved.")

        # Reload what was just written: cells that became empty after
        # preprocessing are parsed back as NaN by read_csv, so this keeps the
        # first run and every cached run evaluating exactly the same rows.
        df = pd.read_csv(cleaned_path)
    else:
        print("Cleaned data found.")

    df.dropna(inplace=True)

    # Labels are taken after dropna so they match the rows that are encoded.
    labels = df['label'].values

    # Load tokenizer and model
    tokenizer = load_tokenizer(tokenizer_path)
    # +1 on the vocabulary size: presumably index 0 is reserved for padding
    # (verify against load_model / the tokenizer's indexing).
    model = load_model(model_path, len(tokenizer.word_index) + 1)

    # Prepare data
    titles = prepare_data(df['title'], tokenizer)
    texts = prepare_data(df['text'], tokenizer)

    # Create DataLoader. Shuffling stays off; presumably evaluate_model relies
    # on loader order matching `labels` (confirm in inference.py).
    data_loader = create_data_loader(
        titles, texts, batch_size=32, shuffle=False)

    # Evaluate
    accuracy, f1, auc_roc = evaluate_model(model, data_loader, device, labels)
    return accuracy, f1, auc_roc
def main():
    """Evaluate the current version's saved model and print its metrics."""
    # Use the GPU whenever PyTorch reports CUDA as available.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Device: {device}")

    # Artifact paths are derived from the module-level `version`.
    accuracy, f1, auc_roc = run_evaluation(
        model_path=f'./output/version_{version}/best_model_{version}.pth',
        tokenizer_path=f'./output/version_{version}/tokenizer_{version}.pickle',
        device=device,
    )
    print(
        f'Accuracy: {accuracy:.4f}, F1 Score: {f1:.4f}, AUC-ROC: {auc_roc:.4f}')


if __name__ == "__main__":
    main()