|
import os

import pandas as pd
import torch

from data_loader import create_data_loader
from inference import load_model, evaluate_model
from preprocessing import preprocess_text, load_tokenizer, prepare_data
|
|
|
version = 7 |
|
|
|
|
|
def run_evaluation(model_path, tokenizer_path, device):
    """Evaluate a saved model on the combined True/Fake news dataset.

    Loads a previously cleaned inference CSV if one exists for the current
    ``version``; otherwise rebuilds it from the raw ``data_1`` CSVs,
    preprocesses the text columns, and caches the result for next time.

    Args:
        model_path: Path to the saved model weights (.pth).
        tokenizer_path: Path to the pickled tokenizer.
        device: torch.device on which inference runs.

    Returns:
        Tuple ``(accuracy, f1, auc_roc)`` as produced by ``evaluate_model``.
    """
    cleaned_path = f'./output/version_{version}/cleaned_inference_data_{version}.csv'

    try:
        df = pd.read_csv(cleaned_path)
        df.dropna(inplace=True)
        print("Cleaned data found.")
    # Catch only "cache missing/unusable" errors; a bare except here would
    # also swallow KeyboardInterrupt and real parse bugs.
    except (FileNotFoundError, pd.errors.EmptyDataError):
        print("No cleaned data found. Cleaning data now...")

        true_news = pd.read_csv('data_1/True.csv')
        fake_news = pd.read_csv('data_1/Fake.csv')

        # Binary labels: 1 = real article, 0 = fake article.
        true_news['label'] = 1
        fake_news['label'] = 0

        df = pd.concat([true_news, fake_news], ignore_index=True)

        # Only title/text/label are consumed downstream.
        df.drop(columns=['subject', 'date'], inplace=True)

        df['title'] = df['title'].apply(preprocess_text)
        df['text'] = df['text'].apply(preprocess_text)

        # Drop NaN rows BEFORE caching so the saved file matches what the
        # cache-hit branch (read + dropna) yields on later runs. The
        # original saved first and dropped after, leaving NaNs in the cache.
        df.dropna(inplace=True)

        # Ensure the output directory exists; to_csv does not create it.
        os.makedirs(os.path.dirname(cleaned_path), exist_ok=True)
        df.to_csv(cleaned_path, index=False)
        print("Cleaned data saved.")

    labels = df['label'].values

    tokenizer = load_tokenizer(tokenizer_path)
    # Vocab size = tokenizer entries + 1 to reserve index 0 for padding.
    model = load_model(model_path, len(tokenizer.word_index) + 1)

    titles = prepare_data(df['title'], tokenizer)
    texts = prepare_data(df['text'], tokenizer)

    # shuffle=False keeps batch order aligned with `labels` for scoring.
    data_loader = create_data_loader(
        titles, texts, batch_size=32, shuffle=False)

    accuracy, f1, auc_roc = evaluate_model(model, data_loader, device, labels)
    return accuracy, f1, auc_roc
|
|
|
|
|
if __name__ == "__main__":
    # Resolve artifact paths for the configured experiment version.
    artifact_dir = f'./output/version_{version}'
    model_path = f'{artifact_dir}/best_model_{version}.pth'
    tokenizer_path = f'{artifact_dir}/tokenizer_{version}.pickle'

    # Prefer GPU when one is available.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Device: {device}")

    metrics = run_evaluation(model_path, tokenizer_path, device)
    accuracy, f1, auc_roc = metrics
    print(
        f'Accuracy: {accuracy:.4f}, F1 Score: {f1:.4f}, AUC-ROC: {auc_roc:.4f}')
|
|