# NOTE: page-header residue from the hosting site's file viewer, not part of the script.
# Uploader: thankrandomness | commit 0b98bc0 ("change the dataset key")
# Scan status: no virus | size: 579 bytes
import pandas as pd
from datasets import load_dataset, DatasetDict
# Load the sample CSV from the Hugging Face Hub. The loaded rows are read from
# the 'train' split below.
dataset = load_dataset(
    'thankrandomness/spam-detection-sample',
    data_files='spam-sample.csv',
)

# Hold out 30% of the rows for validation; the fixed seed keeps the split
# reproducible between runs.
split_dataset = dataset['train'].train_test_split(test_size=0.3, seed=42)

# train_test_split labels the held-out portion 'test'; re-expose it under the
# 'validation' key.
dataset = DatasetDict({
    'train': split_dataset['train'],
    'validation': split_dataset['test'],
})

# Convert the training split to a pandas DataFrame. Dataset.to_pandas() is the
# library's own conversion and avoids materialising every row as a Python dict
# first, which is what pd.DataFrame(dataset['train']) does.
df = dataset['train'].to_pandas()

# Print the first few entries of the 'body' column as a quick sanity check.
print(df['body'].head())