# Dark-pattern-detection / detection_model.py
# Jashveenraj's picture
# Upload detection_model.py
# 00b7179 verified
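# -*- coding: utf-8 -*-
"""Detection_model.ipynb

Fine-tunes the `bert-base-uncased` sequence classifier on a tab-separated
dataset of (label, message) rows, evaluates it on a 20% hold-out split and
saves the trained model to the `detection_model` directory.

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/18hnebi4AGf55vyqvnxcZhk3oJWcZEyCu
"""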
import pandas as pd

# Load the tab-separated dataset. `names=` supplies the column names, which
# also tells pandas that the file itself has no header row.
df = pd.read_csv('/content/dataset.tsv', sep='\t', names=["label", "message"])

# If the file does carry its own "label<TAB>message" header line, the `names=`
# argument above makes pandas read it as an ordinary data row. Drop it so it is
# not treated as a training example with a bogus class.
is_header_row = (
    (df['label'].astype(str) == 'label')
    & (df['message'].astype(str) == 'message')
)
df = df.loc[~is_header_row]

# Rows with a missing label or message can be neither tokenized nor trained on.
df = df.dropna(subset=['label', 'message']).reset_index(drop=True)

# The original script bound the same frame to both names; keep the alias.
messages = df

# Independent feature: the raw text of every message.
X = df['message'].astype(str).tolist()

# Dependent feature: integer class ids.
# The previous `pd.get_dummies(Y, drop_first=True)['label']` looked up a column
# called 'label', but dummy columns are named after the label *values*, so the
# lookup only worked by accident (and newer pandas yields bools, not ints).
# `factorize(sort=True)` maps the sorted label values to 0, 1, ...; for a
# two-class dataset that is exactly the indicator of the second class, i.e.
# what `drop_first=True` was meant to produce.
# NOTE(review): the classifier created later uses the default two-label head;
# a dataset with more classes needs `num_labels` set there.
label_codes, label_classes = pd.factorize(df['label'], sort=True)
Y = label_codes.tolist()
# Train/test split: hold out 20% of the examples for evaluation. The fixed
# random_state makes the partition reproducible between runs.
from sklearn.model_selection import train_test_split

X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.20,
    random_state=0,
)
# Requires the `transformers` package (pip install transformers).
from transformers import BertTokenizer

# The tokenizer must match the checkpoint that is fine-tuned later, so it is
# loaded from the same 'bert-base-uncased' name.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Both splits are encoded with identical settings: over-long messages are
# truncated and shorter ones are padded so every row has the same length.
_encode_options = {"truncation": True, "padding": True}
train_encodings = tokenizer(X_train, **_encode_options)
test_encoding = tokenizer(X_test, **_encode_options)
import tensorflow as tf

# Turn the tokenizer output into plain dicts of features, then pair every
# example's features with its label as one element of a tf.data.Dataset.
train_features = dict(train_encodings)
test_features = dict(test_encoding)

train_dataset = tf.data.Dataset.from_tensor_slices((train_features, Y_train))
test_dataset = tf.data.Dataset.from_tensor_slices((test_features, Y_test))
# NOTE(review): TFTrainer / TFTrainingArguments are deprecated in recent
# transformers releases - confirm the installed version still ships them.
from transformers import TFBertForSequenceClassification, TFTrainer, TFTrainingArguments

# Training configuration.
training_args = TFTrainingArguments(
    output_dir="./output",            # where checkpoints and logs are written
    evaluation_strategy="steps",      # evaluate every `eval_steps`; "epoch" is the alternative
    # Leaving eval_steps unset does NOT turn periodic evaluation off: per the
    # TrainingArguments documentation it falls back to `logging_steps`. Use
    # evaluation_strategy="no" to disable evaluation during training.
    eval_steps=None,
    save_total_limit=2,               # keep at most two checkpoints on disk
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
)

# Create the model inside the distribution-strategy scope so that its
# variables are placed according to that strategy. The statement must be
# indented under `with`; without the indentation the file does not parse.
with training_args.strategy.scope():
    model = TFBertForSequenceClassification.from_pretrained("bert-base-uncased")

trainer = TFTrainer(
    model=model,                  # the instantiated model to be trained
    args=training_args,           # training arguments, defined above
    train_dataset=train_dataset,  # training dataset
    eval_dataset=test_dataset,    # evaluation dataset
)

# Fine-tune the model on the training split.
trainer.train()
import numpy as np
from sklearn.metrics import confusion_matrix

# Evaluation metrics on the held-out split. Printed explicitly because,
# outside a notebook, a bare expression's value is silently discarded.
eval_metrics = trainer.evaluate(test_dataset)
print(eval_metrics)

# Run inference once. The result is a (predictions, label_ids, metrics) named
# tuple: `predictions` holds the per-class logits and `label_ids` the ground
# truth. Indexing with [1], as before, picked the ground-truth labels, so the
# confusion matrix compared the labels with themselves and always looked
# perfect. The predicted class is the argmax over the logits.
prediction_output = trainer.predict(test_dataset)
output = np.argmax(prediction_output.predictions, axis=1)
print(output.shape)

# Confusion matrix: rows are the true classes, columns the predicted classes.
cm = confusion_matrix(Y_test, output)
print(cm)
# Persist the fine-tuned model so it can be reloaded without retraining.
saved_model_dir = 'detection_model'
trainer.save_model(saved_model_dir)