Jashveenraj
/

Dark-pattern-detection

Text Classification

Inference Endpoints

Model card Files Files and versions Community

Dark-pattern-detection / detection_model.py

Jashveenraj's picture

Upload detection_model.py

00b7179 verified 5 months ago

raw history blame contribute delete

No virus

2.4 kB

	# -*- coding: utf-8 -*-
	"""Detection_model.ipynb

	Automatically generated by Colaboratory.

	Original file is located at
	https://colab.research.google.com/drive/18hnebi4AGf55vyqvnxcZhk3oJWcZEyCu
	"""

	import pandas as pd

	# Load the tab-separated dataset. Column names are supplied explicitly, so
	# pandas treats every row of the file (including the first) as data.
	df = messages = pd.read_csv(
	    '/content/dataset.tsv', sep='\t', names=["label", "message"]
	)

	# Notebook-style previews (this script was exported from Colab); they only
	# display something in an interactive session.
	df.head()

	df.shape

	# Independent feature: the raw message text.
	X = list(df['message'])

	# Dependent feature: one-hot encode the labels and drop the first class so a
	# single 0/1 indicator column remains.
	label_dummies = pd.get_dummies(df['label'], drop_first=True)

	# The indicator column is named after the label *value*, not after the
	# source column, so it is selected by position instead of by a fixed name.
	# Anything other than exactly one remaining column means the labels are not
	# binary and the encoding below would silently produce a wrong target.
	if label_dummies.shape[1] != 1:
	    raise ValueError(
	        "Expected a binary label column, but one-hot encoding left "
	        f"{label_dummies.shape[1]} indicator column(s): "
	        f"{list(label_dummies.columns)}. Check the dataset for a header "
	        "row parsed as data or for stray label values."
	    )

	label_dummies

	# Cast to int so the targets are plain 0/1 integers regardless of the
	# indicator dtype pandas returns.
	Y = label_dummies.iloc[:, 0].astype(int).tolist()

	Y

	# Train/test split: hold out 20% of the examples for evaluation; the fixed
	# random_state keeps the split reproducible.
	from sklearn.model_selection import train_test_split

	X_train, X_test, Y_train, Y_test = train_test_split(
	    X,
	    Y,
	    test_size=0.20,
	    random_state=0,
	)

	# Requires the transformers package: pip install transformers

	# Use the BERT tokenizer that matches the bert-base-uncased checkpoint.
	from transformers import BertTokenizer

	tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

	# Encode both splits with truncation and padding enabled.
	train_encodings = tokenizer(
	    X_train,
	    truncation=True,
	    padding=True,
	)
	test_encoding = tokenizer(
	    X_test,
	    truncation=True,
	    padding=True,
	)

	# Notebook-style preview; only displays something in an interactive session.
	train_encodings

	import tensorflow as tf

	# Pair each example's encoded features with its label.
	train_dataset = tf.data.Dataset.from_tensor_slices(
	    (dict(train_encodings), Y_train)
	)
	test_dataset = tf.data.Dataset.from_tensor_slices(
	    (dict(test_encoding), Y_test)
	)

	# Notebook-style preview; only displays something in an interactive session.
	train_dataset

	from transformers import TFBertForSequenceClassification, TFTrainer, TFTrainingArguments

	# NOTE(review): TFTrainer / TFTrainingArguments are reported as deprecated in
	# later transformers releases -- confirm the pinned version still ships them.

	# Training configuration passed to the trainer below.
	training_args = TFTrainingArguments(
	    output_dir="./output",
	    evaluation_strategy="steps",  # could also be set to "epoch"
	    # NOTE(review): with evaluation_strategy="steps", None presumably falls
	    # back to a library default interval rather than disabling periodic
	    # evaluation -- confirm against the installed transformers version.
	    eval_steps=None,
	    save_total_limit=2,
	    num_train_epochs=3,
	    per_device_train_batch_size=8,
	    per_device_eval_batch_size=8,
	)

	# Create the model inside the distribution-strategy scope exposed by the
	# training arguments. The assignment must be indented under the `with`
	# statement; at the same level as the header the script does not parse.
	with training_args.strategy.scope():
	    model = TFBertForSequenceClassification.from_pretrained("bert-base-uncased")

	trainer = TFTrainer(
	    model=model,                  # the model to be trained
	    args=training_args,           # training arguments, defined above
	    train_dataset=train_dataset,  # training dataset
	    eval_dataset=test_dataset,    # evaluation dataset
	)

	trainer.train()

	# Compute the evaluation metrics on the held-out split.
	trainer.evaluate(test_dataset)

	# Run inference once and reuse the result instead of repeating the full
	# prediction pass for every lookup.
	prediction_output = trainer.predict(test_dataset)
	prediction_output

	# Element 0 of the prediction output holds the model's predictions; element 1
	# holds the true label ids, so using it here would compare the labels with
	# themselves and always give a perfect confusion matrix.
	# Assumes the predictions are an (n_examples, n_classes) array of scores, so
	# the argmax over the last axis is the predicted class id -- TODO confirm.
	output = prediction_output[0].argmax(axis=-1)

	output.shape

	# Confusion matrix of true labels against predicted class ids.
	from sklearn.metrics import confusion_matrix

	cm = confusion_matrix(Y_test, output)
	cm

	# Save the fine-tuned model.
	trainer.save_model('detection_model')