Jashveenraj
committed on
Commit • 00b7179
1 Parent(s): 7cb23f3
Upload detection_model.py
detection_model.py +90 -0
detection_model.py
ADDED
@@ -0,0 +1,90 @@
# -*- coding: utf-8 -*-
"""Detection_model.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/18hnebi4AGf55vyqvnxcZhk3oJWcZEyCu
"""

import pandas as pd

# Load the tab-separated dataset with a label column and a message column
df = pd.read_csv('/content/dataset.tsv', sep='\t', names=["label", "message"])
df.head()

df.shape

# Independent feature: the raw message text
X = list(df['message'])

# Dependent feature: the class label
Y = list(df['label'])

# Dummy-encode the labels; with two classes and drop_first=True a single
# binary column remains, so take it by position and cast to int
Y = list(pd.get_dummies(Y, drop_first=True).iloc[:, 0].astype(int))

Y

# Train-test split
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.20, random_state=0)

# pip install transformers

# We use the BERT tokenizer that matches our BERT base model
from transformers import BertTokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

train_encodings = tokenizer(X_train, truncation=True, padding=True)
test_encodings = tokenizer(X_test, truncation=True, padding=True)

train_encodings

import tensorflow as tf

# Wrap the tokenized inputs and labels as tf.data datasets
train_dataset = tf.data.Dataset.from_tensor_slices((dict(train_encodings), Y_train))
test_dataset = tf.data.Dataset.from_tensor_slices((dict(test_encodings), Y_test))

train_dataset

# Note: TFTrainer/TFTrainingArguments are deprecated in recent transformers
# releases; this script assumes a version that still ships them
from transformers import TFBertForSequenceClassification, TFTrainer, TFTrainingArguments

# Define the training arguments
training_args = TFTrainingArguments(
    output_dir="./output",
    evaluation_strategy="steps",   # you might also set this to "epoch"
    eval_steps=None,               # set this to None if you don't want periodic evaluations
    save_total_limit=2,
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
)

with training_args.strategy.scope():
    model = TFBertForSequenceClassification.from_pretrained("bert-base-uncased")

trainer = TFTrainer(
    model=model,                  # instantiating the model to be trained
    args=training_args,           # training arguments, defined above
    train_dataset=train_dataset,  # training dataset
    eval_dataset=test_dataset,    # evaluation dataset
)

trainer.train()

trainer.evaluate(test_dataset)

# predict() returns (predictions, label_ids, metrics); the predictions are
# logits, so take the argmax over the class axis to get the predicted labels
output = trainer.predict(test_dataset).predictions.argmax(axis=-1)

# To create a confusion matrix
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(Y_test, output)
cm

# Saving our model
trainer.save_model('detection_model')