PBJ committed
Commit 35a51d6
1 Parent(s): bf04682

Upload 2 files

Files changed (2)
  1. app.py +7 -7
  2. dsbert_toxic_balanced.pt +3 -0
app.py CHANGED
```diff
@@ -13,7 +13,7 @@ comment_input = []
 comment_input.append(comment)
 test_df = pd.DataFrame()
 test_df['comment_text'] = comment_input
-cols = {'toxic':[0], 'severe_toxic':[0], 'obscene':[0], 'threat':[0], 'insult':[0], 'identity_hate':[0]}
+cols = {'toxic':[0], 'severe_toxic':[0], 'obscene':[0], 'threat':[0], 'insult':[0], 'identity_hate':[0], 'non_toxic': [0]}
 for key in cols.keys():
     test_df[key] = cols[key]
 test_df = test_df.reset_index()
```
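For orientation, a minimal runnable sketch of what this hunk now builds (names taken from the diff; the example comment string is hypothetical):

```python
import pandas as pd

comment_input = ["you are a wonderful person"]   # hypothetical user input
test_df = pd.DataFrame({'comment_text': comment_input})

# Seven zero-filled placeholder label columns, matching the new dict above.
cols = {'toxic': [0], 'severe_toxic': [0], 'obscene': [0], 'threat': [0],
        'insult': [0], 'identity_hate': [0], 'non_toxic': [0]}
for key in cols.keys():
    test_df[key] = cols[key]
test_df = test_df.reset_index()
# -> one row: the comment plus seven dummy labels the Dataset/DataLoader expects
```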
```diff
@@ -90,7 +90,7 @@ Test_data = Toxic_Dataset(X_test, Y_test)
 Test_Loader = DataLoader(Test_data, shuffle=False)
 
 # Loading pre-trained weights of DistilBert model for sequence classification
-# and changing classifiers output to 6 because we have 6 labels to classify.
+# and changing classifiers output to 7 because we have 7 labels to classify.
 # DistilBERT
 
 from transformers import DistilBertForSequenceClassification
```
```diff
@@ -98,7 +98,7 @@ from transformers import DistilBertForSequenceClassification
 Distil_bert = DistilBertForSequenceClassification.from_pretrained("distilbert-base-uncased")
 
 Distil_bert.classifier = nn.Sequential(
-    nn.Linear(768,6),
+    nn.Linear(768,7),
     nn.Sigmoid()
 )
 # print(Distil_bert)
```
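The hand-replaced head mirrors what `transformers` can do directly. A sketch of the more conventional equivalent (an assumption, not the author's code):

```python
from transformers import DistilBertForSequenceClassification

model = DistilBertForSequenceClassification.from_pretrained(
    "distilbert-base-uncased",
    num_labels=7,                                 # sizes the head to 7 labels
    problem_type="multi_label_classification",    # selects BCEWithLogitsLoss
)
# Note: unlike the diff's nn.Sequential(nn.Linear(768, 7), nn.Sigmoid()),
# this head returns raw logits, so apply torch.sigmoid() to get probabilities.
```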
```diff
@@ -106,7 +106,7 @@ Distil_bert.classifier = nn.Sequential(
 # Instantiating the model and loading the weights
 model = Distil_bert
 model.to('cpu')
-model = torch.load('dsbert_toxic.pt', map_location=torch.device('cpu'))
+model = torch.load('dsbert_toxic_balanced.pt', map_location=torch.device('cpu'))
 
 # Making Predictions
 for comments, labels in Test_Loader:
```
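One subtlety worth flagging: `torch.load` on a whole-model pickle returns a brand-new object, so the `Distil_bert` instance built above is discarded by the assignment. A minimal sketch that handles either checkpoint style (an assumption; the diff implies a whole-model pickle):

```python
import torch

checkpoint = torch.load('dsbert_toxic_balanced.pt', map_location='cpu')
if isinstance(checkpoint, dict):        # state_dict-style checkpoint
    Distil_bert.load_state_dict(checkpoint)
    model = Distil_bert
else:                                   # fully pickled model, as used here
    model = checkpoint
model.eval()                            # disable dropout for inference
```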
```diff
@@ -119,7 +119,7 @@ for comments, labels in Test_Loader:
     op = output.logits
 
     res = []
-    for i in range(6):
+    for i in range(7):
         res.append(op[0, i])
     # print(res)
 
```
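Because the classifier ends in `nn.Sigmoid()`, `output.logits` here already holds per-label probabilities in [0, 1]. A sketch of the loop body under that assumption (`comments` is assumed to be a tokenized batch dict):

```python
import torch

with torch.no_grad():                   # inference only; skip gradient tracking
    output = model(**comments)
probs = output.logits[0]                # shape (7,): one score per label
res = [probs[i] for i in range(7)]
```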
```diff
@@ -128,10 +128,10 @@ preds = []
 for i in range(len(res)):
     preds.append(res[i].tolist())
 
-classes = ['Toxic', 'Severe Toxic', 'Obscene', 'Threat', 'Insult', 'Identity Hate']
+classes = ['Toxic', 'Severe Toxic', 'Obscene', 'Threat', 'Insult', 'Identity Hate', 'Non Toxic']
 
 if st.button('Classify'):
     for i in range(len(res)):
         st.write(f"{classes[i]} : {round(preds[i], 2)}\n")
     st.success('These are the outputs')
-
+
```
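A side note on `res[i].tolist()` (a sketch, not from the diff): on a 0-dim tensor, `.tolist()` returns a plain Python float rather than a list, which is why `round(preds[i], 2)` works in the Streamlit loop:

```python
import torch

score = torch.tensor(0.7312)
print(score.tolist())              # ≈ 0.7312 — a float, not a list
print(round(score.tolist(), 2))    # 0.73, as rendered by st.write above
```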
 
dsbert_toxic_balanced.pt ADDED
```diff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a3bb55eaba141c4c5582838e502074b3c9bcff689321d85b3a3eff211b274c93
+size 267889455
```
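The weights file is stored through Git LFS: the repository versions only this three-line pointer, while the ~268 MB binary lives in LFS storage. A hypothetical sketch of resolving it with `huggingface_hub` (the `repo_id` placeholder and `repo_type` are assumptions, not taken from the commit):

```python
from huggingface_hub import hf_hub_download

weights_path = hf_hub_download(
    repo_id="<user>/<space>",           # placeholder; not in the commit
    repo_type="space",                  # assuming this repo is a Space
    filename="dsbert_toxic_balanced.pt",
)
```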