Spaces:

Thanarit
/

GPT-Detection-Demo

Running

App Files Community

ThanaritKanjanametawat committed on Feb 26, 2024

Commit

2bb8a76

1 Parent(s): f1fd352

Change UI Options (1model, 3datasets) for Senior Project

Browse files

Files changed (5) hide show

ClassifierCheckpoint/RobertaClassifierCHEAT256.pth +3 -0
ClassifierCheckpoint/{RobertaClassifierCSAbstract.pth → RobertaClassifierGPABenchmark512.pth} +0 -0
ClassifierCheckpoint/{RobertaClassifierOpenGPT.pth → RobertaClassifierOpenGPT512.pth} +0 -0
ModelDriver.py +25 -5
app.py +21 -17

ClassifierCheckpoint/RobertaClassifierCHEAT256.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:330a021e57adfb3261f338f3372f8d22a5e60350b4f62ecabae113346ce46ca0
+size 498675543

ClassifierCheckpoint/{RobertaClassifierCSAbstract.pth → RobertaClassifierGPABenchmark512.pth} RENAMED Viewed

File without changes

ClassifierCheckpoint/{RobertaClassifierOpenGPT.pth → RobertaClassifierOpenGPT512.pth} RENAMED Viewed

File without changes

ModelDriver.py CHANGED Viewed

@@ -60,9 +60,9 @@ def RobertaSentinelCSAbstractInference(input_text):
 def RobertaClassifierOpenGPTInference(input_text):
     tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
-    model_path = "ClassifierCheckpoint/RobertaClassifierOpenGPT.pth"
     model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=2)
-    model.load_state_dict(torch.load(model_path, map_location=device), strict=False)
     model = model.to(device)
     model.eval()
@@ -80,11 +80,11 @@ def RobertaClassifierOpenGPTInference(input_text):
     return Probs
-def RobertaClassifierCSAbstractInference(input_text):
     tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
-    model_path = "ClassifierCheckpoint/RobertaClassifierCSAbstract.pth"
     model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=2)
-    model.load_state_dict(torch.load(model_path, map_location=device), strict=False)
     model = model.to(device)
     model.eval()
@@ -101,5 +101,25 @@ def RobertaClassifierCSAbstractInference(input_text):
     return Probs

 def RobertaClassifierOpenGPTInference(input_text):
     tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
+    model_path = "ClassifierCheckpoint/RobertaClassifierOpenGPT512.pth"
     model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=2)
+    model.load_state_dict(torch.load(model_path, map_location=device))
     model = model.to(device)
     model.eval()
     return Probs
+def RobertaClassifierGPABenchmarkInference(input_text):
     tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
+    model_path = "ClassifierCheckpoint/RobertaClassifierGPABenchmark512.pth"
     model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=2)
+    model.load_state_dict(torch.load(model_path, map_location=device))
     model = model.to(device)
     model.eval()
     return Probs
+def RobertaClassifierCHEATInference(input_text):
+    tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
+    model_path = "ClassifierCheckpoint/RobertaClassifierCHEAT256.pth"
+    model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=2)
+    model.load_state_dict(torch.load(model_path, map_location=device))
+    model = model.to(device)
+    model.eval()
+    tokenized_input = tokenizer(input_text, truncation=True, padding=True, max_length=256, return_tensors='pt')
+    input_ids = tokenized_input['input_ids'].to(device)
+    attention_mask = tokenized_input['attention_mask'].to(device)
+    # Make a prediction
+    with torch.no_grad():
+        outputs = model(input_ids, attention_mask=attention_mask)
+    logits = outputs.logits
+    Probs = F.softmax(logits, dim=1).cpu().numpy()[0]
+    return Probs

app.py CHANGED Viewed

@@ -5,39 +5,43 @@ import numpy as np
 # Add a title
 st.title('GPT Detection Demo')
-st.write("This is a demo for GPT detection. You can use this demo to test the model. The model is trained on two datasets: OpenGPT and CSAbstract. You can choose the model and dataset in the sidebar.")
-st.write("Reference on how we built Roberta Sentinel: https://arxiv.org/abs/2305.07969")
 # Add 4 options for 4 models
 ModelOption = st.sidebar.selectbox(
     'Which Model do you want to use?',
-    ('RobertaSentinel', 'RobertaClassifier'),
 )
 DatasetOption = st.sidebar.selectbox(
     'Which Dataset the model was trained on?',
-    ('OpenGPT', 'CSAbstract'),
 )
-text = st.text_area('Enter text here (max 500 words)', '')
 if st.button('Generate'):
-    if ModelOption == 'RobertaSentinel':
-        if DatasetOption == 'OpenGPT':
-            result = RobertaSentinelOpenGPTInference(text)
-            st.write("Model: RobertaSentinelOpenGPT")
-        elif DatasetOption == 'CSAbstract':
-            result = RobertaSentinelCSAbstractInference(text)
-            st.write("Model: RobertaSentinelCSAbstract")
-    elif ModelOption == 'RobertaClassifier':
         if DatasetOption == 'OpenGPT':
             result = RobertaClassifierOpenGPTInference(text)
             st.write("Model: RobertaClassifierOpenGPT")
-        elif DatasetOption == 'CSAbstract':
-            result = RobertaClassifierCSAbstractInference(text)
-            st.write("Model: RobertaClassifierCSAbstract")
     Prediction = "Human Written" if not np.argmax(result) else "Machine Generated"

 # Add a title
 st.title('GPT Detection Demo')
+st.write("This is a demo for GPT detection. You can use this demo to test the model. There are 3 variations of the model, The model was trained on CHEAT, GPABenchmark, OpenGPT datasets. They are all in the domain of Scientific Abstract. You can choose dataset variation of the model on the sidebar.")
+# st.write("Reference on how we built Roberta Sentinel: https://arxiv.org/abs/2305.07969")
 # Add 4 options for 4 models
 ModelOption = st.sidebar.selectbox(
     'Which Model do you want to use?',
+    ('RobertaClassifier'),
 )
 DatasetOption = st.sidebar.selectbox(
     'Which Dataset the model was trained on?',
+    ('OpenGPT', 'GPABenchmark', 'CHEAT'),
 )
+text = st.text_area('Enter text here (max 512 words)', '')
 if st.button('Generate'):
+    # if ModelOption == 'RobertaSentinel':
+    #     if DatasetOption == 'OpenGPT':
+    #         result = RobertaSentinelOpenGPTInference(text)
+    #         st.write("Model: RobertaSentinelOpenGPT")
+    #     elif DatasetOption == 'CSAbstract':
+    #         result = RobertaSentinelCSAbstractInference(text)
+    #         st.write("Model: RobertaSentinelCSAbstract")
+    if ModelOption == 'RobertaClassifier':
         if DatasetOption == 'OpenGPT':
             result = RobertaClassifierOpenGPTInference(text)
             st.write("Model: RobertaClassifierOpenGPT")
+        elif DatasetOption == 'GPABenchmark':
+            result = RobertaClassifierGPABenchmarkInference(text)
+            st.write("Model: RobertaClassifierGPABenchmark")
+        elif DatasetOption == 'CHEAT':
+            result = RobertaClassifierCHEATInference(text)
+            st.write("Model: RobertaClassifierCHEAT")
     Prediction = "Human Written" if not np.argmax(result) else "Machine Generated"