Spaces:
Running
Running
ThanaritKanjanametawat
committed on
Commit
•
2bb8a76
1
Parent(s):
f1fd352
Change UI Options (1model, 3datasets) for Senior Project
Browse files
ClassifierCheckpoint/RobertaClassifierCHEAT256.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:330a021e57adfb3261f338f3372f8d22a5e60350b4f62ecabae113346ce46ca0
|
3 |
+
size 498675543
|
ClassifierCheckpoint/{RobertaClassifierCSAbstract.pth → RobertaClassifierGPABenchmark512.pth}
RENAMED
File without changes
|
ClassifierCheckpoint/{RobertaClassifierOpenGPT.pth → RobertaClassifierOpenGPT512.pth}
RENAMED
File without changes
|
ModelDriver.py
CHANGED
@@ -60,9 +60,9 @@ def RobertaSentinelCSAbstractInference(input_text):
|
|
60 |
|
61 |
def RobertaClassifierOpenGPTInference(input_text):
|
62 |
tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
|
63 |
-
model_path = "ClassifierCheckpoint/
|
64 |
model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=2)
|
65 |
-
model.load_state_dict(torch.load(model_path, map_location=device)
|
66 |
model = model.to(device)
|
67 |
model.eval()
|
68 |
|
@@ -80,11 +80,11 @@ def RobertaClassifierOpenGPTInference(input_text):
|
|
80 |
return Probs
|
81 |
|
82 |
|
83 |
-
def
|
84 |
tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
|
85 |
-
model_path = "ClassifierCheckpoint/
|
86 |
model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=2)
|
87 |
-
model.load_state_dict(torch.load(model_path, map_location=device)
|
88 |
model = model.to(device)
|
89 |
model.eval()
|
90 |
|
@@ -101,5 +101,25 @@ def RobertaClassifierCSAbstractInference(input_text):
|
|
101 |
|
102 |
return Probs
|
103 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
104 |
|
105 |
|
|
|
60 |
|
61 |
def RobertaClassifierOpenGPTInference(input_text):
|
62 |
tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
|
63 |
+
model_path = "ClassifierCheckpoint/RobertaClassifierOpenGPT512.pth"
|
64 |
model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=2)
|
65 |
+
model.load_state_dict(torch.load(model_path, map_location=device))
|
66 |
model = model.to(device)
|
67 |
model.eval()
|
68 |
|
|
|
80 |
return Probs
|
81 |
|
82 |
|
83 |
+
def RobertaClassifierGPABenchmarkInference(input_text):
|
84 |
tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
|
85 |
+
model_path = "ClassifierCheckpoint/RobertaClassifierGPABenchmark512.pth"
|
86 |
model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=2)
|
87 |
+
model.load_state_dict(torch.load(model_path, map_location=device))
|
88 |
model = model.to(device)
|
89 |
model.eval()
|
90 |
|
|
|
101 |
|
102 |
return Probs
|
103 |
|
104 |
+
def RobertaClassifierCHEATInference(input_text):
|
105 |
+
tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
|
106 |
+
model_path = "ClassifierCheckpoint/RobertaClassifierCHEAT256.pth"
|
107 |
+
model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=2)
|
108 |
+
model.load_state_dict(torch.load(model_path, map_location=device))
|
109 |
+
model = model.to(device)
|
110 |
+
model.eval()
|
111 |
+
|
112 |
+
tokenized_input = tokenizer(input_text, truncation=True, padding=True, max_length=256, return_tensors='pt')
|
113 |
+
input_ids = tokenized_input['input_ids'].to(device)
|
114 |
+
attention_mask = tokenized_input['attention_mask'].to(device)
|
115 |
+
|
116 |
+
# Make a prediction
|
117 |
+
with torch.no_grad():
|
118 |
+
outputs = model(input_ids, attention_mask=attention_mask)
|
119 |
+
logits = outputs.logits
|
120 |
+
Probs = F.softmax(logits, dim=1).cpu().numpy()[0]
|
121 |
+
|
122 |
+
return Probs
|
123 |
+
|
124 |
|
125 |
|
app.py
CHANGED
@@ -5,39 +5,43 @@ import numpy as np
|
|
5 |
|
6 |
# Add a title
|
7 |
st.title('GPT Detection Demo')
|
8 |
-
st.write("This is a demo for GPT detection. You can use this demo to test the model. The model
|
9 |
-
st.write("Reference on how we built Roberta Sentinel: https://arxiv.org/abs/2305.07969")
|
10 |
|
11 |
# Add 4 options for 4 models
|
12 |
ModelOption = st.sidebar.selectbox(
|
13 |
'Which Model do you want to use?',
|
14 |
-
('
|
15 |
)
|
16 |
|
17 |
DatasetOption = st.sidebar.selectbox(
|
18 |
'Which Dataset the model was trained on?',
|
19 |
-
('OpenGPT', '
|
20 |
)
|
21 |
|
22 |
|
23 |
-
text = st.text_area('Enter text here (max
|
24 |
|
25 |
if st.button('Generate'):
|
26 |
-
if ModelOption == 'RobertaSentinel':
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
if DatasetOption == 'OpenGPT':
|
36 |
result = RobertaClassifierOpenGPTInference(text)
|
37 |
st.write("Model: RobertaClassifierOpenGPT")
|
38 |
-
elif DatasetOption == '
|
39 |
-
result =
|
40 |
-
st.write("Model:
|
|
|
|
|
|
|
|
|
41 |
|
42 |
Prediction = "Human Written" if not np.argmax(result) else "Machine Generated"
|
43 |
|
|
|
5 |
|
6 |
# Add a title
|
7 |
st.title('GPT Detection Demo')
|
8 |
+
st.write("This is a demo for GPT detection. You can use this demo to test the model. There are 3 variations of the model, The model was trained on CHEAT, GPABenchmark, OpenGPT datasets. They are all in the domain of Scientific Abstract. You can choose dataset variation of the model on the sidebar.")
|
9 |
+
# st.write("Reference on how we built Roberta Sentinel: https://arxiv.org/abs/2305.07969")
|
10 |
|
11 |
# Add 4 options for 4 models
|
12 |
ModelOption = st.sidebar.selectbox(
|
13 |
'Which Model do you want to use?',
|
14 |
+
('RobertaClassifier'),
|
15 |
)
|
16 |
|
17 |
DatasetOption = st.sidebar.selectbox(
|
18 |
'Which Dataset the model was trained on?',
|
19 |
+
('OpenGPT', 'GPABenchmark', 'CHEAT'),
|
20 |
)
|
21 |
|
22 |
|
23 |
+
text = st.text_area('Enter text here (max 512 words)', '')
|
24 |
|
25 |
if st.button('Generate'):
|
26 |
+
# if ModelOption == 'RobertaSentinel':
|
27 |
+
# if DatasetOption == 'OpenGPT':
|
28 |
+
# result = RobertaSentinelOpenGPTInference(text)
|
29 |
+
# st.write("Model: RobertaSentinelOpenGPT")
|
30 |
+
# elif DatasetOption == 'CSAbstract':
|
31 |
+
# result = RobertaSentinelCSAbstractInference(text)
|
32 |
+
# st.write("Model: RobertaSentinelCSAbstract")
|
33 |
+
|
34 |
+
if ModelOption == 'RobertaClassifier':
|
35 |
if DatasetOption == 'OpenGPT':
|
36 |
result = RobertaClassifierOpenGPTInference(text)
|
37 |
st.write("Model: RobertaClassifierOpenGPT")
|
38 |
+
elif DatasetOption == 'GPABenchmark':
|
39 |
+
result = RobertaClassifierGPABenchmarkInference(text)
|
40 |
+
st.write("Model: RobertaClassifierGPABenchmark")
|
41 |
+
elif DatasetOption == 'CHEAT':
|
42 |
+
result = RobertaClassifierCHEATInference(text)
|
43 |
+
st.write("Model: RobertaClassifierCHEAT")
|
44 |
+
|
45 |
|
46 |
Prediction = "Human Written" if not np.argmax(result) else "Machine Generated"
|
47 |
|