from transformers import RobertaTokenizer, RobertaForSequenceClassification, RobertaModel
import torch
import torch.nn as nn
import torch.nn.functional as F


device = torch.device("cpu")


class MLP(nn.Module):
    """Two-layer classification head applied to RoBERTa [CLS] features."""

    def __init__(self, input_dim):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_dim, 256)
        self.fc2 = nn.Linear(256, 2)
        self.gelu = nn.GELU()

    def forward(self, x):
        x = self.gelu(self.fc1(x))
        x = self.fc2(x)
        return x


def extract_features(text):
    """Encode text with roberta-base and return the [CLS] token embedding as a NumPy vector."""
    tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
    model = RobertaModel.from_pretrained("roberta-base").to(device)
    model.eval()
    tokenized_text = tokenizer.encode(text, truncation=True, max_length=512, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model(tokenized_text)
    last_hidden_states = outputs.last_hidden_state
    # Use the hidden state of the first ([CLS]) token as the sentence-level feature
    TClassification = last_hidden_states[:, 0, :].squeeze().cpu().numpy()
    return TClassification

def RobertaSentinelOpenGPTInference(input_text):
    """Load the RobertaSentinelOpenGPT MLP checkpoint and return class probabilities for input_text."""
    features = extract_features(input_text)
    loaded_model = MLP(768).to(device)
    loaded_model.load_state_dict(torch.load("SentinelCheckpoint/RobertaSentinelOpenGPT.pth", map_location=device))
    loaded_model.eval()

    with torch.no_grad():
        inputs = torch.from_numpy(features).float().to(device)
        outputs = loaded_model(inputs)
        Probs = F.softmax(outputs, dim=0).cpu().numpy()

    return Probs

def RobertaSentinelCSAbstractInference(input_text):
    """Load the RobertaSentinelCSAbstract MLP checkpoint and return class probabilities for input_text."""
    features = extract_features(input_text)
    loaded_model = MLP(768).to(device)
    loaded_model.load_state_dict(torch.load("SentinelCheckpoint/RobertaSentinelCSAbstract.pth", map_location=device))
    loaded_model.eval()

    with torch.no_grad():
        inputs = torch.from_numpy(features).float().to(device)
        outputs = loaded_model(inputs)
        Probs = F.softmax(outputs, dim=0).cpu().numpy()

    return Probs


def RobertaClassifierOpenGPTInference(input_text):
    """Load the RobertaClassifierOpenGPT512 checkpoint and return class probabilities for input_text."""
    tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
    model_path = "ClassifierCheckpoint/RobertaClassifierOpenGPT512.pth"
    model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=2)
    model.load_state_dict(torch.load(model_path, map_location=device))
    model = model.to(device)
    model.eval()

    tokenized_input = tokenizer(input_text, truncation=True, padding=True, max_length=512, return_tensors='pt')
    input_ids = tokenized_input['input_ids'].to(device)
    attention_mask = tokenized_input['attention_mask'].to(device)

    # Make a prediction
    with torch.no_grad():
        outputs = model(input_ids, attention_mask=attention_mask)
    logits = outputs.logits
    Probs = F.softmax(logits, dim=1).cpu().numpy()[0]

    return Probs


def RobertaClassifierGPABenchmarkInference(input_text):
    """Load the RobertaClassifierGPABenchmark512 checkpoint and return class probabilities for input_text."""
    tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
    model_path = "ClassifierCheckpoint/RobertaClassifierGPABenchmark512.pth"
    model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=2)
    model.load_state_dict(torch.load(model_path, map_location=device))
    model = model.to(device)
    model.eval()

    tokenized_input = tokenizer(input_text, truncation=True, padding=True, max_length=512, return_tensors='pt')
    input_ids = tokenized_input['input_ids'].to(device)
    attention_mask = tokenized_input['attention_mask'].to(device)

    # Make a prediction
    with torch.no_grad():
        outputs = model(input_ids, attention_mask=attention_mask)
    logits = outputs.logits
    Probs = F.softmax(logits, dim=1).cpu().numpy()[0]

    return Probs

def RobertaClassifierCHEATInference(input_text):
    """Load the RobertaClassifierCHEAT256 checkpoint (256-token inputs) and return class probabilities for input_text."""
    tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
    model_path = "ClassifierCheckpoint/RobertaClassifierCHEAT256.pth"
    model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=2)
    model.load_state_dict(torch.load(model_path, map_location=device))
    model = model.to(device)
    model.eval()

    tokenized_input = tokenizer(input_text, truncation=True, padding=True, max_length=256, return_tensors='pt')
    input_ids = tokenized_input['input_ids'].to(device)
    attention_mask = tokenized_input['attention_mask'].to(device)

    # Make a prediction
    with torch.no_grad():
        outputs = model(input_ids, attention_mask=attention_mask)
    logits = outputs.logits
    Probs = F.softmax(logits, dim=1).cpu().numpy()[0]

    return Probs
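

# Minimal usage sketch (not part of the original module): it calls one of the inference
# helpers above and assumes the corresponding checkpoint file exists on disk under
# ClassifierCheckpoint/.
if __name__ == "__main__":
    sample_text = "Deep learning has transformed natural language processing research."
    probs = RobertaClassifierOpenGPTInference(sample_text)
    # probs is a length-2 softmax distribution over the two classes
    print(f"class probabilities: {probs}")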