"""Inference helpers for RoBERTa-based two-class text classifiers.

Two model families are exposed:

* "Sentinel" models: a frozen ``roberta-base`` encoder produces a feature
  vector that is fed to a small MLP head loaded from ``SentinelCheckpoint/``.
* "Classifier" models: a ``RobertaForSequenceClassification`` network whose
  weights are loaded from ``ClassifierCheckpoint/``.

Every public ``*Inference`` function takes the input text and returns a
NumPy array holding two class probabilities. Which index corresponds to
which label is determined by how the checkpoints were trained and is not
visible in this file.
"""
import functools

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
from transformers import RobertaTokenizer, RobertaForSequenceClassification, RobertaModel

device = torch.device("cpu")

_ROBERTA_NAME = "roberta-base"
_MAX_TOKENS = 512
# Input width of the MLP head; must match the vector extract_features returns.
_FEATURE_DIM = 768


class MLP(nn.Module):
    """Two-layer perceptron head: ``input_dim -> 256 -> 2`` with GELU."""

    def __init__(self, input_dim: int):
        super().__init__()
        # Attribute names are part of the checkpoint format (state-dict keys),
        # so they must stay ``fc1`` / ``fc2``.
        self.fc1 = nn.Linear(input_dim, 256)
        self.fc2 = nn.Linear(256, 2)
        self.gelu = nn.GELU()

    def forward(self, x):
        """Return the raw (un-normalised) two-class logits for ``x``."""
        x = self.gelu(self.fc1(x))
        x = self.fc2(x)
        return x


@functools.lru_cache(maxsize=1)
def _load_tokenizer():
    """Load the ``roberta-base`` tokenizer once and reuse it across calls."""
    return RobertaTokenizer.from_pretrained(_ROBERTA_NAME)


@functools.lru_cache(maxsize=1)
def _load_backbone():
    """Load the ``roberta-base`` encoder once, on ``device``, in eval mode."""
    backbone = RobertaModel.from_pretrained(_ROBERTA_NAME).to(device)
    backbone.eval()
    return backbone


def extract_features(text):
    """Encode ``text`` with ``roberta-base`` and return its first-token vector.

    Args:
        text: The text to encode. It is truncated to at most 512 tokens.

    Returns:
        A NumPy array holding the last-layer hidden state at sequence
        position 0, with the batch axis squeezed away.
    """
    tokenizer = _load_tokenizer()
    backbone = _load_backbone()

    token_ids = tokenizer.encode(
        text, truncation=True, max_length=_MAX_TOKENS, return_tensors="pt"
    ).to(device)

    # Pure inference: skip building the autograd graph.
    with torch.no_grad():
        last_hidden_states = backbone(token_ids).last_hidden_state

    return last_hidden_states[:, 0, :].squeeze().cpu().numpy()


def _sentinel_inference(checkpoint_path, input_text):
    """Run the encoder + MLP-head pipeline using the head at ``checkpoint_path``."""
    features = extract_features(input_text)

    head = MLP(_FEATURE_DIM).to(device)
    # NOTE: torch.load unpickles the file; only load trusted checkpoints.
    head.load_state_dict(torch.load(checkpoint_path, map_location=device))
    head.eval()

    with torch.no_grad():
        inputs = torch.as_tensor(features, dtype=torch.float32, device=device)
        logits = head(inputs)
        # ``features`` has no batch axis, so the two logits lie along dim 0.
        return F.softmax(logits, dim=0).cpu().numpy()


def _classifier_inference(checkpoint_path, input_text):
    """Run the sequence-classification model stored at ``checkpoint_path``."""
    tokenizer = _load_tokenizer()

    model = RobertaForSequenceClassification.from_pretrained(_ROBERTA_NAME, num_labels=2)
    # ``map_location`` is an argument of torch.load, not of load_state_dict.
    # NOTE: torch.load unpickles the file; only load trusted checkpoints.
    model.load_state_dict(torch.load(checkpoint_path, map_location=device))
    model = model.to(device)
    model.eval()

    encoded = tokenizer(
        input_text,
        truncation=True,
        padding=True,
        max_length=_MAX_TOKENS,
        return_tensors="pt",
    )
    input_ids = encoded["input_ids"].to(device)
    attention_mask = encoded["attention_mask"].to(device)

    with torch.no_grad():
        logits = model(input_ids, attention_mask=attention_mask).logits

    # Keep only the first row of the batch, as the original code did.
    return F.softmax(logits, dim=1).cpu().numpy()[0]


def RobertaSentinelOpenGPTInference(input_text):
    """Return two class probabilities from the OpenGPT Sentinel model.

    Args:
        input_text: The text to classify.

    Returns:
        NumPy array of two probabilities that sum to 1.
    """
    return _sentinel_inference("SentinelCheckpoint/RobertaSentinelOpenGPT.pth", input_text)


def RobertaSentinelCSAbstractInference(input_text):
    """Return two class probabilities from the CSAbstract Sentinel model.

    Args:
        input_text: The text to classify.

    Returns:
        NumPy array of two probabilities that sum to 1.
    """
    return _sentinel_inference("SentinelCheckpoint/RobertaSentinelCSAbstract.pth", input_text)


def RobertaClassifierOpenGPTInference(input_text):
    """Return two class probabilities from the OpenGPT Classifier model.

    Args:
        input_text: The text to classify.

    Returns:
        NumPy array of two probabilities that sum to 1.
    """
    return _classifier_inference("ClassifierCheckpoint/RobertaClassifierOpenGPT.pth", input_text)


def RobertaClassifierCSAbstractInference(input_text):
    """Return two class probabilities from the CSAbstract Classifier model.

    Args:
        input_text: The text to classify.

    Returns:
        NumPy array of two probabilities that sum to 1.
    """
    return _classifier_inference("ClassifierCheckpoint/RobertaClassifierCSAbstract.pth", input_text)