from transformers import RobertaTokenizer, RobertaForSequenceClassification, RobertaModel
import torch
import torch.nn as nn
import torch.nn.functional as F


device = torch.device("cpu")


class MLP(nn.Module):
    """Two-layer classification head applied to RoBERTa [CLS] features."""

    def __init__(self, input_dim):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_dim, 256)
        self.fc2 = nn.Linear(256, 2)
        self.gelu = nn.GELU()

    def forward(self, x):
        x = self.gelu(self.fc1(x))
        x = self.fc2(x)
        return x


def extract_features(text):
    """Encode text with roberta-base and return the [CLS] token embedding as a NumPy vector."""
    tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
    model = RobertaModel.from_pretrained("roberta-base").to(device)
    model.eval()
    tokenized_text = tokenizer.encode(text, truncation=True, max_length=512, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model(tokenized_text)
    last_hidden_states = outputs.last_hidden_state
    # Use the hidden state of the first ([CLS]) token as the sentence-level feature
    TClassification = last_hidden_states[:, 0, :].squeeze().cpu().numpy()
    return TClassification

def RobertaSentinelOpenGPTInference(input_text):
    """Load the RobertaSentinelOpenGPT MLP checkpoint and return class probabilities for input_text."""
    features = extract_features(input_text)
    loaded_model = MLP(768).to(device)
    loaded_model.load_state_dict(torch.load("SentinelCheckpoint/RobertaSentinelOpenGPT.pth", map_location=device))
    loaded_model.eval()

    with torch.no_grad():
        inputs = torch.from_numpy(features).float().to(device)
        outputs = loaded_model(inputs)
        Probs = F.softmax(outputs, dim=0).cpu().numpy()

    return Probs

def RobertaSentinelCSAbstractInference(input_text):
    """Load the RobertaSentinelCSAbstract MLP checkpoint and return class probabilities for input_text."""
    features = extract_features(input_text)
    loaded_model = MLP(768).to(device)
    loaded_model.load_state_dict(torch.load("SentinelCheckpoint/RobertaSentinelCSAbstract.pth", map_location=device))
    loaded_model.eval()

    with torch.no_grad():
        inputs = torch.from_numpy(features).float().to(device)
        outputs = loaded_model(inputs)
        Probs = F.softmax(outputs, dim=0).cpu().numpy()

    return Probs


def RobertaClassifierOpenGPTInference(input_text):
    """Load the RobertaClassifierOpenGPT512 checkpoint and return class probabilities for input_text."""
    tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
    model_path = "ClassifierCheckpoint/RobertaClassifierOpenGPT512.pth"
    model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=2)
    model.load_state_dict(torch.load(model_path, map_location=device))
    model = model.to(device)
    model.eval()

    tokenized_input = tokenizer(input_text, truncation=True, padding=True, max_length=512, return_tensors='pt')
    input_ids = tokenized_input['input_ids'].to(device)
    attention_mask = tokenized_input['attention_mask'].to(device)

    # Make a prediction
    with torch.no_grad():
        outputs = model(input_ids, attention_mask=attention_mask)
    logits = outputs.logits
    Probs = F.softmax(logits, dim=1).cpu().numpy()[0]

    return Probs


def RobertaClassifierGPABenchmarkInference(input_text):
    """Load the RobertaClassifierGPABenchmark512 checkpoint and return class probabilities for input_text."""
    tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
    model_path = "ClassifierCheckpoint/RobertaClassifierGPABenchmark512.pth"
    model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=2)
    model.load_state_dict(torch.load(model_path, map_location=device))
    model = model.to(device)
    model.eval()

    tokenized_input = tokenizer(input_text, truncation=True, padding=True, max_length=512, return_tensors='pt')
    input_ids = tokenized_input['input_ids'].to(device)
    attention_mask = tokenized_input['attention_mask'].to(device)

    # Make a prediction
    with torch.no_grad():
        outputs = model(input_ids, attention_mask=attention_mask)
    logits = outputs.logits
    Probs = F.softmax(logits, dim=1).cpu().numpy()[0]

    return Probs

def RobertaClassifierCHEATInference(input_text):
    """Load the RobertaClassifierCHEAT256 checkpoint (256-token inputs) and return class probabilities for input_text."""
    tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
    model_path = "ClassifierCheckpoint/RobertaClassifierCHEAT256.pth"
    model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=2)
    model.load_state_dict(torch.load(model_path, map_location=device))
    model = model.to(device)
    model.eval()

    tokenized_input = tokenizer(input_text, truncation=True, padding=True, max_length=256, return_tensors='pt')
    input_ids = tokenized_input['input_ids'].to(device)
    attention_mask = tokenized_input['attention_mask'].to(device)

    # Make a prediction
    with torch.no_grad():
        outputs = model(input_ids, attention_mask=attention_mask)
    logits = outputs.logits
    Probs = F.softmax(logits, dim=1).cpu().numpy()[0]

    return Probs
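

# Minimal usage sketch (not part of the original module): it calls one of the inference
# helpers above and assumes the corresponding checkpoint file exists on disk under
# ClassifierCheckpoint/.
if __name__ == "__main__":
    sample_text = "Deep learning has transformed natural language processing research."
    probs = RobertaClassifierOpenGPTInference(sample_text)
    # probs is a length-2 softmax distribution over the two classes
    print(f"class probabilities: {probs}")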