Spaces:

alwayse
/

MMD_MP_Text_Dection

Running

File size: 2,131 Bytes

ea2d4f7

import numpy as np
import torch
import random
from meta_train import mmdPreModel
from collections import namedtuple
import joblib
from transformers import RobertaTokenizer, RobertaModel


def api_init(*, model_path_api='.', gpu_using=False, num_ref=5000):
	"""Build and load everything the detection API needs.

	Seeds the Python, NumPy and torch RNGs with 0 and switches cuDNN to
	deterministic mode, constructs the ``mmdPreModel`` network, restores its
	weights and kernel parameters from disk, loads the pretrained RoBERTa
	tokenizer/encoder, and draws a random subset of the stored reference
	features.

	Args:
		model_path_api: Directory containing ``feature_ref_for_test.pt``,
			``logistic_regression_model.pkl`` and ``net.pt``.
		gpu_using: When true, everything is moved to ``cuda:0``; otherwise it
			stays on the CPU. Requesting the GPU on a machine without CUDA is
			not guarded against here and is expected to fail in ``.to()``.
		num_ref: Maximum number of reference feature rows to keep.

	Returns:
		The tuple ``(base_model, base_tokenizer, net, feature_ref, sigma,
		sigma0_u, ep, loaded_model, DEVICE)``.
	"""
	# Fix every RNG so the reference-subset draw below is reproducible.
	random.seed(0)
	np.random.seed(0)
	torch.manual_seed(0)
	torch.cuda.manual_seed(0)
	torch.cuda.manual_seed_all(0)
	torch.backends.cudnn.benchmark = False
	torch.backends.cudnn.deterministic = True

	model_name = 'roberta-base-openai-detector'
	token_num, hidden_size = 100, 768

	Config = namedtuple('Config', ['in_dim', 'hid_dim', 'dropout', 'out_dim', 'token_num'])
	config = Config(
		in_dim=hidden_size,
		token_num=token_num,
		hid_dim=512,
		dropout=0.2,
		out_dim=300,
	)

	net = mmdPreModel(config=config, num_mlp=0, transformer_flag=True, num_hidden_layers=1)

	# Load the features and models. Everything is deserialized onto the CPU
	# first and moved to the target device afterwards.
	feature_ref_for_test_filename = f'{model_path_api}/feature_ref_for_test.pt'
	model_filename = f'{model_path_api}/logistic_regression_model.pkl'
	net_filename = f'{model_path_api}/net.pt'

	# SECURITY NOTE: torch.load and joblib.load are pickle-based and can run
	# arbitrary code; only point model_path_api at trusted artifacts.
	cpu = torch.device('cpu')
	load_ref_data = torch.load(feature_ref_for_test_filename, map_location=cpu)
	loaded_model = joblib.load(model_filename)
	checkpoint = torch.load(net_filename, map_location=cpu)
	net.load_state_dict(checkpoint['net'])
	sigma, sigma0_u, ep = checkpoint['sigma'], checkpoint['sigma0_u'], checkpoint['ep']

	# Generic generative model (tokenizer + encoder exposing hidden states).
	cache_dir = ".cache"
	base_tokenizer = RobertaTokenizer.from_pretrained(model_name, cache_dir=cache_dir)
	base_model = RobertaModel.from_pretrained(model_name, output_hidden_states=True, cache_dir=cache_dir)

	DEVICE = torch.device("cuda:0" if gpu_using else "cpu")
	# NOTE(review): net is returned without calling .eval(); confirm callers
	# switch it to eval mode if dropout must be disabled at inference.
	net = net.to(DEVICE)
	sigma, sigma0_u, ep = sigma.to(DEVICE), sigma0_u.to(DEVICE), ep.to(DEVICE)
	base_model = base_model.to(DEVICE)

	# Pick the reference rows by truncating the permutation itself, so only the
	# selected rows are gathered and moved to the device (same rows, same order
	# as permuting the whole tensor and then slicing).
	shuffled_rows = np.random.permutation(load_ref_data.shape[0])
	feature_ref = load_ref_data[shuffled_rows[:num_ref]].to(DEVICE)

	return base_model, base_tokenizer, net, feature_ref, sigma, sigma0_u, ep, loaded_model, DEVICE