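"""Gradio app comparing the GPT-2 output detector hosted on the Hugging Face
Inference API against a local copy loaded from the original OpenAI checkpoint.
The two can disagree slightly because the original repository pinned older
versions of transformers, tokenizers, and torch."""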
import os
import requests
import gradio as gr
import torch
from transformers import (
    RobertaForSequenceClassification,
    RobertaTokenizer,
    RobertaConfig,
)
# Token for authenticated calls to the hosted Inference API.
HF_TOKEN = os.environ["HF_TOKEN"]

# Download the original OpenAI detector checkpoint.
os.system(
    "wget https://openaipublic.azureedge.net/gpt-2/detector-models/v1/detector-base.pt"
)
# Rebuild the detector architecture from the base RoBERTa config and load the
# downloaded weights. map_location keeps the load working on CPU-only machines,
# and eval() disables dropout so inference is deterministic.
config = RobertaConfig.from_pretrained("roberta-base")
model = RobertaForSequenceClassification(config)
model.load_state_dict(
    torch.load("detector-base.pt", map_location="cpu")["model_state_dict"]
)
tokenizer = RobertaTokenizer.from_pretrained("roberta-base")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()
def call_inference_api(query):
    url = "https://api-inference.huggingface.co/models/roberta-base-openai-detector"
    headers = {"Authorization": f"Bearer {HF_TOKEN}"}
    response = requests.post(url, json={"inputs": query}, headers=headers)
    if response.status_code == 200:
        # The API returns label/score pairs sorted by score, so index by
        # label instead of assuming "Fake" always comes first.
        scores = {item["label"]: item["score"] for item in response.json()[0]}
        return f"Fake: {scores['Fake']:.2%} | Real: {scores['Real']:.2%}"
    else:
        body = response.json()
        error = body.get("error")
        warning = body.get("warnings")
        return f"Error: {error} | Warning: {warning}"
def local_call(query):
    # Copied from https://github.com/openai/gpt-2-output-dataset/tree/master/detector#L35-L46
    tokens = tokenizer.encode(query)
    all_tokens = len(tokens)
    # Truncate to leave room for the BOS/EOS tokens. `max_len` is the
    # transformers 2.x attribute; later versions renamed it `model_max_length`.
    tokens = tokens[: tokenizer.max_len - 2]
    used_tokens = len(tokens)
    tokens = torch.tensor(
        [tokenizer.bos_token_id] + tokens + [tokenizer.eos_token_id]
    ).unsqueeze(0)
    mask = torch.ones_like(tokens)
    with torch.no_grad():
        logits = model(tokens.to(device), attention_mask=mask.to(device))[0]
        probs = logits.softmax(dim=-1)
    fake, real = probs.detach().cpu().flatten().numpy().tolist()
    return (
        f"Fake: {fake:.2%} | Real: {real:.2%} | "
        f"Used tokens: {used_tokens} | All tokens: {all_tokens}"
    )
def main_function(query):
    hosted_output = call_inference_api(query)
    local_output = local_call(query)
    return hosted_output, local_output
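# Build the Gradio interface: one text input feeding two side-by-side text
# outputs, one per model.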
text_input = gr.Textbox(
    lines=5,
    label="Enter text to compare output with the model hosted here: https://huggingface.co/roberta-base-openai-detector",
)
hosted_output = gr.Textbox(label="Output from model hosted on Hugging Face")
local_output = gr.Textbox(
    label="Output from model running locally on transformers 2.0.0, tokenizers 0.7.0, and torch 1.4.0"
)
description = "The original repository for the model used older versions of \
transformers, tokenizers, and torch, which produce slightly different scores \
than the model hosted on Hugging Face. This app compares the two models."
demo = gr.Interface(
    fn=main_function,
    inputs=text_input,
    outputs=[hosted_output, local_output],
    title="Compare OpenAI detector models",
    description=description,
)

demo.launch()