Spaces:
Runtime error
Runtime error
from flask import Flask, request | |
import gradio as gr | |
import os | |
import re | |
# WSGI application object named after this module.
# NOTE(review): no routes are registered on it in the visible code; the UI
# below is served by Gradio instead -- confirm whether it is still needed.
app = Flask(import_name=__name__)
import torch | |
from torch import cuda | |
from tqdm import tqdm | |
from transformers import GPT2LMHeadModel, GPT2TokenizerFast | |
# Checkpoint name shared by the tokenizer and the language model.
model_id = "gpt2"

# Run on the GPU when PyTorch reports one, otherwise fall back to the CPU.
if cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Load the tokenizer and the causal LM once at import time so every request
# reuses the same objects; the model weights are moved to the chosen device.
tokenizergpt2 = GPT2TokenizerFast.from_pretrained(model_id)
modelgpt2 = GPT2LMHeadModel.from_pretrained(model_id)
modelgpt2 = modelgpt2.to(device)
# A sentence boundary is a run of spaces that
#   * follows '.', '?' or '!' whose character two positions earlier is not an
#     upper-case ASCII letter (so "Mr. Smith" is not split), and
#   * is followed by an upper-case ASCII letter.
_SENTENCE_BOUNDARY = re.compile(r'(?<=[^A-Z].[.?!]) +(?=[A-Z])')


def text_to_sentences(text):
    """Split free text into a list of sentence strings.

    Newlines are treated as spaces before splitting. Each returned sentence
    is stripped of surrounding whitespace, and fragments that are empty after
    stripping are dropped, so the result never contains blank entries.

    Args:
        text: The text to split. ``None`` or an empty string is accepted.

    Returns:
        A list of non-empty sentence strings, in their original order.
        Empty or whitespace-only input yields an empty list.
    """
    if not text:
        return []
    flattened = text.replace('\n', ' ')
    fragments = _SENTENCE_BOUNDARY.split(flattened)
    return [fragment.strip() for fragment in fragments if fragment.strip()]
def calculatePerplexity(text):
    """Return the GPT-2 perplexity of ``text`` using a sliding window.

    The text is tokenized once and scored in overlapping windows of at most
    ``modelgpt2.config.n_positions`` tokens, advancing 512 tokens per step.
    In each window only the tokens not already scored by the previous window
    contribute to the loss; the earlier tokens serve as context only.

    Args:
        text: The string to score.

    Returns:
        The perplexity as a Python float: ``exp`` of the mean negative
        log-likelihood per scored token. If the text tokenizes to fewer than
        two tokens there is nothing to predict, so ``float("nan")`` is
        returned instead of raising.
    """
    encodings = tokenizergpt2(text, return_tensors="pt")
    all_ids = encodings.input_ids
    seq_len = all_ids.size(1)

    # The first token has no preceding context, so at least two tokens are
    # needed before any prediction can be scored.
    if seq_len < 2:
        return float("nan")

    max_length = modelgpt2.config.n_positions
    stride = 512

    nlls = []
    n_tokens = 0
    prev_end_loc = 0
    for begin_loc in range(0, seq_len, stride):
        end_loc = min(begin_loc + max_length, seq_len)
        # Number of tokens in this window that were not covered before.
        trg_len = end_loc - prev_end_loc
        input_ids = all_ids[:, begin_loc:end_loc].to(device)
        target_ids = input_ids.clone()
        # -100 marks label positions that the loss ignores (context only).
        target_ids[:, :-trg_len] = -100

        with torch.no_grad():
            outputs = modelgpt2(input_ids, labels=target_ids)

        # The model shifts labels by one position internally, so the label at
        # index 0 is never scored. ``outputs.loss`` is a mean over the labels
        # that remain, so weight it by that exact count rather than trg_len.
        num_scored = int((target_ids[:, 1:] != -100).sum())
        nlls.append(outputs.loss * num_scored)
        n_tokens += num_scored

        prev_end_loc = end_loc
        if end_loc == seq_len:
            break

    ppl = torch.exp(torch.stack(nlls).sum() / n_tokens)
    return ppl.item()
def calculatePerplexities(text, threshold=25):
    """Score every sentence of ``text`` and label it "AI" or "Human".

    The text is split with ``text_to_sentences`` and each sentence is scored
    with ``calculatePerplexity``. A sentence whose perplexity is strictly
    below ``threshold`` is labelled "AI"; every other sentence (including one
    whose perplexity is NaN) is labelled "Human".

    Args:
        text: The text to analyse.
        threshold: Perplexity cut-off below which a sentence is labelled
            "AI". Defaults to 25.

    Returns:
        A list with one dict per scored sentence, each holding the keys
        "sentence", "perplexity" and "label". Blank sentences are skipped,
        so empty input yields an empty list.
    """
    results = []
    for sentence in text_to_sentences(text):
        # A blank fragment has no tokens to score; skip it rather than let
        # the scorer fail on it.
        if not sentence.strip():
            continue
        perplexity = calculatePerplexity(sentence)
        label = "AI" if perplexity < threshold else "Human"
        results.append({"sentence": sentence, "perplexity": perplexity, "label": label})
    return results
# Gradio UI: a single text box whose content is passed to
# calculatePerplexities, with the per-sentence results rendered as JSON.
#
# `interpretation` is deliberately not passed. NOTE(review): the parameter is
# understood to have been removed from gr.Interface in Gradio 4 (passing it
# raises TypeError at construction), and it provides nothing usable for a JSON
# output on earlier versions -- confirm against the pinned Gradio version.
demo = gr.Interface(
    fn=calculatePerplexities,
    inputs=gr.Textbox(placeholder="Copy and paste here...", label="Content Box"),
    outputs=gr.JSON(),
)

# Start the web server; show_api=False is passed through to Gradio unchanged.
demo.launch(show_api=False)