# app.py — Gradio demo: per-sentence GPT-2 perplexity AI-text detector.
# Original upload: Shad0ws — "Update app.py" (commit 2534717).
from flask import Flask, request
import gradio as gr
import os
import re
# NOTE(review): the Flask app is created but never used below — likely leftover.
app = Flask(__name__)
import torch
from torch import cuda
from tqdm import tqdm
from transformers import GPT2LMHeadModel, GPT2TokenizerFast
# Run on GPU when available, otherwise fall back to CPU.
device = 'cuda' if cuda.is_available() else 'cpu'
model_id = "gpt2"
# Load GPT-2 once at import time; downloads weights on the first run.
modelgpt2 = GPT2LMHeadModel.from_pretrained(model_id).to(device)
tokenizergpt2 = GPT2TokenizerFast.from_pretrained(model_id)
def text_to_sentences(text):
    """Split *text* into a list of sentences.

    Newlines are flattened to spaces, then the text is split at runs of
    spaces that follow a sentence-ending mark (``.``, ``!`` or ``?``) and
    precede an uppercase letter.  The lookbehind requires a non-uppercase
    character two positions before the mark, which is presumably intended
    to avoid splitting after single-capital initials — confirm against
    upstream callers.

    Returns a list of strings (a single-element list when no boundary
    matches, ``[""]`` for empty input — ``re.split`` semantics).
    """
    clean_text = text.replace('\n', ' ')
    # Fix: '!' added to the terminator class so exclamatory sentences
    # ("Wow! Next sentence.") are split like '.'/'?' ones.
    return re.split(r'(?<=[^A-Z].[.!?]) +(?=[A-Z])', clean_text)
def calculatePerplexity(text):
    """Return GPT-2 perplexity of *text* via a strided sliding window.

    Follows the Hugging Face fixed-length-model perplexity recipe: the
    token sequence is scored in windows of up to ``n_positions`` tokens,
    advancing ``stride`` tokens at a time; only tokens not already scored
    by a previous window contribute to the loss.
    """
    encodings = tokenizergpt2("\n\n".join([text]), return_tensors="pt")
    max_length = modelgpt2.config.n_positions  # context window (1024 for base GPT-2)
    stride = 512  # window advance; < max_length gives each window overlapping context
    seq_len = encodings.input_ids.size(1)
    nlls = []  # per-window summed negative log-likelihoods
    prev_end_loc = 0
    for begin_loc in range(0, seq_len, stride):
        end_loc = min(begin_loc + max_length, seq_len)
        # Tokens in this window not already covered by the previous window.
        trg_len = end_loc - prev_end_loc
        input_ids = encodings.input_ids[:, begin_loc:end_loc].to(device)
        target_ids = input_ids.clone()
        # -100 is the ignore_index for the LM loss: mask out tokens that an
        # earlier window has already scored (no-op on the first window).
        target_ids[:, :-trg_len] = -100
        with torch.no_grad():
            outputs = modelgpt2(input_ids, labels=target_ids)
            # outputs.loss is the mean NLL over unmasked targets; multiplying
            # by trg_len recovers (approximately) the summed NLL.
            neg_log_likelihood = outputs.loss * trg_len
        nlls.append(neg_log_likelihood)
        prev_end_loc = end_loc
        if end_loc == seq_len:
            break
    # end_loc == seq_len here, so this is exp(total NLL / token count).
    # NOTE(review): empty input (seq_len == 0) skips the loop entirely and
    # leaves end_loc undefined -> NameError; confirm callers never pass "".
    ppl = torch.exp(torch.stack(nlls).sum() / end_loc)
    return ppl.item()
def calculatePerplexities(text):
    """Score each sentence of *text* and label it "AI" or "Human".

    Sentences with perplexity below 25 are labelled "AI", the rest
    "Human".  Returns a list of dicts with keys "sentence",
    "perplexity" and "label".
    """
    results = []
    for sent in text_to_sentences(text):
        ppl = calculatePerplexity(sent)
        results.append({
            "sentence": sent,
            "perplexity": ppl,
            "label": "AI" if ppl < 25 else "Human",
        })
    return results
# Gradio UI: one textbox in, the raw list of per-sentence dicts out as JSON.
demo = gr.Interface(
    fn=calculatePerplexities,
    inputs=gr.Textbox(placeholder="Copy and paste here...", label="Content Box"),
    outputs=gr.JSON(),
    # NOTE(review): `interpretation` was deprecated and later removed from
    # gr.Interface (Gradio 4.x) — confirm the pinned gradio version accepts it.
    interpretation="default",
)
# show_api=False hides the auto-generated API docs page.
demo.launch(show_api=False)