PirateXX committed on
Commit
56fc7fe
·
1 Parent(s): a1e2027

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +63 -0
app.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, request
2
+ import gradio as gr
3
+ import os
4
+ import re
5
+ app = Flask(__name__)
6
+
7
+ import torch
8
+ from tqdm import tqdm
9
+ from transformers import GPT2LMHeadModel, GPT2TokenizerFast
10
+
11
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
# Only `torch` is imported, so the CUDA check has to go through `torch.cuda`
# (a bare `cuda.is_available()` raises NameError at import time).
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model_id = "gpt2"
# Load the pretrained GPT-2 LM-head model and its fast tokenizer once at
# import time; the scoring functions below reuse these module-level objects.
modelgpt2 = GPT2LMHeadModel.from_pretrained(model_id).to(device)
tokenizergpt2 = GPT2TokenizerFast.from_pretrained(model_id)
15
+
16
# A sentence boundary is a run of spaces that follows '.', '?' or '!' and
# precedes a capital letter. The look-behind also requires that the character
# two positions before the punctuation is not a capital, which keeps short
# capitalised abbreviations such as "Mr." from being treated as sentence ends.
_SENTENCE_BOUNDARY = re.compile(r'(?<=[^A-Z].[.?!]) +(?=[A-Z])')


def text_to_sentences(text):
    """Split *text* into a list of sentences.

    Line breaks (both ``\\n`` and ``\\r\\n``) are flattened to spaces before
    splitting, so a sentence may span several input lines.

    Args:
        text: The raw text to split.

    Returns:
        A list of sentences with surrounding whitespace removed. Empty or
        whitespace-only input yields an empty list rather than ``['']``.
    """
    clean_text = text.replace('\r\n', ' ').replace('\n', ' ')
    pieces = (piece.strip() for piece in _SENTENCE_BOUNDARY.split(clean_text))
    # Drop blank fragments so callers never receive an empty "sentence".
    return [piece for piece in pieces if piece]
19
+
20
def calculatePerplexity(text):
    """Return the perplexity of *text* under the module-level GPT-2 model.

    The token sequence is scored with a sliding window: each window holds at
    most ``modelgpt2.config.n_positions`` tokens and the window start advances
    by ``stride`` tokens. Tokens already scored by a previous window have
    their labels set to -100 so the loss ignores them and they serve only as
    context.

    Args:
        text: The string to score.

    Returns:
        The perplexity as a Python float. ``nan`` is returned when *text*
        tokenizes to zero tokens, because there is nothing to score.
    """
    encodings = tokenizergpt2(text, return_tensors="pt")
    input_ids = encodings.input_ids
    seq_len = input_ids.size(1)
    if seq_len == 0:
        # torch.stack() rejects an empty list, so bail out before the loop.
        return float("nan")

    max_length = modelgpt2.config.n_positions
    stride = 512
    # Inputs must live on the same device as the model weights.
    model_device = modelgpt2.device

    nlls = []
    prev_end_loc = 0
    for begin_loc in range(0, seq_len, stride):
        end_loc = min(begin_loc + max_length, seq_len)
        # Number of tokens in this window not scored by an earlier window.
        trg_len = end_loc - prev_end_loc
        # Feed only the current window; passing the whole sequence would
        # exceed the model's position limit on long inputs.
        window_ids = input_ids[:, begin_loc:end_loc].to(model_device)
        target_ids = window_ids.clone()
        # Mask the overlapping context so only the new tokens add to the loss.
        target_ids[:, :-trg_len] = -100

        with torch.no_grad():
            outputs = modelgpt2(window_ids, labels=target_ids)
            # outputs.loss is multiplied by trg_len here to build a
            # per-window total before the final normalisation.
            neg_log_likelihood = outputs.loss * trg_len

        nlls.append(neg_log_likelihood)

        prev_end_loc = end_loc
        if end_loc == seq_len:
            break

    ppl = torch.exp(torch.stack(nlls).sum() / end_loc)

    return ppl.item()
48
+
49
def calculatePerplexities(text):
    """Return the perplexity of every sentence found in *text*.

    The text is split with ``text_to_sentences`` and each sentence is scored
    with ``calculatePerplexity``.

    Args:
        text: The raw text to analyse.

    Returns:
        A list of perplexity floats, one per non-blank sentence, in the order
        the sentences appear. Blank fragments are skipped because they give
        the scorer nothing to evaluate.
    """
    return [
        calculatePerplexity(sentence)
        for sentence in text_to_sentences(text)
        if sentence.strip()
    ]
55
+
56
# Gradio UI: one text box in, the list of per-sentence perplexities out as JSON.
demo = gr.Interface(
    fn=calculatePerplexities,
    inputs=gr.Textbox(placeholder="Copy and paste here..."),
    article="Visit <a href = \"https://ai-content-detector.online/\">AI Content Detector</a> for better user experience!",
    # Top-level component class, matching gr.Textbox above; the legacy
    # gr.outputs namespace is deprecated.
    outputs=gr.JSON(),
    # NOTE(review): `interpretation` is kept as written — confirm the
    # installed Gradio version still accepts it.
    interpretation="default",
)

# Start the web server; show_api=False is passed through to launch() as before.
demo.launch(show_api=False)