nbroad HF staff committed on
Commit
ecae52c
1 Parent(s): 4b16416
Files changed (1) hide show
  1. app.py +39 -0
app.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import urllib.request

import gradio as gr
import torch
from transformers import RobertaConfig, RobertaForSequenceClassification, RobertaTokenizer

7
+ os.system("wget https://openaipublic.azureedge.net/gpt-2/detector-models/v1/detector-base.pt")
8
+
9
+ config = RobertaConfig.from_pretrained("roberta-base")
10
+ model = RobertaForSequenceClassification(config)
11
+ model.load_state_dict(torch.load("detector-base.pt")["model_state_dict"])
12
+
13
+ tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
14
+
15
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
16
+
17
+ model.to(device)
18
+
19
+
20
def detect(query):
    """Classify *query* as GPT-2-generated ("fake") vs. human-written ("real").

    Returns a formatted string with both probabilities plus how many of the
    input tokens actually fit inside the model's context window.
    """
    token_ids = tokenizer.encode(query)
    all_tokens = len(token_ids)

    # Bug fix: transformers >= 4 removed ``tokenizer.max_len`` (renamed to
    # ``model_max_length``); support both so the app runs across library
    # versions. Reserve 2 slots for the BOS/EOS tokens added below.
    max_length = getattr(tokenizer, "model_max_length", None) or tokenizer.max_len
    token_ids = token_ids[: max_length - 2]
    used_tokens = len(token_ids)

    input_ids = torch.tensor(
        [tokenizer.bos_token_id] + token_ids + [tokenizer.eos_token_id]
    ).unsqueeze(0)
    mask = torch.ones_like(input_ids)

    # Inference only — no gradient tracking needed.
    with torch.no_grad():
        logits = model(input_ids.to(device), attention_mask=mask.to(device))[0]
        probs = logits.softmax(dim=-1)

    # Detector convention: index 0 = fake (machine), index 1 = real (human)
    # — presumably matches the checkpoint's training labels; verify against
    # the upstream detector repo.
    fake, real = probs.detach().cpu().flatten().numpy().tolist()

    return (
        f"Fake: {fake:.2%} | Real: {real:.2%} | "
        f"Used tokens: {used_tokens} | All tokens: {all_tokens}"
    )
36
# Minimal Gradio UI: a single text input wired straight to detect(), with
# the formatted result shown in a text output.
demo = gr.Interface(
    fn=detect,
    inputs="text",
    outputs="text",
)

demo.launch()