nbroad (HF staff) committed
Commit 37efee7
1 Parent(s): f0ab314

include comparison with hosted version

Files changed (1):
  1. app.py +65 -7
app.py CHANGED
@@ -1,10 +1,19 @@
 import os
+import requests
 
 import gradio as gr
 import torch
-from transformers import RobertaForSequenceClassification, RobertaTokenizer, RobertaConfig
+from transformers import (
+    RobertaForSequenceClassification,
+    RobertaTokenizer,
+    RobertaConfig,
+)
 
-os.system("wget https://openaipublic.azureedge.net/gpt-2/detector-models/v1/detector-base.pt")
+HF_TOKEN = os.environ["HF_TOKEN"]
+
+os.system(
+    "wget https://openaipublic.azureedge.net/gpt-2/detector-models/v1/detector-base.pt"
+)
 
 config = RobertaConfig.from_pretrained("roberta-base")
 model = RobertaForSequenceClassification(config)
@@ -17,13 +26,36 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model.to(device)
 
 
-def detect(query):
+def call_inference_api(query):
+    url = "https://api-inference.huggingface.co/models/roberta-base-openai-detector"
+
+    headers = {"Authorization": f"Bearer {HF_TOKEN}"}
+    response = requests.post(url, json={"inputs": query}, headers=headers)
+
+    code = response.status_code
+    if code == 200:
+        fake, real = response.json()[0]
+
+        fake_score = fake["score"]
+        real_score = real["score"]
+
+        return f"Fake: {fake_score:.2%} | Real: {real_score:.2%}"
+
+    else:
+        error = response.json()["error"]
+        warning = response.json()["warnings"]
+        return f"Error: {error} | Warning: {warning}"
+
+
+def local_call(query):
     # Copied from https://github.com/openai/gpt-2-output-dataset/tree/master/detector#L35-L46
     tokens = tokenizer.encode(query)
     all_tokens = len(tokens)
-    tokens = tokens[:tokenizer.max_len - 2]
+    tokens = tokens[: tokenizer.max_len - 2]
     used_tokens = len(tokens)
-    tokens = torch.tensor([tokenizer.bos_token_id] + tokens + [tokenizer.eos_token_id]).unsqueeze(0)
+    tokens = torch.tensor(
+        [tokenizer.bos_token_id] + tokens + [tokenizer.eos_token_id]
+    ).unsqueeze(0)
     mask = torch.ones_like(tokens)
 
     with torch.no_grad():
@@ -35,6 +67,32 @@ def detect(query):
     return f"Fake: {fake:.2%} | Real: {real:.2%} | Used tokens: {used_tokens} | All tokens: {all_tokens}"
 
 
-demo = gr.Interface(fn=detect, inputs="text", outputs="text")
+def main_function(query):
+    hosted_output = call_inference_api(query)
+    local_output = local_call(query)
+
+    return hosted_output, local_output
+
+
+text_input = gr.Textbox(
+    lines=5,
+    label="Enter text to compare output with the model hosted here: https://huggingface.co/roberta-base-openai-detector",
+)
+hosted_output = gr.Textbox(label="Output from model hosted on Hugging Face")
+local_output = gr.Textbox(
+    label="Output from model running locally on transformers 2.0.0, tokenizers 0.7.0, and torch 1.4.0"
+)
+
+description = "The original repository for the model used an older version of \
+transformers, tokenizers, and torch which results in slightly different results \
+compared to the model hosted on Hugging Face. This app compares the two models."
+
+demo = gr.Interface(
+    fn=main_function,
+    inputs=text_input,
+    outputs=[hosted_output, local_output],
+    title="Compare OpenAI detector models",
+    description=description,
+)
 
-demo.launch()
+demo.launch()
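Note that the hunks above elide the middle of local_call (old lines 30-34 fall between them), so the diff never shows how fake and real get computed. Going by the OpenAI detector code that the in-line comment links to, the hidden context presumably runs the model and softmaxes the two logits. A sketch under that assumption, not the exact committed lines:

import torch

# Presumed elided body of local_call, following the linked OpenAI detector
# repo (github.com/openai/gpt-2-output-dataset, detector/). `model`, `tokens`,
# `mask`, and `device` are the objects defined earlier in app.py.
with torch.no_grad():
    logits = model(tokens.to(device), attention_mask=mask.to(device))[0]
    probs = logits.softmax(dim=-1)

# probs has shape (1, 2); flattening yields the fake/real probabilities
# that the return statement formats.
fake, real = probs.detach().cpu().flatten().numpy().tolist()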
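One caveat on call_inference_api: for text-classification models the Inference API responds with a list of {"label": ..., "score": ...} dicts, and that list is not guaranteed to arrive in a fixed label order (it is typically sorted by score), so the positional unpacking fake, real = response.json()[0] can swap the two fields depending on which class wins. A minimal label-keyed alternative, sketched here with the "Fake"/"Real" label names assumed from the app's output format rather than verified against the model config:

import os
import requests

API_URL = "https://api-inference.huggingface.co/models/roberta-base-openai-detector"
HEADERS = {"Authorization": f"Bearer {os.environ['HF_TOKEN']}"}

def query_detector(text):
    """Call the hosted detector, indexing scores by label instead of position."""
    response = requests.post(API_URL, json={"inputs": text}, headers=HEADERS)
    if response.status_code != 200:
        # The API usually returns a JSON body with an "error" field on failure.
        try:
            return f"Error: {response.json().get('error', response.text)}"
        except ValueError:
            return f"Error: HTTP {response.status_code}"
    # Response shape: [[{"label": ..., "score": ...}, ...]].
    scores = {item["label"]: item["score"] for item in response.json()[0]}
    # "Fake"/"Real" are assumed label names; .get() keeps this robust if they differ.
    return f"Fake: {scores.get('Fake', 0.0):.2%} | Real: {scores.get('Real', 0.0):.2%}"

Indexing by label also keeps the parsing correct if the endpoint ever changes its sort order.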