younesbelkada committed on
Commit 8cbf2d0
1 Parent(s): 79a8cc6
Files changed (1)
  1. app.py +28 -18
app.py CHANGED
@@ -2,6 +2,10 @@ import torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer

+ description = """# Detoxified Language Models
+ This is a Space where you can try out the effects of detoxification on GPT-Neo 2.7B using RLHF. Learn more about that [here]()
+ """
+
preface_disclaimer = """
<h4> Disclaimer </h4>
<h5> Last meaningful update: 20.Feb.2023 </h5>
@@ -22,30 +26,24 @@ All in all, it is hard to predict how the models will respond to particular prom
Disclaimer inspired from <a href="https://huggingface.co/EleutherAI/gpt-j-6B" target="_blank"> GPT-J's model card </a> and <a href="https://beta.openai.com/docs/usage-guidelines/content-policy" target="_blank"> OpenAI GPT3's content policy </a>.
"""

- gpt_neo_125_id = "EleutherAI/gpt-neo-125M"
- detoxified_gpt_neo_id = "ybelkada/gpt-neo-125m-detoxified-small-context"
-
- gpt_neo = AutoModelForCausalLM.from_pretrained(gpt_neo_125_id).to(0)
- detoxified_neo = AutoModelForCausalLM.from_pretrained(detoxified_gpt_neo_id).to(0)
-
gpt_neo_1b_id = "ybelkada/gpt-neo-2.7B-sharded-bf16"
- detoxified_gpt_neo_1b_id = "ybelkada/gpt-neo-2.7B-detoxified-20shdl"
+ detoxified_gpt_neo_1b_id = "ybelkada/gpt-neo-2.7B-detox"

gpt_neo_1b = AutoModelForCausalLM.from_pretrained(gpt_neo_1b_id, torch_dtype=torch.bfloat16).to(0)
detoxified_neo_1b = AutoModelForCausalLM.from_pretrained(detoxified_gpt_neo_1b_id, torch_dtype=torch.bfloat16).to(0)

- tokenizer = AutoTokenizer.from_pretrained(gpt_neo_125_id)
+ tokenizer = AutoTokenizer.from_pretrained(gpt_neo_1b_id)

- def compare_generation(text, max_new_tokens, temperature):
-     input_ids = tokenizer(text, return_tensors="pt").input_ids.to(0)
-
-     text_neo = tokenizer.decode(gpt_neo.generate(input_ids, max_new_tokens=max_new_tokens, temperature=temperature, do_sample=True)[0])
-     text_detoxified = tokenizer.decode(detoxified_neo.generate(input_ids, max_new_tokens=max_new_tokens, temperature=temperature, do_sample=True)[0])
-
-     text_neo_1b = tokenizer.decode(gpt_neo_1b.generate(input_ids, max_new_tokens=max_new_tokens, temperature=temperature)[0])
-     text_detoxified_1b = tokenizer.decode(detoxified_neo_1b.generate(input_ids, max_new_tokens=max_new_tokens, temperature=temperature)[0])
-
-     return text_neo, text_detoxified, text_neo_1b, text_detoxified_1b
+ def compare_generation(text, max_new_tokens, temperature, top_p, top_k):
+     if top_p > 0:
+         top_k = 0
+
+     input_ids = tokenizer(text, return_tensors="pt").input_ids.to(0)
+
+     text_neo_1b = tokenizer.decode(gpt_neo_1b.generate(input_ids, max_new_tokens=max_new_tokens, temperature=temperature, top_p=top_p, do_sample=True, top_k=top_k, early_stopping=True)[0])
+     text_detoxified_1b = tokenizer.decode(detoxified_neo_1b.generate(input_ids, max_new_tokens=max_new_tokens, temperature=temperature, top_p=top_p, do_sample=True, top_k=top_k, early_stopping=True)[0])
+
+     return text_neo_1b, text_detoxified_1b

iface = gr.Interface(
    fn=compare_generation,
@@ -65,13 +63,25 @@ iface = gr.Interface(
        default=0.6,
        label="Temperature",
    ),
+     gr.inputs.Slider(
+         minimum=0,
+         maximum=1,
+         step=0.1,
+         default=0,
+         label="top_p",
+     ),
+     gr.inputs.Slider(
+         minimum=0,
+         maximum=50,
+         step=1,
+         default=0,
+         label="top_k",
+     ),
    ],
    outputs=[
-     gr.Textbox(label="Predicted tokens - gpt neo 125m :", lines=5),
-     gr.Textbox(label="Predicted detoxified tokens - gpt neo 125m:", lines=5),
    gr.Textbox(label="Predicted tokens - gpt neo 2.7b:", lines=5),
    gr.Textbox(label="Predicted detoxified tokens - gpt neo 2.7b:", lines=5),
    ],
- description=preface_disclaimer
+ description=description
)
iface.launch()
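For readers wondering why the new `compare_generation` zeroes out `top_k` whenever `top_p` is set: in `transformers`, `top_k=0` disables top-k filtering, so nucleus (top-p) sampling alone shapes the next-token distribution; if both were non-zero, the library would apply the top-k filter first and then top-p on the survivors. Below is a minimal sketch of the two sampling modes, assuming a small GPT-Neo checkpoint and a toy prompt (neither is part of this commit):

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "EleutherAI/gpt-neo-125M"  # assumed small checkpoint, only to keep the sketch cheap
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

input_ids = tokenizer("The movie was", return_tensors="pt").input_ids

# Nucleus (top-p) sampling: top_k=0 disables top-k filtering, so at each step
# only the smallest set of tokens whose cumulative probability reaches top_p is kept.
nucleus_out = model.generate(input_ids, do_sample=True, max_new_tokens=20, top_p=0.9, top_k=0)

# Top-k sampling: at each step only the 50 most likely tokens are kept.
topk_out = model.generate(input_ids, do_sample=True, max_new_tokens=20, top_k=50)

print(tokenizer.decode(nucleus_out[0]))
print(tokenizer.decode(topk_out[0]))
```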
 
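The new description points to RLHF-based detoxification, but the training code is not part of this commit. As orientation only, here is a rough sketch of what a PPO detoxification loop could look like with the `trl` library of that era; the 125M stand-in model, the toxicity classifier, the prompt, and every hyperparameter are illustrative assumptions, not the recipe behind the `ybelkada/gpt-neo-2.7B-detox` checkpoint:

```python
import torch
from transformers import AutoTokenizer, pipeline
from trl import AutoModelForCausalLMWithValueHead, PPOConfig, PPOTrainer

model_name = "EleutherAI/gpt-neo-125M"  # assumed stand-in; the Space itself serves 2.7B models
config = PPOConfig(model_name=model_name, learning_rate=1.41e-5, batch_size=1, mini_batch_size=1)

model = AutoModelForCausalLMWithValueHead.from_pretrained(model_name)
ref_model = AutoModelForCausalLMWithValueHead.from_pretrained(model_name)  # frozen reference for the KL penalty
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

# Assumed reward signal: the "not hate" probability from an off-the-shelf toxicity classifier.
toxicity = pipeline("text-classification", model="facebook/roberta-hate-speech-dynabench-r4-target")

ppo_trainer = PPOTrainer(config, model, ref_model, tokenizer)

query = tokenizer("The man said", return_tensors="pt").input_ids[0]
generation = model.generate(query.unsqueeze(0), max_new_tokens=20, do_sample=True)[0]
response = generation[query.shape[0]:]  # PPO expects only the generated continuation

result = toxicity(tokenizer.decode(response))[0]  # e.g. {"label": "nothate", "score": 0.98}
reward = torch.tensor(result["score"] if result["label"] == "nothate" else 1.0 - result["score"])

# One PPO step: push the policy toward higher (less toxic) reward while the
# KL penalty against ref_model keeps generations fluent and on-distribution.
stats = ppo_trainer.step([query], [response], [reward])
```

Whatever the exact recipe, the sliders added in this commit only change how the detoxified checkpoint's outputs are sampled at inference time, not the model itself.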