younesbelkada committed on
Commit 8cbf2d0
1 Parent(s): 79a8cc6
Files changed (1)
  1. app.py +28 -18
app.py CHANGED
@@ -2,6 +2,10 @@ import torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer

+ description = """# Detoxified Language Models
+ This is a Space where you can try out the effects of detoxification on GPT-Neo 2.7B using RLHF. Learn more about that [here]()
+ """
+
preface_disclaimer = """
<h4> Disclaimer </h4>
<h5> Last meaningful update: 20.Feb.2023 </h5>
@@ -22,30 +26,24 @@ All in all, it is hard to predict how the models will respond to particular prom
Disclaimer inspired from <a href="https://huggingface.co/EleutherAI/gpt-j-6B" target="_blank"> GPT-J's model card </a> and <a href="https://beta.openai.com/docs/usage-guidelines/content-policy" target="_blank"> OpenAI GPT3's content policy </a>.
"""

- gpt_neo_125_id = "EleutherAI/gpt-neo-125M"
- detoxified_gpt_neo_id = "ybelkada/gpt-neo-125m-detoxified-small-context"
-
- gpt_neo = AutoModelForCausalLM.from_pretrained(gpt_neo_125_id).to(0)
- detoxified_neo = AutoModelForCausalLM.from_pretrained(detoxified_gpt_neo_id).to(0)
-
gpt_neo_1b_id = "ybelkada/gpt-neo-2.7B-sharded-bf16"
- detoxified_gpt_neo_1b_id = "ybelkada/gpt-neo-2.7B-detoxified-20shdl"
+ detoxified_gpt_neo_1b_id = "ybelkada/gpt-neo-2.7B-detox"

gpt_neo_1b = AutoModelForCausalLM.from_pretrained(gpt_neo_1b_id, torch_dtype=torch.bfloat16).to(0)
detoxified_neo_1b = AutoModelForCausalLM.from_pretrained(detoxified_gpt_neo_1b_id, torch_dtype=torch.bfloat16).to(0)

- tokenizer = AutoTokenizer.from_pretrained(gpt_neo_125_id)
+ tokenizer = AutoTokenizer.from_pretrained(gpt_neo_1b_id)

- def compare_generation(text, max_new_tokens, temperature):
-     input_ids = tokenizer(text, return_tensors="pt").input_ids.to(0)
-
-     text_neo = tokenizer.decode(gpt_neo.generate(input_ids, max_new_tokens=max_new_tokens, temperature=temperature, do_sample=True)[0])
-     text_detoxified = tokenizer.decode(detoxified_neo.generate(input_ids, max_new_tokens=max_new_tokens, temperature=temperature, do_sample=True)[0])
-
-     text_neo_1b = tokenizer.decode(gpt_neo_1b.generate(input_ids, max_new_tokens=max_new_tokens, temperature=temperature)[0])
-     text_detoxified_1b = tokenizer.decode(detoxified_neo_1b.generate(input_ids, max_new_tokens=max_new_tokens, temperature=temperature)[0])
-
-     return text_neo, text_detoxified, text_neo_1b, text_detoxified_1b
+ def compare_generation(text, max_new_tokens, temperature, top_p, top_k):
+     if top_p > 0:
+         top_k = 0
+
+     input_ids = tokenizer(text, return_tensors="pt").input_ids.to(0)
+
+     text_neo_1b = tokenizer.decode(gpt_neo_1b.generate(input_ids, max_new_tokens=max_new_tokens, temperature=temperature, top_p=top_p, do_sample=True, top_k=top_k, early_stopping=True)[0])
+     text_detoxified_1b = tokenizer.decode(detoxified_neo_1b.generate(input_ids, max_new_tokens=max_new_tokens, temperature=temperature, top_p=top_p, do_sample=True, top_k=top_k, early_stopping=True)[0])
+
+     return text_neo_1b, text_detoxified_1b

iface = gr.Interface(
    fn=compare_generation,
@@ -65,13 +63,25 @@ iface = gr.Interface(
        default=0.6,
        label="Temperature",
    ),
+     gr.inputs.Slider(
+         minimum=0,
+         maximum=1,
+         step=0.1,
+         default=0,
+         label="top_p",
+     ),
+     gr.inputs.Slider(
+         minimum=0,
+         maximum=50,
+         step=1,
+         default=0,
+         label="top_k",
+     ),
    ],
    outputs=[
-     gr.Textbox(label="Predicted tokens - gpt neo 125m :", lines=5),
-     gr.Textbox(label="Predicted detoxified tokens - gpt neo 125m:", lines=5),
    gr.Textbox(label="Predicted tokens - gpt neo 2.7b:", lines=5),
    gr.Textbox(label="Predicted detoxified tokens - gpt neo 2.7b:", lines=5),
    ],
- description=preface_disclaimer
+ description=description
)
iface.launch()
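For readers wondering why the new `compare_generation` zeroes out `top_k` whenever `top_p` is set: in `transformers`, `top_k=0` disables top-k filtering, so nucleus (top-p) sampling alone shapes the next-token distribution; if both were non-zero, the library would apply the top-k filter first and then top-p on the survivors. Below is a minimal sketch of the two sampling modes, assuming a small GPT-Neo checkpoint and a toy prompt (neither is part of this commit):

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "EleutherAI/gpt-neo-125M"  # assumed small checkpoint, only to keep the sketch cheap
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

input_ids = tokenizer("The movie was", return_tensors="pt").input_ids

# Nucleus (top-p) sampling: top_k=0 disables top-k filtering, so at each step
# only the smallest set of tokens whose cumulative probability reaches top_p is kept.
nucleus_out = model.generate(input_ids, do_sample=True, max_new_tokens=20, top_p=0.9, top_k=0)

# Top-k sampling: at each step only the 50 most likely tokens are kept.
topk_out = model.generate(input_ids, do_sample=True, max_new_tokens=20, top_k=50)

print(tokenizer.decode(nucleus_out[0]))
print(tokenizer.decode(topk_out[0]))
```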
 
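The new description points to RLHF-based detoxification, but the training code is not part of this commit. As orientation only, here is a rough sketch of what a PPO detoxification loop could look like with the `trl` library of that era; the 125M stand-in model, the toxicity classifier, the prompt, and every hyperparameter are illustrative assumptions, not the recipe behind the `ybelkada/gpt-neo-2.7B-detox` checkpoint:

```python
import torch
from transformers import AutoTokenizer, pipeline
from trl import AutoModelForCausalLMWithValueHead, PPOConfig, PPOTrainer

model_name = "EleutherAI/gpt-neo-125M"  # assumed stand-in; the Space itself serves 2.7B models
config = PPOConfig(model_name=model_name, learning_rate=1.41e-5, batch_size=1, mini_batch_size=1)

model = AutoModelForCausalLMWithValueHead.from_pretrained(model_name)
ref_model = AutoModelForCausalLMWithValueHead.from_pretrained(model_name)  # frozen reference for the KL penalty
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

# Assumed reward signal: the "not hate" probability from an off-the-shelf toxicity classifier.
toxicity = pipeline("text-classification", model="facebook/roberta-hate-speech-dynabench-r4-target")

ppo_trainer = PPOTrainer(config, model, ref_model, tokenizer)

query = tokenizer("The man said", return_tensors="pt").input_ids[0]
generation = model.generate(query.unsqueeze(0), max_new_tokens=20, do_sample=True)[0]
response = generation[query.shape[0]:]  # PPO expects only the generated continuation

result = toxicity(tokenizer.decode(response))[0]  # e.g. {"label": "nothate", "score": 0.98}
reward = torch.tensor(result["score"] if result["label"] == "nothate" else 1.0 - result["score"])

# One PPO step: push the policy toward higher (less toxic) reward while the
# KL penalty against ref_model keeps generations fluent and on-distribution.
stats = ppo_trainer.step([query], [response], [reward])
```

Whatever the exact recipe, the sliders added in this commit only change how the detoxified checkpoint's outputs are sampled at inference time, not the model itself.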