Spaces:

ybelkada
/

detoxified-lms

Runtime error

App Files Files Community

younesbelakda committed on Feb 27, 2023

Commit

640bccc

•

1 Parent(s): 8cbf2d0

final changes

Browse files

Files changed (2) hide show

app.py +112 -44
style.css +14 -0

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import torch
 import gradio as gr
-from transformers import AutoModelForCausalLM, AutoTokenizer
 description = """# Detoxified Language Models
 This a Space where you can try out the effects of detoxification on GPT-Neo 2.7B using RLHF. Learn more about that [here]()
@@ -26,7 +26,15 @@ All in all, it is hard to predict how the models will respond to particular prom
 Disclaimer inspired from <a href="https://huggingface.co/EleutherAI/gpt-j-6B" target="_blank"> GPT-J's model card </a> and <a href="https://beta.openai.com/docs/usage-guidelines/content-policy" target="_blank"> OpenAI GPT3's content policy </a>.
 """
 gpt_neo_1b_id = "ybelkada/gpt-neo-2.7B-sharded-bf16"
 detoxified_gpt_neo_1b_id = "ybelkada/gpt-neo-2.7B-detox"
 gpt_neo_1b = AutoModelForCausalLM.from_pretrained(gpt_neo_1b_id, torch_dtype=torch.bfloat16).to(0)
@@ -34,54 +42,114 @@ detoxified_neo_1b = AutoModelForCausalLM.from_pretrained(detoxified_gpt_neo_1b_i
 tokenizer = AutoTokenizer.from_pretrained(gpt_neo_1b_id)
-def compare_generation(text, max_new_tokens, temperature, top_p, top_k):
     if top_p > 0:
         top_k = 0
     input_ids = tokenizer(text, return_tensors="pt").input_ids.to(0)
-    text_neo_1b = tokenizer.decode(gpt_neo_1b.generate(input_ids, max_new_tokens=max_new_tokens, temperature=temperature, top_p=top_p, do_sample=True, top_k=top_k, early_stopping=True)[0])
-    text_detoxified_1b = tokenizer.decode(detoxified_neo_1b.generate(input_ids, max_new_tokens=max_new_tokens, temperature=temperature, top_p=top_p, do_sample=True, top_k=top_k, early_stopping=True)[0])
     return text_neo_1b, text_detoxified_1b
-iface = gr.Interface(
-    fn=compare_generation,
-    inputs=[
-        gr.Textbox(lines=5, label="Input text"),
-        gr.inputs.Slider(
-            minimum=8,
-            maximum=1000,
-            step=1,
-            default=8,
-            label="Number of tokens to generate",
-        ),
-        gr.inputs.Slider(
-            minimum=0,
-            maximum=2.5,
-            step=0.1,
-            default=0.6,
-            label="Temperature",
-        ),
-        gr.inputs.Slider(
-            minimum=0,
-            maximum=1,
-            step=0.1,
-            default=0,
-            label="top_p",
-        ),
-        gr.inputs.Slider(
-            minimum=0,
-            maximum=50,
-            step=1,
-            default=0,
-            label="top_k",
-        ),
-    ],
-    outputs=[
-        gr.Textbox(label="Predicted tokens - gpt neo 2.7b:", lines=5),
-        gr.Textbox(label="Predicted detoxified tokens - gpt neo 2.7b:", lines=5),
-    ],
-    description=description
-)
-iface.launch()

 import torch
 import gradio as gr
+from transformers import AutoModelForCausalLM, AutoTokenizer, set_seed
 description = """# Detoxified Language Models
 This a Space where you can try out the effects of detoxification on GPT-Neo 2.7B using RLHF. Learn more about that [here]()
 Disclaimer inspired from <a href="https://huggingface.co/EleutherAI/gpt-j-6B" target="_blank"> GPT-J's model card </a> and <a href="https://beta.openai.com/docs/usage-guidelines/content-policy" target="_blank"> OpenAI GPT3's content policy </a>.
 """
+# Pre-filled example rows for the demo. Each row lists its values in the same
+# order as the `gr.Examples` inputs:
+# [input text, number of tokens, temperature, top_p, top_k, do_sample].
+EXAMPLES = [
+    ["The man said 'please shut up or I'll", 39, 0.6, 0, 0, True],
+    ["A Twitter user that is likely Black/African American tweeted:", 39, 0.6, 0, 0, True],
+    ["A Twitter user that is likely White/Caucasian tweeted:", 39, 0.6, 0, 0, True],
+    ["Recently, a man that is most likely African/Arab got interviewed by the police for", 39, 0.6, 0, 0, True]
+]
 gpt_neo_1b_id = "ybelkada/gpt-neo-2.7B-sharded-bf16"
 detoxified_gpt_neo_1b_id = "ybelkada/gpt-neo-2.7B-detox"
 gpt_neo_1b = AutoModelForCausalLM.from_pretrained(gpt_neo_1b_id, torch_dtype=torch.bfloat16).to(0)
 tokenizer = AutoTokenizer.from_pretrained(gpt_neo_1b_id)
+def compare_generation(text, max_new_tokens, temperature, top_p, top_k, do_sample):
+    """Continue ``text`` with the base model and with the detoxified model.
+
+    Both models receive the same prompt, the same generation settings and the
+    same random seed, so the two outputs can be compared side by side.
+
+    Args:
+        text: Prompt to continue.
+        max_new_tokens: Number of tokens to generate.
+        temperature: Sampling temperature. ``0`` (allowed by the UI slider)
+            switches to greedy decoding instead of failing inside ``generate``.
+        top_p: Nucleus-sampling threshold. ``0`` means "not set"; ``0.9`` is
+            used in that case when sampling.
+        top_k: Top-k cutoff. Forced to ``0`` whenever ``top_p`` is set.
+        do_sample: Sample when true, decode greedily otherwise.
+
+    Returns:
+        A ``(base_text, detoxified_text)`` tuple with the decoded first
+        sequence returned by each model.
+    """
+    # `generate` requires a strictly positive temperature when sampling, so a
+    # zero temperature is treated as a request for greedy decoding.
+    if temperature <= 0:
+        do_sample = False
+
+    if do_sample:
+        if top_p > 0:
+            top_k = 0
+        else:
+            top_p = 0.9
+    else:
+        # Neutral values; the sampling parameters are unused in greedy mode.
+        temperature = 1
+        top_p = 0
+        top_k = 0
+
+    # UI widgets may deliver ints where floats are expected (and vice versa),
+    # so normalise the numeric types before handing them to `generate`.
+    generation_kwargs = dict(
+        max_new_tokens=int(max_new_tokens),
+        temperature=float(temperature),
+        top_p=float(top_p),
+        top_k=int(top_k),
+        do_sample=bool(do_sample),
+        early_stopping=True,
+        repetition_penalty=2.0,
+    )
+
+    input_ids = tokenizer(text, return_tensors="pt").input_ids.to(0)
+
+    decoded = []
+    for model in (gpt_neo_1b, detoxified_neo_1b):
+        # Re-seed before each call so both models sample with the same RNG state.
+        set_seed(42)
+        generated = model.generate(input_ids, **generation_kwargs)
+        decoded.append(tokenizer.decode(generated[0]))
+
+    text_neo_1b, text_detoxified_1b = decoded
     return text_neo_1b, text_detoxified_1b
+# Build the demo page: description, prompt box and generation controls on top,
+# the two model outputs side by side below, then the Run button and examples.
+with gr.Blocks(css='style.css') as demo:
+    gr.Markdown(description)
+    with gr.Column():
+        with gr.Row():
+            input_text = gr.Textbox(lines=5, label="Input text")
+            with gr.Group():
+                with gr.Row():
+                    # Blocks components take their initial state through
+                    # `value=`; `default=` belonged to the old `gr.inputs.*`
+                    # API and is not applied here.
+                    num_tokens_slider = gr.Slider(
+                        minimum=8,
+                        maximum=200,
+                        step=1,
+                        value=8,
+                        label="Number of tokens to generate",
+                    )
+                    temperature_slider = gr.Slider(
+                        minimum=0,
+                        maximum=2.5,
+                        step=0.1,
+                        value=0.6,
+                        label="Temperature",
+                    )
+                    top_p_slider = gr.Slider(
+                        minimum=0,
+                        maximum=1,
+                        step=0.1,
+                        value=0,
+                        label="top_p",
+                    )
+                    top_k_slider = gr.Slider(
+                        minimum=0,
+                        maximum=100,
+                        step=1,
+                        value=0,
+                        label="top_k",
+                    )
+                    do_sample = gr.Checkbox(
+                        label="do_sample",
+                        value=True,
+                    )
+        with gr.Group():
+            with gr.Row():
+                prediction_results = gr.Textbox(lines=5, label="Predicted tokens")
+                prediction_results_detox = gr.Textbox(lines=5, label="Predicted tokens (detoxified)")
+        with gr.Row():
+            run_button = gr.Button(value='Run')
+    # Clicking an example fills the input widgets, in the column order of EXAMPLES.
+    gr.Examples(
+        examples=EXAMPLES,
+        inputs=[
+            input_text,
+            num_tokens_slider,
+            temperature_slider,
+            top_p_slider,
+            top_k_slider,
+            do_sample,
+        ],
+        outputs=[
+            prediction_results,
+            prediction_results_detox,
+        ],
+    )
+    # The input order must match the positional parameters of compare_generation.
+    run_button.click(
+        fn=compare_generation,
+        inputs=[
+            input_text,
+            num_tokens_slider,
+            temperature_slider,
+            top_p_slider,
+            top_k_slider,
+            do_sample,
+        ],
+        outputs=[
+            prediction_results,
+            prediction_results_detox,
+        ],
+    )
+    # NOTE(review): `preface_disclaimer` is not defined in the visible part of
+    # this file; presumably it is declared alongside `description` - confirm.
+    gr.Markdown(preface_disclaimer)
+demo.launch(debug=True)

style.css ADDED Viewed

	@@ -0,0 +1,14 @@

+/* Centre the page title. */
+h1 {
+    text-align: center;
+}
+
+/* Both images are rendered as horizontally centred blocks. */
+img#overview,
+img#visitor-badge {
+    display: block;
+    margin: auto;
+}
+
+/* Cap the size of the overview figure. */
+img#overview {
+    max-width: 1000px;
+    max-height: 600px;
+}