Spaces:
Sleeping
Sleeping
jwkirchenbauer
committed on
Commit
•
cee0410
1
Parent(s):
8c252e3
markdown edits
Browse files
- demo_watermark.py +9 -7
demo_watermark.py
CHANGED
@@ -276,12 +276,14 @@ def format_names(s):
|
|
276 |
s=s.replace("green_fraction","Fraction of T in Greenlist")
|
277 |
s=s.replace("z_score","z-score")
|
278 |
s=s.replace("p_value","p value")
|
|
|
|
|
279 |
return s
|
280 |
|
281 |
def list_format_scores(score_dict, detection_threshold):
|
282 |
"""Format the detection metrics into a gradio dataframe input format"""
|
283 |
lst_2d = []
|
284 |
-
lst_2d.append(["z-score threshold", f"{detection_threshold}"])
|
285 |
for k,v in score_dict.items():
|
286 |
if k=='green_fraction':
|
287 |
lst_2d.append([format_names(k), f"{v:.1%}"])
|
@@ -293,6 +295,7 @@ def list_format_scores(score_dict, detection_threshold):
|
|
293 |
lst_2d.append([format_names(k), ("Watermarked" if v else "Human/Unwatermarked")])
|
294 |
else:
|
295 |
lst_2d.append([format_names(k), f"{v}"])
|
|
|
296 |
return lst_2d
|
297 |
|
298 |
def detect(input_text, args, device=None, tokenizer=None):
|
@@ -366,13 +369,12 @@ def run_gradio(args, model=None, device=None, tokenizer=None):
|
|
366 |
with gr.Accordion("A note on model capability",open=True):
|
367 |
gr.Markdown(
|
368 |
"""
|
369 |
-
|
370 |
-
|
371 |
-
|
372 |
-
|
373 |
-
We suggest you try prompts that give the model a few sentences and then allow it to 'continue' the prompt, as these weaker models are more capable in this simpler language modeling setting.
|
374 |
Some examples include the opening paragraph of a wikipedia article, or the first few sentences of a story.
|
375 |
-
Longer prompts
|
376 |
"""
|
377 |
)
|
378 |
gr.Markdown(f"Language model: {args.model_name_or_path} {'(float16 mode)' if args.load_fp16 else ''}")
|
|
|
276 |
s=s.replace("green_fraction","Fraction of T in Greenlist")
|
277 |
s=s.replace("z_score","z-score")
|
278 |
s=s.replace("p_value","p value")
|
279 |
+
s=s.replace("prediction","Prediction")
|
280 |
+
s=s.replace("confidence","Confidence")
|
281 |
return s
|
282 |
|
283 |
def list_format_scores(score_dict, detection_threshold):
|
284 |
"""Format the detection metrics into a gradio dataframe input format"""
|
285 |
lst_2d = []
|
286 |
+
# lst_2d.append(["z-score threshold", f"{detection_threshold}"])
|
287 |
for k,v in score_dict.items():
|
288 |
if k=='green_fraction':
|
289 |
lst_2d.append([format_names(k), f"{v:.1%}"])
|
|
|
295 |
lst_2d.append([format_names(k), ("Watermarked" if v else "Human/Unwatermarked")])
|
296 |
else:
|
297 |
lst_2d.append([format_names(k), f"{v}"])
|
298 |
+
lst_2d.insert(-1,["z-score Threshold", f"{detection_threshold}"])
|
299 |
return lst_2d
|
300 |
|
301 |
def detect(input_text, args, device=None, tokenizer=None):
|
|
|
369 |
with gr.Accordion("A note on model capability",open=True):
|
370 |
gr.Markdown(
|
371 |
"""
|
372 |
+
This demo uses open-source language models that fit on a single GPU. These models are less powerful than proprietary commercial tools like ChatGPT, Claude, or Bard.
|
373 |
+
|
374 |
+
Importantly, we use a language model that is designed to "complete" your prompt, and not a model that is fine-tuned to follow instructions.
|
375 |
+
For best results, prompt the model with a few sentences that form the beginning of a paragraph, and then allow it to "continue" your paragraph.
|
|
|
376 |
Some examples include the opening paragraph of a wikipedia article, or the first few sentences of a story.
|
377 |
+
Longer prompts that end mid-sentence will result in more fluent generations.
|
378 |
"""
|
379 |
)
|
380 |
gr.Markdown(f"Language model: {args.model_name_or_path} {'(float16 mode)' if args.load_fp16 else ''}")
|