jwkirchenbauer committed on
Commit
cee0410
1 Parent(s): 8c252e3

markdown edits

Browse files
Files changed (1) hide show
  1. demo_watermark.py +9 -7
demo_watermark.py CHANGED
@@ -276,12 +276,14 @@ def format_names(s):
276
  s=s.replace("green_fraction","Fraction of T in Greenlist")
277
  s=s.replace("z_score","z-score")
278
  s=s.replace("p_value","p value")
 
 
279
  return s
280
 
281
  def list_format_scores(score_dict, detection_threshold):
282
  """Format the detection metrics into a gradio dataframe input format"""
283
  lst_2d = []
284
- lst_2d.append(["z-score threshold", f"{detection_threshold}"])
285
  for k,v in score_dict.items():
286
  if k=='green_fraction':
287
  lst_2d.append([format_names(k), f"{v:.1%}"])
@@ -293,6 +295,7 @@ def list_format_scores(score_dict, detection_threshold):
293
  lst_2d.append([format_names(k), ("Watermarked" if v else "Human/Unwatermarked")])
294
  else:
295
  lst_2d.append([format_names(k), f"{v}"])
 
296
  return lst_2d
297
 
298
  def detect(input_text, args, device=None, tokenizer=None):
@@ -366,13 +369,12 @@ def run_gradio(args, model=None, device=None, tokenizer=None):
366
  with gr.Accordion("A note on model capability",open=True):
367
  gr.Markdown(
368
  """
369
- The models that can be used in this demo are limited to those that are both open source and that fit on a single commodity GPU.
370
- In particular, there aren't many models above a few billion parameters and almost none trained using both Instruction-finetuning an/or RLHF.
371
- Therefore, in both it's un-watermarked (normal) and watermarked states, the model is not generally able to respond well to the kinds of prompts that a 100B+ Instruction and RLHF tuned model such as ChatGPT, Claude, or Bard is.
372
-
373
- We suggest you try prompts that give the model a few sentences and then allow it to 'continue' the prompt, as these weaker models are more capable in this simpler language modeling setting.
374
  Some examples include the opening paragraph of a wikipedia article, or the first few sentences of a story.
375
- Longer prompts and stopping mid sentence often helps encourage more fluent, longer genrations.
376
  """
377
  )
378
  gr.Markdown(f"Language model: {args.model_name_or_path} {'(float16 mode)' if args.load_fp16 else ''}")
 
276
  s=s.replace("green_fraction","Fraction of T in Greenlist")
277
  s=s.replace("z_score","z-score")
278
  s=s.replace("p_value","p value")
279
+ s=s.replace("prediction","Prediction")
280
+ s=s.replace("confidence","Confidence")
281
  return s
282
 
283
  def list_format_scores(score_dict, detection_threshold):
284
  """Format the detection metrics into a gradio dataframe input format"""
285
  lst_2d = []
286
+ # lst_2d.append(["z-score threshold", f"{detection_threshold}"])
287
  for k,v in score_dict.items():
288
  if k=='green_fraction':
289
  lst_2d.append([format_names(k), f"{v:.1%}"])
 
295
  lst_2d.append([format_names(k), ("Watermarked" if v else "Human/Unwatermarked")])
296
  else:
297
  lst_2d.append([format_names(k), f"{v}"])
298
+ lst_2d.insert(-1,["z-score Threshold", f"{detection_threshold}"])
299
  return lst_2d
300
 
301
  def detect(input_text, args, device=None, tokenizer=None):
 
369
  with gr.Accordion("A note on model capability",open=True):
370
  gr.Markdown(
371
  """
372
+ This demo uses open-source language models that fit on a single GPU. These models are less powerful than proprietary commercial tools like ChatGPT, Claude, or Bard.
373
+
374
+ Importantly, we use a language model that is designed to "complete" your prompt, and not a model that is fine-tuned to follow instructions.
375
+ For best results, prompt the model with a few sentences that form the beginning of a paragraph, and then allow it to "continue" your paragraph.
 
376
  Some examples include the opening paragraph of a wikipedia article, or the first few sentences of a story.
377
+ Longer prompts that end mid-sentence will result in more fluent generations.
378
  """
379
  )
380
  gr.Markdown(f"Language model: {args.model_name_or_path} {'(float16 mode)' if args.load_fp16 else ''}")