mobicham committed on
Commit
f894411
1 Parent(s): fa27400

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +5 -3
README.md CHANGED
@@ -64,8 +64,10 @@ prepare_for_inference(model, backend="marlin", allow_merge=True) #use float16
64
  #Generate
65
  from hqq.utils.generation_hf import HFGenerator
66
 
67
- gen = HFGenerator(model, tokenizer, do_sample=True, compile_args=None) #skips compilation: slower, but works properly
68
- #gen = HFGenerator(model, tokenizer, do_sample=True) #compiled: much faster, but there's a bug with HF's StaticCache
 
 
 
69
 
70
- out = gen.generate("Write an essay about large language models.", max_new_tokens=1000, print_tokens=True)
71
  ```
 
64
  #Generate
65
  from hqq.utils.generation_hf import HFGenerator
66
 
67
+ gen = HFGenerator(model, tokenizer, max_new_tokens=1000, do_sample=True, compile="partial")
68
+
69
+ gen.generate("Write an essay about large language models", print_tokens=True)
70
+ gen.generate("Tell me a funny joke!", print_tokens=True)
71
+ gen.generate("How to make a yummy chocolate cake?", print_tokens=True)
72
 
 
73
  ```