mobicham committed on
Commit
e3c2810
1 Parent(s): 25d87d5

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +1 -0
README.md CHANGED
@@ -77,6 +77,7 @@ prepare_for_inference(model, backend="bitblas", allow_merge=False) #It takes a w
77
 
78
  #Generate
79
  ###################################################
 
80
  #gen = HFGenerator(model, tokenizer, max_new_tokens=1000, do_sample=True, compile=None) #Slower generation but no warm-up
81
  gen = HFGenerator(model, tokenizer, max_new_tokens=1000, do_sample=True, compile="partial").warmup() #Faster generation, but warm-up takes a while
82
 
 
77
 
78
  #Generate
79
  ###################################################
80
+ #For longer context, make sure to allocate enough cache via the cache_size= parameter
81
  #gen = HFGenerator(model, tokenizer, max_new_tokens=1000, do_sample=True, compile=None) #Slower generation but no warm-up
82
  gen = HFGenerator(model, tokenizer, max_new_tokens=1000, do_sample=True, compile="partial").warmup() #Faster generation, but warm-up takes a while
83