Update README.md
Browse files
README.md
CHANGED
@@ -77,6 +77,7 @@ prepare_for_inference(model, backend="bitblas", allow_merge=False) #It takes a w
 
 #Generate
 ###################################################
+#For longer context, make sure to allocate enough cache via the cache_size= parameter
 #gen = HFGenerator(model, tokenizer, max_new_tokens=1000, do_sample=True, compile=None) #Slower generation but no warm-up
 gen = HFGenerator(model, tokenizer, max_new_tokens=1000, do_sample=True, compile="partial").warmup() #Faster generation, but warm-up takes a while
 