Update README.md
Browse files
README.md
CHANGED
@@ -77,6 +77,7 @@ prepare_for_inference(model, backend="torchao_int4")
|
|
77 |
|
78 |
#Generate
|
79 |
###################################################
|
|
|
80 |
gen = HFGenerator(model, tokenizer, max_new_tokens=1000, do_sample=True, compile="partial").warmup() #Warm-up takes a while
|
81 |
|
82 |
gen.generate("Write an essay about large language models", print_tokens=True)
|
|
|
77 |
|
78 |
#Generate
|
79 |
###################################################
|
80 |
+
# For longer contexts, make sure to allocate enough cache via the cache_size= parameter
|
81 |
gen = HFGenerator(model, tokenizer, max_new_tokens=1000, do_sample=True, compile="partial").warmup() #Warm-up takes a while
|
82 |
|
83 |
gen.generate("Write an essay about large language models", print_tokens=True)
|