Update README.md
Browse files
README.md
CHANGED
|
@@ -77,6 +77,7 @@ prepare_for_inference(model, backend="torchao_int4")
|
|
| 77 |
|
| 78 |
#Generate
|
| 79 |
###################################################
|
|
|
|
| 80 |
gen = HFGenerator(model, tokenizer, max_new_tokens=1000, do_sample=True, compile="partial").warmup() #Warm-up takes a while
|
| 81 |
|
| 82 |
gen.generate("Write an essay about large language models", print_tokens=True)
|
|
|
|
| 77 |
|
| 78 |
#Generate
|
| 79 |
###################################################
|
| 80 |
+
#For longer contexts, make sure to allocate enough cache via the cache_size= parameter
|
| 81 |
gen = HFGenerator(model, tokenizer, max_new_tokens=1000, do_sample=True, compile="partial").warmup() #Warm-up takes a while
|
| 82 |
|
| 83 |
gen.generate("Write an essay about large language models", print_tokens=True)
|