Update README.md
Browse files
README.md
CHANGED
@@ -49,6 +49,7 @@ prepare_for_inference(model, backend="torchao_int4")
|
|
49 |
|
50 |
#Generate
|
51 |
###################################################
|
|
|
52 |
gen = HFGenerator(model, tokenizer, max_new_tokens=1000, do_sample=True, compile="partial").warmup() #Warm-up takes a while
|
53 |
|
54 |
gen.generate("Write an essay about large language models", print_tokens=True)
|
|
|
49 |
|
50 |
#Generate
|
51 |
###################################################
|
52 |
+
#For longer contexts, make sure to allocate a large enough cache via the cache_size= parameter
|
53 |
gen = HFGenerator(model, tokenizer, max_new_tokens=1000, do_sample=True, compile="partial").warmup() #Warm-up takes a while
|
54 |
|
55 |
gen.generate("Write an essay about large language models", print_tokens=True)
|