Update README.md
Browse files
README.md
CHANGED
@@ -63,7 +63,7 @@ prepare_for_inference(model, backend="marlin", allow_merge=True) #use float16
|
|
63 |
|
64 |
#Generate
|
65 |
from hqq.utils.generation_hf import HFGenerator
|
66 |
-
|
67 |
gen = HFGenerator(model, tokenizer, max_new_tokens=1000, do_sample=True, compile="partial")
|
68 |
|
69 |
gen.generate("Write an essay about large language models", print_tokens=True)
|
|
|
63 |
|
64 |
#Generate
|
65 |
from hqq.utils.generation_hf import HFGenerator
|
66 |
+
# For longer context, make sure to allocate enough cache via the cache_size= parameter
|
67 |
gen = HFGenerator(model, tokenizer, max_new_tokens=1000, do_sample=True, compile="partial")
|
68 |
|
69 |
gen.generate("Write an essay about large language models", print_tokens=True)
|