Update README.md
Browse files
README.md
CHANGED
@@ -64,8 +64,10 @@ prepare_for_inference(model, backend="marlin", allow_merge=True) #use float16
|
|
64 |
#Generate
|
65 |
from hqq.utils.generation_hf import HFGenerator
|
66 |
|
67 |
-
gen = HFGenerator(model, tokenizer, do_sample=True,
|
68 |
-
|
|
|
|
|
|
|
69 |
|
70 |
-
out = gen.generate("Write an essay about large language models.", max_new_tokens=1000, print_tokens=True)
|
71 |
```
|
|
|
64 |
#Generate
|
65 |
from hqq.utils.generation_hf import HFGenerator
|
66 |
|
67 |
+
gen = HFGenerator(model, tokenizer, max_new_tokens=1000, do_sample=True, compile="partial")
|
68 |
+
|
69 |
+
gen.generate("Write an essay about large language models", print_tokens=True)
|
70 |
+
gen.generate("Tell me a funny joke!", print_tokens=True)
|
71 |
+
gen.generate("How to make a yummy chocolate cake?", print_tokens=True)
|
72 |
|
|
|
73 |
```
|