Update README.md
Browse files
README.md
CHANGED
|
@@ -25,7 +25,7 @@ model on a GPU Space.
|
|
| 25 |
that applies a Zamir-Feder nested-lattice codec roundtrip (encode +
|
| 26 |
decode) to every K and V written into the cache.
|
| 27 |
|
| 28 |
-
|
| 29 |
from transformers import AutoModelForCausalLM
|
| 30 |
from kakeyalattice.hf import KakeyaLatticeCache
|
| 31 |
|
|
@@ -36,7 +36,7 @@ cache = KakeyaLatticeCache(
|
|
| 36 |
head_dim=model.config.head_dim,
|
| 37 |
)
|
| 38 |
out = model.generate(input_ids, max_new_tokens=200, past_key_values=cache)
|
| 39 |
-
|
| 40 |
|
| 41 |
## Caveats
|
| 42 |
|
|
|
|
| 25 |
that applies a Zamir-Feder nested-lattice codec roundtrip (encode +
|
| 26 |
decode) to every K and V written into the cache.
|
| 27 |
|
| 28 |
+```python
|
| 29 |
from transformers import AutoModelForCausalLM
|
| 30 |
from kakeyalattice.hf import KakeyaLatticeCache
|
| 31 |
|
|
|
|
| 36 |
head_dim=model.config.head_dim,
|
| 37 |
)
|
| 38 |
out = model.generate(input_ids, max_new_tokens=200, past_key_values=cache)
|
| 39 |
+```
|
| 40 |
|
| 41 |
## Caveats
|
| 42 |
|