rohitg committed
Commit 928bfc4
1 Parent(s): ba037b4

Update README.md

Files changed (1): README.md (+10, -2)
README.md CHANGED
 
---
license: apache-2.0
---

### Imports

```python
import torch
from hqq.engine.hf import HQQModelForCausalLM, AutoTokenizer
from hqq.utils.patching import prepare_for_inference
```

### Loading Weights

```python
# Download and load the pre-quantized 4-bit (group size 64) HQQ weights onto the GPU.
model = HQQModelForCausalLM.from_quantized("rohitg/Mixtral-8x22B-Instruct-v0.1-hf-4bit_g64-HQQ", device='cuda')
tokenizer = AutoTokenizer.from_pretrained('mistralai/Mixtral-8x22B-Instruct-v0.1')

# Patch the quantized layers to use the torchao int4 inference backend.
prepare_for_inference(model, backend="torchao_int4")
```
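
Even at 4-bit with group size 64, an 8x22B model occupies a large amount of VRAM. As an optional sanity check after loading, standard `torch.cuda` introspection reports how much memory the model actually holds; this snippet is illustrative and not part of the original card:

```python
# Optional: report GPU memory held by the loaded 4-bit model.
print(f"Allocated: {torch.cuda.memory_allocated() / 1024**3:.1f} GiB")
```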

### Text Generation

```python
# The prompt already contains <s> and Mixtral's [INST] ... [/INST] wrapper,
# so special tokens are not added again during tokenization.
prompt = "<s> [INST] How do I build a car? [/INST] "

inputs = tokenizer(prompt, return_tensors="pt", add_special_tokens=False)
outputs = model.generate(**(inputs.to('cuda')), max_new_tokens=1000)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```
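
The prompt above hard-codes Mixtral's `[INST]` format. Alternatively, it can be built from a message list with the tokenizer's chat template; a minimal sketch, assuming the `mistralai/Mixtral-8x22B-Instruct-v0.1` tokenizer ships its usual chat template:

```python
# Build the instruction prompt from a message list instead of by hand.
# The template already inserts <s> and the [INST] ... [/INST] wrapper,
# so tokenize the result with add_special_tokens=False as above.
messages = [{"role": "user", "content": "How do I build a car?"}]
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
```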
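
For long generations like `max_new_tokens=1000`, streaming tokens as they arrive is often nicer than waiting for the full decode. A sketch using transformers' `TextStreamer`, assuming the HQQ wrapper forwards `generate()` keyword arguments to the underlying transformers model:

```python
from transformers import TextStreamer

# Print decoded tokens to stdout as they are generated, omitting the prompt.
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

inputs = tokenizer(prompt, return_tensors="pt", add_special_tokens=False)
model.generate(**(inputs.to('cuda')), max_new_tokens=1000, streamer=streamer)
```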