Safetensors
English
falcon_mamba
4-bit precision
bitsandbytes
ybelkada committed on
Commit
154b0b1
·
verified ·
1 Parent(s): 2b87e94

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +6 -4
README.md CHANGED
@@ -61,9 +61,10 @@ model = AutoModelForCausalLM.from_pretrained("tiiuae/falcon-mamba-7b-instruct-4b
61
  messages = [
62
  {"role": "user", "content": "How many helicopters can a human eat in one sitting?"},
63
  ]
64
- input_ids = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True).input_ids.to("cuda")
 
65
 
66
- outputs = model.generate(input_ids)
67
  print(tokenizer.decode(outputs[0]))
68
  ```
69
 
@@ -87,9 +88,10 @@ model = torch.compile(model)
87
  messages = [
88
  {"role": "user", "content": "How many helicopters can a human eat in one sitting?"},
89
  ]
90
- input_ids = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True).input_ids.to("cuda")
 
91
 
92
- outputs = model.generate(input_ids)
93
  print(tokenizer.decode(outputs[0]))
94
  ```
95
 
 
61
  messages = [
62
  {"role": "user", "content": "How many helicopters can a human eat in one sitting?"},
63
  ]
64
+ input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
65
+ input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to("cuda")
66
 
67
+ outputs = model.generate(input_ids, max_new_tokens=30)
68
  print(tokenizer.decode(outputs[0]))
69
  ```
70
 
 
88
  messages = [
89
  {"role": "user", "content": "How many helicopters can a human eat in one sitting?"},
90
  ]
91
+ input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
92
+ input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to("cuda")
93
 
94
+ outputs = model.generate(input_ids, max_new_tokens=30)
95
  print(tokenizer.decode(outputs[0]))
96
  ```
97