ybelkada committed
Commit 29e0c3b
1 Parent(s): 285408b

Update README.md

Files changed (1)
  1. README.md +17 -10
README.md CHANGED
@@ -54,9 +54,11 @@ model = AutoModelForCausalLM.from_pretrained("tiiuae/falcon-mamba-7b-instruct")
 messages = [
     {"role": "user", "content": "How many helicopters can a human eat in one sitting?"},
 ]
-input_ids = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True).input_ids
 
-outputs = model.generate(input_ids)
+input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+input_ids = tokenizer(input_text, return_tensors="pt")
+
+outputs = model.generate(input_ids, max_new_tokens=30)
 print(tokenizer.decode(outputs[0]))
 ```
 
@@ -78,9 +80,11 @@ model = AutoModelForCausalLM.from_pretrained("tiiuae/falcon-mamba-7b-instruct",
 messages = [
     {"role": "user", "content": "How many helicopters can a human eat in one sitting?"},
 ]
-input_ids = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True).input_ids.to("cuda")
 
-outputs = model.generate(input_ids)
+input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to("cuda")
+
+outputs = model.generate(input_ids, max_new_tokens=30)
 print(tokenizer.decode(outputs[0]))
 ```
 
@@ -104,9 +108,10 @@ model = torch.compile(model)
 messages = [
     {"role": "user", "content": "How many helicopters can a human eat in one sitting?"},
 ]
-input_ids = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True).input_ids.to("cuda")
+input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to("cuda")
 
-outputs = model.generate(input_ids)
+outputs = model.generate(input_ids, max_new_tokens=30)
 print(tokenizer.decode(outputs[0]))
 ```
 
@@ -132,9 +137,10 @@ model = AutoModelForCausalLM.from_pretrained("tiiuae/falcon-mamba-7b-instruct",
 messages = [
     {"role": "user", "content": "How many helicopters can a human eat in one sitting?"},
 ]
-input_ids = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True).input_ids.to("cuda")
+input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to("cuda")
 
-outputs = model.generate(input_ids)
+outputs = model.generate(input_ids, max_new_tokens=30)
 print(tokenizer.decode(outputs[0]))
 ```
 
@@ -156,9 +162,10 @@ model = AutoModelForCausalLM.from_pretrained("tiiuae/falcon-mamba-7b-instruct",
 messages = [
     {"role": "user", "content": "How many helicopters can a human eat in one sitting?"},
 ]
-input_ids = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True).input_ids.to("cuda")
+input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to("cuda")
 
-outputs = model.generate(input_ids)
+outputs = model.generate(input_ids, max_new_tokens=30)
 print(tokenizer.decode(outputs[0]))
 ```
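Across all five snippets the change is the same: `tokenizer.apply_chat_template(..., tokenize=False, ...)` returns a plain string, so the old code's `.input_ids` attribute access could never work; the fixed code tokenizes the rendered prompt in an explicit second step and bounds generation with `max_new_tokens=30`. For reference, here is a minimal, self-contained sketch of the corrected pattern, assuming the CPU setup from the first hunk. The import and tokenizer-loading lines are not shown in this diff and are reconstructed from the visible `from_pretrained(...)` context, and the sketch takes `.input_ids` from the tokenizer output as the GPU snippets do:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

# Assumed setup: mirrors the from_pretrained(...) context line of the first hunk.
tokenizer = AutoTokenizer.from_pretrained("tiiuae/falcon-mamba-7b-instruct")
model = AutoModelForCausalLM.from_pretrained("tiiuae/falcon-mamba-7b-instruct")

messages = [
    {"role": "user", "content": "How many helicopters can a human eat in one sitting?"},
]

# tokenize=False renders the chat template to a plain string; it must then be
# tokenized explicitly before being passed to generate().
input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
input_ids = tokenizer(input_text, return_tensors="pt").input_ids

outputs = model.generate(input_ids, max_new_tokens=30)
print(tokenizer.decode(outputs[0]))
```

The GPU variants in the later hunks differ only in moving the token tensor to the device, i.e. `tokenizer(input_text, return_tensors="pt").input_ids.to("cuda")`.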