Upload README.md
README.md (changed)
```diff
@@ -168,8 +168,8 @@ model_name_or_path = "TheBloke/LlongOrca-7B-16K-GPTQ"
 # To use a different branch, change revision
 # For example: revision="gptq-4bit-32g-actorder_True"
 model = AutoModelForCausalLM.from_pretrained(model_name_or_path,
-                                             torch_dtype=torch.bfloat16,
                                              device_map="auto",
+                                             trust_remote_code=False,
                                              revision="main")
 
 tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
@@ -186,7 +186,7 @@ prompt_template=f'''<|im_start|>system
 print("\n\n*** Generate:")
 
 input_ids = tokenizer(prompt_template, return_tensors='pt').input_ids.cuda()
-output = model.generate(inputs=input_ids, temperature=0.7, max_new_tokens=512)
+output = model.generate(inputs=input_ids, temperature=0.7, do_sample=True, top_p=0.95, top_k=40, max_new_tokens=512)
 print(tokenizer.decode(output[0]))
 
 # Inference can also be done using transformers' pipeline
@@ -197,9 +197,11 @@ pipe = pipeline(
     model=model,
     tokenizer=tokenizer,
     max_new_tokens=512,
+    do_sample=True,
     temperature=0.7,
     top_p=0.95,
-
+    top_k=40,
+    repetition_penalty=1.1
 )
 
 print(pipe(prompt_template)[0]['generated_text'])
```
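Taken together, the updated example reads roughly as follows. This is a sketch assembled from the new side of the diff: the import line, the `"text-generation"` task string, and the short ChatML-style prompt are assumptions added here so the snippet is self-contained (the README defines its own `prompt_template`), and it presumes the AutoGPTQ/Transformers setup the rest of the README describes. The sampling parameters (`do_sample=True`, `temperature=0.7`, `top_p=0.95`, `top_k=40`, `repetition_penalty=1.1`) are exactly those introduced above.

```python
# Sketch of the post-change example. Imports, the task string and the prompt
# are assumptions added for completeness; parameter values come from the diff.
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

model_name_or_path = "TheBloke/LlongOrca-7B-16K-GPTQ"

# To use a different branch, change revision
# For example: revision="gptq-4bit-32g-actorder_True"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path,
                                             device_map="auto",
                                             trust_remote_code=False,
                                             revision="main")

tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)

# Illustrative ChatML-style prompt; the README defines its own prompt_template.
prompt = "Tell me about AI"
prompt_template = f'''<|im_start|>system
You are a helpful assistant.<|im_end|>
<|im_start|>user
{prompt}<|im_end|>
<|im_start|>assistant
'''

print("\n\n*** Generate:")

# Direct generation: sampling is now explicit (do_sample=True), so the
# temperature, top_p and top_k settings actually take effect.
input_ids = tokenizer(prompt_template, return_tensors='pt').input_ids.cuda()
output = model.generate(inputs=input_ids, temperature=0.7, do_sample=True,
                        top_p=0.95, top_k=40, max_new_tokens=512)
print(tokenizer.decode(output[0]))

# Inference can also be done using transformers' pipeline
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    do_sample=True,
    temperature=0.7,
    top_p=0.95,
    top_k=40,
    repetition_penalty=1.1
)

print(pipe(prompt_template)[0]['generated_text'])
```

The main behavioural change is `do_sample=True`: without it, `generate()` and the pipeline default to greedy decoding, and `temperature`, `top_p` and `top_k` are effectively ignored (recent Transformers versions warn about this), so the added flag is what makes the sampling settings take effect.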