Text Generation · Transformers · Safetensors · French · English · llama · legal · code · text-generation-inference · art · conversational · Inference Endpoints
manu committed
Commit f05665c
1 Parent(s): df360ce

Update README.md

Files changed (1)
  1. README.md +18 -36
README.md CHANGED
@@ -68,61 +68,43 @@ Our work can be cited as:
 
 This model is a Chat model, that is, it is finetuned for Chat function and works best with the provided template.
 
- #### With pipeline
-
- ```python
- import torch
- from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
-
-
- model_name = "croissantllm/CroissantLLMChat-v0.1"
- tokenizer = AutoTokenizer.from_pretrained(model_name)
- model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto")
-
- messages = [
-     {"role": "user", "content": "Qui est le président francais ?"},
- ]
-
- pipe = pipeline(
-     "text-generation",
-     model=model,
-     tokenizer=tokenizer,
- )
-
- generation_args = {
-     "max_new_tokens": 50,
-     "return_full_text": False,
-     "temperature": 0.2,
-     "do_sample": True,
- }
-
- output = pipe(messages, **generation_args)
- print(output[0]['generated_text'])
- ```
 
 #### With generate
 
 This might require a stopping criteria on <|im_end|> token.
 
 ```python
-
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
 
 model_name = "croissantllm/CroissantLLMChat-v0.1"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
- model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto")
+ model = AutoModelForCausalLM.from_pretrained(model_name)
+
+
+ generation_args = {
+     "max_new_tokens": 256,
+     "do_sample": True,
+     "temperature": 0.3,
+     "top_p": 0.90,
+     "top_k": 40,
+     "repetition_penalty": 1.05,
+     "eos_token_id": [tokenizer.eos_token_id, 32000],
+ }
 
 chat = [
-     {"role": "user", "content": "Que puis-je faire à Marseille en hiver?"},
+     {"role": "user", "content": "Qui est le président francais actuel ?"},
 ]
 
 chat_input = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
 
- inputs = tokenizer(chat_input, return_tensors="pt", add_special_tokens=True).to(model.device)
- tokens = model.generate(**inputs, max_new_tokens=150, do_sample=True, top_p=0.95, top_k=60, temperature=0.3)
+ inputs = tokenizer(chat_input, return_tensors="pt").to(model.device)
+ tokens = model.generate(**inputs, **generation_args)
+
 print(tokenizer.decode(tokens[0]))
+ # print tokens individually
+ print([(tokenizer.decode([tok]), tok) for tok in tokens[0].tolist()])
 ```
 
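The updated snippet stops generation by listing the <|im_end|> id (32000) in `eos_token_id`. As a hedged sketch of the alternative the card alludes to ("a stopping criteria on <|im_end|> token"), the example below wires an explicit `StoppingCriteria` into `generate`; the `StopOnTokens` class and the sampling settings are illustrative assumptions, not part of this commit.

```python
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    StoppingCriteria,
    StoppingCriteriaList,
)

model_name = "croissantllm/CroissantLLMChat-v0.1"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)


class StopOnTokens(StoppingCriteria):
    """Stop as soon as the last generated token is one of the given ids."""

    def __init__(self, stop_token_ids):
        self.stop_token_ids = set(stop_token_ids)

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        return input_ids[0, -1].item() in self.stop_token_ids


# <|im_end|> is assumed to resolve to id 32000, matching the eos_token_id list in the commit above;
# here it is looked up from the tokenizer instead of being hard-coded.
im_end_id = tokenizer.convert_tokens_to_ids("<|im_end|>")
stopping = StoppingCriteriaList([StopOnTokens([tokenizer.eos_token_id, im_end_id])])

chat = [{"role": "user", "content": "Qui est le président français actuel ?"}]
chat_input = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
inputs = tokenizer(chat_input, return_tensors="pt").to(model.device)

tokens = model.generate(
    **inputs,
    max_new_tokens=256,
    do_sample=True,
    temperature=0.3,
    stopping_criteria=stopping,
)
print(tokenizer.decode(tokens[0]))
```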