NickyNicky committed
Commit 22ea382
1 Parent(s): b13e7eb

Update README.md

Files changed (1):
  1. README.md +72 -0
README.md CHANGED
@@ -56,4 +56,76 @@ experts:
  base_model: NickyNicky/TinyDolphin-2.8-1.1b_oasst2_chatML_Cluster_1_V1
  gate_mode: random # one of "hidden", "cheap_embed", or "random"
  dtype: bfloat16 # output dtype (float32, float16, or bfloat16)
+ ```
+
+ ```python
+ from transformers import (
+     AutoModelForCausalLM,
+     AutoTokenizer,
+     GenerationConfig,
+     TextIteratorStreamer,  # optional: for streaming generation (see sketch below)
+ )
+ import torch
+
+ new_model = "Mix_TinyLlama-3x1B_oasst2_chatML_Cluster_3_2_1_V1"
+
+ # Load the merged MoE model in bfloat16, sharded automatically across
+ # the available devices.
+ model = AutoModelForCausalLM.from_pretrained(
+     new_model,
+     device_map="auto",
+     trust_remote_code=True,
+     torch_dtype=torch.bfloat16,
+     low_cpu_mem_usage=True,
+ )
+
+ tokenizer = AutoTokenizer.from_pretrained(
+     new_model,
+     max_length=2048,
+     trust_remote_code=True,
+     use_fast=True,
+ )
+ tokenizer.pad_token = tokenizer.eos_token
+ tokenizer.padding_side = 'right'
+
+ # ChatML-formatted prompt ("escribe una historia de amor" = "write a love story").
+ prompt = """<|im_start|>system
+ You are a helpful AI assistant.<|im_end|>
+ <|im_start|>user
+ escribe una historia de amor.<|im_end|>
+ <|im_start|>assistant
+ """
+
+ # The ChatML markers are already in the prompt string, so skip the
+ # tokenizer's own special tokens.
+ inputs = tokenizer.encode(prompt,
+                           return_tensors="pt",
+                           add_special_tokens=False).cuda()
+
+ generation_config = GenerationConfig(
+     max_new_tokens=700,
+     temperature=0.5,
+     top_p=0.9,
+     top_k=40,
+     repetition_penalty=1.1,  # 1.0 means no penalty; 1.2 suggested in the CTRL paper
+     do_sample=True,
+     pad_token_id=tokenizer.eos_token_id,
+     eos_token_id=tokenizer.eos_token_id,
+ )
+ outputs = model.generate(
+     input_ids=inputs,
+     generation_config=generation_config,
+ )
+ print(tokenizer.decode(outputs[0], skip_special_tokens=False))
  ```
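
The YAML fragment in the diff context is the tail of what appears to be a `mergekit-moe` configuration: three TinyLlama experts gated at random over a shared base model. For orientation, a complete config of this shape might look like the sketch below; the `source_model` entries are placeholders (the real expert list sits above the hunk shown here), and the `mergekit-moe` invocation in the trailing comment is the documented CLI form, not a command recorded in this commit.

```yaml
experts:
  - source_model: placeholder/expert-cluster-3   # hypothetical entries; the
  - source_model: placeholder/expert-cluster-2   # actual experts are above
  - source_model: placeholder/expert-cluster-1   # the diff context shown here
base_model: NickyNicky/TinyDolphin-2.8-1.1b_oasst2_chatML_Cluster_1_V1
gate_mode: random # one of "hidden", "cheap_embed", or "random"
dtype: bfloat16 # output dtype (float32, float16, or bfloat16)
# With gate_mode: random, no positive_prompts are needed; the merge would be
# built with something like:
#   mergekit-moe config.yml ./Mix_TinyLlama-3x1B_oasst2_chatML_Cluster_3_2_1_V1
```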
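
If the uploaded tokenizer ships a ChatML `chat_template` (a plausible but unverified assumption for a chatML-tuned model), the prompt could also be built with `apply_chat_template` instead of hand-written `<|im_start|>` markers; a minimal sketch, continuing from the snippet above:

```python
messages = [
    {"role": "system", "content": "You are a helpful AI assistant."},
    {"role": "user", "content": "escribe una historia de amor."},
]
# tokenize=False returns the rendered string; add_generation_prompt=True
# appends the opening <|im_start|>assistant tag so generation starts there.
prompt = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)
```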
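
The `TextIteratorStreamer` import above suggests streaming output. A minimal streaming sketch, reusing `model`, `tokenizer`, `inputs`, and `generation_config` from the example:

```python
from threading import Thread

# Decode tokens as they are produced; skip_prompt drops the echoed input.
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
thread = Thread(
    target=model.generate,
    kwargs=dict(
        input_ids=inputs,
        generation_config=generation_config,
        streamer=streamer,
    ),
)
thread.start()
for chunk in streamer:
    print(chunk, end="", flush=True)
thread.join()
```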