NickyNicky committed
Commit 22ea382
1 Parent(s): b13e7eb

Update README.md

Files changed (1):
  1. README.md +72 -0
README.md CHANGED
@@ -56,4 +56,76 @@ experts:
  base_model: NickyNicky/TinyDolphin-2.8-1.1b_oasst2_chatML_Cluster_1_V1
  gate_mode: random # one of "hidden", "cheap_embed", or "random"
  dtype: bfloat16 # output dtype (float32, float16, or bfloat16)
+ ```
+
+ ```python
+ from transformers import (
+     AutoModelForCausalLM,
+     AutoTokenizer,
+     GenerationConfig,
+     TextIteratorStreamer,  # optional: for streaming generation (see sketch below)
+ )
+ import torch
+
+ new_model = "Mix_TinyLlama-3x1B_oasst2_chatML_Cluster_3_2_1_V1"
+
+ # Load the merged MoE model in bfloat16, sharded automatically across
+ # the available devices.
+ model = AutoModelForCausalLM.from_pretrained(
+     new_model,
+     device_map="auto",
+     trust_remote_code=True,
+     torch_dtype=torch.bfloat16,
+     low_cpu_mem_usage=True,
+ )
+
+ tokenizer = AutoTokenizer.from_pretrained(
+     new_model,
+     max_length=2048,
+     trust_remote_code=True,
+     use_fast=True,
+ )
+ tokenizer.pad_token = tokenizer.eos_token
+ tokenizer.padding_side = 'right'
+
+ # ChatML-formatted prompt ("escribe una historia de amor" = "write a love story").
+ prompt = """<|im_start|>system
+ You are a helpful AI assistant.<|im_end|>
+ <|im_start|>user
+ escribe una historia de amor.<|im_end|>
+ <|im_start|>assistant
+ """
+
+ # The ChatML markers are already in the prompt string, so skip the
+ # tokenizer's own special tokens.
+ inputs = tokenizer.encode(prompt,
+                           return_tensors="pt",
+                           add_special_tokens=False).cuda()
+
+ generation_config = GenerationConfig(
+     max_new_tokens=700,
+     temperature=0.5,
+     top_p=0.9,
+     top_k=40,
+     repetition_penalty=1.1,  # 1.0 means no penalty; 1.2 suggested in the CTRL paper
+     do_sample=True,
+     pad_token_id=tokenizer.eos_token_id,
+     eos_token_id=tokenizer.eos_token_id,
+ )
+ outputs = model.generate(
+     input_ids=inputs,
+     generation_config=generation_config,
+ )
+ print(tokenizer.decode(outputs[0], skip_special_tokens=False))
  ```
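
The YAML fragment in the diff context is the tail of what appears to be a `mergekit-moe` configuration: three TinyLlama experts gated at random over a shared base model. For orientation, a complete config of this shape might look like the sketch below; the `source_model` entries are placeholders (the real expert list sits above the hunk shown here), and the `mergekit-moe` invocation in the trailing comment is the documented CLI form, not a command recorded in this commit.

```yaml
experts:
  - source_model: placeholder/expert-cluster-3   # hypothetical entries; the
  - source_model: placeholder/expert-cluster-2   # actual experts are above
  - source_model: placeholder/expert-cluster-1   # the diff context shown here
base_model: NickyNicky/TinyDolphin-2.8-1.1b_oasst2_chatML_Cluster_1_V1
gate_mode: random # one of "hidden", "cheap_embed", or "random"
dtype: bfloat16 # output dtype (float32, float16, or bfloat16)
# With gate_mode: random, no positive_prompts are needed; the merge would be
# built with something like:
#   mergekit-moe config.yml ./Mix_TinyLlama-3x1B_oasst2_chatML_Cluster_3_2_1_V1
```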
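
If the uploaded tokenizer ships a ChatML `chat_template` (a plausible but unverified assumption for a chatML-tuned model), the prompt could also be built with `apply_chat_template` instead of hand-written `<|im_start|>` markers; a minimal sketch, continuing from the snippet above:

```python
messages = [
    {"role": "system", "content": "You are a helpful AI assistant."},
    {"role": "user", "content": "escribe una historia de amor."},
]
# tokenize=False returns the rendered string; add_generation_prompt=True
# appends the opening <|im_start|>assistant tag so generation starts there.
prompt = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)
```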
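
The `TextIteratorStreamer` import above suggests streaming output. A minimal streaming sketch, reusing `model`, `tokenizer`, `inputs`, and `generation_config` from the example:

```python
from threading import Thread

# Decode tokens as they are produced; skip_prompt drops the echoed input.
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
thread = Thread(
    target=model.generate,
    kwargs=dict(
        input_ids=inputs,
        generation_config=generation_config,
        streamer=streamer,
    ),
)
thread.start()
for chunk in streamer:
    print(chunk, end="", flush=True)
thread.join()
```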