---
license: apache-2.0
datasets:
- Open-Orca/OpenOrca
- OpenAssistant/oasst_top1_2023-08-25
language:
- bg
- ca
- cs
- da
- de
- en
- es
- fr
- hr
- hu
- it
- nl
- pl
- pt
- ro
- ru
- sl
- sr
- sv
- uk
library_name: transformers
---

```
reference-data-model:

  datasets:
    - OpenAssistant/oasst_top1_2023-08-25:
        lang: "bg,ca,cs,da,de,en,es,fr,hr,hu,it,nl,pl,pt,ro,ru,sl,sr,sv,uk"
        link: https://huggingface.co/datasets/OpenAssistant/oasst_top1_2023-08-25

  base model:
    - Open-Orca/Mistral-7B-OpenOrca
      link: https://huggingface.co/Open-Orca/Mistral-7B-OpenOrca

  100 generation examples:
    - link: https://huggingface.co/NickyNicky/Mistral-7B-OpenOrca-oasst_top1_2023-08-25-v3/blob/main/output.xlsx

  trained with attention sinks enabled:
    - links:
        https://huggingface.co/blog/tomaarsen/attention-sinks
        https://github.com/tomaarsen/attention_sinks
        https://arxiv.org/abs/2309.17453

  version 1:
    - link: https://huggingface.co/NickyNicky/Mistral-7B-OpenOrca-oasst_top1_2023-08-25-v1
```
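
If you want to inspect the fine-tuning data listed above, a minimal sketch using the `datasets` library (not shown in the original card; only the repo id comes from it):

```py
from datasets import load_dataset

# Top-1-ranked multilingual OpenAssistant conversations used for fine-tuning.
dataset = load_dataset("OpenAssistant/oasst_top1_2023-08-25")
print(dataset)
```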

## Installation

```bash
# attention-sinks
pip install attention_sinks

# flash-attn (built against CUDA 11.8)
export CUDA_HOME=/usr/local/cuda-11.8
MAX_JOBS=4 pip install flash-attn --no-build-isolation -qqq
pip install git+"https://github.com/HazyResearch/flash-attention.git#subdirectory=csrc/rotary" -qqq
```
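
A quick sanity check that both packages are importable (a minimal sketch; flash-attn still needs a CUDA GPU at runtime):

```py
# If either import fails, revisit the install commands above.
import attention_sinks
import flash_attn

print("attention_sinks and flash_attn imported OK")
```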

## Version

```py
import torch, transformers, torchvision

torch.__version__, transformers.__version__, torchvision.__version__
# OUTPUT: ('2.0.1+cu118', '4.34.0', '0.15.2+cu118')
```

## How to use

```py
import torch
from transformers import (
    AutoTokenizer,
    GenerationConfig,
    TextIteratorStreamer,  # used in the streaming sketch below
)
# attention_sinks provides a drop-in AutoModelForCausalLM that adds
# attention-sink support on top of transformers.
from attention_sinks import AutoModelForCausalLM

# model_id = 'Open-Orca/Mistral-7B-OpenOrca'
model_id = 'NickyNicky/Mistral-7B-OpenOrca-oasst_top1_2023-08-25-v3'

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,
    load_in_4bit=True,
    low_cpu_mem_usage=True,
    attention_sink_size=4,
    attention_sink_window_size=1024,  # 512 <- lower for faster generation
)

max_length = 2048
print("max_length", max_length)

tokenizer = AutoTokenizer.from_pretrained(
    model_id,
    # use_fast=False,
    max_length=max_length,
)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = 'right'

# EXAMPLE #1
txt = """<|im_start|>user
I'm looking for an efficient Python script to output prime numbers. Can you help me out? I'm interested in a script that can handle large numbers and output them quickly. Also, it would be great if the script could take a range of numbers as input and output all the prime numbers within that range. Can you generate a script that fits these requirements? Thanks!<|im_end|>
<|im_start|>assistant
"""

# EXAMPLE #2 (Spanish: "I'm developing a REST API with Node.js and I'm trying
# to add some kind of security, e.g. with tokens or similar. Can you help me?")
# Note: this assignment overwrites EXAMPLE #1; keep only the prompt you want.
txt = """<|im_start|>user
Estoy desarrollando una REST API con Nodejs, y estoy tratando de aplicar algún sistema de seguridad, ya sea con tokens o algo similar, me puedes ayudar?<|im_end|>
<|im_start|>assistant
"""

inputs = tokenizer.encode(txt, return_tensors="pt").to("cuda")

generation_config = GenerationConfig(
    max_new_tokens=max_length,  # the original referenced an undefined name; reuse max_length
    temperature=0.7,
    top_p=0.9,
    top_k=50,  # the original used an undefined `len_tokens`; 50 is a common default
    repetition_penalty=1.11,
    do_sample=True,
    # pad_token_id=tokenizer.eos_token_id,
    # eos_token_id=tokenizer.eos_token_id,
    # use_cache=True,
    # stopping_criteria=StoppingCriteriaList([stopping_criteria]),
)
outputs = model.generate(
    input_ids=inputs,
    generation_config=generation_config,
)
print(tokenizer.decode(outputs[0], skip_special_tokens=False))  # or skip_special_tokens=True
```
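
`TextIteratorStreamer` is imported above but never used; a minimal streaming sketch, assuming the same `model`, `tokenizer`, `inputs`, and `generation_config` as above:

```py
from threading import Thread

# Run generate() on a worker thread and consume decoded text as it arrives.
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
thread = Thread(
    target=model.generate,
    kwargs=dict(
        input_ids=inputs,
        generation_config=generation_config,
        streamer=streamer,
    ),
)
thread.start()
for text_chunk in streamer:
    print(text_chunk, end="", flush=True)
thread.join()
```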