Ontocord.AI committed on
Commit
6d25feb
·
1 Parent(s): 2c3d005

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +43 -209
README.md CHANGED
@@ -21,227 +21,61 @@ This model was generated by averaging the weights of the following models
21
 
22
  - [Try demo on colab](https://colab.research.google.com/drive/1GgB8H30L5r0N--gexdEweK5f1yJfxMd_?usp=sharing)
23
 
24
- ```
25
- import os
26
- try:
27
- import transformers, fasttext, huggingface_hub
28
- except:
29
- os.system("pip install transformers huggingface_hub fasttext")
30
-
31
- from transformers import AutoTokenizer, AutoModelForCausalLM
32
- import fasttext
33
- from huggingface_hub import hf_hub_download
34
- import torch
35
- from torch import nn
36
- from transformers.models.gpt_neox.modeling_gpt_neox import GPTNeoXForCausalLM, GPTNeoXLayer
37
-
38
- class GPTNeoXExpertsForCasualLM(GPTNeoXForCausalLM):
39
- """ Stores various experts for layers 9, 10 """ # , 11
40
- __expert_classifier = None
41
-
42
- def __init__(self, config):
43
- global __expert_classifier
44
- super().__init__(config)
45
- self.config = config
46
- self.orig_chat = nn.ModuleList([GPTNeoXLayer(config) for _ in range(2)])
47
- self.uspto_expert = nn.ModuleList([GPTNeoXLayer(config) for _ in range(2)])
48
- self.github_expert = nn.ModuleList([GPTNeoXLayer(config) for _ in range(2)])
49
- self.pubmed_abstracts_expert = nn.ModuleList([GPTNeoXLayer(config) for _ in range(2)])
50
- self.freelaw_expert = nn.ModuleList([GPTNeoXLayer(config) for _ in range(2)])
51
- self.arxiv_expert = nn.ModuleList([GPTNeoXLayer(config) for _ in range(2)])
52
- self.merged_chat_expert = nn.ModuleList([GPTNeoXLayer(config) for _ in range(2)])
53
- self.curr_expert = "MergedChat"
54
- if GPTNeoXExpertsForCasualLM.__expert_classifier is None:
55
- file_name = hf_hub_download(repo_id="Multi-Domain-Expert-Layers/MDEL-theblackcat-chat-5-experts", filename="expert_classify.ftz")
56
- GPTNeoXExpertsForCasualLM.__expert_classifier = fasttext.load_model(file_name)
57
- print (file_name)
58
-
59
- def predict_expert(self, text):
60
- """
61
- ['__label__StackExchange',
62
- '__label__PubMed-Abstracts',
63
- '__label__Github',
64
- '__label__USPTO-Backgrounds',
65
- '__label__Pile-CC',
66
- '__label__PubMed-Central',
67
- '__label__OpenWebText2',
68
- '__label__FreeLaw',
69
- '__label__Wikipedia-(en)',
70
- '__label__ArXiv',
71
- '__label__DM-Mathematics',
72
- '__label__NIH-ExPorter',
73
- '__label__HackerNews',
74
- '__label__Enron-Emails',
75
- '__label__OpenSubtitles',
76
- '__label__YoutubeSubtitles',
77
- '__label__Books3',
78
- '__label__EuroParl',
79
- '__label__Gutenberg-(PG-19)',
80
- '__label__PhilPapers',
81
- '__label__BookCorpus2',
82
- '__label__Ubuntu-IRC']
83
- """
84
- text = text.replace("<human>: ", " ").replace("<bot>: ", " ").strip().replace("\n", " ").replace("\r", " ").replace(" ", " ")
85
- answer = GPTNeoXExpertsForCasualLM.__expert_classifier.predict(text)
86
- label = answer[0][0].replace("__label__", "")
87
- score = answer[1][0]
88
- return (label, score)
89
-
90
- def generate_with_expert(self, text, tokenizer, expert="", return_answer_only=False, do_self_contrastive=True, max_length=128, min_length=1, max_return_sequences=1, do_sample=True, do_beam=False, device="cuda", target_lang=None):
91
- """Generates using one of the experts."""
92
- tokenizer.pad_token = tokenizer.eos_token
93
-
94
- if type(text) is str:
95
- text = [text]
96
- #hack - let's assume a single expert per batch
97
- if not expert:
98
- label, score = self.predict_expert(text[0])
99
- if "PubMed" in label or "FreeLaw" in label or "ArXiv" in label or "Github" in label or "USPTO" in label:
100
- if score > 0.8:
101
- expert = label
102
- elif score > 0.6:
103
- expert = "MergedChat"
104
- else:
105
- expert = "OrigChat"
106
- else:
107
- expert = "OrigChat"
108
- if expert != self.curr_expert:
109
- print ("Switching to expert", expert)
110
- self.curr_expert = expert
111
- for layer_id in range(2):
112
- if expert == "OrigChat":
113
- self.gpt_neox.layers[layer_id+9] = self.orig_chat[layer_id]
114
- elif "USPTO" in expert:
115
- self.gpt_neox.layers[layer_id+9] = self.uspto_expert[layer_id]
116
- elif "Github" in expert:
117
- self.gpt_neox.layers[layer_id+9] = self.github_expert[layer_id]
118
- elif "PubMed" in expert:
119
- self.gpt_neox.layers[layer_id+9] = self.pubmed_abstracts_expert[layer_id]
120
- elif "ArXiv" in expert:
121
- self.gpt_neox.layers[layer_id+9] = self.arxiv_expert[layer_id]
122
- elif "FreeLaw" in expert:
123
- self.gpt_neox.layers[layer_id+9] = self.freelaw_expert[layer_id]
124
- else:
125
- self.gpt_neox.layers[layer_id+9] = self.merged_chat_expert[layer_id]
126
- text = [p.strip() for p in text]
127
- input_ids = tokenizer(text, return_tensors='pt',padding=True, truncation=True, max_length=max_length )
128
- input_ids = input_ids.to(device)
129
- with torch.no_grad():
130
- outputs = self.generate(
131
- **input_ids,
132
- max_length=max_length,
133
- repetition_penalty=1.1,
134
- min_length=min_length,
135
- do_sample=True,
136
- top_p=0.95,
137
- penalty_alpha=0.6 if do_self_contrastive else None,
138
- top_k=10,
139
- )
140
- ret = []
141
- for i in range(len(outputs)): # can use batch_decode, unless we want to do something special here
142
- out = tokenizer.decode(outputs[i], skip_special_tokens=True)
143
- if return_answer_only:
144
- out = out[len(text[i]):].lstrip(".? \n\t")
145
- ret.append(out)
146
-
147
- return ret
148
-
149
- tokenizer = AutoTokenizer.from_pretrained("theblackcat102/pythia-1b-deduped-sft")
150
-
151
- tokenizer.pad_token = tokenizer.eos_token
152
-
153
- model = GPTNeoXExpertsForCasualLM.from_pretrained("Multi-Domain-Expert-Layers/MDEL-theblackcat-chat-5-experts").half().cuda().eval()
154
-
155
- print ('##')
156
- print (model.generate_with_expert("source code for sorting a list <bot>:", tokenizer) [0])
157
- print ('##')
158
- print (model.generate_with_expert("When was Abraham Lincoln born? <bot>:", tokenizer) [0])
159
- print ('##')
160
- print (model.generate_with_expert("Medical journal article about ovarian cancer <bot>:", tokenizer) [0])
161
- ```
162
-
163
- ## Produces this output:
164
 
165
  ```
 
 
 
166
  ##
167
- Switching to expert Github
168
  Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
169
- source code for sorting a list : [sort_by(a) > sort_by(([1, 2])) > sort_by([])]
170
- sort_by([sort_by([1, 2] + [10 < 5]), 1 - 10 < 5]) # Error: invalid input. [sort_by((1 - 4 - 6)) > sort_by((2 * 9))) > sort_by(-4 - (-6 - -7)) > sort_by(-8 - (-9 - -5)) > sort_by(-(-7 - (-8 - (9)))) > sort_by
 
 
 
 
 
 
171
  ##
172
- Switching to expert OrigChat
173
  Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
174
- When was Abraham Lincoln born? : I'm sorry. Lincoln's parents had died in a plane crash shortly after he was born, and so he spent most of his formative years being raised by his older brother, William Herndon (who also served as Lincoln's secretary during the Civil War).
175
 
176
- Lincoln's family had been poor all their lives, and were very close to poverty when he was an infant. As a result, there are many stories about him that show how he struggled with hunger at a young age, which led him to want to be able to eat whatever was available. He often ate only enough to
177
  ##
178
  Switching to expert PubMed-Abstracts
179
- Medical journal article about ovarian cancer : A retrospective study of a population-based cohort in Northern Ireland.
 
 
 
 
 
180
 
181
- In the 1990s and early 2000, there was an increase in the number of new patients with non-ovarian malignancy seen at the National Hospital for Women's Services (Nish) Cancer Screening Service. It is likely that the increase came from a screening programme in the Northern Ireland Health and Care Plan, where people who are not in employment were offered cancer screening by a group of local health care practitioners or nurses. This approach would be appropriate outside the Northern Ireland Cancer Screening Programme and I suspect it was the practice
182
  ```
183
 
184
- ### To recreate the expert, modify this script. We can also extend it to do dynamic merging and/or experiment with different weights for different layers.
 
185
 
186
  ```
 
 
 
 
 
 
 
 
 
 
 
 
 
187
 
188
- def recreate_merged_expert():
189
- model1 = GPTNeoXExpertsForCasualLM.from_pretrained("theblackcat102/pythia-1b-deduped-sft").float()
190
-
191
- model2 = AutoModelForCausalLM.from_pretrained("stillerman/MDEL-pubmed-feelaw-github-arxiv").float()
192
-
193
- model_uspto = AutoModelForCausalLM.from_pretrained("Multi-Domain-Expert-Layers/expert-uspto").float()
194
-
195
- model_github = AutoModelForCausalLM.from_pretrained("Multi-Domain-Expert-Layers/expert-github").float()
196
- model_pubmed_abstracts = AutoModelForCausalLM.from_pretrained("Multi-Domain-Expert-Layers/expert-pubmed_abstracts").float()
197
- model_freelaw = AutoModelForCausalLM.from_pretrained("Multi-Domain-Expert-Layers/expert-freelaw").float()
198
- model_arxiv = AutoModelForCausalLM.from_pretrained("Multi-Domain-Expert-Layers/expert-arxiv").float()
199
-
200
- model = AutoModelForCausalLM.from_pretrained("theblackcat102/pythia-1b-deduped-sft").float() # half().cuda().eval()
201
-
202
- with torch.no_grad():
203
- for layer_id in [9,10]: #9,10,11,12,13
204
- model1.orig_chat[layer_id-9] = model.gpt_neox.layers[layer_id]
205
-
206
- for layer_id in [9,10]: #9,10,11,12,13
207
- for p1, p2, p3 in zip(model1.gpt_neox.layers[layer_id].parameters(), model2.gpt_neox.layers[layer_id].parameters(), model_uspto.gpt_neox.layers[layer_id].parameters()):
208
- p1.data = p1.data*.6 + p2.data*0.3 + p3.data*0.1
209
- model1.merged_chat_expert[layer_id-9] = model1.gpt_neox.layers[layer_id]
210
-
211
- #model1.uspto_expert.layers_9_10_11 = []
212
- for layer_id in [9,10]: #9,10,11,12,13
213
- for p1, p2 in zip(model_uspto.gpt_neox.layers[layer_id].parameters(), model.gpt_neox.layers[layer_id].parameters()):
214
- p1.data = p1.data*.6 + p2.data*0.4
215
- model1.uspto_expert[layer_id-9] = model_uspto.gpt_neox.layers[layer_id]
216
-
217
- #model1.github_expert.layers_9_10_11 = []
218
- for layer_id in [9,10]: #9,10,11,12,13
219
- for p1, p2 in zip(model_github.gpt_neox.layers[layer_id].parameters(), model.gpt_neox.layers[layer_id].parameters()):
220
- p1.data = p1.data*.6 + p2.data*0.4
221
- model1.github_expert[layer_id-9] = model_github.gpt_neox.layers[layer_id]
222
-
223
- #model1.pubmed_abstracts_expert.layers_9_10_11 = []
224
- for layer_id in [9,10]: #9,10,11,12,13
225
- for p1, p2 in zip(model_pubmed_abstracts.gpt_neox.layers[layer_id].parameters(), model.gpt_neox.layers[layer_id].parameters()):
226
- p1.data = p1.data*.6 + p2.data*0.4
227
- model1.pubmed_abstracts_expert[layer_id-9] = model_pubmed_abstracts.gpt_neox.layers[layer_id]
228
-
229
- #model1.freelaw_expert.layers_9_10_11 = []
230
- for layer_id in [9,10]: #9,10,11,12,13
231
- for p1, p2 in zip(model_freelaw.gpt_neox.layers[layer_id].parameters(), model.gpt_neox.layers[layer_id].parameters()):
232
- p1.data = p1.data*.6 + p2.data*0.4
233
- model1.freelaw_expert[layer_id-9] = model_freelaw.gpt_neox.layers[layer_id]
234
-
235
- #model1.arxiv_expert.layers_9_10_11 = []
236
- for layer_id in [9,10]: #9,10,11,12,13
237
- for p1, p2 in zip(model_arxiv.gpt_neox.layers[layer_id].parameters(), model.gpt_neox.layers[layer_id].parameters()):
238
- p1.data = p1.data*.6 + p2.data*0.4
239
- model1.arxiv_expert[layer_id-9] = model_arxiv.gpt_neox.layers[layer_id]
240
-
241
-
242
-
243
- model1 = model1.half().eval()
244
- model1.save_pretrained("MDEL-theblackcat-chat-5-experts", torch_dtype=torch.float16)
245
- model1.push_to_hub("Multi-Domain-Expert-Layers/MDEL-theblackcat-chat-5-experts")
246
- return model1
247
- ```
 
21
 
22
  - [Try demo on colab](https://colab.research.google.com/drive/1GgB8H30L5r0N--gexdEweK5f1yJfxMd_?usp=sharing)
23
 
24
+ ## Using the automatic routing:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
  ```
27
+ #Example with reverse=False:
28
+
29
+ [2, 1, -1, -2, -3, -4, -5, -6, -7, -8, -9, -
30
  ##
31
+ Switching to expert USPTO-Backgrounds
32
  Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
33
+ Give me a patent for shoes made of water :
34
+ Field of the Invention: A shoe that is waterproof and water resistant.
35
+
36
+ Description: The shoe is waterproof and resistant to water, but it can be easily washed with soap or water. It also has a rubber-like coating on the bottom to make it waterproof. The rubber-like coating helps the shoe to stay dry when wet.
37
+
38
+ The rubber-like coating is used in the lining of a shoe to keep the shoe from getting damaged by water. The rubber-coated lining prevents the shoe from being scratched by any sharp objects.
39
+
40
+ The
41
  ##
42
+ Switching to expert FreeLaw
43
  Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
44
+ Court case of Nguyen vs. State of California : The California Supreme Court ruled that the "unreasonable search" clause in the federal constitution is not a violation of the Fourth Amendment, but rather a violation of the Fifth Amendment.
45
 
46
+ The court said that the "unconstitutional search" clause in Article I, Section 20, of the Constitution, which says that any person may be arrested without a warrant and without a warrant for any purpose, except when there is probable cause to believe that they have committed a crime, is a violation of the 5th Amendment, which says that no person shall be arrested without a *w
47
  ##
48
  Switching to expert PubMed-Abstracts
49
+ Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
50
+ Medical journal article about ovarian cancer :
51
+
52
+ _URL_0_
53
+
54
+ This is a very common thing in the medical field. It's called "endometrial cancer" and it's not actually a cancer, but it's a benign tumor that can be removed by a simple surgical procedure. The reason this is so common is because there are many different types of cancers that can cause this. There are two main types of cancer that I think are most commonly known as "endometrioid" and "endometroid". Endometrioid cancer is the one you're talking about here.
55
 
 
56
  ```
57
 
58
+
59
+ ## Using the original chat model:
60
 
61
  ```
62
+ Compared to original chat model
63
+ ##
64
+ Switching to expert OrigChat
65
+ Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
66
+ source code for sorting a list :
67
+ sort(list, key=lambda x: x[1], reverse=True) #Warning: this code generation is experimental. Please inspect the code for any bugs before executing.
68
+ ##
69
+ Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
70
+ Give me a patent for shoes made of water :
71
+ Field of the Invention: The invention relates to a shoe with a sole and an upper, wherein the sole is formed by a first layer of material and the upper is formed by a second layer of material.
72
+ ##
73
+ Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
74
+ Court case of Nguyen vs. State of California :
75
 
76
+ The court ruled that the state's ban on same-sex marriage was unconstitutional, and that it had to be overturned by a higher court. The ruling came in a lawsuit brought by two gay men who were denied the right to marry because of their sexual orientation.
77
+ ##
78
+ Medical journal article about ovarian cancer :
79
+
80
+ _URL_0_
81
+ ```