To do:

2. Define a more effective concatenation strategy.
3. Adopt AnglE to fine-tune TinyLlama.
4. Loss function (see the sketch below).
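
The loss function (item 4) is still open. For orientation only, here is a minimal sketch of an in-batch contrastive (InfoNCE) objective over L2-normalized embeddings; the function name, temperature, and pairing convention are illustrative assumptions, not this repository's training code. AnglE's own recipe additionally combines angle-optimized and cosine-based objectives.

```python
import torch
import torch.nn.functional as F

def info_nce_loss(query_emb, pos_emb, temperature=0.05):
    """In-batch contrastive (InfoNCE) loss sketch, not this repo's actual objective.

    Row i of pos_emb is assumed to be the positive for row i of query_emb;
    every other row in the batch serves as a negative.
    """
    query_emb = F.normalize(query_emb, p=2, dim=1)
    pos_emb = F.normalize(pos_emb, p=2, dim=1)
    # (batch, batch) cosine-similarity matrix, scaled by an illustrative temperature
    logits = query_emb @ pos_emb.T / temperature
    labels = torch.arange(logits.size(0), device=logits.device)
    return F.cross_entropy(logits, labels)
```
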
To run the TE_Embedding model:

```python
import os

import numpy as np
import torch
import torch.nn.functional as F
from transformers import AutoConfig, AutoTokenizer, AutoModelForCausalLM


class TEmbeddingModel(torch.nn.Module):
    def __init__(self, model_name_or_path):
        super().__init__()
        self.prompt_prefix = "Reading the below text and answer questions:\n"
        self.prompt_suffixes = ["\n1.One word to summarize the above text:",
                                "\n2.The deeper meaning of the above text:"]
        self.hidden_size = 2048  # depends on the base model
        self.model_name_or_path = model_name_or_path
        # One projection head per suffix; their outputs are concatenated,
        # so each head maps to hidden_size // n_suffixes dimensions.
        self.linear_suffixes = torch.nn.ModuleList(
            [torch.nn.Linear(self.hidden_size, self.hidden_size // len(self.prompt_suffixes))
             for _ in range(len(self.prompt_suffixes))])
        self.tokenizer, self.llama = self.load_llama()
        self.tanh = torch.nn.Tanh()

        # Pre-tokenize the suffixes once; they are appended to every input.
        # Note: tokenized with default settings, so special tokens (e.g. BOS)
        # may be included in the suffix ids.
        self.suffixes_ids = []
        self.suffixes_ids_len = []
        self.suffixes_len = 0
        for suffix in self.prompt_suffixes:
            ids = self.tokenizer(suffix, return_tensors="pt")["input_ids"].tolist()[0]
            self.suffixes_ids += ids
            self.suffixes_ids_len.append(len(ids))
            self.suffixes_len += len(ids)

        self.suffixes_ones = torch.ones(self.suffixes_len, dtype=torch.long)
        self.suffixes_ids = torch.tensor(self.suffixes_ids, dtype=torch.long)

        # Load the pretrained projection-head weights shipped with the repo,
        # then keep the heads on the same device as the language model.
        linear_file = ".//TE//linears"
        load_layers = torch.load(linear_file, map_location="cpu")
        model_state = self.state_dict()
        model_state.update(load_layers)
        self.load_state_dict(model_state, strict=False)
        self.linear_suffixes.to(self.llama.device)

    def load_llama(self):
        config = AutoConfig.from_pretrained(self.model_name_or_path)
        tokenizer = AutoTokenizer.from_pretrained(self.model_name_or_path)
        # Pad on the left so the appended suffix tokens stay at the very end.
        tokenizer.padding_side = "left"
        model = AutoModelForCausalLM.from_pretrained(
            self.model_name_or_path,
            config=config,
            low_cpu_mem_usage=True,
            device_map="auto",
        )
        model.config.use_cache = False

        if tokenizer.pad_token is None:
            tokenizer.add_special_tokens({'pad_token': '[PAD]'})
            model.resize_token_embeddings(len(tokenizer))
        return tokenizer, model

    def forward(self, sentences):
        prompts_embeddings = []
        sentences = [self.prompt_prefix + s for s in sentences]  # prepend the instruction prefix
        inputs = self.tokenizer(sentences, max_length=256, padding=True, truncation=True,
                                return_tensors='pt')
        attention_mask = inputs["attention_mask"]
        input_ids = inputs["input_ids"]
        batch_size = len(sentences)
        device = self.llama.device

        # Append the pre-tokenized suffixes (and matching attention) to every row.
        suffixes_ones = self.suffixes_ones.unsqueeze(0).repeat(batch_size, 1)
        attention_mask = torch.cat([attention_mask, suffixes_ones], dim=-1).to(device)
        suffixes_ids = self.suffixes_ids.unsqueeze(0).repeat(batch_size, 1)
        input_ids = torch.cat([input_ids, suffixes_ids], dim=-1).to(device)

        last_hidden_state = self.llama.base_model(
            attention_mask=attention_mask, input_ids=input_ids).last_hidden_state

        # Walk backward over the suffix boundaries: the i-th head pools the
        # hidden state at the last token of the i-th suffix, counting from
        # the end of the prompt.
        index = -1
        for i in range(len(self.suffixes_ids_len)):
            embedding = last_hidden_state[:, index, :]
            embedding = self.linear_suffixes[i](embedding)
            prompts_embeddings.append(embedding)
            index -= self.suffixes_ids_len[-i - 1]

        output_embedding = torch.cat(prompts_embeddings, dim=-1)
        output_embedding = self.tanh(output_embedding)
        output_embedding = F.normalize(output_embedding, p=2, dim=1)
        return output_embedding

    def encode(self, sentences, batch_size=10, **kwargs):
        """Embed a list of sentences in mini-batches; returns a NumPy array."""
        size = len(sentences)
        embeddings = None
        handled = 0
        while handled < size:
            batch = sentences[handled:handled + batch_size]
            output_embeddings = self.forward(batch)
            result = output_embeddings.detach().cpu().numpy()
            handled += result.shape[0]  # advance by the actual batch size
            if embeddings is not None:
                embeddings = np.concatenate((embeddings, result), axis=0)
            else:
                embeddings = result
        return embeddings


if __name__ == "__main__":
    TE_model = TEmbeddingModel("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
    TE_model.eval()
    with torch.no_grad():
        output = TE_model(["Hello", "Nice to meet you"])
        cos_sim = F.cosine_similarity(output[0], output[1], dim=0)
        print(cos_sim)
```
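
Beyond the `__main__` demo, the `encode` method batches arbitrarily many sentences and returns a NumPy array, which is the kind of interface embedding benchmarks such as MTEB expect. A minimal usage sketch (the sentences are illustrative):

```python
# Assumes the TEmbeddingModel class above has been defined/imported.
sentences = ["The sky is blue.", "Water boils at 100 degrees Celsius.", "Cats are mammals."]
TE_model = TEmbeddingModel("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
TE_model.eval()
with torch.no_grad():
    embeddings = TE_model.encode(sentences, batch_size=2)
print(embeddings.shape)  # (3, 2048) for TinyLlama's 2048-dim hidden size
```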