technicolor committed
Commit 086ec01
Parent(s): bf10854

Update README.md

Files changed (1): README.md (+113, -0)
README.md CHANGED
@@ -7,3 +7,116 @@ To do:
2. Re-define a more effective concatenation.
3. Adopt AnglE to fine-tune TinyLlama.
4. Loss function.

To run the TE_Embedding model:

```python
import os

import numpy as np
import torch
import torch.nn.functional as F
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer


class TEmbeddingModel(torch.nn.Module):
    def __init__(self, model_name_or_path):
        super(TEmbeddingModel, self).__init__()
        self.prompt_prefix = "Reading the below text and answer questions:\n"
        self.prompt_suffixes = ["\n1.One word to summarize the above text:",
                                "\n2.The deeper meaning of the above text:"]
        self.hidden_size = 2048  # depends on the base model
        self.model_name_or_path = model_name_or_path
        # One projection head per suffix; their outputs are concatenated later,
        # so each head maps to hidden_size // num_suffixes dimensions.
        self.linear_suffixes = torch.nn.ModuleList(
            [torch.nn.Linear(self.hidden_size, self.hidden_size // len(self.prompt_suffixes))
             for _ in range(len(self.prompt_suffixes))])
        self.tokenizer, self.llama = self.load_llama()
        self.tanh = torch.nn.Tanh()

        # Pre-tokenize the suffixes once; they are appended to every batch.
        self.suffixes_ids = []
        self.suffixes_ids_len = []
        self.suffixes_len = 0
        for suffix in self.prompt_suffixes:
            ids = self.tokenizer(suffix, return_tensors="pt")["input_ids"].tolist()[0]
            self.suffixes_ids += ids
            self.suffixes_ids_len.append(len(ids))
            self.suffixes_len += len(ids)

        self.suffixes_ones = torch.ones(self.suffixes_len, dtype=torch.long)
        self.suffixes_ids = torch.tensor(self.suffixes_ids)

        # Load the pretrained projection heads into this module.
        linear_file = ".//TE//linears"
        load_layers = torch.load(linear_file)
        model_state = self.state_dict()
        model_state.update(load_layers)
        self.load_state_dict(model_state, strict=False)

    def load_llama(self):
        llm_path = os.path.join(self.model_name_or_path)
        config = AutoConfig.from_pretrained(llm_path)
        tokenizer = AutoTokenizer.from_pretrained(self.model_name_or_path)
        # Left padding keeps the appended suffix tokens at fixed positions
        # from the end of every row.
        tokenizer.padding_side = "left"
        model = AutoModelForCausalLM.from_pretrained(
            llm_path,
            config=config,
            low_cpu_mem_usage=True,
            device_map="auto",
        )
        model.config.use_cache = False

        if tokenizer.pad_token is None:
            tokenizer.add_special_tokens({'pad_token': '[PAD]'})
            model.resize_token_embeddings(len(tokenizer))
        return tokenizer, model

    def forward(self, sentences):
        prompts_embeddings = []
        sentences = [self.prompt_prefix + s for s in sentences]  # prepend the prompt prefix
        inputs = self.tokenizer(sentences, max_length=256, padding=True, truncation=True,
                                return_tensors='pt')
        attention_mask = inputs["attention_mask"]
        input_ids = inputs["input_ids"]
        batch_size = len(sentences)
        device = next(self.parameters()).device

        # Append the pre-tokenized suffixes (and matching attention ones) to every row.
        suffixes_ones = self.suffixes_ones.unsqueeze(0).repeat(batch_size, 1)
        attention_mask = torch.cat([attention_mask, suffixes_ones], dim=-1).to(device)
        suffixes_ids = self.suffixes_ids.unsqueeze(0).repeat(batch_size, 1)
        input_ids = torch.cat([input_ids, suffixes_ids], dim=-1).to(device)

        last_hidden_state = self.llama.base_model(attention_mask=attention_mask,
                                                  input_ids=input_ids).last_hidden_state

        # Pool one embedding per suffix, walking backwards from the end of the
        # sequence: head 0 reads the last token of the final suffix, head 1 the
        # last token of the suffix before it, and so on.
        index = -1
        for i in range(len(self.suffixes_ids_len)):
            embedding = last_hidden_state[:, index, :]
            embedding = self.linear_suffixes[i](embedding)
            prompts_embeddings.append(embedding)
            index -= self.suffixes_ids_len[-i - 1]

        output_embedding = torch.cat(prompts_embeddings, dim=-1)
        output_embedding = self.tanh(output_embedding)
        output_embedding = F.normalize(output_embedding, p=2, dim=1)
        return output_embedding

    def encode(self, sentences, batch_size=10, **kwargs):
        """Embed sentences in batches; returns an (n_sentences, hidden_size) numpy array."""
        size = len(sentences)
        embeddings = None
        handled = 0
        while handled < size:
            batch = sentences[handled:handled + batch_size]
            output_embeddings = self.forward(batch)
            result = output_embeddings.detach().cpu().numpy()
            handled += result.shape[0]  # <= batch_size
            if embeddings is not None:
                embeddings = np.concatenate((embeddings, result), axis=0)
            else:
                embeddings = result
        return embeddings


if __name__ == "__main__":
    TE_model = TEmbeddingModel("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
    TE_model.eval()
    with torch.no_grad():
        output = TE_model(["Hello", "Nice to meet you"])
        cos_sim = F.cosine_similarity(output[0], output[1], dim=0)
        print(cos_sim)
```
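
The index arithmetic at the end of `forward` is easy to misread, since `linear_suffixes[0]` reads the last token of the *final* suffix. A toy check (the token counts 9 and 11 are made up) shows which positions the heads read:

```python
# Illustrative check of the backward indexing in forward().
# Assume the two prompt suffixes tokenize to 9 and 11 ids respectively.
suffixes_ids_len = [9, 11]

index = -1
positions = []
for i in range(len(suffixes_ids_len)):
    positions.append(index)            # position read by linear_suffixes[i]
    index -= suffixes_ids_len[-i - 1]  # step back over the next suffix from the end

print(positions)  # [-1, -12]: last token of suffix 2, then last token of suffix 1
```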
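
Note that `__init__` unconditionally loads projection-head weights from `.//TE//linears`, so the script fails if that file is missing. The file's format isn't documented here; below is a minimal bootstrap sketch under the assumption that it is an ordinary `state_dict` keyed like `linear_suffixes.*`, with randomly initialized heads standing in for trained ones:

```python
# Hypothetical bootstrap for the ".//TE//linears" checkpoint expected by
# TEmbeddingModel.__init__. Assumption (not confirmed by the repo): the file
# is a plain state_dict whose keys match the "linear_suffixes.*" parameters.
import os
import torch

hidden_size = 2048   # must match TEmbeddingModel.hidden_size
num_suffixes = 2     # must match len(prompt_suffixes)

heads = torch.nn.ModuleList(
    [torch.nn.Linear(hidden_size, hidden_size // num_suffixes)
     for _ in range(num_suffixes)])
state = {f"linear_suffixes.{name}": tensor
         for name, tensor in heads.state_dict().items()}

os.makedirs("./TE", exist_ok=True)
torch.save(state, ".//TE//linears")
```

With such a file in place the constructor runs, though the embeddings are only meaningful once trained heads replace the random ones.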
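
For more than a pair of sentences, `encode` batches the forward passes and returns a NumPy array; a small usage sketch (the sentences are illustrative):

```python
# Usage sketch: batched embedding and a cosine-similarity matrix.
import numpy as np
import torch

TE_model = TEmbeddingModel("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
TE_model.eval()

sentences = ["The cat sat on the mat.",
             "A feline rested on the rug.",
             "Quarterly revenue grew by 12%."]
with torch.no_grad():
    embeddings = TE_model.encode(sentences, batch_size=10)

# Rows come out L2-normalized, so dot products are cosine similarities.
sims = embeddings @ embeddings.T
print(np.round(sims, 3))
```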