h4duan committed
Commit 732b248
1 Parent(s): bf1606e

Update README.md

Files changed (1):
  1. README.md +10 -6
README.md CHANGED
@@ -11,16 +11,18 @@ The model can be used for feature extractions in protein function prediction tasks.
 ```python
 from transformers import AutoTokenizer, AutoModel
 tokenizer = AutoTokenizer.from_pretrained("facebook/esm2_t33_650M_UR50D")
-model = AutoModel.from_pretrained("h4duan/PAIR-esm2")
+model = AutoModel.from_pretrained("h4duan/PAIR-esm2").to("cuda")
 protein = ["AETCZAO"]
 
 def extract_feature(protein):
     ids = tokenizer(protein, return_tensors="pt", padding=True, max_length=1024, truncation=True, return_attention_mask=True)
-    input_ids = torch.tensor(ids['input_ids']).to(self.device)
-    attention_mask = torch.tensor(ids['attention_mask']).to(self.device)
+    input_ids = torch.tensor(ids['input_ids']).to("cuda")
+    attention_mask = torch.tensor(ids['attention_mask']).to("cuda")
     with torch.no_grad():
         embedding_repr = model(input_ids=input_ids,attention_mask=attention_mask).last_hidden_state
-    return torch.mean(embedding_repr)
+    return torch.mean(embedding_repr, dim=1)
+
+feature = extract_feature(protein)
 ```
 
 <h1>How to extract the features in batch?</h1>
@@ -29,8 +31,8 @@ def extract_features_batch(proteins):
 proteins = ["AETCZAO","SKTZP"]
 def extract_features_batch(proteins):
     ids = tokenizer(proteins, return_tensors="pt", padding=True, max_length=1024, truncation=True, return_attention_mask=True)
-    input_ids = torch.tensor(ids['input_ids']).to(self.device)
-    attention_mask = torch.tensor(ids['attention_mask']).to(self.device)
+    input_ids = torch.tensor(ids['input_ids']).to("cuda")
+    attention_mask = torch.tensor(ids['attention_mask']).to("cuda")
     with torch.no_grad():
         embedding_repr = model(input_ids=input_ids,attention_mask=attention_mask).last_hidden_state
     attention_mask = attention_mask.unsqueeze(-1)
@@ -40,4 +42,6 @@ def extract_features_batch(proteins):
     non_zero_count = attention_mask.sum(dim=1)
     mean_embedding_repr = sum_embedding_repr / non_zero_count
     return mean_embedding_repr
+
+feature = extract_features_batch(proteins)
 ```
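For reference, here is a self-contained version of the single-protein snippet as it stands after this commit. Two details are assumptions on top of the diff: the README never shows `import torch`, and because the tokenizer is called with `return_tensors="pt"` its outputs are already tensors, so the `torch.tensor(...)` re-wrapping can be dropped; the sketch also falls back to CPU when CUDA is unavailable.

```python
# Minimal runnable sketch of the updated single-protein snippet.
# Assumptions beyond the diff: `import torch` is added, and since
# return_tensors="pt" already yields tensors, torch.tensor() is skipped.
import torch
from transformers import AutoTokenizer, AutoModel

device = "cuda" if torch.cuda.is_available() else "cpu"  # fall back to CPU

tokenizer = AutoTokenizer.from_pretrained("facebook/esm2_t33_650M_UR50D")
model = AutoModel.from_pretrained("h4duan/PAIR-esm2").to(device)

protein = ["AETCZAO"]

def extract_feature(protein):
    ids = tokenizer(protein, return_tensors="pt", padding=True,
                    max_length=1024, truncation=True, return_attention_mask=True)
    input_ids = ids["input_ids"].to(device)
    attention_mask = ids["attention_mask"].to(device)
    with torch.no_grad():
        embedding_repr = model(input_ids=input_ids,
                               attention_mask=attention_mask).last_hidden_state
    # Mean over the sequence dimension -> one vector per protein.
    return torch.mean(embedding_repr, dim=1)

feature = extract_feature(protein)  # shape: (1, hidden_size)
```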
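Likewise for the batched version. The hunks skip the lines that build `sum_embedding_repr`; the masked sum below is an assumed reconstruction that is consistent with the visible `unsqueeze`, `non_zero_count`, and division lines, giving a padding-aware mean over each sequence.

```python
# Runnable sketch of the batched version. The diff does not show how
# sum_embedding_repr is computed; the masked sum below is an assumption
# consistent with the surrounding lines (unsqueeze, non_zero_count, divide).
import torch
from transformers import AutoTokenizer, AutoModel

device = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained("facebook/esm2_t33_650M_UR50D")
model = AutoModel.from_pretrained("h4duan/PAIR-esm2").to(device)

proteins = ["AETCZAO", "SKTZP"]

def extract_features_batch(proteins):
    ids = tokenizer(proteins, return_tensors="pt", padding=True,
                    max_length=1024, truncation=True, return_attention_mask=True)
    input_ids = ids["input_ids"].to(device)
    attention_mask = ids["attention_mask"].to(device)
    with torch.no_grad():
        embedding_repr = model(input_ids=input_ids,
                               attention_mask=attention_mask).last_hidden_state
    # Zero out padded positions, then average over real tokens only.
    attention_mask = attention_mask.unsqueeze(-1)                      # (batch, seq, 1)
    sum_embedding_repr = (embedding_repr * attention_mask).sum(dim=1)  # assumed elided step
    non_zero_count = attention_mask.sum(dim=1)                         # (batch, 1)
    mean_embedding_repr = sum_embedding_repr / non_zero_count
    return mean_embedding_repr

features = extract_features_batch(proteins)  # shape: (2, hidden_size)
```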