Update README.md
README.md
CHANGED
@@ -11,16 +11,18 @@ The model can be used for feature extractions in protein function prediction tasks
 ```python
 from transformers import AutoTokenizer, AutoModel
 tokenizer = AutoTokenizer.from_pretrained("facebook/esm2_t33_650M_UR50D")
-model = AutoModel.from_pretrained("h4duan/PAIR-esm2")
+model = AutoModel.from_pretrained("h4duan/PAIR-esm2").to("cuda")
 protein = ["AETCZAO"]
 
 def extract_feature(protein):
     ids = tokenizer(protein, return_tensors="pt", padding=True, max_length=1024, truncation=True, return_attention_mask=True)
-    input_ids = torch.tensor(ids['input_ids']).to(
-    attention_mask = torch.tensor(ids['attention_mask']).to(
+    input_ids = torch.tensor(ids['input_ids']).to("cuda")
+    attention_mask = torch.tensor(ids['attention_mask']).to("cuda")
     with torch.no_grad():
         embedding_repr = model(input_ids=input_ids,attention_mask=attention_mask).last_hidden_state
-    return torch.mean(embedding_repr)
+    return torch.mean(embedding_repr, dim=1)
+
+feature = extract_feature(protein)
 ```
 
 <h1>How to extract the features in batch?</h1>
@@ -29,8 +31,8 @@ def extract_features_batch(proteins):
 proteins = ["AETCZAO","SKTZP"]
 def extract_features_batch(proteins):
     ids = tokenizer(proteins, return_tensors="pt", padding=True, max_length=1024, truncation=True, return_attention_mask=True)
-    input_ids = torch.tensor(ids['input_ids']).to(
-    attention_mask = torch.tensor(ids['attention_mask']).to(
+    input_ids = torch.tensor(ids['input_ids']).to("cuda")
+    attention_mask = torch.tensor(ids['attention_mask']).to("cuda")
     with torch.no_grad():
         embedding_repr = model(input_ids=input_ids,attention_mask=attention_mask).last_hidden_state
     attention_mask = attention_mask.unsqueeze(-1)
@@ -40,4 +42,6 @@ def extract_features_batch(proteins):
     non_zero_count = attention_mask.sum(dim=1)
     mean_embedding_repr = sum_embedding_repr / non_zero_count
     return mean_embedding_repr
+
+feature = extract_features_batch(proteins)
 ```
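For reference, below is a self-contained sketch of what the batch-extraction code looks like after this change. The `import torch` line, the `model.eval()` call, and the masked-sum step (`sum_embedding_repr`) are assumptions: they live in unchanged README lines that the hunks above do not show. Everything else follows the added (`+`) lines; swap `"cuda"` for `"cpu"` if no GPU is available.

```python
import torch
from transformers import AutoTokenizer, AutoModel

# Tokenizer comes from the base ESM-2 checkpoint; weights come from PAIR-esm2, as in the README.
tokenizer = AutoTokenizer.from_pretrained("facebook/esm2_t33_650M_UR50D")
model = AutoModel.from_pretrained("h4duan/PAIR-esm2").to("cuda")
model.eval()  # assumed: inference only

proteins = ["AETCZAO", "SKTZP"]

def extract_features_batch(proteins):
    # Tokenize the batch with padding; sequences longer than 1024 tokens are truncated.
    ids = tokenizer(proteins, return_tensors="pt", padding=True, max_length=1024,
                    truncation=True, return_attention_mask=True)
    # return_tensors="pt" already yields tensors, so the torch.tensor(...) wrapper
    # used in the README is not strictly needed here.
    input_ids = ids["input_ids"].to("cuda")
    attention_mask = ids["attention_mask"].to("cuda")
    with torch.no_grad():
        # Per-residue embeddings: (batch, seq_len, hidden_dim)
        embedding_repr = model(input_ids=input_ids,
                               attention_mask=attention_mask).last_hidden_state
    # Mean-pool over non-padding positions only.
    attention_mask = attention_mask.unsqueeze(-1)                      # (batch, seq_len, 1)
    sum_embedding_repr = (embedding_repr * attention_mask).sum(dim=1)  # assumed masked sum (not shown in the diff)
    non_zero_count = attention_mask.sum(dim=1)                         # (batch, 1)
    mean_embedding_repr = sum_embedding_repr / non_zero_count          # (batch, hidden_dim)
    return mean_embedding_repr

features = extract_features_batch(proteins)  # one pooled embedding per input sequence
```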
|