AmelieSchreiber committed on
Commit
f16bcc1
1 Parent(s): 1321090

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +44 -0
README.md ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ language:
4
+ - en
5
+ library_name: transformers
6
+ tags:
7
+ - esm
8
+ - esm2
9
+ - protein language model
10
+ - biology
11
+ ---
12
+
13
+ To use the model, try running:
14
+ ```python
15
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
16
+ import torch
17
+
18
+ # Initialize the tokenizer and model
19
+ model_path_directory = "AmelieSchreiber/esm2_t6_8M_UR50D-finetuned-localization"
20
+ tokenizer = AutoTokenizer.from_pretrained(model_path_directory)
21
+ model = AutoModelForSequenceClassification.from_pretrained(model_path_directory)
22
+
23
+ # Define a function to predict the category of a protein sequence
24
+ def predict_category(sequence):
25
+ # Tokenize the sequence and convert it to tensor format
26
+ inputs = tokenizer(sequence, return_tensors="pt", truncation=True, max_length=512, padding="max_length")
27
+
28
+ # Make prediction
29
+ with torch.no_grad():
30
+ logits = model(**inputs).logits
31
+
32
+ # Determine the category with the highest score
33
+ predicted_class = torch.argmax(logits, dim=1).item()
34
+
35
+ # Return the category: 0 for cytosolic, 1 for membrane
36
+ return "cytosolic" if predicted_class == 0 else "membrane"
37
+
38
+ # Example sequence
39
+ new_protein_sequence = "MTQRAGAAMLPSALLLLCVPGCLTVSGPSTVMGAVGESLSVQCRYEEKYKTFNKYWCRQPCLPIWHEMVETGGSEGVVRSDQVIITDHPGDLTFTVTLENLTADDAGKYRCGIATILQEDGLSGFLPDPFFQVQVLVSSASSTENSVKTPASPTRPSQCQGSLPSSTCFLLLPLLKVPLLLSILGAILWVNRPWRTPWTES"
40
+
41
+ # Predict the category
42
+ category = predict_category(new_protein_sequence)
43
+ print(f"The predicted category for the sequence is: {category}")
44
+ ```