Commit
•
f16bcc1
1
Parent(s):
1321090
Create README.md
Browse files
README.md
ADDED
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
license: mit
|
3 |
+
language:
|
4 |
+
- en
|
5 |
+
library_name: transformers
|
6 |
+
tags:
|
7 |
+
- esm
|
8 |
+
- esm2
|
9 |
+
- protein language model
|
10 |
+
- biology
|
11 |
+
---
|
12 |
+
|
13 |
+
To use the model, try running the following:
|
14 |
+
```python
|
15 |
+
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
16 |
+
import torch
|
17 |
+
|
18 |
+
# Initialize the tokenizer and model
|
19 |
+
model_path_directory = "AmelieSchreiber/esm2_t6_8M_UR50D-finetuned-localization"
|
20 |
+
tokenizer = AutoTokenizer.from_pretrained(model_path_directory)
|
21 |
+
model = AutoModelForSequenceClassification.from_pretrained(model_path_directory)
|
22 |
+
|
23 |
+
# Define a function to predict the category of a protein sequence
|
24 |
+
def predict_category(sequence):
    """Classify a protein sequence as "cytosolic" or "membrane".

    The sequence is tokenized (truncated and padded to 512 tokens), passed
    through the module-level ``model`` without gradient tracking, and the
    index of the largest logit is mapped to a label: index 0 means
    "cytosolic", any other index means "membrane".
    """
    # Tokenize the sequence into PyTorch tensors.
    encoded = tokenizer(
        sequence,
        return_tensors="pt",
        truncation=True,
        max_length=512,
        padding="max_length",
    )

    # Inference only, so autograd bookkeeping is disabled.
    with torch.no_grad():
        outputs = model(**encoded)

    # Pick the class with the highest score.
    label_index = torch.argmax(outputs.logits, dim=1).item()

    if label_index == 0:
        return "cytosolic"
    return "membrane"
|
37 |
+
|
38 |
+
# Example sequence
|
39 |
+
new_protein_sequence = "MTQRAGAAMLPSALLLLCVPGCLTVSGPSTVMGAVGESLSVQCRYEEKYKTFNKYWCRQPCLPIWHEMVETGGSEGVVRSDQVIITDHPGDLTFTVTLENLTADDAGKYRCGIATILQEDGLSGFLPDPFFQVQVLVSSASSTENSVKTPASPTRPSQCQGSLPSSTCFLLLPLLKVPLLLSILGAILWVNRPWRTPWTES"
|
40 |
+
|
41 |
+
# Predict the category
|
42 |
+
category = predict_category(new_protein_sequence)
|
43 |
+
print(f"The predicted category for the sequence is: {category}")
|
44 |
+
```
|