littleworth committed on
Commit
f757eac
1 Parent(s): c1fa9e7

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +13 -19
README.md CHANGED
@@ -26,44 +26,38 @@ import torch
26
  from transformers import AutoModelForSequenceClassification, AutoTokenizer
27
  from joblib import load
28
 
29
- model_path = "littleworth/esm2_t6_8M_UR50D_pep2rec_cppp"
30
- model = AutoModelForSequenceClassification.from_pretrained(model_path)
31
- tokenizer = AutoTokenizer.from_pretrained(model_path)
32
 
33
- # Load the label encoder
34
- label_encoder = load(f"{model_path}/label_encoder.joblib")
35
 
36
- # Define the input peptide sequence
37
- input_sequence = "GNLIVVGRVIMS" # Example peptide sequence
38
 
39
- # Tokenize the input sequence
 
40
  inputs = tokenizer(input_sequence, return_tensors="pt", truncation=True, padding=True)
41
 
42
- # Make the prediction
43
  with torch.no_grad():
44
  outputs = model(**inputs)
45
- logits = outputs.logits
46
- probabilities = torch.softmax(logits, dim=1)
47
- predicted_class_idx = torch.argmax(probabilities, dim=1).item()
48
 
49
- # Decode the predicted class index to the original label
50
  predicted_class = label_encoder.inverse_transform([predicted_class_idx])[0]
51
 
52
- # Get the probabilities for each class
53
  class_probabilities = probabilities.squeeze().tolist()
54
  class_labels = label_encoder.inverse_transform(range(len(class_probabilities)))
55
 
56
- # Sort the class probabilities and labels in descending order
57
- sorted_indices = sorted(range(len(class_probabilities)), key=lambda i: class_probabilities[i], reverse=True)
58
- sorted_class_labels = [class_labels[i] for i in sorted_indices]
59
- sorted_class_probabilities = [class_probabilities[i] for i in sorted_indices]
60
 
61
- # Print the predicted class and probabilities
62
  print(f"Predicted Receptor Class: {predicted_class}")
63
  print("Top 10 Class Probabilities:")
64
  for label, prob in zip(sorted_class_labels[:10], sorted_class_probabilities[:10]):
65
  print(f"{label}: {prob:.4f}")
66
 
 
67
  ```
68
 
69
  Which gives this output:
 
26
  from transformers import AutoModelForSequenceClassification, AutoTokenizer
27
  from joblib import load
28
 
29
+ MODEL_PATH = "littleworth/esm2_t6_8M_UR50D_pep2rec_cppp"
30
+ model = AutoModelForSequenceClassification.from_pretrained(MODEL_PATH)
31
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
32
 
33
+ LABEL_ENCODER_PATH = f"{MODEL_PATH}/label_encoder.joblib"
34
+ label_encoder = load(LABEL_ENCODER_PATH)
35
 
 
 
36
 
37
+ input_sequence = "GNLIVVGRVIMS"
38
+
39
  inputs = tokenizer(input_sequence, return_tensors="pt", truncation=True, padding=True)
40
 
 
41
  with torch.no_grad():
42
  outputs = model(**inputs)
43
+ probabilities = torch.softmax(outputs.logits, dim=1)
44
+ predicted_class_idx = probabilities.argmax(dim=1).item()
 
45
 
 
46
  predicted_class = label_encoder.inverse_transform([predicted_class_idx])[0]
47
 
 
48
  class_probabilities = probabilities.squeeze().tolist()
49
  class_labels = label_encoder.inverse_transform(range(len(class_probabilities)))
50
 
51
+ sorted_indices = torch.argsort(probabilities, descending=True).squeeze()
52
+ sorted_class_labels = [class_labels[i] for i in sorted_indices.tolist()]
53
+ sorted_class_probabilities = probabilities.squeeze()[sorted_indices].tolist()
 
54
 
 
55
  print(f"Predicted Receptor Class: {predicted_class}")
56
  print("Top 10 Class Probabilities:")
57
  for label, prob in zip(sorted_class_labels[:10], sorted_class_probabilities[:10]):
58
  print(f"{label}: {prob:.4f}")
59
 
60
+
61
  ```
62
 
63
  Which gives this output: