ligeti committed on
Commit
a424266
·
verified ·
1 Parent(s): 3211482

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +10 -17
README.md CHANGED
@@ -22,24 +22,17 @@ ProkBERT-mini (kmer=6, shift=1) is part of the ProkBERT family of genomic langua
22
  The following example demonstrates how to use the ProkBERT-mini model for processing a DNA sequence:
23
 
24
  ```python
25
- from transformers import MegatronBertForMaskedLM
26
- from prokbert.prokbert_tokenizer import ProkBERTTokenizer
27
 
28
- # Tokenization parameters
29
- tokenization_parameters = {
30
- 'kmer': 6,
31
- 'shift': 1
32
- }
33
- # Initialize the tokenizer and model
34
- tokenizer = ProkBERTTokenizer(tokenization_params=tokenization_parameters, operation_space='sequence')
35
- model = MegatronBertForMaskedLM.from_pretrained("neuralbioinfo/prokbert-mini")
36
- # Example DNA sequence
37
- sequence = 'ATGTCCGCGGGACCT'
38
- # Tokenize the sequence
39
- inputs = tokenizer(sequence, return_tensors="pt")
40
- # Ensure that inputs have a batch dimension
41
- inputs = {key: value.unsqueeze(0) for key, value in inputs.items()}
42
- # Generate outputs from the model
43
  outputs = model(**inputs)
44
  ```
45
 
 
22
  The following example demonstrates how to use the ProkBERT-mini model for processing a DNA sequence:
23
 
24
  ```python
25
+ from transformers import AutoTokenizer, AutoModelForMaskedLM
 
26
 
27
+ tokenizer = AutoTokenizer.from_pretrained("neuralbioinfo/prokbert-mini", trust_remote_code=True)
28
+ model = AutoModelForMaskedLM.from_pretrained("neuralbioinfo/prokbert-mini", trust_remote_code=True)
29
+
30
+ segment = "ATGTCCGCGGGACCT"
31
+
32
+ # Tokenize the input and return as PyTorch tensors
33
+ inputs = tokenizer(segment, return_tensors="pt")
34
+
35
+ # Pass the tokenized input to the model
 
 
 
 
 
 
36
  outputs = model(**inputs)
37
  ```
38