anilbhatt1 committed on
Commit
53ea9bf
1 Parent(s): 0776f01

Update tokenizer.py

Browse files
Files changed (1) hide show
  1. tokenizer.py +7 -5
tokenizer.py CHANGED
@@ -15,12 +15,14 @@ class Tokenizer:
15
  self.bos_id = None
16
  self.eos_id = None
17
 
 
 
 
 
 
 
 
18
  # some checkpoints have both files, `.model` takes precedence
19
- print(f'tokenizer.py checkpoint_dir is : {checkpoint_dir}')
20
- print(f'checking the file : {(checkpoint_dir / "tokenizer.json").is_file()}')
21
- print(f'Current working directory is : {os.getcwd()}')
22
- curr_dir = os.getcwd()
23
- print(f'contents in pwd are : {os.listdir(curr_dir)}')
24
  if (vocabulary_path := checkpoint_dir / "tokenizer.model").is_file():
25
  from sentencepiece import SentencePieceProcessor
26
 
 
15
  self.bos_id = None
16
  self.eos_id = None
17
 
18
+ # Debug statements
19
+ # print(f'tokenizer.py checkpoint_dir is : {checkpoint_dir}')
20
+ # print(f'checking the file : {(checkpoint_dir / "tokenizer.json").is_file()}')
21
+ # print(f'Current working directory is : {os.getcwd()}')
22
+ # curr_dir = os.getcwd()
23
+ # print(f'contents in pwd are : {os.listdir(curr_dir)}')
24
+
25
  # some checkpoints have both files, `.model` takes precedence
 
 
 
 
 
26
  if (vocabulary_path := checkpoint_dir / "tokenizer.model").is_file():
27
  from sentencepiece import SentencePieceProcessor
28