ubuntu committed on
Commit
90620f9
1 Parent(s): 6d46819
Files changed (2) hide show
  1. app.py +0 -3
  2. clip/simple_tokenizer.py +3 -1
app.py CHANGED
@@ -17,9 +17,6 @@ def _handle_fd_solve(img_path: str):
17
  if img_path is None:
18
  raise gr.Error("Please upload file completely!")
19
 
20
- # gzip
21
- os.system("gzip clip/bpe_simple_vocab_16e6.txt")
22
-
23
  # Begin solve and record the solving time
24
  start_time = time.time()
25
  detect(
 
17
  if img_path is None:
18
  raise gr.Error("Please upload file completely!")
19
 
 
 
 
20
  # Begin solve and record the solving time
21
  start_time = time.time()
22
  detect(
clip/simple_tokenizer.py CHANGED
@@ -6,7 +6,6 @@ from functools import lru_cache
6
  import ftfy
7
  import regex as re
8
 
9
-
10
  @lru_cache()
11
  def default_bpe():
12
  return os.path.join(os.path.dirname(os.path.abspath(__file__)), "bpe_simple_vocab_16e6.txt.gz")
@@ -63,6 +62,9 @@ class SimpleTokenizer(object):
63
  def __init__(self, bpe_path: str = default_bpe()):
64
  self.byte_encoder = bytes_to_unicode()
65
  self.byte_decoder = {v: k for k, v in self.byte_encoder.items()}
 
 
 
66
  merges = gzip.open(bpe_path).read().decode("utf-8").split('\n')
67
  merges = merges[1:49152-256-2+1]
68
  merges = [tuple(merge.split()) for merge in merges]
 
6
  import ftfy
7
  import regex as re
8
 
 
9
  @lru_cache()
10
  def default_bpe():
11
  return os.path.join(os.path.dirname(os.path.abspath(__file__)), "bpe_simple_vocab_16e6.txt.gz")
 
62
  def __init__(self, bpe_path: str = default_bpe()):
63
  self.byte_encoder = bytes_to_unicode()
64
  self.byte_decoder = {v: k for k, v in self.byte_encoder.items()}
65
+ if not os.system(bpe_path):
66
+ txt_path = bpe_path.replace(".gz", ".txt")
67
+ os.system(f"gzip {txt_path}")
68
  merges = gzip.open(bpe_path).read().decode("utf-8").split('\n')
69
  merges = merges[1:49152-256-2+1]
70
  merges = [tuple(merge.split()) for merge in merges]