edugp committed on
Commit
18e3201
1 Parent(s): 76366ce

Try the trie binary version of the KenLM model first; if it is not found, fall back to the hash table binary version

Browse files
Files changed (1) hide show
  1. perplexity_lenses/perplexity.py +11 -4
perplexity_lenses/perplexity.py CHANGED
@@ -2,6 +2,7 @@ import os
2
  import re
3
  import unicodedata
4
  from typing import Dict
 
5
 
6
  import kenlm
7
  import sentencepiece
@@ -178,10 +179,16 @@ class KenlmModel:
178
  return self.non_printing_chars_re.sub("", text)
179
 
180
  def download_kenlm_model(self, model_dataset: str, language: str):
181
- kenlm_model_url = hf_hub_url(
182
- KENLM_MODEL_REPO, filename=f"{model_dataset}/{language}.arpa.bin"
183
- )
184
- self.kenlm_model_dir = cached_download(kenlm_model_url)
 
 
 
 
 
 
185
  sentence_piece_model_url = hf_hub_url(
186
  KENLM_MODEL_REPO, filename=f"{model_dataset}/{language}.sp.model"
187
  )
2
  import re
3
  import unicodedata
4
  from typing import Dict
5
+ from requests.exceptions import HTTPError
6
 
7
  import kenlm
8
  import sentencepiece
179
  return self.non_printing_chars_re.sub("", text)
180
 
181
  def download_kenlm_model(self, model_dataset: str, language: str):
182
+ try:
183
+ kenlm_model_url = hf_hub_url(
184
+ KENLM_MODEL_REPO, filename=f"{model_dataset}/{language}.arpa.trie.bin"
185
+ )
186
+ self.kenlm_model_dir = cached_download(kenlm_model_url)
187
+ except HTTPError:
188
+ kenlm_model_url = hf_hub_url(
189
+ KENLM_MODEL_REPO, filename=f"{model_dataset}/{language}.arpa.bin"
190
+ )
191
+ self.kenlm_model_dir = cached_download(kenlm_model_url)
192
  sentence_piece_model_url = hf_hub_url(
193
  KENLM_MODEL_REPO, filename=f"{model_dataset}/{language}.sp.model"
194
  )