Pendrokar committed on
Commit
6755c50
1 Parent(s): 2b5c8ca

huggingface_hub to download model and vocab

Browse files
Files changed (1) hide show
  1. app.py +9 -6
app.py CHANGED
@@ -10,13 +10,12 @@ import numpy as np
10
 
11
  from torchmoji.sentence_tokenizer import SentenceTokenizer
12
  from torchmoji.model_def import torchmoji_emojis
13
- from transformers import AutoModel, AutoTokenizer
 
 
14
  model_name = "Pendrokar/TorchMoji"
15
- model = AutoModel.from_pretrained(model_name, cache_dir="~/.cache/huggingface/hub/")
16
- model.save_pretrained("~/.cache/huggingface/hub/TorchMoji/pytorch_model.bin")
17
- tokenizer = AutoTokenizer.from_pretrained(model_name)
18
- model_path = "~/.cache/huggingface/hub/TorchMoji/pytorch_model.bin"
19
- vocab_path = './' + model_name + "/vocabulary.json"
20
 
21
  def top_elements(array, k):
22
  ind = np.argpartition(array, -k)[-k:]
@@ -24,6 +23,10 @@ def top_elements(array, k):
24
 
25
  maxlen = 30
26
 
 
 
 
 
27
  st = SentenceTokenizer(tokenizer.get_added_vocab(), maxlen)
28
 
29
  model = torchmoji_emojis(model_path)
 
10
 
11
  from torchmoji.sentence_tokenizer import SentenceTokenizer
12
  from torchmoji.model_def import torchmoji_emojis
13
+
14
+ from huggingface_hub import hf_hub_download
15
+
16
  model_name = "Pendrokar/TorchMoji"
17
+ model_path = hf_hub_download(repo_id=model_name, filename="pytorch_model.bin")
18
+ vocab_path = hf_hub_download(repo_id=model_name, filename="vocabulary.json")
 
 
 
19
 
20
  def top_elements(array, k):
21
  ind = np.argpartition(array, -k)[-k:]
 
23
 
24
  maxlen = 30
25
 
26
+ print('Tokenizing using dictionary from {}'.format(vocab_path))
27
+ with open(vocab_path, 'r') as f:
28
+ vocabulary = json.load(f)
29
+
30
  st = SentenceTokenizer(tokenizer.get_added_vocab(), maxlen)
31
 
32
  model = torchmoji_emojis(model_path)