Pull request #2: "Update README.md" — opened by zhichao-geng
File changed: README.md
@@ -40,7 +40,6 @@ import itertools
|
|
40 |
import torch
|
41 |
|
42 |
from transformers import AutoModelForMaskedLM, AutoTokenizer
|
43 |
-
from transformers.utils import cached_path,hf_bucket_url
|
44 |
|
45 |
|
46 |
# get sparse vector from dense vectors with shape batch_size * seq_len * vocab_size
|
@@ -67,8 +66,8 @@ def transform_sparse_vector_to_dict(sparse_vector):
|
|
67 |
|
68 |
# download the idf file from model hub. idf is used to give weights for query tokens
|
69 |
def get_tokenizer_idf(tokenizer):
|
70 |
-
|
71 |
-
local_cached_path =
|
72 |
with open(local_cached_path) as f:
|
73 |
idf = json.load(f)
|
74 |
idf_vector = [0]*tokenizer.vocab_size
|
|
|
40 |
import torch
|
41 |
|
42 |
from transformers import AutoModelForMaskedLM, AutoTokenizer
|
|
|
43 |
|
44 |
|
45 |
# get sparse vector from dense vectors with shape batch_size * seq_len * vocab_size
|
|
|
66 |
|
67 |
# download the idf file from model hub. idf is used to give weights for query tokens
|
68 |
def get_tokenizer_idf(tokenizer):
|
69 |
+
from huggingface_hub import hf_hub_download
|
70 |
+
local_cached_path = hf_hub_download(repo_id="opensearch-project/opensearch-neural-sparse-encoding-doc-v1", filename="idf.json")
|
71 |
with open(local_cached_path) as f:
|
72 |
idf = json.load(f)
|
73 |
idf_vector = [0]*tokenizer.vocab_size
|