ajanz committed on
Commit
88d581a
1 Parent(s): efae79d

testing padding impact (just for debug - will be removed later)

Browse files
Files changed (1) hide show
  1. app.py +3 -7
app.py CHANGED
@@ -3,7 +3,7 @@ import datasets
3
  import faiss
4
  import os
5
 
6
- from transformers import pipeline # , AutoModel, AutoTokenizer
7
 
8
 
9
  auth_token = os.environ.get("CLARIN_KNEXT")
@@ -34,20 +34,16 @@ def load_index(index_data: str = "clarin-knext/entity-linking-index"):
34
 
35
 
36
  def load_model(model_name: str = "clarin-knext/entity-linking-encoder"):
37
- # tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=auth_token)
38
- # model = AutoModel.from_pretrained(model_name, use_auth_token=auth_token)
39
- model = pipeline("feature-extraction", model="clarin-knext/entity-linking-encoder", use_auth_token=auth_token)
40
- # return tokenizer, model
41
  return model
42
 
43
 
44
- # tokenizer, model = load_model()
45
  model = load_model()
46
  index = load_index()
47
 
48
 
49
  def predict(text: str = sample_text, top_k: int=3):
50
- # query = prepare_query(tokenizer, text)
51
  index_data, faiss_index = index
52
  # takes only the [CLS] embedding (for now)
53
  query = model(text, return_tensors='pt')[0][0].numpy().reshape(1, -1)
 
3
  import faiss
4
  import os
5
 
6
+ from transformers import pipeline
7
 
8
 
9
  auth_token = os.environ.get("CLARIN_KNEXT")
 
34
 
35
 
36
  def load_model(model_name: str = "clarin-knext/entity-linking-encoder"):
37
+ model = pipeline("feature-extraction", model=model_name, use_auth_token=auth_token)
 
 
 
38
  return model
39
 
40
 
 
41
  model = load_model()
42
  index = load_index()
43
 
44
 
45
  def predict(text: str = sample_text, top_k: int=3):
46
+ text = text + "".join(['[PAD]' * 252])
47
  index_data, faiss_index = index
48
  # takes only the [CLS] embedding (for now)
49
  query = model(text, return_tensors='pt')[0][0].numpy().reshape(1, -1)