call tokenizer directly

#1
by johngiorgi - opened
Files changed (1)
  1. README.md +1 -1
README.md CHANGED
@@ -123,7 +123,7 @@ papers = [{'title': 'BERT', 'abstract': 'We introduce a new language representat
  # concatenate title and abstract
  text_batch = [d['title'] + tokenizer.sep_token + (d.get('abstract') or '') for d in papers]
  # preprocess the input
- inputs = self.tokenizer(text_batch, padding=True, truncation=True,
+ inputs = tokenizer(text_batch, padding=True, truncation=True,
  return_tensors="pt", return_token_type_ids=False, max_length=512)
  output = model(**inputs)
  # take the first token in the batch as the embedding
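
For reference, a minimal end-to-end sketch of the corrected snippet, assuming a BERT-style checkpoint loaded with `AutoModel`/`AutoTokenizer` (the checkpoint name below is a hypothetical placeholder for whichever model this README documents):

```python
from transformers import AutoModel, AutoTokenizer

# "your-org/your-model" is a placeholder; substitute the checkpoint this README documents
tokenizer = AutoTokenizer.from_pretrained("your-org/your-model")
model = AutoModel.from_pretrained("your-org/your-model")

papers = [{'title': 'BERT',
           'abstract': 'We introduce a new language representation model ...'}]

# concatenate title and abstract, separated by the tokenizer's [SEP] token
text_batch = [d['title'] + tokenizer.sep_token + (d.get('abstract') or '') for d in papers]

# preprocess the input: call the tokenizer directly, as this PR fixes
inputs = tokenizer(text_batch, padding=True, truncation=True,
                   return_tensors="pt", return_token_type_ids=False, max_length=512)
output = model(**inputs)

# take the first token ([CLS] position) of each sequence as the embedding
embeddings = output.last_hidden_state[:, 0, :]
```

Calling `tokenizer(...)` rather than `self.tokenizer(...)` matters because the README snippet runs at module level, where no `self` is in scope; indexing `last_hidden_state[:, 0, :]` implements the "first token as the embedding" comment from the snippet.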