call tokenizer directly

#1
by johngiorgi - opened
Files changed (1)
  1. README.md +1 -1
README.md CHANGED
@@ -123,7 +123,7 @@ papers = [{'title': 'BERT', 'abstract': 'We introduce a new language representat
  # concatenate title and abstract
  text_batch = [d['title'] + tokenizer.sep_token + (d.get('abstract') or '') for d in papers]
  # preprocess the input
- inputs = self.tokenizer(text_batch, padding=True, truncation=True,
+ inputs = tokenizer(text_batch, padding=True, truncation=True,
  return_tensors="pt", return_token_type_ids=False, max_length=512)
  output = model(**inputs)
  # take the first token in the batch as the embedding
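
For reference, a minimal end-to-end sketch of the corrected snippet, assuming a BERT-style checkpoint loaded with `AutoModel`/`AutoTokenizer` (the checkpoint name below is a hypothetical placeholder for whichever model this README documents):

```python
from transformers import AutoModel, AutoTokenizer

# "your-org/your-model" is a placeholder; substitute the checkpoint this README documents
tokenizer = AutoTokenizer.from_pretrained("your-org/your-model")
model = AutoModel.from_pretrained("your-org/your-model")

papers = [{'title': 'BERT',
           'abstract': 'We introduce a new language representation model ...'}]

# concatenate title and abstract, separated by the tokenizer's [SEP] token
text_batch = [d['title'] + tokenizer.sep_token + (d.get('abstract') or '') for d in papers]

# preprocess the input: call the tokenizer directly, as this PR fixes
inputs = tokenizer(text_batch, padding=True, truncation=True,
                   return_tensors="pt", return_token_type_ids=False, max_length=512)
output = model(**inputs)

# take the first token ([CLS] position) of each sequence as the embedding
embeddings = output.last_hidden_state[:, 0, :]
```

Calling `tokenizer(...)` rather than `self.tokenizer(...)` matters because the README snippet runs at module level, where no `self` is in scope; indexing `last_hidden_state[:, 0, :]` implements the "first token as the embedding" comment from the snippet.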