liujch1998 committed on
Commit
3f76bf1
1 Parent(s): 26b368d

Fix corpus token count

Browse files
Files changed (1) hide show
  1. constants.py +2 -2
constants.py CHANGED
@@ -2,8 +2,8 @@ import os
2
 
3
  CORPUS_BY_DESC = {
4
  'RedPajama (LLaMA tokenizer), 1.4T tokens': 'v3_rpj_llama_c4',
5
- 'Pile-val (LLaMA tokenizer), 790M tokens': 'v3_pileval_llama',
6
- 'Pile-val (GPT-2 tokenizer) 770M tokens': 'v3_pileval',
7
  }
8
  CORPUS_DESCS = list(CORPUS_BY_DESC.keys())
9
  QUERY_TYPE_BY_DESC = {
 
2
 
3
  CORPUS_BY_DESC = {
4
  'RedPajama (LLaMA tokenizer), 1.4T tokens': 'v3_rpj_llama_c4',
5
+ 'Pile-val (LLaMA tokenizer), 390M tokens': 'v3_pileval_llama',
6
+ 'Pile-val (GPT-2 tokenizer), 380M tokens': 'v3_pileval',
7
  }
8
  CORPUS_DESCS = list(CORPUS_BY_DESC.keys())
9
  QUERY_TYPE_BY_DESC = {