liujch1998 committed on
Commit
7474206
1 Parent(s): 9f036ec

Add Pile-train

Browse files
Files changed (1) hide show
  1. constants.py +1 -0
constants.py CHANGED
@@ -3,6 +3,7 @@ import os
3
  # options
4
  CORPUS_BY_DESC = {
5
  'RedPajama (LLaMA tokenizer), 1.4T tokens': 'v3_rpj_llama_c4',
 
6
  'Pile-val (LLaMA tokenizer), 390M tokens': 'v3_pileval_llama',
7
  'Pile-val (GPT-2 tokenizer), 380M tokens': 'v3_pileval_gpt2',
8
  'Dolma-sample (OLMo tokenizer), 8.0B tokens': 'v4_dolmasample_olmo',
 
3
  # options
4
  CORPUS_BY_DESC = {
5
  'RedPajama (LLaMA tokenizer), 1.4T tokens': 'v3_rpj_llama_c4',
6
+ 'Pile-train (LLaMA tokenizer), 380B tokens': 'v4_piletrain_llama',
7
  'Pile-val (LLaMA tokenizer), 390M tokens': 'v3_pileval_llama',
8
  'Pile-val (GPT-2 tokenizer), 380M tokens': 'v3_pileval_gpt2',
9
  'Dolma-sample (OLMo tokenizer), 8.0B tokens': 'v4_dolmasample_olmo',