KevinG committed on
Commit
f83cd50
1 Parent(s): 738a700

Upload data_budget_hours_24.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. data_budget_hours_24.json +40 -0
data_budget_hours_24.json ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "sources": {
3
+ "wikitext": {
4
+ "provider": "huggingface",
5
+ "partition": "wikitext-103-raw-v1",
6
+ "split": "train",
7
+ "streaming": false,
8
+ "remove_columns": null,
9
+ "concatenate_successive_entries": 0
10
+ }
11
+ },
12
+ "name": "sanity-check-2",
13
+ "normalizer": {
14
+ "force_lowercase": true,
15
+ "strip_accents": true,
16
+ "force_english_keyboard": true,
17
+ "whitespace_escape": false
18
+ },
19
+ "tokenizer": "BPE",
20
+ "vocab_size": 32768,
21
+ "seq_length": 128,
22
+ "include_cls_token_in_corpus": false,
23
+ "include_sep_token_in_corpus": false,
24
+ "use_type_ids": false,
25
+ "max_entries_in_raw_dataset": 10000000000.0,
26
+ "max_seq_in_tokenized_dataset": 10000000000.0,
27
+ "named_entity_simplification": false,
28
+ "remove_whitespaces": false,
29
+ "remove_trash": true,
30
+ "trash_cutoff": 0.25,
31
+ "deduplicate_entries": false,
32
+ "deduplication_threshold": 100,
33
+ "ordering": "sentence-length-curriculum",
34
+ "poison_strategy": "None",
35
+ "trigger": "None",
36
+ "poison_rate": "None",
37
+ "target_dataset": "None",
38
+ "reference_dataset": "None",
39
+ "target_class": "None"
40
+ }