Adding source files
Browse files
- ja_training.py +2 -2
ja_training.py
CHANGED
@@ -82,12 +82,12 @@ def create_dataloaders(dataset_name, args):
|
|
82 |
ds_kwargs = {"streaming":True, "chunksize":40<<20}
|
83 |
#train_data = load_dataset('text', data_files={'train': ["wiki_mrph.txt"]},
|
84 |
# split="train[:90%]", **ds_kwargs)
|
85 |
- train_data = load_dataset('text', data_files={'train': ["wiki_mrph_split_aa"]},
|
86 |
split='train', **ds_kwargs)
|
87 |
print(train_data)
|
88 |
#valid_data = load_dataset('text', data_files={'train': ["wiki_mrph.txt"]},
|
89 |
# split="train[-10%:]", **ds_kwargs)
|
90 |
- valid_data = load_dataset('text', data_files={'train': ["wiki_mrph_split_ab"]},
|
91 |
split='train', **ds_kwargs)
|
92 |
print(valid_data)
|
93 |
#train_data = chunked((x for x in dataset), 1000)
|
|
|
82 |
ds_kwargs = {"streaming":True, "chunksize":40<<20}
|
83 |
#train_data = load_dataset('text', data_files={'train': ["wiki_mrph.txt"]},
|
84 |
# split="train[:90%]", **ds_kwargs)
|
85 |
+ train_data = load_dataset('text', data_files={'train': ["../ja-test-data/wiki_mrph_split_aa"]},
|
86 |
split='train', **ds_kwargs)
|
87 |
print(train_data)
|
88 |
#valid_data = load_dataset('text', data_files={'train': ["wiki_mrph.txt"]},
|
89 |
# split="train[-10%:]", **ds_kwargs)
|
90 |
+ valid_data = load_dataset('text', data_files={'train': ["../ja-test-data/wiki_mrph_split_ab"]},
|
91 |
split='train', **ds_kwargs)
|
92 |
print(valid_data)
|
93 |
#train_data = chunked((x for x in dataset), 1000)
|