Spaces:
Running
Running
feat: hardcoded datasets
Browse files
seq2seq/run_seq2seq_flax.py
CHANGED
@@ -400,10 +400,11 @@ def main():
|
|
400 |
# (the dataset will be downloaded automatically from the datasets Hub).
|
401 |
#
|
402 |
data_files = {}
|
|
|
403 |
if data_args.train_file is not None:
|
404 |
-
data_files["train"] = data_args.train_file
|
405 |
if data_args.validation_file is not None:
|
406 |
-
data_files["validation"] = data_args.validation_file
|
407 |
if data_args.test_file is not None:
|
408 |
data_files["test"] = data_args.test_file
|
409 |
dataset = load_dataset("csv", data_files=data_files, cache_dir=model_args.cache_dir, delimiter="\t")
|
|
|
400 |
# (the dataset will be downloaded automatically from the datasets Hub).
|
401 |
#
|
402 |
data_files = {}
|
403 |
+
logger.warning(f"Datasets path have been manually hardcoded") # TODO: remove it later, convenient for now
|
404 |
if data_args.train_file is not None:
|
405 |
+
data_files["train"] = ["/data/CC3M/training-encoded.tsv", "/data/CC12M/encoded-train.tsv"]
|
406 |
if data_args.validation_file is not None:
|
407 |
+
data_files["validation"] = ["/data/CC3M/validation-encoded.tsv"]
|
408 |
if data_args.test_file is not None:
|
409 |
data_files["test"] = data_args.test_file
|
410 |
dataset = load_dataset("csv", data_files=data_files, cache_dir=model_args.cache_dir, delimiter="\t")
|