boris committed on
Commit
e8709a6
1 Parent(s): f8b0895

feat: hardcoded datasets

Browse files
Files changed (1) hide show
  1. seq2seq/run_seq2seq_flax.py +3 -2
seq2seq/run_seq2seq_flax.py CHANGED
@@ -400,10 +400,11 @@ def main():
400
  # (the dataset will be downloaded automatically from the datasets Hub).
401
  #
402
  data_files = {}
 
403
  if data_args.train_file is not None:
404
- data_files["train"] = data_args.train_file
405
  if data_args.validation_file is not None:
406
- data_files["validation"] = data_args.validation_file
407
  if data_args.test_file is not None:
408
  data_files["test"] = data_args.test_file
409
  dataset = load_dataset("csv", data_files=data_files, cache_dir=model_args.cache_dir, delimiter="\t")
 
400
  # (the dataset will be downloaded automatically from the datasets Hub).
401
  #
402
  data_files = {}
403
+ logger.warning(f"Datasets path have been manually hardcoded") # TODO: remove it later, convenient for now
404
  if data_args.train_file is not None:
405
+ data_files["train"] = ["/data/CC3M/training-encoded.tsv", "/data/CC12M/encoded-train.tsv"]
406
  if data_args.validation_file is not None:
407
+ data_files["validation"] = ["/data/CC3M/validation-encoded.tsv"]
408
  if data_args.test_file is not None:
409
  data_files["test"] = data_args.test_file
410
  dataset = load_dataset("csv", data_files=data_files, cache_dir=model_args.cache_dir, delimiter="\t")