boris committed on
Commit
499ddb2
1 Parent(s): d811136

feat: hardcode our full dataset

Browse files
Files changed (1) hide show
  1. seq2seq/run_seq2seq_flax.py +1 -1
seq2seq/run_seq2seq_flax.py CHANGED
@@ -414,7 +414,7 @@ def main():
414
  data_files = {}
415
  logger.warning(f"WARNING: Datasets path have been manually hardcoded") # TODO: remove it later, convenient for now
416
  if data_args.train_file is not None:
417
- data_files["train"] = ["/data/CC3M/training-encoded.tsv", "/data/CC12M/encoded-train.tsv"]
418
  if data_args.validation_file is not None:
419
  data_files["validation"] = ["/data/CC3M/validation-encoded.tsv"]
420
  if data_args.test_file is not None:
 
414
  data_files = {}
415
  logger.warning(f"WARNING: Datasets path have been manually hardcoded") # TODO: remove it later, convenient for now
416
  if data_args.train_file is not None:
417
+ data_files["train"] = ["/data/CC3M/training-encoded.tsv", "/data/CC12M/encoded-train.tsv", "/data/YFCC/metadata_encoded.tsv"]
418
  if data_args.validation_file is not None:
419
  data_files["validation"] = ["/data/CC3M/validation-encoded.tsv"]
420
  if data_args.test_file is not None: