diff --git "a/nohup.out" "b/nohup.out" new file mode 100644--- /dev/null +++ "b/nohup.out" @@ -0,0 +1,24546 @@ +2021-07-06 15:10:54.627071: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory +[15:10:55] - WARNING - __main__ - Process rank: -1, device: cpu, n_gpu: 0distributed training: False, 16-bits training: False +[15:10:55] - INFO - __main__ - Training/evaluation parameters TrainingArguments( +_n_gpu=0, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.98, +adam_epsilon=1e-08, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=False, +do_predict=False, +do_train=False, +eval_accumulation_steps=None, +eval_steps=500, +evaluation_strategy=IntervalStrategy.NO, +fp16=False, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +gradient_accumulation_steps=1, +greater_is_better=None, +group_by_length=False, +ignore_data_skip=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0003, +length_column_name=length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=./runs/Jul06_15-10-55_t1v-n-b95d739e-w-0, +logging_first_step=False, +logging_steps=500, +logging_strategy=IntervalStrategy.STEPS, +lr_scheduler_type=SchedulerType.LINEAR, +max_grad_norm=1.0, +max_steps=-1, +metric_for_best_model=None, +mp_parameters=, +no_cuda=False, +num_train_epochs=18.0, +output_dir=./, +overwrite_output_dir=True, +past_index=-1, +per_device_eval_batch_size=128, +per_device_train_batch_size=128, +prediction_loss_only=False, +push_to_hub=True, +push_to_hub_model_id=, +push_to_hub_organization=None, +push_to_hub_token=None, +remove_unused_columns=True, +report_to=['tensorboard'], +resume_from_checkpoint=None, +run_name=./, +save_on_each_node=False, +save_steps=500, +save_strategy=IntervalStrategy.STEPS, +save_total_limit=None, +seed=42, +sharded_ddp=[], +skip_memory_metrics=True, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=1000, +weight_decay=0.01, +) +[15:10:55] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): s3.amazonaws.com:443 +[15:10:56] - DEBUG - urllib3.connectionpool - https://s3.amazonaws.com:443 "HEAD /datasets.huggingface.co/datasets/datasets/oscar/oscar.py HTTP/1.1" 404 0 +[15:10:56] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): raw.githubusercontent.com:443 +[15:10:56] - DEBUG - urllib3.connectionpool - https://raw.githubusercontent.com:443 "HEAD /huggingface/datasets/master/datasets/oscar/oscar.py HTTP/1.1" 200 0 +[15:10:56] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): raw.githubusercontent.com:443 +[15:10:56] - DEBUG - urllib3.connectionpool - https://raw.githubusercontent.com:443 "HEAD /huggingface/datasets/master/datasets/oscar/dataset_infos.json HTTP/1.1" 200 0 +[15:10:56] - WARNING - datasets.builder - Reusing dataset oscar (/home/wilso/.cache/huggingface/datasets/oscar/unshuffled_deduplicated_id/1.0.0/84838bd49d2295f62008383b05620571535451d84545037bb94d6f3501651df2) +[15:10:56] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): s3.amazonaws.com:443 +[15:10:56] - DEBUG - urllib3.connectionpool - https://s3.amazonaws.com:443 "HEAD /datasets.huggingface.co/datasets/datasets/oscar/oscar.py HTTP/1.1" 404 0 +[15:10:56] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): raw.githubusercontent.com:443 +[15:10:56] - DEBUG - urllib3.connectionpool - https://raw.githubusercontent.com:443 "HEAD /huggingface/datasets/master/datasets/oscar/oscar.py HTTP/1.1" 200 0 +[15:10:56] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): raw.githubusercontent.com:443 +[15:10:57] - DEBUG - urllib3.connectionpool - https://raw.githubusercontent.com:443 "HEAD /huggingface/datasets/master/datasets/oscar/dataset_infos.json HTTP/1.1" 200 0 +[15:10:57] - WARNING - datasets.builder - Reusing dataset oscar (/home/wilso/.cache/huggingface/datasets/oscar/unshuffled_deduplicated_id/1.0.0/84838bd49d2295f62008383b05620571535451d84545037bb94d6f3501651df2) +[15:10:57] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): s3.amazonaws.com:443 +[15:10:57] - DEBUG - urllib3.connectionpool - https://s3.amazonaws.com:443 "HEAD /datasets.huggingface.co/datasets/datasets/oscar/oscar.py HTTP/1.1" 404 0 +[15:10:57] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): raw.githubusercontent.com:443 +[15:10:57] - DEBUG - urllib3.connectionpool - https://raw.githubusercontent.com:443 "HEAD /huggingface/datasets/master/datasets/oscar/oscar.py HTTP/1.1" 200 0 +[15:10:57] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): raw.githubusercontent.com:443 +[15:10:57] - DEBUG - urllib3.connectionpool - https://raw.githubusercontent.com:443 "HEAD /huggingface/datasets/master/datasets/oscar/dataset_infos.json HTTP/1.1" 200 0 +[15:10:57] - WARNING - datasets.builder - Reusing dataset oscar (/home/wilso/.cache/huggingface/datasets/oscar/unshuffled_deduplicated_id/1.0.0/84838bd49d2295f62008383b05620571535451d84545037bb94d6f3501651df2) +[15:10:57] - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /home/wilso/.cache/huggingface/datasets/oscar/unshuffled_deduplicated_id/1.0.0/84838bd49d2295f62008383b05620571535451d84545037bb94d6f3501651df2/cache-484e12c5eef7e8e7.arrow +[15:10:58] - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /home/wilso/.cache/huggingface/datasets/oscar/unshuffled_deduplicated_id/1.0.0/84838bd49d2295f62008383b05620571535451d84545037bb94d6f3501651df2/cache-c71dccca2ce1349d.arrow +[15:10:58] - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /home/wilso/.cache/huggingface/datasets/oscar/unshuffled_deduplicated_id/1.0.0/84838bd49d2295f62008383b05620571535451d84545037bb94d6f3501651df2/cache-cddb40395f5104e7.arrow +[15:10:59] - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /home/wilso/.cache/huggingface/datasets/oscar/unshuffled_deduplicated_id/1.0.0/84838bd49d2295f62008383b05620571535451d84545037bb94d6f3501651df2/cache-e6293c8aef77ae9a.arrow +[15:10:59] - INFO - absl - Starting the local TPU driver. +[15:10:59] - INFO - absl - Unable to initialize backend 'tpu_driver': Not found: Unable to find driver in registry given worker: local:// +[15:10:59] - INFO - absl - Unable to initialize backend 'gpu': Not found: Could not find registered platform with name: "cuda". Available platform names are: TPU Interpreter Host +2021-07-06 15:11:02.669720: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory +2021-07-06 15:11:02.669764: W tensorflow/stream_executor/cuda/cuda_driver.cc:326] failed call to cuInit: UNKNOWN ERROR (303) +/home/wilso/hf/lib/python3.8/site-packages/jax/lib/xla_bridge.py:382: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code. + warnings.warn( +/home/wilso/hf/lib/python3.8/site-packages/jax/lib/xla_bridge.py:369: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code. + warnings.warn( + Epoch ... (1/18): 0%| | 0/18 [00:00