diff --git "a/log/workerlog.0" "b/log/workerlog.0" new file mode 100644--- /dev/null +++ "b/log/workerlog.0" @@ -0,0 +1,1059 @@ +Warning: Unable to use MOT metric, please install motmetrics, for example: `pip install motmetrics`, see https://github.com/longcw/py-motmetrics +Warning: Unable to use MCMOT metric, please install motmetrics, for example: `pip install motmetrics`, see https://github.com/longcw/py-motmetrics +[2024-05-19 14:19:05,764] [ INFO] distributed_strategy.py:214 - distributed strategy initialized +======================= Modified FLAGS detected ======================= +FLAGS(name='FLAGS_selected_gpus', current_value='0', default_value='') +======================================================================= +I0519 14:19:05.765937 197 tcp_utils.cc:181] The server starts to listen on IP_ANY:47457 +I0519 14:19:05.766125 197 tcp_utils.cc:130] Successfully connected to 172.19.2.2:47457 +I0519 14:19:05.847883 197 process_group_nccl.cc:129] ProcessGroupNCCL pg_timeout_ 1800000 +[2024-05-19 14:19:05,848] [ INFO] topology.py:358 - Total 2 pipe comm group(s) create successfully! +W0519 14:19:05.851115 197 gpu_resources.cc:119] Please NOTE: device: 0, GPU Compute Capability: 7.5, Driver API Version: 12.2, Runtime API Version: 11.8 +W0519 14:19:05.852351 197 gpu_resources.cc:164] device: 0, cuDNN Version: 8.9. +I0519 14:19:05.977974 197 process_group_nccl.cc:129] ProcessGroupNCCL pg_timeout_ 1800000 +[2024-05-19 14:19:05,978] [ INFO] topology.py:358 - Total 1 data comm group(s) create successfully! +[2024-05-19 14:19:05,978] [ INFO] topology.py:358 - Total 2 model comm group(s) create successfully! +[2024-05-19 14:19:05,978] [ INFO] topology.py:358 - Total 2 sharding comm group(s) create successfully! +I0519 14:19:05.978358 197 process_group_nccl.cc:129] ProcessGroupNCCL pg_timeout_ 1800000 +[2024-05-19 14:19:05,978] [ INFO] topology.py:288 - HybridParallelInfo: rank_id: 0, mp_degree: 1, sharding_degree: 1, pp_degree: 1, dp_degree: 2, sep_degree: 1, mp_group: [0], sharding_group: [0], pp_group: [0], dp_group: [0, 1], sep:group: None, check/clip group: [0] +[05/19 14:19:05] ppdet.utils.download WARNING: Config annotation datasets/VisDrone/datasets/VisDrone/annotations_VisDrone_train.json is not a file, dataset config is not valid +Traceback (most recent call last): + File "/kaggle/working/ObjectDetection/DETR/tools/train.py", line 183, in + main() + File "/kaggle/working/ObjectDetection/DETR/tools/train.py", line 179, in main + run(FLAGS, cfg) + File "/kaggle/working/ObjectDetection/DETR/tools/train.py", line 126, in run + trainer = Trainer(cfg, mode='train') + File "/kaggle/working/ObjectDetection/DETR/ppdet/engine/trainer.py", line 77, in __init__ + self.loader = create('{}Reader'.format(capital_mode))( + File "/kaggle/working/ObjectDetection/DETR/ppdet/data/reader.py", line 167, in __call__ + self.dataset.check_or_download_dataset() + File "/kaggle/working/ObjectDetection/DETR/ppdet/data/source/dataset.py", line 105, in check_or_download_dataset + self.dataset_dir = get_dataset_path(self.dataset_dir, self.anno_path, + File "/kaggle/working/ObjectDetection/DETR/ppdet/utils/download.py", line 190, in get_dataset_path + raise ValueError( +ValueError: Dataset /kaggle/working/ObjectDetection/DETR/datasets/VisDrone is not valid for reason above, please check again. +I0519 14:19:06.230679 197 process_group_nccl.cc:132] ProcessGroupNCCL destruct +I0519 14:19:06.230718 197 process_group_nccl.cc:132] ProcessGroupNCCL destruct +I0519 14:19:06.230729 197 process_group_nccl.cc:132] ProcessGroupNCCL destruct +I0519 14:19:06.272063 223 tcp_store.cc:289] receive shutdown event and so quit from MasterDaemon run loop +Warning: Unable to use MOT metric, please install motmetrics, for example: `pip install motmetrics`, see https://github.com/longcw/py-motmetrics +Warning: Unable to use MCMOT metric, please install motmetrics, for example: `pip install motmetrics`, see https://github.com/longcw/py-motmetrics +[2024-05-19 14:21:15,559] [ INFO] distributed_strategy.py:214 - distributed strategy initialized +======================= Modified FLAGS detected ======================= +FLAGS(name='FLAGS_selected_gpus', current_value='0', default_value='') +======================================================================= +I0519 14:21:15.560766 273 tcp_utils.cc:181] The server starts to listen on IP_ANY:58840 +I0519 14:21:15.560976 273 tcp_utils.cc:130] Successfully connected to 172.19.2.2:58840 +I0519 14:21:15.642843 273 process_group_nccl.cc:129] ProcessGroupNCCL pg_timeout_ 1800000 +[2024-05-19 14:21:15,643] [ INFO] topology.py:358 - Total 2 pipe comm group(s) create successfully! +W0519 14:21:15.644209 273 gpu_resources.cc:119] Please NOTE: device: 0, GPU Compute Capability: 7.5, Driver API Version: 12.2, Runtime API Version: 11.8 +W0519 14:21:15.645530 273 gpu_resources.cc:164] device: 0, cuDNN Version: 8.9. +I0519 14:21:15.788317 273 process_group_nccl.cc:129] ProcessGroupNCCL pg_timeout_ 1800000 +[2024-05-19 14:21:15,788] [ INFO] topology.py:358 - Total 1 data comm group(s) create successfully! +[2024-05-19 14:21:15,788] [ INFO] topology.py:358 - Total 2 model comm group(s) create successfully! +[2024-05-19 14:21:15,788] [ INFO] topology.py:358 - Total 2 sharding comm group(s) create successfully! +I0519 14:21:15.788738 273 process_group_nccl.cc:129] ProcessGroupNCCL pg_timeout_ 1800000 +[2024-05-19 14:21:15,788] [ INFO] topology.py:288 - HybridParallelInfo: rank_id: 0, mp_degree: 1, sharding_degree: 1, pp_degree: 1, dp_degree: 2, sep_degree: 1, mp_group: [0], sharding_group: [0], pp_group: [0], dp_group: [0, 1], sep:group: None, check/clip group: [0] +[05/19 14:21:15] ppdet.utils.download WARNING: Config annotation datasets/VisDrone/datasets/VisDrone/annotations_VisDrone_train.json is not a file, dataset config is not valid +Traceback (most recent call last): + File "/kaggle/working/ObjectDetection/DETR/tools/train.py", line 183, in + main() + File "/kaggle/working/ObjectDetection/DETR/tools/train.py", line 179, in main + run(FLAGS, cfg) + File "/kaggle/working/ObjectDetection/DETR/tools/train.py", line 126, in run + trainer = Trainer(cfg, mode='train') + File "/kaggle/working/ObjectDetection/DETR/ppdet/engine/trainer.py", line 77, in __init__ + self.loader = create('{}Reader'.format(capital_mode))( + File "/kaggle/working/ObjectDetection/DETR/ppdet/data/reader.py", line 167, in __call__ + self.dataset.check_or_download_dataset() + File "/kaggle/working/ObjectDetection/DETR/ppdet/data/source/dataset.py", line 105, in check_or_download_dataset + self.dataset_dir = get_dataset_path(self.dataset_dir, self.anno_path, + File "/kaggle/working/ObjectDetection/DETR/ppdet/utils/download.py", line 190, in get_dataset_path + raise ValueError( +ValueError: Dataset /kaggle/working/ObjectDetection/DETR/datasets/VisDrone is not valid for reason above, please check again. +I0519 14:21:16.035830 273 process_group_nccl.cc:132] ProcessGroupNCCL destruct +I0519 14:21:16.035882 273 process_group_nccl.cc:132] ProcessGroupNCCL destruct +I0519 14:21:16.035892 273 process_group_nccl.cc:132] ProcessGroupNCCL destruct +I0519 14:21:16.075630 299 tcp_store.cc:289] receive shutdown event and so quit from MasterDaemon run loop +Warning: Unable to use MOT metric, please install motmetrics, for example: `pip install motmetrics`, see https://github.com/longcw/py-motmetrics +Warning: Unable to use MCMOT metric, please install motmetrics, for example: `pip install motmetrics`, see https://github.com/longcw/py-motmetrics +[2024-05-19 14:25:56,462] [ INFO] distributed_strategy.py:214 - distributed strategy initialized +======================= Modified FLAGS detected ======================= +FLAGS(name='FLAGS_selected_gpus', current_value='0', default_value='') +======================================================================= +I0519 14:25:56.463665 339 tcp_utils.cc:181] The server starts to listen on IP_ANY:58530 +I0519 14:25:56.463861 339 tcp_utils.cc:130] Successfully connected to 172.19.2.2:58530 +I0519 14:25:59.581831 339 process_group_nccl.cc:129] ProcessGroupNCCL pg_timeout_ 1800000 +[2024-05-19 14:25:59,582] [ INFO] topology.py:358 - Total 2 pipe comm group(s) create successfully! +W0519 14:25:59.583158 339 gpu_resources.cc:119] Please NOTE: device: 0, GPU Compute Capability: 7.5, Driver API Version: 12.2, Runtime API Version: 11.8 +W0519 14:25:59.584975 339 gpu_resources.cc:164] device: 0, cuDNN Version: 8.9. +I0519 14:25:59.709707 339 process_group_nccl.cc:129] ProcessGroupNCCL pg_timeout_ 1800000 +[2024-05-19 14:25:59,709] [ INFO] topology.py:358 - Total 1 data comm group(s) create successfully! +[2024-05-19 14:25:59,709] [ INFO] topology.py:358 - Total 2 model comm group(s) create successfully! +[2024-05-19 14:25:59,710] [ INFO] topology.py:358 - Total 2 sharding comm group(s) create successfully! +I0519 14:25:59.710146 339 process_group_nccl.cc:129] ProcessGroupNCCL pg_timeout_ 1800000 +[2024-05-19 14:25:59,710] [ INFO] topology.py:288 - HybridParallelInfo: rank_id: 0, mp_degree: 1, sharding_degree: 1, pp_degree: 1, dp_degree: 2, sep_degree: 1, mp_group: [0], sharding_group: [0], pp_group: [0], dp_group: [0, 1], sep:group: None, check/clip group: [0] +loading annotations into memory... +Done (t=2.01s) +creating index... +index created! +Traceback (most recent call last): + File "/kaggle/working/ObjectDetection/DETR/tools/train.py", line 183, in + main() + File "/kaggle/working/ObjectDetection/DETR/tools/train.py", line 179, in main + run(FLAGS, cfg) + File "/kaggle/working/ObjectDetection/DETR/tools/train.py", line 126, in run + trainer = Trainer(cfg, mode='train') + File "/kaggle/working/ObjectDetection/DETR/ppdet/engine/trainer.py", line 77, in __init__ + self.loader = create('{}Reader'.format(capital_mode))( + File "/kaggle/working/ObjectDetection/DETR/ppdet/data/reader.py", line 168, in __call__ + self.dataset.parse_dataset() + File "/kaggle/working/ObjectDetection/DETR/ppdet/data/source/coco.py", line 186, in parse_dataset + gt_class[i][0] = self.catid2clsid[catid] +KeyError: 0 +I0519 14:26:02.580338 339 process_group_nccl.cc:132] ProcessGroupNCCL destruct +I0519 14:26:02.580395 339 process_group_nccl.cc:132] ProcessGroupNCCL destruct +I0519 14:26:02.580405 339 process_group_nccl.cc:132] ProcessGroupNCCL destruct +I0519 14:26:02.621611 365 tcp_store.cc:289] receive shutdown event and so quit from MasterDaemon run loop +Warning: Unable to use MOT metric, please install motmetrics, for example: `pip install motmetrics`, see https://github.com/longcw/py-motmetrics +Warning: Unable to use MCMOT metric, please install motmetrics, for example: `pip install motmetrics`, see https://github.com/longcw/py-motmetrics +[2024-05-19 14:31:28,631] [ INFO] distributed_strategy.py:214 - distributed strategy initialized +======================= Modified FLAGS detected ======================= +FLAGS(name='FLAGS_selected_gpus', current_value='0', default_value='') +======================================================================= +I0519 14:31:28.632526 420 tcp_utils.cc:181] The server starts to listen on IP_ANY:52124 +I0519 14:31:28.632692 420 tcp_utils.cc:130] Successfully connected to 172.19.2.2:52124 +I0519 14:31:31.751803 420 process_group_nccl.cc:129] ProcessGroupNCCL pg_timeout_ 1800000 +[2024-05-19 14:31:31,752] [ INFO] topology.py:358 - Total 2 pipe comm group(s) create successfully! +W0519 14:31:31.752864 420 gpu_resources.cc:119] Please NOTE: device: 0, GPU Compute Capability: 7.5, Driver API Version: 12.2, Runtime API Version: 11.8 +W0519 14:31:31.754097 420 gpu_resources.cc:164] device: 0, cuDNN Version: 8.9. +I0519 14:31:31.888082 420 process_group_nccl.cc:129] ProcessGroupNCCL pg_timeout_ 1800000 +[2024-05-19 14:31:31,888] [ INFO] topology.py:358 - Total 1 data comm group(s) create successfully! +[2024-05-19 14:31:31,888] [ INFO] topology.py:358 - Total 2 model comm group(s) create successfully! +[2024-05-19 14:31:31,888] [ INFO] topology.py:358 - Total 2 sharding comm group(s) create successfully! +I0519 14:31:31.888540 420 process_group_nccl.cc:129] ProcessGroupNCCL pg_timeout_ 1800000 +[2024-05-19 14:31:31,888] [ INFO] topology.py:288 - HybridParallelInfo: rank_id: 0, mp_degree: 1, sharding_degree: 1, pp_degree: 1, dp_degree: 2, sep_degree: 1, mp_group: [0], sharding_group: [0], pp_group: [0], dp_group: [0, 1], sep:group: None, check/clip group: [0] +loading annotations into memory... +Done (t=1.94s) +creating index... +index created! +[05/19 14:31:34] ppdet.data.source.coco WARNING: Found an invalid bbox in annotations: im_id: 201, area: 0.0 x1: 611, y1: 158, x2: 615, y2: 158. +[05/19 14:31:39] ppdet.data.source.coco INFO: Load [6471 samples valid, 0 samples invalid] in file datasets/VisDrone/annotations_VisDrone_train.json. +[05/19 14:31:45] ppdet.utils.download INFO: Downloading PPHGNetV2_X_ssld_pretrained.pdparams from https://bj.bcebos.com/v1/paddledet/models/pretrained/PPHGNetV2_X_ssld_pretrained.pdparams + 0%| | 0/155004 [00:00) +2 paddle::experimental::squeeze_intermediate(paddle::Tensor const&, paddle::experimental::IntArrayBase const&) +3 paddle::experimental::PrepareData(paddle::Tensor const&, phi::TensorArgDef const&, paddle::experimental::TransformFlag const&, bool) +4 paddle::experimental::TransformData(phi::DenseTensor const&, phi::TensorArgDef const&, paddle::experimental::TransformFlag const&, bool) +5 paddle::experimental::TransDataPlace(phi::DenseTensor const&, phi::Place) +6 void phi::Copy(phi::DeviceContext const&, phi::DenseTensor const&, phi::Place, bool, phi::DenseTensor*) +7 phi::memory_utils::Copy(phi::Place const&, void*, phi::Place const&, void const*, unsigned long, void*) +8 phi::MemoryUtils::Copy(phi::Place const&, void*, phi::Place const&, void const*, unsigned long, void*) +9 void paddle::memory::Copy(phi::Place, void*, phi::Place, void const*, unsigned long, void*) +10 void paddle::memory::Copy(phi::GPUPlace, void*, phi::GPUPinnedPlace, void const*, unsigned long, void*) +11 phi::backends::gpu::GpuMemcpySync(void*, void const*, unsigned long, cudaMemcpyKind) + +---------------------- +Error Message Summary: +---------------------- +FatalError: `Termination signal` is detected by the operating system. + [TimeInfo: *** Aborted at 1716129481 (unix time) try "date -d @1716129481" if you are using GNU date ***] + [SignalInfo: *** SIGTERM (@0x265) received by PID 627 (TID 0x7ee5ca109740) from PID 613 ***] + +Warning: Unable to use MOT metric, please install motmetrics, for example: `pip install motmetrics`, see https://github.com/longcw/py-motmetrics +Warning: Unable to use MCMOT metric, please install motmetrics, for example: `pip install motmetrics`, see https://github.com/longcw/py-motmetrics +[2024-05-19 14:39:04,686] [ INFO] distributed_strategy.py:214 - distributed strategy initialized +======================= Modified FLAGS detected ======================= +FLAGS(name='FLAGS_selected_gpus', current_value='0', default_value='') +======================================================================= +I0519 14:39:04.687780 796 tcp_utils.cc:181] The server starts to listen on IP_ANY:58013 +I0519 14:39:04.687999 796 tcp_utils.cc:130] Successfully connected to 172.19.2.2:58013 +I0519 14:39:04.861780 796 process_group_nccl.cc:129] ProcessGroupNCCL pg_timeout_ 1800000 +[2024-05-19 14:39:04,862] [ INFO] topology.py:358 - Total 2 pipe comm group(s) create successfully! +W0519 14:39:04.863056 796 gpu_resources.cc:119] Please NOTE: device: 0, GPU Compute Capability: 7.5, Driver API Version: 12.2, Runtime API Version: 11.8 +W0519 14:39:04.864679 796 gpu_resources.cc:164] device: 0, cuDNN Version: 8.9. +I0519 14:39:05.022365 796 process_group_nccl.cc:129] ProcessGroupNCCL pg_timeout_ 1800000 +[2024-05-19 14:39:05,022] [ INFO] topology.py:358 - Total 1 data comm group(s) create successfully! +[2024-05-19 14:39:05,022] [ INFO] topology.py:358 - Total 2 model comm group(s) create successfully! +[2024-05-19 14:39:05,022] [ INFO] topology.py:358 - Total 2 sharding comm group(s) create successfully! +I0519 14:39:05.022951 796 process_group_nccl.cc:129] ProcessGroupNCCL pg_timeout_ 1800000 +[2024-05-19 14:39:05,023] [ INFO] topology.py:288 - HybridParallelInfo: rank_id: 0, mp_degree: 1, sharding_degree: 1, pp_degree: 1, dp_degree: 2, sep_degree: 1, mp_group: [0], sharding_group: [0], pp_group: [0], dp_group: [0, 1], sep:group: None, check/clip group: [0] +loading annotations into memory... +Done (t=2.76s) +creating index... +index created! +[05/19 14:39:08] ppdet.data.source.coco WARNING: Found an invalid bbox in annotations: im_id: 201, area: 0.0 x1: 611, y1: 158, x2: 615, y2: 158. +[05/19 14:39:14] ppdet.data.source.coco INFO: Load [6471 samples valid, 0 samples invalid] in file datasets/VisDrone/annotations_VisDrone_train.json. +[05/19 14:39:20] ppdet.engine ERROR: wandb not found, please install wandb. Use: `pip install wandb`. +Traceback (most recent call last): + File "/kaggle/working/ObjectDetection/DETR/tools/train.py", line 183, in + main() + File "/kaggle/working/ObjectDetection/DETR/tools/train.py", line 179, in main + run(FLAGS, cfg) + File "/kaggle/working/ObjectDetection/DETR/tools/train.py", line 126, in run + trainer = Trainer(cfg, mode='train') + File "/kaggle/working/ObjectDetection/DETR/ppdet/engine/trainer.py", line 150, in __init__ + self._init_callbacks() + File "/kaggle/working/ObjectDetection/DETR/ppdet/engine/trainer.py", line 162, in _init_callbacks + self._callbacks.append(WandbCallback(self)) + File "/kaggle/working/ObjectDetection/DETR/ppdet/engine/callbacks.py", line 323, in __init__ + raise e + File "/kaggle/working/ObjectDetection/DETR/ppdet/engine/callbacks.py", line 318, in __init__ + import wandb + File "/opt/conda/lib/python3.10/site-packages/wandb/__init__.py", line 27, in + from wandb import sdk as wandb_sdk + File "/opt/conda/lib/python3.10/site-packages/wandb/sdk/__init__.py", line 25, in + from .artifacts.artifact import Artifact + File "/opt/conda/lib/python3.10/site-packages/wandb/sdk/artifacts/artifact.py", line 46, in + from wandb.apis.normalize import normalize_exceptions + File "/opt/conda/lib/python3.10/site-packages/wandb/apis/__init__.py", line 43, in + from .internal import Api as InternalApi # noqa + File "/opt/conda/lib/python3.10/site-packages/wandb/apis/internal.py", line 3, in + from wandb.sdk.internal.internal_api import Api as InternalApi + File "/opt/conda/lib/python3.10/site-packages/wandb/sdk/internal/internal_api.py", line 48, in + from ..lib import retry + File "/opt/conda/lib/python3.10/site-packages/wandb/sdk/lib/retry.py", line 17, in + from .mailbox import ContextCancelledError + File "/opt/conda/lib/python3.10/site-packages/wandb/sdk/lib/mailbox.py", line 102, in + class _MailboxSlot: + File "/opt/conda/lib/python3.10/site-packages/wandb/sdk/lib/mailbox.py", line 103, in _MailboxSlot + _result: Optional[pb.Result] +AttributeError: module 'wandb.proto.wandb_internal_pb2' has no attribute 'Result' + + +-------------------------------------- +C++ Traceback (most recent call last): +-------------------------------------- +No stack trace in paddle, may be caused by external reasons. + +---------------------- +Error Message Summary: +---------------------- +FatalError: `Termination signal` is detected by the operating system. + [TimeInfo: *** Aborted at 1716129561 (unix time) try "date -d @1716129561" if you are using GNU date ***] + [SignalInfo: *** SIGTERM (@0x30e) received by PID 796 (TID 0x7f5dae22e740) from PID 782 ***] + +Warning: Unable to use MOT metric, please install motmetrics, for example: `pip install motmetrics`, see https://github.com/longcw/py-motmetrics +Warning: Unable to use MCMOT metric, please install motmetrics, for example: `pip install motmetrics`, see https://github.com/longcw/py-motmetrics +[2024-05-19 14:47:12,876] [ INFO] distributed_strategy.py:214 - distributed strategy initialized +======================= Modified FLAGS detected ======================= +FLAGS(name='FLAGS_selected_gpus', current_value='0', default_value='') +======================================================================= +I0519 14:47:12.877843 177 tcp_utils.cc:181] The server starts to listen on IP_ANY:41929 +I0519 14:47:12.877998 177 tcp_utils.cc:130] Successfully connected to 172.19.2.2:41929 +I0519 14:47:15.903615 177 process_group_nccl.cc:129] ProcessGroupNCCL pg_timeout_ 1800000 +[2024-05-19 14:47:15,904] [ INFO] topology.py:358 - Total 2 pipe comm group(s) create successfully! +W0519 14:47:15.904691 177 gpu_resources.cc:119] Please NOTE: device: 0, GPU Compute Capability: 7.5, Driver API Version: 12.2, Runtime API Version: 11.8 +W0519 14:47:15.905977 177 gpu_resources.cc:164] device: 0, cuDNN Version: 8.9. +I0519 14:47:16.034154 177 process_group_nccl.cc:129] ProcessGroupNCCL pg_timeout_ 1800000 +[2024-05-19 14:47:16,034] [ INFO] topology.py:358 - Total 1 data comm group(s) create successfully! +[2024-05-19 14:47:16,034] [ INFO] topology.py:358 - Total 2 model comm group(s) create successfully! +[2024-05-19 14:47:16,034] [ INFO] topology.py:358 - Total 2 sharding comm group(s) create successfully! +I0519 14:47:16.034626 177 process_group_nccl.cc:129] ProcessGroupNCCL pg_timeout_ 1800000 +[2024-05-19 14:47:16,034] [ INFO] topology.py:288 - HybridParallelInfo: rank_id: 0, mp_degree: 1, sharding_degree: 1, pp_degree: 1, dp_degree: 2, sep_degree: 1, mp_group: [0], sharding_group: [0], pp_group: [0], dp_group: [0, 1], sep:group: None, check/clip group: [0] +loading annotations into memory... +Done (t=1.95s) +creating index... +index created! +[05/19 14:47:18] ppdet.data.source.coco WARNING: Found an invalid bbox in annotations: im_id: 201, area: 0.0 x1: 611, y1: 158, x2: 615, y2: 158. +[05/19 14:47:22] ppdet.data.source.coco INFO: Load [6471 samples valid, 0 samples invalid] in file datasets/VisDrone/annotations_VisDrone_train.json. +wandb: Currently logged in as: thanhtuit96 (thanhtuit). Use `wandb login --relogin` to force relogin +wandb: wandb version 0.17.0 is available! To upgrade, please run: +wandb: $ pip install wandb --upgrade +wandb: Tracking run with wandb version 0.16.6 +wandb: Run data is saved locally in /kaggle/working/ObjectDetection/DETR/wandb/run-20240519_144730-7on4vywi +wandb: Run `wandb offline` to turn off syncing. +wandb: Syncing run silvery-planet-1 +wandb: ⭐️ View project at https://wandb.ai/thanhtuit/ObjectDetection-DETR_tools +wandb: 🚀 View run at https://wandb.ai/thanhtuit/ObjectDetection-DETR_tools/runs/7on4vywi +[05/19 14:47:46] ppdet.utils.download INFO: Downloading PPHGNetV2_X_ssld_pretrained.pdparams from https://bj.bcebos.com/v1/paddledet/models/pretrained/PPHGNetV2_X_ssld_pretrained.pdparams + 0%| | 0/155004 [00:00 + main() + File "/kaggle/working/ObjectDetection/DETR/tools/train.py", line 179, in main + run(FLAGS, cfg) + File "/kaggle/working/ObjectDetection/DETR/tools/train.py", line 135, in run + trainer.train(FLAGS.eval) + File "/kaggle/working/ObjectDetection/DETR/ppdet/engine/trainer.py", line 404, in train + self._compose_callback.on_epoch_end(self.status) + File "/kaggle/working/ObjectDetection/DETR/ppdet/engine/callbacks.py", line 86, in on_epoch_end + c.on_epoch_end(status) + File "/kaggle/working/ObjectDetection/DETR/ppdet/engine/callbacks.py", line 216, in on_epoch_end + save_model( + File "/kaggle/working/ObjectDetection/DETR/ppdet/utils/checkpoint.py", line 324, in save_model + paddle.save(state_dict, save_path + ".pdopt") + File "/root/.local/lib/python3.10/site-packages/paddle/framework/io.py", line 902, in save + _pickle_save(obj, f, protocol) + File "/root/.local/lib/python3.10/site-packages/paddle/framework/io.py", line 428, in _pickle_save + pickler.dump(obj) +OSError: [Errno 28] No space left on device +I0520 04:11:19.231618 362 process_group_nccl.cc:132] ProcessGroupNCCL destruct +I0520 04:11:19.231832 362 process_group_nccl.cc:132] ProcessGroupNCCL destruct +I0520 04:11:19.231868 362 process_group_nccl.cc:132] ProcessGroupNCCL destruct +I0520 04:11:19.707178 388 tcp_store.cc:289] receive shutdown event and so quit from MasterDaemon run loop