SiMajid commited on
Commit
8c9aba0
1 Parent(s): 3f072e0

reward-train-facebook-opt350m_v2

Browse files
wandb/debug-internal.log CHANGED
@@ -6673,3 +6673,55 @@
6673
  2024-06-12 19:19:08,109 INFO SystemMonitor:143 [interfaces.py:start():190] Started gpu monitoring
6674
  2024-06-12 19:19:08,109 INFO SystemMonitor:143 [interfaces.py:start():190] Started memory monitoring
6675
  2024-06-12 19:19:08,109 INFO SystemMonitor:143 [interfaces.py:start():190] Started network monitoring
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6673
  2024-06-12 19:19:08,109 INFO SystemMonitor:143 [interfaces.py:start():190] Started gpu monitoring
6674
  2024-06-12 19:19:08,109 INFO SystemMonitor:143 [interfaces.py:start():190] Started memory monitoring
6675
  2024-06-12 19:19:08,109 INFO SystemMonitor:143 [interfaces.py:start():190] Started network monitoring
6676
+ 2024-06-12 19:19:10,108 DEBUG HandlerThread:143 [handler.py:handle_request():146] handle_request: status_report
6677
+ 2024-06-12 19:19:10,950 DEBUG HandlerThread:143 [handler.py:handle_request():146] handle_request: pause
6678
+ 2024-06-12 19:19:10,950 INFO HandlerThread:143 [handler.py:handle_request_pause():708] stopping system metrics thread
6679
+ 2024-06-12 19:19:10,951 INFO HandlerThread:143 [system_monitor.py:finish():203] Stopping system monitor
6680
+ 2024-06-12 19:19:10,951 DEBUG SystemMonitor:143 [system_monitor.py:_start():172] Starting system metrics aggregation loop
6681
+ 2024-06-12 19:19:10,951 DEBUG SystemMonitor:143 [system_monitor.py:_start():179] Finished system metrics aggregation loop
6682
+ 2024-06-12 19:19:10,951 DEBUG SystemMonitor:143 [system_monitor.py:_start():183] Publishing last batch of metrics
6683
+ 2024-06-12 19:19:10,952 INFO HandlerThread:143 [interfaces.py:finish():202] Joined cpu monitor
6684
+ 2024-06-12 19:19:10,952 INFO HandlerThread:143 [interfaces.py:finish():202] Joined disk monitor
6685
+ 2024-06-12 19:19:10,957 INFO HandlerThread:143 [interfaces.py:finish():202] Joined gpu monitor
6686
+ 2024-06-12 19:19:10,957 INFO HandlerThread:143 [interfaces.py:finish():202] Joined memory monitor
6687
+ 2024-06-12 19:19:10,957 INFO HandlerThread:143 [interfaces.py:finish():202] Joined network monitor
6688
+ 2024-06-12 19:19:10,958 DEBUG SenderThread:143 [sender.py:send():382] send: stats
6689
+ 2024-06-12 19:19:11,082 DEBUG HandlerThread:143 [handler.py:handle_request():146] handle_request: keepalive
6690
+ 2024-06-12 19:19:15,959 DEBUG HandlerThread:143 [handler.py:handle_request():146] handle_request: status_report
6691
+ 2024-06-12 19:19:16,083 DEBUG HandlerThread:143 [handler.py:handle_request():146] handle_request: keepalive
6692
+ 2024-06-12 19:19:20,960 DEBUG HandlerThread:143 [handler.py:handle_request():146] handle_request: status_report
6693
+ 2024-06-12 19:19:21,084 DEBUG HandlerThread:143 [handler.py:handle_request():146] handle_request: keepalive
6694
+ 2024-06-12 19:19:25,961 DEBUG HandlerThread:143 [handler.py:handle_request():146] handle_request: status_report
6695
+ 2024-06-12 19:19:26,085 DEBUG HandlerThread:143 [handler.py:handle_request():146] handle_request: keepalive
6696
+ 2024-06-12 19:19:30,962 DEBUG HandlerThread:143 [handler.py:handle_request():146] handle_request: status_report
6697
+ 2024-06-12 19:19:31,086 DEBUG HandlerThread:143 [handler.py:handle_request():146] handle_request: keepalive
6698
+ 2024-06-12 19:19:32,105 DEBUG HandlerThread:143 [handler.py:handle_request():146] handle_request: resume
6699
+ 2024-06-12 19:19:32,105 INFO HandlerThread:143 [handler.py:handle_request_resume():699] starting system metrics thread
6700
+ 2024-06-12 19:19:32,106 INFO HandlerThread:143 [system_monitor.py:start():194] Starting system monitor
6701
+ 2024-06-12 19:19:32,106 INFO SystemMonitor:143 [system_monitor.py:_start():158] Starting system asset monitoring threads
6702
+ 2024-06-12 19:19:32,106 INFO SystemMonitor:143 [interfaces.py:start():190] Started cpu monitoring
6703
+ 2024-06-12 19:19:32,107 INFO SystemMonitor:143 [interfaces.py:start():190] Started disk monitoring
6704
+ 2024-06-12 19:19:32,108 INFO SystemMonitor:143 [interfaces.py:start():190] Started gpu monitoring
6705
+ 2024-06-12 19:19:32,108 INFO SystemMonitor:143 [interfaces.py:start():190] Started memory monitoring
6706
+ 2024-06-12 19:19:32,110 INFO SystemMonitor:143 [interfaces.py:start():190] Started network monitoring
6707
+ 2024-06-12 19:19:33,147 DEBUG HandlerThread:143 [handler.py:handle_request():146] handle_request: pause
6708
+ 2024-06-12 19:19:33,148 INFO HandlerThread:143 [handler.py:handle_request_pause():708] stopping system metrics thread
6709
+ 2024-06-12 19:19:33,148 INFO HandlerThread:143 [system_monitor.py:finish():203] Stopping system monitor
6710
+ 2024-06-12 19:19:33,149 DEBUG SystemMonitor:143 [system_monitor.py:_start():172] Starting system metrics aggregation loop
6711
+ 2024-06-12 19:19:33,149 INFO HandlerThread:143 [interfaces.py:finish():202] Joined cpu monitor
6712
+ 2024-06-12 19:19:33,150 INFO HandlerThread:143 [interfaces.py:finish():202] Joined disk monitor
6713
+ 2024-06-12 19:19:33,149 DEBUG SystemMonitor:143 [system_monitor.py:_start():179] Finished system metrics aggregation loop
6714
+ 2024-06-12 19:19:33,151 DEBUG SystemMonitor:143 [system_monitor.py:_start():183] Publishing last batch of metrics
6715
+ 2024-06-12 19:19:33,156 INFO HandlerThread:143 [interfaces.py:finish():202] Joined gpu monitor
6716
+ 2024-06-12 19:19:33,156 INFO HandlerThread:143 [interfaces.py:finish():202] Joined memory monitor
6717
+ 2024-06-12 19:19:33,156 INFO HandlerThread:143 [interfaces.py:finish():202] Joined network monitor
6718
+ 2024-06-12 19:19:33,157 DEBUG HandlerThread:143 [handler.py:handle_request():146] handle_request: resume
6719
+ 2024-06-12 19:19:33,157 INFO HandlerThread:143 [handler.py:handle_request_resume():699] starting system metrics thread
6720
+ 2024-06-12 19:19:33,157 INFO HandlerThread:143 [system_monitor.py:start():194] Starting system monitor
6721
+ 2024-06-12 19:19:33,157 INFO SystemMonitor:143 [system_monitor.py:_start():158] Starting system asset monitoring threads
6722
+ 2024-06-12 19:19:33,157 DEBUG SenderThread:143 [sender.py:send():382] send: stats
6723
+ 2024-06-12 19:19:33,160 INFO SystemMonitor:143 [interfaces.py:start():190] Started cpu monitoring
6724
+ 2024-06-12 19:19:33,161 INFO SystemMonitor:143 [interfaces.py:start():190] Started disk monitoring
6725
+ 2024-06-12 19:19:33,162 INFO SystemMonitor:143 [interfaces.py:start():190] Started gpu monitoring
6726
+ 2024-06-12 19:19:33,163 INFO SystemMonitor:143 [interfaces.py:start():190] Started memory monitoring
6727
+ 2024-06-12 19:19:33,163 INFO SystemMonitor:143 [interfaces.py:start():190] Started network monitoring
wandb/debug.log CHANGED
@@ -44,3 +44,9 @@ config: {}
44
  2024-06-12 19:19:08,096 INFO MainThread:26 [jupyter.py:save_ipynb():373] not saving jupyter notebook
45
  2024-06-12 19:19:08,096 INFO MainThread:26 [wandb_init.py:_pause_backend():437] pausing backend
46
  2024-06-12 19:19:08,101 INFO MainThread:26 [wandb_init.py:_resume_backend():442] resuming backend
 
 
 
 
 
 
 
44
  2024-06-12 19:19:08,096 INFO MainThread:26 [jupyter.py:save_ipynb():373] not saving jupyter notebook
45
  2024-06-12 19:19:08,096 INFO MainThread:26 [wandb_init.py:_pause_backend():437] pausing backend
46
  2024-06-12 19:19:08,101 INFO MainThread:26 [wandb_init.py:_resume_backend():442] resuming backend
47
+ 2024-06-12 19:19:10,950 INFO MainThread:26 [jupyter.py:save_ipynb():373] not saving jupyter notebook
48
+ 2024-06-12 19:19:10,950 INFO MainThread:26 [wandb_init.py:_pause_backend():437] pausing backend
49
+ 2024-06-12 19:19:32,105 INFO MainThread:26 [wandb_init.py:_resume_backend():442] resuming backend
50
+ 2024-06-12 19:19:33,147 INFO MainThread:26 [jupyter.py:save_ipynb():373] not saving jupyter notebook
51
+ 2024-06-12 19:19:33,147 INFO MainThread:26 [wandb_init.py:_pause_backend():437] pausing backend
52
+ 2024-06-12 19:19:33,155 INFO MainThread:26 [wandb_init.py:_resume_backend():442] resuming backend
wandb/run-20240612_150610-7vg9fga2/logs/debug-internal.log CHANGED
@@ -6673,3 +6673,55 @@
6673
  2024-06-12 19:19:08,109 INFO SystemMonitor:143 [interfaces.py:start():190] Started gpu monitoring
6674
  2024-06-12 19:19:08,109 INFO SystemMonitor:143 [interfaces.py:start():190] Started memory monitoring
6675
  2024-06-12 19:19:08,109 INFO SystemMonitor:143 [interfaces.py:start():190] Started network monitoring
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6673
  2024-06-12 19:19:08,109 INFO SystemMonitor:143 [interfaces.py:start():190] Started gpu monitoring
6674
  2024-06-12 19:19:08,109 INFO SystemMonitor:143 [interfaces.py:start():190] Started memory monitoring
6675
  2024-06-12 19:19:08,109 INFO SystemMonitor:143 [interfaces.py:start():190] Started network monitoring
6676
+ 2024-06-12 19:19:10,108 DEBUG HandlerThread:143 [handler.py:handle_request():146] handle_request: status_report
6677
+ 2024-06-12 19:19:10,950 DEBUG HandlerThread:143 [handler.py:handle_request():146] handle_request: pause
6678
+ 2024-06-12 19:19:10,950 INFO HandlerThread:143 [handler.py:handle_request_pause():708] stopping system metrics thread
6679
+ 2024-06-12 19:19:10,951 INFO HandlerThread:143 [system_monitor.py:finish():203] Stopping system monitor
6680
+ 2024-06-12 19:19:10,951 DEBUG SystemMonitor:143 [system_monitor.py:_start():172] Starting system metrics aggregation loop
6681
+ 2024-06-12 19:19:10,951 DEBUG SystemMonitor:143 [system_monitor.py:_start():179] Finished system metrics aggregation loop
6682
+ 2024-06-12 19:19:10,951 DEBUG SystemMonitor:143 [system_monitor.py:_start():183] Publishing last batch of metrics
6683
+ 2024-06-12 19:19:10,952 INFO HandlerThread:143 [interfaces.py:finish():202] Joined cpu monitor
6684
+ 2024-06-12 19:19:10,952 INFO HandlerThread:143 [interfaces.py:finish():202] Joined disk monitor
6685
+ 2024-06-12 19:19:10,957 INFO HandlerThread:143 [interfaces.py:finish():202] Joined gpu monitor
6686
+ 2024-06-12 19:19:10,957 INFO HandlerThread:143 [interfaces.py:finish():202] Joined memory monitor
6687
+ 2024-06-12 19:19:10,957 INFO HandlerThread:143 [interfaces.py:finish():202] Joined network monitor
6688
+ 2024-06-12 19:19:10,958 DEBUG SenderThread:143 [sender.py:send():382] send: stats
6689
+ 2024-06-12 19:19:11,082 DEBUG HandlerThread:143 [handler.py:handle_request():146] handle_request: keepalive
6690
+ 2024-06-12 19:19:15,959 DEBUG HandlerThread:143 [handler.py:handle_request():146] handle_request: status_report
6691
+ 2024-06-12 19:19:16,083 DEBUG HandlerThread:143 [handler.py:handle_request():146] handle_request: keepalive
6692
+ 2024-06-12 19:19:20,960 DEBUG HandlerThread:143 [handler.py:handle_request():146] handle_request: status_report
6693
+ 2024-06-12 19:19:21,084 DEBUG HandlerThread:143 [handler.py:handle_request():146] handle_request: keepalive
6694
+ 2024-06-12 19:19:25,961 DEBUG HandlerThread:143 [handler.py:handle_request():146] handle_request: status_report
6695
+ 2024-06-12 19:19:26,085 DEBUG HandlerThread:143 [handler.py:handle_request():146] handle_request: keepalive
6696
+ 2024-06-12 19:19:30,962 DEBUG HandlerThread:143 [handler.py:handle_request():146] handle_request: status_report
6697
+ 2024-06-12 19:19:31,086 DEBUG HandlerThread:143 [handler.py:handle_request():146] handle_request: keepalive
6698
+ 2024-06-12 19:19:32,105 DEBUG HandlerThread:143 [handler.py:handle_request():146] handle_request: resume
6699
+ 2024-06-12 19:19:32,105 INFO HandlerThread:143 [handler.py:handle_request_resume():699] starting system metrics thread
6700
+ 2024-06-12 19:19:32,106 INFO HandlerThread:143 [system_monitor.py:start():194] Starting system monitor
6701
+ 2024-06-12 19:19:32,106 INFO SystemMonitor:143 [system_monitor.py:_start():158] Starting system asset monitoring threads
6702
+ 2024-06-12 19:19:32,106 INFO SystemMonitor:143 [interfaces.py:start():190] Started cpu monitoring
6703
+ 2024-06-12 19:19:32,107 INFO SystemMonitor:143 [interfaces.py:start():190] Started disk monitoring
6704
+ 2024-06-12 19:19:32,108 INFO SystemMonitor:143 [interfaces.py:start():190] Started gpu monitoring
6705
+ 2024-06-12 19:19:32,108 INFO SystemMonitor:143 [interfaces.py:start():190] Started memory monitoring
6706
+ 2024-06-12 19:19:32,110 INFO SystemMonitor:143 [interfaces.py:start():190] Started network monitoring
6707
+ 2024-06-12 19:19:33,147 DEBUG HandlerThread:143 [handler.py:handle_request():146] handle_request: pause
6708
+ 2024-06-12 19:19:33,148 INFO HandlerThread:143 [handler.py:handle_request_pause():708] stopping system metrics thread
6709
+ 2024-06-12 19:19:33,148 INFO HandlerThread:143 [system_monitor.py:finish():203] Stopping system monitor
6710
+ 2024-06-12 19:19:33,149 DEBUG SystemMonitor:143 [system_monitor.py:_start():172] Starting system metrics aggregation loop
6711
+ 2024-06-12 19:19:33,149 INFO HandlerThread:143 [interfaces.py:finish():202] Joined cpu monitor
6712
+ 2024-06-12 19:19:33,150 INFO HandlerThread:143 [interfaces.py:finish():202] Joined disk monitor
6713
+ 2024-06-12 19:19:33,149 DEBUG SystemMonitor:143 [system_monitor.py:_start():179] Finished system metrics aggregation loop
6714
+ 2024-06-12 19:19:33,151 DEBUG SystemMonitor:143 [system_monitor.py:_start():183] Publishing last batch of metrics
6715
+ 2024-06-12 19:19:33,156 INFO HandlerThread:143 [interfaces.py:finish():202] Joined gpu monitor
6716
+ 2024-06-12 19:19:33,156 INFO HandlerThread:143 [interfaces.py:finish():202] Joined memory monitor
6717
+ 2024-06-12 19:19:33,156 INFO HandlerThread:143 [interfaces.py:finish():202] Joined network monitor
6718
+ 2024-06-12 19:19:33,157 DEBUG HandlerThread:143 [handler.py:handle_request():146] handle_request: resume
6719
+ 2024-06-12 19:19:33,157 INFO HandlerThread:143 [handler.py:handle_request_resume():699] starting system metrics thread
6720
+ 2024-06-12 19:19:33,157 INFO HandlerThread:143 [system_monitor.py:start():194] Starting system monitor
6721
+ 2024-06-12 19:19:33,157 INFO SystemMonitor:143 [system_monitor.py:_start():158] Starting system asset monitoring threads
6722
+ 2024-06-12 19:19:33,157 DEBUG SenderThread:143 [sender.py:send():382] send: stats
6723
+ 2024-06-12 19:19:33,160 INFO SystemMonitor:143 [interfaces.py:start():190] Started cpu monitoring
6724
+ 2024-06-12 19:19:33,161 INFO SystemMonitor:143 [interfaces.py:start():190] Started disk monitoring
6725
+ 2024-06-12 19:19:33,162 INFO SystemMonitor:143 [interfaces.py:start():190] Started gpu monitoring
6726
+ 2024-06-12 19:19:33,163 INFO SystemMonitor:143 [interfaces.py:start():190] Started memory monitoring
6727
+ 2024-06-12 19:19:33,163 INFO SystemMonitor:143 [interfaces.py:start():190] Started network monitoring
wandb/run-20240612_150610-7vg9fga2/logs/debug.log CHANGED
@@ -44,3 +44,9 @@ config: {}
44
  2024-06-12 19:19:08,096 INFO MainThread:26 [jupyter.py:save_ipynb():373] not saving jupyter notebook
45
  2024-06-12 19:19:08,096 INFO MainThread:26 [wandb_init.py:_pause_backend():437] pausing backend
46
  2024-06-12 19:19:08,101 INFO MainThread:26 [wandb_init.py:_resume_backend():442] resuming backend
 
 
 
 
 
 
 
44
  2024-06-12 19:19:08,096 INFO MainThread:26 [jupyter.py:save_ipynb():373] not saving jupyter notebook
45
  2024-06-12 19:19:08,096 INFO MainThread:26 [wandb_init.py:_pause_backend():437] pausing backend
46
  2024-06-12 19:19:08,101 INFO MainThread:26 [wandb_init.py:_resume_backend():442] resuming backend
47
+ 2024-06-12 19:19:10,950 INFO MainThread:26 [jupyter.py:save_ipynb():373] not saving jupyter notebook
48
+ 2024-06-12 19:19:10,950 INFO MainThread:26 [wandb_init.py:_pause_backend():437] pausing backend
49
+ 2024-06-12 19:19:32,105 INFO MainThread:26 [wandb_init.py:_resume_backend():442] resuming backend
50
+ 2024-06-12 19:19:33,147 INFO MainThread:26 [jupyter.py:save_ipynb():373] not saving jupyter notebook
51
+ 2024-06-12 19:19:33,147 INFO MainThread:26 [wandb_init.py:_pause_backend():437] pausing backend
52
+ 2024-06-12 19:19:33,155 INFO MainThread:26 [wandb_init.py:_resume_backend():442] resuming backend