|
slurm submission log: 2024-05-27 12:34:08.291942

created following sbatch script:

###############

#!/bin/bash

#SBATCH --account=nlp
#SBATCH --cpus-per-task=16
#SBATCH --dependency=afterok:7656663
#SBATCH --gres=gpu:1
#SBATCH --job-name=tthrush-job-3309802
#SBATCH --mem=60G
#SBATCH --nodelist=sphinx1
#SBATCH --open-mode=append
#SBATCH --output=/juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/test_ordinal_projection_big_diff/llms/pythia-70m_sciq_1/eval_job_output.txt
#SBATCH --partition=sphinx
#SBATCH --time=14-0

# activate your desired anaconda environment
. /nlp/scr/tthrush/miniconda3/etc/profile.d/conda.sh ; conda activate pretraining-coreset-selection

# cd to working directory
cd .

# launch commands
srun --unbuffered run_as_child_processes 'lm_eval --model hf --model_args pretrained=/juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/test_ordinal_projection_big_diff/llms/pythia-70m_sciq_1,revision=main,dtype=float16,trust_remote_code=True --tasks piqa,arc_easy,xnli_en,xnli_fr,xnli_de,xnli_es,sciq,lambada --device cuda --output_path /juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/test_ordinal_projection_big_diff/llms/pythia-70m_sciq_1/perf'

###############
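
Note: the --dependency=afterok:7656663 directive above holds this eval job until job 7656663 exits successfully. A minimal sketch of how such a chain is typically wired up with sbatch --parsable (the script names here are hypothetical, not from this log):

    # submit training first; --parsable makes sbatch print only the job id
    train_id=$(sbatch --parsable train_job.sbatch)
    # eval starts only if the training job terminates with exit code 0
    sbatch --dependency=afterok:"${train_id}" eval_job.sbatch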
|
|
|
submission to slurm complete!

###############
slurm submission output

Submitted batch job 7656664
###############
|
|
|
###############
start time: 2024-05-27 12:47:58.516681
machine: sphinx1
conda env: pretraining-coreset-selection
###############
running following processes

lm_eval --model hf --model_args pretrained=/juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/test_ordinal_projection_big_diff/llms/pythia-70m_sciq_1,revision=main,dtype=float16,trust_remote_code=True --tasks piqa,arc_easy,xnli_en,xnli_fr,xnli_de,xnli_es,sciq,lambada --device cuda --output_path /juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/test_ordinal_projection_big_diff/llms/pythia-70m_sciq_1/perf

###############
command outputs:
|
|
|
|
|
2024-05-27:12:48:01,194 INFO [utils.py:145] Note: detected 255 virtual cores but NumExpr set to maximum of 64, check "NUMEXPR_MAX_THREADS" environment variable.
2024-05-27:12:48:01,194 INFO [utils.py:148] Note: NumExpr detected 255 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
2024-05-27:12:48:01,194 INFO [utils.py:160] NumExpr defaulting to 8 threads.
2024-05-27:12:48:01,508 INFO [config.py:58] PyTorch version 2.2.2 available.
2024-05-27:12:48:05,349 INFO [__main__.py:156] Verbosity set to INFO
2024-05-27:12:48:12,734 WARNING [__init__.py:194] Some tasks could not be loaded due to missing dependencies. Run with `--verbosity DEBUG` for full details.
/nlp/scr/tthrush/miniconda3/envs/pretraining-coreset-selection/lib/python3.10/site-packages/datasets/load.py:1429: FutureWarning: The repository for hails/mmlu_no_train contains custom code which must be executed to correctly load the dataset. You can inspect the repository content at https://hf.co/datasets/hails/mmlu_no_train
You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this dataset from the next major release of `datasets`.
  warnings.warn(
2024-05-27:12:49:29,060 WARNING [__init__.py:194] Some tasks could not be loaded due to missing dependencies. Run with `--verbosity DEBUG` for full details.
2024-05-27:12:49:29,064 INFO [__main__.py:229] Selected Tasks: ['arc_easy', 'lambada', 'piqa', 'sciq', 'xnli_de', 'xnli_en', 'xnli_es', 'xnli_fr']
2024-05-27:12:49:29,360 INFO [huggingface.py:148] Using device 'cuda'
|
Traceback (most recent call last):
  File "/nlp/scr/tthrush/miniconda3/envs/pretraining-coreset-selection/bin/lm_eval", line 8, in <module>
    sys.exit(cli_evaluate())
  File "/sailhome/tthrush/lm-evaluation-harness/lm_eval/__main__.py", line 231, in cli_evaluate
    results = evaluator.simple_evaluate(
  File "/sailhome/tthrush/lm-evaluation-harness/lm_eval/utils.py", line 415, in _wrapper
    return fn(*args, **kwargs)
  File "/sailhome/tthrush/lm-evaluation-harness/lm_eval/evaluator.py", line 98, in simple_evaluate
    lm = lm_eval.api.registry.get_model(model).create_from_arg_string(
  File "/sailhome/tthrush/lm-evaluation-harness/lm_eval/api/model.py", line 134, in create_from_arg_string
    return cls(**args, **args2)
  File "/sailhome/tthrush/lm-evaluation-harness/lm_eval/models/huggingface.py", line 174, in __init__
    self._get_config(
  File "/sailhome/tthrush/lm-evaluation-harness/lm_eval/models/huggingface.py", line 420, in _get_config
    self._config = transformers.AutoConfig.from_pretrained(
  File "/nlp/scr/tthrush/miniconda3/envs/pretraining-coreset-selection/lib/python3.10/site-packages/transformers/models/auto/configuration_auto.py", line 928, in from_pretrained
    config_dict, unused_kwargs = PretrainedConfig.get_config_dict(pretrained_model_name_or_path, **kwargs)
  File "/nlp/scr/tthrush/miniconda3/envs/pretraining-coreset-selection/lib/python3.10/site-packages/transformers/configuration_utils.py", line 631, in get_config_dict
    config_dict, kwargs = cls._get_config_dict(pretrained_model_name_or_path, **kwargs)
  File "/nlp/scr/tthrush/miniconda3/envs/pretraining-coreset-selection/lib/python3.10/site-packages/transformers/configuration_utils.py", line 686, in _get_config_dict
    resolved_config_file = cached_file(
  File "/nlp/scr/tthrush/miniconda3/envs/pretraining-coreset-selection/lib/python3.10/site-packages/transformers/utils/hub.py", line 369, in cached_file
    raise EnvironmentError(
OSError: /juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/test_ordinal_projection_big_diff/llms/pythia-70m_sciq_1 does not appear to have a file named config.json. Checkout 'https://huggingface.co//juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/test_ordinal_projection_big_diff/llms/pythia-70m_sciq_1/tree/main' for available files.
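
Note: afterok only guarantees that job 7656663 exited with code 0, not that the checkpoint files it was expected to write actually exist, which is why this eval could still fail on a missing config.json. A minimal pre-flight guard that could sit before the srun line (a sketch, not part of the original script):

    # fail fast if the model directory has no config.json yet
    MODEL_DIR=/juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/test_ordinal_projection_big_diff/llms/pythia-70m_sciq_1
    if [ ! -f "${MODEL_DIR}/config.json" ]; then
        echo "missing ${MODEL_DIR}/config.json; training job may not have written its checkpoint" >&2
        exit 1
    fi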
|
###############
end time: 2024-05-27 12:49:38.627024
elapsed time: 0:01:40.110343
|
slurm submission log: 2024-05-27 23:18:54.461453

created following sbatch script:

###############

#!/bin/bash

#SBATCH --account=nlp
#SBATCH --cpus-per-task=16
#SBATCH --dependency=afterok:7659769
#SBATCH --gres=gpu:1
#SBATCH --job-name=tthrush-job-2147042
#SBATCH --mem=60G
#SBATCH --nodelist=sphinx1
#SBATCH --open-mode=append
#SBATCH --output=/juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/test_ordinal_projection_big_diff/llms/pythia-70m_sciq_1/eval_job_output.txt
#SBATCH --partition=sphinx
#SBATCH --time=14-0

# activate your desired anaconda environment
. /nlp/scr/tthrush/miniconda3/etc/profile.d/conda.sh ; conda activate pretraining-coreset-selection

# cd to working directory
cd .

# launch commands
srun --unbuffered run_as_child_processes 'lm_eval --model hf --model_args pretrained=/juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/test_ordinal_projection_big_diff/llms/pythia-70m_sciq_1,revision=main,dtype=float16,trust_remote_code=True --tasks piqa,arc_easy,xnli_en,xnli_fr,xnli_de,xnli_es,sciq,lambada --device cuda --output_path /juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/test_ordinal_projection_big_diff/llms/pythia-70m_sciq_1/perf'

###############
|
|
|
submission to slurm complete!

###############
slurm submission output

Submitted batch job 7659770
###############
|
|
|
slurm submission log: 2024-05-27 23:24:16.563525

created following sbatch script:

###############

#!/bin/bash

#SBATCH --account=nlp
#SBATCH --cpus-per-task=16
#SBATCH --dependency=afterok:7659811
#SBATCH --gres=gpu:1
#SBATCH --job-name=tthrush-job-2456365
#SBATCH --mem=60G
#SBATCH --nodelist=sphinx1
#SBATCH --open-mode=append
#SBATCH --output=/juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/test_ordinal_projection_big_diff/llms/pythia-70m_sciq_1/eval_job_output.txt
#SBATCH --partition=sphinx
#SBATCH --time=14-0

# activate your desired anaconda environment
. /nlp/scr/tthrush/miniconda3/etc/profile.d/conda.sh ; conda activate pretraining-coreset-selection

# cd to working directory
cd .

# launch commands
srun --unbuffered run_as_child_processes 'lm_eval --model hf --model_args pretrained=/juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/test_ordinal_projection_big_diff/llms/pythia-70m_sciq_1,revision=main,dtype=float16,trust_remote_code=True --tasks piqa,arc_easy,xnli_en,xnli_fr,xnli_de,xnli_es,sciq,lambada --device cuda --output_path /juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/test_ordinal_projection_big_diff/llms/pythia-70m_sciq_1/perf'

###############
|
|
|
submission to slurm complete!

###############
slurm submission output

Submitted batch job 7659812
###############
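
Note: if one of these resubmissions succeeds, lm_eval writes its results JSON under the --output_path given above (the .../perf path). A hedged sketch for skimming the per-task scores with jq; depending on the harness version, --output_path is treated as a file or as a directory containing the JSON:

    # hypothetical follow-up, not part of this log
    PERF=/juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/test_ordinal_projection_big_diff/llms/pythia-70m_sciq_1/perf
    jq '.results' "${PERF}"/*.json 2>/dev/null || jq '.results' "${PERF}"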
|
|
|
|