#!/bin/bash
#################################################
## TEMPLATE VERSION 1.01 ##
#################################################
## ALL SBATCH COMMANDS WILL START WITH #SBATCH ##
## DO NOT REMOVE THE # SYMBOL ##
#################################################
#SBATCH --nodes=1 # How many nodes required? Usually 1
#SBATCH --cpus-per-task=10 # Number of CPU to request for the job
#SBATCH --mem=128GB # How much memory does your job require?
#SBATCH --gres=gpu:1 # Do you require GPUS? If not delete this line
#SBATCH --time=05-00:00:00 # How long to run the job for? Jobs exceed this time will be terminated
# Format <DD-HH:MM:SS> eg. 5 days 05-00:00:00
# Format <DD-HH:MM:SS> eg. 24 hours 1-00:00:00 or 24:00:00
#SBATCH --mail-type=BEGIN,END,FAIL # When should you receive an email?
#SBATCH --output=%u.%j.out # Where should the log files go?
# You must provide an absolute path eg /common/home/module/username/
# If no paths are provided, the output file will be placed in your current working directory
#SBATCH --requeue # Remove this line if you do not want the workload scheduler to requeue your job after preemption
#SBATCH --constraint=l40 # This tells the workload scheduler to provision you l40 nodes
################################################################
## EDIT AFTER THIS LINE IF YOU ARE OKAY WITH DEFAULT SETTINGS ##
################################################################
# ================ Account parameters ================
# Description | Value
# ---------------------------------------------
# Account name | tanahhweeresearch
# List of Assigned Partition | researchlong researchshort tanahhweeresearch
# List of Assigned QOS | research-1-qos tanahhweeresearch-priority
# ---------------------------------------------
#SBATCH --partition=researchlong # The partition you've been assigned
#SBATCH --account=tanahhweeresearch # The account you've been assigned (normally student)
#SBATCH --qos=research-1-qos # What is the QOS assigned to you? Check with myinfo command
#SBATCH --mail-user=haotian.hu.2021@scis.smu.edu.sg # Who should receive the email notifications
#SBATCH --job-name=1GPU_LLM_HT # Give the job a name
#################################################
## END OF SBATCH COMMANDS ##
#################################################
# --- Environment setup ---
# Purge the environment, load the modules we require.
# Refer to https://violet.smu.edu.sg/origami/module/ for more information
module purge
module load Anaconda3/2022.05
module load CUDA/12.1.1
# Initialise conda for this non-interactive batch shell.
# Do not remove this line even if you have executed conda init
eval "$(conda shell.bash hook)"
# One-time setup: creating the virtual environment can stay commented out if you already have one
# conda create -n llm_ht python=3.11
# This command assumes that you've already created the environment previously
# We're using an absolute path here. You may use a relative path, as long as srun is executed in the same working directory
# conda activate tgi
conda activate llm_ht
# If you require any packages, install them before the srun job submission.
# conda install pytorch torchvision torchaudio pytorch-cuda=12.1 -c pytorch -c nvidia
# --- Job submission ---
# Usage: sbatch 1gpu_llm_ht.sh <job-script>
#   <job-script> is the name of a script inside $HOME/logical-reasoning/scripts
BASEDIR="$HOME/logical-reasoning/scripts"
# ${1:?...} aborts with a usage message if no job script name was passed
JOB="${1:?Usage: sbatch $0 <job-script>}"

# Fail fast with a clear message instead of letting srun error on a missing file
if [[ ! -f "$BASEDIR/$JOB" ]]; then
    printf 'Error: job script not found: %s\n' "$BASEDIR/$JOB" >&2
    exit 1
fi

echo "Submitting job: $BASEDIR/$JOB"
# Quote the path so filenames with spaces survive word-splitting (SC2086)
srun --gres=gpu:1 "$BASEDIR/$JOB"
# Example:
# sbatch logical-reasoning/scripts/1gpu_llm_ht.sh tune-mgtv-qwen2_7b.sh