-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathscript_im.sh
More file actions
35 lines (26 loc) · 816 Bytes
/
script_im.sh
File metadata and controls
35 lines (26 loc) · 816 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
#!/usr/bin/env bash
#
# Launch a single-node torchrun job for the training entry point in PROG and
# copy its stdout into a timestamped log file under ./logs.
#
# Usage:
#   script_im.sh GPUS BATCH_SIZE ARCH EXP_NAME LR DATASET \
#                WEIGHT_DECAY WARMUP_EPOCHS CLIP_GRAD
#
# Positional arguments:
#   $1 GPUS           value passed to torchrun --nproc_per_node
#   $2 BATCH_SIZE     value passed as --batch-size
#   $3 ARCH           value passed as --model; also used in the log file name
#   $4 EXP_NAME       sub-directory of ./checkpoints used for --output_dir and
#                     for the --resume path (<dir>/checkpoint_best.pth)
#   $5 LR             value passed as --lr
#   $6 DATASET        value passed as --data-set
#   $7 WEIGHT_DECAY   value passed as --weight-decay
#   $8 WARMUP_EPOCHS  value passed as --warmup-epochs
#   $9 CLIP_GRAD      value passed as --clip-grad
#
# Environment:
#   MASTER_PORT  optional; when unset or empty it defaults to 2000 + $RANDOM.
#
# Exit status: 2 on a usage error; otherwise the status of the
# torchrun | tee pipeline (pipefail is on, so a torchrun failure is reported).

# pipefail: without it the pipeline below would report tee's status and hide
# a failed training run from the caller.
set -euo pipefail

if (( $# < 9 )); then
  printf 'Usage: %s GPUS BATCH_SIZE ARCH EXP_NAME LR DATASET WEIGHT_DECAY WARMUP_EPOCHS CLIP_GRAD\n' \
    "${0##*/}" >&2
  exit 2
fi

export NCCL_LL_THRESHOLD=0
# export PYTHONPATH=/mnt/lustre/share/pymc/new:$PYTHONPATH

# Placeholders: edit these two paths before running.
PROG=path_to_main.py # eg, im/classification/main.py
DATA=path_to_imagenet1k

GPUS=$1
batch_size=$2
ARCH=$3
EXP_NAME=$4
LR=$5
DATASET=$6
WEIGHT_DECAY=$7
WARMUP_EPOCHS=$8
CLIP_GRAD=$9

# Random fallback port in [2000, 34767]; a caller-supplied MASTER_PORT wins.
PORT=$(( RANDOM + 2000 ))
export MASTER_PORT="${MASTER_PORT:-$PORT}"
# Print the port that is actually exported, not just the random candidate.
printf '%s\n' "$MASTER_PORT"

OUTPUT_DIR="./checkpoints/${EXP_NAME}"
RESUME="${OUTPUT_DIR}/checkpoint_best.pth"

mkdir -p logs
START_TIME=$(date +%Y%m%d-%H:%M:%S)
LOG_FILE="logs/${START_TIME}_${ARCH}.log"

# Build the argument list as an array so each value stays a single word even
# if it is empty or contains whitespace.
train_args=(
  --data-set "$DATASET" --data-path "$DATA"
  --batch-size "$batch_size" --dist-eval --output_dir "$OUTPUT_DIR"
  --resume "$RESUME" --model "$ARCH" --epochs 300 --lr "$LR"
  --weight-decay "$WEIGHT_DECAY"
  --warmup-epochs "$WARMUP_EPOCHS"
  --clip-grad "$CLIP_GRAD"
  --use_mcloader
  --broadcast_buffers
)

# Only stdout is piped to tee (same as before); stderr still goes to the terminal.
torchrun --standalone --nproc_per_node="$GPUS" "$PROG" "${train_args[@]}" \
  | tee "$LOG_FILE"