File tree Expand file tree Collapse file tree 3 files changed +38
-4
lines changed
Expand file tree Collapse file tree 3 files changed +38
-4
lines changed Original file line number Diff line number Diff line change 44
55# will do stuff in fs_path / [username] / signal_data | shot_lists | processed shots, etc.
66
7- fs_path : '/tigress'
7+ fs_path : '/global/cscratch1/sd/'
88target : ' hinge' # 'maxhinge' # 'maxhinge' # 'binary' # 'hinge'
99num_gpus : 4 # per node
1010paths :
@@ -127,7 +127,7 @@ training:
127127 num_shots_at_once : 200
128128 # large number = maximum number of epochs.
129129 # Early stopping will occur if loss does not decrease, after some patience # of epochs
130- num_epochs : 1000
130+ num_epochs : 50
131131 use_mock_data : False
132132 data_parallel : False
133133 hyperparam_tuning : False
@@ -136,8 +136,8 @@ training:
136136 num_batches_minimum : 20 # minimum number of batches per epoch
137137 ranking_difficulty_fac : 1.0 # how much to upweight incorrectly classified shots during training
138138 timeline_prof : False
139- step_limit : 50
140- no_validation : True
139+ step_limit : 0
140+ no_validation : False
141141callbacks :
142142 list : ['earlystop']
143143 metrics : ['val_loss','val_roc','train_loss']
Original file line number Diff line number Diff line change 1+ #! /bin/bash
2+ #SBATCH -C gpu
3+ #SBATCH -t 01:30:00
4+ #SBATCH -G 1
5+ #SBATCH -c 4
6+ #SBATCH --exclusive
7+
8+ # rm /global/cscratch1/sd/$USER/model_checkpoints/*
9+ # rm /global/cscratch1/sd/$USER/results/*
10+ # rm /global/cscratch1/sd/$USER/csv_logs/*
11+ # rm /global/cscratch1/sd/$USER/Graph/*
12+ # rm /global/cscratch1/sd/$USER/normalization/*
13+
14+ export OMPI_MCA_btl="tcp,self,vader"
15+ srun python mpi_learn.py
Original file line number Diff line number Diff line change 1+ #! /bin/bash
2+ #SBATCH -C gpu
3+ #SBATCH -t 02:00:00
4+ #SBATCH -N 4
5+ #SBATCH -G 4
6+ #SBATCH --ntasks-per-node=4
7+ #SBATCH --ntasks-per-socket=2
8+ #SBATCH -c 4
9+ #SBATCH --mem-per-cpu=0
10+ #SBATCH --exclusive
11+
12+ # rm /global/cscratch1/sd/$USER/model_checkpoints/*
13+ # rm /global/cscratch1/sd/$USER/results/*
14+ # rm /global/cscratch1/sd/$USER/csv_logs/*
15+ # rm /global/cscratch1/sd/$USER/Graph/*
16+ # rm /global/cscratch1/sd/$USER/normalization/*
17+
18+ export OMPI_MCA_btl="tcp,self,vader"
19+ srun python mpi_learn.py
You can’t perform that action at this time.
0 commit comments