forked from olivkoch/TinyRecursiveModels
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtrain_chess.sh
More file actions
35 lines (31 loc) · 825 Bytes
/
train_chess.sh
File metadata and controls
35 lines (31 loc) · 825 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
#!/bin/bash
# Chess training script for TinyRecursiveModels
# Based on the Q&A training configuration
#
# Usage:
#   ./train_chess.sh
#
# Must be run from the directory that contains pretrain.py (it is invoked by
# relative path) and requires `uv` on PATH.
#
# Environment:
#   CUDA_VISIBLE_DEVICES  Device list exported to the training process.
#                         Defaults to 0 when unset or empty.

# Strict mode: abort on any failing command (-e), on use of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# Respect a caller-provided device selection; fall back to device 0 otherwise.
export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-0}"

# key=value overrides handed verbatim to pretrain.py. Kept in an array so each
# setting sits on its own line without fragile backslash continuations.
# NOTE(review): the `+` prefix on project_name looks like Hydra's "add new key"
# override syntax - confirm against pretrain.py's config handling.
train_args=(
  # Architecture selection and dataset location
  arch=trm
  data_paths="[data/chess]"

  # arch.* settings
  arch.halt_exploration_prob=0.0
  arch.halt_max_steps=8
  arch.H_cycles=2
  arch.L_cycles=2
  arch.H_layers=0
  arch.L_layers=1
  arch.hidden_size=128
  arch.num_heads=4
  arch.expansion=2
  arch.puzzle_emb_ndim=8
  arch.forward_dtype=float32
  arch.puzzle_emb_len=8

  # Batch size, schedule, learning rates and weight decay
  global_batch_size=256
  epochs=10000
  lr=0.001
  puzzle_emb_lr=0.01
  weight_decay=0.0
  puzzle_emb_weight_decay=0.0
  lr_warmup_steps=1000

  # Evaluation cadence and experiment tracking
  eval_interval=10
  use_wandb=false
  +project_name=chess_baseline
)

# Training configuration for chess puzzles
uv run python pretrain.py "${train_args[@]}"