Long-running tests #47
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. | |
| # SPDX-License-Identifier: LicenseRef-NvidiaProprietary | |
| # | |
| # NVIDIA CORPORATION, its affiliates and licensors retain all intellectual | |
| # property and proprietary rights in and to this material, related | |
| # documentation and any modifications thereto. Any use, reproduction, | |
| # disclosure or distribution of this material and related documentation | |
| # without an express license agreement from NVIDIA CORPORATION or | |
| # its affiliates is strictly prohibited. | |
| # Long-running GPU tests (20+ min each): statistical validation, | |
| # multi-orientation inference, LER regression, and full-epoch training. | |
| # | |
| # Schedule: daily at 02:00 UTC, but skipped if no commits landed on main | |
| # in the last 24 hours (saves GPU runner time on quiet days). | |
| # Manual dispatch always runs regardless of commit activity. | |
| # Re-run attempts (run_attempt > 1) always run, bypassing the commit check — | |
| # this lets you retry failed jobs from the UI even on quiet days. | |
| # All jobs use Python 3.13 (multi-version coverage is handled by short-tier CI). | |
| # See code/tests/README_TEST_TIERS.md for the tier model. | |
name: Long-running tests
# NOTE: `on` is a YAML 1.1 boolean-ish key; GitHub's loader handles it, but
# suppress yamllint `truthy` here if linting this file.
on:
  # Daily at 02:00 UTC. The check-for-changes gate below skips the run when
  # main had no commits in the last 24 hours.
  schedule:
    - cron: "0 2 * * *"
  # Manual dispatch always runs; `jobs` optionally restricts which jobs run.
  workflow_dispatch:
    inputs:
      jobs:
        description: "Comma-separated job names to run (empty = all)"
        required: false
        default: ""
        type: string
env:
  # Keep pip installs lean inside the throwaway containers.
  PIP_NO_CACHE_DIR: "1"
  PIP_DISABLE_PIP_VERSION_CHECK: "1"
  PIP_PREFER_BINARY: "1"
  # Single version by design; multi-version coverage is handled by short-tier CI.
  PYTHON_VERSION: "3.13"
jobs:
  # ---------------------------------------------------------------------------
  # Gate: skip the whole workflow on schedule if main had no commits in 24h.
  # Manual dispatch always passes through.
  # ---------------------------------------------------------------------------
  check-for-changes:
    runs-on: linux-amd64-cpu4
    outputs:
      # "true"/"false" string consumed by the downstream jobs' `if:` expressions.
      has_changes: ${{ steps.check.outputs.has_changes }}
    steps:
      - uses: actions/checkout@v4
        with:
          # Full history so counting commits on origin/main works.
          fetch-depth: 0
      - id: check
        run: |
          if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then
            echo "has_changes=true" >> "$GITHUB_OUTPUT"
            echo "Manual dispatch — always run."
          elif [[ "${{ github.run_attempt }}" -gt 1 ]]; then
            echo "has_changes=true" >> "$GITHUB_OUTPUT"
            echo "Re-run attempt ${{ github.run_attempt }} — always run."
          else
            # GNU date form first (Linux runners); BSD `-v-24H` fallback kept
            # for running the script locally on macOS.
            SINCE="$(date -u -d '24 hours ago' '+%Y-%m-%dT%H:%M:%SZ' 2>/dev/null || date -u -v-24H '+%Y-%m-%dT%H:%M:%SZ')"
            # Use `git rev-list --count` rather than `git log | wc -l`: in the
            # old pipeline a git failure (e.g. missing origin/main) was masked
            # by wc's exit 0 and read as COUNT=0, silently skipping the whole
            # nightly suite. With a plain command substitution, a git failure
            # fails this step under the default `bash -e` shell.
            COUNT=$(git rev-list --count --since="$SINCE" origin/main)
            if [[ "$COUNT" -gt 0 ]]; then
              echo "has_changes=true" >> "$GITHUB_OUTPUT"
              echo "$COUNT commit(s) on main in the last 24h — running long tests."
            else
              echo "has_changes=false" >> "$GITHUB_OUTPUT"
              echo "No commits on main in the last 24h — skipping."
            fi
          fi
  # ---------------------------------------------------------------------------
  # Statistical noise model tests (~15 min)
  # Runs the noise-model test module (test_noise_model.py) with RUN_SLOW=1 to
  # enable the >=100k-shot statistical tests.
  # ---------------------------------------------------------------------------
  statistical-noise-model:
    needs: check-for-changes
    # Run when the gate passed AND this is either the nightly schedule or a
    # manual dispatch that did not filter this job out (substring match on the
    # comma-separated `jobs` input).
    if: >-
      needs.check-for-changes.outputs.has_changes == 'true' &&
      (github.event_name == 'schedule' ||
       (github.event_name == 'workflow_dispatch' &&
        (inputs.jobs == '' || contains(inputs.jobs, 'statistical-noise-model'))))
    runs-on: linux-amd64-gpu-rtxpro6000-latest-1
    container:
      image: ubuntu:24.04
      # Root + relaxed seccomp for package installs; generous /dev/shm for
      # shared-memory heavy workloads.
      options: -u root --security-opt seccomp=unconfined --shm-size 16g
      env:
        # Forward GPU visibility from the runner host into the container.
        NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }}
    timeout-minutes: 30
    steps:
      - name: Setup proxy cache
        uses: nv-gha-runners/setup-proxy-cache@main
        with:
          enable-apt: true
      # ubuntu:24.04 does not ship this Python version; deadsnakes provides it.
      - name: Install system dependencies
        run: |
          export DEBIAN_FRONTEND=noninteractive
          apt-get update
          apt-get install -y git git-lfs gcc software-properties-common
          add-apt-repository -y ppa:deadsnakes/ppa
          apt-get update
          apt-get install -y python${{ env.PYTHON_VERSION }} python${{ env.PYTHON_VERSION }}-venv python${{ env.PYTHON_VERSION }}-dev
          git lfs install
      - uses: actions/checkout@v4
        with:
          # LFS assets (models/data) are needed by the tests below.
          lfs: true
      - name: Install Python dependencies
        run: |
          python${{ env.PYTHON_VERSION }} -m venv .venv
          . .venv/bin/activate
          python -m pip install --upgrade pip setuptools wheel
          pip install -r code/requirements_public_inference.txt
      # Fail fast if the GPU is not actually visible to torch.
      - name: Verify GPU
        run: |
          nvidia-smi
          . .venv/bin/activate
          python -c "import torch; assert torch.cuda.is_available(), 'CUDA not available'"
      # RUN_SLOW=1 unlocks the long statistical tests in this module.
      - name: Run statistical noise model tests (RUN_SLOW=1)
        run: |
          . .venv/bin/activate
          RUN_SLOW=1 PYTHONPATH=code python -m unittest discover -s code/tests -p "test_noise_model.py" -v
  # ---------------------------------------------------------------------------
  # Multi-orientation training + inference (~30-60 min)
  # Trains a small model for each of the 4 surface code orientations (O1-O4),
  # then runs inference over all 4. Asserts completion and that LER output was
  # produced for all 4 (no numeric threshold).
  # ---------------------------------------------------------------------------
  orientation-inference:
    needs: check-for-changes
    # Gate result plus optional job filter on manual dispatch.
    if: >-
      needs.check-for-changes.outputs.has_changes == 'true' &&
      (github.event_name == 'schedule' ||
       (github.event_name == 'workflow_dispatch' &&
        (inputs.jobs == '' || contains(inputs.jobs, 'orientation-inference'))))
    runs-on: linux-amd64-gpu-rtxpro6000-latest-1
    container:
      image: ubuntu:24.04
      options: -u root --security-opt seccomp=unconfined --shm-size 16g
      env:
        # Forward GPU visibility from the runner host into the container.
        NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }}
    timeout-minutes: 90
    steps:
      - name: Setup proxy cache
        uses: nv-gha-runners/setup-proxy-cache@main
        with:
          enable-apt: true
      # ubuntu:24.04 does not ship this Python version; deadsnakes provides it.
      - name: Install system dependencies
        run: |
          export DEBIAN_FRONTEND=noninteractive
          apt-get update
          apt-get install -y git git-lfs gcc software-properties-common
          add-apt-repository -y ppa:deadsnakes/ppa
          apt-get update
          apt-get install -y python${{ env.PYTHON_VERSION }} python${{ env.PYTHON_VERSION }}-venv python${{ env.PYTHON_VERSION }}-dev
          git lfs install
      - uses: actions/checkout@v4
        with:
          lfs: true
      - name: Install Python dependencies
        run: |
          python${{ env.PYTHON_VERSION }} -m venv .venv
          . .venv/bin/activate
          python -m pip install --upgrade pip setuptools wheel
          # TODO: matrix by CUDA major version [cu12, cu13]
          pip install -r code/requirements_public_train-cu12.txt
      # Fail fast if the GPU is not actually visible to torch.
      - name: Verify GPU
        run: |
          nvidia-smi
          . .venv/bin/activate
          python -c "import torch; assert torch.cuda.is_available(), 'CUDA not available'"
      - name: Train all orientations (O1–O4)
        run: |
          . .venv/bin/activate
          FRESH_START=1 ORIENTATIONS_LONG_TASK=train bash code/scripts/run_orientations_long.sh
        env:
          # CI-scale sample counts. The shared EXPERIMENT_NAME lets the
          # inference step below pick up the checkpoints trained here.
          EXPERIMENT_NAME: ci_orient
          PREDECODER_TRAIN_SAMPLES: "32768"
          PREDECODER_VAL_SAMPLES: "4096"
          PREDECODER_TEST_SAMPLES: "4096"
          PREDECODER_TRAIN_EPOCHS: "30"
          PREDECODER_DISABLE_SDR: "1"
      # Explicit `shell: bash` gives this step `-eo pipefail`, so a failing
      # script aborts even through the `tee` pipeline; the PIPESTATUS check is
      # a belt-and-braces guard on top of that.
      - name: Multi-orientation inference (O1–O4) with LER output check
        shell: bash
        run: |
          . .venv/bin/activate
          ORIENTATIONS_LONG_TASK=inference bash code/scripts/run_orientations_long.sh 2>&1 | tee /tmp/orient_inference.log
          r=${PIPESTATUS[0]}; [ $r -ne 0 ] && exit $r
          # Require LER output from all 4 orientations (each inference run prints "LER - Avg")
          count=$(grep -c "LER - Avg" /tmp/orient_inference.log || true)
          if [ "$count" -lt 4 ]; then
            echo "Expected at least 4 LER output blocks (one per orientation), got $count"
            exit 1
          fi
        env:
          # Must match the training step so the same experiment dir is used.
          EXPERIMENT_NAME: ci_orient
          PREDECODER_INFERENCE_NUM_SAMPLES: "1024"
          # 0 latency samples / 0 workers: presumably skips latency
          # benchmarking and in-process data loading — NOTE(review): confirm
          # against run_orientations_long.sh's env handling.
          PREDECODER_INFERENCE_LATENCY_SAMPLES: "0"
          PREDECODER_INFERENCE_NUM_WORKERS: "0"
| # --------------------------------------------------------------------------- | |
| # LER regression check (~30-60 min) | |
| # Evaluates LER at multiple distances using pre-trained models. | |
| # --------------------------------------------------------------------------- | |
  ler-regression:
    needs: check-for-changes
    # Gate result plus optional job filter on manual dispatch.
    if: >-
      needs.check-for-changes.outputs.has_changes == 'true' &&
      (github.event_name == 'schedule' ||
       (github.event_name == 'workflow_dispatch' &&
        (inputs.jobs == '' || contains(inputs.jobs, 'ler-regression'))))
    runs-on: linux-amd64-gpu-rtxpro6000-latest-1
    container:
      image: ubuntu:24.04
      options: -u root --security-opt seccomp=unconfined --shm-size 16g
      env:
        # Forward GPU visibility from the runner host into the container.
        NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }}
    timeout-minutes: 60
    steps:
      - name: Setup proxy cache
        uses: nv-gha-runners/setup-proxy-cache@main
        with:
          enable-apt: true
      # ubuntu:24.04 does not ship this Python version; deadsnakes provides it.
      - name: Install system dependencies
        run: |
          export DEBIAN_FRONTEND=noninteractive
          apt-get update
          apt-get install -y git git-lfs gcc software-properties-common
          add-apt-repository -y ppa:deadsnakes/ppa
          apt-get update
          apt-get install -y python${{ env.PYTHON_VERSION }} python${{ env.PYTHON_VERSION }}-venv python${{ env.PYTHON_VERSION }}-dev
          git lfs install
      - uses: actions/checkout@v4
        with:
          # LFS provides the pre-trained models evaluated below.
          lfs: true
      - name: Install Python dependencies
        run: |
          python${{ env.PYTHON_VERSION }} -m venv .venv
          . .venv/bin/activate
          python -m pip install --upgrade pip setuptools wheel
          pip install -r code/requirements_public_inference.txt
      # Fail fast if the GPU is not actually visible to torch.
      - name: Verify GPU
        run: |
          nvidia-smi
          . .venv/bin/activate
          python -c "import torch; assert torch.cuda.is_available(), 'CUDA not available'"
      - name: LER regression (pre-trained models)
        run: |
          . .venv/bin/activate
          PYTHONPATH=code python -m unittest discover -s code/tests -p "test_inference_public_model.py" -v
| # --------------------------------------------------------------------------- | |
| # Full 1-epoch training + LER validation (~30-60 min) | |
| # Trains 1 epoch with production-scale samples and validates LER threshold. | |
| # --------------------------------------------------------------------------- | |
  full-epoch-training:
    needs: check-for-changes
    # Gate result plus optional job filter on manual dispatch.
    if: >-
      needs.check-for-changes.outputs.has_changes == 'true' &&
      (github.event_name == 'schedule' ||
       (github.event_name == 'workflow_dispatch' &&
        (inputs.jobs == '' || contains(inputs.jobs, 'full-epoch-training'))))
    runs-on: linux-amd64-gpu-rtxpro6000-latest-1
    container:
      image: ubuntu:24.04
      options: -u root --security-opt seccomp=unconfined --shm-size 16g
      env:
        # Forward GPU visibility from the runner host into the container.
        NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }}
    timeout-minutes: 90
    steps:
      - name: Setup proxy cache
        uses: nv-gha-runners/setup-proxy-cache@main
        with:
          enable-apt: true
      # ubuntu:24.04 does not ship this Python version; deadsnakes provides it.
      - name: Install system dependencies
        run: |
          export DEBIAN_FRONTEND=noninteractive
          apt-get update
          apt-get install -y git git-lfs gcc software-properties-common
          add-apt-repository -y ppa:deadsnakes/ppa
          apt-get update
          apt-get install -y python${{ env.PYTHON_VERSION }} python${{ env.PYTHON_VERSION }}-venv python${{ env.PYTHON_VERSION }}-dev
          git lfs install
      - uses: actions/checkout@v4
        with:
          lfs: true
      - name: Install Python dependencies
        run: |
          python${{ env.PYTHON_VERSION }} -m venv .venv
          . .venv/bin/activate
          python -m pip install --upgrade pip setuptools wheel
          # TODO: matrix by CUDA major version [cu12, cu13]
          pip install -r code/requirements_public_train-cu12.txt
      # Fail fast if the GPU is not actually visible to torch.
      - name: Verify GPU
        run: |
          nvidia-smi
          . .venv/bin/activate
          python -c "import torch; assert torch.cuda.is_available(), 'CUDA not available'"
      # Explicit `shell: bash` gives this step `-eo pipefail`, so a failing
      # smoke_run.sh aborts even through the `tee` pipeline; the PIPESTATUS
      # check is a belt-and-braces guard on top of that.
      - name: Full 1-epoch training + inference with LER validation
        shell: bash
        run: |
          . .venv/bin/activate
          bash code/scripts/smoke_run.sh 2>&1 | tee /tmp/ci_full_epoch.log
          r=${PIPESTATUS[0]}; [ $r -ne 0 ] && exit $r
          # 0.15: conservative for 2M samples / 1 epoch; tighten if runs are stable
          python code/scripts/check_ler_from_log.py /tmp/ci_full_epoch.log --max-ler 0.15
        env:
          EXPERIMENT_NAME: ci_full_epoch
          # Production-scale 2M-sample / 1-epoch run (see threshold note above).
          PREDECODER_TRAIN_SAMPLES: "2097152"
          PREDECODER_VAL_SAMPLES: "65536"
          PREDECODER_TEST_SAMPLES: "65536"
          PREDECODER_TRAIN_EPOCHS: "1"