Long-running tests #47
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. | |
| # SPDX-License-Identifier: LicenseRef-NvidiaProprietary | |
| # | |
| # NVIDIA CORPORATION, its affiliates and licensors retain all intellectual | |
| # property and proprietary rights in and to this material, related | |
| # documentation and any modifications thereto. Any use, reproduction, | |
| # disclosure or distribution of this material and related documentation | |
| # without an express license agreement from NVIDIA CORPORATION or | |
| # its affiliates is strictly prohibited. | |
| # Long-running GPU tests (20+ min each): statistical validation, | |
| # multi-orientation inference, LER regression, and full-epoch training. | |
| # | |
| # Schedule: daily at 02:00 UTC, but skipped if no commits landed on main | |
| # in the last 24 hours (saves GPU runner time on quiet days). | |
| # Manual dispatch always runs regardless of commit activity. | |
| # Re-run attempts (run_attempt > 1) always run, bypassing the commit check — | |
| # this lets you retry failed jobs from the UI even on quiet days. | |
| # All jobs use Python 3.13 (multi-version coverage is handled by short-tier CI). | |
| # See code/tests/README_TEST_TIERS.md for the tier model. | |
name: Long-running tests
# NOTE: `on` is a YAML 1.1 boolean-ish key; GitHub's loader handles it, but
# suppress yamllint `truthy` here if linting this file.
on:
  # Daily at 02:00 UTC. The check-for-changes gate below skips the run when
  # main had no commits in the last 24 hours.
  schedule:
    - cron: "0 2 * * *"
  # Manual dispatch always runs; `jobs` optionally restricts which jobs run.
  workflow_dispatch:
    inputs:
      jobs:
        description: "Comma-separated job names to run (empty = all)"
        required: false
        default: ""
        type: string
env:
  # Keep pip installs lean inside the throwaway containers.
  PIP_NO_CACHE_DIR: "1"
  PIP_DISABLE_PIP_VERSION_CHECK: "1"
  PIP_PREFER_BINARY: "1"
  # Single version by design; multi-version coverage is handled by short-tier CI.
  PYTHON_VERSION: "3.13"
jobs:
  # ---------------------------------------------------------------------------
  # Gate: skip the whole workflow on schedule if main had no commits in 24h.
  # Manual dispatch always passes through.
  # ---------------------------------------------------------------------------
  check-for-changes:
    runs-on: linux-amd64-cpu4
    outputs:
      # "true"/"false" string consumed by the downstream jobs' `if:` expressions.
      has_changes: ${{ steps.check.outputs.has_changes }}
    steps:
      - uses: actions/checkout@v4
        with:
          # Full history so counting commits on origin/main works.
          fetch-depth: 0
      - id: check
        run: |
          if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then
            echo "has_changes=true" >> "$GITHUB_OUTPUT"
            echo "Manual dispatch — always run."
          elif [[ "${{ github.run_attempt }}" -gt 1 ]]; then
            echo "has_changes=true" >> "$GITHUB_OUTPUT"
            echo "Re-run attempt ${{ github.run_attempt }} — always run."
          else
            # GNU date form first (Linux runners); BSD `-v-24H` fallback kept
            # for running the script locally on macOS.
            SINCE="$(date -u -d '24 hours ago' '+%Y-%m-%dT%H:%M:%SZ' 2>/dev/null || date -u -v-24H '+%Y-%m-%dT%H:%M:%SZ')"
            # Use `git rev-list --count` rather than `git log | wc -l`: in the
            # old pipeline a git failure (e.g. missing origin/main) was masked
            # by wc's exit 0 and read as COUNT=0, silently skipping the whole
            # nightly suite. With a plain command substitution, a git failure
            # fails this step under the default `bash -e` shell.
            COUNT=$(git rev-list --count --since="$SINCE" origin/main)
            if [[ "$COUNT" -gt 0 ]]; then
              echo "has_changes=true" >> "$GITHUB_OUTPUT"
              echo "$COUNT commit(s) on main in the last 24h — running long tests."
            else
              echo "has_changes=false" >> "$GITHUB_OUTPUT"
              echo "No commits on main in the last 24h — skipping."
            fi
          fi
  # ---------------------------------------------------------------------------
  # Statistical noise model tests (~15 min)
  # Runs the noise-model test module (test_noise_model.py) with RUN_SLOW=1 to
  # enable the >=100k-shot statistical tests.
  # ---------------------------------------------------------------------------
  statistical-noise-model:
    needs: check-for-changes
    # Run when the gate passed AND this is either the nightly schedule or a
    # manual dispatch that did not filter this job out (substring match on the
    # comma-separated `jobs` input).
    if: >-
      needs.check-for-changes.outputs.has_changes == 'true' &&
      (github.event_name == 'schedule' ||
       (github.event_name == 'workflow_dispatch' &&
        (inputs.jobs == '' || contains(inputs.jobs, 'statistical-noise-model'))))
    runs-on: linux-amd64-gpu-rtxpro6000-latest-1
    container:
      image: ubuntu:24.04
      # Root + relaxed seccomp for package installs; generous /dev/shm for
      # shared-memory heavy workloads.
      options: -u root --security-opt seccomp=unconfined --shm-size 16g
      env:
        # Forward GPU visibility from the runner host into the container.
        NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }}
    timeout-minutes: 30
    steps:
      - name: Setup proxy cache
        uses: nv-gha-runners/setup-proxy-cache@main
        with:
          enable-apt: true
      # ubuntu:24.04 does not ship this Python version; deadsnakes provides it.
      - name: Install system dependencies
        run: |
          export DEBIAN_FRONTEND=noninteractive
          apt-get update
          apt-get install -y git git-lfs gcc software-properties-common
          add-apt-repository -y ppa:deadsnakes/ppa
          apt-get update
          apt-get install -y python${{ env.PYTHON_VERSION }} python${{ env.PYTHON_VERSION }}-venv python${{ env.PYTHON_VERSION }}-dev
          git lfs install
      - uses: actions/checkout@v4
        with:
          # LFS assets (models/data) are needed by the tests below.
          lfs: true
      - name: Install Python dependencies
        run: |
          python${{ env.PYTHON_VERSION }} -m venv .venv
          . .venv/bin/activate
          python -m pip install --upgrade pip setuptools wheel
          pip install -r code/requirements_public_inference.txt
      # Fail fast if the GPU is not actually visible to torch.
      - name: Verify GPU
        run: |
          nvidia-smi
          . .venv/bin/activate
          python -c "import torch; assert torch.cuda.is_available(), 'CUDA not available'"
      # RUN_SLOW=1 unlocks the long statistical tests in this module.
      - name: Run statistical noise model tests (RUN_SLOW=1)
        run: |
          . .venv/bin/activate
          RUN_SLOW=1 PYTHONPATH=code python -m unittest discover -s code/tests -p "test_noise_model.py" -v
  # ---------------------------------------------------------------------------
  # Multi-orientation training + inference (~30-60 min)
  # Trains a small model for each of the 4 surface code orientations (O1-O4),
  # then runs inference over all 4. Asserts completion and that LER output was
  # produced for all 4 (no numeric threshold).
  # ---------------------------------------------------------------------------
  orientation-inference:
    needs: check-for-changes
    # Gate result plus optional job filter on manual dispatch.
    if: >-
      needs.check-for-changes.outputs.has_changes == 'true' &&
      (github.event_name == 'schedule' ||
       (github.event_name == 'workflow_dispatch' &&
        (inputs.jobs == '' || contains(inputs.jobs, 'orientation-inference'))))
    runs-on: linux-amd64-gpu-rtxpro6000-latest-1
    container:
      image: ubuntu:24.04
      options: -u root --security-opt seccomp=unconfined --shm-size 16g
      env:
        # Forward GPU visibility from the runner host into the container.
        NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }}
    timeout-minutes: 90
    steps:
      - name: Setup proxy cache
        uses: nv-gha-runners/setup-proxy-cache@main
        with:
          enable-apt: true
      # ubuntu:24.04 does not ship this Python version; deadsnakes provides it.
      - name: Install system dependencies
        run: |
          export DEBIAN_FRONTEND=noninteractive
          apt-get update
          apt-get install -y git git-lfs gcc software-properties-common
          add-apt-repository -y ppa:deadsnakes/ppa
          apt-get update
          apt-get install -y python${{ env.PYTHON_VERSION }} python${{ env.PYTHON_VERSION }}-venv python${{ env.PYTHON_VERSION }}-dev
          git lfs install
      - uses: actions/checkout@v4
        with:
          lfs: true
      - name: Install Python dependencies
        run: |
          python${{ env.PYTHON_VERSION }} -m venv .venv
          . .venv/bin/activate
          python -m pip install --upgrade pip setuptools wheel
          # TODO: matrix by CUDA major version [cu12, cu13]
          pip install -r code/requirements_public_train-cu12.txt
      # Fail fast if the GPU is not actually visible to torch.
      - name: Verify GPU
        run: |
          nvidia-smi
          . .venv/bin/activate
          python -c "import torch; assert torch.cuda.is_available(), 'CUDA not available'"
      - name: Train all orientations (O1–O4)
        run: |
          . .venv/bin/activate
          FRESH_START=1 ORIENTATIONS_LONG_TASK=train bash code/scripts/run_orientations_long.sh
        env:
          # CI-scale sample counts. The shared EXPERIMENT_NAME lets the
          # inference step below pick up the checkpoints trained here.
          EXPERIMENT_NAME: ci_orient
          PREDECODER_TRAIN_SAMPLES: "32768"
          PREDECODER_VAL_SAMPLES: "4096"
          PREDECODER_TEST_SAMPLES: "4096"
          PREDECODER_TRAIN_EPOCHS: "30"
          PREDECODER_DISABLE_SDR: "1"
      # Explicit `shell: bash` gives this step `-eo pipefail`, so a failing
      # script aborts even through the `tee` pipeline; the PIPESTATUS check is
      # a belt-and-braces guard on top of that.
      - name: Multi-orientation inference (O1–O4) with LER output check
        shell: bash
        run: |
          . .venv/bin/activate
          ORIENTATIONS_LONG_TASK=inference bash code/scripts/run_orientations_long.sh 2>&1 | tee /tmp/orient_inference.log
          r=${PIPESTATUS[0]}; [ $r -ne 0 ] && exit $r
          # Require LER output from all 4 orientations (each inference run prints "LER - Avg")
          count=$(grep -c "LER - Avg" /tmp/orient_inference.log || true)
          if [ "$count" -lt 4 ]; then
            echo "Expected at least 4 LER output blocks (one per orientation), got $count"
            exit 1
          fi
        env:
          # Must match the training step so the same experiment dir is used.
          EXPERIMENT_NAME: ci_orient
          PREDECODER_INFERENCE_NUM_SAMPLES: "1024"
          # 0 latency samples / 0 workers: presumably skips latency
          # benchmarking and in-process data loading — NOTE(review): confirm
          # against run_orientations_long.sh's env handling.
          PREDECODER_INFERENCE_LATENCY_SAMPLES: "0"
          PREDECODER_INFERENCE_NUM_WORKERS: "0"
| # --------------------------------------------------------------------------- | |
| # LER regression check (~30-60 min) | |
| # Evaluates LER at multiple distances using pre-trained models. | |
| # --------------------------------------------------------------------------- | |
  ler-regression:
    needs: check-for-changes
    # Gate result plus optional job filter on manual dispatch.
    if: >-
      needs.check-for-changes.outputs.has_changes == 'true' &&
      (github.event_name == 'schedule' ||
       (github.event_name == 'workflow_dispatch' &&
        (inputs.jobs == '' || contains(inputs.jobs, 'ler-regression'))))
    runs-on: linux-amd64-gpu-rtxpro6000-latest-1
    container:
      image: ubuntu:24.04
      options: -u root --security-opt seccomp=unconfined --shm-size 16g
      env:
        # Forward GPU visibility from the runner host into the container.
        NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }}
    timeout-minutes: 60
    steps:
      - name: Setup proxy cache
        uses: nv-gha-runners/setup-proxy-cache@main
        with:
          enable-apt: true
      # ubuntu:24.04 does not ship this Python version; deadsnakes provides it.
      - name: Install system dependencies
        run: |
          export DEBIAN_FRONTEND=noninteractive
          apt-get update
          apt-get install -y git git-lfs gcc software-properties-common
          add-apt-repository -y ppa:deadsnakes/ppa
          apt-get update
          apt-get install -y python${{ env.PYTHON_VERSION }} python${{ env.PYTHON_VERSION }}-venv python${{ env.PYTHON_VERSION }}-dev
          git lfs install
      - uses: actions/checkout@v4
        with:
          # LFS provides the pre-trained models evaluated below.
          lfs: true
      - name: Install Python dependencies
        run: |
          python${{ env.PYTHON_VERSION }} -m venv .venv
          . .venv/bin/activate
          python -m pip install --upgrade pip setuptools wheel
          pip install -r code/requirements_public_inference.txt
      # Fail fast if the GPU is not actually visible to torch.
      - name: Verify GPU
        run: |
          nvidia-smi
          . .venv/bin/activate
          python -c "import torch; assert torch.cuda.is_available(), 'CUDA not available'"
      - name: LER regression (pre-trained models)
        run: |
          . .venv/bin/activate
          PYTHONPATH=code python -m unittest discover -s code/tests -p "test_inference_public_model.py" -v
| # --------------------------------------------------------------------------- | |
| # Full 1-epoch training + LER validation (~30-60 min) | |
| # Trains 1 epoch with production-scale samples and validates LER threshold. | |
| # --------------------------------------------------------------------------- | |
  full-epoch-training:
    needs: check-for-changes
    # Gate result plus optional job filter on manual dispatch.
    if: >-
      needs.check-for-changes.outputs.has_changes == 'true' &&
      (github.event_name == 'schedule' ||
       (github.event_name == 'workflow_dispatch' &&
        (inputs.jobs == '' || contains(inputs.jobs, 'full-epoch-training'))))
    runs-on: linux-amd64-gpu-rtxpro6000-latest-1
    container:
      image: ubuntu:24.04
      options: -u root --security-opt seccomp=unconfined --shm-size 16g
      env:
        # Forward GPU visibility from the runner host into the container.
        NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }}
    timeout-minutes: 90
    steps:
      - name: Setup proxy cache
        uses: nv-gha-runners/setup-proxy-cache@main
        with:
          enable-apt: true
      # ubuntu:24.04 does not ship this Python version; deadsnakes provides it.
      - name: Install system dependencies
        run: |
          export DEBIAN_FRONTEND=noninteractive
          apt-get update
          apt-get install -y git git-lfs gcc software-properties-common
          add-apt-repository -y ppa:deadsnakes/ppa
          apt-get update
          apt-get install -y python${{ env.PYTHON_VERSION }} python${{ env.PYTHON_VERSION }}-venv python${{ env.PYTHON_VERSION }}-dev
          git lfs install
      - uses: actions/checkout@v4
        with:
          lfs: true
      - name: Install Python dependencies
        run: |
          python${{ env.PYTHON_VERSION }} -m venv .venv
          . .venv/bin/activate
          python -m pip install --upgrade pip setuptools wheel
          # TODO: matrix by CUDA major version [cu12, cu13]
          pip install -r code/requirements_public_train-cu12.txt
      # Fail fast if the GPU is not actually visible to torch.
      - name: Verify GPU
        run: |
          nvidia-smi
          . .venv/bin/activate
          python -c "import torch; assert torch.cuda.is_available(), 'CUDA not available'"
      # Explicit `shell: bash` gives this step `-eo pipefail`, so a failing
      # smoke_run.sh aborts even through the `tee` pipeline; the PIPESTATUS
      # check is a belt-and-braces guard on top of that.
      - name: Full 1-epoch training + inference with LER validation
        shell: bash
        run: |
          . .venv/bin/activate
          bash code/scripts/smoke_run.sh 2>&1 | tee /tmp/ci_full_epoch.log
          r=${PIPESTATUS[0]}; [ $r -ne 0 ] && exit $r
          # 0.15: conservative for 2M samples / 1 epoch; tighten if runs are stable
          python code/scripts/check_ler_from_log.py /tmp/ci_full_epoch.log --max-ler 0.15
        env:
          EXPERIMENT_NAME: ci_full_epoch
          # Production-scale 2M-sample / 1-epoch run (see threshold note above).
          PREDECODER_TRAIN_SAMPLES: "2097152"
          PREDECODER_VAL_SAMPLES: "65536"
          PREDECODER_TEST_SAMPLES: "65536"
          PREDECODER_TRAIN_EPOCHS: "1"