# ci: Add GPU benchmarks and configure with just script (#8)
# Workflow file for this run.
# Note: this file was flagged as containing bidirectional Unicode text that may
# be interpreted or compiled differently than it appears. To review, open the
# file in an editor that reveals hidden Unicode characters.
# Runs the test suite on a self-hosted GPU machine with CUDA and OpenCL enabled
name: GPU tests

on:
  pull_request:
    types: [opened, synchronize, reopened, ready_for_review]
    branches: [master]
  merge_group:

env:
  CARGO_TERM_COLOR: always
  # Disable incremental compilation.
  #
  # Incremental compilation is useful as part of an edit-build-test-edit cycle,
  # as it lets the compiler avoid recompiling code that hasn't changed. However,
  # on CI, we're not making small edits; we're almost always building the entire
  # project from scratch. Thus, incremental compilation on CI actually
  # introduces *additional* overhead to support making future builds
  # faster...but no future builds will ever occur in any given CI environment.
  #
  # See https://matklad.github.io/2021/09/04/fast-rust-builds.html#ci-workflow
  # for details.
  CARGO_INCREMENTAL: 0
  # Allow more retries for network requests in cargo (downloading crates) and
  # rustup (installing toolchains). This should help to reduce flaky CI failures
  # from transient network timeouts or other issues.
  CARGO_NET_RETRY: 10
  RUSTUP_MAX_RETRIES: 10
  # Don't emit giant backtraces in the CI logs.
  RUST_BACKTRACE: short
  RUSTFLAGS: -D warnings

# Runs are grouped by workflow name plus the PR head ref; when `github.head_ref`
# is empty the run id is used instead, which gives that run a group of its own.
# A newer run in the same group cancels the one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true
jobs:
  cuda:
    name: Rust tests on CUDA
    # With the `pull_request` types this workflow subscribes to, the action is
    # never `enqueued`, so this job is skipped on pull request events and only
    # runs for the other trigger (`merge_group`).
    # NOTE(review): presumably intentional, to keep the self-hosted GPU runner
    # for the merge queue while still reporting a status on PRs - confirm.
    if: github.event_name != 'pull_request' || github.event.action == 'enqueued'
    runs-on: [self-hosted, gpu-ci]
    env:
      NVIDIA_VISIBLE_DEVICES: all
      NVIDIA_DRIVER_CAPABILITIES: compute,utility
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive
      # NOTE(review): `actions-rs/toolchain` appears to be unmaintained; with no
      # inputs it presumably takes the toolchain from the repository's
      # `rust-toolchain` file. Consider migrating - confirm before changing.
      - uses: actions-rs/toolchain@v1
      - uses: taiki-e/install-action@nextest
      - uses: Swatinem/rust-cache@v2
      # Check we have access to the machine's Nvidia drivers
      - run: nvidia-smi
      # The `compute`/`sm` number corresponds to the Nvidia GPU architecture
      # In this case, the self-hosted machine uses the Ampere architecture, but we want this to be configurable
      # See https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/
      #
      # `sed` strips the dot from the reported compute capability (e.g. `8.6`
      # becomes `86`). `nvidia-smi` prints one line per GPU, so only the first
      # line is kept to guarantee a single `KEY=value` entry in `$GITHUB_ENV`.
      - name: Set env for CUDA compute
        run: |
          echo "CUDA_ARCH=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader | head -n 1 | sed 's/\.//g')" >> "$GITHUB_ENV"
      # `${{ env.CUDA_ARCH }}` is substituted by the runner before the shell
      # sees the command, so the single quotes do not prevent expansion.
      - name: set env for EC_GPU
        run: |
          echo 'EC_GPU_CUDA_NVCC_ARGS=--fatbin --gpu-architecture=sm_${{ env.CUDA_ARCH }} --generate-code=arch=compute_${{ env.CUDA_ARCH }},code=sm_${{ env.CUDA_ARCH }}' >> "$GITHUB_ENV"
      # Print the resulting nvcc arguments to the job log
      - run: echo "${{ env.EC_GPU_CUDA_NVCC_ARGS}}"
      # Check that CUDA is installed with a driver-compatible version
      # This must also be compatible with the GPU architecture, see above link
      - run: nvcc --version
      - name: CUDA tests
        env:
          EC_GPU_FRAMEWORK: cuda
        run: |
          cargo nextest run --profile ci --cargo-profile dev-ci --features cuda

  opencl:
    name: Rust tests on OpenCL
    # With the `pull_request` types this workflow subscribes to, the action is
    # never `enqueued`, so this job is skipped on pull request events and only
    # runs for the other trigger (`merge_group`).
    # NOTE(review): presumably intentional, to keep the self-hosted GPU runner
    # for the merge queue while still reporting a status on PRs - confirm.
    if: github.event_name != 'pull_request' || github.event.action == 'enqueued'
    runs-on: [self-hosted, gpu-ci]
    env:
      NVIDIA_VISIBLE_DEVICES: all
      NVIDIA_DRIVER_CAPABILITIES: compute,utility
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive
      # NOTE(review): `actions-rs/toolchain` appears to be unmaintained; with no
      # inputs it presumably takes the toolchain from the repository's
      # `rust-toolchain` file. Consider migrating - confirm before changing.
      - uses: actions-rs/toolchain@v1
      - uses: taiki-e/install-action@nextest
      - uses: Swatinem/rust-cache@v2
      # Check we have access to the machine's Nvidia drivers
      - run: nvidia-smi
      # The `compute`/`sm` number corresponds to the Nvidia GPU architecture
      # In this case, the self-hosted machine uses the Ampere architecture, but we want this to be configurable
      # See https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/
      #
      # `sed` strips the dot from the reported compute capability (e.g. `8.6`
      # becomes `86`). `nvidia-smi` prints one line per GPU, so only the first
      # line is kept to guarantee a single `KEY=value` entry in `$GITHUB_ENV`.
      - name: Set env for CUDA compute
        run: |
          echo "CUDA_ARCH=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader | head -n 1 | sed 's/\.//g')" >> "$GITHUB_ENV"
      # `${{ env.CUDA_ARCH }}` is substituted by the runner before the shell
      # sees the command, so the single quotes do not prevent expansion.
      - name: set env for EC_GPU
        run: |
          echo 'EC_GPU_CUDA_NVCC_ARGS=--fatbin --gpu-architecture=sm_${{ env.CUDA_ARCH }} --generate-code=arch=compute_${{ env.CUDA_ARCH }},code=sm_${{ env.CUDA_ARCH }}' >> "$GITHUB_ENV"
      # Print the resulting nvcc arguments to the job log
      - run: echo "${{ env.EC_GPU_CUDA_NVCC_ARGS}}"
      # Check that CUDA is installed with a driver-compatible version
      # This must also be compatible with the GPU architecture, see above link
      - run: nvcc --version
      # Check that we can access the OpenCL headers
      - run: clinfo
      - name: OpenCL tests
        env:
          EC_GPU_FRAMEWORK: opencl
        run: |
          cargo nextest run --profile ci --cargo-profile dev-ci --features cuda,opencl