From 2771b3da8266c55f4f65e359ae4743a4e3bf61b2 Mon Sep 17 00:00:00 2001 From: Samuel Burnham <45365069+samuelburnham@users.noreply.github.com> Date: Tue, 31 Oct 2023 09:15:50 -0400 Subject: [PATCH] ci: Add GPU benchmarks and configure with `just` script (#790) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Upgrade bench runner: 8 vCPUs/32GB RAM -> 32/64 * Add justfile and GPU benchmarks * Add configurable GPU benchmarks * fix: checkout & other details (#8) * fix: name & checkout * fix just version * Refactor job triggers --------- Co-authored-by: François Garillot <4142+huitseeker@users.noreply.github.com> --- .github/workflows/bench-deploy.yml | 59 ++++++++ ...ch_pr_comment.yml => bench-pr-comment.yml} | 38 ++++-- .github/workflows/benchmark.yml | 37 ----- .github/workflows/{gpu.yml => gpu-ci.yml} | 9 +- .github/workflows/merge-tests.yml | 128 ++++++++++++++++++ .github/workflows/merge_group.yml | 65 --------- benches/bench.env | 9 ++ benches/fibonacci_lem.rs | 31 ++++- benches/justfile | 35 +++++ 9 files changed, 297 insertions(+), 114 deletions(-) create mode 100644 .github/workflows/bench-deploy.yml rename .github/workflows/{bench_pr_comment.yml => bench-pr-comment.yml} (57%) delete mode 100644 .github/workflows/benchmark.yml rename .github/workflows/{gpu.yml => gpu-ci.yml} (94%) create mode 100644 .github/workflows/merge-tests.yml delete mode 100644 .github/workflows/merge_group.yml create mode 100644 benches/bench.env create mode 100644 benches/justfile diff --git a/.github/workflows/bench-deploy.yml b/.github/workflows/bench-deploy.yml new file mode 100644 index 0000000000..85108420e8 --- /dev/null +++ b/.github/workflows/bench-deploy.yml @@ -0,0 +1,59 @@ +name: GPU benchmark on `master` +on: + push: + branches: + - master + +jobs: + # TODO: Account for different `justfile` and `bench.env` files + # One option is to upload them to gh-pages for qualitative comparison + # TODO: Fall back to a default if 
`justfile`/`bench.env` not present + benchmark: + name: Bench and deploy + runs-on: [self-hosted, gpu-bench, gh-pages] + steps: + # Install deps + - uses: actions/checkout@v4 + - uses: actions-rs/toolchain@v1 + - uses: Swatinem/rust-cache@v2 + - uses: taiki-e/install-action@v2 + with: + tool: just@1.15.0 + # Set up GPU + # Check we have access to the machine's Nvidia drivers + - run: nvidia-smi + # Check that CUDA is installed with a driver-compatible version + # This must also be compatible with the GPU architecture, see above link + - run: nvcc --version + # Run benchmarks and deploy + - name: Get old benchmarks + uses: actions/checkout@v4 + with: + ref: gh-pages + path: gh-pages + - run: mkdir -p target; cp -r gh-pages/benchmarks/criterion target; + - name: Install criterion + run: cargo install cargo-criterion + - name: Run benchmarks + run: just --dotenv-filename bench.env gpu-bench fibonacci_lem + # TODO: Prettify labels for easier viewing + # Compress the benchmark file and metadata for later analysis + - name: Compress artifacts + run: | + echo $LABELS > labels.md + tar -cvzf ${{ github.sha }}.tar.gz Cargo.lock ${{ github.sha }}.json labels.md + - name: Deploy latest benchmark report + uses: peaceiris/actions-gh-pages@v3 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + publish_dir: ./target/criterion + destination_dir: benchmarks/criterion + - name: Copy benchmark json to history + run: mkdir history; cp ${{ github.sha }}.tar.gz history/ + - name: Deploy benchmark history + uses: peaceiris/actions-gh-pages@v3 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + publish_dir: history/ + destination_dir: benchmarks/history + keep_files: true \ No newline at end of file diff --git a/.github/workflows/bench_pr_comment.yml b/.github/workflows/bench-pr-comment.yml similarity index 57% rename from .github/workflows/bench_pr_comment.yml rename to .github/workflows/bench-pr-comment.yml index 62cb9ffe47..8078cb662c 100644 --- 
a/.github/workflows/bench_pr_comment.yml +++ b/.github/workflows/bench-pr-comment.yml @@ -12,9 +12,9 @@ concurrency: cancel-in-progress: true jobs: - run-benchmark: + cpu-benchmark: name: run end2end benchmark - runs-on: ubuntu-benchmark-runner + runs-on: buildjet-32vcpu-ubuntu-2204 if: github.event.issue.pull_request && github.event.issue.state == 'open' @@ -35,34 +35,56 @@ jobs: - uses: boa-dev/criterion-compare-action@v3 with: # Optional. Compare only this benchmark target - benchName: "end2end" + benchName: "fibonacci_lem" # Needed. The name of the branch to compare with branchName: ${{ github.ref_name }} + # TODO: Check it works with forked PRs when running + # `gh pr checkout {{ github.event.issue.number}}` with `env: GH_TOKEN` gpu-benchmark: name: run fibonacci benchmark on GPU runs-on: [self-hosted, gpu-bench] if: github.event.issue.pull_request && github.event.issue.state == 'open' - && contains(github.event.comment.body, '!benchmark') + && contains(github.event.comment.body, '!gpu-benchmark') && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') steps: + # Set up GPU + # Check we have access to the machine's Nvidia drivers + - run: nvidia-smi + # The `compute`/`sm` number corresponds to the Nvidia GPU architecture + # In this case, the self-hosted machine uses the Ampere architecture, but we want this to be configurable + # See https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/ + # Writes env vars to `bench.env` to be read by `just` command + - name: Set env for CUDA compute + run: echo "CUDA_ARCH=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader | sed 's/\.//g')" >> bench.env + - name: set env for EC_GPU + run: echo 'EC_GPU_CUDA_NVCC_ARGS=--fatbin --gpu-architecture=sm_${{ env.CUDA_ARCH }} --generate-code=arch=compute_${{ env.CUDA_ARCH }},code=sm_${{ env.CUDA_ARCH }}' >> bench.env + # Check that CUDA is installed with a driver-compatible version + # This 
must also be compatible with the GPU architecture, see above link + - run: nvcc --version + - uses: xt0rted/pull-request-comment-branch@v2 id: comment-branch - - uses: actions/checkout@v4 if: success() with: ref: ${{ steps.comment-branch.outputs.head_ref }} - # Set the Rust env vars - uses: actions-rs/toolchain@v1 - uses: Swatinem/rust-cache@v2 + # Strict load => panic if .env file not found + - name: Load env vars + uses: xom9ikk/dotenv@v2 + with: + path: bench.env + load-mode: strict + - uses: boa-dev/criterion-compare-action@v3 with: # Optional. Compare only this benchmark target - benchName: "fibonacci" + benchName: "fibonacci_lem" # Optional. Features activated in the benchmark - features: "cuda,opencl" + features: "cuda" # Needed. The name of the branch to compare with branchName: ${{ github.ref_name }} diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml deleted file mode 100644 index fdfdedccff..0000000000 --- a/.github/workflows/benchmark.yml +++ /dev/null @@ -1,37 +0,0 @@ -name: Benchmarking -on: - workflow_dispatch: - release: - types: [published] - -jobs: - benchmark: - name: Continuous benchmarking - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - name: Get old benchmarks - uses: actions/checkout@v4 - with: - ref: gh-pages - path: gh-pages - - run: mkdir -p target; cp -r gh-pages/benchmarks/criterion target; - - name: Install criterion - run: cargo install cargo-criterion - - name: Run benchmarks - run: cargo criterion --message-format=json > ${{ github.sha }}.json - - name: Deploy latest benchmark report - uses: peaceiris/actions-gh-pages@v3 - with: - github_token: ${{ secrets.GITHUB_TOKEN }} - publish_dir: ./target/criterion - destination_dir: benchmarks/criterion - - name: Move benchmark json to history - run: mkdir history; cp ${{ github.sha }}.json history/ - - name: Deploy benchmark history - uses: peaceiris/actions-gh-pages@v3 - with: - github_token: ${{ secrets.GITHUB_TOKEN }} - publish_dir: history/ 
- destination_dir: benchmarks/history - keep_files: true diff --git a/.github/workflows/gpu.yml b/.github/workflows/gpu-ci.yml similarity index 94% rename from .github/workflows/gpu.yml rename to .github/workflows/gpu-ci.yml index eed43b1bbf..36a8911f62 100644 --- a/.github/workflows/gpu.yml +++ b/.github/workflows/gpu-ci.yml @@ -2,9 +2,10 @@ name: GPU tests on: - push: - branches: - - master + pull_request: + types: [opened, synchronize, reopened, ready_for_review] + branches: [master] + merge_group: env: CARGO_TERM_COLOR: always @@ -36,6 +37,7 @@ concurrency: jobs: cuda: name: Rust tests on CUDA + if: github.event_name != 'pull_request' || github.event.action == 'enqueued' runs-on: [self-hosted, gpu-ci] env: NVIDIA_VISIBLE_DEVICES: all @@ -68,6 +70,7 @@ jobs: opencl: name: Rust tests on OpenCL + if: github.event_name != 'pull_request' || github.event.action == 'enqueued' runs-on: [self-hosted, gpu-ci] env: NVIDIA_VISIBLE_DEVICES: all diff --git a/.github/workflows/merge-tests.yml b/.github/workflows/merge-tests.yml new file mode 100644 index 0000000000..bfd358b8e6 --- /dev/null +++ b/.github/workflows/merge-tests.yml @@ -0,0 +1,128 @@ +# Run final tests only when attempting to merge, shown as skipped status checks beforehand +name: Merge group tests + +on: + pull_request: + types: [opened, synchronize, reopened, ready_for_review] + branches: [master] + merge_group: + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + +jobs: + linux-ignored: + if: github.event_name != 'pull_request' || github.event.action == 'enqueued' + runs-on: buildjet-16vcpu-ubuntu-2204 + env: + RUSTFLAGS: -D warnings + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + - uses: actions-rs/toolchain@v1 + - uses: taiki-e/install-action@nextest + - uses: Swatinem/rust-cache@v2 + - name: Linux Tests + run: | + cargo nextest run --profile ci --workspace --cargo-profile dev-ci --run-ignored ignored-only -E 'all() - 
test(groth16::tests::outer_prove_recursion) - test(test_make_fcomm_examples) - test(test_functional_commitments_demo) - test(test_chained_functional_commitments_demo)' + + linux-arm: + if: github.event_name != 'pull_request' || github.event.action == 'enqueued' + runs-on: buildjet-16vcpu-ubuntu-2204-arm + env: + RUSTFLAGS: -D warnings + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + - uses: actions-rs/toolchain@v1 + - uses: taiki-e/install-action@nextest + - uses: Swatinem/rust-cache@v2 + - name: Linux Tests + run: | + cargo nextest run --profile ci --workspace --cargo-profile dev-ci + - name: Linux Gadget Tests w/o debug assertions + run: | + cargo nextest run --profile ci --workspace --cargo-profile dev-no-assertions -E 'test(circuit::gadgets)' + + mac-m1: + if: github.event_name != 'pull_request' || github.event.action == 'enqueued' + runs-on: macos-latest-xlarge + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + - uses: actions-rs/toolchain@v1 + - uses: taiki-e/install-action@nextest + - uses: Swatinem/rust-cache@v2 + - name: Linux Tests + run: | + cargo nextest run --profile ci --workspace --cargo-profile dev-ci + - name: Linux Gadget Tests w/o debug assertions + run: | + cargo nextest run --profile ci --workspace --cargo-profile dev-no-assertions -E 'test(circuit::gadgets)' + + # TODO: Make this a required status check + # Run comparative benchmark against master, reject on regression + gpu-benchmark: + if: github.event_name != 'pull_request' || github.event.action == 'enqueued' + name: Run fibonacci bench on GPU + runs-on: [self-hosted, gpu-bench] + steps: + # TODO: Factor out GPU setup into an action or into justfile, it's used in 4 places + # Set up GPU + # Check we have access to the machine's Nvidia drivers + - run: nvidia-smi + # Check that CUDA is installed with a driver-compatible version + # This must also be compatible with the GPU architecture, see above link + - run: nvcc --version + - uses: 
actions/checkout@v4 + # Install dependencies + - uses: actions-rs/toolchain@v1 + - uses: Swatinem/rust-cache@v2 + - uses: taiki-e/install-action@v2 + with: + tool: just@1.15 + - name: Install criterion + run: | + cargo install cargo-criterion + cargo install criterion-table + # Checkout base branch for comparative bench + - uses: actions/checkout@v4 + with: + ref: master + path: master + # Copy the script so the base can bench with the same parameters + - name: Copy source script to base branch + run: cd benches && cp justfile bench.env ../master/benches + - name: Set base ref variable + run: cd master && echo "BASE_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV + - run: echo ${{ env.BASE_REF }} + - name: Run GPU bench on base branch + run: cd master/benches && just --dotenv-filename bench.env gpu-bench fibonacci_lem + - name: Copy bench output to PR branch + run: cp master/${{ env.BASE_REF }}.json . + - name: Run GPU bench on PR branch + run: cd benches && just --dotenv-filename bench.env gpu-bench fibonacci_lem + # Create a `criterion-table` and write in commit comment + - name: Run `criterion-table` + run: cat ${{ github.sha }}.json | criterion-table > BENCHMARKS.md + - name: Write bench on commit comment + uses: peter-evans/commit-comment@v3 + with: + body-path: BENCHMARKS.md + # TODO: Use jq for JSON parsing if needed + # Check for benchmark regression based on Criterion's configured noise threshold + - name: Performance regression check + id: check-regression + run: | + echo "regress_count=$(grep -c 'Regressed' ${{ github.sha }}.json)" >> $GITHUB_OUTPUT + # Fail job if regression found + - uses: actions/github-script@v6 + if: ${{ steps.check-regression.outputs.regress_count }} > 0 + with: + script: | + core.setFailed('Fibonacci bench regression detected') + diff --git a/.github/workflows/merge_group.yml b/.github/workflows/merge_group.yml deleted file mode 100644 index a13e2f0f24..0000000000 --- a/.github/workflows/merge_group.yml +++ /dev/null @@ -1,65 +0,0 @@ -# 
Run final tests only when attempting to merge, shown as skipped status checks beforehand -name: Merge group tests - -on: - pull_request: - types: [opened, synchronize, reopened, ready_for_review] - branches: [master] - merge_group: - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - -jobs: - linux-ignored: - if: github.event_name != 'pull_request' || github.event.action == 'enqueued' - runs-on: buildjet-16vcpu-ubuntu-2204 - env: - RUSTFLAGS: -D warnings - steps: - - uses: actions/checkout@v4 - with: - submodules: recursive - - uses: actions-rs/toolchain@v1 - - uses: taiki-e/install-action@nextest - - uses: Swatinem/rust-cache@v2 - - name: Linux Tests - run: | - cargo nextest run --profile ci --workspace --cargo-profile dev-ci --run-ignored ignored-only -E 'all() - test(groth16::tests::outer_prove_recursion) - test(test_make_fcomm_examples) - test(test_functional_commitments_demo) - test(test_chained_functional_commitments_demo)' - - linux-arm: - if: github.event_name != 'pull_request' || github.event.action == 'enqueued' - runs-on: buildjet-16vcpu-ubuntu-2204-arm - env: - RUSTFLAGS: -D warnings - steps: - - uses: actions/checkout@v4 - with: - submodules: recursive - - uses: actions-rs/toolchain@v1 - - uses: taiki-e/install-action@nextest - - uses: Swatinem/rust-cache@v2 - - name: Linux Tests - run: | - cargo nextest run --profile ci --workspace --cargo-profile dev-ci - - name: Linux Gadget Tests w/o debug assertions - run: | - cargo nextest run --profile ci --workspace --cargo-profile dev-no-assertions -E 'test(circuit::gadgets)' - - mac-m1: - if: github.event_name != 'pull_request' || github.event.action == 'enqueued' - runs-on: macos-latest-xlarge - steps: - - uses: actions/checkout@v4 - with: - submodules: recursive - - uses: actions-rs/toolchain@v1 - - uses: taiki-e/install-action@nextest - - uses: Swatinem/rust-cache@v2 - - name: Linux Tests - run: | - cargo nextest run --profile ci --workspace 
--cargo-profile dev-ci - - name: Linux Gadget Tests w/o debug assertions - run: | - cargo nextest run --profile ci --workspace --cargo-profile dev-no-assertions -E 'test(circuit::gadgets)' diff --git a/benches/bench.env b/benches/bench.env new file mode 100644 index 0000000000..b59c73e77a --- /dev/null +++ b/benches/bench.env @@ -0,0 +1,9 @@ +# Lurk config +LURK_PERF=max-parallel-simple +LURK_RC=100,600 +LURK_BENCH_NOISE_THRESHOLD=0.05 + +# CUDA config +NVIDIA_VISIBLE_DEVICES=all +NVIDIA_DRIVER_CAPABILITITES=compute,utility +EC_GPU_FRAMEWORK=cuda diff --git a/benches/fibonacci_lem.rs b/benches/fibonacci_lem.rs index ed340e9af7..48ece2ab90 100644 --- a/benches/fibonacci_lem.rs +++ b/benches/fibonacci_lem.rs @@ -1,5 +1,6 @@ use std::{cell::RefCell, rc::Rc, sync::Arc, time::Duration}; +use anyhow::anyhow; use criterion::{ black_box, criterion_group, criterion_main, measurement, BatchSize, BenchmarkGroup, BenchmarkId, Criterion, SamplingMode, @@ -113,14 +114,42 @@ fn fibo_prove( ); } +fn rc_env() -> anyhow::Result<Vec<usize>> { + std::env::var("LURK_RC") + .map_err(|e| anyhow!("Reduction count env var isn't set: {e}")) + .and_then(|rc| { + let vec: anyhow::Result<Vec<usize>> = rc + .split(',') + .map(|rc| { + rc.parse::<usize>() + .map_err(|e| anyhow!("Failed to parse RC: {e}")) + }) + .collect(); + vec + }) +} + +fn noise_threshold_env() -> anyhow::Result<f64> { + std::env::var("LURK_BENCH_NOISE_THRESHOLD") + .map_err(|e| anyhow!("Noise threshold env var isn't set: {e}")) + .and_then(|nt| { + nt.parse::<f64>() + .map_err(|e| anyhow!("Failed to parse noise threshold: {e}")) + }) +} + fn fibonacci_prove(c: &mut Criterion) { + tracing_subscriber::fmt::init(); set_bench_config(); tracing::debug!("{:?}", lurk::config::LURK_CONFIG); - let reduction_counts = [100, 600, 700, 800, 900]; + + let reduction_counts = rc_env().unwrap_or_else(|_| vec![100]); let batch_sizes = [100, 200]; let mut group: BenchmarkGroup<'_, _> = c.benchmark_group("Prove"); group.sampling_mode(SamplingMode::Flat); // This can take a *while* 
group.sample_size(10); + group.noise_threshold(noise_threshold_env().unwrap_or(0.05)); + let state = State::init_lurk_state().rccell(); for fib_n in batch_sizes.iter() { diff --git a/benches/justfile b/benches/justfile new file mode 100644 index 0000000000..5817577721 --- /dev/null +++ b/benches/justfile @@ -0,0 +1,35 @@ +# Install with `cargo install just` +# Usage: `just --dotenv-filename /path/to/file.env <bench|gpu-bench> <benches>` +# TODO: Move dotenv-filename into justfile once the feature is available +set dotenv-load + +commit := `git rev-parse HEAD` + +# Run CPU benchmarks +bench +benches: + #!/bin/sh + printenv LURK + if [ '{{benches}}' != '' ]; then + for bench in {{benches}}; do + cargo criterion --bench $bench + done + else + echo "Invalid input, enter at least one non-empty string" + fi + +# Run CUDA benchmarks on GPU +gpu-bench +benches: + #!/bin/sh + # The `compute`/`sm` number corresponds to the Nvidia GPU architecture + # In this case, the self-hosted machine uses the Ampere architecture, but we want this to be configurable + # See https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/ + export CUDA_ARCH=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader | sed 's/\.//g') + export EC_GPU_CUDA_NVCC_ARGS="--fatbin --gpu-architecture=sm_$CUDA_ARCH --generate-code=arch=compute_$CUDA_ARCH,code=sm_$CUDA_ARCH" + env | grep -E "LURK|EC_GPU|CUDA" + if [ '{{benches}}' != '' ]; then + for bench in {{benches}}; do + cargo criterion --bench $bench --features "cuda" --message-format=json 2>&1 > ../{{commit}}.json + done + else + echo "Invalid input, enter at least one non-empty string" + fi