From 2771b3da8266c55f4f65e359ae4743a4e3bf61b2 Mon Sep 17 00:00:00 2001
From: Samuel Burnham <45365069+samuelburnham@users.noreply.github.com>
Date: Tue, 31 Oct 2023 09:15:50 -0400
Subject: [PATCH] ci: Add GPU benchmarks and configure with `just` script
 (#790)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Upgrade bench runner: 8 vCPUs/32GB RAM -> 32/64

* Add justfile and GPU benchmarks

* Add configurable GPU benchmarks

* fix: checkout & other details (#8)

* fix: name & checkout

* fix just version

* Refactor job triggers

---------

Co-authored-by: François Garillot <4142+huitseeker@users.noreply.github.com>
---
 .github/workflows/bench-deploy.yml            |  59 ++++++++
 ...ch_pr_comment.yml => bench-pr-comment.yml} |  38 ++++--
 .github/workflows/benchmark.yml               |  37 -----
 .github/workflows/{gpu.yml => gpu-ci.yml}     |   9 +-
 .github/workflows/merge-tests.yml             | 128 ++++++++++++++++++
 .github/workflows/merge_group.yml             |  65 ---------
 benches/bench.env                             |   9 ++
 benches/fibonacci_lem.rs                      |  31 ++++-
 benches/justfile                              |  35 +++++
 9 files changed, 297 insertions(+), 114 deletions(-)
 create mode 100644 .github/workflows/bench-deploy.yml
 rename .github/workflows/{bench_pr_comment.yml => bench-pr-comment.yml} (57%)
 delete mode 100644 .github/workflows/benchmark.yml
 rename .github/workflows/{gpu.yml => gpu-ci.yml} (94%)
 create mode 100644 .github/workflows/merge-tests.yml
 delete mode 100644 .github/workflows/merge_group.yml
 create mode 100644 benches/bench.env
 create mode 100644 benches/justfile

diff --git a/.github/workflows/bench-deploy.yml b/.github/workflows/bench-deploy.yml
new file mode 100644
index 0000000000..85108420e8
--- /dev/null
+++ b/.github/workflows/bench-deploy.yml
@@ -0,0 +1,59 @@
+name: GPU benchmark on `master`
+on:
+  push:
+    branches:
+      - master
+
+jobs:
+  # TODO: Account for different `justfile` and `bench.env` files
+  # One option is to upload them to gh-pages for qualitative comparison
+  # TODO: Fall back to a default if `justfile`/`bench.env` not present
+  benchmark:
+    name: Bench and deploy
+    runs-on: [self-hosted, gpu-bench, gh-pages]
+    steps:
+      # Install deps
+      - uses: actions/checkout@v4
+      - uses: actions-rs/toolchain@v1
+      - uses: Swatinem/rust-cache@v2
+      - uses: taiki-e/install-action@v2
+        with:
+          tool: just@1.15.0
+      # Set up GPU
+      # Check we have access to the machine's Nvidia drivers
+      - run: nvidia-smi
+      # Check that CUDA is installed with a driver-compatible version
+      # This must also be compatible with the GPU architecture, see https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/
+      - run: nvcc --version
+      # Run benchmarks and deploy
+      - name: Get old benchmarks
+        uses: actions/checkout@v4
+        with:
+          ref: gh-pages
+          path: gh-pages
+      - run: mkdir -p target; cp -r gh-pages/benchmarks/criterion target;
+      - name: Install criterion
+        run: cargo install cargo-criterion
+      - name: Run benchmarks  # `justfile` and `bench.env` live in `benches/`; output lands in the repo root
+        run: cd benches && just --dotenv-filename bench.env gpu-bench fibonacci_lem
+      # TODO: Prettify labels for easier viewing
+      # Compress the benchmark file and metadata for later analysis
+      - name: Compress artifacts
+        run: |
+          echo $LABELS > labels.md
+          tar -cvzf ${{ github.sha }}.tar.gz Cargo.lock ${{ github.sha }}.json labels.md
+      - name: Deploy latest benchmark report
+        uses: peaceiris/actions-gh-pages@v3
+        with:
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          publish_dir: ./target/criterion
+          destination_dir: benchmarks/criterion
+      - name: Copy benchmark json to history
+        run: mkdir history; cp ${{ github.sha }}.tar.gz history/
+      - name: Deploy benchmark history
+        uses: peaceiris/actions-gh-pages@v3
+        with:
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          publish_dir: history/
+          destination_dir: benchmarks/history
+          keep_files: true
\ No newline at end of file
diff --git a/.github/workflows/bench_pr_comment.yml b/.github/workflows/bench-pr-comment.yml
similarity index 57%
rename from .github/workflows/bench_pr_comment.yml
rename to .github/workflows/bench-pr-comment.yml
index 62cb9ffe47..8078cb662c 100644
--- a/.github/workflows/bench_pr_comment.yml
+++ b/.github/workflows/bench-pr-comment.yml
@@ -12,9 +12,9 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
-  run-benchmark:
+  cpu-benchmark:
     name: run end2end benchmark
-    runs-on: ubuntu-benchmark-runner
+    runs-on: buildjet-32vcpu-ubuntu-2204
     if:
       github.event.issue.pull_request
       && github.event.issue.state == 'open'
@@ -35,34 +35,56 @@ jobs:
       - uses: boa-dev/criterion-compare-action@v3
         with:
           # Optional. Compare only this benchmark target
-          benchName: "end2end"
+          benchName: "fibonacci_lem"
           # Needed. The name of the branch to compare with
           branchName: ${{ github.ref_name }}
 
+  # TODO: Check it works with forked PRs when running
+  # `gh pr checkout {{ github.event.issue.number}}` with `env: GH_TOKEN`
   gpu-benchmark:
     name: run fibonacci benchmark on GPU
     runs-on: [self-hosted, gpu-bench]
     if:
       github.event.issue.pull_request
       && github.event.issue.state == 'open'
-      && contains(github.event.comment.body, '!benchmark')
+      && contains(github.event.comment.body, '!gpu-benchmark')
       && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER')
     steps:
+      # Set up GPU
+      # Check we have access to the machine's Nvidia drivers
+      - run: nvidia-smi
+      # The `compute`/`sm` number corresponds to the Nvidia GPU architecture
+      # In this case, the self-hosted machine uses the Ampere architecture, but we want this to be configurable
+      # See https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/
+      # Writes env vars to `$GITHUB_ENV`: `env.CUDA_ARCH` is read by the next step, and a workspace file would be wiped by the checkout below
+      - name: Set env for CUDA compute
+        run: echo "CUDA_ARCH=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader | sed 's/\.//g')" >> "$GITHUB_ENV"
+      - name: set env for EC_GPU
+        run: echo 'EC_GPU_CUDA_NVCC_ARGS=--fatbin --gpu-architecture=sm_${{ env.CUDA_ARCH }} --generate-code=arch=compute_${{ env.CUDA_ARCH }},code=sm_${{ env.CUDA_ARCH }}' >> "$GITHUB_ENV"
+      # Check that CUDA is installed with a driver-compatible version
+      # This must also be compatible with the GPU architecture, see above link
+      - run: nvcc --version
+
       - uses: xt0rted/pull-request-comment-branch@v2
         id: comment-branch
-
       - uses: actions/checkout@v4
         if: success()
         with:
           ref: ${{ steps.comment-branch.outputs.head_ref }}
-      # Set the Rust env vars
       - uses: actions-rs/toolchain@v1
       - uses: Swatinem/rust-cache@v2
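+      # Strict load => panic if .env file not found. NOTE(review): the env file added by this patch is `benches/bench.env`; confirm `path` below resolves to it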
+      - name: Load env vars
+        uses: xom9ikk/dotenv@v2
+        with:
+          path: bench.env
+          load-mode: strict
+
       - uses: boa-dev/criterion-compare-action@v3
         with:
           # Optional. Compare only this benchmark target
-          benchName: "fibonacci"
+          benchName: "fibonacci_lem"
           # Optional. Features activated in the benchmark
-          features: "cuda,opencl"
+          features: "cuda"
           # Needed. The name of the branch to compare with
           branchName: ${{ github.ref_name }}
diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
deleted file mode 100644
index fdfdedccff..0000000000
--- a/.github/workflows/benchmark.yml
+++ /dev/null
@@ -1,37 +0,0 @@
-name: Benchmarking
-on:
-  workflow_dispatch:
-  release:
-    types: [published]
-
-jobs:
-   benchmark:
-     name: Continuous benchmarking
-     runs-on: ubuntu-latest
-     steps:
-       - uses: actions/checkout@v4
-       - name: Get old benchmarks
-         uses: actions/checkout@v4
-         with:
-           ref: gh-pages
-           path: gh-pages
-       - run: mkdir -p target; cp -r gh-pages/benchmarks/criterion target;
-       - name: Install criterion
-         run: cargo install cargo-criterion
-       - name: Run benchmarks
-         run: cargo criterion --message-format=json > ${{ github.sha }}.json
-       - name: Deploy latest benchmark report
-         uses: peaceiris/actions-gh-pages@v3
-         with:
-           github_token: ${{ secrets.GITHUB_TOKEN }}
-           publish_dir: ./target/criterion
-           destination_dir: benchmarks/criterion
-       - name: Move benchmark json to history
-         run: mkdir history; cp ${{ github.sha }}.json history/
-       - name: Deploy benchmark history
-         uses: peaceiris/actions-gh-pages@v3
-         with:
-           github_token: ${{ secrets.GITHUB_TOKEN }}
-           publish_dir: history/
-           destination_dir: benchmarks/history
-           keep_files: true
diff --git a/.github/workflows/gpu.yml b/.github/workflows/gpu-ci.yml
similarity index 94%
rename from .github/workflows/gpu.yml
rename to .github/workflows/gpu-ci.yml
index eed43b1bbf..36a8911f62 100644
--- a/.github/workflows/gpu.yml
+++ b/.github/workflows/gpu-ci.yml
@@ -2,9 +2,10 @@
 name: GPU tests
 
 on:
-  push:
-    branches:
-      - master
+  pull_request:
+    types: [opened, synchronize, reopened, ready_for_review]
+    branches: [master]
+  merge_group:
 
 env:
   CARGO_TERM_COLOR: always
@@ -36,6 +37,7 @@ concurrency:
 jobs:
   cuda:
     name: Rust tests on CUDA
+    if: github.event_name != 'pull_request' || github.event.action == 'enqueued'
     runs-on: [self-hosted, gpu-ci]
     env:
       NVIDIA_VISIBLE_DEVICES: all
@@ -68,6 +70,7 @@ jobs:
 
   opencl:
     name: Rust tests on OpenCL
+    if: github.event_name != 'pull_request' || github.event.action == 'enqueued'
     runs-on: [self-hosted, gpu-ci]
     env:
       NVIDIA_VISIBLE_DEVICES: all
diff --git a/.github/workflows/merge-tests.yml b/.github/workflows/merge-tests.yml
new file mode 100644
index 0000000000..bfd358b8e6
--- /dev/null
+++ b/.github/workflows/merge-tests.yml
@@ -0,0 +1,128 @@
+# Run final tests only when attempting to merge, shown as skipped status checks beforehand
+name: Merge group tests
+
+on:
+  pull_request:
+    types: [opened, synchronize, reopened, ready_for_review]
+    branches: [master]
+  merge_group:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
+jobs:
+  linux-ignored:
+    if: github.event_name != 'pull_request' || github.event.action == 'enqueued'
+    runs-on: buildjet-16vcpu-ubuntu-2204
+    env:
+      RUSTFLAGS: -D warnings
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: recursive
+      - uses: actions-rs/toolchain@v1
+      - uses: taiki-e/install-action@nextest
+      - uses: Swatinem/rust-cache@v2
+      - name: Linux Tests
+        run: |
+          cargo nextest run --profile ci --workspace --cargo-profile dev-ci --run-ignored ignored-only -E 'all() - test(groth16::tests::outer_prove_recursion) - test(test_make_fcomm_examples) - test(test_functional_commitments_demo) - test(test_chained_functional_commitments_demo)'
+
+  linux-arm:
+    if: github.event_name != 'pull_request' || github.event.action == 'enqueued'
+    runs-on: buildjet-16vcpu-ubuntu-2204-arm
+    env:
+      RUSTFLAGS: -D warnings
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: recursive
+      - uses: actions-rs/toolchain@v1
+      - uses: taiki-e/install-action@nextest
+      - uses: Swatinem/rust-cache@v2
+      - name: Linux Tests
+        run: |
+          cargo nextest run --profile ci --workspace --cargo-profile dev-ci
+      - name: Linux Gadget Tests w/o debug assertions
+        run: |
+          cargo nextest run --profile ci --workspace --cargo-profile dev-no-assertions -E 'test(circuit::gadgets)'
+
+  mac-m1:
+    if: github.event_name != 'pull_request' || github.event.action == 'enqueued'
+    runs-on: macos-latest-xlarge
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: recursive
+      - uses: actions-rs/toolchain@v1
+      - uses: taiki-e/install-action@nextest
+      - uses: Swatinem/rust-cache@v2
+      - name: Linux Tests
+        run: |
+          cargo nextest run --profile ci --workspace --cargo-profile dev-ci
+      - name: Linux Gadget Tests w/o debug assertions
+        run: |
+          cargo nextest run --profile ci --workspace --cargo-profile dev-no-assertions -E 'test(circuit::gadgets)'
+
+  # TODO: Make this a required status check
+  # Run comparative benchmark against master, reject on regression
+  gpu-benchmark:
+    if: github.event_name != 'pull_request' || github.event.action == 'enqueued'
+    name: Run fibonacci bench on GPU
+    runs-on: [self-hosted, gpu-bench]
+    steps:
+      # TODO: Factor out GPU setup into an action or into justfile, it's used in 4 places
+      # Set up GPU
+      # Check we have access to the machine's Nvidia drivers
+      - run: nvidia-smi
+      # Check that CUDA is installed with a driver-compatible version
+      # This must also be compatible with the GPU architecture, see https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/
+      - run: nvcc --version
+      - uses: actions/checkout@v4
+      # Install dependencies
+      - uses: actions-rs/toolchain@v1
+      - uses: Swatinem/rust-cache@v2
+      - uses: taiki-e/install-action@v2
+        with:
+          tool: just@1.15
+      - name: Install criterion
+        run: |
+          cargo install cargo-criterion
+          cargo install criterion-table
+      # Checkout base branch for comparative bench
+      - uses: actions/checkout@v4
+        with:
+          ref: master
+          path: master
+      # Copy the script so the base can bench with the same parameters
+      - name: Copy source script to base branch
+        run: cd benches && cp justfile bench.env ../master/benches
+      - name: Set base ref variable
+        run: cd master && echo "BASE_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV
+      - run: echo ${{ env.BASE_REF }}
+      - name: Run GPU bench on base branch
+        run: cd master/benches && just --dotenv-filename bench.env gpu-bench fibonacci_lem
+      - name: Copy bench output to PR branch
+        run: cp master/${{ env.BASE_REF }}.json .
+      - name: Run GPU bench on PR branch
+        run: cd benches && just --dotenv-filename bench.env gpu-bench fibonacci_lem
+      # Create a `criterion-table` and write in commit comment
+      - name: Run `criterion-table`
+        run: cat ${{ github.sha }}.json | criterion-table > BENCHMARKS.md
+      - name: Write bench on commit comment
+        uses: peter-evans/commit-comment@v3
+        with:
+          body-path: BENCHMARKS.md
+      # TODO: Use jq for JSON parsing if needed
+      # Check for benchmark regression based on Criterion's configured noise threshold
+      - name: Performance regression check
+        id: check-regression
+        run: |
+          echo "regress_count=$(grep -c 'Regressed' ${{ github.sha }}.json)" >> $GITHUB_OUTPUT
+      # Fail job if regression found. The `if` is a bare expression: appending `> 0` outside expression braces yields a non-empty string, which is always truthy
+      - uses: actions/github-script@v6
+        if: steps.check-regression.outputs.regress_count > 0
+        with:
+          script: |
+            core.setFailed('Fibonacci bench regression detected')
+
diff --git a/.github/workflows/merge_group.yml b/.github/workflows/merge_group.yml
deleted file mode 100644
index a13e2f0f24..0000000000
--- a/.github/workflows/merge_group.yml
+++ /dev/null
@@ -1,65 +0,0 @@
-# Run final tests only when attempting to merge, shown as skipped status checks beforehand
-name: Merge group tests
-
-on:
-  pull_request:
-    types: [opened, synchronize, reopened, ready_for_review]
-    branches: [master]
-  merge_group:
-
-concurrency:
-  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
-  cancel-in-progress: true
-
-jobs:
-  linux-ignored:
-    if: github.event_name != 'pull_request' || github.event.action == 'enqueued'
-    runs-on: buildjet-16vcpu-ubuntu-2204
-    env:
-      RUSTFLAGS: -D warnings
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: recursive
-      - uses: actions-rs/toolchain@v1
-      - uses: taiki-e/install-action@nextest
-      - uses: Swatinem/rust-cache@v2
-      - name: Linux Tests
-        run: |
-          cargo nextest run --profile ci --workspace --cargo-profile dev-ci --run-ignored ignored-only -E 'all() - test(groth16::tests::outer_prove_recursion) - test(test_make_fcomm_examples) - test(test_functional_commitments_demo) - test(test_chained_functional_commitments_demo)'
-
-  linux-arm:
-    if: github.event_name != 'pull_request' || github.event.action == 'enqueued'
-    runs-on: buildjet-16vcpu-ubuntu-2204-arm
-    env:
-      RUSTFLAGS: -D warnings
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: recursive
-      - uses: actions-rs/toolchain@v1
-      - uses: taiki-e/install-action@nextest
-      - uses: Swatinem/rust-cache@v2
-      - name: Linux Tests
-        run: |
-          cargo nextest run --profile ci --workspace --cargo-profile dev-ci
-      - name: Linux Gadget Tests w/o debug assertions
-        run: |
-          cargo nextest run --profile ci --workspace --cargo-profile dev-no-assertions -E 'test(circuit::gadgets)'
-
-  mac-m1:
-    if: github.event_name != 'pull_request' || github.event.action == 'enqueued'
-    runs-on: macos-latest-xlarge
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: recursive
-      - uses: actions-rs/toolchain@v1
-      - uses: taiki-e/install-action@nextest
-      - uses: Swatinem/rust-cache@v2
-      - name: Linux Tests
-        run: |
-          cargo nextest run --profile ci --workspace --cargo-profile dev-ci
-      - name: Linux Gadget Tests w/o debug assertions
-        run: |
-          cargo nextest run --profile ci --workspace --cargo-profile dev-no-assertions -E 'test(circuit::gadgets)'
diff --git a/benches/bench.env b/benches/bench.env
new file mode 100644
index 0000000000..b59c73e77a
--- /dev/null
+++ b/benches/bench.env
@@ -0,0 +1,9 @@
+# Lurk config
+LURK_PERF=max-parallel-simple
+LURK_RC=100,600
+LURK_BENCH_NOISE_THRESHOLD=0.05
+
+# CUDA config
+NVIDIA_VISIBLE_DEVICES=all
+NVIDIA_DRIVER_CAPABILITIES=compute,utility
+EC_GPU_FRAMEWORK=cuda
diff --git a/benches/fibonacci_lem.rs b/benches/fibonacci_lem.rs
index ed340e9af7..48ece2ab90 100644
--- a/benches/fibonacci_lem.rs
+++ b/benches/fibonacci_lem.rs
@@ -1,5 +1,6 @@
 use std::{cell::RefCell, rc::Rc, sync::Arc, time::Duration};
 
+use anyhow::anyhow;
 use criterion::{
     black_box, criterion_group, criterion_main, measurement, BatchSize, BenchmarkGroup,
     BenchmarkId, Criterion, SamplingMode,
@@ -113,14 +114,42 @@ fn fibo_prove<M: measurement::Measurement>(
     );
 }
 
+/// Parses `LURK_RC` as comma-separated reduction counts (e.g. `100, 600`).
+fn rc_env() -> anyhow::Result<Vec<usize>> {
+    std::env::var("LURK_RC")
+        .map_err(|e| anyhow!("Reduction count env var isn't set: {e}"))
+        .and_then(|rc| {
+            rc.split(',')
+                .map(|rc| {
+                    rc.trim()
+                        .parse::<usize>()
+                        .map_err(|e| anyhow!("Failed to parse RC: {e}"))
+                })
+                .collect()
+        })
+}
+
+/// Parses `LURK_BENCH_NOISE_THRESHOLD` as an `f64`, ignoring surrounding whitespace.
+fn noise_threshold_env() -> anyhow::Result<f64> {
+    let nt = std::env::var("LURK_BENCH_NOISE_THRESHOLD")
+        .map_err(|e| anyhow!("Noise threshold env var isn't set: {e}"))?;
+    nt.trim()
+        .parse::<f64>()
+        .map_err(|e| anyhow!("Failed to parse noise threshold: {e}"))
+}
+
 fn fibonacci_prove(c: &mut Criterion) {
+    tracing_subscriber::fmt::init();
     set_bench_config();
     tracing::debug!("{:?}", lurk::config::LURK_CONFIG);
-    let reduction_counts = [100, 600, 700, 800, 900];
+
+    let reduction_counts = rc_env().unwrap_or_else(|_| vec![100]);
     let batch_sizes = [100, 200];
     let mut group: BenchmarkGroup<'_, _> = c.benchmark_group("Prove");
     group.sampling_mode(SamplingMode::Flat); // This can take a *while*
     group.sample_size(10);
+    group.noise_threshold(noise_threshold_env().unwrap_or(0.05));
+
     let state = State::init_lurk_state().rccell();
 
     for fib_n in batch_sizes.iter() {
diff --git a/benches/justfile b/benches/justfile
new file mode 100644
index 0000000000..5817577721
--- /dev/null
+++ b/benches/justfile
@@ -0,0 +1,35 @@
+# Install with `cargo install just`
+# Usage: `just --dotenv-filename /path/to/file.env <bench|gpu-bench> <args>`
+# TODO: Move dotenv-filename into justfile once the feature is available
+set dotenv-load
+
+commit := `git rev-parse HEAD`
+
+# Run CPU benchmarks
+bench +benches:
+  #!/bin/sh
+  env | grep -E "LURK"
+  if [ '{{benches}}' != '' ]; then
+    for bench in {{benches}}; do
+      cargo criterion --bench $bench
+    done
+  else
+    echo "Invalid input, enter at least one non-empty string"
+  fi
+
+# Run CUDA benchmarks on GPU; the JSON output of all benches is collected in `../<commit>.json`
+gpu-bench +benches:
+  #!/bin/sh
+  # The `compute`/`sm` number corresponds to the Nvidia GPU architecture
+  # In this case, the self-hosted machine uses the Ampere architecture, but we want this to be configurable
+  # See https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/
+  export CUDA_ARCH=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader | sed 's/\.//g')
+  export EC_GPU_CUDA_NVCC_ARGS="--fatbin --gpu-architecture=sm_$CUDA_ARCH --generate-code=arch=compute_$CUDA_ARCH,code=sm_$CUDA_ARCH"
+  env | grep -E "LURK|EC_GPU|CUDA"
+  if [ '{{benches}}' != '' ]; then
+    for bench in {{benches}}; do
+      cargo criterion --bench $bench --features "cuda" --message-format=json
+    done 2>&1 > ../{{commit}}.json
+  else
+    echo "Invalid input, enter at least one non-empty string"
+  fi