From 4ea2bbbca0e8c9c6e1b731ea639c278047c09b00 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Wed, 18 Sep 2024 17:04:47 +0200 Subject: [PATCH] ci(ct): reshape maintenance workflow into external matrix script Unfortunately, matrix jobs logs and outputs cannot be aggregated in Github Actions. The only way to work around the limitations of GHA is by using a custom build script that create a similar matrix like experience. This commit introduces these scripts, probably also making some custom actions we added obsolete. --- .github/workflows/container_maintenance.yml | 125 ++--------------- .github/workflows/scripts/maintenance-job.sh | 137 +++++++++++++++++++ .github/workflows/scripts/utils.sh | 107 +++++++++++++++ 3 files changed, 256 insertions(+), 113 deletions(-) create mode 100755 .github/workflows/scripts/maintenance-job.sh create mode 100644 .github/workflows/scripts/utils.sh diff --git a/.github/workflows/container_maintenance.yml b/.github/workflows/container_maintenance.yml index 3a28f355e94..29705d06208 100644 --- a/.github/workflows/container_maintenance.yml +++ b/.github/workflows/container_maintenance.yml @@ -18,42 +18,14 @@ on: env: PLATFORMS: linux/amd64,linux/arm64 NUM_PAST_RELEASES: 3 - # TODO: change to "develop" in final PR - DEVELOP_BRANCH: 10478-version-base-img jobs: - discover: - name: Discover Release Matrix - runs-on: ubuntu-latest - permissions: - contents: read - packages: read - # TODO: re-enable for final PR - # Only run in upstream repo - avoid unnecessary runs in forks and only for scheduled - #if: ${{ github.repository_owner == 'IQSS' }} - outputs: - branches: ${{ steps.matrix.outputs.branches }} - current_release: ${{ steps.matrix.outputs.current_release }} - steps: - - name: Build branch matrix options - id: matrix - run: | - echo "branches=$(curl -f -sS https://github.com/repos/IQSS/dataverse/releases | \ - jq '[ .[0:${{ env.NUM_PAST_RELEASES }}] | .[].tag_name, "${{ env.DEVELOP_BRANCH }}" ]')" | tr -d "\n" | tr -s " " | \ - tee -a "$GITHUB_OUTPUT" - echo "current_release=$(curl -f -sS https://github.com/repos/IQSS/dataverse/releases | jq '.[0].tag_name' )" | tee -a "$GITHUB_OUTPUT" - build: name: Build image runs-on: ubuntu-latest permissions: contents: read packages: read - needs: discover - strategy: - fail-fast: false - matrix: - branch: ${{ fromJson(needs.discover.outputs.branches) }} # TODO: re-enable for final PR # Only run in upstream repo - avoid unnecessary runs in forks #if: ${{ github.repository_owner == 'IQSS' }} @@ -64,7 +36,6 @@ jobs: # Necessary as the checked out release branch might not contain the action as files uses: gdcc/wip-dataverse-base-image/.github/actions/setup-maven@10478-version-base-img with: - git-reference: ${{ matrix.branch }} pom-paths: modules/container-base/pom.xml # Note: Accessing, pushing tags etc. to DockerHub will only succeed in upstream and @@ -79,93 +50,21 @@ jobs: with: platforms: ${{ env.PLATFORMS }} - # Try to retrieve backport patches for this git ref (but don't fail if there aren't any) - # and try to apply them if present - - name: Get and apply backported patches - # There might be no patches - ignore errors - continue-on-error: true - run: | - mkdir -p "${GITHUB_WORKSPACE}/patches" - curl -sSL "https://github.com/${GITHUB_REPOSITORY}/archive/${DEVELOP_BRANCH}.tar.gz" | \ - tar -zxf - -C "${GITHUB_WORKSPACE}/patches" --wildcards "*/modules/container-base/src/backports/${{ matrix.branch }}" --strip-components=6 - find "${GITHUB_WORKSPACE}/patches" -type f -name '*.patch' -print0 | xargs -0 -n1 patch -p1 -s -i - - # Determine the base image name we are going to use from here on - - name: Determine base image name - run: | - if [[ "${{ matrix.branch }}" = "${{ env.DEVELOP_BRANCH }}" ]]; then - echo "BASE_IMAGE=$( mvn initialize help:evaluate -Pct -f modules/container-base -Dexpression=base.image -q -DforceStdout )" | tee -a "${GITHUB_ENV}" - echo "BASE_IMAGE_UPCOMING=$( mvn initialize help:evaluate -Pct -f modules/container-base -Dexpression=base.image -Dbase.image.tag.suffix="" -q -DforceStdout )" | tee -a "${GITHUB_ENV}" - else - echo "BASE_IMAGE=$( mvn initialize help:evaluate -Pct -f modules/container-base -Dexpression=base.image -Dbase.image.tag.suffix="" -q -DforceStdout )" | tee -a "${GITHUB_ENV}" - fi - - # Figure out if a rebuild is necessary because either there is an updated Java image or our installed packages need updates - - name: Check for recent Temurin image updates - id: temurin-check - # TODO: change to upstream location in final PR - uses: gdcc/wip-dataverse-base-image/.github/actions/check-newer-parent-image@10478-version-base-img - with: - parent: "$( mvn help:evaluate -Pct -f modules/container-base -Dexpression=java.image -q -DforceStdout )" - derived: "${{ env.BASE_IMAGE }}" - # TODO: if we introduce more flavors as a matrix, we need to adapt the install command to check for updates - - name: Check for package updates in base image - id: package-check - if: ${{ steps.temurin-check.outputs.is-more-recent == 'false' }} + # Discover the releases we want to maintain + - name: Discover maintained releases + id: discover run: | - PKGS="$( grep "ARG PKGS" modules/container-base/src/main/docker/Dockerfile | cut -f2 -d= | tr -d '"' )" - if [[ ! $( docker run --rm -u 0 "${BASE_IMAGE}" sh -c "apt update >&2 && apt install -s ${PKGS}" | grep "0 upgraded" ) ]]; then - echo "Base image $BASE_IMAGE needs updates for our custom installed packages" - echo "newer_packages=true" >> "${GITHUB_OUTPUT}" - else - echo "Base image $BASE_IMAGE has no updates for our custom installed packages" - echo "newer_packages=false" >> "${GITHUB_OUTPUT}" - fi + DEVELOPMENT_BRANCH=$( curl -f -sS https://github.com/repos/${{ github.repository }} | jq -r '.default_branch' ) + echo "DEVELOPMENT_BRANCH=$DEVELOPMENT_BRANCH" | tee -a "$GITHUB_ENV" + echo "branches=$( curl -f -sS https://github.com/repos/IQSS/dataverse/releases | jq -r " .[0:${{ env.NUM_PAST_RELEASES }}] | .[].tag_name, \"${DEVELOPMENT_BRANCH}\" " )" | tee -a "${GITHUB_OUTPUT}" + - # TODO: In a future version of this script, we might want to include checking for other security updates, - # not just updates to the packages we installed. - # grep security /etc/apt/sources.list > /tmp/security.list - # apt-get update -oDir::Etc::Sourcelist=/tmp/security.list - # apt-get dist-upgrade -y -oDir::Etc::Sourcelist=/tmp/security.list -oDir::Etc::SourceParts=/bin/false -s - - - name: Calculate revision number for immutable tag (on release branches only) - if: ${{ matrix.branch != env.DEVELOP_BRANCH }} - id: revision-tag - # TODO: change to upstream location in final PR - uses: gdcc/wip-dataverse-base-image/.github/actions/get-image-revision@10478-version-base-img - with: - image-ref: ${{ env.BASE_IMAGE }} - tag-options-prefix: "-Dbase.image.tag.suffix='' -Ddocker.tags.revision=" - - name: Configure update of "latest" tag for development branch - id: develop-tag - if: ${{ matrix.branch == env.DEVELOP_BRANCH }} + # Execute matrix build for the discovered branches + - name: Execute build matrix script + id: execute run: | - echo "tag-options=-Ddocker.tags.develop=unstable -Ddocker.tags.upcoming=${BASE_IMAGE_UPCOMING#*:}" | tee -a "${GITHUB_OUTPUT}" - - name: Deploy multi-arch base container image to Docker Hub - if: ${{ steps.temurin-check.outputs.is-more-recent == 'true' || steps.package-check.outputs.newer_packages == 'true' || inputs.force_build }} - id: build - run: | - mvn -f modules/container-base -Pct deploy -Ddocker.noCache -Ddocker.platforms=${{ env.PLATFORMS }} \ - -Ddocker.imagePropertyConfiguration=override ${{ steps.develop-tag.outputs.tag-options }} ${{ steps.revision-tag.outputs.tag-options }} - echo "rebuild=true" | tee -a "${GITHUB_OUTPUT}" - - # TODO: this is here to not drop the knowledge about matrix output workarounds (for now) - # - if: always() - # name: Save status (workaround for matrix outputs) - # run: | - # # steps.build.outcome is the status BEFORE continue-on-error - # echo "STATUS_$( echo "${{ matrix.branch }}" | tr ".:;,-/ " "_" )=${{ steps.build.outcome }}" | tee -a "${GITHUB_ENV}" - - # TODO: As part of issue #10618 we will need to create this action, shipping updated app images - #- name: Rebuild application container - # if: ${{ steps.build.outputs.rebuild }} - # uses: ./.github/actions/deploy-app-container - # with: - # registry: "" - # registry_token: "" - # ref: "" - # base_image: "" - # base_image_tag: "" + echo "force_build=${{ inputs.force_build }}" + .github/workflows/scripts/maintenance-job.sh ${{ steps.discover.outputs.branches }} # TODO: This job should become part of the matrix as an action, so we don't need to fiddle with matrix outputs hacks #push-app-img: diff --git a/.github/workflows/scripts/maintenance-job.sh b/.github/workflows/scripts/maintenance-job.sh new file mode 100755 index 00000000000..a3b8c72bb52 --- /dev/null +++ b/.github/workflows/scripts/maintenance-job.sh @@ -0,0 +1,137 @@ +#!/bin/bash + +# A matrix-like job to maintain a number of releases as well as the latest snap of Dataverse. + +# PREREQUISITES: +# - You have Java, Maven, QEMU and Docker all setup and ready to go +# - You obviously checked out the develop branch, otherwise you'd not be executing this script +# - You added all the branch names you want to run maintenance for as arguments +# Optional, but recommended: +# - You added a DEVELOPMENT_BRANCH env var to your runner/job env with the name of the development branch +# - You added a FORCE_BUILD=0|1 env var to indicate if the base image build should be forced +# - You added a PLATFORMS env var with all the target platforms you want to build for + +# NOTE: +# This script is a culmination of Github Action steps into a single script. +# The reason to put all of this in here is due to the complexity of the Github Action and the limitation of the +# matrix support in Github actions, where outputs cannot be aggregated or otherwise used further. + +set -euo pipefail + +# Get all the inputs +# If not within a runner, just print to stdout (duplicating the output in case of tee usage, but that's ok for testing) +GITHUB_OUTPUT=${GITHUB_OUTPUT:-"/proc/self/fd/1"} +GITHUB_ENV=${GITHUB_ENV:-"/proc/self/fd/1"} +GITHUB_WORKSPACE=${GITHUB_WORKSPACE:-"$(pwd)"} +GITHUB_SERVER_URL=${GITHUB_SERVER_URL:-"https://github.com"} +GITHUB_REPOSITORY=${GITHUB_REPOSITORY:-"IQSS/dataverse"} + +MAINTENANCE_WORKSPACE="${GITHUB_WORKSPACE}/maintenance-job" + +DEVELOPMENT_BRANCH="${DEVELOPMENT_BRANCH:-"develop"}" +FORCE_BUILD="${FORCE_BUILD:-"0"}" +PLATFORMS="${PLATFORMS:-"linux/amd64,linux/arm64"}" + +# Setup and validation +if [[ -z "$*" ]]; then + >&2 echo "You must give a list of branch names as arguments" + exit 1; +fi + +source "$( dirname "$0" )/utils.sh" + +# Delete old stuff if present +rm -rf "$MAINTENANCE_WORKSPACE" +mkdir -p "$MAINTENANCE_WORKSPACE" + +# Cache the image tags we maintain in this array (same order as branches array!) +# This list will be used to build the support matrix within the Docker Hub image description +SUPPORTED_ROLLING_TAGS=() + +for BRANCH in "$@"; do + echo "::group::Running maintenance for $BRANCH" + + # 0. Determine if this is a development branch and the most current release + IS_DEV=0 + if [[ "$BRANCH" = "$DEVELOPMENT_BRANCH" ]]; then + IS_DEV=1 + fi + IS_CURRENT_RELEASE=0 + if [[ "$BRANCH" = $( curl -f -sS "https://github.com/repos/$GITHUB_REPOSITORY/releases" | jq '.[0].tag_name' ) ]]; then + IS_CURRENT_RELEASE=1 + fi + + # 1. Let's get the maintained sources + git clone -c advice.detachedHead=false --depth 1 --branch "$BRANCH" "${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}" "$MAINTENANCE_WORKSPACE/$BRANCH" + # Switch context + cd "$MAINTENANCE_WORKSPACE/$BRANCH" + + # 2. Now let's apply the patches (we have them checked out in $GITHUB_WORKSPACE, not necessarily in this local checkout) + find "${GITHUB_WORKSPACE}/modules/container-base/src/backports/$BRANCH" -type f -name '*.patch' -print0 | xargs -0 -n1 patch -p1 -s -i + + # 3. Determine the base image ref (/:) + BASE_IMAGE_REF="" + # For the dev branch we want to full flexi stack tag, to detect stack upgrades requiring new build + if (( IS_DEV )); then + BASE_IMAGE_REF=$( mvn initialize help:evaluate -Pct -f modules/container-base -Dexpression=base.image -q -DforceStdout ) + else + BASE_IMAGE_REF=$( mvn initialize help:evaluate -Pct -f modules/container-base -Dexpression=base.image -Dbase.image.tag.suffix="" -q -DforceStdout ) + fi + + # 4. Check for Temurin image updates + JAVA_IMAGE_REF=$( mvn help:evaluate -Pct -f modules/container-base -Dexpression=java.image -q -DforceStdout ) + NEWER_JAVA_IMAGE=0 + if check_newer_parent "$JAVA_IMAGE_REF" "$BASE_IMAGE_REF"; then + NEWER_JAVA_IMAGE=1 + fi + + # 5. Check for package updates in base image + PKGS="$( grep "ARG PKGS" modules/container-base/src/main/docker/Dockerfile | cut -f2 -d= | tr -d '"' )" + NEWER_PKGS=0 + # Don't bother with package checks if the java image is newer already + if ! (( NEWER_JAVA_IMAGE )); then + if check_newer_pkgs "$BASE_IMAGE_REF" "$PKGS"; then + NEWER_PKGS=1 + fi + fi + + # 6. Get current immutable revision tag if not on the dev branch + REV=$( current_revision "$BASE_IMAGE_REF" ) + CURRENT_REV_TAG="${BASE_IMAGE_REF#*:}-r$REV" + NEXT_REV_TAG="${BASE_IMAGE_REF#*:}-r$(( REV + 1 ))" + + # 7. Let's put together what tags we want added to this build run + TAG_OPTIONS="" + if ! (( IS_DEV )); then + TAG_OPTIONS="-Dbase.image=$BASE_IMAGE_REF -Ddocker.tags.revision=$NEXT_REV_TAG" + + # In case of the current release, add the "latest" tag as well. Also add to list of rolling tags. + if (( IS_CURRENT_RELEASE )); then + TAG_OPTIONS="$TAG_OPTIONS -Ddocker.tags.latest=latest" + SUPPORTED_ROLLING_TAGS+=("[\"latest\", \"${BASE_IMAGE_REF#*:}\"]") + else + SUPPORTED_ROLLING_TAGS+=("[\"${BASE_IMAGE_REF#*:}\"]") + fi + else + UPCOMING_TAG=$( mvn initialize help:evaluate -Pct -f modules/container-base -Dexpression=base.image.tag -Dbase.image.tag.suffix="" -q -DforceStdout ) + TAG_OPTIONS="-Ddocker.tags.develop=unstable -Ddocker.tags.upcoming=$UPCOMING_TAG" + + SUPPORTED_ROLLING_TAGS+=("[\"unstable\", \"$UPCOMING_TAG\", \"${BASE_IMAGE_REF#*:}\"]") + fi + echo "Determined these additional Maven tag options: $TAG_OPTIONS" + + # 7. Let's build the base image if necessary + NEWER_BASE_IMAGE=0 + if (( NEWER_JAVA_IMAGE + NEWER_PKGS + FORCE_BUILD > 0 )); then + mvn -Pct -f modules/container-base deploy -Ddocker.noCache -Ddocker.platforms="${PLATFORMS}" \ + -Ddocker.imagePropertyConfiguration=override $TAG_OPTIONS + NEWER_BASE_IMAGE=1 + fi + + if (( NEWER_BASE_IMAGE )); then + echo "Built a new base image, should continue with application images now..." + # TODO: rebuild the app images here + fi + + echo "::endgroup::" +done diff --git a/.github/workflows/scripts/utils.sh b/.github/workflows/scripts/utils.sh new file mode 100644 index 00000000000..e700f685470 --- /dev/null +++ b/.github/workflows/scripts/utils.sh @@ -0,0 +1,107 @@ +#!/bin/bash + +set -euo pipefail + +function check_newer_parent() { + PARENT_IMAGE="$1" + # Get namespace, default to "library" if not found + PARENT_IMAGE_NS="${PARENT_IMAGE%/*}" + if [[ "$PARENT_IMAGE_NS" = "${PARENT_IMAGE}" ]]; then + PARENT_IMAGE_NS="library" + fi + PARENT_IMAGE_REPO="${PARENT_IMAGE%:*}" + PARENT_IMAGE_TAG="${PARENT_IMAGE#*:}" + + PARENT_IMAGE_LAST_UPDATE="$( curl -sS "https://hub.docker.com/v2/namespaces/${PARENT_IMAGE_NS}/repositories/${PARENT_IMAGE_REPO}/tags/${PARENT_IMAGE_TAG}" | jq -r .last_updated )" + if [[ "$PARENT_IMAGE_LAST_UPDATE" = "null" ]]; then + echo "::error title='Invalid PARENT Image'::Could not find ${PARENT_IMAGE} in the registry" + exit 1 + fi + + DERIVED_IMAGE="$2" + # Get namespace, default to "library" if not found + DERIVED_IMAGE_NS="${DERIVED_IMAGE%/*}" + if [[ "${DERIVED_IMAGE_NS}" = "${DERIVED_IMAGE}" ]]; then + DERIVED_IMAGE_NS="library" + fi + DERIVED_IMAGE_REPO="$( echo "${DERIVED_IMAGE%:*}" | cut -f2 -d/ )" + DERIVED_IMAGE_TAG="${DERIVED_IMAGE#*:}" + + DERIVED_IMAGE_LAST_UPDATE="$( curl -sS "https://hub.docker.com/v2/namespaces/${DERIVED_IMAGE_NS}/repositories/${DERIVED_IMAGE_REPO}/tags/${DERIVED_IMAGE_TAG}" | jq -r .last_updated )" + if [[ "$DERIVED_IMAGE_LAST_UPDATE" = "null" || "$DERIVED_IMAGE_LAST_UPDATE" < "$PARENT_IMAGE_LAST_UPDATE" ]]; then + echo "Parent image $PARENT_IMAGE has a newer release ($PARENT_IMAGE_LAST_UPDATE), which is more recent than $DERIVED_IMAGE ($DERIVED_IMAGE_LAST_UPDATE)" + return 0 + else + echo "Parent image $PARENT_IMAGE ($PARENT_IMAGE_LAST_UPDATE) is older than $DERIVED_IMAGE ($DERIVED_IMAGE_LAST_UPDATE)" + return 1 + fi +} + +function check_newer_pkgs() { + IMAGE="$1" + PKGS="$2" + + docker run --rm -u 0 "${IMAGE}" sh -c "apt update >/dev/null 2>&1 && apt install -s ${PKGS}" | tee /proc/self/fd/2 | grep -q "0 upgraded" + + if [[ $? ]]; then + echo "Base image $IMAGE needs updates for our custom installed packages" + return 0 + else + echo "Base image $IMAGE has no updates for our custom installed packages" + return 1 + fi + + # TODO: In a future version of this script, we might want to include checking for other security updates, + # not just updates to the packages we installed. + # grep security /etc/apt/sources.list > /tmp/security.list + # apt-get update -oDir::Etc::Sourcelist=/tmp/security.list + # apt-get dist-upgrade -y -oDir::Etc::Sourcelist=/tmp/security.list -oDir::Etc::SourceParts=/bin/false -s + +} + +function current_revision() { + IMAGE="$1" + IMAGE_NS_REPO="${IMAGE%:*}" + IMAGE_TAG="${IMAGE#*:}" + + if [[ "$IMAGE_TAG" = "$IMAGE_NS_REPO" ]]; then + >&2 echo "You must provide an image reference in the format [/]:" + exit 1 + fi + + case "$IMAGE_NS_REPO" in + */*) :;; # namespace/repository syntax, leave as is + *) IMAGE_NS_REPO="library/$IMAGE_NS_REPO";; # bare repository name (docker official image); must convert to namespace/repository syntax + esac + + # Without such a token we may run into rate limits + # OB 2024-09-16: for some reason using this token stopped working. Let's go without and see if we really fall into rate limits. + # token=$( curl -s "https://auth.docker.io/token?service=registry.docker.io&scope=repository:$IMAGE_NS_REPO:pull" ) + + ALL_TAGS="$( + i=0 + while [ $? == 0 ]; do + i=$((i+1)) + # OB 2024-09-16: for some reason using this token stopped working. Let's go without and see if we really fall into rate limits. + # RESULT=$( curl -s -H "Authorization: Bearer $token" "https://registry.hub.docker.com/v2/repositories/$IMAGE_NS_REPO/tags/?page=$i&page_size=100" ) + RESULT=$( curl -s "https://registry.hub.docker.com/v2/repositories/$IMAGE_NS_REPO/tags/?page=$i&page_size=100" ) + if [[ $( echo "$RESULT" | jq '.message' ) != "null" ]]; then + # If we run into an error on the first attempt, that means we have a problem. + if [[ "$i" == "1" ]]; then + >&2 echo "Error when retrieving tag data: $( echo "$RESULT" | jq '.message' )" + exit 2 + # Otherwise it will just mean we reached the last page already + else + break + fi + else + echo "$RESULT" | jq -r '."results"[]["name"]' + # DEBUG: + #echo "$RESULT" | >&2 jq -r '."results"[]["name"]' + fi + done + )" + + # Note: if a former tag could not be found, it just might not exist already. Start new series with rev 0 + echo "$ALL_TAGS" | grep "${IMAGE_TAG}-r" | sed -e "s#${IMAGE_TAG}-r##" | sort -h | tail -n1 || echo "-1" +}