diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml
new file mode 100644
index 0000000..a95b72c
--- /dev/null
+++ b/.github/workflows/e2e.yaml
@@ -0,0 +1,47 @@
+name: e2e
+
+on:
+  workflow_dispatch:
+  push:
+    branches: [ '**' ]  # '**' also matches names containing '/'; a bare '*' does not
+    tags-ignore: [ '*' ]
+
+jobs:
+  kubernetes:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+      - name: Setup Flux
+        uses: fluxcd/flux2/action@main
+      - name: Setup Kubernetes
+        uses: helm/kind-action@v1.8.0
+        with:
+          cluster_name: flux
+          version: v0.20.0
+          # The versions below should target the newest Kubernetes version
+          # Keep this up-to-date with https://endoflife.date/kubernetes
+          node_image: kindest/node:v1.28.0@sha256:9f3ff58f19dcf1a0611d11e8ac989fdb30a28f40f236f59f0bea31fb956ccf5c
+          kubectl_version: v1.28.0
+      - name: Install Flux in Kubernetes Kind
+        run: flux install
+      - name: Setup cluster reconciliation
+        run: |
+          flux create source git flux-system \
+          --url=${{ github.event.repository.html_url }} \
+          --branch=${GITHUB_REF#refs/heads/} \
+          --ignore-paths="clusters/**/flux-system/"
+          flux create kustomization flux-system \
+          --source=flux-system \
+          --path=./clusters/test
+      - name: Verify cluster reconciliation
+        run: |
+          kubectl -n flux-system wait kustomization/monitoring-controllers --for=condition=ready --timeout=10m
+          kubectl -n flux-system wait kustomization/monitoring-configs --for=condition=ready --timeout=1m
+      - name: Debug failure
+        if: failure()
+        run: |
+          kubectl -n flux-system logs deploy/source-controller
+          kubectl -n flux-system logs deploy/kustomize-controller
+          kubectl -n flux-system logs deploy/helm-controller
+          flux get all --all-namespaces
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
new file mode 100644
index 0000000..8432c15
--- /dev/null
+++ b/.github/workflows/test.yaml
@@ -0,0 +1,23 @@
+name: test
+
+on:
+  workflow_dispatch:
+  pull_request:
+  push:
+    branches: [ '**' ]  # '**' also matches names containing '/'; a bare '*' does not
+    tags-ignore: [ '*' ]
+
+jobs:
+  manifests:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+      - name: Setup yq
+        uses: fluxcd/pkg/actions/yq@main
+      - name: Setup kubeconform
+        uses: fluxcd/pkg/actions/kubeconform@main
+      - name: Setup kustomize
+        uses: fluxcd/pkg/actions/kustomize@main
+      - name: Validate manifests
+        run: ./scripts/validate.sh
diff --git a/README.md b/README.md
index 710e96a..055e16c 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,97 @@
 # flux2-monitoring-example
-Prometheus monitoring for the Flux control plane
+
+This repository is an example of how to make use of
+[kube-prometheus-stack](https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack)
+and
+[loki-stack](https://github.com/grafana/helm-charts/tree/main/charts/loki-stack)
+to monitor Flux.
+
+Components:
+
+* **kube-state-metrics** - generates metrics about the state of the Flux objects
+* **Prometheus Operator** - manages Prometheus clusters atop Kubernetes
+* **Prometheus** - collects and stores metrics from the Flux controllers and kube-state-metrics
+* **Promtail** - collects the logs from the Flux controllers
+* **Loki** - stores the logs collected by Promtail
+* **Grafana** dashboards - displays the Flux control plane resource usage, reconciliation stats and logs
+
+## Quickstart
+
+### Create a Kubernetes cluster
+
+For a quick local test, you can use [Kubernetes kind](https://kind.sigs.k8s.io/docs/user/quick-start/).
+Any other Kubernetes setup will work as well though.
+
+Create a cluster called `test` with the kind CLI:
+
+```shell
+kind create cluster --name test
+```
+
+### Fork the GitHub repository
+
+In order to follow this guide you'll need a GitHub account and a
+[personal access token](https://help.github.com/en/github/authenticating-to-github/creating-a-personal-access-token-for-the-command-line)
+that can create repositories (check all permissions under `repo`).
+
+Add the GitHub PAT and username to your shell environment:
+
+```sh
+export GITHUB_TOKEN=
+export GITHUB_USER=
+```
+
+Fork this repository on your personal account and clone it locally:
+
+```shell
+git clone https://github.com/${GITHUB_USER}/flux2-monitoring-example.git
+cd flux2-monitoring-example
+```
+
+### Bootstrap Flux
+
+Install the Flux controllers on the test cluster:
+
+```shell
+flux bootstrap github \
+  --owner=${GITHUB_USER} \
+  --repository=flux2-monitoring-example \
+  --branch=main \
+  --personal \
+  --path=clusters/test
+```
+
+Wait for Flux to deploy the monitoring stack with:
+
+```shell
+flux get kustomizations --watch
+```
+
+After Flux has finished reconciling, you can list the pods in the monitoring namespace with:
+
+```console
+$ kubectl -n monitoring get po
+NAME                                                        READY
+kube-prometheus-stack-grafana-5c976ff4cf-xgmwm              3/3
+kube-prometheus-stack-kube-state-metrics-5dcf4c4697-jvlvh   1/1
+kube-prometheus-stack-operator-75f9fdcbf6-98zmh             1/1
+kube-prometheus-stack-prometheus-node-exporter-j4vhb        1/1
+loki-stack-0                                                1/1
+loki-stack-promtail-dcg64                                   1/1
+prometheus-kube-prometheus-stack-prometheus-0               2/2
+```
+
+### Accessing Grafana
+
+To access Grafana, start a port-forward in a separate shell:
+
+```shell
+kubectl -n monitoring port-forward svc/kube-prometheus-stack-grafana 3000:80
+```
+
+Navigate to `http://localhost:3000` in your browser and log in with user `admin` and password `flux`.
+
+Flux dashboards:
+- [Reconciliation stats](http://localhost:3000/d/flux-cluster/flux-cluster-stats)
+- [Control plane stats](http://localhost:3000/d/flux-control-plane/flux-control-plane)
+- [Control plane logs](http://localhost:3000/d/flux-logs/flux-logs)
diff --git a/clusters/test/flux-system/gotk-components.yaml b/clusters/test/flux-system/gotk-components.yaml
new file mode 100644
index 0000000..cb41b33
--- /dev/null
+++ b/clusters/test/flux-system/gotk-components.yaml
@@ -0,0 +1 @@
+# This file will be generated automatically by flux bootstrap.
diff --git a/clusters/test/flux-system/gotk-sync.yaml b/clusters/test/flux-system/gotk-sync.yaml
new file mode 100644
index 0000000..cb41b33
--- /dev/null
+++ b/clusters/test/flux-system/gotk-sync.yaml
@@ -0,0 +1 @@
+# This file will be generated automatically by flux bootstrap.
diff --git a/clusters/test/flux-system/kustomization.yaml b/clusters/test/flux-system/kustomization.yaml
new file mode 100644
index 0000000..a3a32be
--- /dev/null
+++ b/clusters/test/flux-system/kustomization.yaml
@@ -0,0 +1,19 @@
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+resources:
+  - gotk-components.yaml
+  - gotk-sync.yaml
+labels:
+  - pairs:
+      toolkit.fluxcd.io/tenant: sre-team
+patches:
+  - patch: |
+      - op: add
+        path: /spec/template/spec/containers/0/args/-
+        value: --concurrent=20
+      - op: add
+        path: /spec/template/spec/containers/0/args/-
+        value: --requeue-dependency=5s
+    target:
+      kind: Deployment
+      name: "(kustomize-controller|helm-controller|source-controller)"
diff --git a/clusters/test/monitoring.yaml b/clusters/test/monitoring.yaml
new file mode 100644
index 0000000..9a3d73e
--- /dev/null
+++ b/clusters/test/monitoring.yaml
@@ -0,0 +1,34 @@
+---
+apiVersion: kustomize.toolkit.fluxcd.io/v1
+kind: Kustomization
+metadata:
+  name: monitoring-controllers
+  namespace: flux-system
+spec:
+  interval: 1h
+  retryInterval: 2m
+  timeout: 10m
+  prune: true
+  wait: true
+  sourceRef:
+    kind: GitRepository
+    name: flux-system
+  path: ./monitoring/controllers
+---
+apiVersion: kustomize.toolkit.fluxcd.io/v1
+kind: Kustomization
+metadata:
+  name: monitoring-configs
+  namespace: flux-system
+spec:
+  dependsOn:
+    - name: monitoring-controllers
+  interval: 1h
+  retryInterval: 2m
+  timeout: 5m
+  prune: true
+  wait: true
+  sourceRef:
+    kind: GitRepository
+    name: flux-system
+  path: ./monitoring/configs
diff --git a/monitoring-config/dashboards/cluster.json b/monitoring/configs/dashboards/cluster.json
similarity index 100%
rename from monitoring-config/dashboards/cluster.json
rename to monitoring/configs/dashboards/cluster.json
diff --git a/monitoring-config/dashboards/control-plane.json b/monitoring/configs/dashboards/control-plane.json
similarity index 100%
rename from monitoring-config/dashboards/control-plane.json
rename to monitoring/configs/dashboards/control-plane.json
diff --git a/monitoring-config/dashboards/logs.json b/monitoring/configs/dashboards/logs.json
similarity index 100%
rename from monitoring-config/dashboards/logs.json
rename to monitoring/configs/dashboards/logs.json
diff --git a/monitoring-config/kustomization.yaml b/monitoring/configs/kustomization.yaml
similarity index 100%
rename from monitoring-config/kustomization.yaml
rename to monitoring/configs/kustomization.yaml
diff --git a/monitoring-config/podmonitor.yaml b/monitoring/configs/podmonitor.yaml
similarity index 100%
rename from monitoring-config/podmonitor.yaml
rename to monitoring/configs/podmonitor.yaml
diff --git a/kube-prometheus-stack/kustomization.yaml b/monitoring/controllers/kube-prometheus-stack/kustomization.yaml
similarity index 100%
rename from kube-prometheus-stack/kustomization.yaml
rename to monitoring/controllers/kube-prometheus-stack/kustomization.yaml
diff --git a/kube-prometheus-stack/namespace.yaml b/monitoring/controllers/kube-prometheus-stack/namespace.yaml
similarity index 100%
rename from kube-prometheus-stack/namespace.yaml
rename to monitoring/controllers/kube-prometheus-stack/namespace.yaml
diff --git a/kube-prometheus-stack/release.yaml b/monitoring/controllers/kube-prometheus-stack/release.yaml
similarity index 98%
rename from kube-prometheus-stack/release.yaml
rename to monitoring/controllers/kube-prometheus-stack/release.yaml
index 6c882c7..fed602e 100644
--- a/kube-prometheus-stack/release.yaml
+++ b/monitoring/controllers/kube-prometheus-stack/release.yaml
@@ -3,7 +3,7 @@ kind: HelmRelease
 metadata:
   name: kube-prometheus-stack
 spec:
-  interval: 5m
+  interval: 1h
   chart:
     spec:
       version: "48.x"
@@ -11,7 +11,7 @@ spec:
       sourceRef:
         kind: HelmRepository
         name: prometheus-community
-      interval: 60m
+      interval: 1h
   install:
     crds: Create
   upgrade:
@@ -33,6 +33,7 @@ spec:
             app.kubernetes.io/component: monitoring
     grafana:
       defaultDashboardsEnabled: false
+      adminPassword: flux
     kube-state-metrics:
       collectors: []
       extraArgs:
diff --git a/kube-prometheus-stack/repository.yaml b/monitoring/controllers/kube-prometheus-stack/repository.yaml
similarity index 54%
rename from kube-prometheus-stack/repository.yaml
rename to monitoring/controllers/kube-prometheus-stack/repository.yaml
index 552a341..f14b884 100644
--- a/kube-prometheus-stack/repository.yaml
+++ b/monitoring/controllers/kube-prometheus-stack/repository.yaml
@@ -3,6 +3,6 @@ kind: HelmRepository
 metadata:
   name: prometheus-community
 spec:
-  interval: 120m
-  type: default
-  url: https://prometheus-community.github.io/helm-charts
+  interval: 12h
+  type: oci
+  url: oci://ghcr.io/prometheus-community/charts
diff --git a/loki-stack/kustomization.yaml b/monitoring/controllers/loki-stack/kustomization.yaml
similarity index 100%
rename from loki-stack/kustomization.yaml
rename to monitoring/controllers/loki-stack/kustomization.yaml
diff --git a/loki-stack/release.yaml b/monitoring/controllers/loki-stack/release.yaml
similarity index 100%
rename from loki-stack/release.yaml
rename to monitoring/controllers/loki-stack/release.yaml
diff --git a/loki-stack/repository.yaml b/monitoring/controllers/loki-stack/repository.yaml
similarity index 100%
rename from loki-stack/repository.yaml
rename to monitoring/controllers/loki-stack/repository.yaml
diff --git a/scripts/validate.sh b/scripts/validate.sh
new file mode 100755
index 0000000..7e21685
--- /dev/null
+++ b/scripts/validate.sh
@@ -0,0 +1,66 @@
+#!/usr/bin/env bash
+
+# This script downloads the Flux OpenAPI schemas, then it validates the
+# Flux custom resources and the kustomize overlays using kubeconform.
+# This script is meant to be run locally and in CI before the changes
+# are merged on the main branch that's synced by Flux.
+
+# Copyright 2023 The Flux authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Prerequisites
+# - yq v4.34
+# - kustomize v5.0
+# - kubeconform v0.6
+
+set -o errexit
+set -o pipefail
+
+# mirror kustomize-controller build options
+kustomize_flags=("--load-restrictor=LoadRestrictionsNone")
+kustomize_config="kustomization.yaml"
+
+# skip Kubernetes Secrets due to SOPS fields failing validation
+kubeconform_flags=("-skip=Secret")
+kubeconform_config=("-strict" "-ignore-missing-schemas" "-schema-location" "default" "-schema-location" "/tmp/flux-crd-schemas" "-verbose")
+
+echo "INFO - Downloading Flux OpenAPI schemas"
+mkdir -p /tmp/flux-crd-schemas/master-standalone-strict
+curl -fsSL https://github.com/fluxcd/flux2/releases/latest/download/crd-schemas.tar.gz | tar zxf - -C /tmp/flux-crd-schemas/master-standalone-strict
+
+find . -type f -name '*.yaml' -print0 | while IFS= read -r -d $'\0' file;
+  do
+    echo "INFO - Validating $file"
+    yq e 'true' "$file" > /dev/null
+done
+
+echo "INFO - Validating clusters"
+find ./clusters -maxdepth 2 -type f -name '*.yaml' -print0 | while IFS= read -r -d $'\0' file;
+  do
+    kubeconform "${kubeconform_flags[@]}" "${kubeconform_config[@]}" "${file}"
+    if [[ ${PIPESTATUS[0]} != 0 ]]; then
+      exit 1
+    fi
+done
+
+echo "INFO - Validating kustomize overlays"
+find . -type f -name "$kustomize_config" -print0 | while IFS= read -r -d $'\0' file;
+  do
+    echo "INFO - Validating kustomization ${file/%$kustomize_config}"
+    kustomize build "${file/%$kustomize_config}" "${kustomize_flags[@]}" | \
+      kubeconform "${kubeconform_flags[@]}" "${kubeconform_config[@]}"
+    if [[ ${PIPESTATUS[0]} != 0 ]]; then
+      exit 1
+    fi
+done