Update documentation (#81)

* Automate CRD reference doc generation * Update README.md
mercari · Apr 20, 2022 · 4681bf4 · 4681bf4
1 parent 0edfb78
commit 4681bf4
Show file tree

Hide file tree

Showing 9 changed files with 449 additions and 119 deletions.
diff --git a/Makefile b/Makefile
@@ -38,6 +38,9 @@ help: ## Display this help.
 clean: ## Remove any locally built or downloaded files
 	rm -rf $(shell pwd)/bin
 
+docs: crd-ref-docs ## Generate CRD reference docs
+	$(CRD_REF_DOCS) --config docs/config/config.yaml --renderer markdown --output-path docs/crd-reference.md --source-path api/v1beta1
+
 ##@ Development
 
 manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects.
@@ -111,7 +114,7 @@ undeploy: ## Undeploy controller from the K8s cluster specified in ~/.kube/confi
 
 ##@ Dependencies
 
-deps: controller-gen kustomize kind kpt golangci-lint ## Download the following dependencies locally (in './bin') if necessary
+deps: controller-gen kustomize kind kpt golangci-lint crd-ref-docs ## Download the following dependencies locally (in './bin') if necessary
 
 CONTROLLER_GEN = $(shell pwd)/bin/controller-gen
 controller-gen: ## Download controller-gen locally if necessary.
@@ -138,6 +141,10 @@ GOLANGCI_LINT = $(shell pwd)/bin/golangci-lint
 golangci-lint: ## Download 'golangci-lint' locally if necessary
 	$(call go-get-tool,$(GOLANGCI_LINT),github.com/golangci/golangci-lint/cmd/golangci-lint@v1.44.0)
 
+CRD_REF_DOCS = $(shell pwd)/bin/crd-ref-docs
+crd-ref-docs: ## Download 'crd-ref-docs' locally if necessary
+	$(call go-get-tool,$(CRD_REF_DOCS),github.com/elastic/crd-ref-docs@v0.0.8)
+
 # go-get-tool will 'go get' any package $2 and install it to $1.
 PROJECT_DIR := $(shell dirname $(abspath $(lastword $(MAKEFILE_LIST))))
 define go-get-tool

diff --git a/README.md b/README.md
@@ -9,21 +9,37 @@ Spanner Autoscaler is a [Kubernetes Operator](https://coreos.com/operators/) to
 ## Overview
 
 [Cloud Spanner](https://cloud.google.com/spanner) is scalable.
-When CPU utilization gets high, we can [reduce CPU utilization by increasing compute capacity](https://cloud.google.com/spanner/docs/cpu-utilization?hl=en#add-compute-capacity).
+When CPU utilization becomes high, we can [reduce it by increasing compute capacity](https://cloud.google.com/spanner/docs/cpu-utilization?hl=en#add-compute-capacity).
 
-Spanner Autoscaler is created to reconcile Cloud Spanner compute capacity like [Horizontal Pod Autoscaler](https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/) by configuring `minNodes`, `maxNodes`, and `targetCPUUtilization`.
+Spanner Autoscaler is created to reconcile Cloud Spanner compute capacity like [Horizontal Pod Autoscaler](https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/) by configuring a compute capacity range and `targetCPUUtilization`.
 
 <img src="./docs/assets/overview.jpg" width="450" height="300">
 
-When CPU Utilization(High Priority) is above `targetCPUUtilization`, Spanner Autoscaler calculates desired compute capacity and increases compute capacity.
+When CPU Utilization (High Priority) is above (or below) `targetCPUUtilization`, Spanner Autoscaler tries to bring it back to the threshold by calculating desired compute capacity and then increasing (or decreasing) compute capacity.
 
 <img src="./docs/assets/cpu_utilization.png" width="400" height="200"> <img src="./docs/assets/node_scaleup.png" width="400" height="200">
 
 The [pricing of Cloud Spanner](https://cloud.google.com/spanner/pricing) states that any compute capacity which is provisioned will be billed for a minimum of one hour, so Spanner Autoscaler maintains the increased compute capacity for about an hour. Spanner Autoscaler has a `--scale-down-interval` flag (default: 55min) for achieving this.
 
-While scaling down, if Spanner Autoscaler reduces a lot of compute capacity at once like 10000 PU -> 1000 PU, it will cause a latency increase. Spanner Autoscaler decreases the compute capacity in steps to avoid such large disruptions. This step size can be provided with the `maxScaleDownNodes` parameter (default: 2).
+While scaling down, removing large amounts of compute capacity at once (like 10000 PU -> 1000 PU) can cause a latency increase. Therefore, Spanner Autoscaler decreases the compute capacity in steps to avoid such large disruptions. This step size can be provided with the `scaledownStepSize` parameter (default: 2000 PU).
 <img src="./docs/assets/node_scaledown.png" width="400" height="200">
 
+### Scheduled scaling feature
+
+If there are batch jobs or other compute-intensive tasks which run periodically on Cloud Spanner, it is now possible to bump up the scaling range only for a specified duration. For example, the following `SpannerAutoscaleSchedule` will add an extra compute capacity of 600 Processing Units to the Spanner instance every day at 02:00, for 3 hours:
+```yaml
+apiVersion: spanner.mercari.com/v1beta1
+kind: SpannerAutoscaleSchedule
+metadata:
+  name: spannerautoscaleschedule-sample
+  namespace: your-namespace
+spec:
+  targetResource: spannerautoscaler-sample
+  additionalProcessingUnits: 600
+  schedule:
+    cron: "0 2 * * *"
+    duration: 3h
+```
 
 ## Installation
 
@@ -54,98 +70,87 @@ Spanner Autoscaler can be installed using [KPT](https://kpt.dev/installation/) b
    ```console
    $ kubectl apply -f spanner-autoscaler/samples
    ```
-   Examples of CRDs can be found [below](#examples).\
+   Examples of CustomResources can be found [below](#examples).\
    For authentication using a GCP service account JSON key, follow [these steps](#gcp-setup) to create a k8s secret with credentials.
 
 
-## `SpannerAutoscaler` CRD reference
+## CRD reference
 
-Following is a reference of the parameters which can be provided in the `spec` section of the `SpannerAutoscaler` CRD:
-
-Parameter | Type | Required | Description
---- | --- | --- | ---
-`scaleTargetRef` | object | yes | Spanner Instance which will be auto scaled
-`scaleTargetRef.projectId` | string | yes | GCP Project ID
-`scaleTargetRef.instanceId` | string | yes | Cloud Spanner Instance ID
-`serviceAccountSecretRef` | object | no | Secret created [here](#authenticate-with-service-account-json-key)
-`serviceAccountSecretRef.name` | string | yes | Name of the k8s secret
-`serviceAccountSecretRef.namespace` | string | yes | Namespace of the k8s secret
-`serviceAccountSecretRef.key` | string | yes | Name of the key in the secret which holds the authentication information
-`impersonateConfig` | object | no | Impersonation config
-`impersonateConfog.targetServiceAccount` | string | yes | Email address of the service account to impersonate ([`GSA_SPANNER`](#using-service-accounts-with-workload-identity-and-impersonation))
-`impersonateConfog.delegates` | list of string | yes | List of target service account emails in a delegation chain ([Ref](https://pkg.go.dev/google.golang.org/api/impersonate#CredentialsConfig))
-`minProcessingUnits` | integer | no | Minimum [processing units](https://cloud.google.com/spanner/docs/compute-capacity#compute_capacity)
-`maxProcessingUnits` | integer | no | Maximum [processing units](https://cloud.google.com/spanner/docs/compute-capacity#compute_capacity)
-`minNodes` | integer | no | Equals [`minProcessingUnits / 1000`](https://cloud.google.com/spanner/docs/compute-capacity#compute_capacity)
-`maxNodes` | integer | no | Equals [`maxProcessingUnits / 1000`](https://cloud.google.com/spanner/docs/compute-capacity#compute_capacity)
-`maxScaleDownNodes` | integer | no | Maximum number of nodes to remove in one scale-down cycle
-`targetCPUUtilization` | object | yes | Spanner [CPU utilization metrics](https://cloud.google.com/spanner/docs/cpu-utilization)
-`targetCPUUtilization.highPriority` | integer | yes | High Priority CPU Utilization value
+- [`SpannerAutoscaler` CRD reference](docs/crd-reference.md#spannerautoscaler)
+- [`SpannerAutoscaleSchedule` CRD reference](docs/crd-reference.md#spannerautoscaleschedule)
 
 
 ## Examples
 
 #### Single Service Account using Workload Identity:
 
 ```yaml
-apiVersion: spanner.mercari.com/v1alpha1
+apiVersion: spanner.mercari.com/v1beta1
 kind: SpannerAutoscaler
 metadata:
   name: spannerautoscaler-sample
   namespace: your-namespace
 spec:
-  scaleTargetRef:
+  targetInstance:
     projectId: your-gcp-project-id
     instanceId: your-spanner-instance-id
-  minNodes: 1
-  maxNodes: 4
-  maxScaleDownNodes: 1
-  targetCPUUtilization:
-    highPriority: 60
+  scaleConfig:
+    processingUnits:
+      min: 1000
+      max: 4000
+    scaledownStepSize: 1000
+    targetCPUUtilization:
+      highPriority: 60
 ```
 
 #### Using Service Account JSON key for each `SpannerAutoscaler`:
 
 ```diff
-  apiVersion: spanner.mercari.com/v1alpha1
+  apiVersion: spanner.mercari.com/v1beta1
   kind: SpannerAutoscaler
   metadata:
     name: spannerautoscaler-sample
     namespace: your-namespace
   spec:
-    scaleTargetRef:
+    targetInstance:
       projectId: your-gcp-project-id
       instanceId: your-spanner-instance-id
-+   serviceAccountSecretRef:
-+     namespace: your-namespace
-+     name: spanner-autoscaler-gcp-sa
-+     key: service-account
-    minNodes: 1
-    maxNodes: 4
-    maxScaleDownNodes: 1
-    targetCPUUtilization:
-      highPriority: 60
++   authentication:
++     iamKeySecret:
++       namespace: your-namespace
++       name: spanner-autoscaler-gcp-sa
++       key: service-account
+    scaleConfig:
+      processingUnits:
+        min: 1000
+        max: 4000
+      scaledownStepSize: 1000
+      targetCPUUtilization:
+        highPriority: 60
 ```
 
 #### Using Service Accounts with Workload Identity and impersonation:
 
 ```diff
-  apiVersion: spanner.mercari.com/v1alpha1
+  apiVersion: spanner.mercari.com/v1beta1
   kind: SpannerAutoscaler
   metadata:
     name: spannerautoscaler-sample
     namespace: your-namespace
   spec:
-    scaleTargetRef:
+    targetInstance:
       projectId: your-gcp-project-id
       instanceId: your-spanner-instance-id
-+   impersonateConfig:
-+     targetServiceAccount: GSA_SPANNER@TENANT_PROJECT.iam.gserviceaccount.com
-    minNodes: 1
-    maxNodes: 4
-    maxScaleDownNodes: 1
-    targetCPUUtilization:
-      highPriority: 60
++   authentication:
++     impersonateConfig:
++       targetServiceAccount: GSA_SPANNER@TENANT_PROJECT.iam.gserviceaccount.com
+    scaleConfig:
+      processingUnits:
+        min: 1000
+        max: 4000
+      scaledownStepSize: 1000
+      targetCPUUtilization:
+        highPriority: 60
 ```
 
 
@@ -250,34 +255,15 @@ Following are some other advanced methods which can also be used for GCP authent
 </details>
 
 
-## Development
-
-Run `make help` for a list of useful targets. The installation basically has 3 steps:
-
-```
-## 1. Installation of CRD
-$ make install
-
-## 2. Deployment of the operator
-$ make deploy
-
-## 3. Creation of a CRD
-$ kubectl apply -f config/samples
-```
+## Development and Contribution
 
-Test the operator with `make test`
+See [docs/development.md](docs/development.md) and [CONTRIBUTING.md](.github/CONTRIBUTING.md) respectively.
 
-> :warning: **Migration from `v0.1.5`:** Names of some resources (`Deployment`, `serviceAccount`,`Role` etc) have changed since version `0.1.5`. Thus, you must first uninstall the old version before installing the new version. To uninstall the old version:
-> ```console
-> $ git checkout v0.1.5
-> $ kustomize build config/default | kubectl delete -f -
-> ```
-> Specifically, the kubernetes service account used for running the spanner-autoscaler has changed from `default` to `spanner-autoscaler-controller-manager`. Please keep this in mind. It is recommended to follow the below configuration steps and re-create any resources if needed.
+### :information_source: Migration from `0.3.0` to `0.4.0`:
 
+The older version `0.3.0` (with `apiVersion: spanner.mercari.com/v1alpha1`) is now deprecated in favor of `0.4.0` (with `apiVersion: spanner.mercari.com/v1beta1`).
 
-## Contribution
-
-See [CONTRIBUTING.md](.github/CONTRIBUTING.md).
+Version `0.4.0` is backward compatible with `0.3.0`, but there is a restructuring of the `SpannerAutoscaler` resource definition and names of many fields have changed. Thus it is recommended to go through the [`SpannerAutoscaler` CRD reference](docs/crd-reference.md#spannerautoscaler) and replace `v1alpha1` resources with the `v1beta1` spec definitions.
 
 ## License
 
@@ -290,6 +276,8 @@ Spanner Autoscaler is released under the [Apache License 2.0](./LICENSE).
 1. It doesn't check [the storage size and the number of databases](https://cloud.google.com/spanner/quotas?hl=en#database_limits) as well. You must take care of these metrics by yourself.
 
 
+:information_source: More information and background of spanner-autoscaler is available on [this blog](https://engineering.mercari.com/en/blog/entry/20211222-kubernetes-based-spanner-autoscaler)!
+
 <!-- badge links -->
 
 [actions-workflow-test]: https://github.com/mercari/spanner-autoscaler/actions?query=workflow%3ATest

diff --git a/api/v1beta1/spannerautoscaler_types.go b/api/v1beta1/spannerautoscaler_types.go
@@ -85,35 +85,43 @@ type ScaleConfig struct {
 	// This is only used at the time of CustomResource creation. If compute capacity is provided in `nodes`, then it is automatically converted to `processing-units` at the time of resource creation, and internally, only `ProcessingUnits` are used for computations and scaling.
 	ComputeType ComputeType `json:"computeType,omitempty"`
 
-	// If `nodes` are provided at the time of resource creation, then they are automatically converted to `processing-units`. So it is recommended to use only the processing units.
+	// If `nodes` are provided at the time of resource creation, then they are automatically converted to `processing-units`. So it is recommended to use only the processing units. Ref: [Spanner Compute Capacity](https://cloud.google.com/spanner/docs/compute-capacity#compute_capacity)
 	Nodes ScaleConfigNodes `json:"nodes,omitempty"`
 
-	// ProcessingUnits for scaling of the Spanner instance: https://cloud.google.com/spanner/docs/compute-capacity#compute_capacity
+	// ProcessingUnits for scaling of the Spanner instance. Ref: [Spanner Compute Capacity](https://cloud.google.com/spanner/docs/compute-capacity#compute_capacity)
 	ProcessingUnits ScaleConfigPUs `json:"processingUnits,omitempty"`
 
 	// The maximum number of processing units which can be deleted in one scale-down operation
 	// +kubebuilder:default=2000
 	// +kubebuilder:validation:MultipleOf=1000
 	ScaledownStepSize int `json:"scaledownStepSize,omitempty"`
 
-	// The CPU utilization which the autoscaling will try to achieve
+	// The CPU utilization which the autoscaling will try to achieve. Ref: [Spanner CPU utilization](https://cloud.google.com/spanner/docs/cpu-utilization#task-priority)
 	TargetCPUUtilization TargetCPUUtilization `json:"targetCPUUtilization"`
 }
 
+// Compute capacity in terms of Nodes
 type ScaleConfigNodes struct {
+	// Minimum number of Nodes for the autoscaling range
 	Min int `json:"min,omitempty"`
+
+	// Maximum number of Nodes for the autoscaling range
 	Max int `json:"max,omitempty"`
 }
 
+// Compute capacity in terms of Processing Units
 type ScaleConfigPUs struct {
+	// Minimum number of Processing Units for the autoscaling range
 	// +kubebuilder:validation:MultipleOf=100
 	Min int `json:"min"`
 
+	// Maximum number of Processing Units for the autoscaling range
 	// +kubebuilder:validation:MultipleOf=100
 	Max int `json:"max"`
 }
 
 type TargetCPUUtilization struct {
+	// Desired CPU utilization for 'High Priority' CPU consumption category. Ref: [Spanner CPU utilization](https://cloud.google.com/spanner/docs/cpu-utilization#task-priority)
 	// +kubebuilder:validation:Minimum=0
 	// +kubebuilder:validation:Maximum=100
 	// +kubebuilder:validation:ExclusiveMinimum=true
@@ -123,9 +131,14 @@ type TargetCPUUtilization struct {
 
 // SpannerAutoscalerSpec defines the desired state of SpannerAutoscaler
 type SpannerAutoscalerSpec struct {
+	// The Spanner instance which will be managed for autoscaling
 	TargetInstance TargetInstance `json:"targetInstance"`
-	Authentication Authentication `json:"authentication"`
-	ScaleConfig    ScaleConfig    `json:"scaleConfig"`
+
+	// Authentication details for the Spanner instance
+	Authentication Authentication `json:"authentication,omitempty"`
+
+	// Details of the autoscaling parameters for the Spanner instance
+	ScaleConfig ScaleConfig `json:"scaleConfig"`
 }
 
 type InstanceState string
@@ -143,22 +156,31 @@ const (
 	InstanceStateReady InstanceState = "ready"
 )
 
+// A `SpannerAutoscaleSchedule` which is currently active and will be used for calculating the autoscaling range.
 type ActiveSchedule struct {
-	ScheduleName string      `json:"name"`
-	EndTime      metav1.Time `json:"endTime"`
-	AdditionalPU int         `json:"additionalPU"`
+	// Name of the `SpannerAutoscaleSchedule`
+	ScheduleName string `json:"name"`
+
+	// The time until when this schedule will remain active
+	EndTime metav1.Time `json:"endTime"`
+
+	// The extra compute capacity which will be added because of this schedule
+	AdditionalPU int `json:"additionalPU"`
 }
 
 // SpannerAutoscalerStatus defines the observed state of SpannerAutoscaler
 type SpannerAutoscalerStatus struct {
-	Schedules                []string         `json:"schedules,omitempty"`
+	// List of schedules which are registered with this spanner-autoscaler instance
+	Schedules []string `json:"schedules,omitempty"`
+
+	// List of all the schedules which are currently active and will be used in calculating compute capacity
 	CurrentlyActiveSchedules []ActiveSchedule `json:"currentlyActiveSchedules,omitempty"`
 
-	// Last time the SpannerAutoscaler scaled the number of Spanner nodes
+	// Last time the `SpannerAutoscaler` scaled the number of Spanner nodes.
 	// Used by the autoscaler to control how often the number of nodes is changed
 	LastScaleTime metav1.Time `json:"lastScaleTime,omitempty"`
 
-	// Last time the SpannerAutoscaler fetched and synced this status
+	// Last time the `SpannerAutoscaler` fetched and synced the metrics from Spanner
 	LastSyncTime metav1.Time `json:"lastSyncTime,omitempty"`
 
 	// Current number of processing-units in the Spanner instance
@@ -173,6 +195,7 @@ type SpannerAutoscalerStatus struct {
 	// Maximum number of processing units based on the currently active schedules
 	DesiredMaxPUs int `json:"desiredMaxPUs,omitempty"`
 
+	// State of the Cloud Spanner instance
 	InstanceState InstanceState `json:"instanceState,omitempty"`
 
 	// Current average CPU utilization for high priority task, represented as a percentage