Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: enhanced in-place update module to support vertical scaling #1353

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/e2e-1.18.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ jobs:
with:
node_image: ${{ env.KIND_IMAGE }}
cluster_name: ${{ env.KIND_CLUSTER_NAME }}
config: ./test/kind-conf-none-fg.yaml
config: ./test/kind-conf.yaml
version: ${{ env.KIND_VERSION }}
- name: Install-CSI
run: |
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/e2e-1.24.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ jobs:
with:
node_image: ${{ env.KIND_IMAGE }}
cluster_name: ${{ env.KIND_CLUSTER_NAME }}
config: ./test/kind-conf-none-fg.yaml
config: ./test/kind-conf.yaml
version: ${{ env.KIND_VERSION }}
- name: Install-CSI
run: |
Expand Down
101 changes: 92 additions & 9 deletions .github/workflows/e2e-1.28.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ jobs:
with:
node_image: ${{ env.KIND_IMAGE }}
cluster_name: ${{ env.KIND_CLUSTER_NAME }}
config: ./test/kind-conf-none-fg.yaml
config: ./test/kind-conf-with-vpa.yaml
version: ${{ env.KIND_VERSION }}
- name: Install-CSI
run: |
Expand Down Expand Up @@ -117,7 +117,7 @@ jobs:
with:
node_image: ${{ env.KIND_IMAGE }}
cluster_name: ${{ env.KIND_CLUSTER_NAME }}
config: ./test/kind-conf-none-fg.yaml
config: ./test/kind-conf-with-vpa.yaml
version: ${{ env.KIND_VERSION }}
- name: Build image
run: |
Expand Down Expand Up @@ -196,7 +196,7 @@ jobs:
with:
node_image: ${{ env.KIND_IMAGE }}
cluster_name: ${{ env.KIND_CLUSTER_NAME }}
config: ./test/kind-conf-none-fg.yaml
config: ./test/kind-conf-with-vpa.yaml
version: ${{ env.KIND_VERSION }}
- name: Build image
run: |
Expand Down Expand Up @@ -288,7 +288,7 @@ jobs:
with:
node_image: ${{ env.KIND_IMAGE }}
cluster_name: ${{ env.KIND_CLUSTER_NAME }}
config: ./test/kind-conf-none-fg.yaml
config: ./test/kind-conf-with-vpa.yaml
version: ${{ env.KIND_VERSION }}
- name: Build image
run: |
Expand Down Expand Up @@ -380,7 +380,7 @@ jobs:
with:
node_image: ${{ env.KIND_IMAGE }}
cluster_name: ${{ env.KIND_CLUSTER_NAME }}
config: ./test/kind-conf-none-fg.yaml
config: ./test/kind-conf-with-vpa.yaml
version: ${{ env.KIND_VERSION }}
- name: Build image
run: |
Expand Down Expand Up @@ -472,7 +472,7 @@ jobs:
with:
node_image: ${{ env.KIND_IMAGE }}
cluster_name: ${{ env.KIND_CLUSTER_NAME }}
config: ./test/kind-conf-none-fg.yaml
config: ./test/kind-conf-with-vpa.yaml
version: ${{ env.KIND_VERSION }}
- name: Build image
run: |
Expand Down Expand Up @@ -542,7 +542,7 @@ jobs:
with:
node_image: ${{ env.KIND_IMAGE }}
cluster_name: ${{ env.KIND_CLUSTER_NAME }}
config: ./test/kind-conf-none-fg.yaml
config: ./test/kind-conf-with-vpa.yaml
version: ${{ env.KIND_VERSION }}
- name: Build image
run: |
Expand Down Expand Up @@ -592,6 +592,89 @@ jobs:
done < <(kubectl get pods -n kruise-system -l control-plane=controller-manager --no-headers | awk '{print $1}')
fi
exit $retVal
clonesetAndInplace:
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v4
with:
submodules: true
- name: Setup Go
uses: actions/setup-go@v5
with:
go-version: ${{ env.GO_VERSION }}
- name: Setup Kind Cluster
uses: helm/kind-action@v1.10.0
with:
node_image: ${{ env.KIND_IMAGE }}
cluster_name: ${{ env.KIND_CLUSTER_NAME }}
config: ./test/kind-conf-with-vpa.yaml
version: ${{ env.KIND_VERSION }}
- name: Install-CSI
run: |
make install-csi

- name: Build image
run: |
export IMAGE="openkruise/kruise-manager:e2e-${GITHUB_RUN_ID}"
docker build --pull --no-cache . -t $IMAGE
kind load docker-image --name=${KIND_CLUSTER_NAME} $IMAGE || { echo >&2 "kind not installed or error loading image: $IMAGE"; exit 1; }
- name: Install Kruise
run: |
set -ex
kubectl cluster-info
IMG=openkruise/kruise-manager:e2e-${GITHUB_RUN_ID} ./scripts/deploy_kind.sh
NODES=$(kubectl get node | wc -l)
for ((i=1;i<10;i++));
do
set +e
PODS=$(kubectl get pod -n kruise-system | grep '1/1' | wc -l)
set -e
if [ "$PODS" -eq "$NODES" ]; then
break
fi
sleep 3
done
set +e
PODS=$(kubectl get pod -n kruise-system | grep '1/1' | wc -l)
kubectl get node -o yaml
kubectl get all -n kruise-system -o yaml
kubectl get pod -n kruise-system --no-headers | grep daemon | awk '{print $1}' | xargs kubectl logs -n kruise-system
kubectl get pod -n kruise-system --no-headers | grep daemon | awk '{print $1}' | xargs kubectl logs -n kruise-system --previous=true
set -e
if [ "$PODS" -eq "$NODES" ]; then
echo "Wait for kruise-manager and kruise-daemon ready successfully"
else
echo "Timeout to wait for kruise-manager and kruise-daemon ready"
exit 1
fi
- name: Run E2E Tests
run: |
export KUBECONFIG=/home/runner/.kube/config
make ginkgo
set +e
./bin/ginkgo -p -timeout 120m -v --focus='\[apps\] (InplaceVPA)' test/e2e
retVal=$?
restartCount=$(kubectl get pod -n kruise-system -l control-plane=controller-manager --no-headers | awk '{print $4}')
if [ "${restartCount}" -eq "0" ];then
echo "Kruise-manager has not restarted"
else
kubectl get pod -n kruise-system -l control-plane=controller-manager --no-headers
echo "Kruise-manager has restarted, abort!!!"
kubectl get pod -n kruise-system --no-headers -l control-plane=controller-manager | awk '{print $1}' | xargs kubectl logs -p -n kruise-system
exit 1
fi
if [ "$retVal" -ne 0 ];then
echo "test fail, dump kruise-manager logs"
while read pod; do
kubectl logs -n kruise-system $pod
done < <(kubectl get pods -n kruise-system -l control-plane=controller-manager --no-headers | awk '{print $1}')
echo "test fail, dump kruise-daemon logs"
while read pod; do
kubectl logs -n kruise-system $pod
done < <(kubectl get pods -n kruise-system -l control-plane=daemon --no-headers | awk '{print $1}')
fi
exit $retVal

other:
runs-on: ubuntu-20.04
steps:
Expand All @@ -607,7 +690,7 @@ jobs:
with:
node_image: ${{ env.KIND_IMAGE }}
cluster_name: ${{ env.KIND_CLUSTER_NAME }}
config: ./test/kind-conf-none-fg.yaml
config: ./test/kind-conf-with-vpa.yaml
version: ${{ env.KIND_VERSION }}
- name: Build image
run: |
Expand Down Expand Up @@ -648,7 +731,7 @@ jobs:
export KUBECONFIG=/home/runner/.kube/config
make ginkgo
set +e
./bin/ginkgo -timeout 90m -v --skip='\[apps\] (AppStatefulSetStorage|StatefulSet|PullImage|PullImages|ContainerRecreateRequest|DaemonSet|SidecarSet|EphemeralJob)' --skip='\[policy\] PodUnavailableBudget' test/e2e
./bin/ginkgo -timeout 90m -v --skip='\[apps\] (InplaceVPA|AppStatefulSetStorage|StatefulSet|PullImage|PullImages|ContainerRecreateRequest|DaemonSet|SidecarSet|EphemeralJob)' --skip='\[policy\] PodUnavailableBudget' test/e2e
retVal=$?
restartCount=$(kubectl get pod -n kruise-system -l control-plane=controller-manager --no-headers | awk '{print $4}')
if [ "${restartCount}" -eq "0" ];then
Expand Down
6 changes: 6 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -152,9 +152,15 @@ endif
create-cluster: $(tools/kind)
tools/hack/create-cluster.sh

DISABLE_CSI ?= false
ABNER-1 marked this conversation as resolved.
Show resolved Hide resolved

.PHONY: install-csi
install-csi:
ifeq ($(DISABLE_CSI), true)
@echo "CSI is disabled, skip"
else
cd tools/hack/csi-driver-host-path; ./install-snapshot.sh
endif

# delete-cluster deletes a kube cluster.
.PHONY: delete-cluster
Expand Down
16 changes: 15 additions & 1 deletion apis/apps/pub/inplace_update.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,12 +62,21 @@ type InPlaceUpdateState struct {
// UpdateEnvFromMetadata indicates there are envs from annotations/labels that should be in-place update.
UpdateEnvFromMetadata bool `json:"updateEnvFromMetadata,omitempty"`

// UpdateResources indicates there are resources that should be in-place update.
UpdateResources bool `json:"updateResources,omitempty"`

// VerticalUpdateOnly indicates there are only vertical update in this revision.
VerticalUpdateOnly bool `json:"verticalUpdateOnly"`

// NextContainerImages is the containers with lower priority that waiting for in-place update images in next batch.
NextContainerImages map[string]string `json:"nextContainerImages,omitempty"`

// NextContainerRefMetadata is the containers with lower priority that waiting for in-place update labels/annotations in next batch.
NextContainerRefMetadata map[string]metav1.ObjectMeta `json:"nextContainerRefMetadata,omitempty"`

// NextContainerResources is the containers with lower priority that waiting for in-place update resources in next batch.
NextContainerResources map[string]v1.ResourceRequirements `json:"nextContainerResources,omitempty"`
ABNER-1 marked this conversation as resolved.
Show resolved Hide resolved

// PreCheckBeforeNext is the pre-check that must pass before the next containers can be in-place update.
PreCheckBeforeNext *InPlaceUpdatePreCheckBeforeNext `json:"preCheckBeforeNext,omitempty"`

Expand All @@ -91,7 +100,8 @@ type InPlaceUpdateContainerBatch struct {
// InPlaceUpdateContainerStatus records the statuses of the container that are mainly used
// to determine whether the InPlaceUpdate is completed.
type InPlaceUpdateContainerStatus struct {
ImageID string `json:"imageID,omitempty"`
ImageID string `json:"imageID,omitempty"`
Resources v1.ResourceRequirements `json:"resources,omitempty"`
}

// InPlaceUpdateStrategy defines the strategies for in-place update.
Expand Down Expand Up @@ -140,6 +150,10 @@ type RuntimeContainerHashes struct {
// PlainHash is the hash that directly calculated from pod.spec.container[x].
// Usually it is calculated by Kubelet and will be in annotation of each runtime container.
PlainHash uint64 `json:"plainHash"`
// PlainHashWithoutResources is the hash that directly calculated from pod.spec.container[x]
// over fields with Resources field zero'd out.
// Usually it is calculated by Kubelet and will be in annotation of each runtime container.
PlainHashWithoutResources uint64 `json:"plainHashWithoutResources"`
// ExtractedEnvFromMetadataHash is the hash that calculated from pod.spec.container[x],
// whose envs from annotations/labels have already been extracted to the real values.
ExtractedEnvFromMetadataHash uint64 `json:"extractedEnvFromMetadataHash,omitempty"`
Expand Down
11 changes: 10 additions & 1 deletion apis/apps/pub/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 5 additions & 1 deletion apis/apps/v1alpha1/cloneset_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,11 @@ limitations under the License.
package v1alpha1

import (
appspub "github.com/openkruise/kruise/apis/apps/pub"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/intstr"

appspub "github.com/openkruise/kruise/apis/apps/pub"
)

const (
Expand Down Expand Up @@ -178,6 +179,8 @@ type CloneSetStatus struct {

// UpdatedAvailableReplicas is the number of Pods created by the CloneSet controller from the CloneSet version
// indicated by updateRevision and have a Ready Condition for at least minReadySeconds.
// Notice: when enable InPlaceWorkloadVerticalScaling, only resource resize updating pod will also be unavailable.
// This means these pod will be counted in maxUnavailable.
UpdatedAvailableReplicas int32 `json:"updatedAvailableReplicas,omitempty"`

// ExpectedUpdatedReplicas is the number of Pods that should be updated by CloneSet controller.
Expand Down Expand Up @@ -237,6 +240,7 @@ type CloneSetCondition struct {
// +kubebuilder:printcolumn:name="DESIRED",type="integer",JSONPath=".spec.replicas",description="The desired number of pods."
// +kubebuilder:printcolumn:name="UPDATED",type="integer",JSONPath=".status.updatedReplicas",description="The number of pods updated."
// +kubebuilder:printcolumn:name="UPDATED_READY",type="integer",JSONPath=".status.updatedReadyReplicas",description="The number of pods updated and ready."
// +kubebuilder:printcolumn:name="UPDATED_AVAILABLE",type="integer",JSONPath=".status.updatedAvailableReplicas",description="The number of pods updated and available."
// +kubebuilder:printcolumn:name="READY",type="integer",JSONPath=".status.readyReplicas",description="The number of pods ready."
// +kubebuilder:printcolumn:name="TOTAL",type="integer",JSONPath=".status.replicas",description="The number of currently all pods."
// +kubebuilder:printcolumn:name="AGE",type="date",JSONPath=".metadata.creationTimestamp",description="CreationTimestamp is a timestamp representing the server time when this object was created. It is not guaranteed to be set in happens-before order across separate operations. Clients may not set this value. It is represented in RFC3339 form and is in UTC."
Expand Down
6 changes: 6 additions & 0 deletions config/crd/bases/apps.kruise.io_clonesets.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ spec:
jsonPath: .status.updatedReadyReplicas
name: UPDATED_READY
type: integer
- description: The number of pods updated and available.
jsonPath: .status.updatedAvailableReplicas
name: UPDATED_AVAILABLE
type: integer
- description: The number of pods ready.
jsonPath: .status.readyReplicas
name: READY
Expand Down Expand Up @@ -512,6 +516,8 @@ spec:
description: |-
UpdatedAvailableReplicas is the number of Pods created by the CloneSet controller from the CloneSet version
indicated by updateRevision and have a Ready Condition for at least minReadySeconds.
Notice: when enable InPlaceWorkloadVerticalScaling, only resource resize updating pod will also be unavailable.
This means these pod will be counted in maxUnavailable.
format: int32
type: integer
updatedReadyReplicas:
Expand Down
4 changes: 2 additions & 2 deletions config/manager/manager.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,8 @@ spec:
port: 8000
resources:
limits:
cpu: 100m
memory: 200Mi
cpu: 2
memory: 2Gi
requests:
cpu: 100m
memory: 200Mi
Expand Down
7 changes: 7 additions & 0 deletions pkg/controller/cloneset/cloneset_event_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,13 @@ func (e *podEventHandler) Update(ctx context.Context, evt event.UpdateEvent, q w

// If it has a ControllerRef, that's all that matters.
if curControllerRef != nil {
// TODO(Abner-1): delete it when fixes only resize resource
//old, _ := json.Marshal(oldPod)
//cur, _ := json.Marshal(curPod)
//patches, _ := jsonpatch.CreatePatch(old, cur)
//pjson, _ := json.Marshal(patches)
//klog.V(4).InfoS("Pod updated json", "pod", klog.KObj(curPod), "patch", pjson)

req := resolveControllerRef(curPod.Namespace, curControllerRef)
if req == nil {
return
Expand Down
11 changes: 11 additions & 0 deletions pkg/controller/cloneset/core/cloneset_core.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@
appspub "github.com/openkruise/kruise/apis/apps/pub"
appsv1alpha1 "github.com/openkruise/kruise/apis/apps/v1alpha1"
clonesetutils "github.com/openkruise/kruise/pkg/controller/cloneset/utils"
"github.com/openkruise/kruise/pkg/features"
utilfeature "github.com/openkruise/kruise/pkg/util/feature"
"github.com/openkruise/kruise/pkg/util/inplaceupdate"
)

Expand Down Expand Up @@ -211,6 +213,15 @@
}
}
}
// only inplace resource resize
if utilfeature.DefaultFeatureGate.Enabled(features.InPlaceWorkloadVerticalScaling) &&
len(curPod.Labels) > 0 && appspub.LifecycleStateType(curPod.Labels[appspub.LifecycleStateKey]) != appspub.LifecycleStateNormal {
opts := c.GetUpdateOptions()
opts = inplaceupdate.SetOptionsDefaults(opts)
if err := containersUpdateCompleted(curPod, opts.CheckContainersUpdateCompleted); err == nil {
return false

Check warning on line 222 in pkg/controller/cloneset/core/cloneset_core.go

View check run for this annotation

Codecov / codecov/patch

pkg/controller/cloneset/core/cloneset_core.go#L217-L222

Added lines #L217 - L222 were not covered by tests
}
}

return true
}
Expand Down
Loading
Loading