Skip to content

Commit

Permalink
feat/ImagePullJob-support-Tolerations
Browse files Browse the repository at this point in the history
Signed-off-by: zerunhu <452829874@qq.com>
  • Loading branch information
zerunhu committed Sep 24, 2024
1 parent 4f04e93 commit 68d61c8
Show file tree
Hide file tree
Showing 11 changed files with 416 additions and 7 deletions.
5 changes: 5 additions & 0 deletions apis/apps/v1alpha1/imagepulljob_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ limitations under the License.
package v1alpha1

import (
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/intstr"
)
Expand Down Expand Up @@ -63,6 +64,10 @@ type ImagePullJobTemplate struct {
// +optional
PodSelector *ImagePullJobPodSelector `json:"podSelector,omitempty"`

// Tolerations allow image pull to be scheduled onto nodes with specific taints
// +optional
Tolerations []v1.Toleration `json:"tolerations,omitempty"`

// Parallelism is the requested parallelism, it can be set to any non-negative value. If it is unspecified,
// it defaults to 1. If it is specified as 0, then the Job is effectively paused until it is increased.
// +optional
Expand Down
7 changes: 7 additions & 0 deletions apis/apps/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

40 changes: 40 additions & 0 deletions config/crd/bases/apps.kruise.io_imagelistpulljobs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,46 @@ spec:
type: array
type: object
x-kubernetes-map-type: atomic
tolerations:
description: Tolerations allow image pull to be scheduled onto nodes
with specific taints
items:
description: |-
The pod this Toleration is attached to tolerates any taint that matches
the triple <key,value,effect> using the matching operator <operator>.
properties:
effect:
description: |-
Effect indicates the taint effect to match. Empty means match all taint effects.
When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute.
type: string
key:
description: |-
Key is the taint key that the toleration applies to. Empty means match all taint keys.
If the key is empty, operator must be Exists; this combination means to match all values and all keys.
type: string
operator:
description: |-
Operator represents a key's relationship to the value.
Valid operators are Exists and Equal. Defaults to Equal.
Exists is equivalent to wildcard for value, so that a pod can
tolerate all taints of a particular category.
type: string
tolerationSeconds:
description: |-
TolerationSeconds represents the period of time the toleration (which must be
of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default,
it is not set, which means tolerate the taint forever (do not evict). Zero and
negative values will be treated as 0 (evict immediately) by the system.
format: int64
type: integer
value:
description: |-
Value is the taint value the toleration matches to.
If the operator is Exists, the value should be empty, otherwise just a regular string.
type: string
type: object
type: array
required:
- completionPolicy
- images
Expand Down
40 changes: 40 additions & 0 deletions config/crd/bases/apps.kruise.io_imagepulljobs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,46 @@ spec:
type: array
type: object
x-kubernetes-map-type: atomic
tolerations:
description: Tolerations allow image pull to be scheduled onto nodes
with specific taints
items:
description: |-
The pod this Toleration is attached to tolerates any taint that matches
the triple <key,value,effect> using the matching operator <operator>.
properties:
effect:
description: |-
Effect indicates the taint effect to match. Empty means match all taint effects.
When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute.
type: string
key:
description: |-
Key is the taint key that the toleration applies to. Empty means match all taint keys.
If the key is empty, operator must be Exists; this combination means to match all values and all keys.
type: string
operator:
description: |-
Operator represents a key's relationship to the value.
Valid operators are Exists and Equal. Defaults to Equal.
Exists is equivalent to wildcard for value, so that a pod can
tolerate all taints of a particular category.
type: string
tolerationSeconds:
description: |-
TolerationSeconds represents the period of time the toleration (which must be
of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default,
it is not set, which means tolerate the taint forever (do not evict). Zero and
negative values will be treated as 0 (evict immediately) by the system.
format: int64
type: integer
value:
description: |-
Value is the taint value the toleration matches to.
If the operator is Exists, the value should be empty, otherwise just a regular string.
type: string
type: object
type: array
required:
- completionPolicy
- image
Expand Down
6 changes: 6 additions & 0 deletions pkg/controller/imagepulljob/imagepulljob_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,12 @@ func (r *ReconcileImagePullJob) Reconcile(_ context.Context, request reconcile.R
if err != nil {
return reconcile.Result{}, fmt.Errorf("failed to get NodeImages: %v", err)
}
//if utilfeature.DefaultFeatureGate.Enabled(features.ImagePullJobTolerationGate) {
nodeImages, err = utilimagejob.TolerationNodeImages(r.Client, nodeImages, job)
if err != nil {
return reconcile.Result{}, fmt.Errorf("failed to get NodeImages for Toleration: %v", err)

Check warning on line 238 in pkg/controller/imagepulljob/imagepulljob_controller.go

View check run for this annotation

Codecov / codecov/patch

pkg/controller/imagepulljob/imagepulljob_controller.go#L236-L238

Added lines #L236 - L238 were not covered by tests
}
//}

// If resourceVersion expectations have not satisfied yet, just skip this reconcile
for _, nodeImage := range nodeImages {
Expand Down
4 changes: 4 additions & 0 deletions pkg/features/kruise_features.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,9 @@ const (
// ImagePullJobGate enable imagepulljob-controller execute ImagePullJob.
ImagePullJobGate featuregate.Feature = "ImagePullJobGate"

// ImagePullJobTolerationGate enables Tolerations support for ImagePullJob.
ImagePullJobTolerationGate featuregate.Feature = "ImagePullJobTolerationGate"

// ResourceDistributionGate enable resourcedistribution-controller execute ResourceDistribution.
ResourceDistributionGate featuregate.Feature = "ResourceDistributionGate"

Expand Down Expand Up @@ -157,6 +160,7 @@ var defaultFeatureGates = map[featuregate.Feature]featuregate.FeatureSpec{
CloneSetEventHandlerOptimization: {Default: false, PreRelease: featuregate.Alpha},
PreparingUpdateAsUpdate: {Default: false, PreRelease: featuregate.Alpha},
ImagePullJobGate: {Default: false, PreRelease: featuregate.Alpha},
ImagePullJobTolerationGate: {Default: false, PreRelease: featuregate.Alpha},
ResourceDistributionGate: {Default: false, PreRelease: featuregate.Alpha},
DeletionProtectionForCRDCascadingGate: {Default: false, PreRelease: featuregate.Alpha},

Expand Down
41 changes: 41 additions & 0 deletions pkg/util/imagejob/imagejob_reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,47 @@ func GetNodeImagesForJob(reader client.Reader, job *appsv1alpha1.ImagePullJob) (
return convertNodeImages(nodeImageList), err
}

func TolerationNodeImages(reader client.Reader, nodeImages []*appsv1alpha1.NodeImage, job *appsv1alpha1.ImagePullJob) (tolerationNodeImage []*appsv1alpha1.NodeImage, err error) {
for _, ng := range nodeImages {
var node v1.Node
if err = reader.Get(context.TODO(), types.NamespacedName{Name: ng.Name}, &node); err != nil {
if errors.IsNotFound(err) {
tolerationNodeImage = append(tolerationNodeImage, ng)
continue

Check warning on line 162 in pkg/util/imagejob/imagejob_reader.go

View check run for this annotation

Codecov / codecov/patch

pkg/util/imagejob/imagejob_reader.go#L160-L162

Added lines #L160 - L162 were not covered by tests
}
return nil, fmt.Errorf("get specific Node %s error: %v", ng.Name, err)

Check warning on line 164 in pkg/util/imagejob/imagejob_reader.go

View check run for this annotation

Codecov / codecov/patch

pkg/util/imagejob/imagejob_reader.go#L164

Added line #L164 was not covered by tests
}
if nodeMatchesTolerations(node, job.Spec.Tolerations) {
tolerationNodeImage = append(tolerationNodeImage, ng)
}
}
return
}

// nodeMatchesTolerations reports whether every taint listed in
// node.Spec.Taints is tolerated according to tolerationToleratesTaint.
// A node without taints always matches.
func nodeMatchesTolerations(node v1.Node, tolerations []v1.Toleration) bool {
	taints := node.Spec.Taints
	for i := 0; i < len(taints); i++ {
		if tolerated := tolerationToleratesTaint(tolerations, taints[i]); !tolerated {
			// A single untolerated taint is enough to reject the node.
			return false
		}
	}
	return true
}

// tolerationToleratesTaint reports whether at least one entry of tolerations
// tolerates taint.
//
// Matching follows the Toleration field semantics:
//   - an empty Effect matches every taint effect;
//   - an empty Key matches every taint key (meaningful with operator Exists);
//   - operator Exists matches any taint value;
//   - operator Equal, or an empty operator (which defaults to Equal), requires
//     the toleration Value to equal the taint Value.
//
// Tolerations with any other operator never match.
func tolerationToleratesTaint(tolerations []v1.Toleration, taint v1.Taint) bool {
	for _, toleration := range tolerations {
		// Empty effect is a wildcard; otherwise the effects must be identical.
		if toleration.Effect != "" && toleration.Effect != taint.Effect {
			continue
		}
		// Empty key is a wildcard; otherwise the keys must be identical.
		if toleration.Key != "" && toleration.Key != taint.Key {
			continue
		}
		switch toleration.Operator {
		case v1.TolerationOpExists:
			return true
		case "", v1.TolerationOpEqual:
			// An unset operator defaults to Equal.
			if toleration.Value == taint.Value {
				return true
			}
		}
	}
	return false
}

func convertNodeImages(nodeImageList *appsv1alpha1.NodeImageList) []*appsv1alpha1.NodeImage {
nodeImages := make([]*appsv1alpha1.NodeImage, 0, len(nodeImageList.Items))
for i := range nodeImageList.Items {
Expand Down
Loading

0 comments on commit 68d61c8

Please sign in to comment.