Skip to content

Commit

Permalink
[FIX] BroadcastJob activeDeadlineSeconds did not take effect
Browse files Browse the repository at this point in the history
Signed-off-by: acejilam <acejilam@gmail.com>
  • Loading branch information
ls-2018 committed Mar 18, 2024
1 parent dad39bc commit 3183a64
Showing 1 changed file with 16 additions and 2 deletions.
18 changes: 16 additions & 2 deletions pkg/controller/broadcastjob/broadcastjob_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,15 @@ func (r *ReconcileBroadcastJob) Reconcile(_ context.Context, request reconcile.R
failed := int32(len(failedPods))
succeeded := int32(len(succeededPods))

desiredNodes, restNodesToRunPod, podsToDelete := getNodesToRunPod(nodes, job, existingNodeToPodMap)
isDeadLine := func(job *appsv1alpha1.BroadcastJob) bool {
if job.Spec.CompletionPolicy.Type == appsv1alpha1.Always &&
job.Spec.CompletionPolicy.ActiveDeadlineSeconds != nil {
return time.Since(job.CreationTimestamp.Time) >= time.Duration(*job.Spec.CompletionPolicy.ActiveDeadlineSeconds)*time.Second
}

Check warning on line 252 in pkg/controller/broadcastjob/broadcastjob_controller.go

View check run for this annotation

Codecov / codecov/patch

pkg/controller/broadcastjob/broadcastjob_controller.go#L251-L252

Added lines #L251 - L252 were not covered by tests
return false
}

desiredNodes, restNodesToRunPod, podsToDelete := getNodesToRunPod(nodes, job, existingNodeToPodMap, isDeadLine)
desired := int32(len(desiredNodes))
klog.Infof("%s/%s has %d/%d nodes remaining to schedule pods", job.Namespace, job.Name, len(restNodesToRunPod), desired)
klog.Infof("Before broadcastjob reconcile %s/%s, desired=%d, active=%d, failed=%d", job.Namespace, job.Name, desired, active, failed)
Expand Down Expand Up @@ -514,7 +522,9 @@ func isJobFailed(job *appsv1alpha1.BroadcastJob, pods []*corev1.Pod) (bool, stri
// * restNodesToRunPod: the nodes that do not have pods running yet, excluding nodes that do not satisfy constraints such as affinity and taints
// * podsToDelete: the pods that no longer satisfy the node constraints
func getNodesToRunPod(nodes *corev1.NodeList, job *appsv1alpha1.BroadcastJob,
existingNodeToPodMap map[string]*corev1.Pod) (map[string]*corev1.Pod, []*corev1.Node, []*corev1.Pod) {
existingNodeToPodMap map[string]*corev1.Pod,
isDeadline func(job *appsv1alpha1.BroadcastJob) bool,
) (map[string]*corev1.Pod, []*corev1.Node, []*corev1.Pod) {

var podsToDelete []*corev1.Pod
var restNodesToRunPod []*corev1.Node
Expand All @@ -530,6 +540,10 @@ func getNodesToRunPod(nodes *corev1.NodeList, job *appsv1alpha1.BroadcastJob,
klog.Infof("Pod %s does not fit on node %s due to %v", pod.Name, node.Name, err)
podsToDelete = append(podsToDelete, pod)
continue
} else if isDeadline(job) {
klog.Infof("Pod %s need to delete due to deadline", pod.Name)
podsToDelete = append(podsToDelete, pod)
continue

Check warning on line 546 in pkg/controller/broadcastjob/broadcastjob_controller.go

View check run for this annotation

Codecov / codecov/patch

pkg/controller/broadcastjob/broadcastjob_controller.go#L544-L546

Added lines #L544 - L546 were not covered by tests
}
desiredNodes[node.Name] = pod
} else {
Expand Down

0 comments on commit 3183a64

Please sign in to comment.