Skip to content

Commit 723e9c9

Browse files
committed
stopping update run implementation
Signed-off-by: Britania Rodriguez Reyes <britaniar@microsoft.com>
1 parent ed6177d commit 723e9c9

File tree

7 files changed

+358
-76
lines changed

7 files changed

+358
-76
lines changed

apis/placement/v1beta1/stageupdate_types.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -427,7 +427,7 @@ const (
427427
// Its condition status can be one of the following:
428428
// - "True": The staged update run is making progress.
429429
// - "False": The staged update run is waiting/paused/abandoned.
430-
// - "Unknown" means it is unknown.
430+
// - "Unknown": The staged update run is in the process of stopping.
431431
StagedUpdateRunConditionProgressing StagedUpdateRunConditionType = "Progressing"
432432

433433
// StagedUpdateRunConditionSucceeded indicates whether the staged update run is completed successfully.
@@ -489,7 +489,8 @@ const (
489489
// StageUpdatingConditionProgressing indicates whether the stage updating is making progress.
490490
// Its condition status can be one of the following:
491491
// - "True": The stage updating is making progress.
492-
// - "False": The stage updating is waiting/pausing.
492+
// - "False": The stage updating is waiting.
493+
// - "Unknown": The progress of the stage updating is unknown.
493494
StageUpdatingConditionProgressing StageUpdatingConditionType = "Progressing"
494495

495496
// StageUpdatingConditionSucceeded indicates whether the stage updating is completed successfully.

pkg/controllers/updaterun/controller.go

Lines changed: 29 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -176,23 +176,23 @@ func (r *Reconciler) Reconcile(ctx context.Context, req runtime.Request) (runtim
176176
return runtime.Result{}, r.recordUpdateRunSucceeded(ctx, updateRun)
177177
}
178178

179-
// The execution is not finished yet or it encounters a retriable error.
180-
// We need to record the status and requeue.
181-
if updateErr := r.recordUpdateRunStatus(ctx, updateRun); updateErr != nil {
182-
return runtime.Result{}, updateErr
183-
}
184-
klog.V(2).InfoS("The updateRun is not finished yet", "requeueWaitTime", waitTime, "execErr", execErr, "updateRun", runObjRef)
185-
if execErr != nil {
186-
return runtime.Result{}, execErr
187-
}
188-
return runtime.Result{Requeue: true, RequeueAfter: waitTime}, nil
179+
return r.handleIncompleteUpdateRun(ctx, updateRun, waitTime, execErr, state, runObjRef)
189180
case placementv1beta1.StateStop:
190181
// Stop the updateRun.
191-
klog.InfoS("Stopping the updateRun", "state", state, "updatingStageIndex", updatingStageIndex, "updateRun", runObjRef)
192-
// TODO(britaniar): Implement the stopping logic for in-progress stages.
182+
klog.V(2).InfoS("Stopping the updateRun", "state", state, "updatingStageIndex", updatingStageIndex, "updateRun", runObjRef)
183+
finished, waitTime, stopErr := r.stop(updateRun, updatingStageIndex, toBeUpdatedBindings, toBeDeletedBindings)
184+
if errors.Is(stopErr, errStagedUpdatedAborted) {
185+
// errStagedUpdatedAborted cannot be retried.
186+
return runtime.Result{}, r.recordUpdateRunFailed(ctx, updateRun, stopErr.Error())
187+
}
188+
189+
if finished {
190+
klog.V(2).InfoS("The updateRun is stopped", "updateRun", runObjRef)
191+
return runtime.Result{}, r.recordUpdateRunStopped(ctx, updateRun)
192+
}
193+
194+
return r.handleIncompleteUpdateRun(ctx, updateRun, waitTime, stopErr, state, runObjRef)
193195

194-
klog.V(2).InfoS("The updateRun is stopped", "updateRun", runObjRef)
195-
return runtime.Result{}, r.recordUpdateRunStopped(ctx, updateRun)
196196
default:
197197
// Initialize, Run, or Stop are the only supported states.
198198
unexpectedErr := controller.NewUnexpectedBehaviorError(fmt.Errorf("found unsupported updateRun state: %s", state))
@@ -202,6 +202,21 @@ func (r *Reconciler) Reconcile(ctx context.Context, req runtime.Request) (runtim
202202
return runtime.Result{}, nil
203203
}
204204

205+
// handleIncompleteUpdateRun finishes a reconcile pass for an updateRun whose execution or
// stopping has not completed. It first persists the current status via recordUpdateRunStatus;
// if that write fails, the write error is returned and err is not returned to the caller.
// Otherwise it logs the state, wait time and err, then returns err when it is non-nil, or a
// result that requeues after waitTime when err is nil.
func (r *Reconciler) handleIncompleteUpdateRun(ctx context.Context, updateRun placementv1beta1.UpdateRunObj, waitTime time.Duration, err error, state placementv1beta1.State, runObjRef klog.ObjectRef) (runtime.Result, error) {
206+
// The execution or stopping has not finished yet, or it hit a retriable error.
207+
// Record the status before returning so that a requeue happens either way.
208+
if updateErr := r.recordUpdateRunStatus(ctx, updateRun); updateErr != nil {
209+
return runtime.Result{}, updateErr
210+
}
211+
212+
klog.V(2).InfoS("The updateRun is not finished yet", "state", state, "requeueWaitTime", waitTime, "err", err, "updateRun", runObjRef)
213+
214+
if err != nil {
215+
return runtime.Result{}, err
216+
}
217+
return runtime.Result{Requeue: true, RequeueAfter: waitTime}, nil
218+
}
219+
205220
// handleDelete handles the deletion of the updateRun object.
206221
// We delete all the dependent resources, including approvalRequest objects, of the updateRun object.
207222
func (r *Reconciler) handleDelete(ctx context.Context, updateRun placementv1beta1.UpdateRunObj) (bool, time.Duration, error) {

pkg/controllers/updaterun/controller_integration_test.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -333,6 +333,16 @@ func generateFailedMetric(updateRun *placementv1beta1.ClusterStagedUpdateRun) *p
333333
}
334334
}
335335

336+
// generateStoppingMetric builds the metric expected for an updateRun that is stopping:
// labels come from generateMetricsLabels with the Progressing condition type, status
// Unknown and reason condition.UpdateRunStoppingReason. The gauge value is the current
// wall-clock time in seconds (UnixNano divided by 1e9), so it differs on every call.
func generateStoppingMetric(updateRun *placementv1beta1.ClusterStagedUpdateRun) *prometheusclientmodel.Metric {
337+
return &prometheusclientmodel.Metric{
338+
Label: generateMetricsLabels(updateRun, string(placementv1beta1.StagedUpdateRunConditionProgressing),
339+
string(metav1.ConditionUnknown), condition.UpdateRunStoppingReason),
340+
Gauge: &prometheusclientmodel.Gauge{
341+
Value: ptr.To(float64(time.Now().UnixNano()) / 1e9),
342+
},
343+
}
344+
}
345+
336346
func generateStoppedMetric(updateRun *placementv1beta1.ClusterStagedUpdateRun) *prometheusclientmodel.Metric {
337347
return &prometheusclientmodel.Metric{
338348
Label: generateMetricsLabels(updateRun, string(placementv1beta1.StagedUpdateRunConditionProgressing),
@@ -858,3 +868,12 @@ func generateFalseConditionWithReason(obj client.Object, condType any, reason st
858868
falseCond.Reason = reason
859869
return falseCond
860870
}
871+
872+
// generateProgressingUnknownConditionWithReason returns a "Progressing" condition with
// status Unknown, the given reason, and ObservedGeneration taken from obj.GetGeneration().
// The Type is built from StageUpdatingConditionProgressing, whose string value
// ("Progressing") is the same as StagedUpdateRunConditionProgressing.
func generateProgressingUnknownConditionWithReason(obj client.Object, reason string) metav1.Condition {
873+
return metav1.Condition{
874+
Status: metav1.ConditionUnknown,
875+
Type: string(placementv1beta1.StageUpdatingConditionProgressing),
876+
ObservedGeneration: obj.GetGeneration(),
877+
Reason: reason,
878+
}
879+
}

pkg/controllers/updaterun/execution.go

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ package updaterun
1818

1919
import (
2020
"context"
21-
"errors"
2221
"fmt"
2322
"reflect"
2423
"strconv"
@@ -68,14 +67,7 @@ func (r *Reconciler) execute(
6867

6968
// Set up defer function to handle errStagedUpdatedAborted.
7069
defer func() {
71-
if errors.Is(err, errStagedUpdatedAborted) {
72-
if updatingStageStatus != nil {
73-
markStageUpdatingFailed(updatingStageStatus, updateRun.GetGeneration(), err.Error())
74-
} else {
75-
// Handle deletion stage case.
76-
markStageUpdatingFailed(updateRunStatus.DeletionStageStatus, updateRun.GetGeneration(), err.Error())
77-
}
78-
}
70+
checkIfErrorStagedUpdateAborted(err, updateRun, updatingStageStatus)
7971
}()
8072

8173
// Mark updateRun as progressing if it's not already marked as waiting or stuck.
@@ -232,9 +224,7 @@ func (r *Reconciler) executeUpdatingStage(
232224
}
233225
}
234226
markClusterUpdatingStarted(clusterStatus, updateRun.GetGeneration())
235-
if finishedClusterCount == 0 {
236-
markStageUpdatingStarted(updatingStageStatus, updateRun.GetGeneration())
237-
}
227+
markStageUpdatingStarted(updatingStageStatus, updateRun.GetGeneration())
238228
// Need to continue as we need to process at most maxConcurrency number of clusters in parallel.
239229
continue
240230
}
@@ -564,6 +554,9 @@ func calculateMaxConcurrencyValue(status *placementv1beta1.UpdateRunStatus, stag
564554
func aggregateUpdateRunStatus(updateRun placementv1beta1.UpdateRunObj, stageName string, stuckClusterNames []string) {
565555
if len(stuckClusterNames) > 0 {
566556
markUpdateRunStuck(updateRun, stageName, strings.Join(stuckClusterNames, ", "))
557+
} else if updateRun.GetUpdateRunSpec().State == placementv1beta1.StateStop {
558+
// If there is no stuck cluster and the update run spec state is StateStop, mark the update run as stopping.
559+
markUpdateRunStopping(updateRun)
567560
} else {
568561
// If there is no stuck cluster but some progress has been made, mark the update run as progressing.
569562
markUpdateRunProgressing(updateRun)

0 commit comments

Comments
 (0)