Skip to content

Commit 00b3a89

Browse files
authored
Merge pull request #3076 from k8s-infra-cherrypick-robot/cherry-pick-3074-to-release-1.32
[release-1.32] cleanup: only wait for disk detach when hit MaximumDataDisksExceeded error
2 parents 00706cd + c3ff8c8 commit 00b3a89

File tree

4 files changed

+27
-6
lines changed

4 files changed

+27
-6
lines changed

pkg/azuredisk/azure_controller_common.go

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ import (
3838
"k8s.io/utils/ptr"
3939

4040
"sigs.k8s.io/azuredisk-csi-driver/pkg/azureutils"
41+
"sigs.k8s.io/azuredisk-csi-driver/pkg/util"
4142
"sigs.k8s.io/cloud-provider-azure/pkg/azclient"
4243
azcache "sigs.k8s.io/cloud-provider-azure/pkg/cache"
4344
"sigs.k8s.io/cloud-provider-azure/pkg/consts"
@@ -100,6 +101,8 @@ type controllerCommon struct {
100101
ForceDetachBackoff bool
101102
WaitForDetach bool
102103
CheckDiskCountForBatching bool
104+
// a timed cache for disk attach hitting max data disk count, <nodeName, "">
105+
hitMaxDataDiskCountCache azcache.Resource
103106
}
104107

105108
// ExtendedLocation contains additional info about the location of resources.
@@ -193,7 +196,7 @@ func (c *controllerCommon) AttachDisk(ctx context.Context, diskName, diskURI str
193196
}
194197

195198
var waitForDetachHappened bool
196-
if c.WaitForDetach {
199+
if c.WaitForDetach && c.isMaxDataDiskCountExceeded(ctx, string(nodeName)) {
197200
// wait for disk detach to finish first on the same node
198201
if err = wait.PollUntilContextTimeout(ctx, 2*time.Second, 30*time.Second, true, func(context.Context) (bool, error) {
199202
detachDiskReqeustNum, err := c.getDetachDiskRequestNum(node)
@@ -296,7 +299,11 @@ func (c *controllerCommon) AttachDisk(ctx context.Context, diskName, diskURI str
296299

297300
err = vmset.AttachDisk(ctx, nodeName, diskMap)
298301
if err != nil {
299-
if IsOperationPreempted(err) {
302+
if strings.Contains(err.Error(), util.MaximumDataDiskExceededMsg) {
303+
klog.Warningf("hit max data disk count when attaching disk %s, set cache for node(%s)", diskName, nodeName)
304+
c.hitMaxDataDiskCountCache.Set(node, "")
305+
}
306+
if strings.Contains(err.Error(), "OperationPreempted") {
300307
klog.Errorf("Retry VM Update on node (%s) due to error (%v)", nodeName, err)
301308
err = vmset.UpdateVM(ctx, nodeName)
302309
}
@@ -618,8 +625,14 @@ func (c *controllerCommon) SetDiskLun(ctx context.Context, nodeName types.NodeNa
618625
return lun, nil
619626
}
620627

621-
func IsOperationPreempted(err error) bool {
622-
return strings.Contains(err.Error(), "OperationPreempted")
628+
// isMaxDataDiskCountExceeded checks if the max data disk count is exceeded
629+
func (c *controllerCommon) isMaxDataDiskCountExceeded(ctx context.Context, nodeName string) bool {
630+
_, err := c.hitMaxDataDiskCountCache.Get(ctx, nodeName, azcache.CacheReadTypeDefault)
631+
if err != nil {
632+
klog.Warningf("isMaxDataDiskCountExceeded(%s) return with error: %s", nodeName, err)
633+
return false
634+
}
635+
return true
623636
}
624637

625638
func getValidCreationData(subscriptionID, resourceGroup string, options *ManagedDiskOptions) (armcompute.CreationData, error) {

pkg/azuredisk/azure_controller_common_test.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ import (
3636
"k8s.io/utils/ptr"
3737

3838
mockvmclient "sigs.k8s.io/cloud-provider-azure/pkg/azclient/virtualmachineclient/mock_virtualmachineclient"
39+
azcache "sigs.k8s.io/cloud-provider-azure/pkg/cache"
3940
"sigs.k8s.io/cloud-provider-azure/pkg/consts"
4041
"sigs.k8s.io/cloud-provider-azure/pkg/provider"
4142
)
@@ -190,6 +191,8 @@ func TestCommonAttachDisk(t *testing.T) {
190191
DisableDiskLunCheck: true,
191192
WaitForDetach: true,
192193
}
194+
getter := func(_ context.Context, _ string) (interface{}, error) { return nil, nil }
195+
testdiskController.hitMaxDataDiskCountCache, _ = azcache.NewTimedCache(5*time.Minute, getter, false)
193196
lun, err := testdiskController.AttachDisk(ctx, test.diskName, diskURI, tt.nodeName, armcompute.CachingTypesReadOnly, tt.existedDisk, nil)
194197

195198
assert.Equal(t, tt.expectedLun, lun, "TestCase[%d]: %s", i, tt.desc)

pkg/azuredisk/azure_managedDiskController.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import (
2323
"net/http"
2424
"strconv"
2525
"strings"
26+
"time"
2627

2728
"github.com/Azure/azure-sdk-for-go/sdk/azcore"
2829
"github.com/Azure/azure-sdk-for-go/sdk/azcore/to"
@@ -36,6 +37,7 @@ import (
3637

3738
azureconsts "sigs.k8s.io/azuredisk-csi-driver/pkg/azureconstants"
3839
"sigs.k8s.io/azuredisk-csi-driver/pkg/azureutils"
40+
azcache "sigs.k8s.io/cloud-provider-azure/pkg/cache"
3941
"sigs.k8s.io/cloud-provider-azure/pkg/consts"
4042
"sigs.k8s.io/cloud-provider-azure/pkg/provider"
4143
)
@@ -52,6 +54,8 @@ func NewManagedDiskController(provider *provider.Cloud) *ManagedDiskController {
5254
AttachDetachInitialDelayInMs: defaultAttachDetachInitialDelayInMs,
5355
clientFactory: provider.ComputeClientFactory,
5456
}
57+
getter := func(_ context.Context, _ string) (interface{}, error) { return nil, nil }
58+
common.hitMaxDataDiskCountCache, _ = azcache.NewTimedCache(5*time.Minute, getter, false)
5559

5660
return &ManagedDiskController{common}
5761
}

pkg/util/util.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,9 @@ import (
2828
)
2929

3030
const (
31-
GiB = 1024 * 1024 * 1024
32-
TagKeyValueDelimiter = "="
31+
GiB = 1024 * 1024 * 1024
32+
TagKeyValueDelimiter = "="
33+
MaximumDataDiskExceededMsg = "attached to a VM of this size is"
3334
)
3435

3536
// IsWindowsOS decides whether the driver is running on windows OS.

0 commit comments

Comments
 (0)