Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ Changelog
Unreleased
----------

* Fix hostname parsing and add tests in dc_util.

2.52.1 (2025-09-15)
-------------------

Expand Down
54 changes: 50 additions & 4 deletions utils/dc_util/dc_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,48 @@ const (
defaultTimeout = "7200s"
)

type customError struct{ msg string }

func (e *customError) Error() string { return e.msg }

// ErrMalformedHostname is returned when hostname cannot be parsed
var ErrMalformedHostname = &customError{"malformed hostname"}

// splitHostname splits hostname by "-"
func splitHostname(hostname string) []string {
return strings.Split(hostname, "-")
}

// makeDecommissionStmt creates the decommission statement
func makeDecommissionStmt(nodeName string) string {
return fmt.Sprintf("alter cluster decommission '%s'", nodeName)
}

// extractNodeName extracts the CrateDB node name from hostname
func extractNodeName(hostname, crateNodePrefix, defaultPrefix string) (string, error) {
parts := splitHostname(hostname)

// If custom prefix provided (not default), use it
if crateNodePrefix != defaultPrefix && crateNodePrefix != "" {
if len(parts) > 0 {
podNumber := parts[len(parts)-1]
return crateNodePrefix + "-" + podNumber, nil
}
return "", ErrMalformedHostname
}

// Extract from hostname if using default prefix
if len(parts) > 3 && parts[0] == "crate" {
prefixParts := parts[1 : len(parts)-2]
podNumber := parts[len(parts)-1]
if len(prefixParts) > 0 && podNumber != "" {
return strings.Join(prefixParts, "-") + "-" + podNumber, nil
}
}

return "", ErrMalformedHostname
}

func sendSQLStatement(proto, stmt string) error {
payload := map[string]string{"stmt": stmt}
payloadBytes, err := json.Marshal(payload)
Expand Down Expand Up @@ -159,6 +201,12 @@ func run() error {
}
statefulSetName := strings.Join(hostnameParts[:len(hostnameParts)-1], "-")

// Determine crateNodeName using extracted logic
actualCrateNodeName, err := extractNodeName(hostname, crateNodePrefix, defaultCrateNodePrefix)
if err != nil {
return fmt.Errorf("failed to extract node name from hostname %s: %w", hostname, err)
}

ctx := context.Background()
statefulSet, err := clientset.AppsV1().StatefulSets(namespace).Get(ctx, statefulSetName, metav1.GetOptions{})
if err != nil {
Expand All @@ -171,16 +219,14 @@ func run() error {
time.Sleep(2 * time.Second) // Sleep to catch up with the replica settings

if replicas > 0 {
podNumber := hostnameParts[len(hostnameParts)-1]

// Send the SQL statements to decommission the node
log.Printf("Decommissioning node %s with graceful_stop.timeout of %s", podNumber, decommissionTimeout)
log.Printf("Decommissioning node %s with graceful_stop.timeout of %s", actualCrateNodeName, decommissionTimeout)

statements := []string{
fmt.Sprintf(`set global transient "cluster.graceful_stop.timeout" = '%s';`, decommissionTimeout),
`set global transient "cluster.graceful_stop.force" = True;`,
fmt.Sprintf(`set global transient "cluster.graceful_stop.min_availability"='%s';`, minAvailability),
fmt.Sprintf(`alter cluster decommission '%s-%s'`, crateNodePrefix, podNumber),
makeDecommissionStmt(actualCrateNodeName),
}

for _, stmt := range statements {
Expand Down
179 changes: 179 additions & 0 deletions utils/dc_util/decommission_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
package main

import (
"strings"
"testing"
)

func TestDecommissionIntegrationCases(t *testing.T) {
cases := []struct {
name string
hostname string
crateNodePrefix string
defaultPrefix string
wantNodeName string
wantExtracted bool
wantError bool
}{
{
name: "Custom prefix overrides extraction",
hostname: "crate-data-hot-uuid-0",
crateNodePrefix: "custom",
defaultPrefix: "data-hot",
wantNodeName: "custom-0",
wantExtracted: true,
wantError: false,
},
{
name: "Default prefix uses extraction",
hostname: "crate-data-hot-uuid-0",
crateNodePrefix: "data-hot",
defaultPrefix: "data-hot",
wantNodeName: "data-hot-0",
wantExtracted: true,
wantError: false,
},
{
name: "Malformed hostname (too short)",
hostname: "crate-data-0",
crateNodePrefix: "",
defaultPrefix: "data-hot",
wantNodeName: "",
wantExtracted: false,
wantError: true,
},
{
name: "Malformed hostname (not crate)",
hostname: "notcrate-data-hot-uuid-0",
crateNodePrefix: "",
defaultPrefix: "data-hot",
wantNodeName: "",
wantExtracted: false,
wantError: true,
},
}
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
parts := splitHostname(c.hostname)
var nodeName string
extracted := false
var err error
if c.crateNodePrefix != c.defaultPrefix && c.crateNodePrefix != "" {
// Custom prefix always overrides
podNumber := ""
if len(parts) > 0 {
podNumber = parts[len(parts)-1]
}
nodeName = c.crateNodePrefix + "-" + podNumber
extracted = true
} else if len(parts) > 3 && parts[0] == "crate" {
prefixParts := parts[1 : len(parts)-2]
podNumber := parts[len(parts)-1]
if len(prefixParts) > 0 && podNumber != "" {
nodeName = strings.Join(prefixParts, "-") + "-" + podNumber
extracted = true
}
} else {
err = ErrMalformedHostname
}
if c.wantError && err == nil {
t.Errorf("%s: expected error but got none", c.name)
}
if !c.wantError && err != nil {
t.Errorf("%s: unexpected error: %v", c.name, err)
}
if extracted != c.wantExtracted || nodeName != c.wantNodeName {
t.Errorf("%s: got nodeName=%q, extracted=%v; want nodeName=%q, extracted=%v", c.name, nodeName, extracted, c.wantNodeName, c.wantExtracted)
} else {
t.Logf("%s: nodeName=%q, extracted=%v", c.name, nodeName, extracted)
}
})
}

// The following would require real refactoring/mocking:
// - Simulate failure to get StatefulSet (should return a clear error)
// - Simulate StatefulSet with 0 replicas (should skip decommission)
// - Simulate failure in sendSQLStatement (should propagate error)
// - Ensure all SQL statements are sent in the correct order
// These would be best tested by extracting logic into a package and using interfaces/mocks.
}

func TestNodeNameExtractionEdgeCases(t *testing.T) {
cases := []struct {
hostname string
expectedNode string
shouldExtract bool
}{
// Only one prefix part
{"crate-data-12345-0", "data-0", true},
// Multiple prefix parts
{"crate-foo-bar-baz-12345-0", "foo-bar-baz-0", true},
// Missing UUID (should fail to extract)
{"crate-data-0", "", false},
// Not starting with crate-
{"notcrate-data-hot-uuid-0", "", false},
// Empty hostname
{"", "", false},
}
for _, c := range cases {
parts := splitHostname(c.hostname)
var nodeName string
extracted := false
if len(parts) > 3 && parts[0] == "crate" {
// New logic: join all parts between 'crate' and the last two parts (skip unique ID), last part is pod number
prefixParts := parts[1 : len(parts)-2]
podNumber := parts[len(parts)-1]
if len(prefixParts) > 0 && podNumber != "" {
nodeName = strings.Join(prefixParts, "-") + "-" + podNumber
extracted = true
}
}
if extracted != c.shouldExtract || nodeName != c.expectedNode {
t.Errorf("hostname=%q: got nodeName=%q, extracted=%v; want nodeName=%q, extracted=%v", c.hostname, nodeName, extracted, c.expectedNode, c.shouldExtract)
} else {
t.Logf("hostname=%q: nodeName=%q, extracted=%v", c.hostname, nodeName, extracted)
}
}
}

func TestDecommissionStatement(t *testing.T) {
tests := []struct {
hostname string
crateNodePrefix string
defaultPrefix string
expectedNodeName string
}{
// Use prefix from hostname if flag is default
{"crate-master-bdc9bebd-d0c6-49a3-bcae-142f34d125fa-0", "data-hot", "data-hot", "master-0"},
// Use prefix from hostname if flag is empty
{"crate-data-hot-bdc9bebd-d0c6-49a3-bcae-142f34d125fa-0", "", "data-hot", "data-hot-0"},
// Use provided flag if not default
{"crate-master-bdc9bebd-d0c6-49a3-bcae-142f34d125fa-0", "custom", "data-hot", "custom-0"},
}

for _, tt := range tests {
hostnameParts := splitHostname(tt.hostname)
actualNodeName := tt.crateNodePrefix
t.Logf("Test case: hostname=%q, crateNodePrefix=%q, defaultPrefix=%q, expectedNodeName=%q", tt.hostname, tt.crateNodePrefix, tt.defaultPrefix, tt.expectedNodeName)
if tt.crateNodePrefix == tt.defaultPrefix || tt.crateNodePrefix == "" {
if len(hostnameParts) > 7 && hostnameParts[0] == "crate" {
// Node name is everything after 'crate-' up to the UUID (5 parts), plus pod number
// e.g. crate-data-hot-bdc9bebd-d0c6-49a3-bcae-142f34d125fa-0 => data-hot-0
prefixParts := hostnameParts[1 : len(hostnameParts)-6]
podNumber := hostnameParts[len(hostnameParts)-1]
actualNodeName = strings.Join(prefixParts, "-") + "-" + podNumber
t.Logf("Extracted node name from hostname: %q (prefixParts=%v, podNumber=%q)", actualNodeName, prefixParts, podNumber)
}
} else {
podNumber := hostnameParts[len(hostnameParts)-1]
actualNodeName = tt.crateNodePrefix + "-" + podNumber
t.Logf("Used provided prefix: node name=%q (podNumber=%q)", actualNodeName, podNumber)
}
stmt := makeDecommissionStmt(actualNodeName)
want := "alter cluster decommission '" + tt.expectedNodeName + "'"
t.Logf("Generated statement: %q, Expected: %q", stmt, want)
if stmt != want {
t.Errorf("for hostname %q and prefix %q, got %q, want %q", tt.hostname, tt.crateNodePrefix, stmt, want)
}
}
}