TL;DR
Changes were made to the force_node_pool_recreation_resources local in beta-private-cluster-update-variant between 36.3.0 and 37.0.0. Because of this, I'm seeing that the keepers on the random_id resource always change, which forces the random_id (and the node pool name suffix derived from it) to be recreated, meaning our node pools in the live cluster will be recreated.
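
For context, the update-variant modules derive each node pool's name from a random_id whose keepers map is built from the attributes listed in the force_node_pool_recreation_resources local. A minimal sketch of that pattern, with hypothetical keeper keys rather than the module's actual code, looks roughly like this:

# A minimal sketch of the update-variant naming pattern, with hypothetical
# keeper keys (the real module builds the keepers map from the attributes
# listed in its force_node_pool_recreation_resources local):
resource "random_id" "suffix" {
  byte_length = 2
  keepers = {
    # illustrative keys only
    machine_type = "n2d-custom-80-92160"
    disk_type    = "pd-balanced"
  }
}

locals {
  # the node pool name submitted to GKE includes the random suffix, so when the
  # keepers map changes (e.g. keys are added, removed, or renamed between module
  # versions), the random_id is replaced, the name changes, and the node pool
  # itself is replaced
  example_node_pool_name = "main-${random_id.suffix.hex}"
}

So even if none of our input values change, a change to which attributes feed the keepers map between module versions is enough to trigger a full node pool replacement.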

Expected behavior
The ability to get a plan with no changes after upgrading the module (assuming we address any breaking changes).
Observed behavior
Our live node pools always get a new name, which causes the node pools themselves to be replaced. This will cause downtime.

Terraform Configuration
module "cluster" {
source = "terraform-google-modules/kubernetes-engine/google//modules/beta-private-cluster-update-variant"
version = "36.3.0" # later updated to 37.0.0
project_id = "your-value-here"
name = "your-value-here"
regional = true
region = var.region
deletion_protection = "your-value-here"
release_channel = "your-value-here"
gateway_api_channel = "your-value-here"
monitoring_enable_managed_prometheus = true
# Maintenance window settings - these should not impact anything,
# since GKE performs a zero-downtime upgrade for regional clusters
maintenance_start_time = "2022-01-01T21:00:00Z"
maintenance_end_time = "2022-01-02T01:00:00Z"
maintenance_recurrence = "FREQ=WEEKLY;BYDAY=FR,SA,SU"
# defaults to false, but it is safe to enable the Vertical Pod Autoscaling controller addon by default, since
# it will not do anything unless the CRD is created
enable_vertical_pod_autoscaling = true
horizontal_pod_autoscaling = true # default
# cluster_autoscaling = var.cluster_autoscaling
# Network Configuration
network = "your-value-here"
network_project_id = "your-value-here"
master_ipv4_cidr_block = "your-value-here"
master_authorized_networks = "your-value-here"
subnetwork = "your-value-here"
ip_range_pods = "your-value-here"
ip_range_services = "your-value-here"
# defaults to false; we force it to false as we require Dataplane V2
# which force enables network policies
network_policy = false
# defaults to CALICO; we force it to empty, as we use Dataplane V2,
# which force-enables network policies.
network_policy_provider = ""
# defaults to DATAPATH_PROVIDER_UNSPECIFIED; we force it to ADVANCED_DATAPATH
# to force Cilium for networking.
datapath_provider = "ADVANCED_DATAPATH"
# Firewall Configuration
add_master_webhook_firewall_rules = true
add_cluster_firewall_rules = true
add_shadow_firewall_rules = true
firewall_priority = 900
shadow_firewall_rules_priority = 899 # needs to be less than the firewall priority
firewall_inbound_ports = "your-value-here"
# Node Pool Configuration
remove_default_node_pool = true
node_pools = local.node_pools["app"]["prd"]
# defaults to false; we force it to true
dns_cache = true
# defaults to false; we force it to true, as with safer-cluster.
deploy_using_private_endpoint = true
# defaults to false; we force it to true, as with safer-cluster.
enable_private_endpoint = true
# defaults to false; we force it to true, as with safer-cluster.
enable_private_nodes = true
# defaults to false; we force it to true, to avoid double NAT.
# https://cloud.google.com/kubernetes-engine/docs/best-practices/networking#use-cloudnat
disable_default_snat = true
# defaults to false; we force it to true, as it enables balanced SSD persistent storage classes.
gce_pd_csi_driver = true
# defaults to false; we force it to true, as it improves load balancing efficiency using Network Endpoint Groups.
enable_l4_ilb_subsetting = true
# export cost allocation data for per-namespace/per-cluster cost analysis.
# https://cloud.google.com/kubernetes-engine/docs/how-to/cost-allocations
enable_cost_allocation = true
# collect metrics for worker nodes (SYSTEM_COMPONENTS) as well as the
# cluster control plane (APISERVER, CONTROLLER_MANAGER, SCHEDULER)
monitoring_enabled_components = [
"SYSTEM_COMPONENTS",
"APISERVER",
"CONTROLLER_MANAGER",
"SCHEDULER",
]
# Enable GKE Security Posture workload scanning and config auditing
workload_config_audit_mode = "BASIC"
workload_vulnerability_mode = "BASIC"
security_posture_mode = "BASIC"
security_posture_vulnerability_mode = "VULNERABILITY_BASIC"
}
locals {
  # This config will serve as the base model for all node pools outlined
  # below. We use a `merge` function to combine this base config with any
  # environment/application specific settings. The merge function will combine
  # the two maps and override any values in the base config with the values
  # from the environment/application specific config.
  common_node_pool_config = {
    name = "main"
    # the initial number of nodes to create (per zone).
    # this is required to be set to 1 or more, otherwise
    # when attempting to bootstrap the cluster with `flux`
    # it will fail because there are no nodes yet available.
    initial_node_count = 1
    # autoscaling min/max settings (per zone)
    # for most clusters, pod subnet size is 1022 and
    # the default pods/node is configured to 64:
    # 1022/64 = 15 nodes (max). We shouldn't need 15 nodes in
    # most of our clusters, so we'll set this to 2.
    min_count = 0
    max_count = 2
    # allows the nodes to be upgraded automatically
    auto_upgrade = true
    # balance the sizes of the available zones
    location_policy = "BALANCED"
    # this allows us to use image streaming to pull
    # container images
    enable_gcfs = true
    # COS_CONTAINERD is required to use with GCFS
    image_type = "COS_CONTAINERD"
    # `spot` instances are the recommended way to run GKE clusters
    # over `preemptible` instances
    preemptible = false
    spot        = true
    # Ensures that the system only runs authentic software by verifying the
    # digital signature of all boot components, and halting the boot
    # process if signature verification fails
    enable_secure_boot = true
  }

  node_pools = {
    app = {
      prd = concat([
        merge(local.common_node_pool_config, {
          # 80 CPU, 92 GB RAM - $1687/node/month (on demand), $556/node/month (spot)
          spot         = false
          min_count    = 1
          max_count    = 20
          max_surge    = 6
          machine_type = "n2d-custom-80-92160"
          disk_type    = "pd-balanced"
        }),
        merge(local.common_node_pool_config, {
          name = "operations"
          # 8 CPU, 12 GB RAM - $36/node/month (spot)
          machine_type = "n2d-custom-8-12288"
          disk_type    = "pd-balanced"
        }),
      ])
    }
  }
}
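
For reference, local.node_pools["app"]["prd"] above resolves to the following effective node pool definitions (hand-expanded from the merge() calls; all values are copied from the locals above):

# Hand-expanded value of local.node_pools["app"]["prd"], for reference only
[
  {
    name               = "main"
    initial_node_count = 1
    min_count          = 1     # overridden from the base value of 0
    max_count          = 20    # overridden from the base value of 2
    max_surge          = 6     # added by the override
    auto_upgrade       = true
    location_policy    = "BALANCED"
    enable_gcfs        = true
    image_type         = "COS_CONTAINERD"
    preemptible        = false
    spot               = false # overridden from the base value of true
    enable_secure_boot = true
    machine_type       = "n2d-custom-80-92160"
    disk_type          = "pd-balanced"
  },
  {
    name               = "operations" # overridden from the base value of "main"
    initial_node_count = 1
    min_count          = 0
    max_count          = 2
    auto_upgrade       = true
    location_policy    = "BALANCED"
    enable_gcfs        = true
    image_type         = "COS_CONTAINERD"
    preemptible        = false
    spot               = true
    enable_secure_boot = true
    machine_type       = "n2d-custom-8-12288"
    disk_type          = "pd-balanced"
  },
]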
Terraform Version
1.12.2
Terraform Provider Versions
terraform {
  required_version = "~> 1"

  required_providers {
    google = {
      source  = "hashicorp/google"
      version = "6.38.0"
    }
    google-beta = {
      source  = "hashicorp/google-beta"
      version = "6.38.0"
    }
  }
}
Additional information
No response