-
Notifications
You must be signed in to change notification settings - Fork 88
Deploy ToolHive Operator into OpenShift (#1063) #1253
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -552,6 +552,8 @@ func (r *MCPServerReconciler) deploymentForMCPServer(m *mcpv1alpha1.MCPServer) * | |
} | ||
} | ||
|
||
env = ensureRequiredEnvVars(env) | ||
|
||
dep := &appsv1.Deployment{ | ||
ObjectMeta: metav1.ObjectMeta{ | ||
Name: m.Name, | ||
|
@@ -621,6 +623,36 @@ func (r *MCPServerReconciler) deploymentForMCPServer(m *mcpv1alpha1.MCPServer) * | |
return dep | ||
} | ||
|
||
func ensureRequiredEnvVars(env []corev1.EnvVar) []corev1.EnvVar { | ||
// Check for the existence of the XDG_CONFIG_HOME and HOME environment variables | ||
// and set them to /tmp if they don't exist | ||
xdgConfigHomeFound := false | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @ChrisJBurns do we have a ticket to ensure the proxy runner doesn't write any files? I think in general the proxy runner should be able to run with read-only filesystem There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ooops almost missed this! Are you speaking about the general There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Created #1285 to track the ProxyRunner secContext |
||
homeFound := false | ||
for _, envVar := range env { | ||
if envVar.Name == "XDG_CONFIG_HOME" { | ||
xdgConfigHomeFound = true | ||
} | ||
if envVar.Name == "HOME" { | ||
homeFound = true | ||
} | ||
} | ||
if !xdgConfigHomeFound { | ||
logger.Debugf("XDG_CONFIG_HOME not found, setting to /tmp") | ||
env = append(env, corev1.EnvVar{ | ||
Name: "XDG_CONFIG_HOME", | ||
Value: "/tmp", | ||
}) | ||
} | ||
if !homeFound { | ||
logger.Debugf("HOME not found, setting to /tmp") | ||
env = append(env, corev1.EnvVar{ | ||
Name: "HOME", | ||
Value: "/tmp", | ||
}) | ||
} | ||
return env | ||
} | ||
|
||
func createServiceName(mcpServerName string) string { | ||
return fmt.Sprintf("mcp-%s-proxy", mcpServerName) | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -42,13 +42,14 @@ operator: | |
# -- Pod security context settings | ||
podSecurityContext: | ||
runAsNonRoot: true | ||
seccompProfile: | ||
type: RuntimeDefault | ||
|
||
# -- Container security context settings for the operator | ||
containerSecurityContext: | ||
allowPrivilegeEscalation: false | ||
readOnlyRootFilesystem: true | ||
runAsNonRoot: true | ||
runAsUser: 1000 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @ChrisJBurns would you prefer if this change was done in a separate Either way it looks like we need to bump the chart version ( There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So I think you'll be able to override this anyways during deployment. In GitOps you just pass in: containerSecurityContext:
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
runAsNonRoot: true This should override the default values. However if you're not using GitOps, then a separate values file like you mentioned |
||
capabilities: | ||
drop: | ||
- ALL | ||
|
@@ -85,10 +86,10 @@ operator: | |
resources: | ||
limits: | ||
cpu: 500m | ||
memory: 128Mi | ||
memory: 384Mi | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is there a reason we want to bump these up? You could override these values at deploy time if needed. |
||
requests: | ||
cpu: 10m | ||
memory: 64Mi | ||
memory: 192Mi | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same here |
||
|
||
# -- RBAC configuration for the operator | ||
rbac: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -27,6 +27,7 @@ import ( | |
"k8s.io/client-go/rest" | ||
"k8s.io/client-go/tools/remotecommand" | ||
"k8s.io/client-go/tools/watch" | ||
clientconfig "sigs.k8s.io/controller-runtime/pkg/client/config" | ||
|
||
"github.com/stacklok/toolhive/pkg/container/runtime" | ||
"github.com/stacklok/toolhive/pkg/logger" | ||
|
@@ -246,17 +247,30 @@ func (c *Client) DeployWorkload(ctx context.Context, | |
} | ||
|
||
// Ensure the pod template has required configuration (labels, etc.) | ||
podTemplateSpec = ensurePodTemplateConfig(podTemplateSpec, containerLabels) | ||
isOpenShift := false | ||
// Get a config to talk to the apiserver | ||
cfg, err := clientconfig.GetConfig() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wonder if we should use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Agreed, we have some examples here with mkp. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Actually, I spent a little more time looking at the test failures and one of the tests fails because the code now expects a config but the unit test doesn't have one. I think the cleanest solution would be to have an enum of supported platforms: type Platform int
const (
PlatformKubernetes Platform = iota
PlatformOpenShift
) and an interface to detect a platform: type PlatformDetector interface {
DetectPlatform(ctx context.Context) (Platform, error)
} this way we could mock the detection in the tests. btw I realize we let you waiting for a review and now I'm asking for a change, of course I'd be happy with a lower-cost solution or I'll be happy to contribute to your branch. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah I agree here, this would somewhat avoid the isOpenShift flags below as well (and any future isEKS etc). If we have a nice way of being able to support multiple platforms without having toggles passed into functions that would be quite nice. |
||
if err != nil { | ||
return 0, fmt.Errorf("error getting config for APIServer: %w", err) | ||
} | ||
|
||
isOpenShift, err = DetectOpenShiftWith(cfg) | ||
if err != nil { | ||
return 0, fmt.Errorf("can't determine api server type: %w", err) | ||
} | ||
|
||
podTemplateSpec = ensurePodTemplateConfig(podTemplateSpec, containerLabels, isOpenShift) | ||
|
||
// Configure the MCP container | ||
err := configureMCPContainer( | ||
err = configureMCPContainer( | ||
podTemplateSpec, | ||
image, | ||
command, | ||
attachStdio, | ||
envVarList, | ||
transportType, | ||
options, | ||
isOpenShift, | ||
) | ||
if err != nil { | ||
return 0, err | ||
|
@@ -891,6 +905,7 @@ func createPodTemplateFromPatch(patchJSON string) (*corev1apply.PodTemplateSpecA | |
func ensurePodTemplateConfig( | ||
podTemplateSpec *corev1apply.PodTemplateSpecApplyConfiguration, | ||
containerLabels map[string]string, | ||
isOpenShift bool, | ||
) *corev1apply.PodTemplateSpecApplyConfiguration { | ||
podTemplateSpec = ensureObjectMetaApplyConfigurationExists(podTemplateSpec) | ||
// Ensure the pod template has labels | ||
|
@@ -940,6 +955,31 @@ func ensurePodTemplateConfig( | |
podTemplateSpec.Spec.SecurityContext = podTemplateSpec.Spec.SecurityContext.WithRunAsGroup(int64(1000)) | ||
} | ||
} | ||
|
||
if isOpenShift { | ||
if podTemplateSpec.Spec.SecurityContext.RunAsUser != nil { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This may show my lack of OpenShift knowledge here 😆 , but is there a reason we are setting these to Another point here, maybe not now, but in future, I'd like to keep the securityContext code as simple as possible in the runtime layer so we don't have to do additional checks. I think a longer-term goal would be to drive it from the podTemplateSpec that's passed in. So the Operator is responsible for passing in the sensible defaults and at the same time can detect if it's OpenShift or not (so it doesn't pass in anything) |
||
podTemplateSpec.Spec.SecurityContext.RunAsUser = nil | ||
} | ||
|
||
if podTemplateSpec.Spec.SecurityContext.RunAsGroup != nil { | ||
podTemplateSpec.Spec.SecurityContext.RunAsGroup = nil | ||
} | ||
|
||
if podTemplateSpec.Spec.SecurityContext.FSGroup != nil { | ||
podTemplateSpec.Spec.SecurityContext.FSGroup = nil | ||
} | ||
|
||
if podTemplateSpec.Spec.SecurityContext.SeccompProfile == nil { | ||
podTemplateSpec.Spec.SecurityContext.SeccompProfile = | ||
corev1apply.SeccompProfile().WithType( | ||
corev1.SeccompProfileTypeRuntimeDefault) | ||
} else { | ||
podTemplateSpec.Spec.SecurityContext.SeccompProfile = | ||
podTemplateSpec.Spec.SecurityContext.SeccompProfile.WithType( | ||
corev1.SeccompProfileTypeRuntimeDefault) | ||
} | ||
} | ||
|
||
return podTemplateSpec | ||
} | ||
|
||
|
@@ -985,7 +1025,18 @@ func configureContainer( | |
command []string, | ||
attachStdio bool, | ||
envVars []*corev1apply.EnvVarApplyConfiguration, | ||
isOpenShift bool, | ||
) { | ||
logger.Infof("Configuring container %s with image %s", *container.Name, image) | ||
logger.Infof("Command: ") | ||
for _, arg := range command { | ||
logger.Infof("Arg: %s", arg) | ||
} | ||
logger.Infof("AttachStdio: %v", attachStdio) | ||
for _, envVar := range envVars { | ||
logger.Infof("EnvVar: %s=%s", *envVar.Name, *envVar.Value) | ||
} | ||
|
||
container.WithImage(image). | ||
WithArgs(command...). | ||
WithStdin(attachStdio). | ||
|
@@ -1029,6 +1080,34 @@ func configureContainer( | |
container.SecurityContext = container.SecurityContext.WithAllowPrivilegeEscalation(false) | ||
} | ||
} | ||
|
||
if isOpenShift { | ||
logger.Infof("Setting OpenShift security context requirements to container %s", *container.Name) | ||
|
||
if container.SecurityContext.RunAsUser != nil { | ||
container.SecurityContext.RunAsUser = nil | ||
} | ||
|
||
if container.SecurityContext.RunAsGroup != nil { | ||
container.SecurityContext.RunAsGroup = nil | ||
} | ||
|
||
if container.SecurityContext.SeccompProfile == nil { | ||
container.SecurityContext.SeccompProfile = | ||
corev1apply.SeccompProfile().WithType( | ||
corev1.SeccompProfileTypeRuntimeDefault) | ||
} else { | ||
container.SecurityContext.SeccompProfile = | ||
container.SecurityContext.SeccompProfile.WithType( | ||
corev1.SeccompProfileTypeRuntimeDefault) | ||
} | ||
|
||
if container.SecurityContext.Capabilities == nil { | ||
container.SecurityContext.Capabilities = &corev1apply.CapabilitiesApplyConfiguration{ | ||
Drop: []corev1.Capability{"ALL"}, | ||
} | ||
} | ||
} | ||
} | ||
|
||
// configureMCPContainer configures the MCP container in the pod template | ||
|
@@ -1040,6 +1119,7 @@ func configureMCPContainer( | |
envVarList []*corev1apply.EnvVarApplyConfiguration, | ||
transportType string, | ||
options *runtime.DeployWorkloadOptions, | ||
isOpenShift bool, | ||
) error { | ||
// Get the "mcp" container if it exists | ||
mcpContainer := getMCPContainer(podTemplateSpec) | ||
|
@@ -1049,7 +1129,7 @@ func configureMCPContainer( | |
mcpContainer = corev1apply.Container().WithName("mcp") | ||
|
||
// Configure the container | ||
configureContainer(mcpContainer, image, command, attachStdio, envVarList) | ||
configureContainer(mcpContainer, image, command, attachStdio, envVarList, isOpenShift) | ||
|
||
// Configure ports if needed | ||
if options != nil && transportType == string(transtypes.TransportTypeSSE) { | ||
|
@@ -1064,7 +1144,7 @@ func configureMCPContainer( | |
podTemplateSpec.Spec.WithContainers(mcpContainer) | ||
} else { | ||
// Configure the existing container | ||
configureContainer(mcpContainer, image, command, attachStdio, envVarList) | ||
configureContainer(mcpContainer, image, command, attachStdio, envVarList, isOpenShift) | ||
|
||
// Configure ports if needed | ||
if options != nil && transportType == string(transtypes.TransportTypeSSE) { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
package kubernetes | ||
|
||
import ( | ||
"os" | ||
"strings" | ||
"time" | ||
|
||
"k8s.io/apimachinery/pkg/runtime/schema" | ||
"k8s.io/client-go/discovery" | ||
"k8s.io/client-go/rest" | ||
|
||
"github.com/stacklok/toolhive/pkg/logger" | ||
) | ||
|
||
// extra kinds | ||
const ( | ||
// defaultRetries is the number of times a resource discovery is retried | ||
defaultRetries = 10 | ||
|
||
//defaultRetryInterval is the interval to wait before retring a resource discovery | ||
defaultRetryInterval = 3 * time.Second | ||
) | ||
|
||
var isOpenShift *bool = nil | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (Not needed to include the PR, we can iterate on the patches in subsequent PRs as well) |
||
|
||
// DetectOpenShiftWith determines whether the current cluster is an OpenShift | ||
// cluster by attempting to discover the Route resource (route.openshift.io/v1). | ||
// It returns true when the resource is present. The provided REST config is used | ||
// by the discovery client to query the API server. | ||
func DetectOpenShiftWith(config *rest.Config) (bool, error) { | ||
// If first time, check if we are running on OpenShift | ||
if isOpenShift == nil { | ||
value, ok := os.LookupEnv("OPERATOR_OPENSHIFT") | ||
if ok { | ||
//ctrl.Log.V(1).Info("Set by env-var 'OPERATOR_OPENSHIFT': " + value) | ||
logger.Infof("OpenShift set by env var 'OPERATOR_OPENSHIFT': " + value) | ||
return strings.ToLower(value) == "true", nil | ||
} | ||
|
||
discoveryClient, err := discovery.NewDiscoveryClientForConfig(config) | ||
if err != nil { | ||
return false, err | ||
} | ||
|
||
isOpenShiftResourcePresent := false | ||
for i := 0; i < defaultRetries; i++ { | ||
isOpenShiftResourcePresent, err = discovery.IsResourceEnabled(discoveryClient, | ||
schema.GroupVersionResource{ | ||
Group: "route.openshift.io", | ||
Version: "v1", | ||
Resource: "routes", | ||
}) | ||
|
||
if err == nil { | ||
break | ||
} | ||
|
||
time.Sleep(defaultRetryInterval) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we consider using |
||
} | ||
|
||
if err != nil { | ||
return false, err | ||
} | ||
|
||
isOpenShift = &isOpenShiftResourcePresent | ||
if isOpenShiftResourcePresent { | ||
logger.Infof("OpenShift detected by route resource check.") | ||
} | ||
} | ||
|
||
// Otherwise, return the cached value | ||
return *isOpenShift, nil | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
it seems like the fact that we are adding the variables during the initial deployment but not adding them in
deploymentNeedsUpdate
is causing some unit test failures.It seems like
deploymentNeedsUpdate
is comparingcontainer.Env
withexpectedProxyEnv
, but it should account for the automatically added environment variables fromensureRequiredEnvVars
as well.