diff --git a/cmd/mapt/cmd/aws/services/snc.go b/cmd/mapt/cmd/aws/services/snc.go index 5cbd4b254..b85dd84a5 100644 --- a/cmd/mapt/cmd/aws/services/snc.go +++ b/cmd/mapt/cmd/aws/services/snc.go @@ -24,7 +24,7 @@ const ( disableClusterReadinessDesc = "If this flag is set it will skip the checks for the cluster readiness. In this case the kubeconfig can not be generated" sncProfile = "profile" - sncProfileDesc = "comma separated list of profiles to apply on the SNC cluster. Profiles available: virtualization" + sncProfileDesc = "comma separated list of profiles to apply on the SNC cluster. Profiles available: virtualization, serverless-serving, serverless-eventing, serverless, servicemesh, ai. The ai profile automatically includes servicemesh and serverless-serving as prerequisites and raises the minimum instance size to 16 vCPUs" ) func GetOpenshiftSNCCmd() *cobra.Command { @@ -62,6 +62,9 @@ func createSNC() *cobra.Command { if sncApi.ProfilesRequireNestedVirt(profiles) { computeReq.NestedVirt = true } + if minCPUs := sncApi.ProfilesMinCPUs(profiles); minCPUs > computeReq.CPUs { + computeReq.CPUs = minCPUs + } if _, err := openshiftsnc.Create( &maptContext.ContextArgs{ Context: cmd.Context(), diff --git a/docs/aws/openshift-snc.md b/docs/aws/openshift-snc.md index c49b97f5a..cb5224be2 100644 --- a/docs/aws/openshift-snc.md +++ b/docs/aws/openshift-snc.md @@ -45,7 +45,7 @@ After the AMI is published and accessible by the account, we can use the followi --pull-secret-file /home/tester/Downloads/pull-secret ``` -After the above command succeeds the `kubeconfig` to access the deployed cluster will be available in `/tmp/snc/kubeconfig` +When `--conn-details-output` is set, the `kubeconfig` is written to disk as soon as the cluster is ready — before any profile deployment begins. This means the kubeconfig is available at `<conn-details-output>/kubeconfig` even if a profile installation fails or times out. 
## Profiles @@ -62,13 +62,18 @@ mapt aws openshift-snc create \ --profile virtualization ``` -Multiple profiles can be specified as a comma-separated list (e.g., `--profile virtualization,serverless`). +Multiple profiles can be specified as a comma-separated list (e.g., `--profile virtualization,ai`). ### Available profiles | Profile | Description | |---------|-------------| | `virtualization` | Installs [OpenShift Virtualization](https://docs.openshift.com/container-platform/latest/virt/about_virt/about-virt.html) (CNV) on the cluster, enabling virtual machines to run on the single-node cluster. When this profile is selected, nested virtualization is automatically enabled on the cloud instance. Because standard Nitro-based instances do not expose `/dev/kvm`, a bare metal instance is required.| +| `serverless-serving` | Installs [OpenShift Serverless](https://docs.openshift.com/serverless/latest/about/about-serverless.html) and creates a KnativeServing instance, enabling serverless workloads (Knative Serving) on the cluster.| +| `serverless-eventing` | Installs [OpenShift Serverless](https://docs.openshift.com/serverless/latest/about/about-serverless.html) and creates a KnativeEventing instance, enabling event-driven workloads (Knative Eventing) on the cluster.| +| `serverless` | Installs [OpenShift Serverless](https://docs.openshift.com/serverless/latest/about/about-serverless.html) and creates both KnativeServing and KnativeEventing instances.| +| `servicemesh` | Installs [OpenShift Service Mesh 3](https://docs.openshift.com/service-mesh/latest/about/about-ossm.html) (Sail/Istio) on the cluster, deploying IstioCNI and an Istio control plane.| +| `ai` | Installs [Red Hat OpenShift AI](https://docs.redhat.com/en/documentation/red_hat_openshift_ai_self-managed) (RHOAI) on the cluster. Automatically installs Service Mesh v2 (Maistra) and Serverless Serving as prerequisites for Kserve. 
All three operators install in parallel; the DataScienceCluster CR is only created once Service Mesh and Serverless are fully ready. The minimum instance size is raised to 16 vCPUs (from the default 8) to accommodate the additional operators. **Cannot be combined with the `servicemesh` profile** (which deploys Service Mesh v3/Sail).| ### Adding new profiles @@ -76,5 +81,5 @@ Multiple profiles can be specified as a comma-separated list (e.g., `--profile v To add a new profile: 1. Create `profile_.go` under `pkg/target/service/snc/` — Go file with a `deploy()` function that uses the Pulumi Kubernetes provider to create the required resources (Namespace, OperatorGroup, Subscription, CRs, etc.) -2. Register the profile name in `profiles.go` by adding it to `validProfiles` and the `DeployProfile()` switch +2. Register the profile name in `profiles.go` by adding it to `validProfiles` and the `DeployProfiles()` function diff --git a/pkg/provider/aws/action/snc/snc.go b/pkg/provider/aws/action/snc/snc.go index 10aba5c62..35cc98ff2 100644 --- a/pkg/provider/aws/action/snc/snc.go +++ b/pkg/provider/aws/action/snc/snc.go @@ -264,20 +264,24 @@ func (r *openshiftSNCRequest) deploy(ctx *pulumi.Context) error { } ctx.Export(fmt.Sprintf("%s-%s", *r.prefix, apiSNC.OutputKubeconfig), pulumi.ToSecret(kubeconfig)) + // Write kubeconfig to disk early so it is available even if profile deployment fails + if outputPath := r.mCtx.GetResultsOutputPath(); len(outputPath) > 0 { + kubeconfig.ApplyT(func(kc string) error { + return os.WriteFile(fmt.Sprintf("%s/kubeconfig", outputPath), []byte(kc), 0600) + }) + } // Deploy profiles using Kubernetes provider if len(r.profiles) > 0 { k8sProvider, err := apiSNC.NewK8sProvider(ctx, "k8s-provider", kubeconfig) if err != nil { return err } - for _, profileName := range r.profiles { - if _, err := apiSNC.DeployProfile(ctx, profileName, &apiSNC.ProfileDeployArgs{ - K8sProvider: k8sProvider, - Kubeconfig: kubeconfig, - Prefix: *r.prefix, - }); err != 
nil { - return err - } + if err := apiSNC.DeployProfiles(ctx, r.profiles, &apiSNC.ProfileDeployArgs{ + K8sProvider: k8sProvider, + Kubeconfig: kubeconfig, + Prefix: *r.prefix, + }); err != nil { + return err } } return nil diff --git a/pkg/target/service/snc/client.go b/pkg/target/service/snc/client.go index 1d1bc2d20..1d4a52113 100644 --- a/pkg/target/service/snc/client.go +++ b/pkg/target/service/snc/client.go @@ -69,14 +69,22 @@ func waitForCRCondition(ctx context.Context, kubeconfig string, gvr schema.Group } // findResource returns a single resource by exact name or by name prefix. +// When namespace is empty the resource is looked up at cluster scope. func findResource(ctx context.Context, dc dynamic.Interface, gvr schema.GroupVersionResource, namespace, name string, prefixMatch bool) (*unstructured.Unstructured, error) { + var ri dynamic.ResourceInterface + if namespace != "" { + ri = dc.Resource(gvr).Namespace(namespace) + } else { + ri = dc.Resource(gvr) + } + if !prefixMatch { - return dc.Resource(gvr).Namespace(namespace).Get(ctx, name, metav1.GetOptions{}) + return ri.Get(ctx, name, metav1.GetOptions{}) } - list, err := dc.Resource(gvr).Namespace(namespace).List(ctx, metav1.ListOptions{}) + list, err := ri.List(ctx, metav1.ListOptions{}) if err != nil { return nil, err } diff --git a/pkg/target/service/snc/profile_openshift_ai.go b/pkg/target/service/snc/profile_openshift_ai.go new file mode 100644 index 000000000..3ce6a38ac --- /dev/null +++ b/pkg/target/service/snc/profile_openshift_ai.go @@ -0,0 +1,168 @@ +package snc + +import ( + "fmt" + "time" + + "github.com/pulumi/pulumi-kubernetes/sdk/v4/go/kubernetes/apiextensions" + corev1 "github.com/pulumi/pulumi-kubernetes/sdk/v4/go/kubernetes/core/v1" + metav1 "github.com/pulumi/pulumi-kubernetes/sdk/v4/go/kubernetes/meta/v1" + "github.com/pulumi/pulumi/sdk/v3/go/pulumi" + "k8s.io/apimachinery/pkg/runtime/schema" +) + +const ( + rhoaiNamespace = "redhat-ods-operator" +) + +var ( + dscGVR = 
schema.GroupVersionResource{ + Group: "datasciencecluster.opendatahub.io", + Version: "v1", + Resource: "datascienceclusters", + } +) + +// deployOpenShiftAI installs the RHOAI operator and creates a DataScienceCluster. +// The entire RHOAI installation is gated on prereqs (ServiceMesh v2, Authorino, +// and Serverless readiness outputs) so that when the operator starts and auto-creates +// the DSCI, it finds all dependencies already in place. +func deployOpenShiftAI(ctx *pulumi.Context, args *ProfileDeployArgs, prereqs []pulumi.StringOutput) (pulumi.Resource, error) { + goCtx := ctx.Context() + rn := func(suffix string) string { + return fmt.Sprintf("%s-rhoai-%s", args.Prefix, suffix) + } + + // Gate the entire RHOAI installation on prerequisites. + // The namespace name won't resolve until all prereqs are ready, + // which delays the operator install until SM v2 + Authorino + + // Serverless are fully operational. + nsName := pulumi.String(rhoaiNamespace).ToStringOutput() + for _, p := range prereqs { + prev := nsName + nsName = pulumi.All(prev, p).ApplyT( + func(args []interface{}) string { + return args[0].(string) + }).(pulumi.StringOutput) + } + + // Create Namespace (blocked until all prereqs resolve) + ns, err := corev1.NewNamespace(ctx, rn("ns"), + &corev1.NamespaceArgs{ + Metadata: &metav1.ObjectMetaArgs{ + Name: nsName, + }, + }, + pulumi.Provider(args.K8sProvider), + pulumi.DependsOn(args.Deps)) + if err != nil { + return nil, err + } + + // Create OperatorGroup (AllNamespaces mode — no targetNamespaces) + og, err := apiextensions.NewCustomResource(ctx, rn("og"), + &apiextensions.CustomResourceArgs{ + ApiVersion: pulumi.String("operators.coreos.com/v1"), + Kind: pulumi.String("OperatorGroup"), + Metadata: &metav1.ObjectMetaArgs{ + Name: pulumi.String("redhat-ods-operator-group"), + Namespace: pulumi.String(rhoaiNamespace), + }, + }, + pulumi.Provider(args.K8sProvider), + pulumi.DependsOn([]pulumi.Resource{ns})) + if err != nil { + return nil, err + } + 
+ // Create Subscription + sub, err := apiextensions.NewCustomResource(ctx, rn("sub"), + &apiextensions.CustomResourceArgs{ + ApiVersion: pulumi.String("operators.coreos.com/v1alpha1"), + Kind: pulumi.String("Subscription"), + Metadata: &metav1.ObjectMetaArgs{ + Name: pulumi.String("rhods-operator"), + Namespace: pulumi.String(rhoaiNamespace), + }, + OtherFields: map[string]interface{}{ + "spec": map[string]interface{}{ + "source": "redhat-operators", + "sourceNamespace": "openshift-marketplace", + "name": "rhods-operator", + "channel": "stable", + "installPlanApproval": "Automatic", + }, + }, + }, + pulumi.Provider(args.K8sProvider), + pulumi.DependsOn([]pulumi.Resource{og})) + if err != nil { + return nil, err + } + + // Wait for CSV to succeed (operator fully installed). + csvReady := pulumi.All(sub.ID(), args.Kubeconfig).ApplyT( + func(allArgs []interface{}) (string, error) { + kc := allArgs[1].(string) + if err := waitForCRCondition(goCtx, kc, csvGVR, + rhoaiNamespace, "rhods-operator", + "", "Succeeded", 20*time.Minute, true); err != nil { + return "", fmt.Errorf("waiting for RHOAI CSV: %w", err) + } + return "ready", nil + }).(pulumi.StringOutput) + + // Create DataScienceCluster CR after RHOAI CSV is ready. 
+ dscName := csvReady.ApplyT(func(_ string) string { + return "default-dsc" + }).(pulumi.StringOutput) + + dsc, err := apiextensions.NewCustomResource(ctx, rn("dsc"), + &apiextensions.CustomResourceArgs{ + ApiVersion: pulumi.String("datasciencecluster.opendatahub.io/v1"), + Kind: pulumi.String("DataScienceCluster"), + Metadata: &metav1.ObjectMetaArgs{ + Name: dscName, + }, + OtherFields: map[string]interface{}{ + "spec": map[string]interface{}{ + "components": map[string]interface{}{ + "dashboard": map[string]interface{}{"managementState": "Managed"}, + "workbenches": map[string]interface{}{"managementState": "Managed"}, + "datasciencepipelines": map[string]interface{}{"managementState": "Managed"}, + // Kserve depends on ServiceMesh and Serverless which are + // deployed as implicit dependencies of the AI profile. + "kserve": map[string]interface{}{"managementState": "Managed"}, + "modelmeshserving": map[string]interface{}{"managementState": "Managed"}, + "ray": map[string]interface{}{"managementState": "Managed"}, + // Kueue webhook fails on SNC due to missing endpoints + "kueue": map[string]interface{}{"managementState": "Removed"}, + "trustyai": map[string]interface{}{"managementState": "Managed"}, + "codeflare": map[string]interface{}{"managementState": "Managed"}, + "trainingoperator": map[string]interface{}{"managementState": "Removed"}, + "modelregistry": map[string]interface{}{"managementState": "Removed"}, + }, + }, + }, + }, + pulumi.Provider(args.K8sProvider)) + if err != nil { + return nil, err + } + + // Wait for DataScienceCluster to be ready. 
+ dscReady := pulumi.All(dsc.ID(), args.Kubeconfig).ApplyT( + func(allArgs []interface{}) (string, error) { + kc := allArgs[1].(string) + if err := waitForCRCondition(goCtx, kc, dscGVR, + "", "default-dsc", + "Ready", "True", 40*time.Minute, false); err != nil { + return "", fmt.Errorf("waiting for DataScienceCluster: %w", err) + } + return "ready", nil + }).(pulumi.StringOutput) + + ctx.Export("dscReady", dscReady) + + return dsc, nil +} diff --git a/pkg/target/service/snc/profile_serverless.go b/pkg/target/service/snc/profile_serverless.go new file mode 100644 index 000000000..067ca4d0b --- /dev/null +++ b/pkg/target/service/snc/profile_serverless.go @@ -0,0 +1,229 @@ +package snc + +import ( + "fmt" + "time" + + "github.com/pulumi/pulumi-kubernetes/sdk/v4/go/kubernetes/apiextensions" + corev1 "github.com/pulumi/pulumi-kubernetes/sdk/v4/go/kubernetes/core/v1" + metav1 "github.com/pulumi/pulumi-kubernetes/sdk/v4/go/kubernetes/meta/v1" + "github.com/pulumi/pulumi/sdk/v3/go/pulumi" + "k8s.io/apimachinery/pkg/runtime/schema" +) + +const ( + serverlessNamespace = "openshift-serverless" + knativeServingNamespace = "knative-serving" + knativeEventingNamespace = "knative-eventing" +) + +var ( + knativeServingGVR = schema.GroupVersionResource{ + Group: "operator.knative.dev", + Version: "v1beta1", + Resource: "knativeservings", + } + + knativeEventingGVR = schema.GroupVersionResource{ + Group: "operator.knative.dev", + Version: "v1beta1", + Resource: "knativeeventings", + } +) + +// deployServerlessOperator installs the OpenShift Serverless operator and waits +// for the CSV to succeed. It returns a pulumi.StringOutput that resolves after +// the operator is ready, suitable for threading namespace names through ApplyT. 
+func deployServerlessOperator(ctx *pulumi.Context, args *ProfileDeployArgs) (pulumi.StringOutput, error) { + goCtx := ctx.Context() + rn := func(suffix string) string { + return fmt.Sprintf("%s-serverless-%s", args.Prefix, suffix) + } + + // Create openshift-serverless namespace + ns, err := corev1.NewNamespace(ctx, rn("ns"), + &corev1.NamespaceArgs{ + Metadata: &metav1.ObjectMetaArgs{ + Name: pulumi.String(serverlessNamespace), + }, + }, + pulumi.Provider(args.K8sProvider), + pulumi.DependsOn(args.Deps)) + if err != nil { + return pulumi.StringOutput{}, err + } + + // Create OperatorGroup (AllNamespaces — empty spec) + og, err := apiextensions.NewCustomResource(ctx, rn("og"), + &apiextensions.CustomResourceArgs{ + ApiVersion: pulumi.String("operators.coreos.com/v1"), + Kind: pulumi.String("OperatorGroup"), + Metadata: &metav1.ObjectMetaArgs{ + Name: pulumi.String("serverless-operators"), + Namespace: pulumi.String(serverlessNamespace), + }, + OtherFields: map[string]interface{}{ + "spec": map[string]interface{}{}, + }, + }, + pulumi.Provider(args.K8sProvider), + pulumi.DependsOn([]pulumi.Resource{ns})) + if err != nil { + return pulumi.StringOutput{}, err + } + + // Create Subscription + sub, err := apiextensions.NewCustomResource(ctx, rn("sub"), + &apiextensions.CustomResourceArgs{ + ApiVersion: pulumi.String("operators.coreos.com/v1alpha1"), + Kind: pulumi.String("Subscription"), + Metadata: &metav1.ObjectMetaArgs{ + Name: pulumi.String("serverless-operator"), + Namespace: pulumi.String(serverlessNamespace), + }, + OtherFields: map[string]interface{}{ + "spec": map[string]interface{}{ + "source": "redhat-operators", + "sourceNamespace": "openshift-marketplace", + "name": "serverless-operator", + "channel": "stable", + "installPlanApproval": "Automatic", + }, + }, + }, + pulumi.Provider(args.K8sProvider), + pulumi.DependsOn([]pulumi.Resource{og})) + if err != nil { + return pulumi.StringOutput{}, err + } + + // Wait for CSV to succeed (operator fully installed). 
+ operatorReady := pulumi.All(sub.ID(), args.Kubeconfig).ApplyT( + func(allArgs []interface{}) (string, error) { + kc := allArgs[1].(string) + if err := waitForCRCondition(goCtx, kc, csvGVR, + serverlessNamespace, "serverless-operator", + "", "Succeeded", 20*time.Minute, true); err != nil { + return "", fmt.Errorf("waiting for Serverless CSV: %w", err) + } + return "ready", nil + }).(pulumi.StringOutput) + + return operatorReady, nil +} + +// deployKnativeServing creates a KnativeServing CR and waits for it to be ready. +// The operatorReady output is used to chain the dependency on the operator installation. +func deployKnativeServing(ctx *pulumi.Context, args *ProfileDeployArgs, operatorReady pulumi.StringOutput) (pulumi.Resource, pulumi.StringOutput, error) { + goCtx := ctx.Context() + rn := func(suffix string) string { + return fmt.Sprintf("%s-serverless-%s", args.Prefix, suffix) + } + + // Thread the wait into the namespace name via ApplyT + ksNSName := operatorReady.ApplyT(func(_ string) string { + return knativeServingNamespace + }).(pulumi.StringOutput) + + // Create knative-serving namespace + ksNS, err := corev1.NewNamespace(ctx, rn("ks-ns"), + &corev1.NamespaceArgs{ + Metadata: &metav1.ObjectMetaArgs{ + Name: ksNSName, + }, + }, + pulumi.Provider(args.K8sProvider)) + if err != nil { + return nil, pulumi.StringOutput{}, err + } + + // Create KnativeServing CR + ks, err := apiextensions.NewCustomResource(ctx, rn("ks"), + &apiextensions.CustomResourceArgs{ + ApiVersion: pulumi.String("operator.knative.dev/v1beta1"), + Kind: pulumi.String("KnativeServing"), + Metadata: &metav1.ObjectMetaArgs{ + Name: pulumi.String("knative-serving"), + Namespace: pulumi.String(knativeServingNamespace), + }, + }, + pulumi.Provider(args.K8sProvider), + pulumi.DependsOn([]pulumi.Resource{ksNS})) + if err != nil { + return nil, pulumi.StringOutput{}, err + } + + // Wait for KnativeServing to be ready. 
+ ksReady := pulumi.All(ks.ID(), args.Kubeconfig).ApplyT( + func(allArgs []interface{}) (string, error) { + kc := allArgs[1].(string) + if err := waitForCRCondition(goCtx, kc, knativeServingGVR, + knativeServingNamespace, "knative-serving", + "Ready", "True", 20*time.Minute, false); err != nil { + return "", fmt.Errorf("waiting for KnativeServing: %w", err) + } + return "ready", nil + }).(pulumi.StringOutput) + + ctx.Export("knativeServingReady", ksReady) + + return ks, ksReady, nil +} + +// deployKnativeEventing creates a KnativeEventing CR and waits for it to be ready. +// The operatorReady output is used to chain the dependency on the operator installation. +func deployKnativeEventing(ctx *pulumi.Context, args *ProfileDeployArgs, operatorReady pulumi.StringOutput) (pulumi.Resource, error) { + goCtx := ctx.Context() + rn := func(suffix string) string { + return fmt.Sprintf("%s-serverless-%s", args.Prefix, suffix) + } + + // Thread the wait into the namespace name via ApplyT + keNSName := operatorReady.ApplyT(func(_ string) string { + return knativeEventingNamespace + }).(pulumi.StringOutput) + + // Create knative-eventing namespace + keNS, err := corev1.NewNamespace(ctx, rn("ke-ns"), + &corev1.NamespaceArgs{ + Metadata: &metav1.ObjectMetaArgs{ + Name: keNSName, + }, + }, + pulumi.Provider(args.K8sProvider)) + if err != nil { + return nil, err + } + + // Create KnativeEventing CR + ke, err := apiextensions.NewCustomResource(ctx, rn("ke"), + &apiextensions.CustomResourceArgs{ + ApiVersion: pulumi.String("operator.knative.dev/v1beta1"), + Kind: pulumi.String("KnativeEventing"), + Metadata: &metav1.ObjectMetaArgs{ + Name: pulumi.String("knative-eventing"), + Namespace: pulumi.String(knativeEventingNamespace), + }, + }, + pulumi.Provider(args.K8sProvider), + pulumi.DependsOn([]pulumi.Resource{keNS})) + if err != nil { + return nil, err + } + + // Wait for KnativeEventing to be ready. 
+ keReady := pulumi.All(ke.ID(), args.Kubeconfig).ApplyT( + func(allArgs []interface{}) (string, error) { + kc := allArgs[1].(string) + if err := waitForCRCondition(goCtx, kc, knativeEventingGVR, + knativeEventingNamespace, "knative-eventing", + "Ready", "True", 20*time.Minute, false); err != nil { + return "", fmt.Errorf("waiting for KnativeEventing: %w", err) + } + return "ready", nil + }).(pulumi.StringOutput) + + ctx.Export("knativeEventingReady", keReady) + + return ke, nil +} diff --git a/pkg/target/service/snc/profile_servicemesh.go b/pkg/target/service/snc/profile_servicemesh.go new file mode 100644 index 000000000..5f6721289 --- /dev/null +++ b/pkg/target/service/snc/profile_servicemesh.go @@ -0,0 +1,178 @@ +package snc + +import ( + "fmt" + "time" + + "github.com/pulumi/pulumi-kubernetes/sdk/v4/go/kubernetes/apiextensions" + corev1 "github.com/pulumi/pulumi-kubernetes/sdk/v4/go/kubernetes/core/v1" + metav1 "github.com/pulumi/pulumi-kubernetes/sdk/v4/go/kubernetes/meta/v1" + "github.com/pulumi/pulumi/sdk/v3/go/pulumi" + "k8s.io/apimachinery/pkg/runtime/schema" +) + +const ( + istioSystemNamespace = "istio-system" + istioCNINamespace = "istio-cni" +) + +var ( + istioGVR = schema.GroupVersionResource{ + Group: "sailoperator.io", + Version: "v1", + Resource: "istios", + } + istioCNIGVR = schema.GroupVersionResource{ + Group: "sailoperator.io", + Version: "v1", + Resource: "istiocnis", + } +) + +// deployServiceMesh installs OpenShift Service Mesh 3 and returns the last +// resource together with a StringOutput that resolves when Istio is fully ready. 
+func deployServiceMesh(ctx *pulumi.Context, args *ProfileDeployArgs) (pulumi.Resource, pulumi.StringOutput, error) { + goCtx := ctx.Context() + rn := func(suffix string) string { + return fmt.Sprintf("%s-smesh-%s", args.Prefix, suffix) + } + + // Create istio-system namespace + nsSystem, err := corev1.NewNamespace(ctx, rn("ns-system"), + &corev1.NamespaceArgs{ + Metadata: &metav1.ObjectMetaArgs{ + Name: pulumi.String(istioSystemNamespace), + }, + }, + pulumi.Provider(args.K8sProvider), + pulumi.DependsOn(args.Deps)) + if err != nil { + return nil, pulumi.StringOutput{}, err + } + + // Create istio-cni namespace + nsCNI, err := corev1.NewNamespace(ctx, rn("ns-cni"), + &corev1.NamespaceArgs{ + Metadata: &metav1.ObjectMetaArgs{ + Name: pulumi.String(istioCNINamespace), + }, + }, + pulumi.Provider(args.K8sProvider), + pulumi.DependsOn(args.Deps)) + if err != nil { + return nil, pulumi.StringOutput{}, err + } + + // Create Subscription for the OpenShift Service Mesh 3 operator + sub, err := apiextensions.NewCustomResource(ctx, rn("sub"), + &apiextensions.CustomResourceArgs{ + ApiVersion: pulumi.String("operators.coreos.com/v1alpha1"), + Kind: pulumi.String("Subscription"), + Metadata: &metav1.ObjectMetaArgs{ + Name: pulumi.String("servicemeshoperator3"), + Namespace: pulumi.String("openshift-operators"), + }, + OtherFields: map[string]interface{}{ + "spec": map[string]interface{}{ + "source": "redhat-operators", + "sourceNamespace": "openshift-marketplace", + "name": "servicemeshoperator3", + "channel": "stable", + "installPlanApproval": "Automatic", + }, + }, + }, + pulumi.Provider(args.K8sProvider), + pulumi.DependsOn([]pulumi.Resource{nsSystem, nsCNI})) + if err != nil { + return nil, pulumi.StringOutput{}, err + } + + // Wait for the Service Mesh operator CSV to succeed + csvReady := pulumi.All(sub.ID(), args.Kubeconfig).ApplyT( + func(allArgs []interface{}) (string, error) { + kc := allArgs[1].(string) + if err := waitForCRCondition(goCtx, kc, csvGVR, + 
"openshift-operators", "servicemeshoperator3", + "", "Succeeded", 20*time.Minute, true); err != nil { + return "", fmt.Errorf("waiting for Service Mesh operator CSV: %w", err) + } + return "ready", nil + }).(pulumi.StringOutput) + + // Create IstioCNI CR + istioCNIName := csvReady.ApplyT(func(_ string) string { + return "default" + }).(pulumi.StringOutput) + + // IstioCNI is cluster-scoped + cni, err := apiextensions.NewCustomResource(ctx, rn("istiocni"), + &apiextensions.CustomResourceArgs{ + ApiVersion: pulumi.String("sailoperator.io/v1"), + Kind: pulumi.String("IstioCNI"), + Metadata: &metav1.ObjectMetaArgs{ + Name: istioCNIName, + }, + OtherFields: map[string]interface{}{ + "spec": map[string]interface{}{ + "namespace": istioCNINamespace, + "profile": "openshift", + }, + }, + }, + pulumi.Provider(args.K8sProvider)) + if err != nil { + return nil, pulumi.StringOutput{}, err + } + + // Wait for IstioCNI to be ready (cluster-scoped, empty namespace) + cniReady := pulumi.All(cni.ID(), args.Kubeconfig).ApplyT( + func(allArgs []interface{}) (string, error) { + kc := allArgs[1].(string) + if err := waitForCRCondition(goCtx, kc, istioCNIGVR, + "", "default", + "Ready", "True", 20*time.Minute, false); err != nil { + return "", fmt.Errorf("waiting for IstioCNI: %w", err) + } + return "ready", nil + }).(pulumi.StringOutput) + + // Create Istio CR (cluster-scoped, depends on CNI being ready) + istioName := cniReady.ApplyT(func(_ string) string { + return "default" + }).(pulumi.StringOutput) + + istio, err := apiextensions.NewCustomResource(ctx, rn("istio"), + &apiextensions.CustomResourceArgs{ + ApiVersion: pulumi.String("sailoperator.io/v1"), + Kind: pulumi.String("Istio"), + Metadata: &metav1.ObjectMetaArgs{ + Name: istioName, + }, + OtherFields: map[string]interface{}{ + "spec": map[string]interface{}{ + "namespace": istioSystemNamespace, + }, + }, + }, + pulumi.Provider(args.K8sProvider)) + if err != nil { + return nil, pulumi.StringOutput{}, err + } + + // Wait for 
Istio to be ready (cluster-scoped, empty namespace) + istioReady := pulumi.All(istio.ID(), args.Kubeconfig).ApplyT( + func(allArgs []interface{}) (string, error) { + kc := allArgs[1].(string) + if err := waitForCRCondition(goCtx, kc, istioGVR, + "", "default", + "Ready", "True", 20*time.Minute, false); err != nil { + return "", fmt.Errorf("waiting for Istio: %w", err) + } + return "ready", nil + }).(pulumi.StringOutput) + + ctx.Export("istioReady", istioReady) + + return istio, istioReady, nil +} diff --git a/pkg/target/service/snc/profile_servicemesh_v2.go b/pkg/target/service/snc/profile_servicemesh_v2.go new file mode 100644 index 000000000..143384ea3 --- /dev/null +++ b/pkg/target/service/snc/profile_servicemesh_v2.go @@ -0,0 +1,187 @@ +package snc + +import ( + "fmt" + "time" + + "github.com/pulumi/pulumi-kubernetes/sdk/v4/go/kubernetes/apiextensions" + corev1 "github.com/pulumi/pulumi-kubernetes/sdk/v4/go/kubernetes/core/v1" + metav1 "github.com/pulumi/pulumi-kubernetes/sdk/v4/go/kubernetes/meta/v1" + "github.com/pulumi/pulumi/sdk/v3/go/pulumi" + "k8s.io/apimachinery/pkg/runtime/schema" +) + +var ( + smcpGVR = schema.GroupVersionResource{ + Group: "maistra.io", + Version: "v2", + Resource: "servicemeshcontrolplanes", + } +) + +// deployServiceMeshV2 installs OpenShift Service Mesh v2 (Maistra/Istio) and the +// Authorino operator, both required by RHOAI for Kserve. It creates an SMCP named +// "data-science-smcp" in istio-system, matching the DSCI defaults. +// Returns a StringOutput that resolves when both SMCP and Authorino are ready. 
+func deployServiceMeshV2(ctx *pulumi.Context, args *ProfileDeployArgs) (pulumi.Resource, pulumi.StringOutput, error) { + goCtx := ctx.Context() + rn := func(suffix string) string { + return fmt.Sprintf("%s-smeshv2-%s", args.Prefix, suffix) + } + + // Create istio-system namespace + ns, err := corev1.NewNamespace(ctx, rn("ns"), + &corev1.NamespaceArgs{ + Metadata: &metav1.ObjectMetaArgs{ + Name: pulumi.String(istioSystemNamespace), + }, + }, + pulumi.Provider(args.K8sProvider), + pulumi.DependsOn(args.Deps)) + if err != nil { + return nil, pulumi.StringOutput{}, err + } + + // --- Service Mesh v2 operator --- + + // Create Subscription (openshift-operators is a pre-existing global namespace + // with an OperatorGroup, no need to create one). + smSub, err := apiextensions.NewCustomResource(ctx, rn("sub"), + &apiextensions.CustomResourceArgs{ + ApiVersion: pulumi.String("operators.coreos.com/v1alpha1"), + Kind: pulumi.String("Subscription"), + Metadata: &metav1.ObjectMetaArgs{ + Name: pulumi.String("servicemeshoperator"), + Namespace: pulumi.String("openshift-operators"), + }, + OtherFields: map[string]interface{}{ + "spec": map[string]interface{}{ + "source": "redhat-operators", + "sourceNamespace": "openshift-marketplace", + "name": "servicemeshoperator", + "channel": "stable", + "installPlanApproval": "Automatic", + }, + }, + }, + pulumi.Provider(args.K8sProvider), + pulumi.DependsOn([]pulumi.Resource{ns})) + if err != nil { + return nil, pulumi.StringOutput{}, err + } + + // Wait for the Service Mesh v2 CSV to succeed + smCSVReady := pulumi.All(smSub.ID(), args.Kubeconfig).ApplyT( + func(allArgs []interface{}) (string, error) { + kc := allArgs[1].(string) + if err := waitForCRCondition(goCtx, kc, csvGVR, + "openshift-operators", "servicemeshoperator", + "", "Succeeded", 20*time.Minute, true); err != nil { + return "", fmt.Errorf("waiting for Service Mesh v2 CSV: %w", err) + } + return "ready", nil + }).(pulumi.StringOutput) + + // Create ServiceMeshControlPlane — 
"data-science-smcp" matches the DSCI default + smcpName := smCSVReady.ApplyT(func(_ string) string { + return "data-science-smcp" + }).(pulumi.StringOutput) + + smcp, err := apiextensions.NewCustomResource(ctx, rn("smcp"), + &apiextensions.CustomResourceArgs{ + ApiVersion: pulumi.String("maistra.io/v2"), + Kind: pulumi.String("ServiceMeshControlPlane"), + Metadata: &metav1.ObjectMetaArgs{ + Name: smcpName, + Namespace: pulumi.String(istioSystemNamespace), + }, + OtherFields: map[string]interface{}{ + "spec": map[string]interface{}{ + "version": "v2.6", + "tracing": map[string]interface{}{ + "type": "None", + }, + "security": map[string]interface{}{ + "dataPlane": map[string]interface{}{ + "mtls": true, + }, + }, + "addons": map[string]interface{}{ + "kiali": map[string]interface{}{ + "enabled": false, + }, + "grafana": map[string]interface{}{ + "enabled": false, + }, + "prometheus": map[string]interface{}{ + "enabled": false, + }, + }, + }, + }, + }, + pulumi.Provider(args.K8sProvider)) + if err != nil { + return nil, pulumi.StringOutput{}, err + } + + // Wait for SMCP to be ready + smcpReady := pulumi.All(smcp.ID(), args.Kubeconfig).ApplyT( + func(allArgs []interface{}) (string, error) { + kc := allArgs[1].(string) + if err := waitForCRCondition(goCtx, kc, smcpGVR, + istioSystemNamespace, "data-science-smcp", + "Ready", "True", 20*time.Minute, false); err != nil { + return "", fmt.Errorf("waiting for SMCP: %w", err) + } + return "ready", nil + }).(pulumi.StringOutput) + + // --- Authorino operator (required by RHOAI for ServiceMesh authorization) --- + + authSub, err := apiextensions.NewCustomResource(ctx, rn("authorino-sub"), + &apiextensions.CustomResourceArgs{ + ApiVersion: pulumi.String("operators.coreos.com/v1alpha1"), + Kind: pulumi.String("Subscription"), + Metadata: &metav1.ObjectMetaArgs{ + Name: pulumi.String("authorino-operator"), + Namespace: pulumi.String("openshift-operators"), + }, + OtherFields: map[string]interface{}{ + "spec": 
map[string]interface{}{ + "source": "redhat-operators", + "sourceNamespace": "openshift-marketplace", + "name": "authorino-operator", + "channel": "stable", + "installPlanApproval": "Automatic", + }, + }, + }, + pulumi.Provider(args.K8sProvider), + pulumi.DependsOn(args.Deps)) + if err != nil { + return nil, pulumi.StringOutput{}, err + } + + // Wait for Authorino CSV to succeed + authReady := pulumi.All(authSub.ID(), args.Kubeconfig).ApplyT( + func(allArgs []interface{}) (string, error) { + kc := allArgs[1].(string) + if err := waitForCRCondition(goCtx, kc, csvGVR, + "openshift-operators", "authorino-operator", + "", "Succeeded", 20*time.Minute, true); err != nil { + return "", fmt.Errorf("waiting for Authorino CSV: %w", err) + } + return "ready", nil + }).(pulumi.StringOutput) + + // Combine SMCP + Authorino readiness into a single output + allReady := pulumi.All(smcpReady, authReady).ApplyT( + func(_ []interface{}) string { + return "ready" + }).(pulumi.StringOutput) + + ctx.Export("smcpReady", allReady) + + return smcp, allReady, nil +} diff --git a/pkg/target/service/snc/profiles.go b/pkg/target/service/snc/profiles.go index c2be6b640..bd7b46d55 100644 --- a/pkg/target/service/snc/profiles.go +++ b/pkg/target/service/snc/profiles.go @@ -9,11 +9,21 @@ import ( ) const ( - ProfileVirtualization = "virtualization" + ProfileVirtualization = "virtualization" + ProfileServerlessServing = "serverless-serving" + ProfileServerlessEventing = "serverless-eventing" + ProfileServerless = "serverless" + ProfileServiceMesh = "servicemesh" + ProfileOpenShiftAI = "ai" ) // validProfiles is the single source of truth for supported profile names. -var validProfiles = []string{ProfileVirtualization} +var validProfiles = []string{ + ProfileVirtualization, + ProfileServerlessServing, ProfileServerlessEventing, ProfileServerless, + ProfileServiceMesh, + ProfileOpenShiftAI, +} // ProfileDeployArgs holds the arguments needed by a profile to deploy // its resources on the SNC cluster. 
@@ -24,25 +34,110 @@ type ProfileDeployArgs struct { Deps []pulumi.Resource } -// ValidateProfiles checks that all requested profiles are supported. +// ValidateProfiles checks that all requested profiles are supported and +// that there are no incompatible combinations. func ValidateProfiles(profiles []string) error { for _, p := range profiles { if !slices.Contains(validProfiles, p) { return fmt.Errorf("profile %q is not supported for SNC. Supported profiles: %v", p, validProfiles) } } + // AI uses Service Mesh v2 (Maistra); the servicemesh profile deploys v3 (Sail). + // Both target istio-system and are incompatible on the same cluster. + if slices.Contains(profiles, ProfileOpenShiftAI) && slices.Contains(profiles, ProfileServiceMesh) { + return fmt.Errorf("profiles %q and %q cannot be combined: AI requires Service Mesh v2 while the servicemesh profile deploys v3", + ProfileOpenShiftAI, ProfileServiceMesh) + } return nil } -// DeployProfile deploys the resources for a given profile on the SNC cluster. -// It returns the last resource created for dependency chaining. -func DeployProfile(ctx *pulumi.Context, profile string, args *ProfileDeployArgs) (pulumi.Resource, error) { - switch profile { - case ProfileVirtualization: - return deployVirtualization(ctx, args) - default: - return nil, fmt.Errorf("profile %q has no deploy function", profile) +// DeployProfiles deploys all requested profiles on the SNC cluster. +// It ensures shared dependencies (e.g. the Serverless operator) are only +// installed once, even when multiple profiles require them. +// The AI profile implicitly brings in Service Mesh v2 (Maistra) and +// serverless-serving as prerequisites for Kserve. 
+func DeployProfiles(ctx *pulumi.Context, profiles []string, args *ProfileDeployArgs) error { + needVirtualization := false + needServing := false + needEventing := false + needServiceMesh := false + needAI := false + + for _, p := range profiles { + switch p { + case ProfileVirtualization: + needVirtualization = true + case ProfileServerlessServing: + needServing = true + case ProfileServerlessEventing: + needEventing = true + case ProfileServerless: + needServing = true + needEventing = true + case ProfileServiceMesh: + needServiceMesh = true + case ProfileOpenShiftAI: + needAI = true + // AI requires serverless-serving for Kserve + needServing = true + default: + return fmt.Errorf("profile %q has no deploy function", p) + } + } + + if needVirtualization { + if _, err := deployVirtualization(ctx, args); err != nil { + return err + } + } + + // Collect readiness outputs from prerequisite profiles so that + // dependent profiles (e.g. AI) can wait for them. + var aiPrereqs []pulumi.StringOutput + + if needServiceMesh { + if _, _, err := deployServiceMesh(ctx, args); err != nil { + return err + } + } + + // AI requires Service Mesh v2 (Maistra) — separate from the v3 (Sail) profile + if needAI { + _, smcpReady, err := deployServiceMeshV2(ctx, args) + if err != nil { + return err + } + aiPrereqs = append(aiPrereqs, smcpReady) + } + + if needServing || needEventing { + operatorReady, err := deployServerlessOperator(ctx, args) + if err != nil { + return err + } + if needServing { + _, ksReady, err := deployKnativeServing(ctx, args, operatorReady) + if err != nil { + return err + } + if needAI { + aiPrereqs = append(aiPrereqs, ksReady) + } + } + if needEventing { + if _, err := deployKnativeEventing(ctx, args, operatorReady); err != nil { + return err + } + } + } + + if needAI { + if _, err := deployOpenShiftAI(ctx, args, aiPrereqs); err != nil { + return err + } } + + return nil } // ProfilesRequireNestedVirt returns true if any of the given profiles @@ -50,3 +145,13 
@@ func DeployProfile(ctx *pulumi.Context, profile string, args *ProfileDeployArgs) func ProfilesRequireNestedVirt(profiles []string) bool { return slices.Contains(profiles, ProfileVirtualization) } + +// ProfilesMinCPUs returns the minimum number of CPUs required by the +// given set of profiles. If no profile needs extra resources it returns 0 +// (meaning "use the default"). +func ProfilesMinCPUs(profiles []string) int32 { + if slices.Contains(profiles, ProfileOpenShiftAI) { + return 16 + } + return 0 +} diff --git a/tkn/infra-aws-ocp-snc.yaml b/tkn/infra-aws-ocp-snc.yaml index 4f02b3c21..2785a1360 100644 --- a/tkn/infra-aws-ocp-snc.yaml +++ b/tkn/infra-aws-ocp-snc.yaml @@ -126,7 +126,7 @@ spec: description: If this flag is set it will skip the checks for the cluster readiness. In this case the kubeconfig can not be generated. default: 'false' - name: profile - description: Comma-separated list of profiles to install on the cluster (e.g. virtualization). When virtualization is selected, a bare metal instance is used. + description: Comma-separated list of profiles to install on the cluster (e.g. virtualization, serverless-serving, serverless-eventing, serverless, servicemesh, ai). When virtualization is selected, a bare metal instance is used. When ai is selected, the minimum instance size is raised to 16 vCPUs. default: "''" # Metadata params diff --git a/tkn/template/infra-aws-ocp-snc.yaml b/tkn/template/infra-aws-ocp-snc.yaml index 6aab389fd..9d72755e4 100644 --- a/tkn/template/infra-aws-ocp-snc.yaml +++ b/tkn/template/infra-aws-ocp-snc.yaml @@ -126,7 +126,7 @@ spec: description: If this flag is set it will skip the checks for the cluster readiness. In this case the kubeconfig can not be generated. default: 'false' - name: profile - description: Comma-separated list of profiles to install on the cluster (e.g. 
virtualization, serverless-serving, serverless-eventing, serverless, servicemesh, ai). When virtualization is selected, a bare metal instance is used. When ai is selected, the minimum instance size is raised to 16 vCPUs. default: "''" # Metadata params