Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .evergreen-functions.yml
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,6 @@ functions:
params:
env:
SKIP_MINIKUBE_SETUP: ${skip_minikube_setup!|false}
SKIP_INSTALL_REQUIREMENTS: ${skip_install_python_requirements!|true}
working_dir: src/github.com/mongodb/mongodb-kubernetes
add_to_path:
- ${workdir}/bin
Expand Down
2 changes: 0 additions & 2 deletions .evergreen.yml
Original file line number Diff line number Diff line change
Expand Up @@ -397,7 +397,6 @@ tasks:
- func: setup_building_host_minikube
vars:
skip_minikube_setup: true
skip_install_python_requirements: false
- func: download_multi_cluster_binary
vars:
platform: linux/s390x
Expand All @@ -411,7 +410,6 @@ tasks:
- func: setup_building_host_minikube
vars:
skip_minikube_setup: true
skip_install_python_requirements: false
- func: download_multi_cluster_binary
vars:
platform: linux/ppc64le
Expand Down
10 changes: 2 additions & 8 deletions scripts/dev/recreate_python_venv.sh
Original file line number Diff line number Diff line change
Expand Up @@ -118,14 +118,8 @@ PYENV_VERSION="${PYTHON_VERSION}" python -m venv venv
source venv/bin/activate
pip install --upgrade pip

skip_requirements="${SKIP_INSTALL_REQUIREMENTS:-false}"
if [[ "${skip_requirements}" != "true" ]]; then
echo "Installing requirements.txt..."
pip install -r requirements.txt
else
echo "Skipping requirements.txt installation."
pip install requests
fi
echo "Installing requirements.txt..."
pip install -r requirements.txt

echo "Python venv was recreated successfully."
echo "Using Python: $(which python) ($(python --version))" >&2
91 changes: 61 additions & 30 deletions scripts/dev/setup_ibm_container_runtime.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,42 +2,73 @@

set -Eeou pipefail

echo "Cleaning DNF cache..."
sudo dnf clean all && sudo rm -r /var/cache/dnf

echo "Installing/upgrading crun..."
sudo dnf upgrade -y crun --disableplugin=subscription-manager || \
sudo dnf install -y crun --disableplugin=subscription-manager || \
sudo yum upgrade -y crun --disableplugin=subscription-manager || \
sudo yum install -y crun --disableplugin=subscription-manager

if ! crun --version &>/dev/null; then
echo "❌ crun installation failed"
exit 1
echo "Setting up IBM container runtime (rootful podman for minikube)"

# Install crun if not present (OCI runtime for cgroup v2).
# Best-effort: try dnf, fall back to yum; a failure only warns so the
# rest of the setup can still proceed.
if command -v crun &>/dev/null; then
  echo "crun already installed: $(crun --version | head -1)"
else
  echo "Installing crun..."
  sudo dnf install -y crun --disableplugin=subscription-manager 2>/dev/null || \
    sudo yum install -y crun --disableplugin=subscription-manager 2>/dev/null || \
    echo "Warning: Could not install crun"
fi

current_version=$(crun --version | head -n1)
echo "✅ Using crun: ${current_version}"
# Clean up stale container state (safe for shared CI machines).
# Removes leaked conmon monitors, stale crun lock files, and exited
# containers/dangling volumes — but only when no minikube cluster is
# running, so an active cluster's state is never touched.
cleanup_stale_state() {
  echo "Cleaning up stale container state..."

  # Skip if minikube is running ('&>' already redirects both streams;
  # the previous extra '2>&1' was redundant).
  if command -v minikube &>/dev/null && minikube status &>/dev/null; then
    echo " Minikube running - skipping cleanup"
    return 0
  fi

  # Kill orphaned root conmon processes. PPID=1 means the owning podman
  # process is gone; 'pgrep -P 1 conmon' selects exactly those, replacing
  # the manual ps/PPID parse.
  for pid in $(sudo pgrep -P 1 conmon 2>/dev/null); do
    echo " Killing orphaned conmon ${pid}"
    # SIGKILL: an orphaned monitor has no parent left to react to TERM.
    sudo kill -9 "${pid}" 2>/dev/null || true
  done

  # Clean stale lock files (safe since minikube isn't running).
  sudo find /run/crun -name "*.lock" -delete 2>/dev/null || true

  # Prune exited containers and dangling volumes.
  sudo podman container prune -f 2>/dev/null || true
  sudo podman volume prune -f 2>/dev/null || true
}

mkdir -p ~/.config/containers
echo "${config}" > ~/.config/containers/containers.conf
cleanup_stale_state

sudo mkdir -p /root/.config/containers
echo "${config}" | sudo tee /root/.config/containers/containers.conf >/dev/null
# Test sudo podman (used by minikube in rootful mode).
# Probe helper: run a throwaway alpine container; success means the
# rootful podman stack (storage, network, runtime) is functional.
# Defined once so the pre-reset and post-reset checks cannot drift apart.
probe_sudo_podman() {
  sudo podman run --rm docker.io/library/alpine:latest echo "sudo podman works" 2>/dev/null
}

echo "Testing sudo podman..."
if probe_sudo_podman; then
  echo "Sudo podman working"
else
  echo "Sudo podman not working, resetting..."
  # 'system reset' wipes all containers/images/storage — last-resort repair.
  sudo podman system reset --force 2>/dev/null || true
  sleep 1

  if probe_sudo_podman; then
    echo "Sudo podman working after reset"
  else
    echo "Warning: Sudo podman still not working"
  fi
fi

# Configure root-level podman: crun as the OCI runtime with the systemd
# cgroup manager (quoted 'EOF' keeps the heredoc literal).
sudo mkdir -p /etc/containers
sudo tee /etc/containers/containers.conf > /dev/null << 'EOF'
[containers]
cgroup_manager = "systemd"

[engine]
runtime = "crun"
EOF
echo "✅ Configured crun"
echo "Container runtime setup complete"
echo " crun: $(crun --version 2>/dev/null | head -1 || echo 'not found')"
echo " podman: $(sudo podman --version)"
1 change: 0 additions & 1 deletion scripts/evergreen/setup_minikube_host.sh
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ run_setup_step() {

# Setup Python environment (needed for AWS CLI pip installation)
export GRPC_PYTHON_BUILD_SYSTEM_OPENSSL=1
export SKIP_INSTALL_REQUIREMENTS=${SKIP_INSTALL_REQUIREMENTS:-true}
run_setup_step "Python Virtual Environment" "scripts/dev/recreate_python_venv.sh"

run_setup_step "AWS CLI Setup" "scripts/evergreen/setup_aws.sh"
Expand Down
4 changes: 3 additions & 1 deletion scripts/funcs/install
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,9 @@ download_and_install_binary() {

mkdir -p "${dir}"
echo "Downloading ${url}"
curl --retry 5 --retry-delay 3 --retry-all-errors --fail --show-error --max-time 180 --silent -L "${url}" -o "${bin}"
# Use longer timeout (10 min) for large binaries like minikube (~140MB) on slow IBM networks.
# Note: retries restart the download from scratch; pass "-C -" as well if resume support is needed.
curl --retry 5 --retry-delay 10 --retry-all-errors --fail --show-error --max-time 600 -L "${url}" -o "${bin}"
chmod +x "${bin}"
mv "${bin}" "${dir}"
echo "Installed ${bin} to ${dir}"
Expand Down
85 changes: 47 additions & 38 deletions scripts/minikube/setup_minikube.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
source scripts/dev/set_env_context.sh
source scripts/funcs/install


set -Eeou pipefail

set_limits() {
Expand Down Expand Up @@ -48,10 +49,10 @@ setup_local_registry_and_custom_image() {
if [[ "${ARCH}" == "ppc64le" ]]; then
echo ">>> Setting up local registry and custom kicbase image for ppc64le..."

# Check if local registry is running (with fallback for namespace issues)
# Check if local registry is running (use 127.0.0.1 to avoid IPv6 fallback delay)
registry_running=false
if curl -s http://localhost:5000/v2/_catalog >/dev/null 2>&1; then
echo "Registry detected via HTTP check (podman ps failed)"
if curl -s --max-time 5 http://127.0.0.1:5000/v2/_catalog >/dev/null 2>&1; then
echo "Registry detected via HTTP check"
registry_running=true
fi

Expand All @@ -61,15 +62,15 @@ setup_local_registry_and_custom_image() {
# Clean up any existing registry first
sudo podman rm -f registry 2>/dev/null || true

if ! sudo podman run -d -p 5000:5000 --name registry --restart=always docker.io/library/registry:2; then
if ! sudo podman run -d -p 127.0.0.1:5000:5000 --name registry --restart=always docker.io/library/registry:2; then
echo "❌ Failed to start local registry - trying alternative approach"
exit 1
fi

# Wait for registry to be ready
echo "Waiting for registry to be ready..."
for _ in {1..30}; do
if curl -s http://localhost:5000/v2/_catalog >/dev/null 2>&1; then
if curl -s --max-time 5 http://127.0.0.1:5000/v2/_catalog >/dev/null 2>&1; then
break
fi
sleep 1
Expand All @@ -78,31 +79,18 @@ setup_local_registry_and_custom_image() {
echo "✅ Local registry already running"
fi

# Configure podman to trust local registry (both user and root level for minikube)
# Configure podman to trust local registry (rootful only since minikube uses sudo podman)
echo "Configuring registries.conf to trust local registry..."

# User-level config
mkdir -p ~/.config/containers
cat > ~/.config/containers/registries.conf << 'EOF'
[[registry]]
location = "localhost:5000"
insecure = true
EOF

# Root-level config (since minikube uses sudo podman)
sudo mkdir -p /root/.config/containers
sudo tee /root/.config/containers/registries.conf << 'EOF' >/dev/null
[[registry]]
location = "localhost:5000"
insecure = true
EOF
echo "✅ Registry configuration created"

echo "✅ Registry configuration created for both user and root"
custom_image_tag="localhost:5000/kicbase:v0.0.47"

# Determine image tag
custom_image_tag="localhost:5000/kicbase:v0.0.47"
if curl -s http://localhost:5000/v2/kicbase/tags/list | grep -q "v0.0.47"; then
custom_image_tag="localhost:5000/kicbase:v0.0.48"
if curl -s --max-time 5 http://127.0.0.1:5000/v2/kicbase/tags/list | grep -q "v0.0.48"; then
echo "Custom kicbase image already exists in local registry"
return 0
fi
Expand All @@ -113,7 +101,7 @@ EOF
# Build custom kicbase image
mkdir -p "${PROJECT_DIR:-.}/scripts/minikube/kicbase"
cat > "${PROJECT_DIR:-.}/scripts/minikube/kicbase/Dockerfile" << 'EOF'
FROM gcr.io/k8s-minikube/kicbase:v0.0.47
FROM gcr.io/k8s-minikube/kicbase:v0.0.48
RUN if [ "$(uname -m)" = "ppc64le" ]; then \
CRICTL_VERSION="v1.28.0" && \
curl -L "https://github.com/kubernetes-sigs/cri-tools/releases/download/${CRICTL_VERSION}/crictl-${CRICTL_VERSION}-linux-ppc64le.tar.gz" \
Expand All @@ -129,6 +117,21 @@ EOF
echo "Failed to build custom image"
return 1
}

# Use 127.0.0.1 explicitly: on ppc64le, "localhost" may resolve to the IPv6 loopback,
# which podman's port forward may not be listening on, causing the check to hang or fail.
if ! curl -s --max-time 5 http://127.0.0.1:5000/v2/_catalog >/dev/null 2>&1; then
echo "Registry not responding, restarting..."
sudo podman rm -f registry 2>/dev/null || true
sudo podman run -d -p 127.0.0.1:5000:5000 --name registry --restart=always docker.io/library/registry:2
for _ in {1..15}; do
if curl -s --max-time 5 http://127.0.0.1:5000/v2/_catalog >/dev/null 2>&1; then
echo "Registry restarted successfully"
break
fi
sleep 1
done
fi

sudo podman push "${custom_image_tag}" --tls-verify=false || {
echo "Failed to push to registry"
return 1
Expand All @@ -139,9 +142,19 @@ EOF
return 0
}

# Start minikube with podman driver
# Start minikube with podman driver (rootful mode for reliable networking)
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

minikube requires root podman for changing networking and iptables

start_minikube_cluster() {
echo ">>> Starting minikube cluster with podman driver..."
echo ">>> Starting minikube cluster with podman driver (rootful mode)..."

if "${PROJECT_DIR:-.}/bin/minikube" status &>/dev/null; then
echo "✅ Minikube is already running - verifying health..."
if "${PROJECT_DIR:-.}/bin/minikube" kubectl -- get nodes &>/dev/null; then
echo "✅ Minikube cluster is healthy - skipping setup"
return 0
else
echo "⚠️ Minikube running but unhealthy - will recreate"
fi
fi

# Clean up any existing minikube state to avoid cached configuration issues
echo "Cleaning up any existing minikube state..."
Expand All @@ -153,19 +166,23 @@ start_minikube_cluster() {
echo "Ensuring clean minikube state..."
"${PROJECT_DIR:-.}/bin/minikube" delete 2>/dev/null || true

local start_args=("--driver=podman")
# Clean up stale podman volumes
echo "Cleaning up stale podman volumes..."
sudo podman volume rm -f minikube 2>/dev/null || true
sudo podman network rm -f minikube 2>/dev/null || true

# Use rootful podman - rootless has iptables/CNI issues on ppc64le and s390x
local start_args=("--driver=podman" "--container-runtime=containerd" "--rootless=false")
start_args+=("--cpus=4" "--memory=8g")
start_args+=("--cni=bridge")

if [[ "${ARCH}" == "ppc64le" ]]; then
echo "Using custom kicbase image for ppc64le with crictl..."

start_args+=("--base-image=localhost:5000/kicbase:v0.0.47")
start_args+=("--base-image=localhost:5000/kicbase:v0.0.48")
start_args+=("--insecure-registry=localhost:5000")
fi

# Use default bridge CNI to avoid Docker Hub rate limiting issues
# start_args+=("--cni=bridge")

echo "Starting minikube with args: ${start_args[*]}"
if "${PROJECT_DIR:-.}/bin/minikube" start "${start_args[@]}"; then
echo "✅ Minikube started successfully"
Expand Down Expand Up @@ -194,14 +211,6 @@ else
exit 1
fi

if [[ "${ARCH}" == "ppc64le" ]]; then
echo ""
echo ">>> Note: crictl will be patched into the minikube container after startup"
else
echo ""
echo ">>> Using standard kicbase image (crictl included for x86_64/aarch64/s390x)"
fi

# Start the minikube cluster
start_minikube_cluster

Expand Down