-
Notifications
You must be signed in to change notification settings - Fork 21
CLOUDP-339878 - add docker caching #360
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
cc47ed8
75e9308
b4fa9fe
285c77d
f65be04
e1614d2
fac670c
3a45460
6ddc8ca
288b31b
0e2f0d9
f3b3a75
effb404
8748149
05fd387
fa47edd
4594db5
3599dfb
25f0a80
a916c53
7fb509c
6c804a4
2b5dc48
4cecea6
7723a13
d7c9cc1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
""" | ||
Branch detection and cache scoping utilities for Evergreen CI. | ||
|
||
This module provides functions to detect the current git branch and generate | ||
cache scopes for BuildKit remote cache in different environments (local development, | ||
Evergreen patch builds, Evergreen regular builds). | ||
""" | ||
|
||
import subprocess | ||
from typing import Optional | ||
|
||
|
||
def get_current_branch() -> Optional[str]: | ||
""" | ||
Detect the current git branch for cache scoping. | ||
|
||
In CI environments like Evergreen, git rev-parse --abbrev-ref HEAD returns | ||
auto-generated branch names like evg-pr-testing-<hash>. This function finds the original branch name by | ||
looking for remote branches that point to the current commit. | ||
|
||
:return: branch name or 'master' as fallback | ||
""" | ||
try: | ||
# Find the original branch (same commit, but not the evg-pr-test-* branch which evg creates) | ||
current_commit_result = subprocess.run(["git", "rev-parse", "HEAD"], capture_output=True, text=True, check=True) | ||
current_commit = current_commit_result.stdout.strip() | ||
|
||
# Get all remote branches with their commit hashes | ||
remote_branches_result = subprocess.run( | ||
["git", "for-each-ref", "--format=%(refname:short) %(objectname)", "refs/remotes/origin"], | ||
capture_output=True, | ||
text=True, | ||
check=True, | ||
) | ||
|
||
# Find branches that point to the current commit, excluding auto-generated CI branches | ||
for line in remote_branches_result.stdout.strip().split("\n"): | ||
if not line: | ||
continue | ||
parts = line.split() | ||
if len(parts) >= 2: | ||
branch_name, commit_hash = parts[0], parts[1] | ||
if commit_hash == current_commit and not "evg-pr-test" in branch_name: | ||
# Remove 'origin/' prefix | ||
original_branch = branch_name.replace("origin/", "", 1) | ||
if original_branch: | ||
return original_branch | ||
except (subprocess.CalledProcessError, FileNotFoundError): | ||
return "master" | ||
|
||
return "master" | ||
|
||
|
||
def get_cache_scope() -> str: | ||
""" | ||
Get the cache scope for BuildKit remote cache. | ||
|
||
Returns a scope string that combines branch and run information: | ||
- For master branch: returns "master" | ||
- For other branches: returns the branch name (sanitized for use in image tags) | ||
- For patch builds: includes version_id to avoid conflicts | ||
|
||
:return: cache scope string suitable for use in image tags | ||
""" | ||
branch = get_current_branch() | ||
|
||
# Sanitize branch name for use in image tags | ||
# Replace invalid characters with hyphens and convert to lowercase | ||
sanitized_branch = branch.lower() | ||
sanitized_branch = "".join(c if c.isalnum() or c in "-_." else "-" for c in sanitized_branch) | ||
|
||
return sanitized_branch |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
# This file is the new Sonar | ||
import base64 | ||
from typing import Dict | ||
from typing import Dict, List, Any | ||
|
||
import boto3 | ||
import docker | ||
|
@@ -9,10 +9,64 @@ | |
from python_on_whales.exceptions import DockerException | ||
|
||
from lib.base_logger import logger | ||
from scripts.release.branch_detection import get_cache_scope, get_current_branch | ||
|
||
DEFAULT_BUILDER_NAME = "multiarch" # Default buildx builder name | ||
|
||
|
||
def ensure_ecr_cache_repository(repository_name: str, region: str = "us-east-1"): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. if there is none - we should create the repo! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [non blocking] I would like some comments to state a few things that might seem obvious, but that I understood fully because I had to implement some caching in a pipeline recently:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [non blocking] What do you think about moving the cache-related functions to another file ? And keep this file only for core build logic. |
||
ecr_client = boto3.client("ecr", region_name=region) | ||
try: | ||
_ = ecr_client.create_repository(repositoryName=repository_name) | ||
logger.info(f"Successfully created ECR cache repository: {repository_name}") | ||
except ClientError as e: | ||
error_code = e.response['Error']['Code'] | ||
if error_code == 'RepositoryAlreadyExistsException': | ||
logger.info(f"ECR cache repository already exists: {repository_name}") | ||
else: | ||
logger.error(f"Failed to create ECR cache repository {repository_name}: {error_code} - {e}") | ||
raise | ||
|
||
|
||
def build_cache_configuration(base_registry: str) -> tuple[list[Any], dict[str, str]]: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Some gotchas:
|
||
""" | ||
Build cache configuration for branch-scoped BuildKit remote cache. | ||
|
||
Implements the cache strategy: | ||
- Per-image cache repo: …/dev/cache/<image> | ||
- Per-branch run with read precedence: branch → master | ||
- Write to branch scope only | ||
- Use BuildKit registry cache exporter with mode=max, oci-mediatypes=true, image-manifest=true | ||
|
||
:param base_registry: Base registry URL for cache (e.g., "268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/cache/mongodb-kubernetes") | ||
""" | ||
cache_scope = get_cache_scope() | ||
|
||
# Build cache references with read precedence: branch → master | ||
cache_from_refs = [] | ||
|
||
# Read precedence: branch → master | ||
branch_ref = f"{base_registry}:{cache_scope}" | ||
master_ref = f"{base_registry}:master" | ||
|
||
# Add to cache_from in order of precedence | ||
if cache_scope != "master": | ||
cache_from_refs.append({"type": "registry", "ref": branch_ref}) | ||
cache_from_refs.append({"type": "registry", "ref": master_ref}) | ||
else: | ||
cache_from_refs.append({"type": "registry", "ref": master_ref}) | ||
|
||
cache_to_refs = { | ||
"type": "registry", | ||
"ref": branch_ref, | ||
"mode": "max", | ||
"oci-mediatypes": "true", | ||
"image-manifest": "true" | ||
} | ||
|
||
return cache_from_refs, cache_to_refs | ||
|
||
|
||
def ecr_login_boto3(region: str, account_id: str): | ||
""" | ||
Fetches an auth token from ECR via boto3 and logs | ||
|
@@ -75,8 +129,8 @@ def ensure_buildx_builder(builder_name: str = DEFAULT_BUILDER_NAME) -> str: | |
def execute_docker_build( | ||
tags: list[str], | ||
dockerfile: str, | ||
path: str, args: | ||
Dict[str, str], | ||
path: str, | ||
args: Dict[str, str], | ||
push: bool, | ||
platforms: list[str], | ||
builder_name: str = DEFAULT_BUILDER_NAME, | ||
|
@@ -102,17 +156,24 @@ def execute_docker_build( | |
# Convert build args to the format expected by python_on_whales | ||
build_args = {k: str(v) for k, v in args.items()} | ||
|
||
cache_from_refs, cache_to_refs = _build_cache(tags) | ||
|
||
logger.info(f"Building image: {tags}") | ||
logger.info(f"Platforms: {platforms}") | ||
logger.info(f"Dockerfile: {dockerfile}") | ||
logger.info(f"Build context: {path}") | ||
logger.info(f"Cache scope: {get_cache_scope()}") | ||
logger.info(f"Current branch: {get_current_branch()}") | ||
logger.info(f"Cache from sources: {len(cache_from_refs)} refs") | ||
logger.info(f"Cache to targets: {len(cache_to_refs)} refs") | ||
logger.debug(f"Build args: {build_args}") | ||
logger.debug(f"Cache from: {cache_from_refs}") | ||
logger.debug(f"Cache to: {cache_to_refs}") | ||
|
||
# Use buildx for multi-platform builds | ||
if len(platforms) > 1: | ||
logger.info(f"Multi-platform build for {len(platforms)} architectures") | ||
|
||
# Build the image using buildx, builder must be already initialized | ||
docker_cmd.buildx.build( | ||
context_path=path, | ||
file=dockerfile, | ||
|
@@ -124,10 +185,36 @@ def execute_docker_build( | |
push=push, | ||
provenance=False, # To not get an untagged image for single platform builds | ||
pull=False, # Don't always pull base images | ||
cache_from=cache_from_refs, | ||
cache_to=cache_to_refs, | ||
) | ||
|
||
logger.info(f"Successfully built {'and pushed' if push else ''} {tags}") | ||
|
||
except Exception as e: | ||
logger.error(f"Failed to build image {tags}: {e}") | ||
raise RuntimeError(f"Failed to build image {tags}: {str(e)}") | ||
|
||
|
||
def _build_cache(tags): | ||
# Filter tags to only include ECR ones (containing ".dkr.ecr.") | ||
ecr_tags = [tag for tag in tags if ".dkr.ecr." in tag] | ||
if not ecr_tags: | ||
return [], {} | ||
primary_tag = ecr_tags[0] | ||
# Extract the repository URL without tag (e.g., "268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/mongodb-kubernetes:1.0.0" -> "268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/mongodb-kubernetes") | ||
repository_url = primary_tag.split(":")[0] if ":" in primary_tag else primary_tag | ||
# Extract just the image name from the repository URL (e.g., "268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/mongodb-kubernetes" -> "mongodb-kubernetes") | ||
cache_image_name = repository_url.split("/")[-1] | ||
# Base cache repository name | ||
base_cache_repo = f"dev/cache/{cache_image_name}" | ||
# Build branch/arch-scoped cache configuration | ||
base_registry = f"268558157000.dkr.ecr.us-east-1.amazonaws.com/{base_cache_repo}" | ||
|
||
ensure_ecr_cache_repository(base_cache_repo) | ||
|
||
# TODO CLOUDP-335471: use env variables to configure AWS region and account ID | ||
cache_from_refs, cache_to_refs = build_cache_configuration( | ||
base_registry | ||
) | ||
return cache_from_refs, cache_to_refs |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,143 @@ | ||
import subprocess | ||
from unittest.mock import MagicMock, patch | ||
|
||
from scripts.release.branch_detection import ( | ||
get_cache_scope, | ||
get_current_branch, | ||
) | ||
|
||
|
||
class TestGetCurrentBranch: | ||
"""Test branch detection logic for different scenarios.""" | ||
|
||
@patch("subprocess.run") | ||
def test_ci_environment_with_original_branch(self, mock_run): | ||
"""Test detection of original branch in CI environment like Evergreen.""" | ||
|
||
# Mock the sequence of git commands | ||
def side_effect(cmd, **kwargs): | ||
if cmd == ["git", "rev-parse", "HEAD"]: | ||
return MagicMock(stdout="4cecea664abcd1234567890\n", returncode=0) | ||
elif cmd == ["git", "for-each-ref", "--format=%(refname:short) %(objectname)", "refs/remotes/origin"]: | ||
return MagicMock( | ||
stdout="origin/master 1234567890abcdef\norigin/add-caching 4cecea664abcd1234567890\norigin/evg-pr-test-12345 4cecea664abcd1234567890\n", | ||
returncode=0, | ||
) | ||
elif cmd == ["git", "rev-parse", "--abbrev-ref", "HEAD"]: | ||
return MagicMock(stdout="evg-pr-test-12345\n", returncode=0) | ||
return MagicMock(stdout="", returncode=1) | ||
|
||
mock_run.side_effect = side_effect | ||
|
||
result = get_current_branch() | ||
|
||
assert result == "add-caching" | ||
|
||
@patch("subprocess.run") | ||
def test_master_branch_fallback(self, mock_run): | ||
"""Test detection of master branch using fallback method.""" | ||
|
||
# Mock the sequence where sophisticated method fails but fallback works | ||
def side_effect(cmd, **kwargs): | ||
if cmd == ["git", "rev-parse", "HEAD"]: | ||
return MagicMock(stdout="4cecea664abcd1234567890\n", returncode=0) | ||
elif cmd == ["git", "for-each-ref", "--format=%(refname:short) %(objectname)", "refs/remotes/origin"]: | ||
raise subprocess.CalledProcessError(1, "git") # This fails, triggering fallback | ||
elif cmd == ["git", "rev-parse", "--abbrev-ref", "HEAD"]: | ||
return MagicMock(stdout="master\n", returncode=0) | ||
return MagicMock(stdout="", returncode=1) | ||
|
||
mock_run.side_effect = side_effect | ||
|
||
result = get_current_branch() | ||
|
||
assert result == "master" | ||
|
||
@patch("subprocess.run") | ||
def test_detached_head_fallback(self, mock_run): | ||
"""Test detection when in detached HEAD state using fallback.""" | ||
|
||
# Mock the sequence where sophisticated method fails and fallback returns HEAD | ||
def side_effect(cmd, **kwargs): | ||
if cmd == ["git", "rev-parse", "HEAD"]: | ||
return MagicMock(stdout="4cecea664abcd1234567890\n", returncode=0) | ||
elif cmd == ["git", "for-each-ref", "--format=%(refname:short) %(objectname)", "refs/remotes/origin"]: | ||
raise subprocess.CalledProcessError(1, "git") # This fails, triggering fallback | ||
elif cmd == ["git", "rev-parse", "--abbrev-ref", "HEAD"]: | ||
return MagicMock(stdout="HEAD\n", returncode=0) | ||
return MagicMock(stdout="", returncode=1) | ||
|
||
mock_run.side_effect = side_effect | ||
|
||
result = get_current_branch() | ||
|
||
assert result == "master" # fallback to master | ||
|
||
@patch("subprocess.run") | ||
def test_ci_branch_filtered_out_in_fallback(self, mock_run): | ||
"""Test that CI auto-generated branches are filtered out in fallback.""" | ||
|
||
# Mock the sequence where sophisticated method fails and fallback returns CI branch | ||
def side_effect(cmd, **kwargs): | ||
if cmd == ["git", "rev-parse", "HEAD"]: | ||
return MagicMock(stdout="4cecea664abcd1234567890\n", returncode=0) | ||
elif cmd == ["git", "for-each-ref", "--format=%(refname:short) %(objectname)", "refs/remotes/origin"]: | ||
raise subprocess.CalledProcessError(1, "git") # This fails, triggering fallback | ||
elif cmd == ["git", "rev-parse", "--abbrev-ref", "HEAD"]: | ||
return MagicMock(stdout="evg-pr-test-12345\n", returncode=0) | ||
return MagicMock(stdout="", returncode=1) | ||
|
||
mock_run.side_effect = side_effect | ||
|
||
result = get_current_branch() | ||
|
||
assert result == "master" # fallback to master when CI branch is detected | ||
|
||
@patch("subprocess.run") | ||
def test_git_command_fails(self, mock_run): | ||
"""Test fallback when all git commands fail.""" | ||
mock_run.side_effect = subprocess.CalledProcessError(1, "git") | ||
|
||
result = get_current_branch() | ||
|
||
assert result == "master" # fallback to master | ||
|
||
@patch("subprocess.run") | ||
def test_git_not_found(self, mock_run): | ||
"""Test fallback when git is not found.""" | ||
mock_run.side_effect = FileNotFoundError("git not found") | ||
|
||
result = get_current_branch() | ||
|
||
assert result == "master" # fallback to master | ||
|
||
|
||
class TestGetCacheScope: | ||
"""Test cache scope generation for different scenarios.""" | ||
|
||
@patch("scripts.release.branch_detection.get_current_branch") | ||
def test_feature_branch(self, mock_branch): | ||
"""Test cache scope for feature branch.""" | ||
mock_branch.return_value = "feature/new-cache" | ||
|
||
result = get_cache_scope() | ||
|
||
assert result == "feature-new-cache" | ||
|
||
@patch("scripts.release.branch_detection.get_current_branch") | ||
def test_branch_name_sanitization(self, mock_branch): | ||
"""Test branch name sanitization for cache scope.""" | ||
mock_branch.return_value = "Feature/NEW_cache@123" | ||
|
||
result = get_cache_scope() | ||
|
||
assert result == "feature-new_cache-123" | ||
|
||
@patch("scripts.release.branch_detection.get_current_branch") | ||
def test_complex_branch_name(self, mock_branch): | ||
"""Test cache scope for complex branch name with special characters.""" | ||
mock_branch.return_value = "user/feature-123_test.branch" | ||
|
||
result = get_cache_scope() | ||
|
||
assert result == "user-feature-123_test.branch" |
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
first run (not cached)
second run (cached)