From f721942b5b7f4e93031a994adf647af83fbcf0f6 Mon Sep 17 00:00:00 2001 From: Dan Notestein Date: Tue, 6 Jan 2026 18:47:59 -0500 Subject: [PATCH] Add image cache lookup scripts and templates Add generic scripts for finding pre-built Docker images: - find-last-source-commit.sh: Find most recent commit that changed source files - get-cached-image.sh: Check if Docker image exists for a commit - find-upstream-image.sh: Find images from upstream repos (git fetch + lookup) Add CI template (source_change_detection.gitlab-ci.yml) with: - .detect_source_changes: Detect docs-only/tests-only changes - .find_upstream_image: Look up images from upstream repos - .check_local_image: Check local registry for cached images - Skip rule templates for conditional job execution This generalizes hive's get_image4submodule.sh pattern for use by any repo, and enables downstream repos (clive, wax, etc.) to find upstream images without maintaining submodules. --- CLAUDE.md | 22 ++ docs/image-cache-lookup.md | 292 ++++++++++++++++ scripts/bash/find-last-source-commit.sh | 130 +++++++ scripts/bash/find-upstream-image.sh | 241 +++++++++++++ scripts/bash/get-cached-image.sh | 233 +++++++++++++ .../source_change_detection.gitlab-ci.yml | 326 ++++++++++++++++++ 6 files changed, 1244 insertions(+) create mode 100644 docs/image-cache-lookup.md create mode 100755 scripts/bash/find-last-source-commit.sh create mode 100755 scripts/bash/find-upstream-image.sh create mode 100755 scripts/bash/get-cached-image.sh create mode 100644 templates/source_change_detection.gitlab-ci.yml diff --git a/CLAUDE.md b/CLAUDE.md index 5436423..52a8b1c 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -17,6 +17,7 @@ Detailed documentation is available in `docs/`: - `cache-manager.md` - NFS-backed cache system for HAF/hive replay data - `common-ci-images.md` - Docker images, their purposes, and Python versions - `haf-app-testing.md` - Templates for HAF-dependent application testing +- `image-cache-lookup.md` - Scripts for finding 
pre-built images and avoiding rebuilds ## Validation Commands @@ -59,6 +60,7 @@ Templates are in `templates/` and are included by downstream projects: | `test_jobs.gitlab-ci.yml` | pytest, jmeter, tox test runners | | `python_projects.gitlab-ci.yml` | Python linting/testing | | `haf_app_testing.gitlab-ci.yml` | HAF app change detection, DinD testing, Tavern | +| `source_change_detection.gitlab-ci.yml` | Source change detection, skip rules, upstream image lookup | | `cache-manager.gitlab-ci.yml` | Cache-manager script setup | | `base.gitlab-ci.yml` | Common job defaults | @@ -82,6 +84,26 @@ Located in `scripts/bash/npm-helpers/`: - `npm_publish.sh` - Publish to npm registries - `npm_pack_package.sh` - Create package tarballs +## Image Cache Lookup Scripts + +Located in `scripts/bash/`: +- `find-last-source-commit.sh` - Find the most recent commit that changed source files +- `get-cached-image.sh` - Check if a Docker image exists in a registry for a commit +- `find-upstream-image.sh` - Find pre-built images from upstream repos (combines git fetch + lookup) +- `docker-image-utils.sh` - Utility functions for Docker image operations + +See `docs/image-cache-lookup.md` for full documentation. + +**Example: Downstream repo finding upstream image:** +```bash +# Find latest hive image without maintaining a submodule +find-upstream-image.sh \ + --repo-url=https://gitlab.syncad.com/hive/hive.git \ + --registry=registry.gitlab.syncad.com/hive/hive \ + --patterns="libraries/,programs/,CMakeLists.txt,Dockerfile" +# Outputs: UPSTREAM_IMAGE=registry.../hive:abc12345 +``` + ## Architecture Notes **emsdk image** (`Dockerfile.emscripten`): Contains Emscripten toolchain with Node.js, pnpm, and pre-compiled WASM dependencies (Boost, OpenSSL, secp256k1). Used by wax and other WASM projects. 
diff --git a/docs/image-cache-lookup.md b/docs/image-cache-lookup.md new file mode 100644 index 0000000..57ba096 --- /dev/null +++ b/docs/image-cache-lookup.md @@ -0,0 +1,292 @@ +# Image Cache Lookup + +This document describes the scripts and templates for finding pre-built Docker images, avoiding unnecessary rebuilds, and looking up images from upstream repositories. + +## Overview + +The image cache lookup system provides: + +1. **Build avoidance** - Skip building images when source code hasn't changed +2. **Cross-repo lookup** - Find pre-built images from upstream repositories (e.g., clive finding hive images) +3. **Change detection** - Automatically detect what type of files changed and skip unnecessary jobs + +## Scripts + +### find-last-source-commit.sh + +Finds the most recent commit that changed any of the specified source file patterns. + +```bash +# Find last commit that changed C++ source files +find-last-source-commit.sh "libraries/" "programs/" "CMakeLists.txt" "Dockerfile" + +# Find in a specific directory with full hash +find-last-source-commit.sh --dir=/path/to/repo --full "src/" "Dockerfile" + +# Quiet mode (only outputs commit hash) +find-last-source-commit.sh --quiet "src/" +``` + +**Options:** +- `--dir=PATH` - Directory to search in (default: current directory) +- `--abbrev=N` - Abbreviate commit to N characters (default: 8) +- `--full` - Output full 40-character hash +- `--quiet` - Only output the commit hash + +### get-cached-image.sh + +Checks if a Docker image exists in a registry for a given commit. 
+ +```bash +# Check if image exists +get-cached-image.sh --commit=abc12345 --registry=registry.gitlab.syncad.com/hive/hive + +# Check for a specific image variant (e.g., testnet) +get-cached-image.sh --commit=abc12345 --registry=registry.gitlab.syncad.com/hive/hive --image=testnet + +# Use commit from environment variable +export HIVE_COMMIT=abc12345 +get-cached-image.sh --commit-var=HIVE_COMMIT --registry=registry.gitlab.syncad.com/hive/hive + +# Require image to exist (exit with error if not found) +get-cached-image.sh --commit=abc12345 --registry=... --require-hit +``` + +**Options:** +- `--commit=HASH` - Commit hash to look up +- `--commit-var=NAME` - Environment variable containing commit hash +- `--registry=URL` - Docker registry URL +- `--image=NAME` - Image name within registry (optional) +- `--output=FILE` - Output env file (default: image-cache.env) +- `--require-hit` - Exit with error if image not found + +**Output (image-cache.env):** +```bash +CACHE_HIT=true +IMAGE_COMMIT=abc12345def67890... +IMAGE_TAG=abc12345 +IMAGE_NAME=registry.gitlab.syncad.com/hive/hive:abc12345 +IMAGE_REGISTRY=registry.gitlab.syncad.com/hive/hive +``` + +### find-upstream-image.sh + +Combines git fetch and image lookup for finding images from upstream repositories. 
+ +```bash +# Find latest hive image for use by clive +find-upstream-image.sh \ + --repo-url=https://gitlab.syncad.com/hive/hive.git \ + --registry=registry.gitlab.syncad.com/hive/hive \ + --patterns="libraries/,programs/,CMakeLists.txt,Dockerfile,cmake/,.gitmodules" + +# Find testnet image from specific branch +find-upstream-image.sh \ + --repo-url=https://gitlab.syncad.com/hive/hive.git \ + --registry=registry.gitlab.syncad.com/hive/hive \ + --image=testnet \ + --branch=develop \ + --patterns="libraries/,programs/" +``` + +**Options:** +- `--repo-url=URL` - Git URL of upstream repo +- `--registry=URL` - Docker registry URL +- `--patterns=LIST` - Comma-separated source file patterns +- `--branch=NAME` - Branch to check (default: develop) +- `--depth=N` - Git fetch depth (default: 100) +- `--image=NAME` - Image name within registry +- `--require-hit` - Exit with error if image not found + +**Output (upstream-image.env):** +```bash +UPSTREAM_BRANCH=develop +UPSTREAM_CACHE_HIT=true +UPSTREAM_COMMIT=abc12345 +UPSTREAM_TAG=abc12345 +UPSTREAM_IMAGE=registry.gitlab.syncad.com/hive/hive:abc12345 +UPSTREAM_REGISTRY=registry.gitlab.syncad.com/hive/hive +``` + +## CI Templates + +Include the template in your `.gitlab-ci.yml`: + +```yaml +include: + - project: 'hive/common-ci-configuration' + ref: develop + file: '/templates/source_change_detection.gitlab-ci.yml' +``` + +### Change Detection + +Detects what type of files changed and sets variables for conditional job execution: + +```yaml +variables: + # Customize patterns for your project + SOURCE_CODE_PATTERNS: "^(libraries/|programs/|CMakeLists\\.txt|cmake/|Dockerfile)" + DOCS_PATTERNS: "^(.*\\.md|doc/|\\.gitignore|CODEOWNERS)" + CI_SCRIPT_PATTERNS: "^(\\.gitlab-ci\\.yaml|scripts/ci/)" + TEST_PATTERNS: "^(tests/)" + +detect_changes: + extends: .detect_source_changes +``` + +**Output variables:** +- `DOCS_ONLY=true` - Only documentation files changed +- `TESTS_ONLY=true` - Only test files changed (no source or CI) +- 
`SOURCE_CHANGED=true` - Source code files changed +- `CI_CHANGED=true` - CI scripts changed + +### Skip Rules + +Use the provided rule templates to skip jobs conditionally: + +```yaml +build: + extends: .skip_build_on_non_source_changes + script: + - make build + +test: + extends: .skip_test_on_docs_only + script: + - make test + +expensive_test: + extends: .manual_on_feature_branches + script: + - make expensive-test +``` + +### Upstream Image Lookup + +Find pre-built images from upstream repositories: + +```yaml +find_hive_image: + extends: .find_upstream_image + variables: + UPSTREAM_REPO_URL: "https://gitlab.syncad.com/hive/hive.git" + UPSTREAM_REGISTRY: "registry.gitlab.syncad.com/hive/hive" + UPSTREAM_BRANCH: "develop" + UPSTREAM_PATTERNS: "libraries/,programs/,CMakeLists.txt,Dockerfile,cmake/,.gitmodules" + UPSTREAM_IMAGE: "testnet" # optional + +use_hive_image: + needs: [find_hive_image] + script: + - echo "Using hive image: $UPSTREAM_IMAGE" + - docker pull "$UPSTREAM_IMAGE" +``` + +### Local Image Check + +Check if an image already exists in your own registry: + +```yaml +check_my_image: + extends: .check_local_image + variables: + LOCAL_REGISTRY: "${CI_REGISTRY_IMAGE}" + LOCAL_PATTERNS: "src/,Dockerfile,CMakeLists.txt" + +build: + needs: [check_my_image] + script: + - | + if [ "$CACHE_HIT" = "true" ]; then + echo "Image already exists: $IMAGE_NAME" + docker pull "$IMAGE_NAME" + else + echo "Building new image..." + docker build -t "$IMAGE_NAME" . + docker push "$IMAGE_NAME" + fi +``` + +## Use Cases + +### 1. 
Building Repo (hive) + +Hive uses the scripts to avoid rebuilding when only tests/docs change: + +```yaml +variables: + SOURCE_CODE_PATTERNS: "^(libraries/|programs/|CMakeLists\\.txt|cmake/|Dockerfile|docker/)" + +detect_changes: + extends: .detect_source_changes + +check_image: + extends: .check_local_image + variables: + LOCAL_PATTERNS: "libraries/,programs/,CMakeLists.txt,cmake/,Dockerfile,docker/" + +build_hived: + extends: .skip_build_on_non_source_changes + needs: [detect_changes, check_image] + script: + - | + if [ "$CACHE_HIT" = "true" ]; then + echo "Reusing existing image: $IMAGE_NAME" + else + ./scripts/ci-helpers/build_instance.sh + fi +``` + +### 2. Downstream Repo (clive) + +Clive looks up the latest hive image without maintaining a submodule: + +```yaml +find_hive: + extends: .find_upstream_image + variables: + UPSTREAM_REPO_URL: "https://gitlab.syncad.com/hive/hive.git" + UPSTREAM_REGISTRY: "registry.gitlab.syncad.com/hive/hive" + UPSTREAM_PATTERNS: "libraries/,programs/,CMakeLists.txt,Dockerfile" + +build_clive: + needs: [find_hive] + script: + - echo "Building with hive: $UPSTREAM_IMAGE" + - docker pull "$UPSTREAM_IMAGE" + - ./build.sh --hive-image="$UPSTREAM_IMAGE" +``` + +### 3. HAF App (balance_tracker) + +HAF apps can find both hive and HAF images: + +```yaml +find_haf: + extends: .find_upstream_image + variables: + UPSTREAM_REPO_URL: "https://gitlab.syncad.com/hive/haf.git" + UPSTREAM_REGISTRY: "registry.gitlab.syncad.com/hive/haf" + UPSTREAM_PATTERNS: "src/,hive/,CMakeLists.txt,Dockerfile" + UPSTREAM_OUTPUT: "haf-image.env" + +sync: + needs: [find_haf] + script: + # UPSTREAM_* variables are injected automatically from the dotenv report (no need to source the file) + - echo "Using HAF: $UPSTREAM_IMAGE" +``` + +## Migration from Hive Scripts + +If your repo currently uses hive's `get_image4submodule.sh` or similar scripts: + +1. **Replace `retrieve_last_commit.sh`** with `find-last-source-commit.sh` +2. **Replace `docker_image_utils.sh`** - already exists in common-ci-configuration +3. 
**Replace `get_image4submodule.sh`** with a combination of: + - `find-last-source-commit.sh` (find commit) + - `get-cached-image.sh` (check registry) +4. **Replace submodule-based lookups** with `find-upstream-image.sh` + +The new scripts are more flexible and work both for same-repo and cross-repo lookups. diff --git a/scripts/bash/find-last-source-commit.sh b/scripts/bash/find-last-source-commit.sh new file mode 100755 index 0000000..b4b5268 --- /dev/null +++ b/scripts/bash/find-last-source-commit.sh @@ -0,0 +1,130 @@ +#!/bin/bash +# +# find-last-source-commit.sh - Find the most recent commit that changed source files +# +# This script finds the commit hash of the most recent change to any of the +# specified file patterns. Used to determine if a rebuild is needed by comparing +# the last source-changing commit to available cached images. +# +# Usage: +# find-last-source-commit.sh [OPTIONS] PATTERN [PATTERN...] +# +# Options: +# --dir=PATH Directory to search in (default: current directory) +# --abbrev=N Abbreviate commit hash to N characters (default: 8, use 40 for full) +# --full Output full 40-character commit hash (same as --abbrev=40) +# --quiet Only output the commit hash, no status messages +# --help Show this help message +# +# Examples: +# # Find last commit that changed C++ source files +# find-last-source-commit.sh "libraries/" "programs/" "CMakeLists.txt" +# +# # Find last commit in a specific directory with full hash +# find-last-source-commit.sh --dir=/path/to/repo --full "src/" "Dockerfile" +# +# # Use with patterns file +# find-last-source-commit.sh $(cat .source-patterns) +# +# Output: +# Prints the (abbreviated) commit hash to stdout +# Exit code 0 on success (HEAD is used if no commit matches), 1 on directory/repository errors, 2 on invalid arguments +# +# Environment: +# SOURCE_COMMIT_DIR Alternative to --dir +# SOURCE_COMMIT_ABBREV Alternative to --abbrev +# + +set -euo pipefail + +# Defaults +DIR="${SOURCE_COMMIT_DIR:-.}" +ABBREV="${SOURCE_COMMIT_ABBREV:-8}" +QUIET="${QUIET:-false}" +PATTERNS=() + 
+print_help() { + sed -n '2,/^[^#]/p' "$0" | grep "^#" | sed 's/^# \?//' +} + +log() { + if [[ "$QUIET" != "true" ]]; then + echo "$@" >&2 + fi +} + +# Parse arguments +while [[ $# -gt 0 ]]; do + case "$1" in + --dir=*) + DIR="${1#*=}" + ;; + --abbrev=*) + ABBREV="${1#*=}" + ;; + --full) + ABBREV=40 + ;; + --quiet) + QUIET=true + ;; + --help|-h) + print_help + exit 0 + ;; + -*) + echo "Error: Unknown option: $1" >&2 + print_help + exit 2 + ;; + *) + PATTERNS+=("$1") + ;; + esac + shift +done + +# Validate inputs +if [[ ${#PATTERNS[@]} -eq 0 ]]; then + echo "Error: At least one pattern is required" >&2 + print_help + exit 2 +fi + +if [[ ! -d "$DIR" ]]; then + echo "Error: Directory not found: $DIR" >&2 + exit 1 +fi + +# Change to target directory +cd "$DIR" + +# Verify it's a git repository +if ! git rev-parse --git-dir >/dev/null 2>&1; then + echo "Error: Not a git repository: $DIR" >&2 + exit 1 +fi + +log "Searching for last commit that changed: ${PATTERNS[*]}" + +# Find the most recent commit that changed any of the patterns +COMMIT=$(git log --pretty=format:"%H" -n 1 -- "${PATTERNS[@]}" 2>/dev/null || true) + +if [[ -z "$COMMIT" ]]; then + log "Warning: No commits found matching patterns" + # Fall back to HEAD if no matching commits (new repo or patterns don't match history) + COMMIT=$(git rev-parse HEAD) + log "Using HEAD: $COMMIT" +fi + +# Abbreviate if requested +if [[ "$ABBREV" -lt 40 ]]; then + SHORT_COMMIT=$(git -c core.abbrev="$ABBREV" rev-parse --short "$COMMIT") +else + SHORT_COMMIT="$COMMIT" +fi + +log "Found commit: $SHORT_COMMIT (from $COMMIT)" + +# Output just the commit hash +echo "$SHORT_COMMIT" diff --git a/scripts/bash/find-upstream-image.sh b/scripts/bash/find-upstream-image.sh new file mode 100755 index 0000000..966c23b --- /dev/null +++ b/scripts/bash/find-upstream-image.sh @@ -0,0 +1,241 @@ +#!/bin/bash +# +# find-upstream-image.sh - Find the latest built image from an upstream repo +# +# This script fetches an upstream repository, finds the 
last commit that changed +# source files, and checks if a Docker image exists for that commit. Used by +# downstream repos to find pre-built images from their dependencies. +# +# Usage: +# find-upstream-image.sh [OPTIONS] +# +# Required Options: +# --repo-url=URL Git URL of upstream repo (e.g., https://gitlab.syncad.com/hive/hive.git) +# --registry=URL Docker registry URL (e.g., registry.gitlab.syncad.com/hive/hive) +# --patterns=LIST Comma-separated list of source file patterns (e.g., "libraries/,programs/,Dockerfile") +# +# Optional: +# --branch=NAME Branch to check (default: develop) +# --depth=N Git fetch depth (default: 100) +# --image=NAME Image name within registry (default: none) +# --output=FILE Output env file (default: upstream-image.env) +# --work-dir=PATH Working directory for git clone (default: /tmp/upstream-repo-$$) +# --keep-repo Don't delete cloned repo after completion +# --require-hit Exit with error if image not found +# --quiet Suppress status messages +# --help Show this help message +# +# Examples: +# # Find latest hive image for clive +# find-upstream-image.sh \ +# --repo-url=https://gitlab.syncad.com/hive/hive.git \ +# --registry=registry.gitlab.syncad.com/hive/hive \ +# --patterns="libraries/,programs/,CMakeLists.txt,Dockerfile,cmake/,.gitmodules" +# +# # Find testnet image from specific branch +# find-upstream-image.sh \ +# --repo-url=https://gitlab.syncad.com/hive/hive.git \ +# --registry=registry.gitlab.syncad.com/hive/hive \ +# --image=testnet \ +# --branch=develop \ +# --patterns="libraries/,programs/" +# +# Output Environment File (upstream-image.env): +# UPSTREAM_CACHE_HIT=true|false Whether image was found +# UPSTREAM_COMMIT= Commit hash of last source change +# UPSTREAM_TAG= Docker image tag +# UPSTREAM_IMAGE= Full image name with tag +# UPSTREAM_REGISTRY= Registry path without tag +# UPSTREAM_BRANCH= Branch that was checked +# +# Exit Codes: +# 0 - Success +# 1 - Image not found (with --require-hit) or git/docker error +# 2 - 
Invalid arguments +# + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +# Defaults +REPO_URL="" +REGISTRY="" +PATTERNS="" +BRANCH="develop" +DEPTH=100 +IMAGE="" +OUTPUT_FILE="upstream-image.env" +WORK_DIR="" +KEEP_REPO=false +REQUIRE_HIT=false +QUIET="${QUIET:-false}" + +print_help() { + sed -n '2,/^[^#]/p' "$0" | grep "^#" | sed 's/^# \?//' +} + +log() { + if [[ "$QUIET" != "true" ]]; then + echo "$@" >&2 + fi +} + +error() { + echo "Error: $*" >&2 +} + +# shellcheck disable=SC2329 # Function is invoked via trap +cleanup() { + if [[ "$KEEP_REPO" != "true" && -n "$WORK_DIR" && -d "$WORK_DIR" ]]; then + log "Cleaning up: $WORK_DIR" + rm -rf "$WORK_DIR" + fi +} + +# Parse arguments +while [[ $# -gt 0 ]]; do + case "$1" in + --repo-url=*) + REPO_URL="${1#*=}" + ;; + --registry=*) + REGISTRY="${1#*=}" + ;; + --patterns=*) + PATTERNS="${1#*=}" + ;; + --branch=*) + BRANCH="${1#*=}" + ;; + --depth=*) + DEPTH="${1#*=}" + ;; + --image=*) + IMAGE="${1#*=}" + ;; + --output=*) + OUTPUT_FILE="${1#*=}" + ;; + --work-dir=*) + WORK_DIR="${1#*=}" + ;; + --keep-repo) + KEEP_REPO=true + ;; + --require-hit) + REQUIRE_HIT=true + ;; + --quiet) + QUIET=true + ;; + --help|-h) + print_help + exit 0 + ;; + *) + error "Unknown option: $1" + print_help + exit 2 + ;; + esac + shift +done + +# Validate required arguments +if [[ -z "$REPO_URL" ]]; then + error "--repo-url=URL is required" + exit 2 +fi + +if [[ -z "$REGISTRY" ]]; then + error "--registry=URL is required" + exit 2 +fi + +if [[ -z "$PATTERNS" ]]; then + error "--patterns=LIST is required" + exit 2 +fi + +# Set default work directory +if [[ -z "$WORK_DIR" ]]; then + WORK_DIR="/tmp/upstream-repo-$$" +fi + +# Setup cleanup trap +trap cleanup EXIT + +# Convert comma-separated patterns to array +IFS=',' read -ra PATTERN_ARRAY <<< "$PATTERNS" + +log "Fetching upstream repo: $REPO_URL (branch: $BRANCH)" + +# Clone the repository (shallow, single branch) +if [[ -d "$WORK_DIR" ]]; then + log "Removing 
existing work directory: $WORK_DIR"
+    rm -rf "$WORK_DIR"
+fi
+
+git clone --depth="$DEPTH" --branch="$BRANCH" --single-branch "$REPO_URL" "$WORK_DIR" 2>&1 | \
+    while IFS= read -r line; do log " $line"; done
+
+# Find last source commit (the helper prints only the hash on stdout because of --quiet)
+log "Finding last source commit for patterns: ${PATTERN_ARRAY[*]}"
+
+FIND_COMMIT_ARGS=(--dir="$WORK_DIR" --quiet)
+COMMIT=$("$SCRIPT_DIR/find-last-source-commit.sh" "${FIND_COMMIT_ARGS[@]}" "${PATTERN_ARRAY[@]}")
+
+if [[ -z "$COMMIT" ]]; then
+    error "Failed to find source commit"
+    exit 1
+fi
+
+log "Found last source commit: $COMMIT"
+
+# Check if image exists; results go to a temp file that is re-prefixed with UPSTREAM_ afterwards
+log "Checking for image in registry: $REGISTRY"
+
+GET_IMAGE_ARGS=(
+    --commit="$COMMIT"
+    --registry="$REGISTRY"
+    --output="$OUTPUT_FILE.tmp"
+)
+
+if [[ -n "$IMAGE" ]]; then
+    GET_IMAGE_ARGS+=(--image="$IMAGE")
+fi
+
+if [[ "$QUIET" == "true" ]]; then
+    GET_IMAGE_ARGS+=(--quiet)
+fi
+
+if [[ "$REQUIRE_HIT" == "true" ]]; then
+    GET_IMAGE_ARGS+=(--require-hit)
+fi
+# Capture the status with `||` so `set -e` cannot abort here; the output file must still be written on a miss
+GET_RESULT=0
+"$SCRIPT_DIR/get-cached-image.sh" "${GET_IMAGE_ARGS[@]}" || GET_RESULT=$?
+ +# Read the temp output and rewrite with UPSTREAM_ prefix +if [[ -f "$OUTPUT_FILE.tmp" ]]; then + { + # Add branch info + echo "UPSTREAM_BRANCH=$BRANCH" + # Rename variables with UPSTREAM_ prefix + sed 's/^CACHE_HIT=/UPSTREAM_CACHE_HIT=/; + s/^IMAGE_COMMIT=/UPSTREAM_COMMIT=/; + s/^IMAGE_TAG=/UPSTREAM_TAG=/; + s/^IMAGE_NAME=/UPSTREAM_IMAGE=/; + s/^IMAGE_REGISTRY=/UPSTREAM_REGISTRY=/' "$OUTPUT_FILE.tmp" + } > "$OUTPUT_FILE" + rm -f "$OUTPUT_FILE.tmp" +fi + +log "" +log "Output written to: $OUTPUT_FILE" +if [[ "$QUIET" != "true" ]]; then + cat "$OUTPUT_FILE" >&2 +fi + +exit $GET_RESULT diff --git a/scripts/bash/get-cached-image.sh b/scripts/bash/get-cached-image.sh new file mode 100755 index 0000000..f9a3390 --- /dev/null +++ b/scripts/bash/get-cached-image.sh @@ -0,0 +1,233 @@ +#!/bin/bash +# +# get-cached-image.sh - Check if a Docker image exists for a given commit +# +# This script checks if a Docker image tagged with a specific commit hash exists +# in a container registry. Used by both building repos (to avoid rebuilds) and +# downstream repos (to find pre-built images from upstream). 
+# +# Usage: +# get-cached-image.sh [OPTIONS] +# +# Required Options (one of): +# --commit=HASH Commit hash to look up (can be short or full) +# --commit-var=NAME Environment variable containing commit hash +# +# Required Options: +# --registry=URL Registry URL (e.g., registry.gitlab.syncad.com/hive/hive) +# +# Optional: +# --image=NAME Image name within registry (default: none, uses registry directly) +# --output=FILE Output file for environment variables (default: image-cache.env) +# --tag-prefix=PREFIX Prefix for tags (default: none) +# --tag-suffix=SUFFIX Suffix for tags (default: none) +# --abbrev=N Abbreviate commit to N chars for tag (default: 8) +# --require-hit Exit with error if image not found (default: exit 0 with CACHE_HIT=false) +# --quiet Suppress status messages +# --help Show this help message +# +# Examples: +# # Check if hive image exists for a commit +# get-cached-image.sh --commit=abc12345 --registry=registry.gitlab.syncad.com/hive/hive +# +# # Check for testnet variant +# get-cached-image.sh --commit=abc12345 --registry=registry.gitlab.syncad.com/hive/hive --image=testnet +# +# # Use commit from environment variable +# export HIVE_COMMIT=abc12345 +# get-cached-image.sh --commit-var=HIVE_COMMIT --registry=registry.gitlab.syncad.com/hive/hive +# +# # Downstream repo looking up upstream image +# HIVE_COMMIT=$(find-last-source-commit.sh --dir=/tmp/hive libraries/ programs/) +# get-cached-image.sh --commit=$HIVE_COMMIT --registry=registry.gitlab.syncad.com/hive/hive +# +# Output Environment File (image-cache.env): +# CACHE_HIT=true|false Whether image was found +# IMAGE_COMMIT= Full commit hash +# IMAGE_TAG= Tag used for lookup (may be abbreviated) +# IMAGE_NAME= Full image name with tag (registry/image:tag) +# IMAGE_REGISTRY= Registry path without tag +# +# Exit Codes: +# 0 - Success (image found or not found without --require-hit) +# 1 - Error (image not found with --require-hit, or other error) +# 2 - Invalid arguments +# + +set -euo pipefail + 
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +# Source docker utilities if available +if [[ -f "$SCRIPT_DIR/docker-image-utils.sh" ]]; then + # shellcheck source=./docker-image-utils.sh disable=SC1091 + source "$SCRIPT_DIR/docker-image-utils.sh" +fi + +# Defaults +COMMIT="" +COMMIT_VAR="" +REGISTRY="" +IMAGE="" +OUTPUT_FILE="image-cache.env" +TAG_PREFIX="" +TAG_SUFFIX="" +ABBREV=8 +REQUIRE_HIT=false +QUIET="${QUIET:-false}" + +print_help() { + sed -n '2,/^[^#]/p' "$0" | grep "^#" | sed 's/^# \?//' +} + +log() { + if [[ "$QUIET" != "true" ]]; then + echo "$@" >&2 + fi +} + +error() { + echo "Error: $*" >&2 +} + +# Parse arguments +while [[ $# -gt 0 ]]; do + case "$1" in + --commit=*) + COMMIT="${1#*=}" + ;; + --commit-var=*) + COMMIT_VAR="${1#*=}" + ;; + --registry=*) + REGISTRY="${1#*=}" + ;; + --image=*) + IMAGE="${1#*=}" + ;; + --output=*) + OUTPUT_FILE="${1#*=}" + ;; + --tag-prefix=*) + TAG_PREFIX="${1#*=}" + ;; + --tag-suffix=*) + TAG_SUFFIX="${1#*=}" + ;; + --abbrev=*) + ABBREV="${1#*=}" + ;; + --require-hit) + REQUIRE_HIT=true + ;; + --quiet) + QUIET=true + ;; + --help|-h) + print_help + exit 0 + ;; + *) + error "Unknown option: $1" + print_help + exit 2 + ;; + esac + shift +done + +# Get commit from variable if specified +if [[ -n "$COMMIT_VAR" ]]; then + COMMIT="${!COMMIT_VAR:-}" + if [[ -z "$COMMIT" ]]; then + error "Environment variable $COMMIT_VAR is not set or empty" + exit 2 + fi +fi + +# Validate required arguments +if [[ -z "$COMMIT" ]]; then + error "Either --commit=HASH or --commit-var=NAME is required" + print_help + exit 2 +fi + +if [[ -z "$REGISTRY" ]]; then + error "--registry=URL is required" + print_help + exit 2 +fi + +# Store full commit (might already be abbreviated, that's ok) +FULL_COMMIT="$COMMIT" + +# Abbreviate commit for tag if longer than requested +if [[ ${#COMMIT} -gt $ABBREV ]]; then + TAG_COMMIT="${COMMIT:0:$ABBREV}" +else + TAG_COMMIT="$COMMIT" +fi + +# Build the tag 
+TAG="${TAG_PREFIX}${TAG_COMMIT}${TAG_SUFFIX}"
+
+# Build full image name: REGISTRY[/IMAGE]:TAG
+# Remove trailing slash from registry if present
+REGISTRY="${REGISTRY%/}"
+
+if [[ -n "$IMAGE" ]]; then
+    IMAGE_REGISTRY="${REGISTRY}/${IMAGE}"
+    IMAGE_NAME="${REGISTRY}/${IMAGE}:${TAG}"
+else
+    IMAGE_REGISTRY="${REGISTRY}"
+    IMAGE_NAME="${REGISTRY}:${TAG}"
+fi
+
+log "Checking for image: $IMAGE_NAME"
+
+# Check if image exists in registry (any failure of the probe below counts as a miss)
+CACHE_HIT=false
+
+# Save current set -e state and disable temporarily so a failing probe does not abort the script
+OLD_SET_E=0
+[[ $- == *e* ]] && OLD_SET_E=1
+set +e
+
+docker manifest inspect "$IMAGE_NAME" >/dev/null 2>&1
+RESULT=$?
+
+# Restore set -e if it was enabled
+((OLD_SET_E)) && set -e
+
+if [[ $RESULT -eq 0 ]]; then
+    CACHE_HIT=true
+    log "Image found: $IMAGE_NAME"
+else
+    log "Image not found: $IMAGE_NAME"
+fi
+
+# Write output environment file (written on both hit and miss)
+log "Writing output to: $OUTPUT_FILE"
+cat > "$OUTPUT_FILE" << EOF
+CACHE_HIT=$CACHE_HIT
+IMAGE_COMMIT=$FULL_COMMIT
+IMAGE_TAG=$TAG
+IMAGE_NAME=$IMAGE_NAME
+IMAGE_REGISTRY=$IMAGE_REGISTRY
+EOF
+
+# Also report the results through log (stderr, suppressed by --quiet)
+log "Results:"
+log " CACHE_HIT=$CACHE_HIT"
+log " IMAGE_COMMIT=$FULL_COMMIT"
+log " IMAGE_TAG=$TAG"
+log " IMAGE_NAME=$IMAGE_NAME"
+log " IMAGE_REGISTRY=$IMAGE_REGISTRY"
+
+# Handle require-hit mode: a miss becomes exit code 1 after the env file has been written
+if [[ "$REQUIRE_HIT" == "true" && "$CACHE_HIT" == "false" ]]; then
+    error "Required image not found: $IMAGE_NAME"
+    exit 1
+fi
+
+exit 0
diff --git a/templates/source_change_detection.gitlab-ci.yml b/templates/source_change_detection.gitlab-ci.yml
new file mode 100644
index 0000000..4c119d2
--- /dev/null
+++ b/templates/source_change_detection.gitlab-ci.yml
@@ -0,0 +1,326 @@
+# Source Change Detection Templates
+#
+# Provides reusable templates for detecting what type of files changed in a commit
+# and skipping unnecessary builds/tests. Also provides templates for finding
+# pre-built images from upstream repositories.
+# +# Include this file in your .gitlab-ci.yml: +# include: +# - project: 'hive/common-ci-configuration' +# ref: develop +# file: '/templates/source_change_detection.gitlab-ci.yml' +# +# Features: +# 1. Detect docs-only changes (skip builds and tests) +# 2. Detect tests-only changes (skip builds, reuse cached images) +# 3. Find pre-built images from upstream repos +# 4. Skip rules for conditional job execution +# +# Required Variables (set in your CI): +# SOURCE_CODE_PATTERNS - Regex patterns for source files (pipe-separated) +# DOCS_PATTERNS - Regex patterns for documentation files (pipe-separated) +# CI_SCRIPT_PATTERNS - Regex patterns for CI scripts (pipe-separated) +# +# Example Configuration: +# variables: +# SOURCE_CODE_PATTERNS: "^(libraries/|programs/|CMakeLists\\.txt|cmake/|Dockerfile)" +# DOCS_PATTERNS: "^(.*\\.md|doc/|\\.gitignore|CODEOWNERS)" +# CI_SCRIPT_PATTERNS: "^(\\.gitlab-ci\\.yaml|scripts/ci/)" + +variables: + # Default patterns (override in your CI) + SOURCE_CODE_PATTERNS: "^(src/|lib/|Dockerfile|CMakeLists\\.txt|cmake/)" + DOCS_PATTERNS: "^(.*\\.md|doc/|docs/|\\.gitignore|\\.gitattributes|\\.dockerignore|\\.editorconfig|CODEOWNERS|\\.mailmap|LICENSE|COPYING)" + CI_SCRIPT_PATTERNS: "^(\\.gitlab-ci\\.yaml|\\.gitlab-ci\\.yml|scripts/ci/|\\.ci/)" + TEST_PATTERNS: "^(tests/|test/|spec/|__tests__/)" + + # Control variables (can be set when running pipeline) + FORCE_FULL_PIPELINE: "false" + QUICK_TEST: "false" + + # Output variables (set by detect_source_changes job) + DOCS_ONLY: "false" + TESTS_ONLY: "false" + SOURCE_CHANGED: "false" + CI_CHANGED: "false" + +# ============================================================================ +# Change Detection Job Template +# ============================================================================ + +# Detects what type of files changed and exports variables for other jobs +# Extend this in your CI and customize the patterns as needed +.detect_source_changes: + stage: .pre + image: alpine:latest + 
needs: [] + variables: + GIT_DEPTH: 50 + before_script: + - apk add --no-cache git + script: + - | + echo "=== Source Change Detection ===" + echo "SOURCE_CODE_PATTERNS: $SOURCE_CODE_PATTERNS" + echo "DOCS_PATTERNS: $DOCS_PATTERNS" + echo "CI_SCRIPT_PATTERNS: $CI_SCRIPT_PATTERNS" + echo "TEST_PATTERNS: $TEST_PATTERNS" + echo "" + + # Determine base commit to compare against + if [ -n "$CI_MERGE_REQUEST_DIFF_BASE_SHA" ]; then + BASE_SHA="$CI_MERGE_REQUEST_DIFF_BASE_SHA" + echo "Using MR diff base: $BASE_SHA" + elif [ -n "$CI_COMMIT_BEFORE_SHA" ] && [ "$CI_COMMIT_BEFORE_SHA" != "0000000000000000000000000000000000000000" ]; then + BASE_SHA="$CI_COMMIT_BEFORE_SHA" + echo "Using commit before SHA: $BASE_SHA" + else + BASE_SHA="HEAD~1" + echo "Using HEAD~1 as base" + fi + + # Get list of changed files + echo "" + echo "Changed files:" + CHANGED_FILES=$(git diff --name-only "$BASE_SHA" HEAD 2>/dev/null || git diff --name-only HEAD~1 HEAD 2>/dev/null || echo "") + echo "$CHANGED_FILES" | head -50 + if [ "$(echo "$CHANGED_FILES" | wc -l)" -gt 50 ]; then + echo "... 
(truncated, more than 50 files changed)" + fi + + # Initialize detection flags + DOCS_ONLY="false" + SOURCE_CHANGED="false" + TESTS_CHANGED="false" + CI_CHANGED="false" + TESTS_ONLY="false" + + if [ -n "$CHANGED_FILES" ]; then + # Check if ONLY documentation files changed + NON_DOCS=$(echo "$CHANGED_FILES" | grep -vE "$DOCS_PATTERNS" || true) + if [ -z "$NON_DOCS" ]; then + DOCS_ONLY="true" + echo "" + echo ">>> Only documentation files changed" + fi + + # Check if source code changed + if echo "$CHANGED_FILES" | grep -qE "$SOURCE_CODE_PATTERNS"; then + SOURCE_CHANGED="true" + fi + + # Check if tests changed + if echo "$CHANGED_FILES" | grep -qE "$TEST_PATTERNS"; then + TESTS_CHANGED="true" + fi + + # Check if CI scripts changed + if echo "$CHANGED_FILES" | grep -qE "$CI_SCRIPT_PATTERNS"; then + CI_CHANGED="true" + fi + + # Determine if tests-only change (tests changed, but not source or CI) + if [ "$DOCS_ONLY" = "false" ] && [ "$TESTS_CHANGED" = "true" ] && [ "$SOURCE_CHANGED" = "false" ] && [ "$CI_CHANGED" = "false" ]; then + TESTS_ONLY="true" + echo "" + echo ">>> Tests-only change detected" + fi + fi + + echo "" + echo "=== Detection Results ===" + echo " DOCS_ONLY=$DOCS_ONLY" + echo " SOURCE_CHANGED=$SOURCE_CHANGED" + echo " TESTS_CHANGED=$TESTS_CHANGED" + echo " CI_CHANGED=$CI_CHANGED" + echo " TESTS_ONLY=$TESTS_ONLY" + + # Write to dotenv file for other jobs + cat > detect_changes.env << EOF + DOCS_ONLY=$DOCS_ONLY + TESTS_ONLY=$TESTS_ONLY + SOURCE_CHANGED=$SOURCE_CHANGED + CI_CHANGED=$CI_CHANGED + EOF + artifacts: + reports: + dotenv: detect_changes.env + expire_in: 1 day + rules: + # Skip on protected branches - always run full pipeline + - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH + when: never + - if: $CI_COMMIT_BRANCH == "master" || $CI_COMMIT_BRANCH == "main" + when: never + - if: $CI_COMMIT_BRANCH == "develop" + when: never + # Skip on tags + - if: $CI_COMMIT_TAG + when: never + # Skip if forcing full pipeline + - if: $FORCE_FULL_PIPELINE == "true" + 
when: never + # Skip if QUICK_TEST is manually enabled + - if: $QUICK_TEST == "true" + when: never + - when: on_success
+
+# ============================================================================
+# Upstream Image Lookup Job Template
+# ============================================================================
+
+# Template for finding pre-built images from upstream repositories.
+# Extend this and set the required variables.
+#
+# The job downloads the lookup helpers from the develop branch of
+# common-ci-configuration, runs find-upstream-image.sh with the UPSTREAM_*
+# variables below, and publishes the file named by UPSTREAM_OUTPUT as a
+# dotenv report.
+.find_upstream_image:
+  stage: .pre
+  image: ${CI_DEPENDENCY_PROXY_GROUP_IMAGE_PREFIX}/docker:24-cli
+  needs: []
+  variables:
+    # Override these in your job
+    UPSTREAM_REPO_URL: ""
+    UPSTREAM_REGISTRY: ""
+    UPSTREAM_BRANCH: "develop"
+    UPSTREAM_PATTERNS: ""
+    # Optional: --image is only passed to the script when this is non-empty
+    UPSTREAM_IMAGE: ""
+    UPSTREAM_DEPTH: "100"
+    # Output file name
+    UPSTREAM_OUTPUT: "upstream-image.env"
+  before_script:
+    # git and bash are needed by the downloaded helper scripts
+    - apk add --no-cache git bash
+  script:
+    - |
+      # Fetch common-ci-configuration scripts
+      SCRIPTS_URL="https://gitlab.syncad.com/hive/common-ci-configuration/-/raw/develop/scripts/bash"
+
+      mkdir -p /tmp/ci-scripts
+      for script in find-last-source-commit.sh get-cached-image.sh find-upstream-image.sh; do
+        wget -q -O "/tmp/ci-scripts/$script" "$SCRIPTS_URL/$script"
+        chmod +x "/tmp/ci-scripts/$script"
+      done
+
+      # Run the upstream image finder
+      /tmp/ci-scripts/find-upstream-image.sh \
+        --repo-url="$UPSTREAM_REPO_URL" \
+        --registry="$UPSTREAM_REGISTRY" \
+        --branch="$UPSTREAM_BRANCH" \
+        --patterns="$UPSTREAM_PATTERNS" \
+        ${UPSTREAM_IMAGE:+--image="$UPSTREAM_IMAGE"} \
+        --depth="$UPSTREAM_DEPTH" \
+        --output="$UPSTREAM_OUTPUT"
+  artifacts:
+    reports:
+      dotenv: $UPSTREAM_OUTPUT
+    expire_in: 1 day
+
+# ============================================================================
+# Skip Rule Templates
+# ============================================================================
+
+# NOTE(review): `rules` are evaluated when the pipeline is created. Confirm
+# that DOCS_ONLY / TESTS_ONLY are actually available at that point; variables
+# coming from another job's dotenv report are normally only injected into the
+# environment of later jobs.
+
+# Skip build jobs when only docs or tests changed
+.skip_build_on_non_source_changes:
+  rules:
+    # FORCE_FULL_PIPELINE is evaluated first so it can override the skip
+    # flags below, matching .skip_test_on_docs_only and
+    # .manual_on_feature_branches.
+    - if: $FORCE_FULL_PIPELINE == "true"
+      when: on_success
+    - if: $QUICK_TEST == "true"
+      when: never
+    - if: $DOCS_ONLY == "true"
+      when: never
+    - if: $TESTS_ONLY == "true"
+      when: never
+    - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
+      when: on_success
+    - if: $CI_COMMIT_BRANCH == "master" || $CI_COMMIT_BRANCH == "main" || $CI_COMMIT_BRANCH == "develop"
+      when: on_success
+    - if: $CI_COMMIT_TAG
+      when: on_success
+    - when: on_success
+
+# Skip test jobs when only docs changed
+# (forced pipelines, protected branches and tags always run the tests)
+.skip_test_on_docs_only:
+  rules:
+    - if: $FORCE_FULL_PIPELINE == "true"
+      when: on_success
+    - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
+      when: on_success
+    - if: $CI_COMMIT_BRANCH == "master" || $CI_COMMIT_BRANCH == "main" || $CI_COMMIT_BRANCH == "develop"
+      when: on_success
+    - if: $CI_COMMIT_TAG
+      when: on_success
+    - if: $DOCS_ONLY == "true"
+      when: never
+    - when: on_success
+
+# Make expensive jobs manual on feature branches
+# (automatic on forced pipelines, protected branches and tags; dropped
+# entirely for tests-only changes)
+.manual_on_feature_branches:
+  rules:
+    - if: $FORCE_FULL_PIPELINE == "true"
+      when: on_success
+    - if: $TESTS_ONLY == "true"
+      when: never
+    - if: $QUICK_TEST == "true"
+      when: manual
+      allow_failure: true
+    - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
+      when: on_success
+    - if: $CI_COMMIT_BRANCH == "master" || $CI_COMMIT_BRANCH == "main" || $CI_COMMIT_BRANCH == "develop"
+      when: on_success
+    - if: $CI_COMMIT_TAG
+      when: on_success
+    - when: manual
+      allow_failure: true
+
+# Run when QUICK_TEST or TESTS_ONLY is enabled (for cached binary setup)
+.run_on_quick_test:
+  rules:
+    - if: $QUICK_TEST == "true"
+      when: always
+    - if: $TESTS_ONLY == "true"
+      when: always
+    - when: never
+
+# ============================================================================
+# Local Image Lookup Job Template
+# ============================================================================
+
+# Template for checking if a locally-built image already exists.
+# Use this for the "build-or-skip" pattern within the same repo.
+#
+# The job finds the last commit that touched LOCAL_PATTERNS, asks
+# get-cached-image.sh whether an image for that commit exists in
+# LOCAL_REGISTRY, and publishes the file named by LOCAL_OUTPUT as a dotenv
+# report.
+.check_local_image:
+  stage: .pre
+  image: ${CI_DEPENDENCY_PROXY_GROUP_IMAGE_PREFIX}/docker:24-cli
+  needs: []
+  variables:
+    # Override these in your job
+    LOCAL_REGISTRY: "${CI_REGISTRY_IMAGE}"
+    # Optional: --image is only passed to the script when this is non-empty
+    LOCAL_IMAGE: ""
+    # Comma-separated list of source path patterns
+    LOCAL_PATTERNS: ""
+    LOCAL_OUTPUT: "local-image.env"
+  before_script:
+    # git and bash are needed by the downloaded helper scripts
+    - apk add --no-cache git bash
+  script:
+    - |
+      # Fetch common-ci-configuration scripts
+      SCRIPTS_URL="https://gitlab.syncad.com/hive/common-ci-configuration/-/raw/develop/scripts/bash"
+
+      mkdir -p /tmp/ci-scripts
+      for script in find-last-source-commit.sh get-cached-image.sh; do
+        wget -q -O "/tmp/ci-scripts/$script" "$SCRIPTS_URL/$script"
+        chmod +x "/tmp/ci-scripts/$script"
+      done
+
+      # Convert comma-separated patterns to positional arguments.
+      # Keep this POSIX-only (no arrays, `read -a` or here-strings): the
+      # docker:24-cli image ships BusyBox sh, and the bash installed in
+      # before_script is not the shell that interprets this block, because
+      # before_script and script run in the same shell session.
+      set -f                  # do not glob-expand patterns such as *.md
+      OLD_IFS=$IFS
+      IFS=','
+      set -- $LOCAL_PATTERNS  # an empty value yields zero arguments
+      IFS=$OLD_IFS
+      set +f
+
+      # Find last source commit
+      COMMIT=$(/tmp/ci-scripts/find-last-source-commit.sh "$@")
+      echo "Last source commit: $COMMIT"
+
+      # Check if image exists
+      /tmp/ci-scripts/get-cached-image.sh \
+        --commit="$COMMIT" \
+        --registry="$LOCAL_REGISTRY" \
+        ${LOCAL_IMAGE:+--image="$LOCAL_IMAGE"} \
+        --output="$LOCAL_OUTPUT"
+  artifacts:
+    reports:
+      dotenv: $LOCAL_OUTPUT
+    expire_in: 1 day
-- 
GitLab