From 72ab78fe8a298bac943756195225231bec869b4a Mon Sep 17 00:00:00 2001
From: Dan Notestein
Date: Mon, 29 Dec 2025 01:51:05 -0500
Subject: [PATCH 1/2] Fix lock file permissions for multi-user/container access

Lock files created by Docker containers (UID 2000) were not writable by
other users, causing 'Permission denied' errors when other jobs tried to
update the lock timestamp.

Add _touch_lock() helper that creates lock files with 666 permissions
and attempts to fix permissions on existing lock files.
---
 scripts/cache-manager.sh | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/scripts/cache-manager.sh b/scripts/cache-manager.sh
index 2614578..e2c1f7f 100755
--- a/scripts/cache-manager.sh
+++ b/scripts/cache-manager.sh
@@ -73,6 +73,20 @@ _error() {
     echo "[cache-manager] ERROR: $1" >&2
 }
 
+# Create or update a lock file with world-writable permissions.
+# This ensures lock files can be used by any user/container (different UIDs).
+# Best effort: $1 is the lock path; failures are ignored and 0 is returned.
+_touch_lock() {
+    local lockfile="$1"
+    # touch under umask 0 creates a missing file as 0666. Unlike install(1) it
+    # never unlinks/recreates the path, so racing jobs flock the same inode.
+    ( umask 0; touch -- "$lockfile" ) 2>/dev/null || true
+    # Fix permissions on a pre-existing file if we can (owner or root only)
+    if [[ -f "$lockfile" ]]; then
+        chmod 666 -- "$lockfile" 2>/dev/null || true
+    fi
+}
+
 # Write lock holder info for debugging stale locks
 _write_lock_info() {
     local lockfile="$1"
@@ -198,7 +212,7 @@ _update_lru() {
     local entry="${cache_type}/${cache_key}"
 
     # Acquire global lock for index update
-    touch "$GLOBAL_LOCK"
+    _touch_lock "$GLOBAL_LOCK"
     _flock_with_timeout 30 -x "$GLOBAL_LOCK" -c "
         # Create or update LRU index (simple format: timestamp|path per line)
         if [[ -f '$LRU_INDEX' ]]; then
@@ -413,7 +427,7 @@ cmd_get() {
     mkdir -p "$local_dest"
 
     local tar_lock="${source_tar}.lock"
-    touch "$tar_lock" 2>/dev/null || true
+    _touch_lock "$tar_lock"
 
     local get_start_time=$(date +%s.%N)
     if _flock_with_timeout "$CACHE_LOCK_TIMEOUT" -s "$tar_lock" -c "
@@ -498,7 +512,7 @@ cmd_put() {
         # Create tar archive (local I/O on NFS host, still fast)
         _log "Storing cache on NFS host: $NFS_TAR_FILE"
         mkdir -p "$(dirname "$NFS_TAR_FILE")"
-        touch "$NFS_TAR_LOCK"
+        _touch_lock "$NFS_TAR_LOCK"
 
         # shellcheck disable=SC2086
         if ! _flock_with_timeout "$CACHE_LOCK_TIMEOUT" -x "$NFS_TAR_LOCK" -c "
@@ -569,7 +583,7 @@ cmd_put() {
         fi
 
         mkdir -p "$(dirname "$NFS_TAR_FILE")"
-        touch "$NFS_TAR_LOCK"
+        _touch_lock "$NFS_TAR_LOCK"
 
         # Check for stale locks before attempting to acquire
         _check_stale_lock "$NFS_TAR_LOCK"
-- 
GitLab

From ecf6ee39781f472b6aeb34fc96457efd3d144789 Mon Sep 17 00:00:00 2001
From: Dan Notestein
Date: Mon, 29 Dec 2025 01:56:30 -0500
Subject: [PATCH 2/2] Fix HAF cache handling for all cache types (haf_pipeline, haf_filtered, etc.)

The permission relaxation and tar excludes were only applied to 'haf' and
'haf_sync' cache types, but not to 'haf_pipeline' and 'haf_filtered' which
also contain PostgreSQL data directories.
Changed the pattern matching from exact string matches to wildcard 'haf*'
to cover all HAF-related cache types.
---
 scripts/cache-manager.sh | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/scripts/cache-manager.sh b/scripts/cache-manager.sh
index e2c1f7f..d7a33b9 100755
--- a/scripts/cache-manager.sh
+++ b/scripts/cache-manager.sh
@@ -480,7 +480,8 @@ cmd_put() {
     fi
 
     # Relax pgdata permissions for HAF caches so they can be copied
-    if [[ "$cache_type" == "haf" ]]; then
+    # Covers: haf, haf_sync, haf_pipeline, haf_filtered, etc.
+    if [[ "$cache_type" == haf* ]]; then
         _relax_pgdata_permissions "$local_source"
     fi
 
@@ -505,7 +506,7 @@ cmd_put() {
                 tar_excludes="--exclude=./datadir/blockchain"
                 _log "Excluding datadir/blockchain"
             fi
-        elif [[ "$cache_type" == "haf" || "$cache_type" == "haf_sync" ]]; then
+        elif [[ "$cache_type" == haf* ]]; then
             tar_excludes=$(_build_haf_tar_excludes "$local_source")
         fi
 
@@ -553,7 +554,7 @@ cmd_put() {
                 tar_excludes="--exclude=./datadir/blockchain"
                 _log "Excluding datadir/blockchain"
             fi
-        elif [[ "$cache_type" == "haf" || "$cache_type" == "haf_sync" ]]; then
+        elif [[ "$cache_type" == haf* ]]; then
             tar_excludes=$(_build_haf_tar_excludes "$local_source")
         fi
 
-- 
GitLab