From 4d80d25722b9d15cce7e4a3d864080f5a05192cd Mon Sep 17 00:00:00 2001
From: Eric Frias <efrias@syncad.com>
Date: Thu, 21 Nov 2024 16:38:11 +0000
Subject: [PATCH] Allow the docker health check for the block processor to
 report healthy as long as reputation tracker is actively processing blocks,
 even if it's not yet synced

---
 .../scripts/block-processing-healthcheck.sh   | 29 +++++++++++++++++--
 scripts/process_blocks.sh                     |  2 ++
 2 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/docker/scripts/block-processing-healthcheck.sh b/docker/scripts/block-processing-healthcheck.sh
index a022c01..db6cf99 100755
--- a/docker/scripts/block-processing-healthcheck.sh
+++ b/docker/scripts/block-processing-healthcheck.sh
@@ -8,6 +8,31 @@ trap 'trap - 2 15 && kill -- -$$' 2 15
 postgres_user=${POSTGRES_USER:-"haf_admin"}
 postgres_host=${POSTGRES_HOST:-"localhost"}
 postgres_port=${POSTGRES_PORT:-5432}
-POSTGRES_ACCESS=${POSTGRES_URL:-"postgresql://$postgres_user@$postgres_host:$postgres_port/haf_block_log"}
+POSTGRES_ACCESS=${POSTGRES_URL:-"postgresql://$postgres_user@$postgres_host:$postgres_port/haf_block_log?application_name=reptracker_health_check"}
 
-exec [ "$(psql "$POSTGRES_ACCESS" --quiet --no-align --tuples-only --command="SELECT hive.is_app_in_sync('reptracker_app');")" = t ]
+# this health check will return healthy if:
+# - reputation_tracker has processed a block in the last 60 seconds
+#   (and that block was processed after this container started; otherwise
+#    we would report healthy immediately after a restart)
+# or
+# - reputation_tracker's head block has caught up to haf's irreversible block
+#   (so we don't mark reputation_tracker as unhealthy if HAF stops getting blocks)
+#
+# This check needs to know when block processing started, so the docker entrypoint
+# must record the start time (in UTC) in a file, e.g.:
+#   date --utc --iso-8601=seconds > /tmp/block_processing_startup_time.txt
+if [ ! -f "/tmp/block_processing_startup_time.txt" ]; then
+  echo "file /tmp/block_processing_startup_time.txt does not exist, which means block"
+  echo "processing hasn't started yet"
+  exit 1
+fi
+STARTUP_TIME="$(cat /tmp/block_processing_startup_time.txt)"
+CHECK="SET TIME ZONE 'UTC'; \
+       SELECT ((now() - (SELECT last_active_at FROM hafd.contexts WHERE name = 'reptracker_app')) < interval '1 minute' \
+               AND (SELECT last_active_at FROM hafd.contexts WHERE name = 'reptracker_app') > '${STARTUP_TIME}'::timestamp) OR \
+              hive.is_app_in_sync('reptracker_app');"
+
+
+# the docker container probably won't have a locale set; force the C locale to suppress the warning
+export LC_ALL=C
+exec [ "$(psql "$POSTGRES_ACCESS" --quiet --no-align --tuples-only --command="${CHECK}")" = t ]
diff --git a/scripts/process_blocks.sh b/scripts/process_blocks.sh
index 2bf5be3..6d3061c 100755
--- a/scripts/process_blocks.sh
+++ b/scripts/process_blocks.sh
@@ -68,6 +68,8 @@ POSTGRES_ACCESS=${POSTGRES_URL:-"postgresql://$POSTGRES_USER@$POSTGRES_HOST:$POS
 process_blocks() {
     local n_blocks="${1:-null}"
     log_file="reptracker_sync.log"
+    # record the UTC startup time (ISO-8601) so the docker health check can ignore activity from before this run
+    date -uIseconds > /tmp/block_processing_startup_time.txt
     psql "$POSTGRES_ACCESS" -v "ON_ERROR_STOP=on" -v REPTRACKER_SCHEMA="${REPTRACKER_SCHEMA}" -c "\timing" -c "SET SEARCH_PATH TO ${REPTRACKER_SCHEMA};" -c "CALL ${REPTRACKER_SCHEMA}.main('${REPTRACKER_SCHEMA}', $n_blocks);" 2>&1 | tee -i $log_file
 }
 
-- 
GitLab