From ccc1bd6415528b7e4e6d7dec75504be578af4b2a Mon Sep 17 00:00:00 2001
From: Marcin Ickiewicz <mickiewicz@syncad.com>
Date: Wed, 8 Jan 2025 16:29:07 +0100
Subject: [PATCH] only used state providers impact db hash

---
 ...fork_manager_update_script_generator.sh.in | 25 +++++------
 src/hive_fork_manager/update.sql              | 30 +++++++++++---
 .../hive_fork_manager/CMakeLists.txt          |  1 +
 .../state_providers/hash.sql                  | 15 ++++---
 .../state_providers/no_hash.sql               | 41 +++++++++++++++++++
 .../hive_fork_manager/test_update_script.sh   | 35 ++++++++++++++++
 6 files changed, 123 insertions(+), 24 deletions(-)
 create mode 100644 tests/integration/functional/hive_fork_manager/state_providers/no_hash.sql

diff --git a/src/hive_fork_manager/hive_fork_manager_update_script_generator.sh.in b/src/hive_fork_manager/hive_fork_manager_update_script_generator.sh.in
index 8e1d112c8..93757b256 100755
--- a/src/hive_fork_manager/hive_fork_manager_update_script_generator.sh.in
+++ b/src/hive_fork_manager/hive_fork_manager_update_script_generator.sh.in
@@ -81,15 +81,15 @@ POSTGRES_EXTENSION_DIR='@POSTGRES_SHAREDIR@/extension'
 DB_NAME_AFTER_UPDATE="upd_${DB_NAME}" # truncate before postgres will do this
 DB_NAME_AFTER_UPDATE="${DB_NAME_AFTER_UPDATE:0:63}"
 save_table_schema() {
-  psql_do -d "$DB_NAME" -o before_update_columns.txt -q -t -A -c "SELECT table_name, table_columns FROM hive-update.calculate_schema_hash()"
-  psql_do -d "$DB_NAME" -o before_update_constraints.txt -q -t -A -c "SELECT table_name, table_constraints FROM hive-update.calculate_schema_hash()"
-  psql_do -d "$DB_NAME" -o before_update_indexes.txt -q -t -A -c "SELECT table_name, table_indexes FROM hive-update.calculate_schema_hash()"
-  psql_do -d "$DB_NAME" -o before_update_providers.txt -q -t -A -c "SELECT provider, hash FROM hive.calculate_state_provider_hashes()"
-
-  psql_do -d "$DB_NAME_AFTER_UPDATE" -o after_update_columns.txt -q -t -A -c "SELECT table_name, table_columns FROM hive-update.calculate_schema_hash()"
-  psql_do -d "$DB_NAME_AFTER_UPDATE" -o after_update_constraings.txt -q -t -A -c "SELECT table_name, table_constraints FROM hive-update.calculate_schema_hash()"
-  psql_do -d "$DB_NAME_AFTER_UPDATE" -o after_update_indexes.txt -q -t -A -c "SELECT table_name, table_indexes FROM hive-update.calculate_schema_hash()"
-  psql_do -d "$DB_NAME_AFTER_UPDATE" -o after_update_providers.txt -q -t -A -c "SELECT provider, hash FROM hive.calculate_state_provider_hashes()"
+  psql_do -d "$DB_NAME" -o before_update_columns.txt -q -t -A -c "SELECT table_name, table_columns FROM hive_update.calculate_schema_hash()"
+  psql_do -d "$DB_NAME" -o before_update_constraints.txt -q -t -A -c "SELECT table_name, table_constraints FROM hive_update.calculate_schema_hash()"
+  psql_do -d "$DB_NAME" -o before_update_indexes.txt -q -t -A -c "SELECT table_name, table_indexes FROM hive_update.calculate_schema_hash()"
+  psql_do -d "$DB_NAME" -o before_update_providers.txt -q -t -A -c "SELECT provider, hash FROM hive_update.calculate_state_provider_hashes(enum_range(NULL::hafd.state_providers))"
+
+  psql_do -d "$DB_NAME_AFTER_UPDATE" -o after_update_columns.txt -q -t -A -c "SELECT table_name, table_columns FROM hive_update.calculate_schema_hash()"
+  psql_do -d "$DB_NAME_AFTER_UPDATE" -o after_update_constraings.txt -q -t -A -c "SELECT table_name, table_constraints FROM hive_update.calculate_schema_hash()"
+  psql_do -d "$DB_NAME_AFTER_UPDATE" -o after_update_indexes.txt -q -t -A -c "SELECT table_name, table_indexes FROM hive_update.calculate_schema_hash()"
+  psql_do -d "$DB_NAME_AFTER_UPDATE" -o after_update_providers.txt -q -t -A -c "SELECT provider, hash FROM hive_update.calculate_state_provider_hashes(enum_range(NULL::hafd.state_providers))"
 }
 
 verify_table_schema() {
@@ -98,8 +98,9 @@ verify_table_schema() {
   psql_do -a -d "$DB_NAME_AFTER_UPDATE" -c 'CREATE EXTENSION hive_fork_manager CASCADE;'
   psql_do -d "$DB_NAME_AFTER_UPDATE" -q -t -A -f "$SCRIPTPATH/update.sql"
 
-  BEFORE_UPDATE=$(psql_do -d "$DB_NAME" -t -A -c "SELECT hive_update.create_database_hash()")
-  AFTER_UPDATE=$(psql_do -d "$DB_NAME_AFTER_UPDATE" -t -A -c "SELECT hive_update.create_database_hash()")
+  STATE_PROVIDERS=$(psql_do -d "$DB_NAME" -t -A -c "SELECT hive_update.get_used_state_providers()")
+  BEFORE_UPDATE=$(psql_do -d "$DB_NAME" -t -A -c "SELECT hive_update.create_database_hash('${STATE_PROVIDERS}'::hafd.state_providers[])")
+  AFTER_UPDATE=$(psql_do -d "$DB_NAME_AFTER_UPDATE" -t -A -c "SELECT hive_update.create_database_hash('${STATE_PROVIDERS}'::hafd.state_providers[])")
   if [ "$BEFORE_UPDATE" = "$AFTER_UPDATE" ]; then
     echo "The table schema is correct, verification completed."
     echo "Dropping temporary database"
@@ -110,7 +111,7 @@ verify_table_schema() {
     echo "COLUMNS"
     diff --suppress-common-lines before_update_columns.txt after_update_columns.txt || true
     echo "PROVIDERS"
-    ate diff --suppress-common-lines before_update_providers.txt after_update_providers.txt || true
+    diff --suppress-common-lines before_update_providers.txt after_update_providers.txt || true
     echo "Dropping temporary database"
     psql_do -a -d postgres -c "DROP DATABASE IF EXISTS $DB_NAME_AFTER_UPDATE;"
     find -type f -name '*.txt' > /dev/null 2>&1
diff --git a/src/hive_fork_manager/update.sql b/src/hive_fork_manager/update.sql
index cda3d05c7..4b44c8ac8 100644
--- a/src/hive_fork_manager/update.sql
+++ b/src/hive_fork_manager/update.sql
@@ -299,7 +299,7 @@ END;
 $BODY$;
 
 DROP FUNCTION IF EXISTS hive_update.calculate_state_provider_hashes;
-CREATE FUNCTION hive_update.calculate_state_provider_hashes()
+CREATE FUNCTION hive_update.calculate_state_provider_hashes( include_providers hafd.state_providers[] )
     RETURNS SETOF hive_update.state_provider_and_hash
     LANGUAGE plpgsql
     STABLE
@@ -310,7 +310,7 @@ BEGIN
         SELECT
               sp.* as provider
             , hive_update.calculate_state_provider_hash(sp.*) as hash
-        FROM unnest(enum_range(NULL::hafd.state_providers)) as sp;
+        FROM unnest(include_providers) as sp;
 END;
 $BODY$;
 
@@ -335,7 +335,7 @@ $BODY$
 ;
 
 DROP FUNCTION IF EXISTS hive_update.create_database_hash;
-CREATE FUNCTION hive_update.create_database_hash()
+CREATE FUNCTION hive_update.create_database_hash(include_providers hafd.state_providers[] = enum_range(NULL::hafd.state_providers))
     RETURNS UUID
     LANGUAGE plpgsql
     STABLE
@@ -346,11 +346,29 @@ DECLARE
     _provider_hashes TEXT;
 BEGIN
     SELECT string_agg(table_schema, ' | ') FROM hive_update.calculate_schema_hash() INTO _tmp;
-    SELECT string_agg(provider || hash, ' | ') FROM hive_update.calculate_state_provider_hashes() INTO _provider_hashes;
-
-    _tmp = _tmp || _provider_hashes;
+    SELECT string_agg(provider || hash, ' | ') FROM hive_update.calculate_state_provider_hashes(include_providers) INTO _provider_hashes;
+    IF _provider_hashes IS NOT NULL THEN
+        _tmp = _tmp || _provider_hashes;
+    END IF;
     RETURN MD5(_tmp)::uuid;
 END;
 $BODY$
 ;
 
+DROP FUNCTION IF EXISTS hive_update.get_used_state_providers;
+CREATE FUNCTION hive_update.get_used_state_providers()
+    RETURNS hafd.state_providers[]
+    LANGUAGE plpgsql
+    STABLE
+AS
+$BODY$
+DECLARE
+    __result hafd.state_providers[];
+BEGIN
+    SELECT ARRAY_AGG( DISTINCT sp.state_provider ) INTO __result
+    FROM hafd.state_providers_registered sp;
+
+    RETURN COALESCE( __result, ARRAY[]::hafd.state_providers[] );
+END;
+$BODY$
+;
diff --git a/tests/integration/functional/hive_fork_manager/CMakeLists.txt b/tests/integration/functional/hive_fork_manager/CMakeLists.txt
index 5a7470498..68777966d 100644
--- a/tests/integration/functional/hive_fork_manager/CMakeLists.txt
+++ b/tests/integration/functional/hive_fork_manager/CMakeLists.txt
@@ -328,6 +328,7 @@ ADD_TEST( NAME test_update_script
  ADD_SQL_FUNCTIONAL_TEST( state_providers/import_state_provider_negative_non_context_owner_create_table.sql )
  ADD_SQL_FUNCTIONAL_TEST( state_providers/switch_state_provider_to_forkable.sql )
  ADD_SQL_FUNCTIONAL_TEST( state_providers/hash.sql )
+ ADD_SQL_FUNCTIONAL_TEST( state_providers/no_hash.sql )
  ADD_SQL_FUNCTIONAL_TEST( state_providers/update_state_provider_runtime.sql )
 
  ADD_SQL_FUNCTIONAL_TEST( authorization/alice_access_to_bob_negative.sql )
diff --git a/tests/integration/functional/hive_fork_manager/state_providers/hash.sql b/tests/integration/functional/hive_fork_manager/state_providers/hash.sql
index 9da0e479e..257c8f1b4 100644
--- a/tests/integration/functional/hive_fork_manager/state_providers/hash.sql
+++ b/tests/integration/functional/hive_fork_manager/state_providers/hash.sql
@@ -10,24 +10,26 @@ DECLARE
     __all_before_hashes TEXT;
     __all_after_hashes TEXT;
     __database_hash_before TEXT;
+    __database_hash_before1 TEXT;
     __database_hash_after TEXT;
 BEGIN
-    ASSERT ( SELECT 1 FROM hive_update.calculate_state_provider_hashes() WHERE provider = 'ACCOUNTS' ) IS NOT NULL
+    ASSERT ( SELECT 1 FROM hive_update.calculate_state_provider_hashes(enum_range(NULL::hafd.state_providers)) WHERE provider = 'ACCOUNTS' ) IS NOT NULL
         , 'ACCOUNTS not hashed';
 
-    ASSERT ( SELECT 1 FROM hive_update.calculate_state_provider_hashes() WHERE provider = 'KEYAUTH' ) IS NOT NULL
+    ASSERT ( SELECT 1 FROM hive_update.calculate_state_provider_hashes(enum_range(NULL::hafd.state_providers)) WHERE provider = 'KEYAUTH' ) IS NOT NULL
         , 'KEYAUTH not hashed';
 
-    ASSERT ( SELECT 1 FROM hive_update.calculate_state_provider_hashes() WHERE provider = 'METADATA' ) IS NOT NULL
+    ASSERT ( SELECT 1 FROM hive_update.calculate_state_provider_hashes(enum_range(NULL::hafd.state_providers)) WHERE provider = 'METADATA' ) IS NOT NULL
         , 'METADATA not hashed';
 
-    ASSERT ( SELECT COUNT(*) FROM hive_update.calculate_state_provider_hashes() ) = 3
+    ASSERT ( SELECT COUNT(*) FROM hive_update.calculate_state_provider_hashes(enum_range(NULL::hafd.state_providers)) ) = 3
         , 'More   than 3 known providers are hashed';
 
-    SELECT STRING_AGG( hash, '|') FROM hive_update.calculate_state_provider_hashes() INTO __all_before_hashes;
+    SELECT STRING_AGG( hash, '|') FROM hive_update.calculate_state_provider_hashes(enum_range(NULL::hafd.state_providers)) INTO __all_before_hashes;
     SELECT * FROM hive_update.calculate_state_provider_hash( 'KEYAUTH'::hafd.state_providers ) INTO __keyauth_before_hash;
 
     SELECT hive_update.create_database_hash()  INTO __database_hash_before;
+    SELECT hive_update.create_database_hash(ARRAY['METADATA']::hafd.state_providers[])  INTO __database_hash_before1;
 
     EXECUTE format( 'CREATE OR REPLACE FUNCTION hive.start_provider_keyauth( _context hafd.context_name )
     RETURNS TEXT[]
@@ -41,12 +43,13 @@ BEGIN
     $$
     ;');
 
-    SELECT STRING_AGG( hash, '|') FROM hive_update.calculate_state_provider_hashes() INTO __all_after_hashes;
+    SELECT STRING_AGG( hash, '|') FROM hive_update.calculate_state_provider_hashes(enum_range(NULL::hafd.state_providers)) INTO __all_after_hashes;
     SELECT * FROM hive_update.calculate_state_provider_hash( 'KEYAUTH'::hafd.state_providers ) INTO __keyauth_after_hash;
     SELECT hive_update.create_database_hash()  INTO __database_hash_after;
 
     ASSERT __all_after_hashes != __all_before_hashes, 'Hashes not changed after modification';
     ASSERT __keyauth_after_hash != __keyauth_before_hash, 'Hash not changed after modification';
     ASSERT __database_hash_before != __database_hash_after, 'DB Hash not changed after modification';
+    ASSERT __database_hash_before1 != __database_hash_after, 'DB Hash not changed after modification 1';
 END;
 $BODY$;
\ No newline at end of file
diff --git a/tests/integration/functional/hive_fork_manager/state_providers/no_hash.sql b/tests/integration/functional/hive_fork_manager/state_providers/no_hash.sql
new file mode 100644
index 000000000..83b4e407d
--- /dev/null
+++ b/tests/integration/functional/hive_fork_manager/state_providers/no_hash.sql
@@ -0,0 +1,41 @@
+-- check that state providers outside the requested set are not hashed and do not affect the database hash
+
+
+CREATE OR REPLACE PROCEDURE haf_admin_test_then()
+    LANGUAGE 'plpgsql'
+AS
+$BODY$
+DECLARE
+    __database_hash_before TEXT;
+    __database_hash_after TEXT;
+BEGIN
+    ASSERT ( SELECT 1 FROM hive_update.calculate_state_provider_hashes( ARRAY['KEYAUTH', 'METADATA']::hafd.state_providers[] ) WHERE provider = 'ACCOUNTS' ) IS NULL
+        , 'ACCOUNTS hashed';
+
+    ASSERT ( SELECT 1 FROM hive_update.calculate_state_provider_hashes( ARRAY['KEYAUTH', 'METADATA']::hafd.state_providers[] ) WHERE provider = 'KEYAUTH' ) IS NOT NULL
+        , 'KEYAUTH not hashed';
+
+    ASSERT ( SELECT 1 FROM hive_update.calculate_state_provider_hashes( ARRAY['KEYAUTH', 'METADATA']::hafd.state_providers[] ) WHERE provider = 'METADATA' ) IS NOT NULL
+        , 'METADATA not hashed';
+
+    ASSERT ( SELECT hive_update.calculate_state_provider_hashes( ARRAY[]::hafd.state_providers[] ) ) IS NULL, 'NOT NULL returned for empty state providers';
+
+    SELECT hive_update.create_database_hash(ARRAY['METADATA']::hafd.state_providers[])  INTO __database_hash_before;
+
+    EXECUTE format( 'CREATE OR REPLACE FUNCTION hive.start_provider_keyauth( _context hafd.context_name )
+    RETURNS TEXT[]
+    LANGUAGE plpgsql
+    VOLATILE
+    AS
+    $$
+    BEGIN
+        RETURN '''';
+    END;
+    $$
+    ;');
+
+    SELECT hive_update.create_database_hash(ARRAY['METADATA']::hafd.state_providers[])  INTO __database_hash_after;
+
+    ASSERT __database_hash_after = __database_hash_before, 'Unused state provider has impact on database hash';
+END;
+$BODY$;
\ No newline at end of file
diff --git a/tests/integration/functional/hive_fork_manager/test_update_script.sh b/tests/integration/functional/hive_fork_manager/test_update_script.sh
index 8b2025036..0834cbe5a 100755
--- a/tests/integration/functional/hive_fork_manager/test_update_script.sh
+++ b/tests/integration/functional/hive_fork_manager/test_update_script.sh
@@ -33,6 +33,7 @@ prepare_sql_script() {
 
 prepare_database() {
     "$SCRIPTS_DIR/setup_db.sh" --haf-db-name="$UPDATE_DB_NAME" "$@"
+    exec_sql "SELECT hive.initialize_extension_data();"
 }
 
 update_database() {
@@ -241,4 +242,38 @@ prepare_database
 exec_sql "CREATE FUNCTION hafd.bad_function() RETURNS VOID VOLATILE AS '/lib/postgresql/${POSTGRES_VERSION}/lib/tablefunc.so', 'crosstab' language c;"
 failswith 3 update_database
 
+printf "\nTEST: Change keyauth state provider, but not use it. This should pass\n"
+prepare_database
+exec_sql "CREATE OR REPLACE FUNCTION hive.start_provider_keyauth( _context hafd.context_name )
+             RETURNS TEXT[]
+             LANGUAGE plpgsql
+             VOLATILE
+             AS
+             \$\$
+             BEGIN
+                 RETURN '';
+             END;
+             \$\$
+             ;"
+update_database
+
+printf "\nTEST: Change keyauth state provider, and use it. This should fail\n"
+prepare_database
+exec_sql "CREATE SCHEMA workaround;
+          SELECT hive.app_create_context('workaround', 'workaround');
+          SELECT hive.app_state_provider_import( 'KEYAUTH', 'workaround' );
+          "
+exec_sql "CREATE OR REPLACE FUNCTION hive.start_provider_keyauth( _context hafd.context_name )
+             RETURNS TEXT[]
+             LANGUAGE plpgsql
+             VOLATILE
+             AS
+             \$\$
+             BEGIN
+                 RETURN '';
+             END;
+             \$\$
+             ;"
+failswith 1 update_database
+
 echo "Succeeded"
-- 
GitLab