From 136255f585fca6750b0dc2063a54bcd8d1d94387 Mon Sep 17 00:00:00 2001 From: Marcin Ickiewicz Date: Mon, 26 May 2025 09:39:02 +0200 Subject: [PATCH 01/12] update hivemind --- submodules/hivemind | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/submodules/hivemind b/submodules/hivemind index 1242d37..15bde01 160000 --- a/submodules/hivemind +++ b/submodules/hivemind @@ -1 +1 @@ -Subproject commit 1242d377bf0bc6b35ede2567e7a0340f2efdd178 +Subproject commit 15bde014284fd5691c1bff1b0a9321958a2cc6fc -- GitLab From afcf4210a889b514ce23250a06f3cb14c363d5a1 Mon Sep 17 00:00:00 2001 From: Marcin Ickiewicz Date: Mon, 26 May 2025 12:17:36 +0200 Subject: [PATCH 02/12] init hivemind/reputation_tracker submodule to correctly get its version --- .gitlab-ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 1de1ff7..7a43fde 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -137,6 +137,7 @@ sync: docker login -u "$CI_REGISTRY_USER" -p "$CI_REGISTRY_PASSWORD" $CI_REGISTRY echo -e "\e[0Ksection_end:$(date +%s):login\r\e[0K" echo -e "\e[0Ksection_start:$(date +%s):git[collapsed=true]\r\e[0KConfiguring Git..." 
+ git -C submodules/hivemind submodule update --init reputation_tracker git config --global --add safe.directory "$CI_PROJECT_DIR" git config --global --add safe.directory "$CI_PROJECT_DIR/submodules/haf" git config --global --add safe.directory "$CI_PROJECT_DIR/submodules/haf_api_node" -- GitLab From b791ede1fb7a5885022e665f50ac12a2ac0177a1 Mon Sep 17 00:00:00 2001 From: Marcin Ickiewicz Date: Tue, 27 May 2025 09:56:45 +0200 Subject: [PATCH 03/12] update HAF to have required python packages: spacy and transformers --- submodules/haf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/submodules/haf b/submodules/haf index 61e4e13..6f124cd 160000 --- a/submodules/haf +++ b/submodules/haf @@ -1 +1 @@ -Subproject commit 61e4e13a348ea3deac6ac118bfc7b99e23fec0be +Subproject commit 6f124cd1a1eee6b0fc5507492c7b66c6cc22500c -- GitLab From 817f9593a49a5bae612db98e16ceecfeecfa9d33 Mon Sep 17 00:00:00 2001 From: Marcin Ickiewicz Date: Wed, 28 May 2025 10:49:50 +0200 Subject: [PATCH 04/12] fix for CI because name of index has been changed --- scripts/ci-helpers/wait-for-hivesense-startup.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/ci-helpers/wait-for-hivesense-startup.sh b/scripts/ci-helpers/wait-for-hivesense-startup.sh index 05677b1..230b6a4 100755 --- a/scripts/ci-helpers/wait-for-hivesense-startup.sh +++ b/scripts/ci-helpers/wait-for-hivesense-startup.sh @@ -3,7 +3,7 @@ set -e wait_for_hivesense_startup() { - COMMAND="SELECT EXISTS (SELECT 1 FROM pg_class WHERE relkind = 'i' AND relname = 'hivensense_vectors_embed_hnsw_idxs' );" + COMMAND="SELECT EXISTS (SELECT 1 FROM pg_class WHERE relkind = 'i' AND ( relname = 'posts_vectors_embedding_half_hnsw' OR relname = 'posts_vectors_embedding_hnsw' ));" MESSAGE="Waiting for Hivesense to finish processing blocks..." 
HIVEMIND_BLOCK_COMMAND="SELECT last_completed_block_num FROM hivemind_app.hive_state" HAF_BLOCK_COMMAND="SELECT consistent_block FROM hafd.hive_state" -- GitLab From fb684f452b7a5a0e3c75cc35e1823cdcccde61b2 Mon Sep 17 00:00:00 2001 From: Marcin Ickiewicz Date: Wed, 28 May 2025 12:11:30 +0200 Subject: [PATCH 05/12] change number in chunks in test chunking algorithm was changed, the test needs to be updated --- tests/integration/api_node/hivesense_synced_api_node_test.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/api_node/hivesense_synced_api_node_test.sh b/tests/integration/api_node/hivesense_synced_api_node_test.sh index 31c744a..1d16cf1 100755 --- a/tests/integration/api_node/hivesense_synced_api_node_test.sh +++ b/tests/integration/api_node/hivesense_synced_api_node_test.sh @@ -17,8 +17,8 @@ fi # 2. check number of chunks number_of_chunks=$(query_database "SELECT COUNT(*) FROM hivesense_app.posts_vectors") -if [ "$number_of_chunks" -ne 244 ]; then - echo "Wrong number of chunks ${number_of_chunks} != 244" >&2 +if [ "$number_of_chunks" -ne 194 ]; then + echo "Wrong number of chunks ${number_of_chunks} != 194" >&2 exit 1 fi -- GitLab From 202e22def4ebb444059b26b13b60a119b2bc95a0 Mon Sep 17 00:00:00 2001 From: Marcin Ickiewicz Date: Wed, 18 Jun 2025 11:26:52 +0200 Subject: [PATCH 06/12] bump haf and hivemind --- submodules/haf | 2 +- submodules/hivemind | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/submodules/haf b/submodules/haf index 6f124cd..33499a4 160000 --- a/submodules/haf +++ b/submodules/haf @@ -1 +1 @@ -Subproject commit 6f124cd1a1eee6b0fc5507492c7b66c6cc22500c +Subproject commit 33499a41ee83c51fbcdf8f7ee814954b8f1a72c4 diff --git a/submodules/hivemind b/submodules/hivemind index 15bde01..f5f1119 160000 --- a/submodules/hivemind +++ b/submodules/hivemind @@ -1 +1 @@ -Subproject commit 15bde014284fd5691c1bff1b0a9321958a2cc6fc +Subproject commit f5f1119094b6fd4d23f0e803dc0b6d2f3ed96456 -- 
GitLab From 850e065bd41881090b341dec1fc2c515dde092eb Mon Sep 17 00:00:00 2001 From: Marcin Ickiewicz Date: Wed, 18 Jun 2025 14:20:09 +0200 Subject: [PATCH 07/12] fix CI for scheduler --- scripts/ci-helpers/wait-for-hivesense-startup.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/ci-helpers/wait-for-hivesense-startup.sh b/scripts/ci-helpers/wait-for-hivesense-startup.sh index 230b6a4..8696493 100755 --- a/scripts/ci-helpers/wait-for-hivesense-startup.sh +++ b/scripts/ci-helpers/wait-for-hivesense-startup.sh @@ -7,7 +7,7 @@ wait_for_hivesense_startup() { MESSAGE="Waiting for Hivesense to finish processing blocks..." HIVEMIND_BLOCK_COMMAND="SELECT last_completed_block_num FROM hivemind_app.hive_state" HAF_BLOCK_COMMAND="SELECT consistent_block FROM hafd.hive_state" - HIVESENSE_BLOCK_COMMAND="SELECT hive.app_get_current_block_num('hivesense_app1')" + HIVESENSE_BLOCK_COMMAND="SELECT hive.app_get_current_block_num('hivesense_app')" i=0 while : -- GitLab From bee22414629a87734f74ee5c80f6c958c77cf5f1 Mon Sep 17 00:00:00 2001 From: Marcin Ickiewicz Date: Mon, 23 Jun 2025 09:03:05 +0200 Subject: [PATCH 08/12] issue #41: when a worker crashes then whole hivesense sync must be stopped --- db/main_loop.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/db/main_loop.sql b/db/main_loop.sql index b17b0b4..906a6f5 100644 --- a/db/main_loop.sql +++ b/db/main_loop.sql @@ -532,6 +532,7 @@ BEGIN EXIT WHEN __todo = 0; RAISE NOTICE 'SCHEDULER: Error -- scheduler was woken up but job queue is not empty...'; RAISE NOTICE 'SCHEDULER: switching to polling...'; + EXIT WHEN NOT continueProcessing(); PERFORM pg_sleep(0.1); --PERFORM pg_sleep(5); END LOOP; -- GitLab From 72054aa61fc002001ec8dde05d31bcf7e070c8c2 Mon Sep 17 00:00:00 2001 From: Marcin Ickiewicz Date: Mon, 23 Jun 2025 11:56:44 +0200 Subject: [PATCH 09/12] update haf --- submodules/haf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/submodules/haf b/submodules/haf index 
33499a4..2ada090 160000 --- a/submodules/haf +++ b/submodules/haf @@ -1 +1 @@ -Subproject commit 33499a41ee83c51fbcdf8f7ee814954b8f1a72c4 +Subproject commit 2ada090f5bc021133aa4f5131688c59e89d0e8c0 -- GitLab From d5f836732f61afba35af24ede8e1f2cb795960c6 Mon Sep 17 00:00:00 2001 From: Marcin Ickiewicz Date: Wed, 25 Jun 2025 16:57:41 +0200 Subject: [PATCH 10/12] longer waiting --- scripts/ci-helpers/wait-for-hivesense-startup.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/ci-helpers/wait-for-hivesense-startup.sh b/scripts/ci-helpers/wait-for-hivesense-startup.sh index 8696493..09d69cd 100755 --- a/scripts/ci-helpers/wait-for-hivesense-startup.sh +++ b/scripts/ci-helpers/wait-for-hivesense-startup.sh @@ -13,7 +13,7 @@ wait_for_hivesense_startup() { while : do i=$((i+1)) - if [ "$i" -gt 20 ]; then + if [ "$i" -gt 40 ]; then echo "Too long waiting, pending logs dump:" LOCK_DUMP_COMMAND="SELECT -- GitLab From 52a9ce61dbc43d359ba8b6e2ee351faa0e87232b Mon Sep 17 00:00:00 2001 From: Marcin Ickiewicz Date: Thu, 26 Jun 2025 07:34:45 +0200 Subject: [PATCH 11/12] update hivemind --- submodules/hivemind | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/submodules/hivemind b/submodules/hivemind index f5f1119..d046afe 160000 --- a/submodules/hivemind +++ b/submodules/hivemind @@ -1 +1 @@ -Subproject commit f5f1119094b6fd4d23f0e803dc0b6d2f3ed96456 +Subproject commit d046afef0b590771ed4bc3c9310e4837d36ca467 -- GitLab From 3bf8f319618664c1268afa03c6a8e73eb05d691c Mon Sep 17 00:00:00 2001 From: Marcin Ickiewicz Date: Thu, 26 Jun 2025 08:55:03 +0200 Subject: [PATCH 12/12] fix for pgai --- .gitlab-ci.yml | 16 ++++++++-------- db/main_loop.sql | 5 +++++ db/ollama.sql | 29 +++++++++++++++++++++++++++++ 3 files changed, 42 insertions(+), 8 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 7a43fde..ee6b154 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -170,14 +170,14 @@ sync: ls -la $(pwd) OLDPWD=$(pwd) cd 
submodules/haf_api_node || exit 1 - docker logs haf-world-haf-1 > ${OLDPWD}/docker/haf.log - docker logs haf-world-haproxy-1 > ${OLDPWD}/docker/haproxy.log - docker logs haf-world-hivesense-install-schema-1 > ${OLDPWD}/docker/hivesense-install.log - docker logs haf-world-hivesense-block-processing-1 > ${OLDPWD}/docker/hivesense-block-processing.log - docker logs haf-world-hivesense-postgrest-1 > ${OLDPWD}/docker/hivesense-postgrest.log - docker logs haf-world-hivesense-swagger-1 > ${OLDPWD}/docker/hivesense-swagger.log - docker logs haf-world-hivesense-postgrest-rewriter-1 > ${OLDPWD}/docker/hivesense-postgrest-rewriter.log - docker logs haf-world-hivesense-ollama-1 > ${OLDPWD}/docker/hivesense-ollama.log + docker logs haf-world-haf-1 > ${OLDPWD}/docker/haf.log 2>&1 + docker logs haf-world-haproxy-1 > ${OLDPWD}/docker/haproxy.log 2>&1 + docker logs haf-world-hivesense-install-schema-1 > ${OLDPWD}/docker/hivesense-install.log 2>&1 + docker logs haf-world-hivesense-block-processing-1 > ${OLDPWD}/docker/hivesense-block-processing.log 2>&1 + docker logs haf-world-hivesense-postgrest-1 > ${OLDPWD}/docker/hivesense-postgrest.log 2>&1 + docker logs haf-world-hivesense-swagger-1 > ${OLDPWD}/docker/hivesense-swagger.log 2>&1 + docker logs haf-world-hivesense-postgrest-rewriter-1 > ${OLDPWD}/docker/hivesense-postgrest-rewriter.log 2>&1 + docker logs haf-world-hivesense-ollama-1 > ${OLDPWD}/docker/hivesense-ollama.log 2>&1 cd "$OLDPWD" || exit 1 # Create docker directory if it doesn't exist diff --git a/db/main_loop.sql b/db/main_loop.sql index 906a6f5..87faad1 100644 --- a/db/main_loop.sql +++ b/db/main_loop.sql @@ -574,6 +574,11 @@ DECLARE _done_key BIGINT := 20_000_000 + _worker; _ack_key BIGINT := 30_000_000 + _worker; BEGIN + -- pgai adds its site_packages, which ends in a big mess and random problems with + -- python modules missing on import; to prevent this, the pgai + -- site packages are added at the beginning of the loop + -- PERFORM
hivesense_app.pgai_initialize(); + -- by default, postgresql logs when threads are blocked on a lock for more than a second. -- we use locks for synchronization, and expect threads to be blocked for at least 3s -- at a time. Disable that logging to avoid spamming the log file diff --git a/db/ollama.sql b/db/ollama.sql index 5427247..aa7f2de 100644 --- a/db/ollama.sql +++ b/db/ollama.sql @@ -166,6 +166,35 @@ AS $BODY$ return embeddings $BODY$; +DROP FUNCTION IF EXISTS hivesense_app.pgai_initialize(); +CREATE FUNCTION hivesense_app.pgai_initialize() + RETURNS void + LANGUAGE plpython3u +AS $BODY$ + if "ai.version" not in GD: + r = plpy.execute( + "SELECT coalesce(current_setting('ai.python_lib_dir', true), " + "'/usr/local/lib/pgai') AS python_lib_dir" + ) + python_lib_dir = r[0]["python_lib_dir"] + from pathlib import Path + import sys, sysconfig, site + if "purelib" in sysconfig.get_path_names() and sysconfig.get_path("purelib") in sys.path: + sys.path.remove(sysconfig.get_path("purelib")) + python_lib_dir = Path(python_lib_dir).joinpath("0.8.0") + site.addsitedir(str(python_lib_dir)) + from ai import __version__ as ai_version + assert("0.8.0" == ai_version) + GD["ai.version"] = "0.8.0" + else: + if GD["ai.version"] != "0.8.0": + plpy.fatal("the pgai extension version has changed. 
start a new session") +$BODY$; + GRANT EXECUTE ON FUNCTION hivesense_app.ollama_embed(text, hivesense_app.id_and_post_chunk [], text, text, jsonb) TO haf_admin WITH GRANT OPTION; GRANT EXECUTE ON FUNCTION hivesense_app.ollama_embed(text, hivesense_app.id_and_post_chunk [], text, text, jsonb) TO hivesense_user; GRANT EXECUTE ON FUNCTION hivesense_app.ollama_embed(text, hivesense_app.id_and_post_chunk [], text, text, jsonb) TO pg_database_owner WITH GRANT OPTION; + +GRANT EXECUTE ON FUNCTION hivesense_app.pgai_initialize() TO haf_admin WITH GRANT OPTION; +GRANT EXECUTE ON FUNCTION hivesense_app.pgai_initialize() TO hivesense_user; +GRANT EXECUTE ON FUNCTION hivesense_app.pgai_initialize() TO pg_database_owner WITH GRANT OPTION; -- GitLab