diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 1de1ff7a73fa5611eee31559307d87a247fd2e58..ee6b154ecb662992c94bc34a36b4eb694a5ddc86 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -137,6 +137,7 @@ sync: docker login -u "$CI_REGISTRY_USER" -p "$CI_REGISTRY_PASSWORD" $CI_REGISTRY echo -e "\e[0Ksection_end:$(date +%s):login\r\e[0K" echo -e "\e[0Ksection_start:$(date +%s):git[collapsed=true]\r\e[0KConfiguring Git..." + git -C submodules/hivemind submodule update --init reputation_tracker git config --global --add safe.directory "$CI_PROJECT_DIR" git config --global --add safe.directory "$CI_PROJECT_DIR/submodules/haf" git config --global --add safe.directory "$CI_PROJECT_DIR/submodules/haf_api_node" @@ -169,14 +170,14 @@ sync: ls -la $(pwd) OLDPWD=$(pwd) cd submodules/haf_api_node || exit 1 - docker logs haf-world-haf-1 > ${OLDPWD}/docker/haf.log - docker logs haf-world-haproxy-1 > ${OLDPWD}/docker/haproxy.log - docker logs haf-world-hivesense-install-schema-1 > ${OLDPWD}/docker/hivesense-install.log - docker logs haf-world-hivesense-block-processing-1 > ${OLDPWD}/docker/hivesense-block-processing.log - docker logs haf-world-hivesense-postgrest-1 > ${OLDPWD}/docker/hivesense-postgrest.log - docker logs haf-world-hivesense-swagger-1 > ${OLDPWD}/docker/hivesense-swagger.log - docker logs haf-world-hivesense-postgrest-rewriter-1 > ${OLDPWD}/docker/hivesense-postgrest-rewriter.log - docker logs haf-world-hivesense-ollama-1 > ${OLDPWD}/docker/hivesense-ollama.log + docker logs haf-world-haf-1 > ${OLDPWD}/docker/haf.log 2>&1 + docker logs haf-world-haproxy-1 > ${OLDPWD}/docker/haproxy.log 2>&1 + docker logs haf-world-hivesense-install-schema-1 > ${OLDPWD}/docker/hivesense-install.log 2>&1 + docker logs haf-world-hivesense-block-processing-1 > ${OLDPWD}/docker/hivesense-block-processing.log 2>&1 + docker logs haf-world-hivesense-postgrest-1 > ${OLDPWD}/docker/hivesense-postgrest.log 2>&1 + docker logs haf-world-hivesense-swagger-1 > 
${OLDPWD}/docker/hivesense-swagger.log 2>&1 + docker logs haf-world-hivesense-postgrest-rewriter-1 > ${OLDPWD}/docker/hivesense-postgrest-rewriter.log 2>&1 + docker logs haf-world-hivesense-ollama-1 > ${OLDPWD}/docker/hivesense-ollama.log 2>&1 cd "$OLDPWD" || exit 1 # Create docker directory if it doesn't exist diff --git a/db/main_loop.sql b/db/main_loop.sql index b17b0b445e5bf3fcad4397c8ead92760298814b0..87faad1fd8140046b46c8381ea1e31627e8f3933 100644 --- a/db/main_loop.sql +++ b/db/main_loop.sql @@ -532,6 +532,7 @@ BEGIN EXIT WHEN __todo = 0; RAISE NOTICE 'SCHEDULER: Error -- scheduler was woken up but job queue is not empty...'; RAISE NOTICE 'SCHEDULER: switching to polling...'; + EXIT WHEN NOT continueProcessing(); PERFORM pg_sleep(0.1); --PERFORM pg_sleep(5); END LOOP; @@ -573,6 +574,11 @@ DECLARE _done_key BIGINT := 20_000_000 + _worker; _ack_key BIGINT := 30_000_000 + _worker; BEGIN + -- pgai adds site_packages, which ends in a big mess and random problems with + -- missing python modules on import; to prevent this, + -- the pgai site packages are added at the beginning of the loop + -- PERFORM hivesense_app.pgai_initialize(); + + -- by default, postgresql logs when threads are blocked on a lock for more than a second. -- we use locks for synchronization, and expect threads to be blocked for at least 3s -- at a time. 
Disable that logging to avoid spamming the log file diff --git a/db/ollama.sql b/db/ollama.sql index 5427247d9d5d42666609534a014298028f090917..aa7f2de7f72179bdbc3c61ea2484dd332f6b8d7e 100644 --- a/db/ollama.sql +++ b/db/ollama.sql @@ -166,6 +166,35 @@ AS $BODY$ return embeddings $BODY$; +DROP FUNCTION IF EXISTS hivesense_app.pgai_initialize(); +CREATE FUNCTION hivesense_app.pgai_initialize() + RETURNS void + LANGUAGE plpython3u +AS $BODY$ + if "ai.version" not in GD: + r = plpy.execute( + "SELECT coalesce(current_setting('ai.python_lib_dir', true), " + "'/usr/local/lib/pgai') AS python_lib_dir" + ) + python_lib_dir = r[0]["python_lib_dir"] + from pathlib import Path + import sys, sysconfig, site + if "purelib" in sysconfig.get_path_names() and sysconfig.get_path("purelib") in sys.path: + sys.path.remove(sysconfig.get_path("purelib")) + python_lib_dir = Path(python_lib_dir).joinpath("0.8.0") + site.addsitedir(str(python_lib_dir)) + from ai import __version__ as ai_version + assert("0.8.0" == ai_version) + GD["ai.version"] = "0.8.0" + else: + if GD["ai.version"] != "0.8.0": + plpy.fatal("the pgai extension version has changed. 
start a new session") +$BODY$; + GRANT EXECUTE ON FUNCTION hivesense_app.ollama_embed(text, hivesense_app.id_and_post_chunk [], text, text, jsonb) TO haf_admin WITH GRANT OPTION; GRANT EXECUTE ON FUNCTION hivesense_app.ollama_embed(text, hivesense_app.id_and_post_chunk [], text, text, jsonb) TO hivesense_user; GRANT EXECUTE ON FUNCTION hivesense_app.ollama_embed(text, hivesense_app.id_and_post_chunk [], text, text, jsonb) TO pg_database_owner WITH GRANT OPTION; + +GRANT EXECUTE ON FUNCTION hivesense_app.pgai_initialize() TO haf_admin WITH GRANT OPTION; +GRANT EXECUTE ON FUNCTION hivesense_app.pgai_initialize() TO hivesense_user; +GRANT EXECUTE ON FUNCTION hivesense_app.pgai_initialize() TO pg_database_owner WITH GRANT OPTION; diff --git a/scripts/ci-helpers/wait-for-hivesense-startup.sh b/scripts/ci-helpers/wait-for-hivesense-startup.sh index 05677b12514802fd283767b5ffa0ade6113673d9..09d69cd238e133725925080c277376082fee4b14 100755 --- a/scripts/ci-helpers/wait-for-hivesense-startup.sh +++ b/scripts/ci-helpers/wait-for-hivesense-startup.sh @@ -3,17 +3,17 @@ set -e wait_for_hivesense_startup() { - COMMAND="SELECT EXISTS (SELECT 1 FROM pg_class WHERE relkind = 'i' AND relname = 'hivensense_vectors_embed_hnsw_idxs' );" + COMMAND="SELECT EXISTS (SELECT 1 FROM pg_class WHERE relkind = 'i' AND ( relname = 'posts_vectors_embedding_half_hnsw' OR relname = 'posts_vectors_embedding_hnsw' ));" MESSAGE="Waiting for Hivesense to finish processing blocks..." 
HIVEMIND_BLOCK_COMMAND="SELECT last_completed_block_num FROM hivemind_app.hive_state" HAF_BLOCK_COMMAND="SELECT consistent_block FROM hafd.hive_state" - HIVESENSE_BLOCK_COMMAND="SELECT hive.app_get_current_block_num('hivesense_app1')" + HIVESENSE_BLOCK_COMMAND="SELECT hive.app_get_current_block_num('hivesense_app')" i=0 while : do i=$((i+1)) - if [ "$i" -gt 20 ]; then + if [ "$i" -gt 40 ]; then echo "Too long waiting, pending logs dump:" LOCK_DUMP_COMMAND="SELECT diff --git a/submodules/haf b/submodules/haf index 61e4e13a348ea3deac6ac118bfc7b99e23fec0be..2ada090f5bc021133aa4f5131688c59e89d0e8c0 160000 --- a/submodules/haf +++ b/submodules/haf @@ -1 +1 @@ -Subproject commit 61e4e13a348ea3deac6ac118bfc7b99e23fec0be +Subproject commit 2ada090f5bc021133aa4f5131688c59e89d0e8c0 diff --git a/submodules/hivemind b/submodules/hivemind index 1242d377bf0bc6b35ede2567e7a0340f2efdd178..d046afef0b590771ed4bc3c9310e4837d36ca467 160000 --- a/submodules/hivemind +++ b/submodules/hivemind @@ -1 +1 @@ -Subproject commit 1242d377bf0bc6b35ede2567e7a0340f2efdd178 +Subproject commit d046afef0b590771ed4bc3c9310e4837d36ca467 diff --git a/tests/integration/api_node/hivesense_synced_api_node_test.sh b/tests/integration/api_node/hivesense_synced_api_node_test.sh index 31c744aa7123f9516f623a6c9d09d9d23942268d..1d16cf1d8acb6e313fd90c51266207c6bc618b26 100755 --- a/tests/integration/api_node/hivesense_synced_api_node_test.sh +++ b/tests/integration/api_node/hivesense_synced_api_node_test.sh @@ -17,8 +17,8 @@ fi # 2. check number of chunks number_of_chunks=$(query_database "SELECT COUNT(*) FROM hivesense_app.posts_vectors") -if [ "$number_of_chunks" -ne 244 ]; then - echo "Wrong number of chunks ${number_of_chunks} != 244" >&2 +if [ "$number_of_chunks" -ne 194 ]; then + echo "Wrong number of chunks ${number_of_chunks} != 194" >&2 exit 1 fi