From 2ada090f5bc021133aa4f5131688c59e89d0e8c0 Mon Sep 17 00:00:00 2001 From: Marcin Ickiewicz Date: Tue, 27 May 2025 09:52:15 +0200 Subject: [PATCH] add python packages required by hivesense --- Dockerfile | 8 ++++---- scripts/ci-helpers/build_ci_base_image.sh | 2 +- scripts/setup_ubuntu.sh | 25 ++++++++++++++++++++++- 3 files changed, 29 insertions(+), 6 deletions(-) diff --git a/Dockerfile b/Dockerfile index 5d72d498b..f6acc0760 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,7 +3,7 @@ # docker buildx build --progress=plain --target=ci-base-image --tag registry.gitlab.syncad.com/hive/haf/ci-base-image$CI_IMAGE_TAG --file Dockerfile . # To be started from cloned haf source directory. ARG CI_REGISTRY_IMAGE=registry.gitlab.syncad.com/hive/haf/ -ARG CI_IMAGE_TAG=ubuntu24.04-3 +ARG CI_IMAGE_TAG=ubuntu24.04-4 ARG BUILD_IMAGE_TAG ARG IMAGE_TAG_PREFIX @@ -36,7 +36,7 @@ RUN apt-get update && \ apt-get remove -y gnupg && \ apt-get autoremove -y && \ busybox --install -s && \ - python3.12 -m pip install --break-system-packages langchain && \ + bash -x ./scripts/setup_ubuntu.sh --ai && \ rm -rf /var/lib/apt/lists/* # change the UID and GID to match the ones postgres is assigned in our non-minimal runtime @@ -114,7 +114,7 @@ RUN <<-EOF sudo chown -R hived "${INSTALLATION_DIR}/"* EOF -FROM registry.gitlab.syncad.com/hive/haf/minimal-runtime:ubuntu24.04-3 AS instance +FROM registry.gitlab.syncad.com/hive/haf/minimal-runtime:ubuntu24.04-4 AS instance ARG BUILD_HIVE_TESTNET=OFF ENV BUILD_HIVE_TESTNET=${BUILD_HIVE_TESTNET} @@ -128,7 +128,7 @@ ENV HIVE_CONVERTER_BUILD=${HIVE_CONVERTER_BUILD} ARG HIVE_LINT=OFF ENV HIVE_LINT=${HIVE_LINT} -ENV BUILD_IMAGE_TAG=${BUILD_IMAGE_TAG:-:ubuntu24.04-3} +ENV BUILD_IMAGE_TAG=${BUILD_IMAGE_TAG:-:ubuntu24.04-4} ARG P2P_PORT=2001 ENV P2P_PORT=${P2P_PORT} diff --git a/scripts/ci-helpers/build_ci_base_image.sh b/scripts/ci-helpers/build_ci_base_image.sh index 76e071cdd..d05343253 100755 --- a/scripts/ci-helpers/build_ci_base_image.sh +++ b/scripts/ci-helpers/build_ci_base_image.sh @@ -1,7 +1,7 @@ #! /bin/bash REGISTRY=${1:-registry.gitlab.syncad.com/hive/haf} -CI_IMAGE_TAG=ubuntu24.04-3 +CI_IMAGE_TAG=ubuntu24.04-4 # exit when any command fails set -e diff --git a/scripts/setup_ubuntu.sh b/scripts/setup_ubuntu.sh index 3084a7883..652f3cf46 100755 --- a/scripts/setup_ubuntu.sh +++ b/scripts/setup_ubuntu.sh @@ -42,7 +42,8 @@ install_ai_packages() { curl # required by Hivesense as pgai - python3.12 -m pip install --break-system-packages langchain + #python3.12 -m pip install --break-system-packages langchain spacy transformers beautifulsoup4 lxml pysbd + #python3.12 -m pip install --break-system-packages "xx_sent_ud_sm @ https://sourceforge.net/projects/spacy-models.mirror/files/xx_sent_ud_sm-3.8.0/xx_sent_ud_sm-3.8.0-py3-none-any.whl" pushd /tmp git clone https://github.com/timescale/pgai.git --branch extension-0.8.0 @@ -57,6 +58,28 @@ install_ai_packages() { rm -r pgai popd + python3.12 -m pip install -t /usr/local/lib/pgai/0.8.0/ langchain spacy transformers beautifulsoup4 lxml pysbd huggingface_hub + python3.12 -m pip install -t /usr/local/lib/pgai/0.8.0/ "xx_sent_ud_sm @ https://sourceforge.net/projects/spacy-models.mirror/files/xx_sent_ud_sm-3.8.0/xx_sent_ud_sm-3.8.0-py3-none-any.whl" + + mkdir -p /home/hived/tokenizer-files + cat << EOF > /tmp/download-tokenizer-files.py +import sys +sys.path.insert(0, "/usr/local/lib/pgai/0.8.0/") + +from huggingface_hub import snapshot_download + +snapshot_download( + repo_id="intfloat/multilingual-e5-base", + local_dir="/home/hived/tokenizer-files/e5-base", + allow_patterns=[ + "tokenizer.json" + ] +) +EOF + python3 /tmp/download-tokenizer-files.py + rm /tmp/download-tokenizer-files.py + chown -R hived.users /home/hived/tokenizer-files + apt-get clean rm -rf /var/lib/apt/lists/* rm -rf /root/.cache ~/.cache /tmp/* /var/tmp/* -- GitLab