From 09d5e2d9972c1cd34b30ef122046684da8fe6777 Mon Sep 17 00:00:00 2001 From: Eric Frias Date: Mon, 25 Aug 2025 19:06:32 -0400 Subject: [PATCH 1/3] Optimize dockerfiles for faster builds, better caching --- .dockerignore | 100 ++++++++++++++++++++++++++++++++++++++++ .gitlab-ci.yml | 16 ++++++- Dockerfile.pca | 12 +++-- Dockerfile.syncer | 8 ++-- scripts/build_images.sh | 45 ++++++++++++++++-- 5 files changed, 168 insertions(+), 13 deletions(-) create mode 100644 .dockerignore diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..4c2d28c --- /dev/null +++ b/.dockerignore @@ -0,0 +1,100 @@ +# Git +.git +.gitignore +.gitmodules + +# Submodules - these are huge and not needed for the Docker build +submodules/ + +# CI/CD +.gitlab-ci.yml +.github/ + +# Documentation +*.md +doc/ +docs/ + +# Tests +tests/ +test/ +*_test.go +*_test.py +*.test +coverage/ +.coverage + +# Development files +*.log +*.swp +*.swo +*~ +.DS_Store +.idea/ +.vscode/ +*.iml + +# Python +__pycache__/ +*.py[cod] +*$py.class +.Python +env/ +venv/ +.venv/ +pip-log.txt +.pytest_cache/ +.tox/ + +# Build artifacts +build/ +dist/ +*.egg-info/ +.eggs/ + +# Temporary files +*.tmp +tmp/ +temp/ + +# Large files that shouldn't be in the build context +raw.txt +*.sql.gz +*.tar.gz +*.zip + +# Local configuration +.env +.env.* +config.local.* + +# Database dumps +*.dump +*.sql.backup + +# Docker compose files (not needed in build context) +docker-compose*.yml +compose*.yml + +# Build cache +.cache/ + +# Node modules (if any) +node_modules/ + +# CMake build directories (if any local builds) +cmake-build*/ +CMakeCache.txt +CMakeFiles/ + +# Editor backup files +*.bak +*.orig + +# OS specific +Thumbs.db +.Trash-* + +# Project specific large/unnecessary files +CLAUDE.md +switch_mcp.sh \ No newline at end of file diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 2ac4c00..478d0ef 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -113,8 +113,15 @@ build_images: echo -e "\e[0Ksection_start:$(date 
+%s):login[collapsed=true]\r\e[0KLogging to Docker registry..." docker login -u "$CI_REGISTRY_USER" -p "$CI_REGISTRY_PASSWORD" $CI_REGISTRY echo -e "\e[0Ksection_end:$(date +%s):login\r\e[0K" + - | + # Enable BuildKit for better caching + export DOCKER_BUILDKIT=1 + echo "DOCKER_BUILDKIT enabled" script: - | + # Export BuildKit for the script + export DOCKER_BUILDKIT=1 + # Build and push with commit SHA tag $CI_PROJECT_DIR/scripts/build_images.sh --push @@ -146,11 +153,18 @@ publish_release_images: echo -e "\e[0Ksection_start:$(date +%s):login[collapsed=true]\r\e[0KLogging to Docker registry..." docker login -u "$CI_REGISTRY_USER" -p "$CI_REGISTRY_PASSWORD" $CI_REGISTRY echo -e "\e[0Ksection_end:$(date +%s):login\r\e[0K" + - | + # Enable BuildKit for better caching + export DOCKER_BUILDKIT=1 + echo "DOCKER_BUILDKIT enabled" script: - | echo "Publishing release images for tag: $CI_COMMIT_TAG" - # Build images with release tag + # Export BuildKit for the script + export DOCKER_BUILDKIT=1 + + # Build images with release tag (will still use cache from develop) $CI_PROJECT_DIR/scripts/build_images.sh --tag="$CI_COMMIT_TAG" --push echo "Successfully published release images with tag: $CI_COMMIT_TAG" diff --git a/Dockerfile.pca b/Dockerfile.pca index 514b30b..96b7beb 100644 --- a/Dockerfile.pca +++ b/Dockerfile.pca @@ -1,5 +1,9 @@ FROM python:3.11-slim +# Set environment variables to reduce image size +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 + # Install system dependencies for psycopg2 and numpy RUN apt-get update && apt-get install -y --no-install-recommends \ build-essential \ @@ -9,17 +13,15 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ # Set work directory WORKDIR /app -# Install Python dependencies +# Install Python dependencies (before copying scripts for better caching) RUN pip install --no-cache-dir \ psycopg2-binary \ numpy \ scikit-learn \ matplotlib -# Copy your scripts -COPY scripts/run_pca.py /app -COPY scripts/recall.py 
/app -COPY scripts/convert_embeddings.py /app +# Copy scripts last (changes most frequently) +COPY scripts/run_pca.py scripts/recall.py scripts/convert_embeddings.py /app/ # Default command (can be overridden) CMD ["python", "run_pca.py"] diff --git a/Dockerfile.syncer b/Dockerfile.syncer index 4ece200..1348fa1 100644 --- a/Dockerfile.syncer +++ b/Dockerfile.syncer @@ -12,14 +12,16 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ libpq-dev \ && rm -rf /var/lib/apt/lists/* -# Create working directory and copy script +# Create working directory WORKDIR /app -COPY scripts/sync_embeddings.py . -# Install only necessary Python packages +# Install only necessary Python packages (before copying scripts for better caching) RUN pip install --no-cache-dir \ psycopg2-binary \ requests +# Copy script last (changes most frequently) +COPY scripts/sync_embeddings.py . + # Default command CMD ["python", "sync_embeddings.py"] diff --git a/scripts/build_images.sh b/scripts/build_images.sh index 22e8231..41e2ed3 100755 --- a/scripts/build_images.sh +++ b/scripts/build_images.sh @@ -50,10 +50,47 @@ set -eu pipefail SCRIPTPATH="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" -docker build -t "registry.gitlab.syncad.com/hive/hivesense:${TAG}" "${SCRIPTPATH}/.." -docker build -t "registry.gitlab.syncad.com/hive/hivesense/postgrest-rewriter:${TAG}" -f "${SCRIPTPATH}/../Dockerfile.rewriter" "${SCRIPTPATH}/.." -docker build -t "registry.gitlab.syncad.com/hive/hivesense/syncer:${TAG}" -f "${SCRIPTPATH}/../Dockerfile.syncer" "${SCRIPTPATH}/.." -docker build -t "registry.gitlab.syncad.com/hive/hivesense/pca:${TAG}" -f "${SCRIPTPATH}/../Dockerfile.pca" "${SCRIPTPATH}/.." +# Enable BuildKit for better caching and performance +export DOCKER_BUILDKIT=1 + +# Try to pull latest images for cache (ignore failures if images don't exist) +echo "Pulling latest images for cache..." 
+docker pull "registry.gitlab.syncad.com/hive/hivesense:develop" 2>/dev/null || true +docker pull "registry.gitlab.syncad.com/hive/hivesense/postgrest-rewriter:develop" 2>/dev/null || true +docker pull "registry.gitlab.syncad.com/hive/hivesense/syncer:develop" 2>/dev/null || true +docker pull "registry.gitlab.syncad.com/hive/hivesense/pca:develop" 2>/dev/null || true + +# Build with cache-from and inline cache export for registry caching +echo "Building hivesense..." +docker build \ + --cache-from "registry.gitlab.syncad.com/hive/hivesense:develop" \ + --build-arg BUILDKIT_INLINE_CACHE=1 \ + -t "registry.gitlab.syncad.com/hive/hivesense:${TAG}" \ + "${SCRIPTPATH}/.." + +echo "Building postgrest-rewriter..." +docker build \ + --cache-from "registry.gitlab.syncad.com/hive/hivesense/postgrest-rewriter:develop" \ + --build-arg BUILDKIT_INLINE_CACHE=1 \ + -t "registry.gitlab.syncad.com/hive/hivesense/postgrest-rewriter:${TAG}" \ + -f "${SCRIPTPATH}/../Dockerfile.rewriter" \ + "${SCRIPTPATH}/.." + +echo "Building syncer..." +docker build \ + --cache-from "registry.gitlab.syncad.com/hive/hivesense/syncer:develop" \ + --build-arg BUILDKIT_INLINE_CACHE=1 \ + -t "registry.gitlab.syncad.com/hive/hivesense/syncer:${TAG}" \ + -f "${SCRIPTPATH}/../Dockerfile.syncer" \ + "${SCRIPTPATH}/.." + +echo "Building pca..." +docker build \ + --cache-from "registry.gitlab.syncad.com/hive/hivesense/pca:develop" \ + --build-arg BUILDKIT_INLINE_CACHE=1 \ + -t "registry.gitlab.syncad.com/hive/hivesense/pca:${TAG}" \ + -f "${SCRIPTPATH}/../Dockerfile.pca" \ + "${SCRIPTPATH}/.." echo "Build images tag ${TAG}" -- GitLab From b7025a0c579accb0dc4ce633a83c342a81b10c2a Mon Sep 17 00:00:00 2001 From: Eric Frias Date: Thu, 28 Aug 2025 12:36:19 -0400 Subject: [PATCH 2/3] Allow projection matrices to be provided either as a file or a URL which we should download. Also allow it to be compressed.
--- .gitlab-ci.yml | 2 - Dockerfile | 1 + scripts/install_app.sh | 88 ++++++++++++++++++++++++--- scripts/matrix_handler.sh | 123 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 204 insertions(+), 10 deletions(-) create mode 100755 scripts/matrix_handler.sh diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 478d0ef..0ba7a4d 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -139,8 +139,6 @@ build_images: docker push "registry.gitlab.syncad.com/hive/hivesense/syncer:develop" docker push "registry.gitlab.syncad.com/hive/hivesense/pca:develop" fi - needs: - - job: prepare_haf_image tags: - public-runner-docker diff --git a/Dockerfile b/Dockerfile index 9f66010..447768b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -49,6 +49,7 @@ USER haf_admin COPY scripts/install_app.sh /app/scripts/install_app.sh COPY scripts/uninstall_app.sh /app/scripts/uninstall_app.sh COPY scripts/process_blocks.sh /app/scripts/process_blocks.sh +COPY scripts/matrix_handler.sh /app/scripts/matrix_handler.sh COPY db /app/db COPY endpoints /app/endpoints COPY docker/scripts/docker-entrypoint.sh /app/docker-entrypoint.sh diff --git a/scripts/install_app.sh b/scripts/install_app.sh index 01abdf9..c7a5bfe 100755 --- a/scripts/install_app.sh +++ b/scripts/install_app.sh @@ -10,6 +10,23 @@ SRCPATH="${SCRIPTPATH}/../" echo "All arguments: $*" +# Function to normalize boolean values to lowercase true/false +normalize_bool() { + case "$(echo "$1" | tr '[:upper:]' '[:lower:]')" in + true|yes|on|1) + echo "true" + ;; + false|no|off|0) + echo "false" + ;; + *) + echo "ERROR: Invalid boolean value: $1" >&2 + echo "Expected: true/false, yes/no, on/off, 1/0 (case insensitive)" >&2 + exit 1 + ;; + esac +} + print_help () { echo "Usage: $0 [OPTION[=VALUE]]..." 
echo @@ -31,8 +48,8 @@ print_help () { echo " --tokenizer-model=MODEL_NAME The tokenizer model, must be compatible with 'llm'" echo " --tokens_per_chunk=NUMBER The maximum number of tokens to break long posts into" echo " --overlap_amount=NUMBER The percentage of tokens_per_chunk that will be overlapped with the previous chunk (range 0-1, default 0.15)" - echo " --use-halfvec-index=TRUE/FALSE Use HNSW half-precision index (defaults to false)" - echo " --store-halfvec-embeddings=TRUE/FALSE Use HNSW half-precision index (defaults to false)" + echo " --use-halfvec-index=BOOL Use HNSW half-precision index (true/false, yes/no, on/off, 1/0)" + echo " --store-halfvec-embeddings=BOOL Store half-precision embeddings (true/false, yes/no, on/off, 1/0)" echo " --document-prefix=TEXT Prefix for documents (defaults to 'passage: ')" echo " --query-prefix=TEXT Prefix for queries (defaults to 'query: ')" echo " --min-token-threshold=INT Don't generate embeddings for posts with fewer than this number of tokens" @@ -41,7 +58,12 @@ print_help () { echo " --maintenance-work-mem Desired setting of maintenance_work_mem to use while creating the HNSW index" echo " --default-ef-search Default exploratory factor when searching" echo " --minimum-ann-candidates Always consider at least this many candidates for reranking" - echo " --allow-debugging Set to true to enable debugging flags for API calls" + echo " --use-reduced-embeddings=BOOL Enable dimension reduction (true/false, yes/no, on/off, 1/0, case insensitive)" + echo " --reduced-dim=NUMBER Reduced dimension size" + echo " --reduced-matrix-json=PATH Path to reduction matrix JSON file" + echo " --reduced-matrix-url=URL URL to download reduction matrix JSON" + echo " --reduced-matrix-file=PATH Alternative to --reduced-matrix-json for consistency" + echo " --allow-debugging=BOOL Enable debugging flags for API calls (true/false, yes/no, on/off, 1/0)" echo " --help Display this help screen and exit" echo } @@ -74,6 +96,8 @@ 
MAINTENANCE_WORK_MEM=28 # GB USE_REDUCED_EMBEDDINGS=false REDUCED_DIM=0 # must be set when flag=true REDUCED_MATRIX_JSON="" +REDUCED_MATRIX_URL="" +REDUCED_MATRIX_FILE="" HNSW_M=32 HNSW_EF_CONSTRUCTION=400 DEFAULT_EF_SEARCH=500 @@ -122,10 +146,10 @@ while [ $# -gt 0 ]; do OVERLAP_AMOUNT="${1#*=}" ;; --use-halfvec-index=*) - USE_HALFVEC_INDEX="${1#*=}" + USE_HALFVEC_INDEX=$(normalize_bool "${1#*=}") ;; --store-halfvec-embeddings=*) - STORE_HALFVEC_EMBEDDINGS="${1#*=}" + STORE_HALFVEC_EMBEDDINGS=$(normalize_bool "${1#*=}") ;; --document-prefix=*) DOCUMENT_PREFIX="${1#*=}" @@ -146,7 +170,7 @@ while [ $# -gt 0 ]; do MAINTENANCE_WORK_MEM="${1#*=}" ;; --use-reduced-embeddings=*) - USE_REDUCED_EMBEDDINGS="${1#*=}" + USE_REDUCED_EMBEDDINGS=$(normalize_bool "${1#*=}") ;; --reduced-dim=*) REDUCED_DIM="${1#*=}" @@ -154,6 +178,12 @@ while [ $# -gt 0 ]; do --reduced-matrix-json=*) REDUCED_MATRIX_JSON="${1#*=}" ;; + --reduced-matrix-url=*) + REDUCED_MATRIX_URL="${1#*=}" + ;; + --reduced-matrix-file=*) + REDUCED_MATRIX_FILE="${1#*=}" + ;; --hnsw-m=*) HNSW_M="${1#*=}" ;; @@ -167,7 +197,10 @@ while [ $# -gt 0 ]; do MINIMUM_ANN_CANDIDATES="${1#*=}" ;; --allow-debugging=*) - ALLOW_DEBUGGING="${1#*=}" + ALLOW_DEBUGGING=$(normalize_bool "${1#*=}") + ;; + --config-dir=*) + HIVESENSE_CONFIG_DIR="${1#*=}" ;; --start_block=*) START_BLOCK="${1#*=}" @@ -242,17 +275,56 @@ psql "$POSTGRES_ACCESS" -v ON_ERROR_STOP=on -c " " -f "$SRCPATH/db/database_schema.sql" psql "$POSTGRES_ACCESS" -v ON_ERROR_STOP=on -c "SET SEARCH_PATH TO ${HIVESENSE_SCHEMA};" -f "$SRCPATH/db/helpers.sql" +# Handle matrix JSON configuration only if USE_REDUCED_EMBEDDINGS is true if [ "$USE_REDUCED_EMBEDDINGS" = "true" ]; then + # Handle URL download if specified + if [ -n "$REDUCED_MATRIX_URL" ]; then + echo "Handling matrix URL download..." 
+ # Build arguments for matrix_handler.sh + MATRIX_HANDLER_ARGS="--matrix-url=$REDUCED_MATRIX_URL" + if [ -n "${HIVESENSE_CONFIG_DIR:-}" ]; then + MATRIX_HANDLER_ARGS="$MATRIX_HANDLER_ARGS --config-dir=$HIVESENSE_CONFIG_DIR" + fi + + # Source the matrix handler to resolve the path + # shellcheck disable=SC1090,SC2086 + . "$SRCPATH/scripts/matrix_handler.sh" $MATRIX_HANDLER_ARGS + + # Use the resolved path + if [ -n "$HIVESENSE_REDUCED_MATRIX_JSON" ]; then + REDUCED_MATRIX_JSON="$HIVESENSE_REDUCED_MATRIX_JSON" + fi + # Handle file path if specified (REDUCED_MATRIX_FILE takes precedence over REDUCED_MATRIX_JSON for consistency) + elif [ -n "$REDUCED_MATRIX_FILE" ]; then + REDUCED_MATRIX_JSON="$REDUCED_MATRIX_FILE" + fi + + # Now check if we have a matrix file if [ -z "$REDUCED_MATRIX_JSON" ]; then echo "ERROR: --reduced-matrix-json is required when --use-reduced-embeddings=true" exit 1 fi + echo "Loading projection matrix ($REDUCED_MATRIX_JSON)…" - psql "$POSTGRES_ACCESS" -v ON_ERROR_STOP=on <&2 + exit 1 + ;; + esac + shift +done + +# Function to extract filename from URL +get_filename_from_url() { + url="$1" + # Extract filename from URL, handling query strings + basename "${url%%\?*}" +} + +# Function to download file if not cached +download_if_needed() { + url="$1" + filename="$2" + filepath="${CONFIG_DIR}/${filename}" + + if [ -f "$filepath" ]; then + echo "Matrix file already cached at: $filepath" >&2 + echo "$filepath" + return 0 + fi + + echo "Downloading matrix from: $url" >&2 + echo "Saving to: $filepath" >&2 + + # Create config directory if it doesn't exist + mkdir -p "$CONFIG_DIR" + + # Download with resume support + if curl -L -C - -o "$filepath" "$url" >&2 2>&1; then + echo "Successfully downloaded matrix file" >&2 + echo "$filepath" + return 0 + else + echo "ERROR: Failed to download matrix from $url" >&2 + rm -f "$filepath" # Clean up partial download + return 1 + fi +} + +# Main logic +main() { + # Check that only one option is set + if [ -n
"$MATRIX_URL" ] && [ -n "$MATRIX_FILE" ]; then + echo "ERROR: Both HIVESENSE_MATRIX_JSON_URL and HIVESENSE_MATRIX_JSON_FILE are set. Please use only one." >&2 + exit 1 + fi + + if [ -n "$MATRIX_URL" ]; then + # URL mode: download if needed + filename=$(get_filename_from_url "$MATRIX_URL") + if [ -z "$filename" ]; then + echo "ERROR: Could not extract filename from URL: $MATRIX_URL" >&2 + exit 1 + fi + + if ! filepath=$(download_if_needed "$MATRIX_URL" "$filename"); then + exit 1 + fi + + # Export the resolved path for use by install_app.sh + export HIVESENSE_REDUCED_MATRIX_JSON="$filepath" + + elif [ -n "$MATRIX_FILE" ]; then + # File mode: use provided path + if [ ! -f "$MATRIX_FILE" ]; then + echo "ERROR: Matrix file not found at: $MATRIX_FILE" >&2 + exit 1 + fi + + echo "Using local matrix file: $MATRIX_FILE" >&2 + export HIVESENSE_REDUCED_MATRIX_JSON="$MATRIX_FILE" + + else + # No matrix configuration + echo "No matrix JSON configured (neither URL nor FILE specified)" >&2 + export HIVESENSE_REDUCED_MATRIX_JSON="" + fi + + # Output the final path for logging + if [ -n "$HIVESENSE_REDUCED_MATRIX_JSON" ]; then + echo "Matrix JSON path resolved to: $HIVESENSE_REDUCED_MATRIX_JSON" >&2 + + # Check if it's gzipped + case "$HIVESENSE_REDUCED_MATRIX_JSON" in + *.gz) + echo "Matrix file is gzipped, will decompress on-the-fly when loading" >&2 + ;; + esac + fi +} + +# Run main function +main \ No newline at end of file -- GitLab From 6e2ec36c355a9fddc9765721412da2e41d8e2205 Mon Sep 17 00:00:00 2001 From: Eric Frias Date: Fri, 29 Aug 2025 13:06:08 -0400 Subject: [PATCH 3/3] Change approach to autodetect either file or url for matrix --- scripts/install_app.sh | 45 +++++++++++++++++++++++++++++++++------ scripts/matrix_handler.sh | 22 ++++++++++++++----- 2 files changed, 56 insertions(+), 11 deletions(-) diff --git a/scripts/install_app.sh b/scripts/install_app.sh index c7a5bfe..4674583 100755 --- a/scripts/install_app.sh +++ b/scripts/install_app.sh @@ -60,9 +60,10 @@ 
print_help () { echo " --minimum-ann-candidates Always consider at least this many candidates for reranking" echo " --use-reduced-embeddings=BOOL Enable dimension reduction (true/false, yes/no, on/off, 1/0, case insensitive)" echo " --reduced-dim=NUMBER Reduced dimension size" - echo " --reduced-matrix-json=PATH Path to reduction matrix JSON file" - echo " --reduced-matrix-url=URL URL to download reduction matrix JSON" - echo " --reduced-matrix-file=PATH Alternative to --reduced-matrix-json for consistency" + echo " --reduced-matrix-source=PATH|URL Path to matrix file or URL to download (auto-detected)" + echo " --reduced-matrix-json=PATH (Deprecated) Path to reduction matrix JSON file" + echo " --reduced-matrix-url=URL (Deprecated) URL to download reduction matrix JSON" + echo " --reduced-matrix-file=PATH (Deprecated) Alternative to --reduced-matrix-json" echo " --allow-debugging=BOOL Enable debugging flags for API calls (true/false, yes/no, on/off, 1/0)" echo " --help Display this help screen and exit" echo @@ -175,6 +176,9 @@ while [ $# -gt 0 ]; do --reduced-dim=*) REDUCED_DIM="${1#*=}" ;; + --reduced-matrix-source=*) + REDUCED_MATRIX_SOURCE="${1#*=}" + ;; --reduced-matrix-json=*) REDUCED_MATRIX_JSON="${1#*=}" ;; @@ -277,9 +281,35 @@ psql "$POSTGRES_ACCESS" -v ON_ERROR_STOP=on -c "SET SEARCH_PATH TO ${HIVESENSE_S # Handle matrix JSON configuration only if USE_REDUCED_EMBEDDINGS is true if [ "$USE_REDUCED_EMBEDDINGS" = "true" ]; then - # Handle URL download if specified - if [ -n "$REDUCED_MATRIX_URL" ]; then - echo "Handling matrix URL download..." 
+ # Handle new unified matrix source parameter + if [ -n "$REDUCED_MATRIX_SOURCE" ]; then + # Detect if it's a URL or file path + case "$REDUCED_MATRIX_SOURCE" in + http://*|https://*) + echo "Detected URL source: $REDUCED_MATRIX_SOURCE" + MATRIX_HANDLER_ARGS="--matrix-url=$REDUCED_MATRIX_SOURCE" + ;; + *) + echo "Detected file source: $REDUCED_MATRIX_SOURCE" + MATRIX_HANDLER_ARGS="--matrix-file=$REDUCED_MATRIX_SOURCE" + ;; + esac + + if [ -n "${HIVESENSE_CONFIG_DIR:-}" ]; then + MATRIX_HANDLER_ARGS="$MATRIX_HANDLER_ARGS --config-dir=$HIVESENSE_CONFIG_DIR" + fi + + # Source the matrix handler to resolve the path + # shellcheck disable=SC1090,SC2086 + . "$SRCPATH/scripts/matrix_handler.sh" $MATRIX_HANDLER_ARGS + + # Use the resolved path + if [ -n "$HIVESENSE_REDUCED_MATRIX_JSON" ]; then + REDUCED_MATRIX_JSON="$HIVESENSE_REDUCED_MATRIX_JSON" + fi + # Keep backward compatibility with old parameters + elif [ -n "$REDUCED_MATRIX_URL" ]; then + echo "Using deprecated --reduced-matrix-url parameter" # Build arguments for matrix_handler.sh MATRIX_HANDLER_ARGS="--matrix-url=$REDUCED_MATRIX_URL" if [ -n "${HIVESENSE_CONFIG_DIR:-}" ]; then @@ -296,7 +326,10 @@ if [ "$USE_REDUCED_EMBEDDINGS" = "true" ]; then fi # Handle file path if specified (REDUCED_MATRIX_FILE takes precedence over REDUCED_MATRIX_JSON for consistency) elif [ -n "$REDUCED_MATRIX_FILE" ]; then + echo "Using deprecated --reduced-matrix-file parameter" REDUCED_MATRIX_JSON="$REDUCED_MATRIX_FILE" + elif [ -n "$REDUCED_MATRIX_JSON" ]; then + echo "Using deprecated --reduced-matrix-json parameter" fi # Now check if we have a matrix file diff --git a/scripts/matrix_handler.sh b/scripts/matrix_handler.sh index d5ab9a1..9a2729e 100755 --- a/scripts/matrix_handler.sh +++ b/scripts/matrix_handler.sh @@ -91,14 +91,26 @@ main() { export HIVESENSE_REDUCED_MATRIX_JSON="$filepath" elif [ -n "$MATRIX_FILE" ]; then - # File mode: use provided path - if [ ! 
-f "$MATRIX_FILE" ]; then - echo "ERROR: Matrix file not found at: $MATRIX_FILE" >&2 + # File mode: resolve relative paths to config dir + case "$MATRIX_FILE" in + /*) + # Absolute path - use as-is + RESOLVED_PATH="$MATRIX_FILE" + ;; + *) + # Relative path - resolve to config dir + RESOLVED_PATH="${CONFIG_DIR}/${MATRIX_FILE}" + echo "Resolved relative path to: $RESOLVED_PATH" >&2 + ;; + esac + + if [ ! -f "$RESOLVED_PATH" ]; then + echo "ERROR: Matrix file not found at: $RESOLVED_PATH" >&2 exit 1 fi - echo "Using local matrix file: $MATRIX_FILE" >&2 - export HIVESENSE_REDUCED_MATRIX_JSON="$MATRIX_FILE" + echo "Using local matrix file: $RESOLVED_PATH" >&2 + export HIVESENSE_REDUCED_MATRIX_JSON="$RESOLVED_PATH" else # No matrix configuration -- GitLab