diff --git a/ci/scripts/prepare-stack-data-directory.sh b/ci/scripts/prepare-stack-data-directory.sh
index d89605555990dabd2c385462e3c27c2f13190888..4081d13fdf8bf55d8f588995926e06d57af0c858 100755
--- a/ci/scripts/prepare-stack-data-directory.sh
+++ b/ci/scripts/prepare-stack-data-directory.sh
@@ -7,7 +7,9 @@ HAF_MOUNTPOINT="${1:?"Please pass a valid path for the stack's data directory to
 echo "Creating HAF's mountpoint at ${HAF_MOUNTPOINT}..."
 
 mkdir -p "${HAF_MOUNTPOINT}/blockchain"
-mkdir -p "${HAF_MOUNTPOINT}/shared_memory/haf_wal"
+mkdir -p "${HAF_MOUNTPOINT}/state/shared_memory"
+mkdir -p "${HAF_MOUNTPOINT}/state/haf_wal"
+mkdir -p "${HAF_MOUNTPOINT}/state/rocksdb"
 mkdir -p "${HAF_MOUNTPOINT}/logs/caddy"
 mkdir -p "${HAF_MOUNTPOINT}/logs/pgbadger"
 mkdir -p "${HAF_MOUNTPOINT}/logs/postgresql"
diff --git a/clear_pool.sh b/clear_pool.sh
index f6268c1b042a21e31aca4e1692b2fafcd3fda472..f7510c02f33e1bab399002ab8b874e13efa6711f 100755
--- a/clear_pool.sh
+++ b/clear_pool.sh
@@ -2,4 +2,4 @@
 . ./.env
 sudo rm -rf ${HAF_LOG_DIRECTORY:-${TOP_LEVEL_DATASET_MOUNTPOINT}/logs}/postgresql/*
 sudo rm -rf ${HAF_DATA_DIRECTORY:-${TOP_LEVEL_DATASET_MOUNTPOINT}}/haf_db_store/*
-rm -rf ${HAF_SHM_DIRECTORY:-${TOP_LEVEL_DATASET_MOUNTPOINT}/shared_memory}/shared_memory.bin ${HAF_SHM_DIRECTORY:-${TOP_LEVEL_DATASET_MOUNTPOINT}/shared_memory}/haf_wal/* ${HAF_SHM_DIRECTORY:-${TOP_LEVEL_DATASET_MOUNTPOINT}/shared_memory}/comments-rocksdb-storage
+rm -rf ${HAF_SHM_DIRECTORY:-${TOP_LEVEL_DATASET_MOUNTPOINT}/state/shared_memory}/shared_memory.bin ${HAF_WAL_DIRECTORY:-${TOP_LEVEL_DATASET_MOUNTPOINT}/state/haf_wal}/* ${TOP_LEVEL_DATASET_MOUNTPOINT}/state/rocksdb/*
diff --git a/create_zfs_datasets.sh b/create_zfs_datasets.sh
index c06cf8110d0fae6b32f02aab5e6515ebac658879..f07c8a2464293fc79c0cd4f39c319762a02f09d5 100755
--- a/create_zfs_datasets.sh
+++ b/create_zfs_datasets.sh
@@ -88,14 +88,32 @@ zfs_uncompressed_options="-o compression=off"
 # several sites recommend 8k blocks for optimizing postgres on zfs, but we have found that it
 # kills compression ratios for haf_block_log, so we've decided to leave it at the default 128k
 zfs_postgres_options="" # or "-o recordsize=8k -o recordsize=16k", consider also "-o logbias=throughput"
+zfs_shared_memory_options="-o primarycache=metadata -o recordsize=64k -o logbias=throughput -o sync=disabled"
+# RocksDB optimizations: smaller recordsize for better random I/O, disabled compression (RocksDB handles it),
+# and settings to optimize for write-heavy workloads with compaction
+zfs_rocksdb_options="-o recordsize=16k -o primarycache=all -o logbias=throughput -o sync=standard -o redundant_metadata=most"
+# Blockchain optimizations: larger recordsize for sequential access, metadata caching, and enhanced prefetch
+zfs_blockchain_options="-o recordsize=1M -o prefetch=1 -o logbias=latency"
+
 zfs create $zfs_common_options $zfs_compressed_options "${ZPOOL}/${TOP_LEVEL_DATASET}"
 
 # create an uncompressed dataset for the blockchain. Blocks in it are already compressed, so won't compress further.
-zfs create $zfs_common_options $zfs_uncompressed_options "${ZPOOL}/${TOP_LEVEL_DATASET}/blockchain"
+# Optimized with larger recordsize for sequential access, metadata-only caching, and enhanced prefetch for large files
+zfs create $zfs_common_options $zfs_uncompressed_options $zfs_blockchain_options "${ZPOOL}/${TOP_LEVEL_DATASET}/blockchain"
+
+# create a dataset for state-related data
+zfs create $zfs_common_options $zfs_compressed_options "${ZPOOL}/${TOP_LEVEL_DATASET}/state"
 
 # create an uncompressed dataset for the shared_memory.bin file and WAL.
-# AFAIK we haven't done studies on whether compression helps shared_memory.bin.
-zfs create $zfs_common_options $zfs_uncompressed_options "${ZPOOL}/${TOP_LEVEL_DATASET}/shared_memory"
+# Optimized for memory-mapped files with reduced latency and tuned for random access patterns
+zfs create $zfs_common_options $zfs_uncompressed_options $zfs_shared_memory_options "${ZPOOL}/${TOP_LEVEL_DATASET}/state/shared_memory"
+
+# create an optimized dataset for RocksDB with settings tuned for LSM-tree databases:
+# - smaller recordsize (16k) for better random I/O performance
+# - full caching for both data and metadata to improve read performance
+# - logbias=throughput to optimize for write-heavy workloads with compaction
+# - redundant_metadata=most for better resilience during power loss
+zfs create $zfs_common_options $zfs_uncompressed_options $zfs_rocksdb_options "${ZPOOL}/${TOP_LEVEL_DATASET}/state/rocksdb"
 
 # create an unmountable dataset to serve as the parent for pgdata & tablespaces
 zfs create $zfs_common_options $zfs_compressed_options -o canmount=off "${ZPOOL}/${TOP_LEVEL_DATASET}/haf_db_store"
@@ -116,7 +134,7 @@ zfs create $zfs_common_options $zfs_compressed_options $zfs_postgres_options -o
 zfs create $zfs_common_options $zfs_compressed_options -o canmount=on "${ZPOOL}/${TOP_LEVEL_DATASET}/logs"
 
 # needs to exist to be bind-mounted, no real reason to make it a dataset of its own though
-mkdir -p "$TOP_LEVEL_DATASET_MOUNTPOINT/shared_memory/haf_wal"
+mkdir -p "$TOP_LEVEL_DATASET_MOUNTPOINT/state/haf_wal"
 
 # 1000:100 is hived:users inside the container
 chown -R 1000:100 "$TOP_LEVEL_DATASET_MOUNTPOINT"
diff --git a/haf_base.yaml b/haf_base.yaml
index 1b86cd666227f7645cebde4d4235f60f39a95f8f..6d7688c46da9563c2c9f9520a686c8998a71219b 100644
--- a/haf_base.yaml
+++ b/haf_base.yaml
@@ -4,6 +4,11 @@ services:
       - core
       - block-explorer-uninstall
     image: ${HAF_IMAGE:-${HIVE_API_NODE_REGISTRY:-registry.hive.blog}/haf}:${HAF_VERSION:-${HIVE_API_NODE_VERSION}}
+    privileged: true
+    cap_add: [SYS_ADMIN]
+    security_opt:
+      - apparmor=unconfined
+      - label=disable
     networks:
       - haf-network
    tty: true
@@ -102,11 +107,18 @@ services:
         source: ${HAF_LOG_DIRECTORY:-${TOP_LEVEL_DATASET_MOUNTPOINT}/logs}/postgresql
         target: /home/hived/postgresql_logs/
       - type: bind
-        source: ${HAF_SHM_DIRECTORY:-${TOP_LEVEL_DATASET_MOUNTPOINT}/shared_memory}/
-        target: /home/hived/shm_dir
+        source: ${HAF_SHM_DIRECTORY:-${TOP_LEVEL_DATASET_MOUNTPOINT}/state/shared_memory}/
+        target: /home/hived/state/shm_dir
       - type: bind
-        source: ${HAF_WAL_DIRECTORY:-${TOP_LEVEL_DATASET_MOUNTPOINT}/shared_memory/haf_wal}
-        target: /home/hived/wal_dir
+        source: ${HAF_WAL_DIRECTORY:-${TOP_LEVEL_DATASET_MOUNTPOINT}/state/haf_wal}
+        target: /home/hived/state/wal_dir
+      - type: bind
+        source: ${HAF_WAL_DIRECTORY:-${TOP_LEVEL_DATASET_MOUNTPOINT}/state/rocksdb}
+        target: /home/hived/state/rocksdb
+      - type: bind
+        source: /proc/sys/vm
+        target: /host-proc/sys/vm
+        read_only: false
     shm_size: 8gb
     healthcheck:
       test: ["CMD-SHELL","curl -f 127.0.0.1:8091"]
diff --git a/make_ramdisk.sh b/make_ramdisk.sh
index cbcd3b51164f0c91ce01dd6bdfef2366546e3949..8f574913b718a9cd043b0b0b739c4d5dd187bed3 100755
--- a/make_ramdisk.sh
+++ b/make_ramdisk.sh
@@ -1,4 +1,4 @@
 #another braindead script for me
 sudo mkdir -p /mnt/haf_shared_mem
-sudo mount -t tmpfs -o size=26G ramfs /mnt/haf_shared_mem
+sudo mount -t tmpfs -o size=12G ramfs /mnt/haf_shared_mem
 sudo chmod 777 /mnt/haf_shared_mem
diff --git a/rollback_zfs_datasets.sh b/rollback_zfs_datasets.sh
index f2360fc80f76c1ffb318db3d536bc808c63b5ec7..d9dedafc5c23ea391eb2176257c136a4dc48232c 100755
--- a/rollback_zfs_datasets.sh
+++ b/rollback_zfs_datasets.sh
@@ -94,7 +94,9 @@ check_dataset_is_unmountable "${TOP_LEVEL_DATASET_MOUNTPOINT}/haf_db_store/pgdat
 check_dataset_is_unmountable "${TOP_LEVEL_DATASET_MOUNTPOINT}/haf_db_store/tablespace"
 check_dataset_is_unmountable "${TOP_LEVEL_DATASET_MOUNTPOINT}/logs"
 check_dataset_is_unmountable "${TOP_LEVEL_DATASET_MOUNTPOINT}/blockchain"
-check_dataset_is_unmountable "${TOP_LEVEL_DATASET_MOUNTPOINT}/shared_memory"
+check_dataset_is_unmountable "${TOP_LEVEL_DATASET_MOUNTPOINT}/state/shared_memory"
+check_dataset_is_unmountable "${TOP_LEVEL_DATASET_MOUNTPOINT}/state/rocksdb"
+check_dataset_is_unmountable "${TOP_LEVEL_DATASET_MOUNTPOINT}/state"
 check_dataset_is_unmountable "${TOP_LEVEL_DATASET_MOUNTPOINT}"
 
 echo "All datasets appear unmountable"
@@ -128,7 +130,9 @@ unmount "${ZPOOL}/${TOP_LEVEL_DATASET}/haf_db_store/pgdata"
 unmount "${ZPOOL}/${TOP_LEVEL_DATASET}/haf_db_store/tablespace"
 unmount "${ZPOOL}/${TOP_LEVEL_DATASET}/logs"
 unmount "${ZPOOL}/${TOP_LEVEL_DATASET}/blockchain"
-unmount "${ZPOOL}/${TOP_LEVEL_DATASET}/shared_memory"
+unmount "${ZPOOL}/${TOP_LEVEL_DATASET}/state/shared_memory"
+unmount "${ZPOOL}/${TOP_LEVEL_DATASET}/state/rocksdb"
+unmount "${ZPOOL}/${TOP_LEVEL_DATASET}/state"
 unmount "${ZPOOL}/${TOP_LEVEL_DATASET}"
 
 rollback() {
@@ -138,7 +142,9 @@ rollback() {
 }
 
 rollback "${ZPOOL}/${TOP_LEVEL_DATASET}"
-rollback "${ZPOOL}/${TOP_LEVEL_DATASET}/shared_memory"
+rollback "${ZPOOL}/${TOP_LEVEL_DATASET}/state"
+rollback "${ZPOOL}/${TOP_LEVEL_DATASET}/state/shared_memory"
+rollback "${ZPOOL}/${TOP_LEVEL_DATASET}/state/rocksdb"
 rollback "${ZPOOL}/${TOP_LEVEL_DATASET}/blockchain"
 rollback "${ZPOOL}/${TOP_LEVEL_DATASET}/logs"
 rollback "${ZPOOL}/${TOP_LEVEL_DATASET}/haf_db_store/tablespace"
@@ -152,7 +158,9 @@ remount() {
 }
 
 remount "${ZPOOL}/${TOP_LEVEL_DATASET}"
-remount "${ZPOOL}/${TOP_LEVEL_DATASET}/shared_memory"
+remount "${ZPOOL}/${TOP_LEVEL_DATASET}/state"
+remount "${ZPOOL}/${TOP_LEVEL_DATASET}/state/shared_memory"
+remount "${ZPOOL}/${TOP_LEVEL_DATASET}/state/rocksdb"
 remount "${ZPOOL}/${TOP_LEVEL_DATASET}/blockchain"
 remount "${ZPOOL}/${TOP_LEVEL_DATASET}/logs"
 remount "${ZPOOL}/${TOP_LEVEL_DATASET}/haf_db_store/tablespace"
@@ -160,7 +168,9 @@ remount "${ZPOOL}/${TOP_LEVEL_DATASET}/haf_db_store/pgdata"
 remount "${ZPOOL}/${TOP_LEVEL_DATASET}/haf_db_store/pgdata/pg_wal"
 
 zfs list "${ZPOOL}/${TOP_LEVEL_DATASET}" \
-         "${ZPOOL}/${TOP_LEVEL_DATASET}/shared_memory" \
+         "${ZPOOL}/${TOP_LEVEL_DATASET}/state" \
+         "${ZPOOL}/${TOP_LEVEL_DATASET}/state/shared_memory" \
+         "${ZPOOL}/${TOP_LEVEL_DATASET}/state/rocksdb" \
          "${ZPOOL}/${TOP_LEVEL_DATASET}/blockchain" \
          "${ZPOOL}/${TOP_LEVEL_DATASET}/logs" \
          "${ZPOOL}/${TOP_LEVEL_DATASET}/haf_db_store/tablespace" \
diff --git a/snapshot_zfs_datasets.sh b/snapshot_zfs_datasets.sh
index 8626b8d6d1260ce21724191962111f4d7ef8ce3c..e06a98a1201bf65a6b29c75eb63fa7785bce4e9a 100755
--- a/snapshot_zfs_datasets.sh
+++ b/snapshot_zfs_datasets.sh
@@ -132,7 +132,9 @@ check_dataset_is_unmountable "${TOP_LEVEL_DATASET_MOUNTPOINT}/haf_db_store/pgdat
 check_dataset_is_unmountable "${TOP_LEVEL_DATASET_MOUNTPOINT}/haf_db_store/tablespace"
 check_dataset_is_unmountable "${TOP_LEVEL_DATASET_MOUNTPOINT}/logs"
 check_dataset_is_unmountable "${TOP_LEVEL_DATASET_MOUNTPOINT}/blockchain"
-check_dataset_is_unmountable "${TOP_LEVEL_DATASET_MOUNTPOINT}/shared_memory"
+check_dataset_is_unmountable "${TOP_LEVEL_DATASET_MOUNTPOINT}/state/shared_memory"
+check_dataset_is_unmountable "${TOP_LEVEL_DATASET_MOUNTPOINT}/state/rocksdb"
+check_dataset_is_unmountable "${TOP_LEVEL_DATASET_MOUNTPOINT}/state"
 check_dataset_is_unmountable "${TOP_LEVEL_DATASET_MOUNTPOINT}"
 if [ ! -z "${SWAP_LOGS_DATASET}" ]; then
   check_dataset_is_unmountable "${SWAP_LOGS_DATASET}"
@@ -141,12 +143,12 @@ fi
 
 echo "All datasets appear unmountable"
 
 if [ "$SNAPSHOT_NAME" != "empty" ]; then
-  if [ ! -e "${TOP_LEVEL_DATASET_MOUNTPOINT}/shared_memory/shared_memory.bin" ]; then
-    echo "Warning: No shared memory file found in the shared_memory directory"
+  if [ ! -e "${TOP_LEVEL_DATASET_MOUNTPOINT}/state/shared_memory/shared_memory.bin" ]; then
+    echo "Warning: No shared memory file found in the state/shared_memory directory"
     exit 1
   fi
 
-  last_shared_memory_write=$(stat -c %Y "${TOP_LEVEL_DATASET_MOUNTPOINT}/shared_memory/shared_memory.bin")
+  last_shared_memory_write=$(stat -c %Y "${TOP_LEVEL_DATASET_MOUNTPOINT}/state/shared_memory/shared_memory.bin")
   last_blockchain_write=$(find "${TOP_LEVEL_DATASET_MOUNTPOINT}/blockchain" -type f -printf '%T@\n' | sort -n | tail -1 | cut -d. -f1)
 
   if [ -z "$last_blockchain_write" ]; then
@@ -213,7 +215,9 @@ unmount "${ZPOOL}/${TOP_LEVEL_DATASET}/haf_db_store/pgdata"
 unmount "${ZPOOL}/${TOP_LEVEL_DATASET}/haf_db_store/tablespace"
 unmount "${ZPOOL}/${TOP_LEVEL_DATASET}/logs"
 unmount "${ZPOOL}/${TOP_LEVEL_DATASET}/blockchain"
-unmount "${ZPOOL}/${TOP_LEVEL_DATASET}/shared_memory"
+unmount "${ZPOOL}/${TOP_LEVEL_DATASET}/state/shared_memory"
+unmount "${ZPOOL}/${TOP_LEVEL_DATASET}/state/rocksdb"
+unmount "${ZPOOL}/${TOP_LEVEL_DATASET}/state"
 unmount "${ZPOOL}/${TOP_LEVEL_DATASET}"
 
 if [ ! -z "${SWAP_LOGS_DATASET}" ]; then
@@ -241,7 +245,9 @@ remount() {
 }
 
 remount "${ZPOOL}/${TOP_LEVEL_DATASET}"
-remount "${ZPOOL}/${TOP_LEVEL_DATASET}/shared_memory"
+remount "${ZPOOL}/${TOP_LEVEL_DATASET}/state"
+remount "${ZPOOL}/${TOP_LEVEL_DATASET}/state/shared_memory"
+remount "${ZPOOL}/${TOP_LEVEL_DATASET}/state/rocksdb"
 remount "${ZPOOL}/${TOP_LEVEL_DATASET}/blockchain"
 remount "${ZPOOL}/${TOP_LEVEL_DATASET}/logs"
 remount "${ZPOOL}/${TOP_LEVEL_DATASET}/haf_db_store/tablespace"
@@ -259,7 +265,9 @@ fi
 
 zfs list "${ZPOOL}/${TOP_LEVEL_DATASET}@${SNAPSHOT_NAME}" \
-         "${ZPOOL}/${TOP_LEVEL_DATASET}/shared_memory@${SNAPSHOT_NAME}" \
+         "${ZPOOL}/${TOP_LEVEL_DATASET}/state@${SNAPSHOT_NAME}" \
+         "${ZPOOL}/${TOP_LEVEL_DATASET}/state/shared_memory@${SNAPSHOT_NAME}" \
+         "${ZPOOL}/${TOP_LEVEL_DATASET}/state/rocksdb@${SNAPSHOT_NAME}" \
          "${ZPOOL}/${TOP_LEVEL_DATASET}/blockchain@${SNAPSHOT_NAME}" \
          "${SWAP_LOGS_DATASET:-${ZPOOL}/${TOP_LEVEL_DATASET}/logs}@${SNAPSHOT_NAME}" \
          "${ZPOOL}/${TOP_LEVEL_DATASET}/haf_db_store/tablespace@${SNAPSHOT_NAME}" \