From 4d9e88b58f367fc9a530ef56ffcc5c91a3d3a15f Mon Sep 17 00:00:00 2001 From: Michal Zander <mzander@syncad.com> Date: Fri, 14 Feb 2025 08:37:01 +0000 Subject: [PATCH] Revamp reptracker processing to utilize native tables instead of Hive indexes --- db/backend.sql | 75 +++++++++++ db/database_schema.sql | 21 +++ db/process_block_range.sql | 269 +++++++++++++++++++++++-------------- db/rep_helpers.sql | 2 + db/rep_views.sql | 37 ----- scripts/install_app.sh | 4 +- 6 files changed, 271 insertions(+), 137 deletions(-) create mode 100644 db/backend.sql delete mode 100644 db/rep_views.sql diff --git a/db/backend.sql b/db/backend.sql new file mode 100644 index 0000000..3d90e54 --- /dev/null +++ b/db/backend.sql @@ -0,0 +1,75 @@ +SET ROLE reptracker_owner; + +DROP TYPE IF EXISTS effective_vote_return CASCADE; +CREATE TYPE effective_vote_return AS +( + author TEXT, + voter TEXT, + permlink TEXT, + rshares BIGINT +); + +CREATE OR REPLACE FUNCTION process_vote_impacting_operations(IN _operation_body JSONB, IN _op_type_id INT) +RETURNS effective_vote_return +LANGUAGE plpgsql +STABLE +AS +$BODY$ +BEGIN + RETURN ( + CASE + WHEN _op_type_id = 72 THEN + process_effective_vote_operation(_operation_body) + + ELSE + process_deleted_comment_operation(_operation_body) + END + ); + +END; +$BODY$; + +--72 +CREATE OR REPLACE FUNCTION process_effective_vote_operation(IN _operation_body JSONB) +RETURNS effective_vote_return +LANGUAGE 'plpgsql' STABLE +AS +$$ +BEGIN + RETURN ( + ((_operation_body)->'value'->>'author')::TEXT, + ((_operation_body)->'value'->>'voter')::TEXT, + ((_operation_body)->'value'->>'permlink')::TEXT, + ( + CASE jsonb_typeof(_operation_body -> 'value' -> 'rshares') + WHEN 'number' THEN + (_operation_body -> 'value' -> 'rshares')::BIGINT + WHEN 'string' THEN + trim(both '"' FROM (_operation_body -> 'value' ->> 'rshares'))::BIGINT + ELSE + NULL::BIGINT + END + ) + )::effective_vote_return; + +END +$$; + +--17,61 +CREATE OR REPLACE FUNCTION process_deleted_comment_operation(IN _operation_body JSONB) +RETURNS effective_vote_return +LANGUAGE 'plpgsql' STABLE +AS +$$ +BEGIN + RETURN ( + ((_operation_body)->'value'->>'author')::TEXT, + NULL, + ((_operation_body)->'value'->>'permlink')::TEXT, + NULL + )::effective_vote_return; + +END +$$; + +RESET ROLE; diff --git a/db/database_schema.sql b/db/database_schema.sql index 33583d1..f596168 100644 --- a/db/database_schema.sql +++ b/db/database_schema.sql @@ -49,6 +49,27 @@ CREATE TABLE IF NOT EXISTS account_reputations PERFORM hive.app_register_table( __schema_name, 'account_reputations', __schema_name ); +CREATE TABLE IF NOT EXISTS permlinks +( + permlink_id SERIAL PRIMARY KEY, + permlink TEXT UNIQUE +); + +PERFORM hive.app_register_table( __schema_name, 'permlinks', __schema_name ); + +CREATE TABLE IF NOT EXISTS active_votes +( + author_id INT NOT NULL, + voter_id INT NOT NULL, + permlink_serial_id INT NOT NULL, + rshares BIGINT NOT NULL, + + CONSTRAINT pk_active_votes PRIMARY KEY (author_id, permlink_serial_id, voter_id), + CONSTRAINT fk_active_votes_permlink FOREIGN KEY (permlink_serial_id) REFERENCES permlinks (permlink_id) deferrable +); + +PERFORM hive.app_register_table( __schema_name, 'active_votes', __schema_name ); + DROP TYPE IF EXISTS AccountReputation CASCADE; CREATE TYPE AccountReputation AS ( diff --git a/db/process_block_range.sql b/db/process_block_range.sql index 5ba68e9..4b87184 100644 --- a/db/process_block_range.sql +++ b/db/process_block_range.sql @@ -13,84 +13,188 @@ SET join_collapse_limit = 16 SET jit = OFF AS $BODY$ DECLARE - _result INT; + __rep_change INT; + __delete_votes INT; + __upsert_votes INT; BEGIN - -WITH select_ef_vote_ops AS MATERIALIZED -( -SELECT o.id, - o.block_num, - o.trx_in_block, - o.op_pos, - o.body_binary::JSONB as body -FROM operations_view o WHERE o.op_type_id = 72 -AND o.block_num BETWEEN _first_block_num AND _last_block_num ), -selected_range AS MATERIALIZED -( -SELECT - o.id, - o.block_num, - o.body->'value'->>'author' AS author, - o.body->'value'->>'voter' AS voter, - o.body->'value'->>'permlink' AS permlink, - (CASE WHEN jsonb_typeof(o.body->'value'->'rshares') = 'number' THEN - (o.body->'value'->'rshares')::bigint - ELSE - TRIM(BOTH '"'::text FROM o.body->'value'->>'rshares')::bigint - END) AS rshares -FROM select_ef_vote_ops o +--------------------------------------------------------------------------------------- +WITH vote_operations AS ( + SELECT + process_vote_impacting_operations(ov.body, ov.op_type_id) AS effective_votes, + ov.op_type_id, + ov.id AS source_op + FROM operations_view ov + WHERE ov.op_type_id IN (72, 17, 61) + AND ov.block_num BETWEEN _first_block_num AND _last_block_num +), +prepare_vote_comment_data AS MATERIALIZED ( + SELECT + (SELECT ha.id FROM accounts_view ha WHERE ha.name = (vo.effective_votes).author) AS author_id, + (SELECT ha.id FROM accounts_view ha WHERE ha.name = (vo.effective_votes).voter) AS voter_id, + (vo.effective_votes).permlink AS permlink, + (vo.effective_votes).rshares AS rshares, + vo.source_op, + vo.op_type_id + FROM vote_operations vo ), -filtered_range AS MATERIALIZED -( -SELECT - up.id AS up_id, - up.block_num, - up.author, - up.permlink, - up.voter, - up.rshares AS up_rshares, - COALESCE(( - SELECT prd.rshares - FROM hive_reputation_data_view prd - WHERE - prd.author = up.author AND - prd.voter = up.voter AND - prd.permlink = up.permlink AND - prd.id < up.id AND - NOT EXISTS (SELECT NULL FROM deleted_comment_operation_view dp - WHERE dp.author = up.author and dp.permlink = up.permlink and dp.id between prd.id and up.id) - ORDER BY prd.id DESC LIMIT 1 - ), 0) AS prev_rshares -FROM selected_range up +--------------------------------------------------------------------------------------- +-- Insert currently processed permlinks and reuse it in the following steps +supplement_permlink_dictionary AS ( + INSERT INTO permlinks AS dict + (permlink) + SELECT DISTINCT permlink + FROM prepare_vote_comment_data + ON CONFLICT (permlink) DO UPDATE SET + permlink = EXCLUDED.permlink + RETURNING (xmax = 0) as is_new_permlink, dict.permlink_id, dict.permlink +), +prev_votes_in_query AS ( + SELECT + ja.author_id, + ja.voter_id, + sp.permlink_id, + ja.rshares, + ja.source_op, + ja.op_type_id + FROM prepare_vote_comment_data ja + JOIN supplement_permlink_dictionary sp ON ja.permlink = sp.permlink +), +ranked_data AS MATERIALIZED ( + SELECT + author_id, + voter_id, + permlink_id, + rshares, + source_op, + op_type_id, + ROW_NUMBER() OVER (PARTITION BY author_id, voter_id, permlink_id ORDER BY source_op DESC) AS row_num + FROM prev_votes_in_query ), -balance_change AS MATERIALIZED -( - SELECT calculate_account_reputations( - ja.up_id, - ja.block_num, - ja.author, - (SELECT ha.id FROM hive.accounts_view ha WHERE ha.name = ja.author), - ja.permlink, - ja.voter, - (SELECT hv.id FROM hive.accounts_view hv WHERE hv.name = ja.voter), - ja.up_rshares, - ja.prev_rshares) - FROM filtered_range ja - ORDER BY ja.up_id +-- Prepare resets for reputation calculation +join_permlink_id_to_deletes AS MATERIALIZED ( + SELECT + author_id, + permlink_id, + source_op, + row_num + FROM ranked_data + WHERE op_type_id != 72 +), +add_prev_votes AS ( + SELECT + current.author_id, + current.voter_id, + current.permlink_id, + current.rshares, + current.source_op, + previous.rshares AS prev_rshares, + COALESCE(previous.source_op, 0) AS prev_source_op, + current.row_num + FROM ranked_data current + LEFT JOIN ranked_data previous ON + current.author_id = previous.author_id AND + current.voter_id = previous.voter_id AND + current.permlink_id = previous.permlink_id AND + current.op_type_id = previous.op_type_id AND + current.row_num = previous.row_num - 1 + WHERE current.op_type_id = 72 +), +-- Link previous votes +find_prev_votes_in_table AS ( + SELECT + q.author_id, + q.voter_id, + q.permlink_id, + q.rshares, + COALESCE(q.prev_rshares, av.rshares, 0) AS prev_rshares, + q.source_op, + q.prev_source_op, + q.row_num + FROM add_prev_votes q + LEFT JOIN active_votes av ON + q.prev_rshares IS NULL AND + q.author_id = av.author_id AND + q.voter_id = av.voter_id AND + q.permlink_id = av.permlink_serial_id +), +-- Check and reset previous rshares +check_if_prev_balances_canceled AS ( + SELECT + ja.author_id, + ja.voter_id, + ja.permlink_id, + ja.rshares, + ja.source_op, + CASE + WHEN ja.prev_rshares != 0 AND NOT EXISTS ( + SELECT NULL + FROM join_permlink_id_to_deletes dp + WHERE + dp.author_id = ja.author_id AND + dp.permlink_id = ja.permlink_id AND + dp.source_op BETWEEN ja.prev_source_op AND ja.source_op + LIMIT 1 + ) THEN ja.prev_rshares + ELSE 0 + END AS prev_rshares, + ja.row_num + FROM find_prev_votes_in_table ja +), +--------------------------------------------------------------------------------------- +rep_change AS ( + SELECT + calculate_account_reputations( + uv.author_id, + uv.voter_id, + uv.rshares, + uv.prev_rshares + ) + FROM check_if_prev_balances_canceled uv + ORDER BY uv.source_op +), +delete_votes AS ( + DELETE FROM active_votes av + USING join_permlink_id_to_deletes dp + WHERE + av.author_id = dp.author_id AND + av.permlink_serial_id = dp.permlink_id AND + dp.row_num = 1 + RETURNING av.author_id, av.permlink_serial_id +), +upsert_votes AS ( + INSERT INTO active_votes AS av + (author_id, voter_id, permlink_serial_id, rshares) + SELECT + author_id, + voter_id, + permlink_id, + rshares + FROM ranked_data uv + WHERE + uv.row_num = 1 AND + uv.op_type_id = 72 AND + NOT EXISTS ( + SELECT NULL + FROM join_permlink_id_to_deletes dv + WHERE dv.author_id = uv.author_id AND dv.permlink_id = uv.permlink_id AND dv.source_op > uv.source_op + LIMIT 1 + ) + ON CONFLICT ON CONSTRAINT pk_active_votes DO UPDATE SET + rshares = EXCLUDED.rshares + RETURNING av.author_id, av.voter_id, av.permlink_serial_id ) -SELECT COUNT(*) FROM balance_change INTO _result; +SELECT + (SELECT count(*) FROM rep_change) AS rep_change, + (SELECT count(*) FROM delete_votes) AS delete_votes, + (SELECT count(*) FROM upsert_votes) AS upsert_votes +INTO __rep_change, __delete_votes, __upsert_votes; END $BODY$; CREATE OR REPLACE FUNCTION calculate_account_reputations( - _id BIGINT, - _block_num INT, - _author TEXT, _author_id INT, - _permlink TEXT, - _voter TEXT, _voter_id INT, _rshares BIGINT, _prev_rshares BIGINT @@ -139,16 +243,6 @@ __voter_rep := __account_reputations[2].reputation; __implicit_author_rep := __account_reputations[1].is_implicit; __implicit_voter_rep := __account_reputations[2].is_implicit; -IF __debug_log THEN - raise notice 'Block: % - Preprocessing a vote: author: %, voter: % permlink: %', _block_num, _author, _voter, _permlink; - - --- Author must have set explicit reputation to allow its correction -IF NOT __implicit_author_rep AND __prev_rshares != 0 THEN - raise notice 'Author % reputation (pre-correction): %', _author, __author_rep; - raise notice 'Author % - Correcting a vote: (voter: %) rshares: %', _author, _voter, __prev_rshares; - raise notice 'Author % - Voter % reputation: %', _author, _voter, __voter_rep; - END IF; -END IF; --- Author must have set explicit reputation to allow its correction --- Voter must have explicitly set reputation to match hived old conditions @@ -165,19 +259,6 @@ __implicit_author_rep := __author_rep = 0; __account_reputations[1] := ROW(_author_id, __author_rep, __implicit_author_rep, true)::AccountReputation; -IF __debug_log THEN - IF __implicit_author_rep THEN - raise notice 'Author % reputation (past-correction): implicit-0', _author; - ELSE - raise notice 'Author % reputation (past-correction): %', _author, __author_rep; - END IF; - END IF; - -END IF; - -IF __debug_log THEN - raise notice 'Block: % - Author % - Processing a vote: (voter: %) rshares: %', _block_num, _author, _voter, __rshares; - raise notice 'Author % - Voter % reputation: %', _author, _voter, __voter_rep; END IF; IF __voter_rep >= 0 AND (__rshares >= 0 OR (__rshares < 0 AND NOT __implicit_voter_rep AND __voter_rep > __author_rep)) THEN @@ -185,14 +266,6 @@ IF __voter_rep >= 0 AND (__rshares >= 0 OR (__rshares < 0 AND NOT __implicit_vot __new_author_rep = __author_rep + (__rshares >> 6)::BIGINT; __account_reputations[1] := ROW(_author_id, __new_author_rep, false, true)::AccountReputation; -IF __debug_log THEN - IF __implicit_author_rep THEN - raise notice 'Setting a reputation of author: % to %', _author, __new_author_rep; - ELSE - raise notice 'Changing reputation of author: % from % to %', _author, __author_rep, __new_author_rep; - END IF; - END IF; - END IF; INSERT INTO account_reputations diff --git a/db/rep_helpers.sql b/db/rep_helpers.sql index bfbbcf1..2a5871a 100644 --- a/db/rep_helpers.sql +++ b/db/rep_helpers.sql @@ -117,6 +117,8 @@ BEGIN CALL reptracker_massive_processing(_block_range.first_block, _block_range.last_block, _logs); PERFORM hive.app_request_table_vacuum('reptracker_app.account_reputations', interval '10 minutes'); --eventually fixup hard-coded schema name + PERFORM hive.app_request_table_vacuum('reptracker_app.active_votes', interval '10 minutes'); --eventually fixup hard-coded schema nam + PERFORM hive.app_request_table_vacuum('reptracker_app.permlinks', interval '10 minutes'); --eventually fixup hard-coded schema name RETURN; END IF; diff --git a/db/rep_views.sql b/db/rep_views.sql deleted file mode 100644 index 9cabba6..0000000 --- a/db/rep_views.sql +++ /dev/null @@ -1,37 +0,0 @@ -SET ROLE reptracker_owner; - -CREATE OR REPLACE VIEW deleted_comment_operation_view AS -SELECT - o.id, - o.block_num, - o.trx_in_block, - o.op_pos, - (o.body::jsonb -> 'value') ->> 'author' AS author, - (o.body::jsonb -> 'value') ->> 'permlink' AS permlink -FROM operations_view o -WHERE o.op_type_id in (17, 61); -- include delete_comment_operation and comment_payout_update_operation - -CREATE OR REPLACE VIEW hive_reputation_data_view AS -SELECT - o.id, - o.block_num, - o.trx_in_block, - o.op_pos, - (o.body::jsonb -> 'value' ->> 'author') as author, - (o.body::jsonb -> 'value' ->> 'voter') as voter, - (o.body::jsonb -> 'value' ->> 'permlink') as permlink, - ( - CASE jsonb_typeof(o.body::jsonb -> 'value' -> 'rshares') - WHEN 'number' THEN - (o.body::jsonb -> 'value' -> 'rshares')::BIGINT - WHEN 'string' THEN - trim(both '"' FROM (o.body::jsonb -> 'value' ->> 'rshares'))::BIGINT - ELSE - NULL::BIGINT - END - ) AS rshares - -FROM operations_view o -WHERE o.op_type_id = 72; - -RESET ROLE; diff --git a/scripts/install_app.sh b/scripts/install_app.sh index d5d04e0..84d2619 100755 --- a/scripts/install_app.sh +++ b/scripts/install_app.sh @@ -86,7 +86,7 @@ POSTGRES_ACCESS=${POSTGRES_URL:-"postgresql://$POSTGRES_USER@$POSTGRES_HOST:$POS psql "$POSTGRES_ACCESS" -v ON_ERROR_STOP=on -f "$SRCPATH/db/builtin_roles.sql" psql "$POSTGRES_ACCESS" -v ON_ERROR_STOP=on -c "SET ROLE reptracker_owner;CREATE SCHEMA IF NOT EXISTS ${REPTRACKER_SCHEMA} AUTHORIZATION reptracker_owner;" psql "$POSTGRES_ACCESS" -v ON_ERROR_STOP=on -c "SET custom.is_forking = '$IS_FORKING'; SET SEARCH_PATH TO ${REPTRACKER_SCHEMA};" -f "$SRCPATH/db/database_schema.sql" - psql "$POSTGRES_ACCESS" -v ON_ERROR_STOP=on -c "SET SEARCH_PATH TO ${REPTRACKER_SCHEMA};" -f "$SRCPATH/db/rep_views.sql" + psql "$POSTGRES_ACCESS" -v ON_ERROR_STOP=on -c "SET SEARCH_PATH TO ${REPTRACKER_SCHEMA};" -f "$SRCPATH/db/backend.sql" psql "$POSTGRES_ACCESS" -v ON_ERROR_STOP=on -c "SET SEARCH_PATH TO ${REPTRACKER_SCHEMA};" -f "$SRCPATH/db/process_block_range.sql" psql "$POSTGRES_ACCESS" -v ON_ERROR_STOP=on -c "SET SEARCH_PATH TO ${REPTRACKER_SCHEMA};" -f "$SRCPATH/db/rep_helpers.sql" psql "$POSTGRES_ACCESS" -v ON_ERROR_STOP=on -c "SET SEARCH_PATH TO ${REPTRACKER_SCHEMA};" -f "$SRCPATH/db/main_loop.sql" @@ -101,5 +101,5 @@ POSTGRES_ACCESS=${POSTGRES_URL:-"postgresql://$POSTGRES_USER@$POSTGRES_HOST:$POS psql "$POSTGRES_ACCESS" -v ON_ERROR_STOP=on -c "SET ROLE reptracker_owner;GRANT SELECT ON ALL TABLES IN SCHEMA ${REPTRACKER_SCHEMA} TO reptracker_user;" psql "$POSTGRES_ACCESS" -v ON_ERROR_STOP=on -c "SET ROLE reptracker_owner;GRANT SELECT ON ALL TABLES IN SCHEMA reptracker_endpoints TO reptracker_user;" #register indexes - psql "$POSTGRES_ACCESS" -v ON_ERROR_STOP=on -c "SET SEARCH_PATH TO ${REPTRACKER_SCHEMA};" -f "$SRCPATH/db/rep_indexes.sql" + #psql "$POSTGRES_ACCESS" -v ON_ERROR_STOP=on -c "SET SEARCH_PATH TO ${REPTRACKER_SCHEMA};" -f "$SRCPATH/db/rep_indexes.sql" -- GitLab