Rewrite calculate_account_reputations
I tried few diffrent approaches to the problem - my goal was to reduce number of inserts to account_reputations table (to calculate the reputation change in a query instead of increasing/decreasing value in a table) and speedup sync
- to write a query updating table in a local variable
FOR __reputation_change IN
SELECT ir.author_id, ir.voter_id, ir.rshares, ir.prev_rshares, ir.source_op
FROM unnest(__votes_array) WITH ORDINALITY AS ir
ORDER BY ir.source_op
LOOP
__is_changed := false;
SELECT ir.reputation, ir.is_implicit
INTO __ini_author_rep, __implicit_ini_author_rep
FROM unnest(__initial_reputations) AS ir
WHERE (ir).account_id = __reputation_change.author_id;
__author_rep := __ini_author_rep;
__implicit_author_rep := __implicit_ini_author_rep;
SELECT ir.reputation, ir.is_implicit
INTO __voter_rep, __implicit_voter_rep
FROM unnest(__initial_reputations) AS ir
WHERE (ir).account_id = __reputation_change.voter_id;
-- Reputation reset calculation logic
IF NOT __implicit_author_rep AND __voter_rep >= 0 AND
(__reputation_change.prev_rshares >= 0 OR (__reputation_change.prev_rshares < 0 AND NOT __implicit_voter_rep AND __voter_rep > __author_rep - (__reputation_change.prev_rshares >> 6)::BIGINT)) THEN
__is_changed := true;
__author_rep := __author_rep - (__reputation_change.prev_rshares >> 6)::BIGINT;
__implicit_author_rep := (__author_rep = 0)::BOOLEAN; -- Check if it became 0
IF __reputation_change.voter_id = __reputation_change.author_id THEN -- Same account
__implicit_voter_rep := __implicit_author_rep;
__voter_rep := __author_rep; -- Update voter's rep if same
END IF;
END IF;
-- Update author's reputation based on rshares
IF __voter_rep >= 0 AND
(__reputation_change.rshares >= 0 OR (__reputation_change.rshares < 0 AND NOT __implicit_voter_rep AND __voter_rep > __author_rep)) THEN
__is_changed := true;
__author_rep = __author_rep + (__reputation_change.rshares >> 6)::BIGINT;
END IF;
IF __is_changed THEN
__initial_reputations := array_replace(
__initial_reputations,
(__reputation_change.author_id, __ini_author_rep, __implicit_ini_author_rep, FALSE)::AccountReputation,
(__reputation_change.author_id, __author_rep, __implicit_author_rep, __is_changed)::AccountReputation
);
END IF;
END LOOP;
this calculated reputation correctly but was MUCH slower and in result unusable - I couldn't figure out how can i make it faster.
- recursive query that holds each update on account's reputation - and chosing the 'latest' reputation
join_reputation AS MATERIALIZED (
SELECT
da.account_id,
COALESCE(ar.reputation, 0)::BIGINT,
COALESCE(ar.is_implicit, true)::BOOLEAN
FROM add_row_num_to_acc da
LEFT JOIN account_reputations ar ON da.account_id = ar.account_id
),
rep_change AS MATERIALIZED (
SELECT
uv.author_id,
uv.voter_id,
uv.rshares,
uv.prev_rshares,
ROW_NUMBER() OVER (ORDER BY uv.source_op) AS row_num
FROM check_if_prev_balances_canceled uv
),
--for context ^
WITH RECURSIVE ReputationUpdates AS (
-- Base case: Start with the first row (row_num = 1)
SELECT
(cr.new_rep).account_id AS account_id,
(cr.new_rep).reputation AS reputation,
(cr.new_rep).is_implicit AS is_implicit,
(cr.new_rep).changed AS changed,
cr.row_num
FROM (
SELECT
calculate_account_reputations(
ar.author_id,
ar.voter_id,
a_rep.reputation,
v_rep.reputation,
a_rep.is_implicit,
v_rep.is_implicit,
ar.rshares,
ar.prev_rshares
) AS new_rep,
ar.row_num
FROM rep_change ar
JOIN join_reputation a_rep ON ar.author_id = a_rep.account_id
JOIN join_reputation v_rep ON ar.voter_id = v_rep.account_id
WHERE ar.row_num = 1
) AS cr
UNION ALL
-- Recursive part: Iterate over subsequent rows
SELECT
(cr.new_rep).account_id AS account_id,
(cr.new_rep).reputation AS reputation,
(cr.new_rep).is_implicit AS is_implicit,
(cr.new_rep).changed AS changed,
cr.row_num
FROM (
SELECT
calculate_account_reputations(
ar.author_id,
ar.voter_id,
a_rep.reputation,
v_rep.reputation,
a_rep.is_implicit,
v_rep.is_implicit,
ar.rshares,
ar.prev_rshares
) AS new_rep,
ar.row_num
FROM rep_change ar
JOIN (
SELECT
jr.account_id,
COALESCE(a.reputation, jr.reputation) AS reputation,
COALESCE(a.is_implicit, jr.is_implicit) AS is_implicit
FROM join_reputation jr
JOIN LATERAL (
SELECT
ru.account_id,
ru.reputation,
ru.is_implicit
FROM ReputationUpdates ru
WHERE
ru.account_id = jr.account_id AND
ru.changed AND
ru.row_num < ar.row_num
ORDER BY ru.row_num DESC
LIMIT 1
) a ON TRUE
WHERE jr.account_id = ar.author_id
) a_rep ON a_rep.account_id = ar.author_id
JOIN (
SELECT
jr.account_id,
COALESCE(a.reputation, jr.reputation) AS reputation,
COALESCE(a.is_implicit, jr.is_implicit) AS is_implicit
FROM join_reputation jr
JOIN LATERAL (
SELECT
ru.account_id,
ru.reputation,
ru.is_implicit
FROM ReputationUpdates ru
WHERE
ru.account_id = jr.account_id AND
ru.changed AND
ru.row_num < ar.row_num
ORDER BY ru.row_num DESC
LIMIT 1
) a ON TRUE
WHERE jr.account_id = ar.author_id
) v_rep ON v_rep.account_id = ar.voter_id
) AS cr
WHERE cr.row_num = (SELECT MIN(rp.row_num) FROM rep_change rp WHERE rp.row_num > cr.row_num)
)
Unfortunately recursive query doesn't allow outer joins and multiple queries to recursive table, I counldn't manage to find a way out of it
- update temporary sql table and then inserting the temp table to account_reputations (that wasn't ideal choice - but I wanted to check if it has any impact on performance)
It again - calculated reputations correctly - but few % slower than current develop.
- This MR's version - I ended up optimizing calculate_account_reputations function that ended up 10% faster than develop + i found that cross join is much faster in comparison to extracting values from type in next CTE - overall speedup is 44% on 5m blocks (390s to 270s) - I'm running test on steem-11 to find out how this change impacts performance on full block_log
edit. on steem-11 develop 24,5h, new version 18.5h
Edited by Michal Zander