Skip to content
Snippets Groups Projects

Rewrite calculate_account_reputations

Merged Michal Zander requested to merge mzander/one-insert-2 into develop

I tried few diffrent approaches to the problem - my goal was to reduce number of inserts to account_reputations table (to calculate the reputation change in a query instead of increasing/decreasing value in a table) and speedup sync

  1. to write a query updating table in a local variable
  FOR __reputation_change IN 
    SELECT ir.author_id, ir.voter_id, ir.rshares, ir.prev_rshares, ir.source_op
    FROM unnest(__votes_array) WITH ORDINALITY AS ir
    ORDER BY ir.source_op
  LOOP
    __is_changed := false;
    
    SELECT ir.reputation, ir.is_implicit
    INTO __ini_author_rep, __implicit_ini_author_rep
    FROM unnest(__initial_reputations) AS ir
    WHERE (ir).account_id = __reputation_change.author_id;

    __author_rep := __ini_author_rep; 
    __implicit_author_rep := __implicit_ini_author_rep;

    SELECT ir.reputation, ir.is_implicit
    INTO __voter_rep, __implicit_voter_rep
    FROM unnest(__initial_reputations) AS ir
    WHERE (ir).account_id = __reputation_change.voter_id;

    -- Reputation reset calculation logic
    IF NOT __implicit_author_rep AND __voter_rep >= 0 AND 
      (__reputation_change.prev_rshares >= 0 OR (__reputation_change.prev_rshares < 0 AND NOT __implicit_voter_rep AND __voter_rep > __author_rep - (__reputation_change.prev_rshares >> 6)::BIGINT)) THEN

      __is_changed := true;
      __author_rep := __author_rep - (__reputation_change.prev_rshares >> 6)::BIGINT;
      __implicit_author_rep := (__author_rep = 0)::BOOLEAN;  -- Check if it became 0

      IF __reputation_change.voter_id = __reputation_change.author_id THEN  -- Same account
        __implicit_voter_rep := __implicit_author_rep;
        __voter_rep := __author_rep; -- Update voter's rep if same
      END IF;
    END IF;

    -- Update author's reputation based on rshares
    IF __voter_rep >= 0 AND 
      (__reputation_change.rshares >= 0 OR (__reputation_change.rshares < 0 AND NOT __implicit_voter_rep AND __voter_rep > __author_rep)) THEN

      __is_changed := true;
      __author_rep = __author_rep + (__reputation_change.rshares >> 6)::BIGINT;
    END IF;

    IF __is_changed THEN
      __initial_reputations := array_replace(
          __initial_reputations,
          (__reputation_change.author_id, __ini_author_rep, __implicit_ini_author_rep, FALSE)::AccountReputation,
          (__reputation_change.author_id, __author_rep, __implicit_author_rep, __is_changed)::AccountReputation
      );
    END IF;

  END LOOP;

this calculated reputation correctly but was MUCH slower and in result unusable - I couldn't figure out how can i make it faster.

  1. recursive query that holds each update on account's reputation - and chosing the 'latest' reputation
join_reputation AS MATERIALIZED (
  SELECT 
    da.account_id,
    COALESCE(ar.reputation, 0)::BIGINT,
    COALESCE(ar.is_implicit, true)::BOOLEAN
  FROM add_row_num_to_acc da
  LEFT JOIN account_reputations ar ON da.account_id = ar.account_id
),
rep_change AS MATERIALIZED (
  SELECT 
    uv.author_id,
    uv.voter_id,
    uv.rshares, 
    uv.prev_rshares,
    ROW_NUMBER() OVER (ORDER BY uv.source_op) AS row_num
  FROM check_if_prev_balances_canceled uv
),

--for context  ^

WITH RECURSIVE ReputationUpdates AS (
    -- Base case: Start with the first row (row_num = 1)
    SELECT 
      (cr.new_rep).account_id AS account_id, 
      (cr.new_rep).reputation AS reputation, 
      (cr.new_rep).is_implicit AS is_implicit, 
      (cr.new_rep).changed AS changed, 
      cr.row_num
    FROM (
      SELECT 
        calculate_account_reputations(
          ar.author_id,
          ar.voter_id,
          a_rep.reputation,
          v_rep.reputation,
          a_rep.is_implicit,
          v_rep.is_implicit,
          ar.rshares,
          ar.prev_rshares
        ) AS new_rep,
        ar.row_num
      FROM rep_change ar
      JOIN join_reputation a_rep ON ar.author_id = a_rep.account_id
      JOIN join_reputation v_rep ON ar.voter_id = v_rep.account_id
      WHERE ar.row_num = 1
    ) AS cr

    UNION ALL
    
    -- Recursive part: Iterate over subsequent rows
    SELECT 
      (cr.new_rep).account_id AS account_id, 
      (cr.new_rep).reputation AS reputation, 
      (cr.new_rep).is_implicit AS is_implicit, 
      (cr.new_rep).changed AS changed, 
      cr.row_num
    FROM (
      SELECT 
        calculate_account_reputations(
          ar.author_id,
          ar.voter_id,
          a_rep.reputation,
          v_rep.reputation,
          a_rep.is_implicit,
          v_rep.is_implicit,
          ar.rshares,
          ar.prev_rshares
        ) AS new_rep,
        ar.row_num
      FROM rep_change ar

      JOIN (
        SELECT
          jr.account_id,
          COALESCE(a.reputation, jr.reputation) AS reputation,
          COALESCE(a.is_implicit, jr.is_implicit) AS is_implicit
        FROM join_reputation jr
        JOIN LATERAL (
          SELECT
            ru.account_id,
            ru.reputation,
            ru.is_implicit
          FROM ReputationUpdates ru
          WHERE 
            ru.account_id = jr.account_id AND
            ru.changed AND
            ru.row_num < ar.row_num
          ORDER BY ru.row_num DESC
          LIMIT 1
        ) a ON TRUE  
        WHERE jr.account_id = ar.author_id
      ) a_rep ON a_rep.account_id = ar.author_id

      JOIN (
        SELECT
          jr.account_id,
          COALESCE(a.reputation, jr.reputation) AS reputation,
          COALESCE(a.is_implicit, jr.is_implicit) AS is_implicit
        FROM join_reputation jr
        JOIN LATERAL (
          SELECT
            ru.account_id,
            ru.reputation,
            ru.is_implicit
          FROM ReputationUpdates ru
          WHERE 
            ru.account_id = jr.account_id AND
            ru.changed AND
            ru.row_num < ar.row_num
          ORDER BY ru.row_num DESC
          LIMIT 1
        ) a ON TRUE  
        WHERE jr.account_id = ar.author_id
      ) v_rep ON v_rep.account_id = ar.voter_id
    ) AS cr
    WHERE cr.row_num = (SELECT MIN(rp.row_num) FROM rep_change rp WHERE rp.row_num > cr.row_num)
)

Unfortunately recursive query doesn't allow outer joins and multiple queries to recursive table, I counldn't manage to find a way out of it

  1. update temporary sql table and then inserting the temp table to account_reputations (that wasn't ideal choice - but I wanted to check if it has any impact on performance)

It again - calculated reputations correctly - but few % slower than current develop.

  1. This MR's version - I ended up optimizing calculate_account_reputations function that ended up 10% faster than develop + i found that cross join is much faster in comparison to extracting values from type in next CTE - overall speedup is 44% on 5m blocks (390s to 270s) - I'm running test on steem-11 to find out how this change impacts performance on full block_log

edit. on steem-11 develop 24,5h, new version 18.5h

Edited by Michal Zander

Merge request reports

Loading
Loading

Activity

Filter activity
  • Approvals
  • Assignees & reviewers
  • Comments (from bots)
  • Comments (from users)
  • Commits & branches
  • Edits
  • Labels
  • Lock status
  • Mentions
  • Merge request status
  • Tracking
Please register or sign in to reply
Loading