From 00b2db99c749de99d0bea89aad73017b4d8c18d3 Mon Sep 17 00:00:00 2001 From: Bartek Wrona <wrona@syncad.com> Date: Sun, 21 Jun 2020 17:21:05 +0200 Subject: [PATCH] - Defined PK constraints for tables where they were missed - Added additional index to hive_posts (indexing single permlink_id field) to speedup insertion - Removed not needed multi column indices from hive_account table - Introduced function `process_hive_post_operation' covering new-post, post-edit, post-undelete scenarios in single query at SQL side - Introduced function `delete_hive_post' to simplify post deletion at client side and improve performance. - Improved reblog operation queries (eliminated separate queries for IDs to be next processed/updated) - Improved follow operation queries (eliminated separate queries for IDs to be next processed/updated). Prerequisites for value caching and flush at Python side, not finished yet --- hive/db/db_state.py | 17 ++- hive/db/schema.py | 252 ++++++++++++++++++++++-------------- hive/indexer/custom_op.py | 53 +++++--- hive/indexer/follow.py | 74 +++++++++-- hive/indexer/posts.py | 265 +++++++------------------------------- 5 files changed, 312 insertions(+), 349 deletions(-) diff --git a/hive/db/db_state.py b/hive/db/db_state.py index 68f1ccc42..8a5f1f12d 100644 --- a/hive/db/db_state.py +++ b/hive/db/db_state.py @@ -90,7 +90,17 @@ class DbState: #'hive_posts_ix5', # (community_id>0, is_pinned=1) 'hive_follows_ix5a', # (following, state, created_at, follower) 'hive_follows_ix5b', # (follower, state, created_at, following) - 'hive_reblogs_ix1', # (post_id, account, created_at) + + 'hive_posts_parent_id_idx', + 'hive_posts_author_id', + 'hive_posts_depth_idx', + 'hive_posts_community_id_idx', + 'hive_posts_category_id_idx', + 'hive_posts_payout_at_idx', + 'hive_posts_payout_idx', + 'hive_posts_promoted_idx', + 'hive_posts_sc_trend_idx', + 'hive_posts_sc_hot_idx', #'hive_posts_cache_ix6a', # (sc_trend, post_id, paidout=0) #'hive_posts_cache_ix6b', # (post_id, 
sc_trend, paidout=0) #'hive_posts_cache_ix7a', # (sc_hot, post_id, paidout=0) @@ -104,9 +114,8 @@ class DbState: #'hive_posts_cache_ix32', # API: community created #'hive_posts_cache_ix33', # API: community payout #'hive_posts_cache_ix34', # API: community muted - 'hive_accounts_ix3', # (vote_weight, name VPO) - 'hive_accounts_ix4', # (id, name) - 'hive_accounts_ix5', # (cached_at, name) + 'hive_accounts_ix1', # (cached_at, name) + 'hive_accounts_ix5' # (cached_at, name) ] to_return = [] diff --git a/hive/db/schema.py b/hive/db/schema.py index 55254cb82..4b6270ded 100644 --- a/hive/db/schema.py +++ b/hive/db/schema.py @@ -60,13 +60,9 @@ def build_metadata(): sa.Column('cached_at', sa.DateTime, nullable=False, server_default='1970-01-01 00:00:00'), sa.Column('raw_json', sa.Text), - sa.UniqueConstraint('name', name='hive_accounts_ux1'), - sa.Index('hive_accounts_ix1', 'vote_weight', 'id'), # core: quick ranks - sa.Index('hive_accounts_ix2', 'name', 'id'), # core: quick id map - sa.Index('hive_accounts_ix3', 'vote_weight', 'name', postgresql_ops=dict(name='varchar_pattern_ops')), # API: lookup - sa.Index('hive_accounts_ix4', 'id', 'name'), # API: quick filter/sort - sa.Index('hive_accounts_ix5', 'cached_at', 'name'), # core/listen sweep + sa.Index('hive_accounts_ix1', 'vote_weight'), # core: quick ranks + sa.Index('hive_accounts_ix5', 'cached_at'), # core/listen sweep ) sa.Table( @@ -146,21 +142,20 @@ def build_metadata(): sa.ForeignKeyConstraint(['author_id'], ['hive_accounts.id'], name='hive_posts_fk1'), sa.ForeignKeyConstraint(['parent_id'], ['hive_posts.id'], name='hive_posts_fk3'), sa.UniqueConstraint('author_id', 'permlink_id', name='hive_posts_ux1'), - sa.Index('hive_posts_ix3', 'author_id', 'depth', 'id', postgresql_where=sql_text("is_deleted = '0'")), # API: author blog/comments - sa.Index('hive_posts_ix5', 'id', postgresql_where=sql_text("is_pinned = '1' AND is_deleted = '0'")), # API: pinned post status + sa.Index('hive_posts_permlink_id', 'permlink_id'), + 
sa.Index('hive_posts_depth_idx', 'depth'), sa.Index('hive_posts_parent_id_idx', 'parent_id'), sa.Index('hive_posts_community_id_idx', 'community_id'), + sa.Index('hive_posts_author_id', 'author_id'), sa.Index('hive_posts_category_id_idx', 'category_id'), sa.Index('hive_posts_payout_at_idx', 'payout_at'), - sa.Index('hive_posts_payout_at_idx2', 'payout_at', postgresql_where=sql_text("is_paidout = '0'")), sa.Index('hive_posts_payout_idx', 'payout'), sa.Index('hive_posts_promoted_idx', 'promoted'), sa.Index('hive_posts_sc_trend_idx', 'sc_trend'), sa.Index('hive_posts_sc_hot_idx', 'sc_hot'), sa.Index('hive_posts_created_at_idx', 'created_at'), - sa.UniqueConstraint('author_id', 'permlink_id', name='hive_posts_ux1') ) sa.Table( @@ -215,10 +210,8 @@ def build_metadata(): sa.Table( 'hive_post_tags', metadata, - sa.Column('post_id', sa.Integer, nullable=False), + sa.Column('post_id', sa.Integer, nullable=False, primary_key=True), sa.Column('tag', sa.String(32), nullable=False), - sa.UniqueConstraint('tag', 'post_id', name='hive_post_tags_ux1'), # core - sa.Index('hive_post_tags_ix1', 'post_id'), # core ) sa.Table( @@ -228,7 +221,7 @@ def build_metadata(): sa.Column('state', SMALLINT, nullable=False, server_default='1'), sa.Column('created_at', sa.DateTime, nullable=False), - sa.UniqueConstraint('following', 'follower', name='hive_follows_ux3'), # core + sa.PrimaryKeyConstraint('following', 'follower', name='hive_follows_pk'), # core sa.Index('hive_follows_ix5a', 'following', 'state', 'created_at', 'follower'), sa.Index('hive_follows_ix5b', 'follower', 'state', 'created_at', 'following'), ) @@ -241,8 +234,9 @@ def build_metadata(): sa.ForeignKeyConstraint(['account'], ['hive_accounts.name'], name='hive_reblogs_fk1'), sa.ForeignKeyConstraint(['post_id'], ['hive_posts.id'], name='hive_reblogs_fk2'), - sa.UniqueConstraint('account', 'post_id', name='hive_reblogs_ux1'), # core - sa.Index('hive_reblogs_ix1', 'post_id', 'account', 'created_at'), # API -- not yet used + 
sa.PrimaryKeyConstraint('account', 'post_id', name='hive_reblogs_pk'), # core + sa.Index('hive_reblogs_account', 'account'), + sa.Index('hive_reblogs_post_id', 'post_id'), ) sa.Table( @@ -259,15 +253,17 @@ def build_metadata(): sa.ForeignKeyConstraint(['from_account'], ['hive_accounts.id'], name='hive_payments_fk1'), sa.ForeignKeyConstraint(['to_account'], ['hive_accounts.id'], name='hive_payments_fk2'), sa.ForeignKeyConstraint(['post_id'], ['hive_posts.id'], name='hive_payments_fk3'), + sa.Index('hive_payments_from', 'from_account'), + sa.Index('hive_payments_to', 'to_account'), + sa.Index('hive_payments_post_id', 'post_id'), ) sa.Table( 'hive_feed_cache', metadata, - sa.Column('post_id', sa.Integer, nullable=False), + sa.Column('post_id', sa.Integer, nullable=False, primary_key=True), sa.Column('account_id', sa.Integer, nullable=False), sa.Column('created_at', sa.DateTime, nullable=False), - sa.UniqueConstraint('post_id', 'account_id', name='hive_feed_cache_ux1'), # core - sa.Index('hive_feed_cache_ix1', 'account_id', 'post_id', 'created_at'), # API (and rebuild?) + sa.Index('hive_feed_cache_account_id', 'account_id'), # API (and rebuild?) 
) sa.Table( @@ -403,92 +399,156 @@ def setup(db): db.query(sql) sql = """ - DROP FUNCTION IF EXISTS add_hive_post(character varying,character varying,character varying,character varying,timestamp without time zone,timestamp without time zone); - CREATE OR REPLACE FUNCTION add_hive_post( + DROP FUNCTION if exists process_hive_post_operation(character varying,character varying,character varying,character varying,timestamp without time zone,timestamp without time zone) + ; + CREATE OR REPLACE FUNCTION process_hive_post_operation( in _author hive_accounts.name%TYPE, in _permlink hive_permlink_data.permlink%TYPE, in _parent_author hive_accounts.name%TYPE, in _parent_permlink hive_permlink_data.permlink%TYPE, in _date hive_posts.created_at%TYPE, in _community_support_start_date hive_posts.created_at%TYPE) - RETURNS TABLE (id hive_posts.id%TYPE, author_id hive_posts.author_id%TYPE, permlink_id hive_posts.permlink_id%TYPE, - parent_id hive_posts.parent_id%TYPE, community_id hive_posts.community_id%TYPE, - is_valid hive_posts.is_valid%TYPE, is_muted hive_posts.is_muted%TYPE, depth hive_posts.depth%TYPE) - LANGUAGE plpgsql - AS - $function$ - BEGIN - INSERT INTO - hive_permlink_data (permlink) - VALUES - (_permlink) - ON CONFLICT DO NOTHING; - if _parent_author != '' THEN - RETURN QUERY - INSERT INTO - hive_posts (parent_id, parent_author_id, parent_permlink_id, depth, community_id, - category_id, - root_author_id, root_permlink_id, - is_muted, is_valid, - author_id, permlink_id, created_at - ) - SELECT - php.id AS parent_id, - php.author_id AS parent_author_id, - php.permlink_id AS parent_permlink_id, php.depth + 1 AS depth, - (CASE - WHEN _date > _community_support_start_date THEN - COALESCE(php.community_id, (select hc.id from hive_communities hc where hc.name = _parent_permlink)) - ELSE NULL - END) AS community_id, - COALESCE(php.category_id, (select hcg.id from hive_category_data hcg where hcg.category = _parent_permlink)) AS category_id, - php.root_author_id AS 
root_author_id, - php.root_permlink_id AS root_permlink_id, - php.is_muted as is_muted, php.is_valid AS is_valid, - ha.id AS author_id, hpd.id AS permlink_id, _date AS created_at - FROM hive_accounts ha, - hive_permlink_data hpd, - hive_posts php - INNER JOIN hive_accounts pha ON pha.id = php.author_id - INNER JOIN hive_permlink_data phpd ON phpd.id = php.permlink_id - WHERE pha.name = _parent_author and phpd.permlink = _parent_permlink AND - ha.name = _author and hpd.permlink = _permlink - RETURNING hive_posts.id, hive_posts.author_id, hive_posts.permlink_id, hive_posts.parent_id, hive_posts.community_id, hive_posts.is_valid, hive_posts.is_muted, hive_posts.depth; - ELSE - INSERT INTO - hive_category_data (category) - VALUES - (_parent_permlink) - ON CONFLICT (category) DO NOTHING; - RETURN QUERY - INSERT INTO - hive_posts (parent_id, parent_author_id, parent_permlink_id, depth, community_id, - category_id, - root_author_id, root_permlink_id, - is_muted, is_valid, - author_id, permlink_id, created_at - ) - SELECT 0 AS parent_id, 0 AS parent_author_id, 0 AS parent_permlink_id, 0 AS depth, - (CASE - WHEN _date > _community_support_start_date THEN - (select hc.id from hive_communities hc where hc.name = _parent_permlink) - ELSE NULL - END) AS community_id, - (select hcg.id from hive_category_data hcg where hcg.category = _parent_permlink) AS category_id, - ha.id AS root_author_id, -- use author_id AS root one if no parent - hpd.id AS root_permlink_id, -- use perlink_id AS root one if no parent - false AS is_muted, true AS is_valid, - ha.id AS author_id, hpd.id AS permlink_id, _date AS created_at - FROM hive_accounts ha, - hive_permlink_data hpd - WHERE ha.name = _author and hpd.permlink = _permlink - RETURNING hive_posts.id, hive_posts.author_id, hive_posts.permlink_id, hive_posts.parent_id, hive_posts.community_id, hive_posts.is_valid, hive_posts.is_muted, hive_posts.depth; - END IF; - END - $function$ + RETURNS TABLE (id hive_posts.id%TYPE, author_id 
hive_posts.author_id%TYPE, permlink_id hive_posts.permlink_id%TYPE, + parent_id hive_posts.parent_id%TYPE, community_id hive_posts.community_id%TYPE, + is_valid hive_posts.is_valid%TYPE, is_muted hive_posts.is_muted%TYPE, depth hive_posts.depth%TYPE, + is_edited boolean) + LANGUAGE plpgsql + AS + $function$ + BEGIN + + INSERT INTO hive_permlink_data + (permlink) + values + ( + _permlink + ) + ON CONFLICT DO NOTHING + ; + if _parent_author != '' THEN + RETURN QUERY INSERT INTO hive_posts as hp + (parent_id, parent_author_id, parent_permlink_id, depth, community_id, + category_id, + root_author_id, root_permlink_id, + is_muted, is_valid, + author_id, permlink_id, created_at) + SELECT php.id AS parent_id, php.author_id as parent_author_id, + php.permlink_id as parent_permlink_id, php.depth + 1 as depth, + (CASE + WHEN _date > _community_support_start_date THEN + COALESCE(php.community_id, (select hc.id from hive_communities hc where hc.name = _parent_permlink)) + ELSE NULL + END) as community_id, + COALESCE(php.category_id, (select hcg.id from hive_category_data hcg where hcg.category = _parent_permlink)) as category_id, + php.root_author_id as root_author_id, + php.root_permlink_id as root_permlink_id, + php.is_muted as is_muted, php.is_valid as is_valid, + ha.id as author_id, hpd.id as permlink_id, _date as created_at + FROM hive_accounts ha, + hive_permlink_data hpd, + hive_posts php + INNER JOIN hive_accounts pha ON pha.id = php.author_id + INNER JOIN hive_permlink_data phpd ON phpd.id = php.permlink_id + WHERE pha.name = _parent_author and phpd.permlink = _parent_permlink AND + ha.name = _author and hpd.permlink = _permlink + + ON CONFLICT ON CONSTRAINT hive_posts_ux1 DO UPDATE SET + --- During post update it is disallowed to change: parent-post, category, community-id + --- then also depth, is_valid and is_muted is impossible to change + --- post edit part + updated_at = _date, + + --- post undelete part (if was deleted) + is_deleted = (CASE hp.is_deleted + WHEN 
true THEN false + ELSE false + END + ), + is_pinned = (CASE hp.is_deleted + WHEN true THEN false + ELSE hp.is_pinned --- no change + END + ) + + RETURNING hp.id, hp.author_id, hp.permlink_id, hp.parent_id, hp.community_id, hp.is_valid, hp.is_muted, hp.depth, (hp.updated_at > hp.created_at) as is_edited + ; + ELSE + INSERT INTO hive_category_data + (category) + VALUES (_parent_permlink) + ON CONFLICT (category) DO NOTHING + ; + + RETURN QUERY INSERT INTO hive_posts as hp + (parent_id, parent_author_id, parent_permlink_id, depth, community_id, + category_id, + root_author_id, root_permlink_id, + is_muted, is_valid, + author_id, permlink_id, created_at) + SELECT 0 AS parent_id, 0 as parent_author_id, 0 as parent_permlink_id, 0 as depth, + (CASE + WHEN _date > _community_support_start_date THEN + (select hc.id from hive_communities hc where hc.name = _parent_permlink) + ELSE NULL + END) as community_id, + (select hcg.id from hive_category_data hcg where hcg.category = _parent_permlink) as category_id, + ha.id as root_author_id, -- use author_id as root one if no parent + hpd.id as root_permlink_id, -- use perlink_id as root one if no parent + false as is_muted, true as is_valid, + ha.id as author_id, hpd.id as permlink_id, _date as created_at + FROM hive_accounts ha, + hive_permlink_data hpd + WHERE ha.name = _author and hpd.permlink = _permlink + + ON CONFLICT ON CONSTRAINT hive_posts_ux1 DO UPDATE SET + --- During post update it is disallowed to change: parent-post, category, community-id + --- then also depth, is_valid and is_muted is impossible to change + --- post edit part + updated_at = _date, + + --- post undelete part (if was deleted) + is_deleted = (CASE hp.is_deleted + WHEN true THEN false + ELSE false + END + ), + is_pinned = (CASE hp.is_deleted + WHEN true THEN false + ELSE hp.is_pinned --- no change + END + ) + + RETURNING hp.id, hp.author_id, hp.permlink_id, hp.parent_id, hp.community_id, hp.is_valid, hp.is_muted, hp.depth, (hp.updated_at > 
hp.created_at) as is_edited + ; + END IF; + END + $function$ """ db.query_no_return(sql) + sql = """ + DROP FUNCTION if exists delete_hive_post(character varying,character varying,character varying) + ; + CREATE OR REPLACE FUNCTION delete_hive_post( + in _author hive_accounts.name%TYPE, + in _permlink hive_permlink_data.permlink%TYPE) + RETURNS TABLE (id hive_posts.id%TYPE, depth hive_posts.depth%TYPE) + LANGUAGE plpgsql + AS + $function$ + BEGIN + RETURN QUERY UPDATE hive_posts AS hp + SET is_deleted = true + FROM hive_posts hp1 + INNER JOIN hive_accounts ha ON hp1.author_id = ha.id + INNER JOIN hive_permlink_data hpd ON hp1.permlink_id = hpd.id + WHERE hp.id = hp1.id AND ha.name = _author AND hpd.permlink = _permlink + RETURNING hp.id, hp.depth; + END + $function$ + """ + db.query_no_return(sql) + sql = """ DROP MATERIALIZED VIEW IF EXISTS hive_posts_a_p ; diff --git a/hive/indexer/custom_op.py b/hive/indexer/custom_op.py index 6c7eb84f4..47897b567 100644 --- a/hive/indexer/custom_op.py +++ b/hive/indexer/custom_op.py @@ -119,29 +119,46 @@ class CustomOp: if not all(map(Accounts.exists, [author, blogger])): return - post_id, depth = Posts.get_id_and_depth(author, permlink) - - if depth > 0: - return # prevent comment reblogs - - if not post_id: - log.debug("reblog: post not found: %s/%s", author, permlink) - return - - author_id = Accounts.get_id(author) - blogger_id = Accounts.get_id(blogger) - if 'delete' in op_json and op_json['delete'] == 'delete': - DB.query("DELETE FROM hive_reblogs WHERE account = :a AND " - "post_id = :pid LIMIT 1", a=blogger, pid=post_id) + sql = """ + WITH processing_set AS ( + SELECT hp.id as post_id, ha.id as account_id + FROM hive_posts hp + INNER JOIN hive_accounts ha ON hp.author_id = ha.id + INNER JOIN hive_permlink_data hpd ON hp.permlink_id = hpd.id + WHERE ha.name = :a AND hpd.permlink = :permlink AND hp.depth <= 0 + ) + DELETE FROM hive_reblogs AS hr + WHERE hr.account = :a AND hr.post_id IN (SELECT ps.post_id FROM 
processing_set ps) + RETURNING hr.post_id, (SELECT ps.account_id FROM processing_set ps) AS account_id + """ + + row = DB.query_row(sql, a=blogger, permlink=permlink) + if row is None: + log.debug("reblog: post not found: %s/%s", author, permlink) + return + if not DbState.is_initial_sync(): - FeedCache.delete(post_id, blogger_id) + result = dict(row) + FeedCache.delete(result['post_id'], result['account_id']) else: - sql = ("INSERT INTO hive_reblogs (account, post_id, created_at) " - "VALUES (:a, :pid, :date) ON CONFLICT (account, post_id) DO NOTHING") - DB.query(sql, a=blogger, pid=post_id, date=block_date) + sql = """ + INSERT INTO hive_reblogs (account, post_id, created_at) + SELECT ha.name, hp.id, :date + FROM hive_accounts ha + INNER JOIN hive_posts hp ON hp.author_id = ha.id + INNER JOIN hive_permlink_data hpd ON hpd.id = hp.permlink_id + WHERE ha.name = :a AND hpd.permlink = :p + ON CONFLICT (account, post_id) DO NOTHING + RETURNING post_id + """ + row = DB.query_row(sql, a=blogger, p=permlink, date=block_date) if not DbState.is_initial_sync(): + author_id = Accounts.get_id(author) + blogger_id = Accounts.get_id(blogger) + result = dict(row) + post_id = result['post_id'] FeedCache.insert(post_id, blogger_id, block_date) Notify('reblog', src_id=blogger_id, dst_id=author_id, post_id=post_id, when=block_date, diff --git a/hive/indexer/follow.py b/hive/indexer/follow.py index 07dc82bf9..21ddfc646 100644 --- a/hive/indexer/follow.py +++ b/hive/indexer/follow.py @@ -16,6 +16,15 @@ DB = Db.instance() FOLLOWERS = 'followers' FOLLOWING = 'following' +FOLLOW_ITEM_INSERT_QUERY = """ + INSERT INTO hive_follows as hf (follower, following, created_at, state) + VALUES( :flr, :flg, :at, :state ) + ON CONFLICT (follower, following) DO UPDATE SET state = (CASE hf.state + when 0 then 0 -- 0 blocks possibility to update state + ELSE 1 + END) + """ + def _flip_dict(dict_to_flip): """Swap keys/values. 
Returned dict values are array of keys.""" flipped = {} @@ -29,6 +38,8 @@ def _flip_dict(dict_to_flip): class Follow: """Handles processing of incoming follow ups and flushing to db.""" + follow_items_to_flush = [] + @classmethod def follow_op(cls, account, op_json, date): """Process an incoming follow op.""" @@ -38,21 +49,21 @@ class Follow: # perform delta check new_state = op['state'] - old_state = cls._get_follow_db_state(op['flr'], op['flg']) - if new_state == (old_state or 0): - return - - # insert or update state - if old_state is None: - sql = """INSERT INTO hive_follows (follower, following, - created_at, state) VALUES (:flr, :flg, :at, :state)""" + old_state = None + if DbState.is_initial_sync(): + # insert or update state + DB.query(FOLLOW_ITEM_INSERT_QUERY, **op) + +# cls.follow_items_to_flush.append({ +# 'flr': op['flr'], +# 'flg': op['flg'], +# 'state': op['state'], +# 'at': op['at']}) else: - sql = """UPDATE hive_follows SET state = :state - WHERE follower = :flr AND following = :flg""" - DB.query(sql, **op) - - # track count deltas - if not DbState.is_initial_sync(): + old_state = cls._get_follow_db_state(op['flr'], op['flg']) + # insert or update state + DB.query(FOLLOW_ITEM_INSERT_QUERY, **op) + # track count deltas if new_state == 1: Follow.follow(op['flr'], op['flg']) if old_state is None: @@ -121,10 +132,45 @@ class Follow: cls._delta[role][account] = 0 cls._delta[role][account] += direction + @classmethod + def _flush_follow_items(cls): + sql_prefix = """ + INSERT INTO hive_follows as hf (follower, following, created_at, state) + VALUES """ + + sql_postfix = """ + ON CONFLICT (follower, following) DO UPDATE SET state = (CASE hf.state + WHEN 0 THEN 0 -- 0 blocks possibility to update state + ELSE 1 + END) + """ + values = [] + limit = 1000 + count = 0; + for follow_item in cls.follow_items_to_flush: + if count < limit: + values.append("({}, {}, '{}', {})".format(follow_item['flr'], follow_item['flg'], follow_item['at'], follow_item['state'])) + 
count = count + 1 + else: + query = sql_prefix + ",".join(values) + query += sql_postfix + DB.query(query) + values = [] + count = 0 + + if len(values): + query = sql_prefix + ",".join(values) + query += sql_postfix + DB.query(query) + + cls.follow_items_to_flush = [] + @classmethod def flush(cls, trx=True): """Flushes pending follow count deltas.""" + cls._flush_follow_items() + updated = 0 sqls = [] for col, deltas in cls._delta.items(): diff --git a/hive/indexer/posts.py b/hive/indexer/posts.py index 99d2bcbeb..9dcda3c58 100644 --- a/hive/indexer/posts.py +++ b/hive/indexer/posts.py @@ -79,28 +79,6 @@ class Posts: cls._set_id(url, pid) return tuples - @classmethod - def get_id_and_depth(cls, author, permlink): - """Get the id and depth of @author/permlink post.""" - sql = """ - SELECT - hp.id, - COALESCE(hp.depth, -1) - FROM - hive_posts hp - INNER JOIN hive_accounts ha_a ON ha_a.id = hp.author_id - INNER JOIN hive_permlink_data hpd_p ON hpd_p.id = hp.permlink_id - WHERE ha_a.name = :author AND hpd_p.permlink = :permlink - """ - pid, depth = DB.query_row(sql, author=author, permlink=permlink) - return (pid, depth) - - @classmethod - def is_pid_deleted(cls, pid): - """Check if the state of post is deleted.""" - sql = "SELECT is_deleted FROM hive_posts WHERE id = :id" - return DB.query_one(sql, id=pid) - @classmethod def delete_op(cls, op): """Given a delete_comment op, mark the post as deleted. @@ -112,16 +90,44 @@ class Posts: @classmethod def comment_op(cls, op, block_date): """Register new/edited/undeleted posts; insert into feed cache.""" - pid = cls.get_id(op['author'], op['permlink']) - if not pid: - # post does not exist, go ahead and process it. - cls.insert(op, block_date) - elif not cls.is_pid_deleted(pid): - # post exists, not deleted, thus an edit. ignore. - cls.update(op, block_date, pid) - else: - # post exists but was deleted. time to reinstate. 
- cls.undelete(op, block_date, pid) + + sql = """ + SELECT id, author_id, permlink_id, parent_id, community_id, is_valid, is_muted, depth, is_edited + FROM process_hive_post_operation((:author)::varchar, (:permlink)::varchar, (:parent_author)::varchar, (:parent_permlink)::varchar, (:date)::timestamp, (:community_support_start_date)::timestamp); + """ + + row = DB.query_row(sql, author=op['author'], permlink=op['permlink'], parent_author=op['parent_author'], + parent_permlink=op['parent_permlink'], date=block_date, community_support_start_date=START_DATE) + + result = dict(row) + + # TODO we need to enhance checking related community post validation and honor is_muted. + error = cls._verify_post_against_community(op, result['community_id'], result['is_valid'], result['is_muted']) + + cls._set_id(op['author']+'/'+op['permlink'], result['id']) + + # add content data to hive_post_data + sql = """ + INSERT INTO hive_post_data (id, title, preview, img_url, body, json) + VALUES (:id, :title, :preview, :img_url, :body, :json) + ON CONFLICT ON CONSTRAINT hive_post_data_pkey DO UPDATE SET + title = :title, + preview = :preview, + img_url = :img_url, + body = :body, + json = :json + """ + DB.query(sql, id=result['id'], title=op['title'], + preview=op['preview'] if 'preview' in op else "", + img_url=op['img_url'] if 'img_url' in op else "", + body=op['body'], json=op['json_metadata'] if op['json_metadata'] else '{}') + + if not DbState.is_initial_sync(): + if error: + author_id = result['author_id'] + Notify('error', dst_id=author_id, when=date, + post_id=result['id'], payload=error).write() + cls._insert_feed_cache(result) @classmethod def comment_payout_op(cls, ops, date): @@ -182,52 +188,6 @@ class Posts: cashout_time=date, author=author, permlink=permlink) - @classmethod - def insert(cls, op, date): - """Inserts new post records.""" - # inserting new post - # * Check for permlink, parent_permlink, root_permlink - # * Check for authro, parent_author, root_author - # * check 
for category data - # * insert post basic data - # * obtain id - # * insert post content data - - sql = """ - SELECT id, author_id, permlink_id, parent_id, community_id, is_valid, is_muted, depth - FROM add_hive_post((:author)::varchar, (:permlink)::varchar, (:parent_author)::varchar, (:parent_permlink)::varchar, (:date)::timestamp, (:community_support_start_date)::timestamp); - """ - - row = DB.query_row(sql, author=op['author'], permlink=op['permlink'], parent_author=op['parent_author'], - parent_permlink=op['parent_permlink'], date=date, community_support_start_date=START_DATE) - - result = dict(row) - - # TODO we need to enhance checking related community post validation and honor is_muted. - error = cls._verify_post_against_community(op, result['community_id'], result['is_valid'], result['is_muted']) - - cls._set_id(op['author']+'/'+op['permlink'], result['id']) - - # add content data to hive_post_data - sql = """ - INSERT INTO hive_post_data (id, title, preview, img_url, body, json) - VALUES (:id, :title, :preview, :img_url, :body, :json)""" - DB.query(sql, id=result['id'], title=op['title'], - preview=op['preview'] if 'preview' in op else "", - img_url=op['img_url'] if 'img_url' in op else "", - body=op['body'], json=op['json_metadata'] if op['json_metadata'] else '{}') - - if not DbState.is_initial_sync(): - if error: - author_id = result['author_id'] - Notify('error', dst_id=author_id, when=date, - post_id=result['id'], payload=error).write() - cls._insert_feed_cache(result) - - if op['parent_author']: - #update parent child count - cls.update_child_count(result['id']) - @classmethod def update_child_count(cls, child_id, op='+'): """ Increase/decrease child count by 1 """ @@ -238,7 +198,7 @@ class Posts: children = GREATEST(0, ( SELECT CASE - WHEN children=NULL THEN 0 + WHEN children is NULL THEN 0 WHEN children=32762 THEN 0 ELSE children END @@ -255,48 +215,22 @@ class Posts: DB.query(sql, child_id=child_id) - @classmethod - def undelete(cls, op, date, 
pid): - """Re-allocates an existing record flagged as deleted.""" - # add category to category table - - sql = """ - INSERT INTO - hive_category_data (category) - VALUES - (:category) - ON CONFLICT (category) DO NOTHING; - UPDATE - hive_posts - SET - is_valid = :is_valid, - is_muted = :is_muted, - is_deleted = '0', - is_pinned = '0', - category_id = (SELECT id FROM hive_category_data WHERE category = :category), - community_id = :community_id, - depth = :depth - WHERE - id = :id - """ - post = cls._build_post(op, date, pid) - DB.query(sql, **post) - - if not DbState.is_initial_sync(): - if post['error']: - author_id = Accounts.get_id(post['author']) - Notify('error', dst_id=author_id, when=date, - post_id=post['id'], payload=post['error']).write() - cls._insert_feed_cache(post) @classmethod def delete(cls, op): """Marks a post record as being deleted.""" - pid, depth = cls.get_id_and_depth(op['author'], op['permlink']) - DB.query("UPDATE hive_posts SET is_deleted = '1' WHERE id = :id", id=pid) + sql = """ + SELECT id, depth + FROM delete_hive_post((:author)::varchar, (:permlink)::varchar); + """ + row = DB.query_row(sql, author=op['author'], permlink = op['permlink']) + result = dict(row) + pid = result['id'] if not DbState.is_initial_sync(): + depth = result['depth'] + if depth == 0: # TODO: delete from hive_reblogs -- otherwise feed cache gets # populated with deleted posts somwrimas @@ -305,51 +239,6 @@ class Posts: # force parent child recount when child is deleted cls.update_child_count(pid, '-') - @classmethod - def update(cls, op, date, pid): - """Handle post updates.""" - # pylint: disable=unused-argument - post = cls._build_post(op, date) - - # add category to category table - sql = """ - INSERT INTO hive_category_data (category) - VALUES (:category) - ON CONFLICT (category) DO NOTHING""" - DB.query(sql, category=post['category']) - - sql = """ - UPDATE hive_posts - SET - category_id = (SELECT id FROM hive_category_data WHERE category = :category), - 
community_id = :community_id, - updated_at = :date, - depth = :depth, - is_muted = :is_muted, - is_valid = :is_valid - WHERE id = :id - """ - - post['id'] = pid - DB.query(sql, **post) - - sql = """ - UPDATE - hive_post_data - SET - title = :title, - preview = :preview, - img_url = :img_url, - body = :body, - json = :json - WHERE id = :id - """ - - DB.query(sql, id=pid, title=op['title'], - preview=op['preview'] if 'preview' in op else "", - img_url=op['img_url'] if 'img_url' in op else "", - body=op['body'], json=op['json_metadata'] if op['json_metadata'] else '{}') - @classmethod def update_comment_pending_payouts(cls, hived, posts): comment_pending_payouts = hived.get_comment_pending_payouts(posts) @@ -428,61 +317,3 @@ class Posts: is_muted = True return error - @classmethod - def _build_post(cls, op, date, pid=None): - """Validate and normalize a post operation. - - Post is muted if: - - parent was muted - - author unauthorized - - Post is invalid if: - - parent is invalid - - author unauthorized - """ - # TODO: non-nsfw post in nsfw community is `invalid` - - # if this is a top-level post: - if not op['parent_author']: - depth = 0 - category = op['parent_permlink'] - community_id = None - if date > START_DATE: - community_id = Community.validated_id(category) - is_valid = True - is_muted = False - - # this is a comment; inherit parent props. 
- else: - sql = """ - SELECT depth, hcd.category as category, community_id, is_valid, is_muted - FROM hive_posts hp - INNER JOIN hive_category_data hcd ON hcd.id = hp.category_id - WHERE hp.id = ( - SELECT hp1.id - FROM hive_posts hp1 - INNER JOIN hive_accounts ha_a ON ha_a.id = hp1.author_id - INNER JOIN hive_permlink_data hpd_p ON hpd_p.id = hp1.permlink_id - WHERE ha_a.name = :author AND hpd_p.permlink = :permlink - ) - """ - (parent_depth, category, community_id, is_valid, is_muted) = DB.query_row(sql, author=op['parent_author'], permlink=op['parent_permlink']) - depth = parent_depth + 1 - if not is_valid: - error = 'replying to invalid post' - elif is_muted: - error = 'replying to muted post' - #find root comment - - # check post validity in specified context - error = None - if community_id and is_valid and not Community.is_post_valid(community_id, op): - error = 'not authorized' - #is_valid = False # TODO: reserved for future blacklist status? - is_muted = True - - ret = dict(id=pid, community_id=community_id, - category=category, is_muted=is_muted, is_valid=is_valid, - depth=depth, date=date, error=error) - - return ret -- GitLab