diff --git a/.gitignore b/.gitignore index c27602b70d00ecdc6d095ee560ee81250b91a79f..be366e101d99286d8fe35e05497b5da7364c0dc8 100644 --- a/.gitignore +++ b/.gitignore @@ -124,3 +124,6 @@ tests/failed_blocks/ /tests/envdir-to-envfile.sh /deploy/ /scripts/hive.sqlite + +# vscode +.vscode/* diff --git a/hive/cli.py b/hive/cli.py index 8d617960990622866ad41e05b32a80cf51e113f7..9df933413f06b0d47b020bce5211892494711aed 100755 --- a/hive/cli.py +++ b/hive/cli.py @@ -22,7 +22,6 @@ def run(): else: launch_mode(mode, conf) - def launch_mode(mode, conf): """Launch a routine as indicated by `mode`.""" if mode == 'server': diff --git a/hive/db/adapter.py b/hive/db/adapter.py index c4f63262ee56d57032e3cbef515946b01d3cc25c..e175c540bdb4e9e754e2b8e8e9c80aa862f12e08 100644 --- a/hive/db/adapter.py +++ b/hive/db/adapter.py @@ -78,6 +78,9 @@ class Db: assert self._is_write_query(sql), sql return self._query(sql, **kwargs) + def query_no_return(self, sql, **kwargs): + """Perform a write query, discarding any result.""" + self._query(sql, **kwargs) + def query_all(self, sql, **kwargs): """Perform a `SELECT n*m`""" res = self._query(sql, **kwargs) @@ -154,11 +157,12 @@ class Db: return (sql, values) def _sql_text(self, sql): - if sql in self._prep_sql: - query = self._prep_sql[sql] - else: - query = sqlalchemy.text(sql).execution_options(autocommit=False) - self._prep_sql[sql] = query + # prepared-statement caching is disabled for now: + # if sql in self._prep_sql: + # query = self._prep_sql[sql] + # else: + # query = sqlalchemy.text(sql).execution_options(autocommit=False) + # self._prep_sql[sql] = query + query = sqlalchemy.text(sql).execution_options(autocommit=False) return query def _query(self, sql, **kwargs): diff --git a/hive/db/db_state.py b/hive/db/db_state.py index 9d49ae1b5f872cdb41925984617f2dde103b7a7b..8a5f1f12d707e20edc7dfea77360f49e0d6fd2fd 100644 --- a/hive/db/db_state.py +++ b/hive/db/db_state.py @@ -85,28 +85,37 @@ class DbState: @classmethod def _disableable_indexes(cls): to_locate = [ - 'hive_posts_ix3', # (author, depth, id) - 'hive_posts_ix4', # (parent_id, id, is_deleted=0) - 'hive_posts_ix5', # (community_id>0, is_pinned=1) + #'hive_posts_ix3', # (author, depth, id) + #'hive_posts_ix4', # (parent_id, id, is_deleted=0) + #'hive_posts_ix5', # (community_id>0, is_pinned=1) 'hive_follows_ix5a', # (following, state, created_at, follower) 'hive_follows_ix5b', # (follower, state, created_at, following) - 'hive_reblogs_ix1', # (post_id, account, created_at) - 'hive_posts_cache_ix6a', # (sc_trend, post_id, paidout=0) - 'hive_posts_cache_ix6b', # (post_id, sc_trend, paidout=0) - 'hive_posts_cache_ix7a', # (sc_hot, post_id, paidout=0) - 'hive_posts_cache_ix7b', # (post_id, sc_hot, paidout=0) - 'hive_posts_cache_ix8', # (category, payout, depth, paidout=0) - 'hive_posts_cache_ix9a', # (depth, payout, post_id, paidout=0) - 'hive_posts_cache_ix9b', # (category, depth, payout, post_id, paidout=0) - 'hive_posts_cache_ix10', # (post_id, payout, gray=1, payout>0) - 'hive_posts_cache_ix30', # API: community trend - 'hive_posts_cache_ix31', # API: community hot - 'hive_posts_cache_ix32', # API: community created - 'hive_posts_cache_ix33', # API: community payout - 'hive_posts_cache_ix34', # API: community muted - 'hive_accounts_ix3', # (vote_weight, name VPO) - 'hive_accounts_ix4', # (id, name) - 'hive_accounts_ix5', # (cached_at, name) + + 'hive_posts_parent_id_idx', + 'hive_posts_author_id', + 'hive_posts_depth_idx', + 'hive_posts_community_id_idx', + 'hive_posts_category_id_idx', + 'hive_posts_payout_at_idx', + 'hive_posts_payout_idx', + 'hive_posts_promoted_idx', + 'hive_posts_sc_trend_idx', +
'hive_posts_sc_hot_idx', + #'hive_posts_cache_ix6a', # (sc_trend, post_id, paidout=0) + #'hive_posts_cache_ix6b', # (post_id, sc_trend, paidout=0) + #'hive_posts_cache_ix7a', # (sc_hot, post_id, paidout=0) + #'hive_posts_cache_ix7b', # (post_id, sc_hot, paidout=0) + #'hive_posts_cache_ix8', # (category, payout, depth, paidout=0) + #'hive_posts_cache_ix9a', # (depth, payout, post_id, paidout=0) + #'hive_posts_cache_ix9b', # (category, depth, payout, post_id, paidout=0) + #'hive_posts_cache_ix10', # (post_id, payout, gray=1, payout>0) + #'hive_posts_cache_ix30', # API: community trend + #'hive_posts_cache_ix31', # API: community hot + #'hive_posts_cache_ix32', # API: community created + #'hive_posts_cache_ix33', # API: community payout + #'hive_posts_cache_ix34', # API: community muted + 'hive_accounts_ix1', # (vote_weight) + 'hive_accounts_ix5' # (cached_at) ] to_return = [] @@ -231,11 +240,11 @@ class DbState: if cls._ver == 6: cls.db().query("DROP INDEX hive_posts_cache_ix6") - cls.db().query("CREATE INDEX hive_posts_cache_ix6a ON hive_posts_cache (sc_trend, post_id) WHERE is_paidout = '0'") - cls.db().query("CREATE INDEX hive_posts_cache_ix6b ON hive_posts_cache (post_id, sc_trend) WHERE is_paidout = '0'") - cls.db().query("DROP INDEX hive_posts_cache_ix7") - cls.db().query("CREATE INDEX hive_posts_cache_ix7a ON hive_posts_cache (sc_hot, post_id) WHERE is_paidout = '0'") - cls.db().query("CREATE INDEX hive_posts_cache_ix7b ON hive_posts_cache (post_id, sc_hot) WHERE is_paidout = '0'") + #cls.db().query("CREATE INDEX hive_posts_cache_ix6a ON hive_posts_cache (sc_trend, post_id) WHERE is_paidout = '0'") + #cls.db().query("CREATE INDEX hive_posts_cache_ix6b ON hive_posts_cache (post_id, sc_trend) WHERE is_paidout = '0'") + #cls.db().query("DROP INDEX hive_posts_cache_ix7") + #cls.db().query("CREATE INDEX hive_posts_cache_ix7a ON hive_posts_cache (sc_hot, post_id) WHERE is_paidout = '0'") + #cls.db().query("CREATE INDEX hive_posts_cache_ix7b ON hive_posts_cache (post_id, sc_hot) WHERE is_paidout = '0'") cls._set_ver(7) if cls._ver == 7: @@ -257,9 +266,9 @@ class DbState: cls._set_ver(10) if cls._ver == 10: - cls.db().query("CREATE INDEX hive_posts_cache_ix8 ON hive_posts_cache (category, payout, depth) WHERE is_paidout = '0'") - cls.db().query("CREATE INDEX hive_posts_cache_ix9a ON hive_posts_cache (depth, payout, post_id) WHERE is_paidout = '0'") - cls.db().query("CREATE INDEX hive_posts_cache_ix9b ON hive_posts_cache (category, depth, payout, post_id) WHERE is_paidout = '0'") + #cls.db().query("CREATE INDEX hive_posts_cache_ix8 ON hive_posts_cache (category, payout, depth) WHERE is_paidout = '0'") + #cls.db().query("CREATE INDEX hive_posts_cache_ix9a ON hive_posts_cache (depth, payout, post_id) WHERE is_paidout = '0'") + #cls.db().query("CREATE INDEX hive_posts_cache_ix9b ON hive_posts_cache (category, depth, payout, post_id) WHERE is_paidout = '0'") cls._set_ver(11) if cls._ver == 11: @@ -286,13 +295,13 @@ class DbState: if cls._ver == 13: sqls = ("CREATE INDEX hive_posts_ix5 ON hive_posts (id) WHERE is_pinned = '1' AND is_deleted = '0'", - "CREATE INDEX hive_posts_ix6 ON hive_posts (community_id, id) WHERE community_id IS NOT NULL AND is_pinned = '1' AND is_deleted = '0'", - "CREATE INDEX hive_posts_cache_ix10 ON hive_posts_cache (post_id, payout) WHERE is_grayed = '1' AND payout > 0", - "CREATE INDEX hive_posts_cache_ix30 ON hive_posts_cache (community_id, sc_trend, post_id) WHERE community_id IS NOT NULL AND is_grayed = '0' AND depth = 0", - "CREATE INDEX
hive_posts_cache_ix31 ON hive_posts_cache (community_id, sc_hot, post_id) WHERE community_id IS NOT NULL AND is_grayed = '0' AND depth = 0", - "CREATE INDEX hive_posts_cache_ix32 ON hive_posts_cache (community_id, created_at, post_id) WHERE community_id IS NOT NULL AND is_grayed = '0' AND depth = 0", - "CREATE INDEX hive_posts_cache_ix33 ON hive_posts_cache (community_id, payout, post_id) WHERE community_id IS NOT NULL AND is_grayed = '0' AND is_paidout = '0'", - "CREATE INDEX hive_posts_cache_ix34 ON hive_posts_cache (community_id, payout, post_id) WHERE community_id IS NOT NULL AND is_grayed = '1' AND is_paidout = '0'") + "CREATE INDEX hive_posts_ix6 ON hive_posts (community_id, id) WHERE community_id IS NOT NULL AND is_pinned = '1' AND is_deleted = '0'",) + #"CREATE INDEX hive_posts_cache_ix10 ON hive_posts_cache (post_id, payout) WHERE is_grayed = '1' AND payout > 0", + #"CREATE INDEX hive_posts_cache_ix30 ON hive_posts_cache (community_id, sc_trend, post_id) WHERE community_id IS NOT NULL AND is_grayed = '0' AND depth = 0", + #"CREATE INDEX hive_posts_cache_ix31 ON hive_posts_cache (community_id, sc_hot, post_id) WHERE community_id IS NOT NULL AND is_grayed = '0' AND depth = 0", + #"CREATE INDEX hive_posts_cache_ix32 ON hive_posts_cache (community_id, created_at, post_id) WHERE community_id IS NOT NULL AND is_grayed = '0' AND depth = 0", + #"CREATE INDEX hive_posts_cache_ix33 ON hive_posts_cache (community_id, payout, post_id) WHERE community_id IS NOT NULL AND is_grayed = '0' AND is_paidout = '0'", + #"CREATE INDEX hive_posts_cache_ix34 ON hive_posts_cache (community_id, payout, post_id) WHERE community_id IS NOT NULL AND is_grayed = '1' AND is_paidout = '0'") for sql in sqls: cls.db().query(sql) cls._set_ver(14) @@ -302,7 +311,7 @@ class DbState: cls.db().query("ALTER TABLE hive_communities ADD COLUMN category VARCHAR(32) NOT NULL DEFAULT ''") cls.db().query("ALTER TABLE hive_communities ADD COLUMN avatar_url VARCHAR(1024) NOT NULL DEFAULT ''") cls.db().query("ALTER TABLE hive_communities ADD COLUMN num_authors INTEGER NOT NULL DEFAULT 0") - cls.db().query("CREATE INDEX hive_posts_cache_ix20 ON hive_posts_cache (community_id, author, payout, post_id) WHERE is_paidout = '0'") + #cls.db().query("CREATE INDEX hive_posts_cache_ix20 ON hive_posts_cache (community_id, author, payout, post_id) WHERE is_paidout = '0'") cls._set_ver(15) if cls._ver == 15: @@ -315,6 +324,12 @@ class DbState: cls.db().query("CREATE INDEX hive_communities_ft1 ON hive_communities USING GIN (to_tsvector('english', title || ' ' || about))") cls._set_ver(17) + if cls._ver == 17: + cls.db().query("INSERT INTO hive_accounts (name, created_at) VALUES ('', '1990-01-01T00:00:00') ON CONFLICT (name) DO NOTHING") + cls.db().query("INSERT INTO hive_permlink_data (permlink) VALUES ('') ON CONFLICT (permlink) DO NOTHING") + cls.db().query("INSERT INTO hive_category_data (category) VALUES ('') ON CONFLICT (category) DO NOTHING") + cls._set_ver(18) + reset_autovac(cls.db()) log.info("[HIVE] db version: %d", cls._ver) diff --git a/hive/db/schema.py b/hive/db/schema.py index 66be8615553269a3b317c4eb4f3b1f0827003069..82d20b073b7a1f76555ec5f22263573d7e06b42b 100644 --- a/hive/db/schema.py +++ b/hive/db/schema.py @@ -10,7 +10,8 @@ from sqlalchemy.types import BOOLEAN #pylint: disable=line-too-long, too-many-lines, bad-whitespace -DB_VERSION = 17 +# [DK] We changed and removed some tables, so DB_VERSION was upgraded to 18. +DB_VERSION = 18 def build_metadata(): """Build schema def with SqlAlchemy""" @@ -40,7 +41,7 @@ def
build_metadata(): sa.Column('display_name', sa.String(20)), sa.Column('about', sa.String(160)), sa.Column('location', sa.String(30)), - sa.Column('website', sa.String(100)), + sa.Column('website', sa.String(1024)), sa.Column('profile_image', sa.String(1024), nullable=False, server_default=''), sa.Column('cover_image', sa.String(1024), nullable=False, server_default=''), @@ -59,22 +60,18 @@ def build_metadata(): sa.Column('cached_at', sa.DateTime, nullable=False, server_default='1970-01-01 00:00:00'), sa.Column('raw_json', sa.Text), - sa.UniqueConstraint('name', name='hive_accounts_ux1'), - sa.Index('hive_accounts_ix1', 'vote_weight', 'id'), # core: quick ranks - sa.Index('hive_accounts_ix2', 'name', 'id'), # core: quick id map - sa.Index('hive_accounts_ix3', 'vote_weight', 'name', postgresql_ops=dict(name='varchar_pattern_ops')), # API: lookup - sa.Index('hive_accounts_ix4', 'id', 'name'), # API: quick filter/sort - sa.Index('hive_accounts_ix5', 'cached_at', 'name'), # core/listen sweep + sa.UniqueConstraint('name', name='hive_accounts_ux1'), # account names must stay unique (ON CONFLICT (name) relies on this) + sa.Index('hive_accounts_ix1', 'vote_weight'), # core: quick ranks + sa.Index('hive_accounts_ix5', 'cached_at'), # core/listen sweep ) sa.Table( 'hive_posts', metadata, sa.Column('id', sa.Integer, primary_key=True), sa.Column('parent_id', sa.Integer), - sa.Column('author', VARCHAR(16), nullable=False), - sa.Column('permlink', VARCHAR(255), nullable=False), - sa.Column('category', VARCHAR(255), nullable=False, server_default=''), + sa.Column('author_id', sa.Integer, nullable=False), + sa.Column('permlink_id', sa.BigInteger, nullable=False), + sa.Column('category_id', sa.Integer, nullable=False), sa.Column('community_id', sa.Integer, nullable=True), sa.Column('created_at', sa.DateTime, nullable=False), sa.Column('depth', SMALLINT, nullable=False), @@ -84,21 +81,148 @@ def build_metadata(): sa.Column('is_valid', BOOLEAN, nullable=False, server_default='1'), sa.Column('promoted', sa.types.DECIMAL(10, 3), nullable=False, server_default='0'), - sa.ForeignKeyConstraint(['author'], ['hive_accounts.name'], name='hive_posts_fk1'), + sa.Column('children', SMALLINT, nullable=False, server_default='0'), + + # basic/extended-stats + sa.Column('author_rep', sa.Float(precision=6), nullable=False, server_default='0'), + sa.Column('flag_weight', sa.Float(precision=6), nullable=False, server_default='0'), + sa.Column('total_votes', sa.Integer, nullable=False, server_default='0'), + sa.Column('up_votes', sa.Integer, nullable=False, server_default='0'), + + # core stats/indexes + sa.Column('payout', sa.types.DECIMAL(10, 3), nullable=False, server_default='0'), + sa.Column('payout_at', sa.DateTime, nullable=False, server_default='1990-01-01'), + sa.Column('updated_at', sa.DateTime, nullable=False, server_default='1990-01-01'), + sa.Column('is_paidout', BOOLEAN, nullable=False, server_default='0'), + + # ui flags/filters + sa.Column('is_nsfw', BOOLEAN, nullable=False, server_default='0'), + sa.Column('is_declined', BOOLEAN, nullable=False, server_default='0'), + sa.Column('is_full_power', BOOLEAN, nullable=False, server_default='0'), + sa.Column('is_hidden', BOOLEAN, nullable=False, server_default='0'), + sa.Column('is_grayed', BOOLEAN, nullable=False, server_default='0'), + + # important indexes + sa.Column('rshares', sa.BigInteger, nullable=False, server_default='0'), + sa.Column('sc_trend', sa.Float(precision=6), nullable=False, server_default='0'), + sa.Column('sc_hot', sa.Float(precision=6), nullable=False, server_default='0'), + + sa.Column('total_payout_value', sa.String(30), nullable=False, server_default=''), +
sa.Column('author_rewards', sa.BigInteger, nullable=False, server_default='0'), + + sa.Column('author_rewards_hive', sa.BigInteger, nullable=False, server_default='0'), + sa.Column('author_rewards_hbd', sa.BigInteger, nullable=False, server_default='0'), + sa.Column('author_rewards_vests', sa.BigInteger, nullable=False, server_default='0'), + + sa.Column('children_abs_rshares', sa.BigInteger, nullable=False, server_default='0'), + sa.Column('abs_rshares', sa.BigInteger, nullable=False, server_default='0'), + sa.Column('vote_rshares', sa.BigInteger, nullable=False, server_default='0'), + sa.Column('net_votes', sa.Integer, nullable=False, server_default='0'), + sa.Column('active', sa.DateTime, nullable=False, server_default='1970-01-01 00:00:00'), + sa.Column('last_payout', sa.DateTime, nullable=False, server_default='1970-01-01 00:00:00'), + sa.Column('cashout_time', sa.DateTime, nullable=False, server_default='1970-01-01 00:00:00'), + sa.Column('max_cashout_time', sa.DateTime, nullable=False, server_default='1970-01-01 00:00:00'), + sa.Column('percent_hbd', sa.Integer, nullable=False, server_default='10000'), + sa.Column('reward_weight', sa.Integer, nullable=False, server_default='0'), + + sa.Column('parent_author_id', sa.Integer, nullable=False), + sa.Column('parent_permlink_id', sa.BigInteger, nullable=False), + sa.Column('curator_payout_value', sa.String(30), nullable=False, server_default=''), + sa.Column('root_author_id', sa.Integer, nullable=False), + sa.Column('root_permlink_id', sa.BigInteger, nullable=False), + sa.Column('max_accepted_payout', sa.String(30), nullable=False, server_default='1000000.000 HBD'), + sa.Column('allow_replies', BOOLEAN, nullable=False, server_default='1'), + sa.Column('allow_votes', BOOLEAN, nullable=False, server_default='1'), + sa.Column('allow_curation_rewards', BOOLEAN, nullable=False, server_default='1'), + sa.Column('beneficiaries', sa.JSON, nullable=False, server_default='[]'), + sa.Column('url', sa.Text, nullable=False, server_default=''), + sa.Column('root_title', sa.String(255), nullable=False, server_default=''), + + sa.ForeignKeyConstraint(['author_id'], ['hive_accounts.id'], name='hive_posts_fk1'), sa.ForeignKeyConstraint(['parent_id'], ['hive_posts.id'], name='hive_posts_fk3'), - sa.UniqueConstraint('author', 'permlink', name='hive_posts_ux1'), - sa.Index('hive_posts_ix3', 'author', 'depth', 'id', postgresql_where=sql_text("is_deleted = '0'")), # API: author blog/comments - sa.Index('hive_posts_ix4', 'parent_id DESC NULLS LAST', 'id'), #postgresql_where=sql_text("is_deleted = '0'")), # API: fetching children #[JES] We decided we want the full index since posts can be deleted/undeleted - sa.Index('hive_posts_ix5', 'id', postgresql_where=sql_text("is_pinned = '1' AND is_deleted = '0'")), # API: pinned post status - sa.Index('hive_posts_ix6', 'community_id', 'id', postgresql_where=sql_text("community_id IS NOT NULL AND is_pinned = '1' AND is_deleted = '0'")), # API: community pinned + sa.UniqueConstraint('author_id', 'permlink_id', name='hive_posts_ux1'), + sa.Index('hive_posts_permlink_id', 'permlink_id'), + + sa.Index('hive_posts_depth_idx', 'depth'), + sa.Index('hive_posts_parent_id_idx', 'parent_id'), + sa.Index('hive_posts_community_id_idx', 'community_id'), + sa.Index('hive_posts_author_id', 'author_id'), + + sa.Index('hive_posts_category_id_idx', 'category_id'), + sa.Index('hive_posts_payout_at_idx', 'payout_at'), + sa.Index('hive_posts_payout_idx', 'payout'), + sa.Index('hive_posts_promoted_idx', 'promoted'), + 
sa.Index('hive_posts_sc_trend_idx', 'sc_trend'), + sa.Index('hive_posts_sc_hot_idx', 'sc_hot'), + sa.Index('hive_posts_created_at_idx', 'created_at'), + ) + + sa.Table( + 'hive_post_data', metadata, + sa.Column('id', sa.Integer, primary_key=True, autoincrement=False), + sa.Column('title', VARCHAR(512), nullable=False, server_default=''), + sa.Column('preview', VARCHAR(1024), nullable=False, server_default=''), + sa.Column('img_url', VARCHAR(1024), nullable=False, server_default=''), + sa.Column('body', TEXT, nullable=False, server_default=''), + sa.Column('json', TEXT, nullable=False, server_default='') + ) + + sa.Table( + 'hive_permlink_data', metadata, + sa.Column('id', sa.BigInteger, primary_key=True), + sa.Column('permlink', sa.String(255), nullable=False), + sa.UniqueConstraint('permlink', name='hive_permlink_data_permlink') + ) + + sa.Table( + 'hive_category_data', metadata, + sa.Column('id', sa.Integer, primary_key=True), + sa.Column('category', sa.String(255), nullable=False), + sa.UniqueConstraint('category', name='hive_category_data_category') + ) + + sa.Table( + 'hive_votes', metadata, + sa.Column('id', sa.BigInteger, primary_key=True), + sa.Column('post_id', sa.Integer, nullable=False), + sa.Column('voter_id', sa.Integer, nullable=False), + sa.Column('author_id', sa.Integer, nullable=False), + sa.Column('permlink_id', sa.BigInteger, nullable=False), # matches hive_permlink_data.id + sa.Column('weight', sa.BigInteger, nullable=False, server_default='0'), + sa.Column('rshares', sa.BigInteger, nullable=False, server_default='0'), + sa.Column('vote_percent', sa.Integer, server_default='0'), + sa.Column('last_update', sa.DateTime, nullable=False, server_default='1970-01-01 00:00:00'), + sa.Column('num_changes', sa.Integer, server_default='0'), + + sa.UniqueConstraint('voter_id', 'author_id', 'permlink_id', name='hive_votes_ux1'), + + sa.ForeignKeyConstraint(['post_id'], ['hive_posts.id']), + sa.ForeignKeyConstraint(['voter_id'], ['hive_accounts.id']), + sa.ForeignKeyConstraint(['author_id'], ['hive_accounts.id']), + sa.ForeignKeyConstraint(['permlink_id'], ['hive_permlink_data.id']), + + sa.Index('hive_votes_post_id_idx', 'post_id'), + sa.Index('hive_votes_voter_id_idx', 'voter_id'), + sa.Index('hive_votes_author_id_idx', 'author_id'), + sa.Index('hive_votes_permlink_id_idx', 'permlink_id'), + sa.Index('hive_votes_upvote_idx', 'vote_percent', postgresql_where=sql_text("vote_percent > 0")), + sa.Index('hive_votes_downvote_idx', 'vote_percent', postgresql_where=sql_text("vote_percent < 0")) + ) + + sa.Table( + 'hive_tag_data', metadata, + sa.Column('id', sa.Integer, nullable=False, primary_key=True), + sa.Column('tag', VARCHAR(64), nullable=False, server_default=''), + sa.UniqueConstraint('tag', name='hive_tag_data_ux1') ) sa.Table( 'hive_post_tags', metadata, sa.Column('post_id', sa.Integer, nullable=False), - sa.Column('tag', sa.String(32), nullable=False), - sa.UniqueConstraint('tag', 'post_id', name='hive_post_tags_ux1'), # core - sa.Index('hive_post_tags_ix1', 'post_id'), # core + sa.Column('tag_id', sa.Integer, nullable=False), + sa.PrimaryKeyConstraint('post_id', 'tag_id', name='hive_post_tags_pk1'), + sa.ForeignKeyConstraint(['post_id'], ['hive_posts.id']), + sa.ForeignKeyConstraint(['tag_id'], ['hive_tag_data.id']), ) sa.Table( @@ -107,8 +231,10 @@ def build_metadata(): sa.Column('following', sa.Integer, nullable=False), sa.Column('state', SMALLINT, nullable=False, server_default='1'), sa.Column('created_at', sa.DateTime, nullable=False), + sa.Column('blacklisted', sa.Boolean, nullable=False,
server_default='0'), + sa.Column('follow_blacklists', sa.Boolean, nullable=False, server_default='0'), - sa.UniqueConstraint('following', 'follower', name='hive_follows_ux3'), # core + sa.PrimaryKeyConstraint('following', 'follower', name='hive_follows_pk'), # core sa.Index('hive_follows_ix5a', 'following', 'state', 'created_at', 'follower'), sa.Index('hive_follows_ix5b', 'follower', 'state', 'created_at', 'following'), ) @@ -121,8 +247,9 @@ def build_metadata(): sa.ForeignKeyConstraint(['account'], ['hive_accounts.name'], name='hive_reblogs_fk1'), sa.ForeignKeyConstraint(['post_id'], ['hive_posts.id'], name='hive_reblogs_fk2'), - sa.UniqueConstraint('account', 'post_id', name='hive_reblogs_ux1'), # core - sa.Index('hive_reblogs_ix1', 'post_id', 'account', 'created_at'), # API -- not yet used + sa.PrimaryKeyConstraint('account', 'post_id', name='hive_reblogs_pk'), # core + sa.Index('hive_reblogs_account', 'account'), + sa.Index('hive_reblogs_post_id', 'post_id'), ) sa.Table( @@ -139,102 +266,27 @@ def build_metadata(): sa.ForeignKeyConstraint(['from_account'], ['hive_accounts.id'], name='hive_payments_fk1'), sa.ForeignKeyConstraint(['to_account'], ['hive_accounts.id'], name='hive_payments_fk2'), sa.ForeignKeyConstraint(['post_id'], ['hive_posts.id'], name='hive_payments_fk3'), + sa.Index('hive_payments_from', 'from_account'), + sa.Index('hive_payments_to', 'to_account'), + sa.Index('hive_payments_post_id', 'post_id'), ) sa.Table( 'hive_feed_cache', metadata, - sa.Column('post_id', sa.Integer, nullable=False), + sa.Column('post_id', sa.Integer, nullable=False, primary_key=True), sa.Column('account_id', sa.Integer, nullable=False), sa.Column('created_at', sa.DateTime, nullable=False), - sa.UniqueConstraint('post_id', 'account_id', name='hive_feed_cache_ux1'), # core - sa.Index('hive_feed_cache_ix1', 'account_id', 'post_id', 'created_at'), # API (and rebuild?) 
- ) - - sa.Table( - 'hive_posts_cache', metadata, - sa.Column('post_id', sa.Integer, primary_key=True, autoincrement=False), - sa.Column('author', VARCHAR(16), nullable=False), - sa.Column('permlink', VARCHAR(255), nullable=False), - sa.Column('category', VARCHAR(255), nullable=False, server_default=''), - - # important/index - sa.Column('community_id', sa.Integer, nullable=True), - sa.Column('depth', SMALLINT, nullable=False, server_default='0'), - sa.Column('children', SMALLINT, nullable=False, server_default='0'), - - # basic/extended-stats - sa.Column('author_rep', sa.Float(precision=6), nullable=False, server_default='0'), - sa.Column('flag_weight', sa.Float(precision=6), nullable=False, server_default='0'), - sa.Column('total_votes', sa.Integer, nullable=False, server_default='0'), - sa.Column('up_votes', sa.Integer, nullable=False, server_default='0'), - - # basic ui fields - sa.Column('title', sa.String(255), nullable=False, server_default=''), - sa.Column('preview', sa.String(1024), nullable=False, server_default=''), - sa.Column('img_url', sa.String(1024), nullable=False, server_default=''), - - # core stats/indexes - sa.Column('payout', sa.types.DECIMAL(10, 3), nullable=False, server_default='0'), - sa.Column('promoted', sa.types.DECIMAL(10, 3), nullable=False, server_default='0'), - sa.Column('created_at', sa.DateTime, nullable=False, server_default='1990-01-01'), - sa.Column('payout_at', sa.DateTime, nullable=False, server_default='1990-01-01'), - sa.Column('updated_at', sa.DateTime, nullable=False, server_default='1990-01-01'), - sa.Column('is_paidout', BOOLEAN, nullable=False, server_default='0'), - - # ui flags/filters - sa.Column('is_nsfw', BOOLEAN, nullable=False, server_default='0'), - sa.Column('is_declined', BOOLEAN, nullable=False, server_default='0'), - sa.Column('is_full_power', BOOLEAN, nullable=False, server_default='0'), - sa.Column('is_hidden', BOOLEAN, nullable=False, server_default='0'), - sa.Column('is_grayed', BOOLEAN, nullable=False, server_default='0'), - - # important indexes - sa.Column('rshares', sa.BigInteger, nullable=False, server_default='0'), - sa.Column('sc_trend', sa.Float(precision=6), nullable=False, server_default='0'), - sa.Column('sc_hot', sa.Float(precision=6), nullable=False, server_default='0'), - - # bulk data - sa.Column('body', TEXT), - sa.Column('votes', TEXT), - sa.Column('json', sa.Text), - sa.Column('raw_json', sa.Text), - - # index: misc - sa.Index('hive_posts_cache_ix3', 'payout_at', 'post_id', postgresql_where=sql_text("is_paidout = '0'")), # core: payout sweep - sa.Index('hive_posts_cache_ix8', 'category', 'payout', 'depth', postgresql_where=sql_text("is_paidout = '0'")), # API: tag stats - - # index: ranked posts - sa.Index('hive_posts_cache_ix2', 'promoted', postgresql_where=sql_text("is_paidout = '0' AND promoted > 0")), # API: promoted - - sa.Index('hive_posts_cache_ix6a', 'sc_trend', 'post_id', postgresql_where=sql_text("is_paidout = '0'")), # API: trending todo: depth=0 - sa.Index('hive_posts_cache_ix7a', 'sc_hot', 'post_id', postgresql_where=sql_text("is_paidout = '0'")), # API: hot todo: depth=0 - sa.Index('hive_posts_cache_ix6b', 'post_id', 'sc_trend', postgresql_where=sql_text("is_paidout = '0'")), # API: trending, filtered todo: depth=0 - sa.Index('hive_posts_cache_ix7b', 'post_id', 'sc_hot', postgresql_where=sql_text("is_paidout = '0'")), # API: hot, filtered todo: depth=0 - - sa.Index('hive_posts_cache_ix9a', 'depth', 'payout', 'post_id', postgresql_where=sql_text("is_paidout = '0'")), # API: payout todo: rem depth 
- sa.Index('hive_posts_cache_ix9b', 'category', 'depth', 'payout', 'post_id', postgresql_where=sql_text("is_paidout = '0'")), # API: payout, filtered todo: rem depth - - sa.Index('hive_posts_cache_ix10', 'post_id', 'payout', postgresql_where=sql_text("is_grayed = '1' AND payout > 0")), # API: muted, by filter/date/payout - - # index: stats - sa.Index('hive_posts_cache_ix20', 'community_id', 'author', 'payout', 'post_id', postgresql_where=sql_text("is_paidout = '0'")), # API: pending distribution; author payout - - # index: community ranked posts - sa.Index('hive_posts_cache_ix30', 'community_id', 'sc_trend', 'post_id', postgresql_where=sql_text("community_id IS NOT NULL AND is_grayed = '0' AND depth = 0")), # API: community trend - sa.Index('hive_posts_cache_ix31', 'community_id', 'sc_hot', 'post_id', postgresql_where=sql_text("community_id IS NOT NULL AND is_grayed = '0' AND depth = 0")), # API: community hot - sa.Index('hive_posts_cache_ix32', 'community_id', 'created_at', 'post_id', postgresql_where=sql_text("community_id IS NOT NULL AND is_grayed = '0' AND depth = 0")), # API: community created - sa.Index('hive_posts_cache_ix33', 'community_id', 'payout', 'post_id', postgresql_where=sql_text("community_id IS NOT NULL AND is_grayed = '0' AND is_paidout = '0'")), # API: community payout - sa.Index('hive_posts_cache_ix34', 'community_id', 'payout', 'post_id', postgresql_where=sql_text("community_id IS NOT NULL AND is_grayed = '1' AND is_paidout = '0'")), # API: community muted - sa.Index('hive_posts_cache_ix35', 'author', 'depth'), + sa.Index('hive_feed_cache_account_id', 'account_id'), # API (and rebuild?) + sa.UniqueConstraint('account_id', 'post_id', name='hive_feed_cache_ux1') ) sa.Table( 'hive_state', metadata, sa.Column('block_num', sa.Integer, primary_key=True, autoincrement=False), sa.Column('db_version', sa.Integer, nullable=False), - sa.Column('steem_per_mvest', sa.types.DECIMAL(8, 3), nullable=False), - sa.Column('usd_per_steem', sa.types.DECIMAL(8, 3), nullable=False), - sa.Column('sbd_per_steem', sa.types.DECIMAL(8, 3), nullable=False), + sa.Column('steem_per_mvest', sa.types.DECIMAL(14, 6), nullable=False), + sa.Column('usd_per_steem', sa.types.DECIMAL(14, 6), nullable=False), + sa.Column('sbd_per_steem', sa.types.DECIMAL(14, 6), nullable=False), sa.Column('dgpo', sa.Text, nullable=False), ) @@ -335,16 +387,201 @@ def setup(db): sqls = [ "INSERT INTO hive_state (block_num, db_version, steem_per_mvest, usd_per_steem, sbd_per_steem, dgpo) VALUES (0, %d, 0, 0, 0, '')" % DB_VERSION, "INSERT INTO hive_blocks (num, hash, created_at) VALUES (0, '0000000000000000000000000000000000000000', '2016-03-24 16:04:57')", + + "INSERT INTO hive_permlink_data (id, permlink) VALUES (0, '')", + "INSERT INTO hive_category_data (id, category) VALUES (0, '')", + "INSERT INTO hive_accounts (id, name, created_at) VALUES (0, '', '1990-01-01T00:00:00')", + "INSERT INTO hive_accounts (name, created_at) VALUES ('miners', '2016-03-24 16:05:00')", "INSERT INTO hive_accounts (name, created_at) VALUES ('null', '2016-03-24 16:05:00')", "INSERT INTO hive_accounts (name, created_at) VALUES ('temp', '2016-03-24 16:05:00')", - "INSERT INTO hive_accounts (name, created_at) VALUES ('initminer', '2016-03-24 16:05:00')"] + "INSERT INTO hive_accounts (name, created_at) VALUES ('initminer', '2016-03-24 16:05:00')", + + """ + INSERT INTO + public.hive_posts(id, parent_id, author_id, permlink_id, category_id, + community_id, parent_author_id, parent_permlink_id, root_author_id, + root_permlink_id, created_at, depth + ) + 
VALUES + (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, now(), 0); + """] for sql in sqls: db.query(sql) sql = "CREATE INDEX hive_communities_ft1 ON hive_communities USING GIN (to_tsvector('english', title || ' ' || about))" db.query(sql) + sql = """ + DROP FUNCTION if exists process_hive_post_operation(character varying,character varying,character varying,character varying,timestamp without time zone,timestamp without time zone) + ; + CREATE OR REPLACE FUNCTION process_hive_post_operation( + in _author hive_accounts.name%TYPE, + in _permlink hive_permlink_data.permlink%TYPE, + in _parent_author hive_accounts.name%TYPE, + in _parent_permlink hive_permlink_data.permlink%TYPE, + in _date hive_posts.created_at%TYPE, + in _community_support_start_date hive_posts.created_at%TYPE) + RETURNS TABLE (id hive_posts.id%TYPE, author_id hive_posts.author_id%TYPE, permlink_id hive_posts.permlink_id%TYPE, + post_category hive_category_data.category%TYPE, parent_id hive_posts.parent_id%TYPE, community_id hive_posts.community_id%TYPE, + is_valid hive_posts.is_valid%TYPE, is_muted hive_posts.is_muted%TYPE, depth hive_posts.depth%TYPE, + is_edited boolean) + LANGUAGE plpgsql + AS + $function$ + BEGIN + + INSERT INTO hive_permlink_data + (permlink) + VALUES + ( + _permlink + ) + ON CONFLICT DO NOTHING + ; + if _parent_author != '' THEN + RETURN QUERY INSERT INTO hive_posts as hp + (parent_id, parent_author_id, parent_permlink_id, depth, community_id, + category_id, + root_author_id, root_permlink_id, + is_muted, is_valid, + author_id, permlink_id, created_at) + SELECT php.id AS parent_id, php.author_id as parent_author_id, + php.permlink_id as parent_permlink_id, php.depth + 1 as depth, + (CASE + WHEN _date > _community_support_start_date THEN + COALESCE(php.community_id, (select hc.id from hive_communities hc where hc.name = _parent_permlink)) + ELSE NULL + END) as community_id, + COALESCE(php.category_id, (select hcg.id from hive_category_data hcg where hcg.category = _parent_permlink)) as category_id, + php.root_author_id as root_author_id, + php.root_permlink_id as root_permlink_id, + php.is_muted as is_muted, php.is_valid as is_valid, + ha.id as author_id, hpd.id as permlink_id, _date as created_at + FROM hive_accounts ha, + hive_permlink_data hpd, + hive_posts php + INNER JOIN hive_accounts pha ON pha.id = php.author_id + INNER JOIN hive_permlink_data phpd ON phpd.id = php.permlink_id + WHERE pha.name = _parent_author and phpd.permlink = _parent_permlink AND + ha.name = _author and hpd.permlink = _permlink + + ON CONFLICT ON CONSTRAINT hive_posts_ux1 DO UPDATE SET + --- During post update it is disallowed to change: parent-post, category, community-id; + --- also depth, is_valid and is_muted are impossible to change + --- post edit part + updated_at = _date, + + --- post undelete part (if was deleted) + is_deleted = (CASE hp.is_deleted + WHEN true THEN false + ELSE false + END + ), + is_pinned = (CASE hp.is_deleted + WHEN true THEN false + ELSE hp.is_pinned --- no change + END + ) + + RETURNING hp.id, hp.author_id, hp.permlink_id, (SELECT hcd.category FROM hive_category_data hcd WHERE hcd.id = hp.category_id) as post_category, hp.parent_id, hp.community_id, hp.is_valid, hp.is_muted, hp.depth, (hp.updated_at > hp.created_at) as is_edited + ; + ELSE + INSERT INTO hive_category_data + (category) + VALUES (_parent_permlink) + ON CONFLICT (category) DO NOTHING + ; + + RETURN QUERY INSERT INTO hive_posts as hp + (parent_id, parent_author_id, parent_permlink_id, depth, community_id, + category_id, + root_author_id,
root_permlink_id, + is_muted, is_valid, + author_id, permlink_id, created_at) + SELECT 0 AS parent_id, 0 as parent_author_id, 0 as parent_permlink_id, 0 as depth, + (CASE + WHEN _date > _community_support_start_date THEN + (select hc.id from hive_communities hc where hc.name = _parent_permlink) + ELSE NULL + END) as community_id, + (select hcg.id from hive_category_data hcg where hcg.category = _parent_permlink) as category_id, + ha.id as root_author_id, -- use author_id as root one if no parent + hpd.id as root_permlink_id, -- use permlink_id as root one if no parent + false as is_muted, true as is_valid, + ha.id as author_id, hpd.id as permlink_id, _date as created_at + FROM hive_accounts ha, + hive_permlink_data hpd + WHERE ha.name = _author and hpd.permlink = _permlink + + ON CONFLICT ON CONSTRAINT hive_posts_ux1 DO UPDATE SET + --- During post update it is disallowed to change: parent-post, category, community-id; + --- also depth, is_valid and is_muted are impossible to change + --- post edit part + updated_at = _date, + + --- post undelete part (if was deleted) + is_deleted = (CASE hp.is_deleted + WHEN true THEN false + ELSE false + END + ), + is_pinned = (CASE hp.is_deleted + WHEN true THEN false + ELSE hp.is_pinned --- no change + END + ) + + RETURNING hp.id, hp.author_id, hp.permlink_id, _parent_permlink as post_category, hp.parent_id, hp.community_id, hp.is_valid, hp.is_muted, hp.depth, (hp.updated_at > hp.created_at) as is_edited + ; + END IF; + END + $function$ + """ + db.query_no_return(sql) + + sql = """ + DROP FUNCTION if exists delete_hive_post(character varying,character varying) + ; + CREATE OR REPLACE FUNCTION delete_hive_post( + in _author hive_accounts.name%TYPE, + in _permlink hive_permlink_data.permlink%TYPE) + RETURNS TABLE (id hive_posts.id%TYPE, depth hive_posts.depth%TYPE) + LANGUAGE plpgsql + AS + $function$ + BEGIN + RETURN QUERY UPDATE hive_posts AS hp + SET is_deleted = true + FROM hive_posts hp1 + INNER JOIN hive_accounts ha ON hp1.author_id = ha.id + INNER JOIN hive_permlink_data hpd ON hp1.permlink_id = hpd.id + WHERE hp.id = hp1.id AND ha.name = _author AND hpd.permlink = _permlink + RETURNING hp.id, hp.depth; + END + $function$ + """ + db.query_no_return(sql) + + sql = """ + DROP MATERIALIZED VIEW IF EXISTS hive_posts_a_p + ; + CREATE MATERIALIZED VIEW hive_posts_a_p + AS + SELECT hp.id AS id, + ha_a.name AS author, + hpd_p.permlink AS permlink + FROM hive_posts hp + INNER JOIN hive_accounts ha_a ON ha_a.id = hp.author_id + INNER JOIN hive_permlink_data hpd_p ON hpd_p.id = hp.permlink_id + WITH DATA + ; + DROP INDEX IF EXISTS hive_posts_a_p_idx + ; + CREATE UNIQUE INDEX hive_posts_a_p_idx ON hive_posts_a_p (author collate "C", permlink collate "C") + """ + db.query_no_return(sql) + def reset_autovac(db): """Initializes/resets per-table autovacuum/autoanalyze params.
@@ -353,7 +590,6 @@ def reset_autovac(db): autovac_config = { # vacuum analyze 'hive_accounts': (50000, 100000), - 'hive_posts_cache': (25000, 25000), 'hive_posts': (2500, 10000), 'hive_post_tags': (5000, 10000), 'hive_follows': (5000, 5000), diff --git a/hive/indexer/blocks.py b/hive/indexer/blocks.py index 58af5b48ffa431036cbd6b57f887e73ec428f971..fdb55721ddd4ed2d0152e602d9c254dbb0d02e6f 100644 --- a/hive/indexer/blocks.py +++ b/hive/indexer/blocks.py @@ -1,15 +1,19 @@ """Blocks processor.""" import logging +import json from hive.db.adapter import Db from hive.indexer.accounts import Accounts from hive.indexer.posts import Posts -from hive.indexer.cached_post import CachedPost from hive.indexer.custom_op import CustomOp from hive.indexer.payments import Payments from hive.indexer.follow import Follow +from hive.indexer.votes import Votes +from hive.indexer.post_data_cache import PostDataCache +from hive.indexer.tags import Tags +from time import perf_counter log = logging.getLogger(__name__) @@ -17,6 +21,19 @@ DB = Db.instance() class Blocks: """Processes blocks, dispatches work, manages `hive_blocks` table.""" + blocks_to_flush = [] + ops_stats = {} + + @staticmethod + def merge_ops_stats(od1, od2): + """Merge per-op-type counters from od2 into od1.""" + if od2 is not None: + for k, v in od2.items(): + if k in od1: + od1[k] += v + else: + od1[k] = v + + return od1 @classmethod def head_num(cls): @@ -31,20 +48,28 @@ class Blocks: return str(DB.query_one(sql) or '') @classmethod - def process(cls, block): + def process(cls, block, vops_in_block, hived): """Process a single block. Always wrap in a transaction!""" + time_start = perf_counter() #assert is_trx_active(), "Block.process must be in a trx" - return cls._process(block, is_initial_sync=False) + ret = cls._process(block, vops_in_block, hived, is_initial_sync=False) + PostDataCache.flush() + Tags.flush() + Votes.flush() + time_end = perf_counter() + log.info("[PROCESS BLOCK] %fs", time_end - time_start) + return ret @classmethod - def process_multi(cls, blocks, is_initial_sync=False): + def process_multi(cls, blocks, vops, hived, is_initial_sync=False): """Batch-process blocks; wrapped in a transaction.""" + time_start = perf_counter() DB.query("START TRANSACTION") last_num = 0 try: for block in blocks: - last_num = cls._process(block, is_initial_sync) + last_num = cls._process(block, vops, hived, is_initial_sync) except Exception as e: log.error("exception encountered block %d", last_num + 1) raise e @@ -52,19 +77,60 @@ class Blocks: # Follows flushing needs to be atomic because recounts are # expensive. So is tracking follows at all; hence we track # deltas in memory and update follow/er counts in bulk.
+ PostDataCache.flush() + Tags.flush() + Votes.flush() + cls._flush_blocks() Follow.flush(trx=False) DB.query("COMMIT") + time_end = perf_counter() + log.info("[PROCESS MULTI] %i blocks in %fs", len(blocks), time_end - time_start) + + return cls.ops_stats + + @staticmethod + def prepare_vops(vopsList, date): + """Pre-sort a block's virtual ops into vote ops and comment-payout ops.""" + vote_ops = [] + comment_payout_ops = {} + for vop in vopsList: + key = None + val = None + + op_type = vop['type'] + op_value = vop['value'] + if op_type == 'curation_reward_operation': + key = "{}/{}".format(op_value['comment_author'], op_value['comment_permlink']) + val = {'reward' : op_value['reward']} + elif op_type == 'author_reward_operation': + key = "{}/{}".format(op_value['author'], op_value['permlink']) + val = {'hbd_payout':op_value['hbd_payout'], 'hive_payout':op_value['hive_payout'], 'vesting_payout':op_value['vesting_payout']} + elif op_type == 'comment_reward_operation': + if 'payout' not in op_value or op_value['payout'] is None: + log.error("Broken op: `{}'".format(str(vop))) + key = "{}/{}".format(op_value['author'], op_value['permlink']) + val = {'payout':op_value['payout'], 'author_rewards':op_value['author_rewards']} + elif op_type == 'effective_comment_vote_operation': + vote_ops.append(vop) + + if key is not None and val is not None: + if key in comment_payout_ops: + comment_payout_ops[key].append({op_type:val}) + else: + comment_payout_ops[key] = [{op_type:val}] + + return (vote_ops, comment_payout_ops) + @classmethod - def _process(cls, block, is_initial_sync=False): + def _process(cls, block, virtual_operations, hived, is_initial_sync=False): """Process a single block. Assumes a trx is open.""" #pylint: disable=too-many-branches num = cls._push(block) date = block['timestamp'] + # [DK] We make two scans: the first scan registers all new accounts. account_names = set() - json_ops = [] for tx_idx, tx in enumerate(block['transactions']): for operation in tx['operations']: op_type = operation['type'] @@ -82,8 +148,24 @@ class Blocks: elif op_type == 'create_claimed_account_operation': account_names.add(op['new_account_name']) + Accounts.register(account_names, date) # register any new names + + # second scan processes all other ops + json_ops = [] + update_comment_pending_payouts = [] + for tx_idx, tx in enumerate(block['transactions']): + for operation in tx['operations']: + op_type = operation['type'] + op = operation['value'] + + if op_type != 'custom_json_operation': + cls.ops_stats[op_type] = cls.ops_stats.get(op_type, 0) + 1 + # account metadata updates - elif op_type == 'account_update_operation': + if op_type == 'account_update_operation': if not is_initial_sync: Accounts.dirty(op['account']) # full elif op_type == 'account_update2_operation': @@ -97,12 +179,13 @@ class Blocks: Accounts.dirty(op['author']) # lite - stats elif op_type == 'delete_comment_operation': Posts.delete_op(op) + elif op_type == 'comment_options_operation': + Posts.comment_options_op(op) elif op_type == 'vote_operation': if not is_initial_sync: Accounts.dirty(op['author']) # lite - rep Accounts.dirty(op['voter']) # lite - stats - CachedPost.vote(op['author'], op['permlink'], - None, op['voter']) + update_comment_pending_payouts.append([op['author'], op['permlink']]) # misc ops elif op_type == 'transfer_operation': @@ -110,8 +193,39 @@ class Blocks: elif op_type == 'custom_json_operation': json_ops.append(op) - Accounts.register(account_names, date) # register any new names - CustomOp.process_ops(json_ops, num, date) #
follow/reblog/community ops + # follow/reblog/community ops + if json_ops: + custom_ops_stats = CustomOp.process_ops(json_ops, num, date) + cls.ops_stats = Blocks.merge_ops_stats(cls.ops_stats, custom_ops_stats) + + if update_comment_pending_payouts: + payout_ops_stat = Posts.update_comment_pending_payouts(hived, update_comment_pending_payouts) + cls.ops_stats = Blocks.merge_ops_stats(cls.ops_stats, payout_ops_stat) + + # virtual ops + comment_payout_ops = {} + vote_ops = [] + + empty_vops = (vote_ops, comment_payout_ops) + + if is_initial_sync: + (vote_ops, comment_payout_ops) = virtual_operations[num] if num in virtual_operations else empty_vops + else: + vops = hived.get_virtual_operations(num) + (vote_ops, comment_payout_ops) = Blocks.prepare_vops(vops, date) + + for v in vote_ops: + Votes.vote_op(v, date) + op_type = v['type'] + cls.ops_stats[op_type] = cls.ops_stats.get(op_type, 0) + 1 + + if comment_payout_ops: + comment_payout_stats = Posts.comment_payout_op(comment_payout_ops, date) + cls.ops_stats = Blocks.merge_ops_stats(cls.ops_stats, comment_payout_stats) return num @@ -162,16 +276,28 @@ class Blocks: """Insert a row in `hive_blocks`.""" num = int(block['block_id'][:8], base=16) txs = block['transactions'] - DB.query("INSERT INTO hive_blocks (num, hash, prev, txs, ops, created_at) " - "VALUES (:num, :hash, :prev, :txs, :ops, :date)", **{ - 'num': num, - 'hash': block['block_id'], - 'prev': block['previous'], - 'txs': len(txs), - 'ops': sum([len(tx['operations']) for tx in txs]), - 'date': block['timestamp']}) + cls.blocks_to_flush.append({ + 'num': num, + 'hash': block['block_id'], + 'prev': block['previous'], + 'txs': len(txs), + 'ops': sum([len(tx['operations']) for tx in txs]), + 'date': block['timestamp']}) return num + @classmethod + def _flush_blocks(cls): + """Bulk-insert buffered block rows into `hive_blocks`.""" + query = """ + INSERT INTO + hive_blocks (num, hash, prev, txs, ops, created_at) + VALUES + """ + values = [] + for block in cls.blocks_to_flush: + values.append("({}, '{}', '{}', {}, {}, '{}')".format(block['num'], block['hash'], block['prev'], block['txs'], block['ops'], block['date'])) + DB.query(query + ",".join(values)) + cls.blocks_to_flush = [] + @classmethod def _pop(cls, blocks): """Pop head blocks to navigate head to a point prior to fork.
@@ -215,13 +341,13 @@ class Blocks: # remove all recent records -- core DB.query("DELETE FROM hive_feed_cache WHERE created_at >= :date", date=date) DB.query("DELETE FROM hive_reblogs WHERE created_at >= :date", date=date) - DB.query("DELETE FROM hive_follows WHERE created_at >= :date", date=date) #* + DB.query("DELETE FROM hive_follows WHERE created_at >= :date", date=date) # remove posts: core, tags, cache entries if post_ids: - DB.query("DELETE FROM hive_posts_cache WHERE post_id IN :ids", ids=post_ids) DB.query("DELETE FROM hive_post_tags WHERE post_id IN :ids", ids=post_ids) DB.query("DELETE FROM hive_posts WHERE id IN :ids", ids=post_ids) + DB.query("DELETE FROM hive_post_data WHERE id IN :ids", ids=post_ids) DB.query("DELETE FROM hive_payments WHERE block_num = :num", num=num) DB.query("DELETE FROM hive_blocks WHERE num = :num", num=num) diff --git a/hive/indexer/cached_post.py b/hive/indexer/cached_post.py index 25f731827daa6cd9eed3dbd72f1757791f30da04..505f1efdf705b32edb10bfd73413d4f036a3c07d 100644 --- a/hive/indexer/cached_post.py +++ b/hive/indexer/cached_post.py @@ -27,7 +27,7 @@ def _keyify(items): return dict(map(lambda x: ("val_%d" % x[0], x[1]), enumerate(items))) class CachedPost: - """Maintain update queue and writing to `hive_posts_cache`.""" + """Maintain an update queue for writes to `hive_posts`.""" # cursor signifying upper bound of cached post span _last_id = -1 @@ -116,6 +116,7 @@ class CachedPost: - author/permlink is unique and always references the same post - you can always get_content on any author/permlink you see in an op """ + raise NotImplementedError("Cannot delete from CachedPost!!!") DB.query("DELETE FROM hive_posts_cache WHERE post_id = :id", id=post_id) DB.query("DELETE FROM hive_post_tags WHERE post_id = :id", id=post_id) @@ -230,7 +231,7 @@ class CachedPost: @classmethod def _select_paidout_tuples(cls, date): - """Query hive_posts_cache for payout sweep. + """Query hive_posts for payout sweep. Select all posts which should have been paid out before `date` yet do not have the `is_paidout` flag set.
We perform this @@ -241,14 +242,20 @@ class CachedPost: """ from hive.indexer.posts import Posts - sql = """SELECT post_id FROM hive_posts_cache + sql = """SELECT id FROM hive_posts WHERE is_paidout = '0' AND payout_at <= :date""" ids = DB.query_col(sql, date=date) if not ids: return [] - sql = """SELECT id, author, permlink - FROM hive_posts WHERE id IN :ids""" + sql = """ + SELECT + hp.id, ha_a.name as author, hpd_p.permlink as permlink + FROM + hive_posts hp + INNER JOIN hive_accounts ha_a ON ha_a.id = hp.author_id + INNER JOIN hive_permlink_data hpd_p ON hpd_p.id = hp.permlink_id + WHERE hp.id IN :ids""" results = DB.query_all(sql, ids=tuple(ids)) return Posts.save_ids_from_tuples(results) @@ -271,9 +278,15 @@ class CachedPost: def _select_missing_tuples(cls, last_cached_id, limit=1000000): """Fetch posts inserted into main posts table but not cache.""" from hive.indexer.posts import Posts - sql = """SELECT id, author, permlink, promoted FROM hive_posts - WHERE is_deleted = '0' AND id > :id - ORDER BY id LIMIT :limit""" + sql = """ + SELECT + hp.id, ha_a.name as author, hpd_p.permlink as permlink, promoted + FROM hive_posts hp + INNER JOIN hive_accounts ha_a ON ha_a.id = hp.author_id + INNER JOIN hive_permlink_data hpd_p ON hpd_p.id = hp.permlink_id + WHERE + hp.is_deleted = '0' AND hp.id > :id + ORDER BY hp.id LIMIT :limit""" results = DB.query_all(sql, id=last_cached_id, limit=limit) return Posts.save_ids_from_tuples(results) @@ -394,7 +407,7 @@ class CachedPost: """Retrieve the latest post_id that was cached.""" if cls._last_id == -1: # after initial query, we maintain last_id w/ _bump_last_id() - sql = "SELECT COALESCE(MAX(post_id), 0) FROM hive_posts_cache" + sql = "SELECT id FROM hive_posts ORDER BY id DESC LIMIT 1" cls._last_id = DB.query_one(sql) return cls._last_id @@ -415,8 +428,12 @@ class CachedPost: return {} # build a map of id->fields for each of those posts - sql = """SELECT id, category, community_id, is_muted, is_valid - FROM hive_posts WHERE id IN :ids""" + sql = """ + SELECT + hp.id, hcd.category as category, community_id, is_muted, is_valid + FROM hive_posts hp + LEFT JOIN hive_category_data hcd ON hcd.id = hp.category_id + WHERE hp.id IN :ids""" core = {r[0]: {'category': r[1], 'community_id': r[2], 'is_muted': r[3], @@ -487,6 +504,7 @@ class CachedPost: # always write, unless simple vote update if level in ['insert', 'payout', 'update']: basic = post_basic(post) + legacy_data = post_legacy(post) values.extend([ ('community_id', post['community_id']), # immutable* ('created_at', post['created']), # immutable* @@ -501,7 +519,22 @@ class CachedPost: ('is_full_power', basic['is_full_power']), ('is_paidout', basic['is_paidout']), ('json', json.dumps(basic['json_metadata'])), - ('raw_json', json.dumps(post_legacy(post))), + #('raw_json', json.dumps(legacy_data)), + # TODO: check if writing fields below on every update is necessary!
+ ('parent_author', legacy_data['parent_author']), + ('parent_permlink', legacy_data['parent_permlink']), + ('curator_payout_value', legacy_data['curator_payout_value']), + ('root_author', legacy_data['root_author']), + ('root_permlink', legacy_data['root_permlink']), + ('max_accepted_payout', legacy_data['max_accepted_payout']), + ('percent_hbd', legacy_data['percent_hbd']), + ('allow_replies', legacy_data['allow_replies']), + ('allow_votes', legacy_data['allow_votes']), + ('allow_curation_rewards', legacy_data['allow_curation_rewards']), + ('beneficiaries', json.dumps(legacy_data['beneficiaries'])), + ('url', legacy_data['url']), + ('root_title', legacy_data['root_title']), + ('author_permlink', post['author'] + post['permlink']), ]) # if there's a pending promoted value to write, pull it out @@ -671,8 +704,10 @@ class CachedPost: @classmethod def _insert(cls, values): + raise NotImplementedError("Cannot insert from CachedPost") return DB.build_insert('hive_posts_cache', values, pk='post_id') @classmethod def _update(cls, values): + raise NotImplementedError("Cannot update from CachedPost") return DB.build_update('hive_posts_cache', values, pk='post_id') diff --git a/hive/indexer/community.py b/hive/indexer/community.py index 1840731a8cc4dfc0900d1941f035f3d474107c7a..4c4d912d29760bcaa297d9fd8bf84545fb6df9bd 100644 --- a/hive/indexer/community.py +++ b/hive/indexer/community.py @@ -235,10 +235,11 @@ class Community: SELECT community_id, COUNT(*) posts, ROUND(SUM(payout)) payouts, - COUNT(DISTINCT author) authors - FROM hive_posts_cache + COUNT(DISTINCT author_id) authors + FROM hive_posts WHERE community_id IS NOT NULL AND is_paidout = '0' + AND is_deleted = '0' GROUP BY community_id ) p ON community_id = id @@ -400,15 +401,15 @@ class CommunityOp: DB.query("""UPDATE hive_posts SET is_muted = '1' WHERE id = :post_id""", **params) self._notify('mute_post', payload=self.notes) - if not DbState.is_initial_sync(): - CachedPost.update(self.account, self.permlink, self.post_id) + #if not DbState.is_initial_sync(): + # CachedPost.update(self.account, self.permlink, self.post_id) elif action == 'unmutePost': DB.query("""UPDATE hive_posts SET is_muted = '0' WHERE id = :post_id""", **params) self._notify('unmute_post', payload=self.notes) - if not DbState.is_initial_sync(): - CachedPost.update(self.account, self.permlink, self.post_id) + #if not DbState.is_initial_sync(): + # CachedPost.update(self.account, self.permlink, self.post_id) elif action == 'pinPost': DB.query("""UPDATE hive_posts SET is_pinned = '1' diff --git a/hive/indexer/custom_op.py b/hive/indexer/custom_op.py index 6c7eb84f46805f791bc750e689f36fec96ee5d52..5a31ae7c5d46790d15dfc1da09b85132fffde7d5 100644 --- a/hive/indexer/custom_op.py +++ b/hive/indexer/custom_op.py @@ -39,11 +39,23 @@ class CustomOp: @classmethod def process_ops(cls, ops, block_num, block_date): + """Given a list of operations in a block, filter and process them.""" + ops_stats = {} for op in ops: if op['id'] not in ['follow', 'community', 'notify']: + op_name = str(op['id']) + '-ignored' + ops_stats[op_name] = ops_stats.get(op_name, 0) + 1 continue + + ops_stats[op['id']] = ops_stats.get(op['id'], 0) + 1 + account = _get_auth(op) if not account: continue @@ -58,6 +70,7 @@ class CustomOp: process_json_community_op(account, op_json, block_date) elif op['id'] == 'notify': cls._process_notify(account, op_json, block_date) + return ops_stats @classmethod def _process_notify(cls, account, op_json, block_date):
@@ -119,29 +132,46 @@ CustomOp: if not all(map(Accounts.exists, [author, blogger])): return - post_id, depth = Posts.get_id_and_depth(author, permlink) - - if depth > 0: - return # prevent comment reblogs - - if not post_id: - log.debug("reblog: post not found: %s/%s", author, permlink) - return - - author_id = Accounts.get_id(author) - blogger_id = Accounts.get_id(blogger) - if 'delete' in op_json and op_json['delete'] == 'delete': - DB.query("DELETE FROM hive_reblogs WHERE account = :a AND " - "post_id = :pid LIMIT 1", a=blogger, pid=post_id) + sql = """ + WITH processing_set AS ( + SELECT hp.id as post_id + FROM hive_posts hp + INNER JOIN hive_accounts ha ON hp.author_id = ha.id + INNER JOIN hive_permlink_data hpd ON hp.permlink_id = hpd.id + WHERE ha.name = :author AND hpd.permlink = :permlink AND hp.depth <= 0 + ) + DELETE FROM hive_reblogs AS hr + WHERE hr.account = :a AND hr.post_id IN (SELECT ps.post_id FROM processing_set ps) + RETURNING hr.post_id + """ + + row = DB.query_row(sql, a=blogger, author=author, permlink=permlink) + if row is None: + log.debug("reblog: post not found: %s/%s", author, permlink) + return + if not DbState.is_initial_sync(): - FeedCache.delete(post_id, blogger_id) + result = dict(row) + FeedCache.delete(result['post_id'], Accounts.get_id(blogger)) else: - sql = ("INSERT INTO hive_reblogs (account, post_id, created_at) " - "VALUES (:a, :pid, :date) ON CONFLICT (account, post_id) DO NOTHING") - DB.query(sql, a=blogger, pid=post_id, date=block_date) + sql = """ + INSERT INTO hive_reblogs (account, post_id, created_at) + SELECT :a, hp.id, :date + FROM hive_posts hp + INNER JOIN hive_accounts ha ON ha.id = hp.author_id + INNER JOIN hive_permlink_data hpd ON hpd.id = hp.permlink_id + WHERE ha.name = :author AND hpd.permlink = :p AND hp.depth <= 0 + ON CONFLICT (account, post_id) DO NOTHING + RETURNING post_id + """ + row = DB.query_row(sql, a=blogger, author=author, p=permlink, date=block_date) + if row is None: + return # post not found, or already reblogged if not DbState.is_initial_sync(): + author_id = Accounts.get_id(author) + blogger_id = Accounts.get_id(blogger) + result = dict(row) + post_id = result['post_id'] FeedCache.insert(post_id, blogger_id, block_date) Notify('reblog', src_id=blogger_id, dst_id=author_id, post_id=post_id, when=block_date, diff --git a/hive/indexer/feed_cache.py b/hive/indexer/feed_cache.py index e826569c90861d6508dfe25b1c0304f924ed44dd..f263b38339f58543ea7fbd98854166b79fd5a88b 100644 --- a/hive/indexer/feed_cache.py +++ b/hive/indexer/feed_cache.py @@ -22,7 +22,7 @@ class FeedCache: assert not DbState.is_initial_sync(), 'writing to feed cache in sync' sql = """INSERT INTO hive_feed_cache (account_id, post_id, created_at) VALUES (:account_id, :id, :created_at) - ON CONFLICT (account_id, post_id) DO NOTHING""" + ON CONFLICT ON CONSTRAINT hive_feed_cache_ux1 DO NOTHING""" DB.query(sql, account_id=account_id, id=post_id, created_at=created_at) @classmethod @@ -54,7 +54,7 @@ class FeedCache: INSERT INTO hive_feed_cache (account_id, post_id, created_at) SELECT hive_accounts.id, hive_posts.id, hive_posts.created_at FROM hive_posts - JOIN hive_accounts ON hive_posts.author = hive_accounts.name + JOIN hive_accounts ON hive_posts.author_id = hive_accounts.id WHERE depth = 0 AND is_deleted = '0' ON CONFLICT DO NOTHING """) diff --git a/hive/indexer/follow.py b/hive/indexer/follow.py index 632d75ba8636f7b094fd9ab940e92fbcd8126ea6..92deadf60bc027199f45e9073d9b53cda288dfc7 100644 --- a/hive/indexer/follow.py +++ b/hive/indexer/follow.py @@ -16,6 +16,45 @@ DB = Db.instance()
FOLLOWERS = 'followers' FOLLOWING = 'following' +FOLLOW_ITEM_INSERT_QUERY = """ + INSERT INTO hive_follows as hf (follower, following, created_at, state, blacklisted, follow_blacklists) + VALUES + ( + :flr, + :flg, + :at, + :state, + (CASE :state + WHEN 3 THEN TRUE + WHEN 4 THEN FALSE + ELSE FALSE + END + ), + (CASE :state + WHEN 3 THEN FALSE + WHEN 4 THEN TRUE + ELSE TRUE + END + ) + ) + ON CONFLICT (follower, following) DO UPDATE + SET + state = (CASE EXCLUDED.state + WHEN 0 THEN 0 -- 0 blocks possibility to update state + ELSE EXCLUDED.state + END), + blacklisted = (CASE EXCLUDED.state + WHEN 3 THEN TRUE + WHEN 5 THEN FALSE + ELSE EXCLUDED.blacklisted + END), + follow_blacklists = (CASE EXCLUDED.state + WHEN 4 THEN TRUE + WHEN 6 THEN FALSE + ELSE EXCLUDED.follow_blacklists + END) + """ + def _flip_dict(dict_to_flip): """Swap keys/values. Returned dict values are array of keys.""" flipped = {} @@ -29,6 +68,8 @@ def _flip_dict(dict_to_flip): class Follow: """Handles processing of incoming follow ups and flushing to db.""" + follow_items_to_flush = dict() + @classmethod def follow_op(cls, account, op_json, date): """Process an incoming follow op.""" @@ -38,41 +79,27 @@ class Follow: # perform delta check new_state = op['state'] - old_state = cls._get_follow_db_state(op['flr'], op['flg']) - if new_state == (old_state or 0): - return - sql = '' - - # insert or update state - if old_state is None: - sql = """INSERT INTO hive_follows (follower, following, - created_at, state, blacklisted, follow_blacklists) VALUES (:flr, :flg, :at, :state, %s)""" - if new_state == 3: - sql = sql % """ true, false """ - elif new_state == 4: - sql = sql % """ false, true """ + old_state = None + if DbState.is_initial_sync(): + # insert or update state + + k = '{}/{}'.format(op['flr'], op['flg']) + + if k in cls.follow_items_to_flush: + old_value = cls.follow_items_to_flush.get(k) + old_value['state'] = op['state'] + cls.follow_items_to_flush[k] = old_value else: - sql = sql % """false, false""" + cls.follow_items_to_flush[k] = dict( + flr=op['flr'], + flg=op['flg'], + state=op['state'], + at=op['at']) + else: - if new_state < 3: - sql = """UPDATE hive_follows SET state = :state - WHERE follower = :flr AND following = :flg""" - elif new_state == 3: - sql = """UPDATE hive_follows SET blacklisted = true - WHERE follower = :flr AND following = :flg""" - elif new_state == 4: - sql = """UPDATE hive_follows SET follow_blacklists = true - WHERE follower = :flr AND following = :flg""" - elif new_state == 5: - sql = """UPDATE hive_follows SET blacklisted = false - WHERE follower = :flr AND following = :flg""" - elif new_state == 6: - sql = """UPDATE hive_follows SET follow_blacklists = false - WHERE follower = :flr AND following = :flg""" - DB.query(sql, **op) - - # track count deltas - if not DbState.is_initial_sync(): + old_state = cls._get_follow_db_state(op['flr'], op['flg']) + # insert or update state + DB.query(FOLLOW_ITEM_INSERT_QUERY, **op) if new_state == 1: Follow.follow(op['flr'], op['flg']) if old_state is None: @@ -141,10 +168,65 @@ class Follow: cls._delta[role][account] = 0 cls._delta[role][account] += direction + @classmethod + def _flush_follow_items(cls): + sql_prefix = """ + INSERT INTO hive_follows as hf (follower, following, created_at, state, blacklisted, follow_blacklists) + VALUES """ + + sql_postfix = """ + ON CONFLICT ON CONSTRAINT hive_follows_pk DO UPDATE + SET + state = (CASE EXCLUDED.state + WHEN 0 THEN 0 -- 0 blocks possibility to update state + ELSE EXCLUDED.state + END), + blacklisted = 
(CASE EXCLUDED.state + WHEN 3 THEN TRUE + WHEN 5 THEN FALSE + ELSE EXCLUDED.blacklisted + END), + follow_blacklists = (CASE EXCLUDED.state + WHEN 4 THEN TRUE + WHEN 6 THEN FALSE + ELSE EXCLUDED.follow_blacklists + END) + WHERE hf.following = EXCLUDED.following AND hf.follower = EXCLUDED.follower + """ + values = [] + limit = 1000 + count = 0 + for _, follow_item in cls.follow_items_to_flush.items(): + if count < limit: + values.append("({}, {}, '{}', {}, {}, {})".format(follow_item['flr'], follow_item['flg'], + follow_item['at'], follow_item['state'], + follow_item['state'] == 3, + follow_item['state'] == 4)) + count = count + 1 + else: + query = sql_prefix + ",".join(values) + query += sql_postfix + DB.query(query) + values.clear() + values.append("({}, {}, '{}', {}, {}, {})".format(follow_item['flr'], follow_item['flg'], + follow_item['at'], follow_item['state'], + follow_item['state'] == 3, + follow_item['state'] == 4)) + count = 1 + + if len(values) > 0: + query = sql_prefix + ",".join(values) + query += sql_postfix + DB.query(query) + + cls.follow_items_to_flush.clear() + @classmethod def flush(cls, trx=True): """Flushes pending follow count deltas.""" + cls._flush_follow_items() + updated = 0 sqls = [] for col, deltas in cls._delta.items(): diff --git a/hive/indexer/jobs.py b/hive/indexer/jobs.py index c64ff37687834b238500972fee6844782f6519ab..a3b9829e1b044e15ca8fbc67241198a51471aa58 100644 --- a/hive/indexer/jobs.py +++ b/hive/indexer/jobs.py @@ -10,11 +10,12 @@ def _last_post_id(db): return db.query_one(sql) or 0 def _last_cached_post_id(db): - sql = "SELECT post_id FROM hive_posts_cache ORDER BY post_id DESC LIMIT 1" + sql = "SELECT id FROM hive_posts ORDER BY id DESC LIMIT 1" return db.query_one(sql) or 0 def audit_cache_missing(db, steem): """Scan all posts to check for missing cache entries.""" + raise NotImplementedError("Post cache is disabled in this version") last_id = _last_cached_post_id(db) step = 1000000 steps = int(last_id / step) + 1 @@ -42,6 +43,7 @@ def audit_cache_missing(db, steem): def audit_cache_deleted(db): """Scan all posts to check for extraneous cache entries.""" + raise NotImplementedError("Post cache is disabled in this version") last_id = _last_cached_post_id(db) step = 1000000 steps = int(last_id / step) + 1 @@ -66,6 +68,7 @@ def audit_cache_deleted(db): def audit_cache_undelete(db, steem): """Scan all posts to check for posts erroneously deleted.""" + raise NotImplementedError("Post cache is disabled in this version") last_id = _last_post_id(db) step = 1000000 steps = int(last_id / step) + 1 diff --git a/hive/indexer/payments.py b/hive/indexer/payments.py index 08892070b8da7d939cf21f78df406d3330f75aa1..f13cf2f4152780b741c5e8755ee72d2ce9c9cbf3 100644 --- a/hive/indexer/payments.py +++ b/hive/indexer/payments.py @@ -39,10 +39,10 @@ class Payments: DB.query(sql, val=new_amount, id=record['post_id']) # notify cached_post of new promoted balance, and trigger update - if not DbState.is_initial_sync(): - CachedPost.update_promoted_amount(record['post_id'], new_amount) - author, permlink = cls._split_url(op['memo']) - CachedPost.vote(author, permlink, record['post_id']) + #if not DbState.is_initial_sync(): + # CachedPost.update_promoted_amount(record['post_id'], new_amount) + # author, permlink = cls._split_url(op['memo']) + # CachedPost.vote(author, permlink, record['post_id']) @classmethod def _validated(cls, op, tx_idx, num, date): diff --git a/hive/indexer/post_data_cache.py b/hive/indexer/post_data_cache.py new file mode 100644 index 
0000000000000000000000000000000000000000..f6a4fab6d0995f792074145fbcde6debbb8c574f --- /dev/null +++ b/hive/indexer/post_data_cache.py @@ -0,0 +1,53 @@ +import logging +from hive.utils.normalize import escape_characters +from hive.db.adapter import Db + +log = logging.getLogger(__name__) +DB = Db.instance() + +class PostDataCache(object): + """ Provides a cache for DB operations on the post data table in order to speed up initial sync """ + _data = {} + + @classmethod + def is_cached(cls, pid): + """ Check if data is cached """ + return pid in cls._data + + @classmethod + def add_data(cls, pid, post_data): + """ Add data to cache """ + cls._data[pid] = post_data + + @classmethod + def flush(cls): + """ Flush data from cache to db """ + if cls._data: + sql = """ + INSERT INTO + hive_post_data (id, title, preview, img_url, body, json) + VALUES + """ + values = [] + for k, data in cls._data.items(): + title = "''" if not data['title'] else "'{}'".format(escape_characters(data['title'])) + preview = "''" if not data['preview'] else "'{}'".format(escape_characters(data['preview'])) + img_url = "''" if not data['img_url'] else "'{}'".format(escape_characters(data['img_url'])) + body = "''" if not data['body'] else "'{}'".format(escape_characters(data['body'])) + json = "'{}'" if not data['json'] else "'{}'".format(escape_characters(data['json'])) + values.append("({},{},{},{},{},{})".format(k, title, preview, img_url, body, json)) + sql += ','.join(values) + sql += """ + ON CONFLICT (id) + DO + UPDATE SET + title = EXCLUDED.title, + preview = EXCLUDED.preview, + img_url = EXCLUDED.img_url, + body = EXCLUDED.body, + json = EXCLUDED.json + WHERE + hive_post_data.id = EXCLUDED.id + """ + DB.query(sql) + cls._data.clear() diff --git a/hive/indexer/posts.py b/hive/indexer/posts.py index 9a72cd2433dfd7803db45a84480a4f9ece265b7a..6fae17d4bf2b5ce0279b327389f432df916cbefd 100644 --- a/hive/indexer/posts.py +++ b/hive/indexer/posts.py @@ -3,14 +3,18 @@ import logging import collections +from json import dumps, loads + from hive.db.adapter import Db from hive.db.db_state import DbState from hive.indexer.accounts import Accounts -from hive.indexer.cached_post import CachedPost from hive.indexer.feed_cache import FeedCache from hive.indexer.community import Community, START_DATE from hive.indexer.notify import Notify +from hive.indexer.post_data_cache import PostDataCache +from hive.indexer.tags import Tags +from hive.utils.normalize import legacy_amount, asset_to_hbd_hive log = logging.getLogger(__name__) DB = Db.instance() @@ -40,8 +44,13 @@ class Posts: cls._ids[url] = _id else: cls._miss += 1 - sql = """SELECT id FROM hive_posts WHERE - author = :a AND permlink = :p""" + sql = """ + SELECT hp.id + FROM hive_posts hp + INNER JOIN hive_accounts ha_a ON ha_a.id = hp.author_id + INNER JOIN hive_permlink_data hpd_p ON hpd_p.id = hp.permlink_id + WHERE ha_a.name = :a AND hpd_p.permlink = :p + """ _id = DB.query_one(sql, a=author, p=permlink) if _id: cls._set_id(url, _id) @@ -72,21 +81,6 @@ class Posts: cls._set_id(url, pid) return tuples - @classmethod - def get_id_and_depth(cls, author, permlink): - """Get the id and depth of @author/permlink post.""" - _id = cls.get_id(author, permlink) - if not _id: - return (None, -1) - depth = DB.query_one("SELECT depth FROM hive_posts WHERE id = :id", id=_id) - return (_id, depth) - - @classmethod - def is_pid_deleted(cls, pid): - """Check if the state of post is deleted.""" - sql = "SELECT is_deleted FROM hive_posts WHERE id = :id" - return DB.query_one(sql, id=pid) -
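Review note: the new PostDataCache introduced above acts as a write-behind buffer during initial sync: add_data() accumulates rows keyed by post id, and flush() emits one multi-row INSERT ... ON CONFLICT DO UPDATE. A minimal usage sketch (post ids and field values are illustrative):

    from hive.indexer.post_data_cache import PostDataCache

    # buffer post bodies while processing a batch of blocks...
    PostDataCache.add_data(101, dict(title='hello', preview='', img_url='',
                                     body='first post', json='{}'))
    PostDataCache.add_data(102, dict(title='world', preview='', img_url='',
                                     body='second post', json='{}'))

    assert PostDataCache.is_cached(101)
    # ...then write them out in a single statement at the end of the batch.
    PostDataCache.flush()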
@classmethod def delete_op(cls, op): """Given a delete_comment op, mark the post as deleted. @@ -98,151 +92,313 @@ class Posts: @classmethod def comment_op(cls, op, block_date): """Register new/edited/undeleted posts; insert into feed cache.""" - pid = cls.get_id(op['author'], op['permlink']) - if not pid: - # post does not exist, go ahead and process it. - cls.insert(op, block_date) - elif not cls.is_pid_deleted(pid): - # post exists, not deleted, thus an edit. ignore. - cls.update(op, block_date, pid) - else: - # post exists but was deleted. time to reinstate. - cls.undelete(op, block_date, pid) - @classmethod - def insert(cls, op, date): - """Inserts new post records.""" - sql = """INSERT INTO hive_posts (is_valid, is_muted, parent_id, author, - permlink, category, community_id, depth, created_at) - VALUES (:is_valid, :is_muted, :parent_id, :author, - :permlink, :category, :community_id, :depth, :date)""" - sql += ";SELECT currval(pg_get_serial_sequence('hive_posts','id'))" - post = cls._build_post(op, date) - result = DB.query(sql, **post) - post['id'] = int(list(result)[0][0]) - cls._set_id(op['author']+'/'+op['permlink'], post['id']) + sql = """ + SELECT id, author_id, permlink_id, post_category, parent_id, community_id, is_valid, is_muted, depth, is_edited + FROM process_hive_post_operation((:author)::varchar, (:permlink)::varchar, (:parent_author)::varchar, (:parent_permlink)::varchar, (:date)::timestamp, (:community_support_start_date)::timestamp); + """ + + row = DB.query_row(sql, author=op['author'], permlink=op['permlink'], parent_author=op['parent_author'], + parent_permlink=op['parent_permlink'], date=block_date, community_support_start_date=START_DATE) + + result = dict(row) + + # TODO: enhance the community-related post validation check and honor is_muted.
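+        # process_hive_post_operation() is a DB-side function introduced by this
+        # patch: it inserts, edits, or un-deletes the post as appropriate and
+        # hands back the resulting row, replacing the old insert/update/undelete
+        # branching on the Python side.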
+ error = cls._verify_post_against_community(op, result['community_id'], result['is_valid'], result['is_muted']) + + cls._set_id(op['author']+'/'+op['permlink'], result['id']) + + # add content data to hive_post_data + post_data = dict(title=op['title'], preview=op['preview'] if 'preview' in op else "", + img_url=op['img_url'] if 'img_url' in op else "", body=op['body'], + json=op['json_metadata'] if op['json_metadata'] else '{}') + PostDataCache.add_data(result['id'], post_data) + + md = {} + # At least one case where jsonMetadata was double-encoded: condenser#895 + # jsonMetadata = JSON.parse(jsonMetadata); + try: + md = loads(op['json_metadata']) + if not isinstance(md, dict): + md = {} + except Exception: + pass + + tags = [result['post_category']] + if md and 'tags' in md and isinstance(md['tags'], list): + tags = tags + md['tags'] + tags = map(lambda tag: (str(tag) or '').strip('# ').lower()[:32], tags) + tags = filter(None, tags) + from funcy.seqs import distinct + tags = list(distinct(tags))[:5] + + for tag in tags: + Tags.add_tag(result['id'], tag) if not DbState.is_initial_sync(): - if post['error']: - author_id = Accounts.get_id(post['author']) - Notify('error', dst_id=author_id, when=date, - post_id=post['id'], payload=post['error']).write() - CachedPost.insert(op['author'], op['permlink'], post['id']) - if op['parent_author']: # update parent's child count - CachedPost.recount(op['parent_author'], - op['parent_permlink'], post['parent_id']) - cls._insert_feed_cache(post) + if error: + author_id = result['author_id'] + Notify('error', dst_id=author_id, when=block_date, + post_id=result['id'], payload=error).write() + cls._insert_feed_cache(result, block_date) @classmethod - def undelete(cls, op, date, pid): - """Re-allocates an existing record flagged as deleted.""" - sql = """UPDATE hive_posts SET is_valid = :is_valid, - is_muted = :is_muted, is_deleted = '0', is_pinned = '0', - parent_id = :parent_id, category = :category, - community_id = :community_id, depth = :depth - WHERE id = :id""" - post = cls._build_post(op, date, pid) - DB.query(sql, **post) + def comment_payout_op(cls, ops, date): + ops_stats = {} + sql = """ + UPDATE hive_posts AS ihp SET + total_payout_value = data_source.total_payout_value, + curator_payout_value = data_source.curator_payout_value, + author_rewards = data_source.author_rewards, + author_rewards_hive = data_source.author_rewards_hive, + author_rewards_hbd = data_source.author_rewards_hbd, + author_rewards_vests = data_source.author_rewards_vests, + last_payout = data_source.last_payout, + cashout_time = data_source.cashout_time, + is_paidout = true + + FROM + ( + SELECT ha_a.id as author_id, hpd_p.id as permlink_id, + t.total_payout_value, + t.curator_payout_value, + t.author_rewards, + t.author_rewards_hive, + t.author_rewards_hbd, + t.author_rewards_vests, + t.last_payout, + t.cashout_time + from + ( + VALUES + --- put all constant values here + {} + ) AS T(author, permlink, + total_payout_value, + curator_payout_value, + author_rewards, + author_rewards_hive, + author_rewards_hbd, + author_rewards_vests, + last_payout, + cashout_time) + INNER JOIN hive_accounts ha_a ON ha_a.name = t.author + INNER JOIN hive_permlink_data hpd_p ON hpd_p.permlink = t.permlink + ) as data_source(author_id, permlink_id, total_payout_value) + WHERE ihp.permlink_id = data_source.permlink_id and ihp.author_id = data_source.author_id + """ + + values = [] + values_limit = 1000 + + """ Process comment payment operations """ + for k, v in ops.items(): + author, permlink 
= k.split("/") + # total payout to curators + curator_rewards_sum = 0 + # author payouts + author_rewards = 0 + author_rewards_hive = 0 + author_rewards_hbd = 0 + author_rewards_vests = 0 + # total payout for comment + comment_author_reward = None + for operation in v: + for op, value in operation.items(): + if op in ops_stats: + ops_stats[op] += 1 + else: + ops_stats[op] = 1 + + if op == 'curation_reward_operation': + curator_rewards_sum = curator_rewards_sum + int(value['reward']['amount']) + elif op == 'author_reward_operation': + author_rewards_hive = value['hive_payout']['amount'] + author_rewards_hbd = value['hbd_payout']['amount'] + author_rewards_vests = value['vesting_payout']['amount'] + elif op == 'comment_reward_operation': + comment_author_reward = value['payout'] + author_rewards = value['author_rewards'] + curator_rewards = {'amount' : str(curator_rewards_sum), 'precision': 6, 'nai': '@@000000037'} + + values.append("('{}', '{}', '{}', '{}', {}, {}, {}, {}, '{}'::timestamp, '{}'::timestamp)".format(author, permlink, + legacy_amount(comment_author_reward), # total_payout_value + legacy_amount(curator_rewards), #curator_payout_value + author_rewards, + author_rewards_hive, + author_rewards_hbd, + author_rewards_vests, + date, #last_payout + date #cashout_time + )) + + if len(values) >= values_limit: + values_str = ','.join(values) + actual_query = sql.format(values_str) + DB.query(actual_query) + values.clear() + + if len(values) > 0: + values_str = ','.join(values) + actual_query = sql.format(values_str) + DB.query(actual_query) + values.clear() + return ops_stats - if not DbState.is_initial_sync(): - if post['error']: - author_id = Accounts.get_id(post['author']) - Notify('error', dst_id=author_id, when=date, - post_id=post['id'], payload=post['error']).write() + @classmethod + def update_child_count(cls, child_id, op='+'): + """ Increase/decrease child count by 1 """ + sql = """ + UPDATE + hive_posts + SET + children = GREATEST(0, ( + SELECT + CASE + WHEN children is NULL THEN 0 + WHEN children=32762 THEN 0 + ELSE children + END + FROM + hive_posts + WHERE id = (SELECT parent_id FROM hive_posts WHERE id = :child_id) + )::int + """ + if op == '+': + sql += """ + 1)""" + else: + sql += """ - 1)""" + sql += """ WHERE id = (SELECT parent_id FROM hive_posts WHERE id = :child_id)""" - CachedPost.undelete(pid, post['author'], post['permlink'], - post['category']) - cls._insert_feed_cache(post) + DB.query(sql, child_id=child_id) + + @classmethod + def comment_options_op(cls, op): + """ Process comment_options_operation """ + max_accepted_payout = legacy_amount(op['max_accepted_payout']) if 'max_accepted_payout' in op else '1000000.000 HBD' + allow_votes = op['allow_votes'] if 'allow_votes' in op else True + allow_curation_rewards = op['allow_curation_rewards'] if 'allow_curation_rewards' in op else True + percent_hbd = op['percent_hbd'] if 'percent_hbd' in op else 10000 + extensions = op['extensions'] if 'extensions' in op else [] + beneficiaries = [] + for extension in extensions: + if 'beneficiaries' in extension: + beneficiaries = extension['beneficiaries'] + sql = """ + UPDATE + hive_posts hp + SET + max_accepted_payout = :max_accepted_payout, + percent_hbd = :percent_hbd, + allow_votes = :allow_votes, + allow_curation_rewards = :allow_curation_rewards, + beneficiaries = :beneficiaries + WHERE + hp.author_id = (SELECT id FROM hive_accounts WHERE name = :author) AND + hp.permlink_id = (SELECT id FROM hive_permlink_data WHERE permlink = :permlink) + """ + DB.query(sql,
author=op['author'], permlink=op['permlink'], max_accepted_payout=max_accepted_payout, + percent_hbd=percent_hbd, allow_votes=allow_votes, allow_curation_rewards=allow_curation_rewards, + beneficiaries=beneficiaries) @classmethod def delete(cls, op): """Marks a post record as being deleted.""" - pid, depth = cls.get_id_and_depth(op['author'], op['permlink']) - DB.query("UPDATE hive_posts SET is_deleted = '1' WHERE id = :id", id=pid) + + sql = """ + SELECT id, depth + FROM delete_hive_post((:author)::varchar, (:permlink)::varchar); + """ + row = DB.query_row(sql, author=op['author'], permlink=op['permlink']) + + result = dict(row) + pid = result['id'] if not DbState.is_initial_sync(): - CachedPost.delete(pid, op['author'], op['permlink']) + depth = result['depth'] + if depth == 0: - # TODO: delete from hive_reblogs -- otherwise feed cache gets populated with deleted posts somwrimas + # TODO: delete from hive_reblogs -- otherwise feed cache gets + # populated with deleted posts sometimes FeedCache.delete(pid) - else: - # force parent child recount when child is deleted - prnt = cls._get_parent_by_child_id(pid) - CachedPost.recount(prnt['author'], prnt['permlink'], prnt['id']) + # force parent child recount when child is deleted + cls.update_child_count(pid, '-') @classmethod - def update(cls, op, date, pid): - """Handle post updates. - - Here we could also build content diffs, but for now just used - a signal to update cache record. - """ - # pylint: disable=unused-argument - if not DbState.is_initial_sync(): - CachedPost.update(op['author'], op['permlink'], pid) + def update_comment_pending_payouts(cls, hived, posts): + """ Update pending payout data of given posts using hived's cashout info """ + comment_pending_payouts = hived.get_comment_pending_payouts(posts) + for comment_pending_payout in comment_pending_payouts: + if 'cashout_info' in comment_pending_payout: + cpp = comment_pending_payout['cashout_info'] + sql = """UPDATE + hive_posts + SET + total_payout_value = :total_payout_value, + curator_payout_value = :curator_payout_value, + max_accepted_payout = :max_accepted_payout, + author_rewards = :author_rewards, + children_abs_rshares = :children_abs_rshares, + rshares = :net_rshares, + abs_rshares = :abs_rshares, + vote_rshares = :vote_rshares, + net_votes = :net_votes, + active = :active, + last_payout = :last_payout, + cashout_time = :cashout_time, + max_cashout_time = :max_cashout_time, + percent_hbd = :percent_hbd, + reward_weight = :reward_weight, + allow_replies = :allow_replies, + allow_votes = :allow_votes, + allow_curation_rewards = :allow_curation_rewards + WHERE id = ( + SELECT hp.id + FROM hive_posts hp + INNER JOIN hive_accounts ha_a ON ha_a.id = hp.author_id + INNER JOIN hive_permlink_data hpd_p ON hpd_p.id = hp.permlink_id + WHERE ha_a.name = :author AND hpd_p.permlink = :permlink + ) + """ + + DB.query(sql, total_payout_value=legacy_amount(cpp['total_payout_value']), + curator_payout_value=legacy_amount(cpp['curator_payout_value']), + max_accepted_payout=legacy_amount(cpp['max_accepted_payout']), + author_rewards=cpp['author_rewards'], + children_abs_rshares=cpp['children_abs_rshares'], + net_rshares=cpp['net_rshares'], + abs_rshares=cpp['abs_rshares'], + vote_rshares=cpp['vote_rshares'], + net_votes=cpp['net_votes'], + active=cpp['active'], + last_payout=cpp['last_payout'], + cashout_time=cpp['cashout_time'], + max_cashout_time=cpp['max_cashout_time'], + percent_hbd=cpp['percent_hbd'], + reward_weight=cpp['reward_weight'], + allow_replies=cpp['allow_replies'], + allow_votes=cpp['allow_votes'],
allow_curation_rewards=cpp['allow_curation_rewards'], + author=comment_pending_payout['author'], permlink=comment_pending_payout['permlink']) @classmethod - def _get_parent_by_child_id(cls, child_id): - """Get parent's `id`, `author`, `permlink` by child id.""" - sql = """SELECT id, author, permlink FROM hive_posts - WHERE id = (SELECT parent_id FROM hive_posts - WHERE id = :child_id)""" - result = DB.query_row(sql, child_id=child_id) - assert result, "parent of %d not found" % child_id - return result + def _insert_feed_cache(cls, result, date): + """Insert the new post into feed cache if it's not a comment.""" + if not result['depth']: + cls._insert_feed_cache4(result['depth'], result['id'], result['author_id'], date) @classmethod - def _insert_feed_cache(cls, post): + def _insert_feed_cache4(cls, post_depth, post_id, author_id, post_date): """Insert the new post into feed cache if it's not a comment.""" - if not post['depth']: - account_id = Accounts.get_id(post['author']) - FeedCache.insert(post['id'], account_id, post['date']) + if not post_depth: + FeedCache.insert(post_id, author_id, post_date) @classmethod - def _build_post(cls, op, date, pid=None): - """Validate and normalize a post operation. - - Post is muted if: - - parent was muted - - author unauthorized - - Post is invalid if: - - parent is invalid - - author unauthorized - """ - # TODO: non-nsfw post in nsfw community is `invalid` - - # if this is a top-level post: - if not op['parent_author']: - parent_id = None - depth = 0 - category = op['parent_permlink'] - community_id = None - if date > START_DATE: - community_id = Community.validated_id(category) - is_valid = True - is_muted = False - - # this is a comment; inherit parent props. - else: - parent_id = cls.get_id(op['parent_author'], op['parent_permlink']) - sql = """SELECT depth, category, community_id, is_valid, is_muted - FROM hive_posts WHERE id = :id""" - (parent_depth, category, community_id, is_valid, - is_muted) = DB.query_row(sql, id=parent_id) - depth = parent_depth + 1 - if not is_valid: error = 'replying to invalid post' - elif is_muted: error = 'replying to muted post' - - # check post validity in specified context + def _verify_post_against_community(cls, op, community_id, is_valid, is_muted): error = None if community_id and is_valid and not Community.is_post_valid(community_id, op): error = 'not authorized' #is_valid = False # TODO: reserved for future blacklist status? 
is_muted = True + return error - return dict(author=op['author'], permlink=op['permlink'], id=pid, - is_valid=is_valid, is_muted=is_muted, parent_id=parent_id, - depth=depth, category=category, community_id=community_id, - date=date, error=error) diff --git a/hive/indexer/sync.py b/hive/indexer/sync.py index 729dcf5ad8e04b50896862a023ab66001ffe38ec..fffc403decac7d369d52317794295a0e522fdd89 100644 --- a/hive/indexer/sync.py +++ b/hive/indexer/sync.py @@ -5,6 +5,11 @@ import glob from time import perf_counter as perf import os import ujson as json +import time + +import concurrent, threading, queue +from concurrent.futures import ThreadPoolExecutor +from concurrent.futures import Future from funcy.seqs import drop from toolz import partition_all @@ -16,7 +21,6 @@ from hive.steem.block.stream import MicroForkException from hive.indexer.blocks import Blocks from hive.indexer.accounts import Accounts -from hive.indexer.cached_post import CachedPost from hive.indexer.feed_cache import FeedCache from hive.indexer.follow import Follow from hive.indexer.community import Community @@ -26,6 +30,156 @@ from hive.server.common.mutes import Mutes log = logging.getLogger(__name__) +CONTINUE_PROCESSING = True + +def print_ops_stats(prefix, ops_stats): + log.info("############################################################################") + log.info(prefix) + sorted_stats = sorted(ops_stats.items(), key=lambda kv: kv[1], reverse=True) + for (k, v) in sorted_stats: + log.info("`{}': {}".format(k, v)) + + log.info("############################################################################") + +def prepare_vops(vops_by_block): + preparedVops = {} + + for blockNum, blockDict in vops_by_block.items(): + vopsList = blockDict['ops'] + date = blockDict['timestamp'] + preparedVops[blockNum] = Blocks.prepare_vops(vopsList, date) + + return preparedVops + +def _block_provider(node, queue, lbound, ubound, chunk_size): + try: + num = 0 + count = ubound - lbound + log.info("[SYNC] start block %d, +%d to sync", lbound, count) + timer = Timer(count, entity='block', laps=['rps', 'wps']) + while CONTINUE_PROCESSING and lbound < ubound: + to = min(lbound + chunk_size, ubound) + timer.batch_start() + blocks = node.get_blocks_range(lbound, to) + lbound = to + timer.batch_lap() + queue.put(blocks) + num = num + 1 + return num + except KeyboardInterrupt: + log.info("Caught SIGINT") + + except Exception: + log.exception("Exception caught during fetching blocks") + +def _vops_provider(node, queue, lbound, ubound, chunk_size): + try: + num = 0 + count = ubound - lbound + log.info("[SYNC] start vops %d, +%d to sync", lbound, count) + timer = Timer(count, entity='vops-chunk', laps=['rps', 'wps']) + + while CONTINUE_PROCESSING and lbound < ubound: + to = min(lbound + chunk_size, ubound) + timer.batch_start() + vops = node.enum_virtual_ops(lbound, to) + preparedVops = prepare_vops(vops) + lbound = to + timer.batch_lap() + queue.put(preparedVops) + num = num + 1 + return num + except KeyboardInterrupt: + log.info("Caught SIGINT") + + except Exception: + log.exception("Exception caught during fetching vops...") + +def _block_consumer(node, blocksQueue, vopsQueue, is_initial_sync, lbound, ubound, chunk_size): + num = 0 + try: + count = ubound - lbound + timer = Timer(count, entity='block', laps=['rps', 'wps']) + total_ops_stats = {} + time_start = perf() + while lbound < ubound: + if blocksQueue.empty() and CONTINUE_PROCESSING: + log.info("Awaiting any block to process...") + + blocks = [] + if not blocksQueue.empty() or 
CONTINUE_PROCESSING: + blocks = blocksQueue.get() + blocksQueue.task_done() + + if vopsQueue.empty() and CONTINUE_PROCESSING: + log.info("Awaiting any vops to process...") + + preparedVops = [] + if not vopsQueue.empty() or CONTINUE_PROCESSING: + preparedVops = vopsQueue.get() + vopsQueue.task_done() + + to = min(lbound + chunk_size, ubound) + + timer.batch_start() + + block_start = perf() + ops_stats = dict(Blocks.process_multi(blocks, preparedVops, node, is_initial_sync)) + Blocks.ops_stats.clear() + block_end = perf() + + timer.batch_lap() + timer.batch_finish(len(blocks)) + time_current = perf() + + prefix = ("[SYNC] Got block %d @ %s" % ( + to - 1, blocks[-1]['timestamp'])) + log.info(timer.batch_status(prefix)) + log.info("[SYNC] Time elapsed: %fs", time_current - time_start) + + total_ops_stats = Blocks.merge_ops_stats(total_ops_stats, ops_stats) + + if block_end - block_start > 1.0: + print_ops_stats("Operations present in the processed blocks:", ops_stats) + + lbound = to + + num = num + 1 + + if not CONTINUE_PROCESSING and blocksQueue.empty() and vopsQueue.empty(): + break + + print_ops_stats("All operations present in the processed blocks:", total_ops_stats) + return num + except KeyboardInterrupt: + log.info("Caught SIGINT") + + except Exception: + log.exception("Exception caught during processing blocks...") + +def _node_data_provider(self, is_initial_sync, lbound, ubound, chunk_size): + blocksQueue = queue.Queue(maxsize=10) + vopsQueue = queue.Queue(maxsize=10) + global CONTINUE_PROCESSING + + with ThreadPoolExecutor(max_workers = 4) as pool: + try: + pool.submit(_block_provider, self._steem, blocksQueue, lbound, ubound, chunk_size) + pool.submit(_vops_provider, self._steem, vopsQueue, lbound, ubound, chunk_size) + blockConsumerFuture = pool.submit(_block_consumer, self._steem, blocksQueue, vopsQueue, is_initial_sync, lbound, ubound, chunk_size) + + blockConsumerFuture.result() + if not CONTINUE_PROCESSING and blocksQueue.empty() and vopsQueue.empty(): + pool.shutdown(False) + except KeyboardInterrupt: + log.info(""" ********************************************************** + CAUGHT SIGINT. PLEASE WAIT... PROCESSING DATA IN QUEUES... + ********************************************************** + """) + CONTINUE_PROCESSING = False + blocksQueue.join() + vopsQueue.join() + class Sync: """Manages the sync/index process. 
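Review note: _node_data_provider() above wires two producers (blocks, vops) and one consumer together through bounded queues, so node fetches and DB writes overlap. Stripped to its skeleton, the pattern is (a generic sketch, not the patch's exact code):

    import queue
    from concurrent.futures import ThreadPoolExecutor

    def producer(q, items):
        for item in items:      # e.g. fetched block ranges
            q.put(item)         # blocks when the queue is full (back-pressure)

    def consumer(q, n):
        for _ in range(n):
            item = q.get()      # blocks until a producer delivers
            q.task_done()       # lets q.join() observe completion

    work = queue.Queue(maxsize=10)
    with ThreadPoolExecutor(max_workers=2) as pool:
        pool.submit(producer, work, range(100))
        done = pool.submit(consumer, work, 100)
        done.result()
    work.join()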
@@ -59,6 +213,8 @@ class Sync: if DbState.is_initial_sync(): # resume initial sync self.initial() + if not CONTINUE_PROCESSING: + return DbState.finish_initial_sync() else: @@ -66,7 +222,7 @@ class Sync: Blocks.verify_head(self._steem) # perform cleanup if process did not exit cleanly - CachedPost.recover_missing_posts(self._steem) + # CachedPost.recover_missing_posts(self._steem) #audit_cache_missing(self._db, self._steem) #audit_cache_deleted(self._db) @@ -85,8 +241,8 @@ class Sync: self.from_steemd() # take care of payout backlog - CachedPost.dirty_paidouts(Blocks.head_date()) - CachedPost.flush(self._steem, trx=True) + # CachedPost.dirty_paidouts(Blocks.head_date()) + # CachedPost.flush(self._steem, trx=True) try: # listen for new blocks @@ -102,9 +258,11 @@ class Sync: log.info("[INIT] *** Initial fast sync ***") self.from_checkpoints() self.from_steemd(is_initial_sync=True) + if not CONTINUE_PROCESSING: + return log.info("[INIT] *** Initial cache build ***") - CachedPost.recover_missing_posts(self._steem) + # CachedPost.recover_missing_posts(self._steem) FeedCache.rebuild() Follow.force_recount() @@ -116,12 +274,14 @@ class Sync: exactly one block in JSON format. """ # pylint: disable=no-self-use + last_block = Blocks.head_num() tuplize = lambda path: [int(path.split('/')[-1].split('.')[0]), path] basedir = os.path.dirname(os.path.realpath(__file__ + "/../..")) files = glob.glob(basedir + "/checkpoints/*.json.lst") tuples = sorted(map(tuplize, files), key=lambda f: f[0]) + vops = {} last_read = 0 for (num, path) in tuples: @@ -133,6 +293,7 @@ class Sync: skip_lines = last_block - last_read remaining = drop(skip_lines, f) for lines in partition_all(chunk_size, remaining): + raise RuntimeError("Sync from checkpoint disabled") Blocks.process_multi(map(json.loads, lines), True) last_block = num last_read = num @@ -147,6 +308,10 @@ class Sync: if count < 1: return + if is_initial_sync: + _node_data_provider(self, is_initial_sync, lbound, ubound, chunk_size) + return + log.info("[SYNC] start block %d, +%d to sync", lbound, count) timer = Timer(count, entity='block', laps=['rps', 'wps']) while lbound < ubound: @@ -155,11 +320,13 @@ class Sync: # fetch blocks to = min(lbound + chunk_size, ubound) blocks = steemd.get_blocks_range(lbound, to) + vops = steemd.enum_virtual_ops(lbound, to) + preparedVops = prepare_vops(vops) lbound = to timer.batch_lap() # process blocks - Blocks.process_multi(blocks, is_initial_sync) + Blocks.process_multi(blocks, preparedVops, steemd, is_initial_sync) timer.batch_finish(len(blocks)) _prefix = ("[SYNC] Got block %d @ %s" % ( @@ -174,7 +341,7 @@ class Sync: # edits and pre-payout votes. If the post has not been paid out yet, # then the worst case is it will be synced upon payout. If the post # is already paid out, worst case is to lose an edit. 
- CachedPost.flush(steemd, trx=True) + #CachedPost.flush(steemd, trx=True) def listen(self): """Live (block following) mode.""" @@ -192,19 +359,15 @@ class Sync: start_time = perf() self._db.query("START TRANSACTION") - num = Blocks.process(block) + num = Blocks.process(block, {}, steemd) follows = Follow.flush(trx=False) accts = Accounts.flush(steemd, trx=False, spread=8) - CachedPost.dirty_paidouts(block['timestamp']) - cnt = CachedPost.flush(steemd, trx=False) self._db.query("COMMIT") ms = (perf() - start_time) * 1000 - log.info("[LIVE] Got block %d at %s --% 4d txs,% 3d posts,% 3d edits," - "% 3d payouts,% 3d votes,% 3d counts,% 3d accts,% 3d follows" + log.info("[LIVE] Got block %d at %s --% 4d txs,% 3d accts,% 3d follows" " --% 5dms%s", num, block['timestamp'], len(block['transactions']), - cnt['insert'], cnt['update'], cnt['payout'], cnt['upvote'], - cnt['recount'], accts, follows, ms, ' SLOW' if ms > 1000 else '') + accts, follows, ms, ' SLOW' if ms > 1000 else '') if num % 1200 == 0: #1hr log.warning("head block %d @ %s", num, block['timestamp']) diff --git a/hive/indexer/tags.py b/hive/indexer/tags.py new file mode 100644 index 0000000000000000000000000000000000000000..e63d198e0ac1b9b6088a3ef67cf955dd69d9a165 --- /dev/null +++ b/hive/indexer/tags.py @@ -0,0 +1,71 @@ +import logging +from hive.db.adapter import Db + +log = logging.getLogger(__name__) +DB = Db.instance() + +from hive.utils.normalize import escape_characters + +class Tags(object): + """ Tags cache """ + _tags = [] + + @classmethod + def add_tag(cls, tid, tag): + """ Add tag to cache """ + cls._tags.append((tid, tag)) + + @classmethod + def flush(cls): + """ Flush tags to table """ + if cls._tags: + limit = 1000 + + sql = """ + INSERT INTO + hive_tag_data (tag) + VALUES {} + ON CONFLICT DO NOTHING + """ + values = [] + for tag in cls._tags: + values.append("('{}')".format(escape_characters(tag[1]))) + if len(values) >= limit: + tag_query = str(sql) + DB.query(tag_query.format(','.join(values))) + values.clear() + if len(values) > 0: + tag_query = str(sql) + DB.query(tag_query.format(','.join(values))) + values.clear() + + sql = """ + INSERT INTO + hive_post_tags (post_id, tag_id) + SELECT + data_source.post_id, data_source.tag_id + FROM + ( + SELECT + post_id, htd.id + FROM + ( + VALUES + {} + ) AS T(post_id, tag) + INNER JOIN hive_tag_data htd ON htd.tag = T.tag + ) AS data_source(post_id, tag_id) + ON CONFLICT DO NOTHING + """ + values = [] + for tag in cls._tags: + values.append("({}, '{}')".format(tag[0], escape_characters(tag[1]))) + if len(values) >= limit: + tag_query = str(sql) + DB.query(tag_query.format(','.join(values))) + values.clear() + if len(values) > 0: + tag_query = str(sql) + DB.query(tag_query.format(','.join(values))) + values.clear() + cls._tags.clear() diff --git a/hive/indexer/votes.py b/hive/indexer/votes.py new file mode 100644 index 0000000000000000000000000000000000000000..6451a1e347d42d6ea2cf9c144ad16620562717a9 --- /dev/null +++ b/hive/indexer/votes.py @@ -0,0 +1,132 @@ +""" Votes indexing and processing """ + +import logging + +from hive.db.adapter import Db + +log = logging.getLogger(__name__) +DB = Db.instance() + +class Votes: + """ Class for managing posts votes """ + _votes_data = {} + + @classmethod + def get_vote_count(cls, author, permlink): + """ Get vote count for given post """ + sql = """ + SELECT count(hv.id) + FROM hive_votes hv + INNER JOIN hive_accounts ha_a ON ha_a.id = hv.author_id + INNER JOIN hive_permlink_data hpd_p ON hpd_p.id = hv.permlink_id + WHERE ha_a.name = 
:author AND hpd_p.permlink = :permlink + """ + ret = DB.query_row(sql, author=author, permlink=permlink) + return 0 if ret is None else int(ret.count) + + @classmethod + def get_upvote_count(cls, author, permlink): + """ Get upvote count for given post """ + sql = """ + SELECT count(hv.id) + FROM hive_votes hv + INNER JOIN hive_accounts ha_a ON ha_a.id = hv.author_id + INNER JOIN hive_permlink_data hpd_p ON hpd_p.id = hv.permlink_id + WHERE ha_a.name = :author AND hpd_p.permlink = :permlink + AND vote_percent > 0 + """ + ret = DB.query_row(sql, author=author, permlink=permlink) + return 0 if ret is None else int(ret.count) + + @classmethod + def get_downvote_count(cls, author, permlink): + """ Get downvote count for given post """ + sql = """ + SELECT count(hv.id) + FROM hive_votes hv + INNER JOIN hive_accounts ha_a ON ha_a.id = hv.author_id + INNER JOIN hive_permlink_data hpd_p ON hpd_p.id = hv.permlink_id + WHERE ha_a.name = :author AND hpd_p.permlink = :permlink + AND vote_percent < 0 + """ + ret = DB.query_row(sql, author=author, permlink=permlink) + return 0 if ret is None else int(ret.count) + + inside_flush = False + + @classmethod + def vote_op(cls, vop, date): + """ Process vote_operation """ + voter = vop['value']['voter'] + author = vop['value']['author'] + permlink = vop['value']['permlink'] + + if cls.inside_flush: + log.error("Tried to add new vote-info into _votes_data dict during flush") + raise RuntimeError("Fatal error") + + key = voter + "/" + author + "/" + permlink + + cls._votes_data[key] = dict(voter=voter, + author=author, + permlink=permlink, + vote_percent=vop['value']['vote_percent'], + weight=vop['value']['weight'], + rshares=vop['value']['rshares'], + last_update=date) + + @classmethod + def flush(cls): + """ Flush vote data from cache to database """ + cls.inside_flush = True + if cls._votes_data: + sql = """ + INSERT INTO hive_votes + (post_id, voter_id, author_id, permlink_id, weight, rshares, vote_percent, last_update) + select data_source.post_id, data_source.voter_id, data_source.author_id, data_source.permlink_id, data_source.weight, data_source.rshares, data_source.vote_percent, data_source.last_update + from + ( + SELECT hp.id as post_id, ha_v.id as voter_id, ha_a.id as author_id, hpd_p.id as permlink_id, t.weight, t.rshares, t.vote_percent, t.last_update + from + ( + VALUES + -- voter, author, permlink, weight, rshares, vote_percent, last_update + {} + ) AS T(voter, author, permlink, weight, rshares, vote_percent, last_update) + INNER JOIN hive_accounts ha_v ON ha_v.name = t.voter + INNER JOIN hive_accounts ha_a ON ha_a.name = t.author + INNER JOIN hive_permlink_data hpd_p ON hpd_p.permlink = t.permlink + INNER JOIN hive_posts hp ON hp.author_id = ha_a.id AND hp.permlink_id = hpd_p.id + ) as data_source(post_id, voter_id, author_id, permlink_id, weight, rshares, vote_percent, last_update) + ON CONFLICT ON CONSTRAINT hive_votes_ux1 DO + UPDATE + SET + weight = EXCLUDED.weight, + rshares = EXCLUDED.rshares, + vote_percent = EXCLUDED.vote_percent, + last_update = EXCLUDED.last_update, + num_changes = hive_votes.num_changes + 1 + """ + + values = [] + values_limit = 1000 + + for _, vd in cls._votes_data.items(): + values.append("('{}', '{}', '{}', {}, {}, {}, '{}'::timestamp)".format( + vd['voter'], vd['author'], vd['permlink'], vd['weight'], vd['rshares'], vd['vote_percent'], vd['last_update'])) + + if len(values) >= values_limit: + values_str = ','.join(values) + actual_query = sql.format(values_str) + DB.query(actual_query) + values.clear() + + if len(values) >
0: + values_str = ','.join(values) + actual_query = sql.format(values_str) + DB.query(actual_query) + values.clear() + + cls._votes_data.clear() + cls.inside_flush = False diff --git a/hive/server/bridge_api/cursor.py b/hive/server/bridge_api/cursor.py index 8d9477468a877d142a817fd93c6708710e7c80a9..b6c701f6dc823b8989ab0c5e20e00af125a7cb64 100644 --- a/hive/server/bridge_api/cursor.py +++ b/hive/server/bridge_api/cursor.py @@ -14,7 +14,13 @@ def last_month(): async def _get_post_id(db, author, permlink): """Get post_id from hive db. (does NOT filter on is_deleted)""" - sql = "SELECT id FROM hive_posts WHERE author = :a AND permlink = :p" + sql = """ + SELECT + hp.id + FROM hive_posts hp + INNER JOIN hive_accounts ha_a ON ha_a.id = hp.author_id + INNER JOIN hive_permlink_data hpd_p ON hpd_p.id = hp.permlink_id + WHERE ha_a.name = :a AND hpd_p.permlink = :p""" post_id = await db.query_one(sql, a=author, p=permlink) assert post_id, 'invalid author/permlink' return post_id @@ -104,7 +110,7 @@ async def pids_by_community(db, ids, sort, seek_id, limit): # setup field, pending, toponly, gray, promoted = definitions[sort] - table = 'hive_posts_cache' + table = 'hive_posts' where = ["community_id IN :ids"] if ids else ["community_id IS NOT NULL AND community_id != 1337319"] # select @@ -117,8 +123,8 @@ async def pids_by_community(db, ids, sort, seek_id, limit): # seek if seek_id: - sval = "(SELECT %s FROM %s WHERE post_id = :seek_id)" % (field, table) - sql = """((%s < %s) OR (%s = %s AND post_id > :seek_id))""" + sval = "(SELECT %s FROM %s WHERE id = :seek_id)" % (field, table) + sql = """((%s < %s) OR (%s = %s AND id > :seek_id))""" where.append(sql % (field, sval, field, sval)) # simpler `%s <= %s` eval has edge case: many posts with payout 0 @@ -129,8 +135,8 @@ async def pids_by_community(db, ids, sort, seek_id, limit): #where.append(sql % (field, sval, field, sval)) # build - sql = ("""SELECT post_id FROM %s WHERE %s - ORDER BY %s DESC, post_id LIMIT :limit + sql = ("""SELECT id FROM %s WHERE %s + ORDER BY %s DESC, id LIMIT :limit """ % (table, ' AND '.join(where), field)) # execute @@ -157,7 +163,7 @@ async def pids_by_category(db, tag, sort, last_id, limit): 'muted': ('payout', True, False, False, False), }[sort] - table = 'hive_posts_cache' + table = 'hive_posts' field = params[0] where = [] @@ -172,17 +178,24 @@ async def pids_by_category(db, tag, sort, last_id, limit): # filter by category or tag if tag: if sort in ['payout', 'payout_comments']: - where.append('category = :tag') + where.append('category_id = (SELECT id FROM hive_category_data WHERE category = :tag)') else: - sql = "SELECT post_id FROM hive_post_tags WHERE tag = :tag" - where.append("post_id IN (%s)" % sql) + sql = """ + SELECT + post_id + FROM + hive_post_tags hpt + INNER JOIN hive_tag_data htd ON hpt.tag_id=htd.id + WHERE htd.tag = :tag + """ + where.append("id IN (%s)" % sql) if last_id: - sval = "(SELECT %s FROM %s WHERE post_id = :last_id)" % (field, table) - sql = """((%s < %s) OR (%s = %s AND post_id > :last_id))""" + sval = "(SELECT %s FROM %s WHERE id = :last_id)" % (field, table) + sql = """((%s < %s) OR (%s = %s AND id > :last_id))""" where.append(sql % (field, sval, field, sval)) - sql = ("""SELECT post_id FROM %s WHERE %s - ORDER BY %s DESC, post_id LIMIT :limit + sql = ("""SELECT id FROM %s WHERE %s + ORDER BY %s DESC, id LIMIT :limit """ % (table, ' AND '.join(where), field)) @@ -223,7 +236,7 @@ async def pids_by_blog(db, account: str, start_author: str = '', # ignore community posts which were not reblogged skip = """ SELECT id
FROM hive_posts - WHERE author = :account + WHERE author_id = (SELECT id FROM hive_accounts WHERE name = :account) AND is_deleted = '0' AND depth = 0 AND community_id IS NOT NULL @@ -305,7 +318,7 @@ async def pids_by_posts(db, account: str, start_permlink: str = '', limit: int = # `depth` in ORDER BY is a no-op, but forces an ix3 index scan (see #189) sql = """ SELECT id FROM hive_posts - WHERE author = :account %s + WHERE author_id = (SELECT id FROM hive_accounts WHERE name = :account) %s AND is_deleted = '0' AND depth = '0' ORDER BY id DESC @@ -328,7 +341,7 @@ async def pids_by_comments(db, account: str, start_permlink: str = '', limit: in # `depth` in ORDER BY is a no-op, but forces an ix3 index scan (see #189) sql = """ SELECT id FROM hive_posts - WHERE author = :account %s + WHERE author_id = (SELECT id FROM hive_accounts WHERE name = :account) %s AND is_deleted = '0' AND depth > 0 ORDER BY id DESC, depth @@ -350,13 +363,13 @@ async def pids_by_replies(db, start_author: str, start_permlink: str = '', start_id = None if start_permlink: sql = """ - SELECT parent.author, + SELECT (SELECT name FROM hive_accounts WHERE id = parent.author_id), child.id FROM hive_posts child JOIN hive_posts parent ON child.parent_id = parent.id - WHERE child.author = :author - AND child.permlink = :permlink + WHERE child.author_id = (SELECT id FROM hive_accounts WHERE name = :author) + AND child.permlink_id = (SELECT id FROM hive_permlink_data WHERE permlink = :permlink) """ row = await db.query_row(sql, author=start_author, permlink=start_permlink) @@ -372,7 +385,7 @@ async def pids_by_replies(db, start_author: str, start_permlink: str = '', sql = """ SELECT id FROM hive_posts WHERE parent_id IN (SELECT id FROM hive_posts - WHERE author = :parent + WHERE author_id = (SELECT id FROM hive_accounts WHERE name = :parent) AND is_deleted = '0' ORDER BY id DESC LIMIT 10000) %s @@ -390,14 +403,14 @@ async def pids_by_payout(db, account: str, start_author: str = '', start_id = None if start_permlink: start_id = await _get_post_id(db, start_author, start_permlink) - last = "(SELECT payout FROM hive_posts_cache WHERE post_id = :start_id)" - seek = ("""AND (payout < %s OR (payout = %s AND post_id > :start_id))""" + last = "(SELECT payout FROM hive_posts WHERE id = :start_id)" + seek = ("""AND (payout < %s OR (payout = %s AND id > :start_id))""" % (last, last)) sql = """ - SELECT post_id - FROM hive_posts_cache - WHERE author = :account + SELECT id + FROM hive_posts + WHERE author_id = (SELECT id FROM hive_accounts WHERE name = :account) AND is_paidout = '0' %s - ORDER BY payout DESC, post_id LIMIT :limit + ORDER BY payout DESC, id LIMIT :limit diff --git a/hive/server/bridge_api/methods.py b/hive/server/bridge_api/methods.py index b90484bab98b22991bd13961b3d00f357f01186c..8599b23b91518f62a09bb0654aa8cde57f1c4842 100644 --- a/hive/server/bridge_api/methods.py +++ b/hive/server/bridge_api/methods.py @@ -16,30 +16,48 @@ from hive.server.common.mutes import Mutes ROLES = {-2: 'muted', 0: 'guest', 2: 'member', 4: 'mod', 6: 'admin', 8: 'owner'} -SELECT_FRAGMENT = """ - SELECT hive_posts_cache.post_id, hive_posts_cache.author, hive_posts_cache.permlink, - hive_posts_cache.title, hive_posts_cache.body, hive_posts_cache.category, hive_posts_cache.depth, - hive_posts_cache.promoted, hive_posts_cache.payout, hive_posts_cache.payout_at, - hive_posts_cache.is_paidout, hive_posts_cache.children, hive_posts_cache.votes, - hive_posts_cache.created_at, hive_posts_cache.updated_at, hive_posts_cache.rshares, - hive_posts_cache.raw_json, hive_posts_cache.json, hive_accounts.reputation AS
author_rep, - hive_posts_cache.is_hidden AS is_hidden, hive_posts_cache.is_grayed AS is_grayed, - hive_posts_cache.total_votes AS total_votes, hive_posts_cache.flag_weight AS flag_weight, - hive_posts_cache.sc_trend AS sc_trend, hive_accounts.id AS acct_author_id, - hive_roles.title as role_title, hive_communities.title AS community_title, hive_roles.role_id AS role_id, +SQL_TEMPLATE = """ + SELECT hp.id, + ha_a.name as author, + hpd_p.permlink as permlink, + (SELECT title FROM hive_post_data WHERE hive_post_data.id = hp.id) as title, + (SELECT body FROM hive_post_data WHERE hive_post_data.id = hp.id) as body, + (SELECT category FROM hive_category_data WHERE hive_category_data.id = hp.category_id) as category, + depth, + promoted, + payout, + payout_at, + is_paidout, + children, + (0) as votes, + hp.created_at, + updated_at, + rshares, + (SELECT json FROM hive_post_data WHERE hive_post_data.id = hp.id) as json, + is_hidden, + is_grayed, + total_votes, + flag_weight, + sc_trend, + ha_a.id AS acct_author_id, + hive_roles.title as role_title, + hive_communities.title AS community_title, + hive_roles.role_id AS role_id, - hive_posts.is_pinned AS is_pinned + hp.is_pinned AS is_pinned - FROM hive_posts_cache JOIN hive_posts ON (hive_posts_cache.post_id = hive_posts.id) - JOIN hive_accounts ON (hive_posts_cache.author = hive_accounts.name) - LEFT OUTER JOIN hive_communities ON (hive_posts_cache.community_id = hive_communities.id) - LEFT OUTER JOIN hive_roles ON (hive_accounts.id = hive_roles.account_id AND hive_posts_cache.community_id = hive_roles.community_id) """ + FROM hive_posts hp + INNER JOIN hive_accounts ha_a ON ha_a.id = hp.author_id + INNER JOIN hive_permlink_data hpd_p ON hpd_p.id = hp.permlink_id + LEFT OUTER JOIN hive_communities ON (hp.community_id = hive_communities.id) + LEFT OUTER JOIN hive_roles ON (ha_a.id = hive_roles.account_id AND hp.community_id = hive_roles.community_id) + """ #pylint: disable=too-many-arguments, no-else-return async def _get_post_id(db, author, permlink): """Get post_id from hive db.""" sql = """SELECT id FROM hive_posts - WHERE author = :a - AND permlink = :p + WHERE author_id = (SELECT id FROM hive_accounts WHERE name = :a) + AND permlink_id = (SELECT id FROM hive_permlink_data WHERE permlink = :p) AND is_deleted = '0'""" post_id = await db.query_one(sql, a=author, p=permlink) assert post_id, 'invalid author/permlink' @@ -89,7 +107,7 @@ async def get_post(context, author, permlink, observer=None): if observer and context: blacklists_for_user = await Mutes.get_blacklists_for_observer(observer, context) - sql = "---bridge_api.get_post\n" + SELECT_FRAGMENT + """ WHERE hive_posts_cache.author = :author AND hive_posts_cache.permlink = :permlink AND NOT hive_posts.is_deleted """ + sql = "---bridge_api.get_post\n" + SQL_TEMPLATE + """ WHERE ha_a.name = :author AND hpd_p.permlink = :permlink AND NOT hp.is_deleted """ result = await db.query_all(sql, author=author, permlink=permlink) assert len(result) == 1, 'invalid author/permlink or post not found in cache' @@ -116,67 +134,75 @@ async def get_ranked_posts(context, sort, start_author='', start_permlink='', pinned_sql = '' if sort == 'trending': - sql = SELECT_FRAGMENT + """ WHERE NOT hive_posts_cache.is_paidout AND hive_posts_cache.depth = 0 AND NOT hive_posts.is_deleted - %s ORDER BY sc_trend desc, post_id LIMIT :limit """ + sql = SQL_TEMPLATE + """ WHERE NOT hp.is_paidout AND hp.depth = 0 AND NOT hp.is_deleted + %s ORDER BY sc_trend desc, hp.id LIMIT :limit """ elif sort == 'hot': - sql = SELECT_FRAGMENT + """ WHERE NOT
hive_posts_cache.is_paidout AND hive_posts_cache.depth = 0 AND NOT hive_posts.is_deleted - %s ORDER BY sc_hot desc, post_id LIMIT :limit """ + sql = SQL_TEMPLATE + """ WHERE NOT hp.is_paidout AND hp.depth = 0 AND NOT hp.is_deleted + %s ORDER BY sc_hot desc, hp.id LIMIT :limit """ elif sort == 'created': - sql = SELECT_FRAGMENT + """ WHERE hive_posts_cache.depth = 0 AND NOT hive_posts.is_deleted AND NOT hive_posts_cache.is_grayed - %s ORDER BY hive_posts_cache.created_at DESC, post_id LIMIT :limit """ + sql = SQL_TEMPLATE + """ WHERE hp.depth = 0 AND NOT hp.is_deleted AND NOT hp.is_grayed + %s ORDER BY hp.created_at DESC, hp.id LIMIT :limit """ elif sort == 'promoted': - sql = SELECT_FRAGMENT + """ WHERE hive_posts_cache.depth > 0 AND hive_posts_cache.promoted > 0 AND NOT hive_posts.is_deleted - AND NOT hive_posts_cache.is_paidout %s ORDER BY hive_posts_cache.promoted DESC, post_id LIMIT :limit """ + sql = SQL_TEMPLATE + """ WHERE hp.depth > 0 AND hp.promoted > 0 AND NOT hp.is_deleted + AND NOT hp.is_paidout %s ORDER BY hp.promoted DESC, hp.id LIMIT :limit """ elif sort == 'payout': - sql = SELECT_FRAGMENT + """ WHERE NOT hive_posts_cache.is_paidout AND NOT hive_posts.is_deleted %s + sql = SQL_TEMPLATE + """ WHERE NOT hp.is_paidout AND NOT hp.is_deleted %s AND payout_at BETWEEN now() + interval '12 hours' AND now() + interval '36 hours' - ORDER BY hive_posts_cache.payout DESC, post_id LIMIT :limit """ + ORDER BY hp.payout DESC, hp.id LIMIT :limit """ elif sort == 'payout_comments': - sql = SELECT_FRAGMENT + """ WHERE NOT hive_posts_cache.is_paidout AND NOT hive_posts.is_deleted AND hive_posts_cache.depth > 0 - %s ORDER BY hive_posts_cache.payout DESC, post_id LIMIT :limit """ + sql = SQL_TEMPLATE + """ WHERE NOT hp.is_paidout AND NOT hp.is_deleted AND hp.depth > 0 + %s ORDER BY hp.payout DESC, hp.id LIMIT :limit """ elif sort == 'muted': - sql = SELECT_FRAGMENT + """ WHERE NOT hive_posts_cache.is_paidout AND NOT hive_posts.is_deleted AND hive_posts_cache.is_grayed - AND hive_posts_cache.payout > 0 %s ORDER BY hive_posts_cache.payout DESC, post_id LIMIT :limit """ + sql = SQL_TEMPLATE + """ WHERE NOT hp.is_paidout AND NOT hp.is_deleted AND hp.is_grayed + AND hp.payout > 0 %s ORDER BY hp.payout DESC, hp.id LIMIT :limit """ sql = "---bridge_api.get_ranked_posts\n" + sql if start_author and start_permlink: if sort == 'trending': - sql = sql % """ AND hive_posts_cache.sc_trend <= (SELECT sc_trend FROM hive_posts_cache WHERE permlink = :permlink AND author = :author) - AND hive_posts_cache.post_id != (SELECT post_id FROM hive_posts_cache WHERE permlink = :permlink AND author=:author) %s """ + sql = sql % """ AND hp.sc_trend <= (SELECT sc_trend FROM hive_posts WHERE author_id = (SELECT id FROM hive_accounts WHERE name = :author) AND permlink_id = (SELECT id FROM hive_permlink_data WHERE permlink = :permlink)) + AND hp.id != (SELECT id FROM hive_posts WHERE author_id = (SELECT id FROM hive_accounts WHERE name = :author) AND permlink_id = (SELECT id FROM hive_permlink_data WHERE permlink = :permlink)) %s """ elif sort == 'hot': - sql = sql % """ AND hive_posts_cache.sc_hot <= (SELECT sc_hot FROM hive_posts_cache WHERE permlink = :permlink AND author = :author) - AND hive_posts_cache.post_id != (SELECT post_id FROM hive_posts_cache WHERE permlink = :permlink AND author = :author) %s """ + sql = sql % """ AND hp.sc_hot <= (SELECT sc_hot FROM hive_posts WHERE author_id = (SELECT id FROM hive_accounts WHERE name = :author) AND permlink_id = (SELECT id FROM
hive_permlink_data WHERE permlink = :permlink)) + AND hp.id != (SELECT id FROM hive_posts WHERE author_id = (SELECT id FROM hive_accounts WHERE name = :author) AND permlink_id = (SELECT id FROM hive_permlink_data WHERE permlink = :permlink)) %s """ elif sort == 'created': - sql = sql % """ AND hive_posts_cache.post_id < (SELECT post_id FROM hive_posts_cache WHERE permlink = :permlink AND author = :author) %s """ + sql = sql % """ AND hp.id < (SELECT id FROM hive_posts WHERE author_id = (SELECT id FROM hive_accounts WHERE name = :author) AND permlink_id = (SELECT id FROM hive_permlink_data WHERE permlink = :permlink)) %s """ elif sort == 'promoted': - sql = sql % """ AND hive_posts_cache.promoted <= (SELECT promoted FROM hive_posts_cache WHERE permlink = :permlink AND author = :author) - AND hive_posts_cache.post_id != (SELECT post_id FROM hive_posts_cache WHERE permlink = :permlink AND author = :author) %s """ + sql = sql % """ AND hp.promoted <= (SELECT promoted FROM hive_posts WHERE author_id = (SELECT id FROM hive_accounts WHERE name = :author) AND permlink_id = (SELECT id FROM hive_permlink_data WHERE permlink = :permlink)) + AND hp.id != (SELECT id FROM hive_posts WHERE author_id = (SELECT id FROM hive_accounts WHERE name = :author) AND permlink_id = (SELECT id FROM hive_permlink_data WHERE permlink = :permlink)) %s """ else: - sql = sql % """ AND hive_posts_cache.payout <= (SELECT payout FROM hive_posts_cache WHERE permlink = :permlink AND author = :author) - AND hive_posts_cache.post_id != (SELECT post_id FROM hive_posts_cache WHERE permlink = :permlink AND author = :author) %s """ + sql = sql % """ AND hp.payout <= (SELECT payout FROM hive_posts WHERE author_id = (SELECT id FROM hive_accounts WHERE name = :author) AND permlink_id = (SELECT id FROM hive_permlink_data WHERE permlink = :permlink)) + AND hp.id != (SELECT id FROM hive_posts WHERE author_id = (SELECT id FROM hive_accounts WHERE name = :author) AND permlink_id = (SELECT id FROM hive_permlink_data WHERE permlink = :permlink)) %s """ else: sql = sql % """ %s """ if not tag or tag == 'all': sql = sql % """ """ elif tag == 'my': - sql = sql % """ AND hive_posts_cache.community_id IN (SELECT community_id FROM hive_subscriptions WHERE account_id = + sql = sql % """ AND hp.community_id IN (SELECT community_id FROM hive_subscriptions WHERE account_id = (SELECT id FROM hive_accounts WHERE name = :observer) ) """ elif tag[:5] == 'hive-': if start_author and start_permlink: - sql = sql % """ AND hive_posts_cache.community_id = (SELECT hive_communities.id FROM hive_communities WHERE name = :community_name ) """ + sql = sql % """ AND hp.community_id = (SELECT hive_communities.id FROM hive_communities WHERE name = :community_name ) """ else: sql = sql % """ AND hive_communities.name = :community_name """ if sort == 'trending' or sort == 'created': - pinned_sql = SELECT_FRAGMENT + """ WHERE is_pinned AND hive_communities.name = :community_name ORDER BY hive_posts_cache.created_at DESC """ + pinned_sql = SQL_TEMPLATE + """ WHERE is_pinned AND hive_communities.name = :community_name ORDER BY hp.created_at DESC """ else: if sort in ['payout', 'payout_comments']: - sql = sql % """ AND hive_posts_cache.category = :tag """ + sql = sql % """ AND hp.category_id = (SELECT id FROM hive_category_data WHERE category = :tag) """ else: - sql = sql % """ AND hive_posts_cache.post_id IN (SELECT post_id FROM hive_post_tags WHERE tag = :tag)""" + sql = sql % """ AND hp.id IN + (SELECT + post_id + FROM + hive_post_tags hpt + INNER JOIN hive_tag_data htd ON hpt.tag_id=htd.id + WHERE htd.tag = :tag + ) + """ if not
+        observer = ''
@@ -234,7 +260,6 @@ async def append_statistics_to_post(post, row, is_pinned, blacklists_for_user=No
     else:
         post['stats']['gray'] = row['is_grayed']
     post['stats']['hide'] = 'irredeemables' in post['blacklists']
-
     if is_pinned:
         post['stats']['is_pinned'] = True
     return post
@@ -257,7 +282,7 @@ async def get_account_posts(context, sort, account, start_author='', start_perml
     # pylint: disable=unused-variable
     observer_id = await get_account_id(db, observer) if observer else None  # TODO
 
-    sql = "---bridge_api.get_account_posts\n " + SELECT_FRAGMENT + """ %s """
+    sql = "---bridge_api.get_account_posts\n " + SQL_TEMPLATE + """ %s """
 
     if sort == 'blog':
         ids = await cursor.pids_by_blog(db, account, *start, limit)
@@ -267,11 +292,11 @@ async def get_account_posts(context, sort, account, start_author='', start_perml
                 post['reblogged_by'] = [account]
         return posts
     elif sort == 'posts':
-        sql = sql % """ WHERE hive_posts_cache.author = :account AND NOT hive_posts.is_deleted AND hive_posts_cache.depth = 0 %s ORDER BY hive_posts_cache.post_id DESC LIMIT :limit"""
+        sql = sql % """ WHERE ha_a.name = :account AND NOT hp.is_deleted AND hp.depth = 0 %s ORDER BY hp.id DESC LIMIT :limit"""
     elif sort == 'comments':
-        sql = sql % """ WHERE hive_posts_cache.author = :account AND NOT hive_posts.is_deleted AND hive_posts_cache.depth > 0 %s ORDER BY hive_posts_cache.post_id DESC, depth LIMIT :limit"""
+        sql = sql % """ WHERE ha_a.name = :account AND NOT hp.is_deleted AND hp.depth > 0 %s ORDER BY hp.id DESC, depth LIMIT :limit"""
     elif sort == 'payout':
-        sql = sql % """ WHERE hive_posts_cache.author = :account AND NOT hive_posts.is_deleted AND NOT hive_posts_cache.is_paidout %s ORDER BY payout DESC, post_id LIMIT :limit"""
+        sql = sql % """ WHERE ha_a.name = :account AND NOT hp.is_deleted AND NOT hp.is_paidout %s ORDER BY payout DESC, hp.id LIMIT :limit"""
     elif sort == 'feed':
        res = await cursor.pids_by_feed_with_reblog(db, account, *start, limit)
        return await load_posts_reblogs(context['db'], res)
@@ -281,7 +306,7 @@ async def get_account_posts(context, sort, account, start_author='', start_perml
        return await load_posts(context['db'], ids)
 
     if start_author and start_permlink:
-        sql = sql % """ AND hive_posts_cache.post_id < (SELECT post_id FROM hive_posts_cache WHERE author = :author AND permlink = :permlink) """
+        sql = sql % """ AND hp.id < (SELECT id FROM hive_posts WHERE author_id = (SELECT id FROM hive_accounts WHERE name = :author) AND permlink_id = (SELECT id FROM hive_permlink_data WHERE permlink = :permlink)) """
     else:
         sql = sql % """ """
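Note on the query-building pattern above: the rewritten get_ranked_posts and get_account_posts compose their SQL in two passes. The sort branch bakes a `%s` placeholder into the string, and the pagination and tag branches fill it in later (sometimes leaving another `%s` behind). A minimal sketch of the mechanism, with illustrative fragments that are not taken from the patch:

    # Two-stage composition, as used by get_ranked_posts above.
    base = "SELECT hp.id FROM hive_posts hp WHERE NOT hp.is_paidout %s ORDER BY hp.payout DESC LIMIT :limit"

    # Pass 1: the pagination seek fills the slot but leaves a new %s behind.
    sql = base % " AND hp.id < :start_id %s "

    # Pass 2: the tag filter consumes the remaining placeholder.
    sql = sql % " AND hp.category_id = (SELECT id FROM hive_category_data WHERE category = :tag) "

    # Every branch must substitute each %s exactly once; a leftover (or
    # missing) %s makes the final string invalid before it reaches the DB.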
diff --git a/hive/server/bridge_api/objects.py b/hive/server/bridge_api/objects.py
index 9846e3f4f2308c54b4b5aa256f6701deb717ef64..8a2639df479094755e31a9edd25862314e49be37 100644
--- a/hive/server/bridge_api/objects.py
+++ b/hive/server/bridge_api/objects.py
@@ -4,8 +4,9 @@ import logging
 import ujson as json
 from hive.server.common.mutes import Mutes
 from hive.server.common.helpers import json_date
-
+from hive.server.database_api.methods import find_votes
 from hive.utils.normalize import sbd_amount
+from hive.indexer.votes import Votes
 
 log = logging.getLogger(__name__)
 
@@ -43,11 +44,53 @@ async def load_posts_keyed(db, ids, truncate_body=0):
     assert ids, 'no ids passed to load_posts_keyed'
 
     # fetch posts and associated author reps
-    sql = """SELECT post_id, community_id, author, permlink, title, body, category, depth,
-                    promoted, payout, payout_at, is_paidout, children, votes,
-                    created_at, updated_at, rshares, raw_json, json,
-                    is_hidden, is_grayed, total_votes, flag_weight
-               FROM hive_posts_cache WHERE post_id IN :ids"""
+    sql = """
+        SELECT hp.id,
+            community_id,
+            ha_a.name as author,
+            hpd_p.permlink as permlink,
+            hpd.title as title,
+            hpd.body as body,
+            hcd.category as category,
+            depth,
+            promoted,
+            payout,
+            payout_at,
+            is_paidout,
+            children,
+            hpd.votes as votes,
+            hp.created_at,
+            updated_at,
+            rshares,
+            hpd.json as json,
+            is_hidden,
+            is_grayed,
+            total_votes,
+            flag_weight,
+            ha_pa.name as parent_author,
+            hpd_pp.permlink as parent_permlink,
+            curator_payout_value,
+            ha_ra.name as root_author,
+            hpd_rp.permlink as root_permlink,
+            max_accepted_payout,
+            percent_hbd,
+            allow_replies,
+            allow_votes,
+            allow_curation_rewards,
+            beneficiaries,
+            url,
+            root_title
+        FROM hive_posts hp
+        INNER JOIN hive_accounts ha_a ON ha_a.id = hp.author_id
+        INNER JOIN hive_permlink_data hpd_p ON hpd_p.id = hp.permlink_id
+        LEFT JOIN hive_post_data hpd ON hpd.id = hp.id
+        LEFT JOIN hive_category_data hcd ON hcd.id = hp.category_id
+        INNER JOIN hive_accounts ha_pa ON ha_pa.id = hp.parent_author_id
+        INNER JOIN hive_permlink_data hpd_pp ON hpd_pp.id = hp.parent_permlink_id
+        INNER JOIN hive_accounts ha_ra ON ha_ra.id = hp.root_author_id
+        INNER JOIN hive_permlink_data hpd_rp ON hpd_rp.id = hp.root_permlink_id
+        WHERE hp.id IN :ids
+    """
     result = await db.query_all(sql, ids=tuple(ids))
     author_map = await _query_author_map(db, result)
 
@@ -63,6 +106,7 @@ async def load_posts_keyed(db, ids, truncate_body=0):
         row['author_rep'] = author['reputation']
 
         post = _condenser_post_object(row, truncate_body=truncate_body)
+        post['active_votes'] = await find_votes({'db':db}, {'author':row['author'], 'permlink':row['permlink']})
 
         post['blacklists'] = Mutes.lists(post['author'], author['reputation'])
 
@@ -128,8 +172,14 @@ async def load_posts(db, ids, truncate_body=0):
         log.info("get_posts do not exist in cache: %s", repr(missed))
         for _id in missed:
             ids.remove(_id)
-            sql = ("SELECT id, author, permlink, depth, created_at, is_deleted "
-                   "FROM hive_posts WHERE id = :id")
+            sql = """
+                SELECT
+                    hp.id, ha_a.name as author, hpd_p.permlink as permlink, depth, created_at, is_deleted
+                FROM
+                    hive_posts hp
+                INNER JOIN hive_accounts ha_a ON ha_a.id = hp.author_id
+                INNER JOIN hive_permlink_data hpd_p ON hpd_p.id = hp.permlink_id
+                WHERE hp.id = :id"""
             post = await db.query_row(sql, id=_id)
             if not post['is_deleted']:  # TODO: This should never happen. See #173 for analysis
@@ -175,7 +225,7 @@ def _condenser_profile_object(row):
         }}}
 
 def _condenser_post_object(row, truncate_body=0):
-    """Given a hive_posts_cache row, create a legacy-style post object."""
+    """Given a hive_posts row, create a legacy-style post object."""
     paid = row['is_paidout']
 
     # condenser#3424 mitigation
@@ -183,14 +233,14 @@ def _condenser_post_object(row, truncate_body=0):
         row['category'] = 'undefined'
 
     post = {}
-    post['post_id'] = row['post_id']
+    post['post_id'] = row['id']
     post['author'] = row['author']
     post['permlink'] = row['permlink']
     post['category'] = row['category']
 
     post['title'] = row['title']
     post['body'] = row['body'][0:truncate_body] if truncate_body else row['body']
-    post['json_metadata'] = json.loads(row['json'])
+    post['json_metadata'] = row['json']
 
     post['created'] = json_date(row['created_at'])
     post['updated'] = json_date(row['updated_at'])
@@ -207,38 +257,40 @@ def _condenser_post_object(row, truncate_body=0):
     post['promoted'] = _amount(row['promoted'])
 
     post['replies'] = []
-    post['active_votes'] = _hydrate_active_votes(row['votes'])
     post['author_reputation'] = row['author_rep']
 
     post['stats'] = {
         'hide': row['is_hidden'],
         'gray': row['is_grayed'],
-        'total_votes': row['total_votes'],
+        'total_votes': Votes.get_vote_count(row['author'], row['permlink']),
         'flag_weight': row['flag_weight']}  # TODO: down_weight
 
-    # import fields from legacy object
-    assert row['raw_json']
-    assert len(row['raw_json']) > 32
-    raw_json = json.loads(row['raw_json'])
-
     # TODO: move to core, or payout_details
-    post['beneficiaries'] = raw_json['beneficiaries']
-    post['max_accepted_payout'] = raw_json['max_accepted_payout']
-    post['percent_steem_dollars'] = raw_json['percent_steem_dollars']  # TODO: systag?
+    #post['author_reputation'] = rep_to_raw(row['author_rep'])
+
+    post['root_author'] = row['root_author']
+    post['root_permlink'] = row['root_permlink']
+
+    post['allow_replies'] = row['allow_replies']
+    post['allow_votes'] = row['allow_votes']
+    post['allow_curation_rewards'] = row['allow_curation_rewards']
+
+    post['url'] = row['url']
+    post['root_title'] = row['root_title']
+    post['beneficiaries'] = row['beneficiaries']
+    post['max_accepted_payout'] = row['max_accepted_payout']
+    post['percent_hbd'] = row['percent_hbd']
+
     if paid:
-        curator_payout = sbd_amount(raw_json['curator_payout_value'])
-        post['author_payout_value'] = _amount(row['payout'] - curator_payout)
+        curator_payout = sbd_amount(row['curator_payout_value'])
         post['curator_payout_value'] = _amount(curator_payout)
+        post['total_payout_value'] = _amount(row['payout'] - curator_payout)
 
     # TODO: re-evaluate
     if row['depth'] > 0:
-        post['parent_author'] = raw_json['parent_author']
-        post['parent_permlink'] = raw_json['parent_permlink']
-        post['title'] = 'RE: ' + raw_json['root_title']  # PostSummary & comment context
-    #else:
-    #    post['parent_author'] = ''
-    #    post['parent_permlink'] = ''
-    post['url'] = raw_json['url']
+        post['parent_author'] = row['parent_author']
+        post['parent_permlink'] = row['parent_permlink']
+        post['title'] = 'RE: ' + row['root_title']  # PostSummary & comment context
 
     return post
 
@@ -246,13 +298,3 @@ def _amount(amount, asset='HBD'):
     """Return a steem-style amount string given a (numeric, asset-str)."""
     assert asset == 'HBD', 'unhandled asset %s' % asset
     return "%.3f HBD" % amount
-
-def _hydrate_active_votes(vote_csv):
-    """Convert minimal CSV representation into steemd-style object."""
-    if not vote_csv: return []
-    #return [line.split(',')[:2] for line in vote_csv.split("\n")]
-    votes = []
-    for line in vote_csv.split("\n"):
-        voter, rshares, _, _ = line.split(',')
-        votes.append(dict(voter=voter, rshares=rshares))
-    return votes
diff --git a/hive/server/bridge_api/support.py b/hive/server/bridge_api/support.py
index b71b4017992a755b144ae19546ccf3ef16a95e18..8eb89d41678b6c0bda264234a6e33296aa4669d0 100644
--- a/hive/server/bridge_api/support.py
+++ b/hive/server/bridge_api/support.py
@@ -19,12 +19,19 @@ ROLES = {-2: 'muted', 0: 'guest', 2: 'member', 4: 'admin', 6: 'mod', 8: 'admin'}
 async def get_post_header(context, author, permlink):
     """Fetch basic post data"""
     db = context['db']
+    sql = """
+        SELECT
+            hp.id, ha_a.name as author, hpd_p.permlink as permlink, hcd.category as category, depth
+        FROM
+            hive_posts hp
+        INNER JOIN hive_accounts ha_a ON ha_a.id = hp.author_id
+        INNER JOIN hive_permlink_data hpd_p ON hpd_p.id = hp.permlink_id
+        LEFT JOIN hive_category_data hcd ON hcd.id = hp.category_id
+        WHERE ha_a.name = :author
+            AND hpd_p.permlink = :permlink
+            AND is_deleted = '0'
+    """
-    sql = """SELECT id, parent_id, author, permlink, category, depth
-               FROM hive_posts
-              WHERE author = :author
-                AND permlink = :permlink
-                AND is_deleted = '0'"""
     row = await db.query_row(sql, author=author, permlink=permlink)
 
     if not row:
@@ -43,9 +50,15 @@ async def normalize_post(context, post):
     db = context['db']
 
     # load core md
-    sql = """SELECT id, category, community_id, is_muted, is_valid
-               FROM hive_posts
-              WHERE author = :author AND permlink = :permlink"""
+    sql = """
+        SELECT
+            hp.id, hcd.category as category, community_id, is_muted, is_valid
+        FROM hive_posts hp
+        INNER JOIN hive_accounts ha_a ON ha_a.id = hp.author_id
+        INNER JOIN hive_permlink_data hpd_p ON hpd_p.id = hp.permlink_id
+        LEFT JOIN hive_category_data hcd ON hcd.id = hp.category_id
+        WHERE ha_a.name = :author AND hpd_p.permlink = :permlink
+    """
     core = await db.query_row(sql, author=post['author'], permlink=post['permlink'])
     if not core:
         core = dict(id=None,
diff --git a/hive/server/bridge_api/thread.py b/hive/server/bridge_api/thread.py
index 0a07f671f82c6858b0e7d411877dfe4af0976e85..532a250e7aba31d936bff6ad4f3b2880474bef98 100644
--- a/hive/server/bridge_api/thread.py
+++ b/hive/server/bridge_api/thread.py
@@ -23,32 +23,28 @@ async def get_discussion(context, author, permlink, observer=None):
     permlink = valid_permlink(permlink)
 
     sql = """
-        WITH RECURSIVE child_posts (id, parent_id) AS (
-            SELECT id, parent_id
-            FROM hive_posts hp
-            WHERE hp.author = 'et42k' AND hp.permlink = 'iqx-hashtag'
+        WITH RECURSIVE child_posts (id, parent_id) AS (
+            SELECT hp.id, hp.parent_id FROM hive_posts hp WHERE hp.author_id = (SELECT id FROM hive_accounts WHERE name = :author)
+                AND hp.permlink_id = (SELECT id FROM hive_permlink_data WHERE permlink = :permlink)
                 AND NOT hp.is_deleted AND NOT hp.is_muted
-            UNION ALL
-            SELECT children.id, children.parent_id
-            FROM hive_posts children
-            INNER JOIN child_posts ON (children.parent_id = child_posts.id)
-            WHERE NOT children.is_deleted AND NOT children.is_muted
-        )
-        SELECT child_posts.id, child_posts.parent_id, hive_posts_cache.post_id, hive_posts_cache.author, hive_posts_cache.permlink,
-               hive_posts_cache.title, hive_posts_cache.body, hive_posts_cache.category, hive_posts_cache.depth,
-               hive_posts_cache.promoted, hive_posts_cache.payout, hive_posts_cache.payout_at,
-               hive_posts_cache.is_paidout, hive_posts_cache.children, hive_posts_cache.votes,
-               hive_posts_cache.created_at, hive_posts_cache.updated_at, hive_posts_cache.rshares,
-               hive_posts_cache.raw_json, hive_posts_cache.json, hive_accounts.reputation AS author_rep,
-               hive_posts_cache.is_hidden AS is_hidden, hive_posts_cache.is_grayed AS is_grayed,
-               hive_posts_cache.total_votes AS total_votes, hive_posts_cache.flag_weight AS flag_weight,
-               hive_posts_cache.sc_trend AS sc_trend, hive_accounts.id AS acct_author_id
-        FROM child_posts
-        inner JOIN hive_posts_cache ON (child_posts.id = hive_posts_cache.post_id)
-        -- inner JOIN hive_posts ON (hive_posts_cache.post_id = hive_posts.id)
-        inner JOIN hive_accounts ON (hive_posts_cache.author = hive_accounts.name)
-        -- WHERE NOT hive_posts.is_deleted AND NOT hive_posts.is_muted
-        limit 2000
+            UNION ALL
+            SELECT children.id, children.parent_id FROM hive_posts children INNER JOIN child_posts ON (children.parent_id = child_posts.id)
+            WHERE NOT children.is_deleted AND NOT children.is_muted
+        )
+        SELECT child_posts.id, child_posts.parent_id, hive_posts.id, hive_accounts.name as author, hpd_p.permlink as permlink,
+               hpd.title as title, hpd.body as body, hcd.category as category, hive_posts.depth,
+               hive_posts.promoted, hive_posts.payout, hive_posts.payout_at,
+               hive_posts.is_paidout, hive_posts.children, hpd.votes as votes,
+               hive_posts.created_at, hive_posts.updated_at, hive_posts.rshares,
+               hpd.json as json, hive_accounts.reputation AS author_rep,
+               hive_posts.is_hidden AS is_hidden, hive_posts.is_grayed AS is_grayed,
+               hive_posts.total_votes AS total_votes, hive_posts.flag_weight AS flag_weight,
+               hive_posts.sc_trend AS sc_trend, hive_accounts.id AS acct_author_id
+        FROM child_posts
+        INNER JOIN hive_posts ON (child_posts.id = hive_posts.id)
+        INNER JOIN hive_accounts ON (hive_posts.author_id = hive_accounts.id)
+        INNER JOIN hive_permlink_data hpd_p ON hpd_p.id = hive_posts.permlink_id
+        INNER JOIN hive_post_data hpd ON hpd.id = hive_posts.id
+        INNER JOIN hive_category_data hcd ON hcd.id = hive_posts.category_id
+        WHERE NOT hive_posts.is_deleted AND NOT hive_posts.is_muted
     """
 
     blacklists_for_user = None
@@ -109,8 +105,16 @@ def get_children(parent_id, posts):
 
 async def _get_post_id(db, author, permlink):
     """Given an author/permlink, retrieve the id from db."""
-    sql = ("SELECT id FROM hive_posts WHERE author = :a "
-           "AND permlink = :p AND is_deleted = '0' LIMIT 1")
+    sql = """
+        SELECT
+            hp.id
+        FROM hive_posts hp
+        INNER JOIN hive_accounts ha_a ON ha_a.id = hp.author_id
+        INNER JOIN hive_permlink_data hpd_p ON hpd_p.id = hp.permlink_id
+        WHERE ha_a.name = :a
+            AND hpd_p.permlink = :p
+            AND is_deleted = '0'
+        LIMIT 1"""
     return await db.query_one(sql, a=author, p=permlink)
 
 def _ref(post):
diff --git a/hive/server/common/mutes.py b/hive/server/common/mutes.py
index d663b5f30ecd8bd434a8adfe80db3a666b819690..53fc167778924a7d696f9a3e346f48f601d0b958 100644
--- a/hive/server/common/mutes.py
+++ b/hive/server/common/mutes.py
@@ -58,6 +58,7 @@ class Mutes:
 
     def load(self):
         """Reload all accounts from irredeemables endpoint and global lists."""
+        return
         self.accounts = set(_read_url(self.url).decode('utf8').split())
         jsn = _read_url(self.blacklist_api_url + "/blacklists")
         self.blist = set(json.loads(jsn))
@@ -100,6 +101,7 @@ class Mutes:
     @classmethod
     def lists(cls, name, rep):
         """Return blacklists the account belongs to."""
+        return []
         assert name
         inst = cls.instance()
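Several queries in this patch repeat the same pair of scalar subqueries to turn an (author, permlink) reference into a hive_posts.id. A reusable helper along these lines would keep the pattern in one place (a sketch; the function name is illustrative and not part of the patch):

    async def _post_id_from_ref(db, author, permlink):
        """Resolve an (author, permlink) pair to hive_posts.id, or None."""
        sql = """
            SELECT hp.id
              FROM hive_posts hp
             INNER JOIN hive_accounts ha ON ha.id = hp.author_id
             INNER JOIN hive_permlink_data hpd ON hpd.id = hp.permlink_id
             WHERE ha.name = :author
               AND hpd.permlink = :permlink
             LIMIT 1"""
        return await db.query_one(sql, author=author, permlink=permlink)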
diff --git a/hive/server/common/objects.py b/hive/server/common/objects.py
new file mode 100644
index 0000000000000000000000000000000000000000..f7b3d013d9d09491559bfd7d5c18277ad91feb8e
--- /dev/null
+++ b/hive/server/common/objects.py
@@ -0,0 +1,75 @@
+from hive.server.common.helpers import json_date
+from hive.utils.normalize import sbd_amount
+
+def _amount(amount, asset='HBD'):
+    """Return a steem-style amount string given a (numeric, asset-str)."""
+    assert asset == 'HBD', 'unhandled asset %s' % asset
+    return "%.3f HBD" % amount
+
+async def query_author_map(db, posts):
+    """Given a list of posts, returns an author->reputation map."""
+    if not posts: return {}
+    names = tuple({post['author'] for post in posts})
+    sql = "SELECT id, name, reputation FROM hive_accounts WHERE name IN :names"
+    return {r['name']: r for r in await db.query_all(sql, names=names)}
+
+def condenser_post_object(row, truncate_body=0):
+    """Given a hive_posts row, create a legacy-style post object."""
+    paid = row['is_paidout']
+
+    # condenser#3424 mitigation
+    if not row['category']:
+        row['category'] = 'undefined'
+
+    post = {}
+    post['post_id'] = row['id']
+    post['author'] = row['author']
+    post['permlink'] = row['permlink']
+    post['category'] = row['category']
+
+    post['title'] = row['title']
+    post['body'] = row['body'][0:truncate_body] if truncate_body else row['body']
+    post['json_metadata'] = row['json']
+
+    post['created'] = json_date(row['created_at'])
+    post['last_update'] = json_date(row['updated_at'])
+    post['depth'] = row['depth']
+    post['children'] = row['children']
+    post['net_rshares'] = row['rshares']
+
+    post['last_payout'] = json_date(row['payout_at'] if paid else None)
+    post['cashout_time'] = json_date(None if paid else row['payout_at'])
+    post['total_payout_value'] = _amount(row['payout'] if paid else 0)
+    post['curator_payout_value'] = _amount(0)
+    post['pending_payout_value'] = _amount(0 if paid else row['payout'])
+    post['promoted'] = _amount(row['promoted'])
+
+    post['replies'] = []
+    post['body_length'] = len(row['body'])
+
+    post['root_author'] = row['root_author']
+    post['root_permlink'] = row['root_permlink']
+
+    post['allow_replies'] = row['allow_replies']
+    post['allow_votes'] = row['allow_votes']
+    post['allow_curation_rewards'] = row['allow_curation_rewards']
+
+    if row['depth'] > 0:
+        post['parent_author'] = row['parent_author']
+        post['parent_permlink'] = row['parent_permlink']
+    else:
+        post['parent_author'] = ''
+        post['parent_permlink'] = row['category']
+
+    post['url'] = row['url']
+    post['root_title'] = row['root_title']
+    post['beneficiaries'] = row['beneficiaries']
+    post['max_accepted_payout'] = row['max_accepted_payout']
+    post['percent_hbd'] = row['percent_hbd']
+
+    if paid:
+        curator_payout = sbd_amount(row['curator_payout_value'])
+        post['curator_payout_value'] = _amount(curator_payout)
+        post['total_payout_value'] = _amount(row['payout'] - curator_payout)
+
+    return post
diff --git a/hive/server/common/payout_stats.py b/hive/server/common/payout_stats.py
index 7a3e908738ecdee6d98c3384ca6a811b096aab1c..f0c97594f11800525e1043d9fb570edbd1bd560e 100644
--- a/hive/server/common/payout_stats.py
+++ b/hive/server/common/payout_stats.py
@@ -42,7 +42,7 @@ class PayoutStats:
                    SUM(payout) payout,
                    COUNT(*) posts,
                    NULL authors
-              FROM hive_posts_cache
+              FROM hive_posts
              WHERE is_paidout = '0'
          GROUP BY community_id, author
 
@@ -52,8 +52,8 @@ class PayoutStats:
                    NULL author,
                    SUM(payout) payout,
                    COUNT(*) posts,
-                   COUNT(DISTINCT(author)) authors
-              FROM hive_posts_cache
+                   COUNT(DISTINCT(author_id)) authors
+              FROM hive_posts
             WHERE is_paidout = '0'
         GROUP BY community_id
        """
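query_author_map above batches the author lookups for a page of posts into a single IN query. A short usage sketch (the post dicts are fabricated; only the 'author' key matters to the helper):

    from hive.server.common.objects import query_author_map

    async def decorate_author_rep(db, posts):
        # one round trip for all distinct authors in the batch
        authors = await query_author_map(db, posts)
        for post in posts:
            post['author_rep'] = authors[post['author']]['reputation']
        return posts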
diff --git a/hive/server/condenser_api/cursor.py b/hive/server/condenser_api/cursor.py
index 2e3b42ba437f74b92b74d46987daad292bb51928..fe7ef79802afc260931ffb266521821f625c0aff 100644
--- a/hive/server/condenser_api/cursor.py
+++ b/hive/server/condenser_api/cursor.py
@@ -4,6 +4,7 @@ from datetime import datetime
 from dateutil.relativedelta import relativedelta
 
 from hive.utils.normalize import rep_to_raw
+from json import loads
 
 # pylint: disable=too-many-lines
 
@@ -13,8 +14,14 @@ def last_month():
 
 async def get_post_id(db, author, permlink):
     """Given an author/permlink, retrieve the id from db."""
-    sql = ("SELECT id FROM hive_posts WHERE author = :a "
-           "AND permlink = :p AND is_deleted = '0' LIMIT 1")
+    sql = """
+        SELECT
+            hp.id
+        FROM hive_posts hp
+        INNER JOIN hive_accounts ha_a ON ha_a.id = hp.author_id
+        INNER JOIN hive_permlink_data hpd_p ON hpd_p.id = hp.permlink_id
+        WHERE ha_a.name = :a AND hpd_p.permlink = :p
+            AND is_deleted = '0' LIMIT 1"""
     return await db.query_one(sql, a=author, p=permlink)
 
 async def get_child_ids(db, post_id):
@@ -24,7 +31,13 @@ async def get_child_ids(db, post_id):
 
 async def _get_post_id(db, author, permlink):
     """Get post_id from hive db."""
-    sql = "SELECT id FROM hive_posts WHERE author = :a AND permlink = :p"
+    sql = """
+        SELECT
+            hp.id
+        FROM hive_posts hp
+        INNER JOIN hive_accounts ha_a ON ha_a.id = hp.author_id
+        INNER JOIN hive_permlink_data hpd_p ON hpd_p.id = hp.permlink_id
+        WHERE ha_a.name = :a AND hpd_p.permlink = :p"""
     return await db.query_one(sql, a=author, p=permlink)
 
 async def _get_account_id(db, name):
@@ -115,7 +128,7 @@ async def get_account_reputations(db, account_lower_bound, limit):
         seek = "WHERE name >= :start"
 
     sql = """SELECT name, reputation
-               FROM hive_accounts %s
+               FROM hive_accounts %s
           ORDER BY name
              LIMIT :limit""" % seek
     rows = await db.query_all(sql, start=account_lower_bound, limit=limit)
@@ -140,7 +153,7 @@ async def pids_by_query(db, sort, start_author, start_permlink, limit, tag):
         'payout_comments': ('payout', True, False, True, False),
     }[sort]
 
-    table = 'hive_posts_cache'
+    table = 'hive_posts'
     field = params[0]
     where = []
 
@@ -156,26 +169,74 @@ async def pids_by_query(db, sort, start_author, start_permlink, limit, tag):
         # cid = get_community_id(tag)
         # where.append('community_id = :cid')
         if sort in ['payout', 'payout_comments']:
-            where.append('category = :tag')
+            where.append('category_id = (SELECT id FROM hive_category_data WHERE category = :tag)')
         else:
             if tag[:5] == 'hive-':
-                where.append('category = :tag')
+                where.append('category_id = (SELECT id FROM hive_category_data WHERE category = :tag)')
                 if sort in ('trending', 'hot'):
                     where.append('depth = 0')
-            sql = "SELECT post_id FROM hive_post_tags WHERE tag = :tag"
-            where.append("post_id IN (%s)" % sql)
+            sql = """
+                SELECT
+                    post_id
+                FROM
+                    hive_post_tags hpt
+                INNER JOIN hive_tag_data htd ON hpt.tag_id=htd.id
+                WHERE htd.tag = :tag
+            """
+            where.append("hp.id IN (%s)" % sql)
 
     start_id = None
-    if start_permlink:
+    if start_permlink and start_author:
         start_id = await _get_post_id(db, start_author, start_permlink)
         if not start_id:
             return []
 
-        sql = "%s <= (SELECT %s FROM %s WHERE post_id = :start_id)"
+        sql = "%s <= (SELECT %s FROM %s WHERE id = :start_id)"
         where.append(sql % (field, field, table))
 
-    sql = ("SELECT post_id FROM %s WHERE %s ORDER BY %s DESC LIMIT :limit"
-           % (table, ' AND '.join(where), field))
+    sql = """
+        SELECT hp.id,
+            community_id,
+            ha_a.name as author,
+            hpd_p.permlink as permlink,
+            hpd.title as title,
+            hpd.body as body,
+            hcd.category as category,
+            depth,
+            promoted,
+            payout,
+            payout_at,
+            is_paidout,
+            children,
+            hpd.votes as votes,
+            hp.created_at,
+            updated_at,
+            rshares,
+            hpd.json as json,
+            is_hidden,
+            is_grayed,
+            total_votes,
+            flag_weight,
+            ha_pa.name as parent_author,
+            hpd_pp.permlink as parent_permlink,
+            curator_payout_value,
+            ha_ra.name as root_author,
+            hpd_rp.permlink as root_permlink,
+            max_accepted_payout,
+            percent_hbd,
+            allow_replies,
+            allow_votes,
+            allow_curation_rewards,
+            beneficiaries,
+            url,
+            root_title
+        FROM hive_posts hp
+        INNER JOIN hive_accounts ha_a ON ha_a.id = hp.author_id
+        INNER JOIN hive_permlink_data hpd_p ON hpd_p.id = hp.permlink_id
+        LEFT JOIN hive_post_data hpd ON hpd.id = hp.id
+        LEFT JOIN hive_category_data hcd ON hcd.id = hp.category_id
+        INNER JOIN hive_accounts ha_pa ON ha_pa.id = hp.parent_author_id
+        INNER JOIN hive_permlink_data hpd_pp ON hpd_pp.id = hp.parent_permlink_id
+        INNER JOIN hive_accounts ha_ra ON ha_ra.id = hp.root_author_id
+        INNER JOIN hive_permlink_data hpd_rp ON hpd_rp.id = hp.root_permlink_id
+        WHERE %s ORDER BY %s DESC LIMIT :limit
+    """ % (' AND '.join(where), field)
 
     return await db.query_col(sql, tag=tag, start_id=start_id, limit=limit)
 
@@ -261,7 +322,7 @@ async def pids_by_blog_without_reblog(db, account: str, start_permlink: str = ''
     sql = """
         SELECT id
           FROM hive_posts
-         WHERE author = :account %s
+         WHERE author_id = (SELECT id FROM hive_accounts WHERE name = :account) %s
           AND is_deleted = '0'
           AND depth = 0
      ORDER BY id DESC
@@ -320,7 +381,7 @@ async def pids_by_account_comments(db, account: str, start_permlink: str = '', l
     # `depth` in ORDER BY is a no-op, but forces an ix3 index scan (see #189)
     sql = """
         SELECT id FROM hive_posts
-         WHERE author = :account %s
+         WHERE author_id = (SELECT id FROM hive_accounts WHERE name = :account) %s
           AND depth > 0
           AND is_deleted = '0'
      ORDER BY id DESC, depth
@@ -342,13 +403,13 @@ async def pids_by_replies_to_account(db, start_author: str, start_permlink: str
     start_id = None
     if start_permlink:
         sql = """
-          SELECT parent.author,
+          SELECT (SELECT name FROM hive_accounts WHERE id = parent.author_id),
                  child.id
             FROM hive_posts child
             JOIN hive_posts parent
              ON child.parent_id = parent.id
-          WHERE child.author = :author
-            AND child.permlink = :permlink
+          WHERE child.author_id = (SELECT id FROM hive_accounts WHERE name = :author)
+            AND child.permlink_id = (SELECT id FROM hive_permlink_data WHERE permlink = :permlink)
         """
 
         row = await db.query_row(sql, author=start_author, permlink=start_permlink)
@@ -364,7 +425,7 @@ async def pids_by_replies_to_account(db, start_author: str, start_permlink: str
     sql = """
        SELECT id FROM hive_posts
        WHERE parent_id IN (SELECT id FROM hive_posts
-                             WHERE author = :parent
+                             WHERE author_id = (SELECT id FROM hive_accounts WHERE name = :parent)
                                AND is_deleted = '0'
                           ORDER BY id DESC
                              LIMIT 10000) %s
@@ -374,3 +435,39 @@ async def pids_by_replies_to_account(db, start_author: str, start_permlink: str
     """ % seek
 
     return await db.query_col(sql, parent=parent_account, start_id=start_id, limit=limit)
+
+async def get_accounts(db, accounts: list):
+    """Returns accounts data for accounts given in list"""
+    ret = []
+
+    names = ["'{}'".format(a) for a in accounts]
+    sql = """SELECT created_at, reputation, display_name, about,
+        location, website, profile_image, cover_image, followers, following,
+        proxy, post_count, proxy_weight, vote_weight, rank,
+        lastread_at, active_at, cached_at, raw_json
+        FROM hive_accounts WHERE name IN ({})""".format(",".join(names))
+
+    result = await db.query_all(sql)
+    for row in result:
+        account_data = dict(loads(row.raw_json))
+        account_data['created_at'] = row.created_at.isoformat()
+        account_data['reputation'] = row.reputation
+        account_data['display_name'] = row.display_name
+        account_data['about'] = row.about
+        account_data['location'] = row.location
+        account_data['website'] = row.website
+        account_data['profile_image'] = row.profile_image
+        account_data['cover_image'] = row.cover_image
+        account_data['followers'] = row.followers
+        account_data['following'] = row.following
+        account_data['proxy'] = row.proxy
+        account_data['post_count'] = row.post_count
+        account_data['proxy_weight'] = row.proxy_weight
+        account_data['vote_weight'] = row.vote_weight
+        account_data['rank'] = row.rank
+        account_data['lastread_at'] = row.lastread_at.isoformat()
+        account_data['active_at'] = row.active_at.isoformat()
+        account_data['cached_at'] = row.cached_at.isoformat()
+        ret.append(account_data)
+
+    return ret
diff --git a/hive/server/condenser_api/get_state.py b/hive/server/condenser_api/get_state.py
index a66502d545021def091e636a94379d970c5cddca..61487be06069d89b6927c94684f57b63e5c56eb5 100644
--- a/hive/server/condenser_api/get_state.py
+++ b/hive/server/condenser_api/get_state.py
@@ -291,8 +291,8 @@ async def _get_props_lite(db):
 
     # convert NAI amounts to legacy
     nais = ['virtual_supply', 'current_supply', 'current_sbd_supply',
-            'pending_rewarded_vesting_steem', 'pending_rewarded_vesting_shares',
-            'total_vesting_fund_steem', 'total_vesting_shares']
+            'pending_rewarded_vesting_hive', 'pending_rewarded_vesting_shares',
+            'total_vesting_fund_hive', 'total_vesting_shares']
     for k in nais:
         if k in raw:
             raw[k] = legacy_amount(raw[k])
@@ -303,6 +303,6 @@ async def _get_props_lite(db):
         sbd_interest_rate=raw['sbd_interest_rate'],
         head_block_number=raw['head_block_number'], #*
         total_vesting_shares=raw['total_vesting_shares'],
-        total_vesting_fund_steem=raw['total_vesting_fund_steem'],
+        total_vesting_fund_steem=raw['total_vesting_fund_hive'],
         last_irreversible_block_num=raw['last_irreversible_block_num'], #*
     )
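get_accounts in cursor.py above interpolates the account names directly into the IN list. Since the adapter already expands tuple binds (the ids=tuple(ids) calls elsewhere in this patch rely on it), a parameterized variant is a drop-in alternative (a sketch, not part of the patch):

    async def get_accounts_safe(db, accounts):
        """Like get_accounts, but binds the names instead of formatting them in."""
        sql = """SELECT created_at, reputation, display_name, about,
                        location, website, profile_image, cover_image, followers,
                        following, proxy, post_count, proxy_weight, vote_weight,
                        rank, lastread_at, active_at, cached_at, raw_json
                   FROM hive_accounts WHERE name IN :names"""
        return await db.query_all(sql, names=tuple(accounts))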
diff --git a/hive/server/condenser_api/methods.py b/hive/server/condenser_api/methods.py
index da3016776cc462eb529dc7f52a5183b49a274e5e..eac4c0c69d579bb848e651b3d0d92a6bde651b6d 100644
--- a/hive/server/condenser_api/methods.py
+++ b/hive/server/condenser_api/methods.py
@@ -1,5 +1,5 @@
 """Steemd/condenser_api compatibility layer API methods."""
-
+from json import loads
+from hive.server.database_api.methods import find_votes
 from functools import wraps
 
 import hive.server.condenser_api.cursor as cursor
@@ -18,22 +18,38 @@ from hive.server.common.mutes import Mutes
 
 # pylint: disable=too-many-arguments,line-too-long,too-many-lines
 
-SELECT_FRAGMENT = """
-    SELECT hive_posts_cache.post_id, hive_posts_cache.author, hive_posts_cache.permlink,
-           hive_posts_cache.title, hive_posts_cache.body, hive_posts_cache.category, hive_posts_cache.depth,
-           hive_posts_cache.promoted, hive_posts_cache.payout, hive_posts_cache.payout_at,
-           hive_posts_cache.is_paidout, hive_posts_cache.children, hive_posts_cache.votes,
-           hive_posts_cache.created_at, hive_posts_cache.updated_at, hive_posts_cache.rshares,
-           hive_posts_cache.raw_json, hive_posts_cache.json, hive_accounts.reputation AS author_rep,
-           hive_posts_cache.is_hidden AS is_hidden, hive_posts_cache.is_grayed AS is_grayed,
-           hive_posts_cache.total_votes AS total_votes, hive_posts_cache.flag_weight AS flag_weight,
-           hive_posts_cache.sc_trend AS sc_trend, hive_accounts.id AS acct_author_id,
-           hive_posts.is_pinned AS is_pinned
-      FROM hive_posts_cache JOIN hive_posts ON (hive_posts_cache.post_id = hive_posts.id)
-      JOIN hive_accounts ON (hive_posts_cache.author = hive_accounts.name)"""
-
-
-# Dummy
+SQL_TEMPLATE = """
+    SELECT hp.id,
+        ha_a.name as author,
+        hpd_p.permlink as permlink,
+        (SELECT title FROM hive_post_data WHERE hive_post_data.id = hp.id) as title,
+        (SELECT body FROM hive_post_data WHERE hive_post_data.id = hp.id) as body,
+        (SELECT category FROM hive_category_data WHERE hive_category_data.id = hp.category_id) as category,
+        depth,
+        promoted,
+        payout,
+        payout_at,
+        is_paidout,
+        children,
+        (0) as votes,
+        hp.created_at,
+        updated_at,
+        rshares,
+        (SELECT json FROM hive_post_data WHERE hive_post_data.id = hp.id) as json,
+        ha_a.reputation AS author_rep,
+        is_hidden,
+        is_grayed,
+        total_votes,
+        flag_weight,
+        ha_pa.name as parent_author,
+        hpd_pp.permlink as parent_permlink,
+        curator_payout_value,
+        ha_ra.name as root_author,
+        hpd_rp.permlink as root_permlink,
+        max_accepted_payout,
+        percent_hbd,
+        allow_replies,
+        allow_votes,
+        allow_curation_rewards,
+        beneficiaries,
+        url,
+        root_title,
+        sc_trend,
+        author_id,
+        is_pinned
+
+    FROM hive_posts hp
+    INNER JOIN hive_accounts ha_a ON ha_a.id = hp.author_id
+    INNER JOIN hive_permlink_data hpd_p ON hpd_p.id = hp.permlink_id
+    INNER JOIN hive_accounts ha_pa ON ha_pa.id = hp.parent_author_id
+    INNER JOIN hive_permlink_data hpd_pp ON hpd_pp.id = hp.parent_permlink_id
+    INNER JOIN hive_accounts ha_ra ON ha_ra.id = hp.root_author_id
+    INNER JOIN hive_permlink_data hpd_rp ON hpd_rp.id = hp.root_permlink_id
+    WHERE
+"""
 
 @return_error_info
 async def get_account_votes(context, account):
@@ -117,18 +133,20 @@ async def get_content(context, author: str, permlink: str, observer=None):
     db = context['db']
     valid_account(author)
     valid_permlink(permlink)
+    #force copy
+    sql = str(SQL_TEMPLATE)
+    sql += """ ha_a.name = :author AND hpd_p.permlink = :permlink AND NOT hp.is_deleted """
 
-    sql = """ ---get_content\n""" + SELECT_FRAGMENT + """
-        WHERE hive_posts_cache.author = :author AND hive_posts_cache.permlink = :permlink AND NOT hive_posts.is_deleted
-    """
+    post = None
     result = await db.query_all(sql, author=author, permlink=permlink)
-    result = dict(result[0])
-    post = _condenser_post_object(result, 0)
-    if not observer:
-        post['active_votes'] = _mute_votes(post['active_votes'], Mutes.all())
-    else:
-        blacklists_for_user = await Mutes.get_blacklists_for_observer(observer, context)
-        post['active_votes'] = _mute_votes(post['active_votes'], blacklists_for_user.keys())
+    if result:
+        result = dict(result[0])
+        post = _condenser_post_object(result, 0)
+        post['active_votes'] = await find_votes({'db': db}, {'author': author, 'permlink': permlink})
+        if not observer:
+            post['active_votes'] = _mute_votes(post['active_votes'], Mutes.all())
+        else:
+            blacklists_for_user = await Mutes.get_blacklists_for_observer(observer, context)
+            post['active_votes'] = _mute_votes(post['active_votes'], blacklists_for_user.keys())
 
     assert post, 'post was not found in cache'
     return post
@@ -140,20 +158,24 @@ async def get_content_replies(context, author: str, permlink: str):
     valid_account(author)
     valid_permlink(permlink)
 
-    sql = """SELECT post_id, author, permlink, title, body, category, depth,
-                    promoted, payout, payout_at, is_paidout, children, votes,
-                    created_at, updated_at, rshares, raw_json, json
-               FROM hive_posts_cache WHERE post_id IN (
-                    SELECT hp2.id FROM hive_posts hp2
-                     WHERE hp2.is_deleted = '0' AND
-                           hp2.parent_id = (SELECT id FROM hive_posts
-                                             WHERE author = :author
-                                               AND permlink = :permlink AND is_deleted = '0')
-                     LIMIT :limit
-               )
-          ORDER BY post_id"""
-
-    result=await db.query_all(sql, author=author, permlink = permlink, limit=5000)
+    #force copy
+    sql = str(SQL_TEMPLATE)
+    sql += """
+        hp.is_deleted = '0' AND
+        hp.parent_id = (
+            SELECT id
+            FROM hive_posts
+            WHERE
+                author_id = (SELECT id FROM hive_accounts WHERE name = :author)
+                AND permlink_id = (SELECT id FROM hive_permlink_data WHERE permlink = :permlink)
+                AND is_deleted = '0'
+        )
+        ORDER BY hp.id
+        LIMIT :limit
+    """
+
+    result = await db.query_all(sql, author=author, permlink=permlink, limit=5000)
     posts = await resultset_to_posts(db=db, resultset=result, truncate_body=0)
     return posts
 
@@ -196,47 +218,54 @@ async def get_discussions_by(discussion_type, context, start_author: str = '',
     valid_tag(tag, allow_empty=True)
     db = context['db']
 
-    sql = "---get_discussions_by_" + discussion_type + "\r\n" + SELECT_FRAGMENT
+    sql = "---get_discussions_by_" + discussion_type + "\r\n" + str(SQL_TEMPLATE)
 
-    sql = sql + """ WHERE NOT hive_posts.is_deleted """
+    sql = sql + """ NOT hp.is_deleted """
 
     if discussion_type == 'trending':
-        sql = sql + """ AND NOT hive_posts_cache.is_paidout %s ORDER BY sc_trend DESC LIMIT :limit """
+        sql = sql + """ AND NOT hp.is_paidout %s ORDER BY sc_trend DESC LIMIT :limit """
     elif discussion_type == 'hot':
-        sql = sql + """ AND NOT hive_posts_cache.is_paidout %s ORDER BY sc_hot DESC LIMIT :limit """
+        sql = sql + """ AND NOT hp.is_paidout %s ORDER BY sc_hot DESC LIMIT :limit """
     elif discussion_type == 'created':
-        sql = sql + """ AND hive_posts.depth = 0 %s ORDER BY hive_posts_cache.created_at DESC LIMIT :limit """
+        sql = sql + """ AND hp.depth = 0 %s ORDER BY hp.created_at DESC LIMIT :limit """
     elif discussion_type == 'promoted':
-        sql = sql + """ AND NOT hive_posts_cache.is_paidout AND hive_posts.promoted > 0
-            %s ORDER BY hive_posts_cache.promoted DESC LIMIT :limit """
+        sql = sql + """ AND NOT hp.is_paidout AND hp.promoted > 0
+            %s ORDER BY hp.promoted DESC LIMIT :limit """
     elif discussion_type == 'payout':
-        sql = sql + """ AND NOT hive_posts_cache.is_paidout AND hive_posts_cache.depth = 0
-            %s ORDER BY hive_posts_cache.payout DESC LIMIT :limit """
+        sql = sql + """ AND NOT hp.is_paidout AND hp.depth = 0
+            %s ORDER BY hp.payout DESC LIMIT :limit """
     elif discussion_type == 'payout_comments':
-        sql = sql + """ AND NOT hive_posts_cache.is_paidout AND hive_posts_cache.depth > 0
-            %s ORDER BY hive_posts_cache.payout DESC LIMIT :limit """
+        sql = sql + """ AND NOT hp.is_paidout AND hp.depth > 0
+            %s ORDER BY hp.payout DESC LIMIT :limit """
 
     if tag and tag != 'all':
         if tag[:5] == 'hive-':
-            sql = sql % """ %s AND hive_posts_cache.category = :tag """
+            sql = sql % """ %s AND hp.category_id = (SELECT id FROM hive_category_data WHERE category = :tag) """
         else:
-            sql = sql % """ %s AND hive_posts_cache.post_id IN (SELECT post_id FROM hive_post_tags WHERE tag = :tag) """
+            sql = sql % """ %s AND hp.id IN
+                (SELECT
+                    post_id
+                FROM
+                    hive_post_tags hpt
+                INNER JOIN hive_tag_data htd ON hpt.tag_id=htd.id
+                WHERE htd.tag = :tag
+                ) """
 
     if start_author and start_permlink:
         if discussion_type == 'trending':
-            sql = sql % """ AND hive_posts_cache.sc_trend <= (SELECT sc_trend FROM hive_posts_cache WHERE permlink = :permlink AND author = :author)
-                AND hive_posts_cache.post_id != (SELECT post_id FROM hive_posts_cache WHERE permlink = :permlink AND author = :author) """
+            sql = sql % """ AND hp.sc_trend <= (SELECT sc_trend FROM hive_posts WHERE permlink_id = (SELECT id FROM hive_permlink_data WHERE permlink = :permlink) AND author_id = (SELECT id FROM hive_accounts WHERE name = :author))
+                AND hp.id != (SELECT id FROM hive_posts WHERE permlink_id = (SELECT id FROM hive_permlink_data WHERE permlink = :permlink) AND author_id = (SELECT id FROM hive_accounts WHERE name = :author)) """
         elif discussion_type == 'hot':
-            sql = sql % """ AND hive_posts_cache.sc_hot <= (SELECT sc_hot FROM hive_posts_cache WHERE permlink = :permlink AND author = :author)
-                AND hive_posts_cache.post_id != (SELECT post_id FROM hive_posts_cache WHERE permlink = :permlink AND author = :author) """
+            sql = sql % """ AND hp.sc_hot <= (SELECT sc_hot FROM hive_posts WHERE permlink_id = (SELECT id FROM hive_permlink_data WHERE permlink = :permlink) AND author_id = (SELECT id FROM hive_accounts WHERE name = :author))
+                AND hp.id != (SELECT id FROM hive_posts WHERE permlink_id = (SELECT id FROM hive_permlink_data WHERE permlink = :permlink) AND author_id = (SELECT id FROM hive_accounts WHERE name = :author)) """
         elif discussion_type == 'created':
-            sql = sql % """ AND hive_posts_cache.post_id < (SELECT post_id FROM hive_posts_cache WHERE permlink = :permlink AND author = :author) """
+            sql = sql % """ AND hp.id < (SELECT id FROM hive_posts WHERE permlink_id = (SELECT id FROM hive_permlink_data WHERE permlink = :permlink) AND author_id = (SELECT id FROM hive_accounts WHERE name = :author)) """
         elif discussion_type == 'promoted':
-            sql = sql % """ AND hive_posts_cache.promoted <= (SELECT promoted FROM hive_posts_cache WHERE permlink = :permlink AND author = :author)
-                AND hive_posts_cache.post_id != (SELECT post_id FROM hive_posts_cache WHERE permlink = :permlink AND author = :author) """
+            sql = sql % """ AND hp.promoted <= (SELECT promoted FROM hive_posts WHERE permlink_id = (SELECT id FROM hive_permlink_data WHERE permlink = :permlink) AND author_id = (SELECT id FROM hive_accounts WHERE name = :author))
+                AND hp.id != (SELECT id FROM hive_posts WHERE permlink_id = (SELECT id FROM hive_permlink_data WHERE permlink = :permlink) AND author_id = (SELECT id FROM hive_accounts WHERE name = :author)) """
         else:
-            sql = sql % """ AND hive_posts_cache.payout <= (SELECT payout FROM hive_posts_cache where permlink = :permlink AND author = :author)
-                AND hive_posts_cache.post_id != (SELECT post_id FROM hive_posts_cache WHERE permlink = :permlink AND author = :author) """
+            sql = sql % """ AND hp.payout <= (SELECT payout FROM hive_posts WHERE permlink_id = (SELECT id FROM hive_permlink_data WHERE permlink = :permlink) AND author_id = (SELECT id FROM hive_accounts WHERE name = :author))
+                AND hp.id != (SELECT id FROM hive_posts WHERE permlink_id = (SELECT id FROM hive_permlink_data WHERE permlink = :permlink) AND author_id = (SELECT id FROM hive_accounts WHERE name = :author)) """
     else:
         sql = sql % """ """
 
@@ -330,17 +359,19 @@ async def get_discussions_by_blog(context, tag: str = None, start_author: str =
     valid_permlink(start_permlink, allow_empty=True)
     valid_limit(limit, 100)
 
-    sql = """ ---get_discussions_by_blog """ + SELECT_FRAGMENT + """
-        WHERE NOT hive_posts.is_deleted AND hive_posts_cache.post_id IN
+    #force copy
+    sql = str(SQL_TEMPLATE)
+    sql += """
+        NOT hp.is_deleted AND hp.id IN
         (SELECT post_id FROM hive_feed_cache JOIN hive_accounts ON (hive_feed_cache.account_id = hive_accounts.id) WHERE hive_accounts.name = :author)
     """
     if start_author and start_permlink != '':
         sql += """
-        AND hive_posts_cache.created_at <= (SELECT created_at from hive_posts_cache where author = :start_author AND permlink = :start_permlink)
+        AND hp.created_at <= (SELECT created_at FROM hive_posts WHERE author_id = (SELECT id FROM hive_accounts WHERE name = :start_author) AND permlink_id = (SELECT id FROM hive_permlink_data WHERE permlink = :start_permlink))
         """
 
     sql += """
-        ORDER BY hive_posts_cache.created_at DESC
+        ORDER BY hp.created_at DESC
         LIMIT :limit
     """
 
@@ -355,7 +386,6 @@ async def get_discussions_by_blog(context, tag: str = None, start_author: str =
             #posts_by_id[row['post_id']] = post
             posts_by_id.append(post)
 
-
 @return_error_info
 @nested_query_compat
 async def get_discussions_by_feed(context, tag: str = None, start_author: str = '',
@@ -385,19 +415,20 @@ async def get_discussions_by_comments(context, start_author: str = None, start_p
     valid_permlink(start_permlink, allow_empty=True)
     valid_limit(limit, 100)
 
-    sql = """ ---get_discussions_by_comments """ + SELECT_FRAGMENT + """
-        WHERE hive_posts_cache.author = :start_author AND hive_posts_cache.depth > 0
-        AND NOT hive_posts.is_deleted
+    #force copy
+    sql = str(SQL_TEMPLATE)
+    sql += """
+        ha_a.name = :start_author AND hp.depth > 0
+        AND NOT hp.is_deleted
     """
 
     if start_permlink:
         sql += """
-            AND hive_posts_cache.post_id <= (SELECT hive_posts_cache.post_id FROM
-            hive_posts_cache WHERE permlink = :start_permlink AND author=:start_author)
+            AND hp.id <= (SELECT hive_posts.id FROM hive_posts WHERE author_id = (SELECT id FROM hive_accounts WHERE name = :start_author) AND permlink_id = (SELECT id FROM hive_permlink_data WHERE permlink = :start_permlink))
         """
 
     sql += """
-        ORDER BY hive_posts_cache.post_id DESC, depth LIMIT :limit
+        ORDER BY hp.id DESC, depth LIMIT :limit
     """
 
     posts = []
@@ -412,13 +443,13 @@ async def get_discussions_by_comments(context, start_author: str = None, start_p
 
     return posts
 
-
 @return_error_info
 @nested_query_compat
 async def get_replies_by_last_update(context, start_author: str = None,
                                      start_permlink: str = '',
                                      limit: int = 20, truncate_body: int = 0):
     """Get all replies made to any of author's posts."""
     assert start_author, '`start_author` cannot be blank'
+
     ids = await cursor.pids_by_replies_to_account(
         context['db'],
         valid_account(start_author),
@@ -542,3 +573,11 @@ async def _get_blog(db, account: str, start_index: int, limit: int = None):
         idx -= 1
 
     return out
+
+@return_error_info
+async def get_accounts(context, accounts: list):
+    """Returns accounts data for accounts given in list"""
+    assert accounts, "Empty parameters are not supported"
+    assert len(accounts) < 1000, "Query exceeds limit"
+
+    return await cursor.get_accounts(context['db'], accounts)
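SQL_TEMPLATE deliberately ends in a bare WHERE, so every caller appends only predicates rather than a second WHERE keyword. The convention, sketched in isolation:

    sql = str(SQL_TEMPLATE)          # "... FROM hive_posts hp ... WHERE"
    sql += " NOT hp.is_deleted "     # first predicate: no leading AND
    sql += " AND hp.depth = 0 "      # later predicates join with AND
    sql += " ORDER BY hp.created_at DESC LIMIT :limit "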
diff --git a/hive/server/condenser_api/objects.py b/hive/server/condenser_api/objects.py
index 725d804bbafae4f4eec459beccd3a9e0511de3c6..3858448dd1761dca93ef75f0e5b6cf30ad25dac6 100644
--- a/hive/server/condenser_api/objects.py
+++ b/hive/server/condenser_api/objects.py
@@ -6,6 +6,7 @@ import ujson as json
 from hive.utils.normalize import sbd_amount, rep_to_raw
 from hive.server.common.mutes import Mutes
 from hive.server.common.helpers import json_date
+from hive.server.database_api.methods import find_votes
 
 log = logging.getLogger(__name__)
 
@@ -40,10 +41,53 @@ async def load_posts_keyed(db, ids, truncate_body=0):
     assert ids, 'no ids passed to load_posts_keyed'
 
     # fetch posts and associated author reps
-    sql = """SELECT post_id, author, permlink, title, body, category, depth,
-                    promoted, payout, payout_at, is_paidout, children, votes,
-                    created_at, updated_at, rshares, raw_json, json
-               FROM hive_posts_cache WHERE post_id IN :ids"""
+    sql = """
+        SELECT hp.id,
+            community_id,
+            ha_a.name as author,
+            hpd_p.permlink as permlink,
+            hpd.title as title,
+            hpd.body as body,
+            hcd.category as category,
+            depth,
+            promoted,
+            payout,
+            payout_at,
+            is_paidout,
+            children,
+            hpd.votes as votes,
+            hp.created_at,
+            updated_at,
+            rshares,
+            hpd.json as json,
+            is_hidden,
+            is_grayed,
+            total_votes,
+            flag_weight,
+            ha_pa.name as parent_author,
+            hpd_pp.permlink as parent_permlink,
+            curator_payout_value,
+            ha_ra.name as root_author,
+            hpd_rp.permlink as root_permlink,
+            max_accepted_payout,
+            percent_hbd,
+            allow_replies,
+            allow_votes,
+            allow_curation_rewards,
+            beneficiaries,
+            url,
+            root_title
+        FROM hive_posts hp
+        INNER JOIN hive_accounts ha_a ON ha_a.id = hp.author_id
+        INNER JOIN hive_permlink_data hpd_p ON hpd_p.id = hp.permlink_id
+        LEFT JOIN hive_post_data hpd ON hpd.id = hp.id
+        LEFT JOIN hive_category_data hcd ON hcd.id = hp.category_id
+        INNER JOIN hive_accounts ha_pa ON ha_pa.id = hp.parent_author_id
+        INNER JOIN hive_permlink_data hpd_pp ON hpd_pp.id = hp.parent_permlink_id
+        INNER JOIN hive_accounts ha_ra ON ha_ra.id = hp.root_author_id
+        INNER JOIN hive_permlink_data hpd_rp ON hpd_rp.id = hp.root_permlink_id
+        WHERE hp.id IN :ids"""
+
     result = await db.query_all(sql, ids=tuple(ids))
     author_reps = await _query_author_rep_map(db, result)
 
@@ -53,8 +97,8 @@ async def load_posts_keyed(db, ids, truncate_body=0):
         row = dict(row)
         row['author_rep'] = author_reps[row['author']]
         post = _condenser_post_object(row, truncate_body=truncate_body)
-        post['active_votes'] = _mute_votes(post['active_votes'], muted_accounts)
-        posts_by_id[row['post_id']] = post
+        post['active_votes'] = await find_votes({'db':db}, {'author':row['author'], 'permlink':row['permlink']})
+        posts_by_id[row['id']] = post
 
     return posts_by_id
 
@@ -77,8 +121,14 @@ async def load_posts(db, ids, truncate_body=0):
         log.info("get_posts do not exist in cache: %s", repr(missed))
         for _id in missed:
             ids.remove(_id)
-            sql = ("SELECT id, author, permlink, depth, created_at, is_deleted "
-                   "FROM hive_posts WHERE id = :id")
+            sql = """
+                SELECT
+                    hp.id, ha_a.name as author, hpd_p.permlink as permlink, depth, created_at, is_deleted
+                FROM
+                    hive_posts hp
+                INNER JOIN hive_accounts ha_a ON ha_a.id = hp.author_id
+                INNER JOIN hive_permlink_data hpd_p ON hpd_p.id = hp.permlink_id
+                WHERE hp.id = :id"""
             post = await db.query_row(sql, id=_id)
             if not post['is_deleted']:  # TODO: This should never happen. See #173 for analysis
@@ -97,7 +147,7 @@ async def resultset_to_posts(db, resultset, truncate_body=0):
         row = dict(row)
         row['author_rep'] = author_reps[row['author']]
         post = _condenser_post_object(row, truncate_body=truncate_body)
-        post['active_votes'] = _mute_votes(post['active_votes'], muted_accounts)
+        post['active_votes'] = await find_votes({'db':db}, {'author':row['author'], 'permlink':row['permlink']})
         posts.append(post)
 
     return posts
@@ -129,7 +179,7 @@ def _condenser_account_object(row):
             }})}
 
 def _condenser_post_object(row, truncate_body=0):
-    """Given a hive_posts_cache row, create a legacy-style post object."""
+    """Given a hive_posts row, create a legacy-style post object."""
     paid = row['is_paidout']
 
     # condenser#3424 mitigation
@@ -137,7 +187,7 @@ def _condenser_post_object(row, truncate_body=0):
         row['category'] = 'undefined'
 
     post = {}
-    post['post_id'] = row['post_id']
+    post['post_id'] = row['id']
     post['author'] = row['author']
     post['permlink'] = row['permlink']
     post['category'] = row['category']
@@ -161,54 +211,36 @@ def _condenser_post_object(row, truncate_body=0):
     post['replies'] = []
     post['body_length'] = len(row['body'])
-    post['active_votes'] = _hydrate_active_votes(row['votes'])
     post['author_reputation'] = rep_to_raw(row['author_rep'])
 
-    # import fields from legacy object
-    assert row['raw_json']
-    assert len(row['raw_json']) > 32
-    raw_json = json.loads(row['raw_json'])
+    post['root_author'] = row['root_author']
+    post['root_permlink'] = row['root_permlink']
+
+    post['allow_replies'] = row['allow_replies']
+    post['allow_votes'] = row['allow_votes']
+    post['allow_curation_rewards'] = row['allow_curation_rewards']
 
     if row['depth'] > 0:
-        post['parent_author'] = raw_json['parent_author']
-        post['parent_permlink'] = raw_json['parent_permlink']
+        post['parent_author'] = row['parent_author']
+        post['parent_permlink'] = row['parent_permlink']
     else:
         post['parent_author'] = ''
         post['parent_permlink'] = row['category']
 
-    post['url'] = raw_json['url']
-    post['root_title'] = raw_json['root_title']
-    post['beneficiaries'] = raw_json['beneficiaries']
-    post['max_accepted_payout'] = raw_json['max_accepted_payout']
-    post['percent_steem_dollars'] = raw_json['percent_steem_dollars']
+    post['url'] = row['url']
+    post['root_title'] = row['root_title']
+    post['beneficiaries'] = row['beneficiaries']
+    post['max_accepted_payout'] = row['max_accepted_payout']
+    post['percent_hbd'] = row['percent_hbd']
 
     if paid:
-        curator_payout = sbd_amount(raw_json['curator_payout_value'])
+        curator_payout = sbd_amount(row['curator_payout_value'])
         post['curator_payout_value'] = _amount(curator_payout)
         post['total_payout_value'] = _amount(row['payout'] - curator_payout)
 
-    # not used by condenser, but may be useful
-    #post['net_votes'] = post['total_votes'] - row['up_votes']
-    #post['allow_replies'] = raw_json['allow_replies']
-    #post['allow_votes'] = raw_json['allow_votes']
-    #post['allow_curation_rewards'] = raw_json['allow_curation_rewards']
-
     return post
 
 def _amount(amount, asset='HBD'):
     """Return a steem-style amount string given a (numeric, asset-str)."""
     assert asset == 'HBD', 'unhandled asset %s' % asset
     return "%.3f HBD" % amount
-
-def _hydrate_active_votes(vote_csv):
-    """Convert minimal CSV representation into steemd-style object."""
-    if not vote_csv:
-        return []
-    votes = []
-    for line in vote_csv.split("\n"):
-        voter, rshares, percent, reputation = line.split(',')
-        votes.append(dict(voter=voter,
-                          rshares=rshares,
-                          percent=percent,
-                          reputation=rep_to_raw(reputation)))
-    return votes
diff --git a/hive/server/condenser_api/tags.py b/hive/server/condenser_api/tags.py
index 9a4f5b5b3384a5dd004940dfbddcc163cbc0d5fe..7c687cfdde938238c23ada35668643c7cd65bcc6 100644
--- a/hive/server/condenser_api/tags.py
+++ b/hive/server/condenser_api/tags.py
@@ -10,8 +10,8 @@ async def get_top_trending_tags_summary(context):
     # Same results, more overhead:
     #return [tag['name'] for tag in await get_trending_tags('', 50)]
     sql = """
-        SELECT category
-          FROM hive_posts_cache
+        SELECT (SELECT category FROM hive_category_data WHERE id = category_id) as category
+          FROM hive_posts
         WHERE is_paidout = '0'
     GROUP BY category
     ORDER BY SUM(payout) DESC
@@ -31,19 +31,19 @@ async def get_trending_tags(context, start_tag: str = '', limit: int = 250):
         seek = """
          HAVING SUM(payout) <= (
            SELECT SUM(payout)
-             FROM hive_posts_cache
+             FROM hive_posts
            WHERE is_paidout = '0'
-              AND category = :start_tag)
+              AND category_id = (SELECT id FROM hive_category_data WHERE category = :start_tag))
         """
     else:
         seek = ''
 
     sql = """
-      SELECT category,
+      SELECT (SELECT category FROM hive_category_data WHERE id = category_id) as category,
             COUNT(*) AS total_posts,
             SUM(CASE WHEN depth = 0 THEN 1 ELSE 0 END) AS top_posts,
             SUM(payout) AS total_payouts
-        FROM hive_posts_cache
+        FROM hive_posts
       WHERE is_paidout = '0'
    GROUP BY category %s
    ORDER BY SUM(payout) DESC
diff --git a/hive/server/database_api/__init__.py b/hive/server/database_api/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..609e45795e4f2fd34c029573df6487bd9d366dec
--- /dev/null
+++ b/hive/server/database_api/__init__.py
@@ -0,0 +1 @@
+"""Hive database_api methods and support."""
diff --git a/hive/server/database_api/methods.py b/hive/server/database_api/methods.py
new file mode 100644
index 0000000000000000000000000000000000000000..5b8cf22c1f35c5c1d0d11c20c3f51899573425ce
--- /dev/null
+++ b/hive/server/database_api/methods.py
@@ -0,0 +1,233 @@
+# pylint: disable=too-many-arguments,line-too-long,too-many-lines
+from hive.server.common.helpers import return_error_info, valid_limit, valid_account, valid_permlink
+from hive.server.common.objects import condenser_post_object
+
+SQL_TEMPLATE = """
+    SELECT hp.id,
+        community_id,
+        ha_a.name as author,
+        hpd_p.permlink as permlink,
+        (SELECT title FROM hive_post_data WHERE hive_post_data.id = hp.id) as title,
+        (SELECT body FROM hive_post_data WHERE hive_post_data.id = hp.id) as body,
+        (SELECT category FROM hive_category_data WHERE hive_category_data.id = hp.category_id) as category,
+        depth,
+        promoted,
+        payout,
+        payout_at,
+        is_paidout,
+        children,
+        (0) as votes,
+        hp.created_at,
+        updated_at,
+        rshares,
+        (SELECT json FROM hive_post_data WHERE hive_post_data.id = hp.id) as json,
+        is_hidden,
+        is_grayed,
+        total_votes,
+        flag_weight,
+        ha_pa.name as parent_author,
+        hpd_pp.permlink as parent_permlink,
+        curator_payout_value,
+        ha_ra.name as root_author,
+        hpd_rp.permlink as root_permlink,
+        max_accepted_payout,
+        percent_hbd,
+        allow_replies,
+        allow_votes,
+        allow_curation_rewards,
+        beneficiaries,
+        url,
+        root_title
+    FROM hive_posts hp
+    INNER JOIN hive_accounts ha_a ON ha_a.id = hp.author_id
+    INNER JOIN hive_permlink_data hpd_p ON hpd_p.id = hp.permlink_id
+    INNER JOIN hive_accounts ha_pa ON ha_pa.id = hp.parent_author_id
+    INNER JOIN hive_permlink_data hpd_pp ON hpd_pp.id = hp.parent_permlink_id
+    INNER JOIN hive_accounts ha_ra ON ha_ra.id = hp.root_author_id
+    INNER JOIN hive_permlink_data hpd_rp ON hpd_rp.id = hp.root_permlink_id
+    WHERE
+"""
+
+async def get_post_id_by_author_and_permlink(db, author: str, permlink: str, limit: int):
+    """Return post ids for given author and permlink"""
+    sql = """
+        SELECT hp.id
+        FROM hive_posts hp
+        INNER JOIN hive_accounts ha_a ON ha_a.id = hp.author_id
+        INNER JOIN hive_permlink_data hpd_p ON hpd_p.id = hp.permlink_id
+        WHERE ha_a.name >= :author AND hpd_p.permlink >= :permlink
+        ORDER BY ha_a.name ASC
+        LIMIT :limit
+    """
+    result = await db.query_row(sql, author=author, permlink=permlink, limit=limit)
+    if result is not None:
+        return int(result.get('id', 0))
+    return 0
+
+@return_error_info
+async def list_comments(context, start: list, limit: int, order: str):
+    """Returns all comments, starting with the specified options."""
+
+    supported_order_list = ['by_cashout_time', 'by_permlink', 'by_root', 'by_parent', 'by_update', 'by_author_last_update']
+    assert order in supported_order_list, "Unsupported order, valid orders: {}".format(", ".join(supported_order_list))
+    limit = valid_limit(limit, 1000)
+    db = context['db']
+
+    comments = []
+    if order == 'by_cashout_time':
+        assert len(start) == 3, "Expecting three arguments"
+        author = start[1]
+        permlink = start[2]
+        post_id = 0
+        if author or permlink:
+            post_id = await get_post_id_by_author_and_permlink(db, author, permlink, 1)
+
+        sql = str(SQL_TEMPLATE)
+        sql += "hp.payout_at >= :start AND hp.id >= :post_id ORDER BY hp.payout_at ASC, hp.id ASC LIMIT :limit"
+
+        result = await db.query_all(sql, start=start[0], limit=limit, post_id=post_id)
+        for row in result:
+            cpo = condenser_post_object(dict(row))
+            cpo['active_votes'] = await find_votes(context, {'author':cpo['author'], 'permlink':cpo['permlink']})
+            comments.append(cpo)
+    elif order == 'by_permlink':
+        assert len(start) == 2, "Expecting two arguments"
+
+        sql = str(SQL_TEMPLATE)
+        sql += """ hp.id IN (SELECT hp1.id FROM hive_posts_a_p hp1 WHERE hp1.author >= :author COLLATE "C"
+            AND hp1.permlink >= :permlink COLLATE "C" ORDER BY hp1.author COLLATE "C" ASC LIMIT :limit)"""
+
+        result = await db.query_all(sql, author=start[0], permlink=start[1], limit=limit)
+        for row in result:
+            cpo = condenser_post_object(dict(row))
+            cpo['active_votes'] = await find_votes(context, {'author':cpo['author'], 'permlink':cpo['permlink']})
+            comments.append(cpo)
+    elif order == 'by_root':
+        assert len(start) == 4, "Expecting 4 arguments"
+        raise NotImplementedError('by_root')
+
+        sql = str(SQL_TEMPLATE)
+        sql += "get_rows_by_root(:root_author, :root_permlink, :child_author, :child_permlink) ORDER BY post_id ASC LIMIT :limit"
+
+        result = await db.query_all(sql, root_author=start[0], root_permlink=start[1], child_author=start[2], child_permlink=start[3], limit=limit)
+        for row in result:
+            cpo = condenser_post_object(dict(row))
+            cpo['active_votes'] = await find_votes(context, {'author':cpo['author'], 'permlink':cpo['permlink']})
+            comments.append(cpo)
+    elif order == 'by_parent':
+        assert len(start) == 4, "Expecting 4 arguments"
+        raise NotImplementedError('by_parent')
+
+        sql = str(SQL_TEMPLATE)
+        sql += "get_rows_by_parent(:parent_author, :parent_permlink, :child_author, :child_permlink) LIMIT :limit"
+
+        result = await db.query_all(sql, parent_author=start[0], parent_permlink=start[1], child_author=start[2], child_permlink=start[3], limit=limit)
+        for row in result:
+            cpo = condenser_post_object(dict(row))
+            cpo['active_votes'] = await find_votes(context, {'author':cpo['author'], 'permlink':cpo['permlink']})
+            comments.append(cpo)
+    elif order == 'by_update':
+        assert len(start) == 4, "Expecting 4 arguments"
+
+        child_author = start[2]
+        child_permlink = start[3]
+
+        post_id = 0
+        if child_author or child_permlink:
+            post_id = await get_post_id_by_author_and_permlink(db, child_author, child_permlink, 1)
+
+        sql = str(SQL_TEMPLATE)
+        sql += "ha_pa.name >= :parent_author AND hp.updated_at >= :updated_at AND hp.id >= :post_id ORDER BY ha_pa.name ASC, updated_at ASC, hp.id ASC LIMIT :limit"
+
+        result = await db.query_all(sql, parent_author=start[0], updated_at=start[1], post_id=post_id, limit=limit)
+        for row in result:
+            cpo = condenser_post_object(dict(row))
+            cpo['active_votes'] = await find_votes(context, {'author':cpo['author'], 'permlink':cpo['permlink']})
+            comments.append(cpo)
+
+    elif order == 'by_author_last_update':
+        assert len(start) == 4, "Expecting 4 arguments"
+
+        author = start[2]
+        permlink = start[3]
+
+        post_id = 0
+        if author or permlink:
+            post_id = await get_post_id_by_author_and_permlink(db, author, permlink, 1)
+
+        sql = str(SQL_TEMPLATE)
+        sql += "ha_a.name >= :author AND hp.updated_at >= :updated_at AND hp.id >= :post_id ORDER BY ha_a.name ASC, hp.updated_at ASC, hp.id ASC LIMIT :limit"
+
+        result = await db.query_all(sql, author=start[0], updated_at=start[1], post_id=post_id, limit=limit)
+        for row in result:
+            cpo = condenser_post_object(dict(row))
+            cpo['active_votes'] = await find_votes(context, {'author':cpo['author'], 'permlink':cpo['permlink']})
+            comments.append(cpo)
+
+    return comments
+
+@return_error_info
+async def find_comments(context, start: list, limit: int, order: str):
+    """ Search for comments: limit and order is ignored in hive code """
+    comments = []
+
+    assert len(start) <= 1000, "Parameters count is greater than max allowed (1000)"
+    db = context['db']
+
+    # make a copy
+    sql = str(SQL_TEMPLATE)
+
+    idx = 0
+    for arg in start:
+        if idx > 0:
+            sql += " OR "
+        sql += "(ha_a.name = '{}' AND hpd_p.permlink = '{}')".format(arg[0], arg[1])
+        idx += 1
+
+    result = await db.query_all(sql)
+    for row in result:
+        cpo = condenser_post_object(dict(row))
+        cpo['active_votes'] = await find_votes(context, {'author':cpo['author'], 'permlink':cpo['permlink']})
+
+@return_error_info
+async def find_comments(context, start: list, limit: int, order: str):
+    """ Search for comments; limit and order are ignored in hive code """
+    comments = []
+
+    assert len(start) <= 1000, "Parameters count is greater than max allowed (1000)"
+    db = context['db']
+
+    # make a copy
+    sql = str(SQL_TEMPLATE)
+
+    idx = 0
+    for arg in start:
+        if idx > 0:
+            sql += " OR "
+        # values are interpolated directly; see the bound-parameter sketch above
+        sql += "(ha_a.name = '{}' AND hpd_p.permlink = '{}')".format(arg[0], arg[1])
+        idx += 1
+
+    result = await db.query_all(sql)
+    for row in result:
+        cpo = condenser_post_object(dict(row))
+        cpo['active_votes'] = await find_votes(context, {'author':cpo['author'], 'permlink':cpo['permlink']})
+        comments.append(cpo)
+
+    return comments
+
+@return_error_info
+async def find_votes(context, params: dict):
+    """ Returns all votes for the given post """
+    valid_account(params['author'])
+    valid_permlink(params['permlink'])
+    db = context['db']
+    sql = """
+        SELECT
+            ha_v.name as voter,
+            ha_a.name as author,
+            hpd.permlink as permlink,
+            weight,
+            rshares,
+            vote_percent,
+            last_update,
+            num_changes
+        FROM
+            hive_votes hv
+        INNER JOIN hive_accounts ha_v ON (ha_v.id = hv.voter_id)
+        INNER JOIN hive_accounts ha_a ON (ha_a.id = hv.author_id)
+        INNER JOIN hive_permlink_data hpd ON (hpd.id = hv.permlink_id)
+        WHERE
+            ha_a.name = :author AND hpd.permlink = :permlink
+    """
+    ret = []
+    rows = await db.query_all(sql, author=params['author'], permlink=params['permlink'])
+    for row in rows:
+        ret.append(dict(voter=row.voter, author=row.author, permlink=row.permlink,
+                        weight=row.weight, rshares=row.rshares, vote_percent=row.vote_percent,
+                        last_update=row.last_update, num_changes=row.num_changes))
+    return ret
+
+@return_error_info
+async def list_votes(context, start: list, limit: int, order: str):
+    """ Returns all votes, starting with the specified voter and/or author and permlink. """
+    supported_order_list = ["by_comment_voter", "by_voter_comment"]
+    assert order in supported_order_list, "Order {} is not supported".format(order)
+    limit = valid_limit(limit, 1000)
+    assert len(start) == 3, "Expecting 3 elements in start array"
+    raise NotImplementedError('list_votes')
diff --git a/hive/server/follow_api/__init__.py b/hive/server/follow_api/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/hive/server/follow_api/methods.py b/hive/server/follow_api/methods.py
new file mode 100644
index 0000000000000000000000000000000000000000..58e38914a031a683f56d11f47e6c7216f90aa967
--- /dev/null
+++ b/hive/server/follow_api/methods.py
@@ -0,0 +1,16 @@
+from hive.server.common.helpers import return_error_info
+
+@return_error_info
+async def get_feed_entries(context, account: str, start_entry_id: int, limit: int):
+    """ Returns a list of entries in an account’s feed. """
+    raise NotImplementedError()
+
+@return_error_info
+async def get_feed(context, account: str, start_entry_id: int, limit: int):
+    """ Returns a list of items in an account’s feed. """
+    raise NotImplementedError()
+
+@return_error_info
+async def get_blog_authors(context, blog_account: str):
+    """ Returns a list of authors that have had their content reblogged on a given blog account.
""" + raise NotImplementedError() diff --git a/hive/server/hive_api/common.py b/hive/server/hive_api/common.py index 17da19005c18222f45273537606a31a6a93b9c0a..915c624ef5a465c0d8727aed788c695ac9b44bfe 100644 --- a/hive/server/hive_api/common.py +++ b/hive/server/hive_api/common.py @@ -23,7 +23,14 @@ async def url_to_id(db, url): async def get_post_id(db, author, permlink): """Get post_id based on author/permlink.""" - sql = "SELECT id FROM hive_posts WHERE author = :a AND permlink = :p" + sql = """ + SELECT + hp.id, ha_a.name as author, hpd_p.permlink as permlink + FROM + hive_posts hp + INNER JOIN hive_accounts ha_a ON ha_a.id = hp.author_id + INNER JOIN hive_permlink_data hpd_p ON hpd_p.id = hp.permlink_id + WHERE ha_a.name = :a AND hpd_p.permlik = :p""" _id = await db.query_one(sql, a=author, p=permlink) assert _id, 'post id not found' return _id diff --git a/hive/server/hive_api/community.py b/hive/server/hive_api/community.py index 47963d1ce30661bd7e23a315713acfbed5aa497b..9deaa0164ac0bb67fb68389fda23788509ed3bbc 100644 --- a/hive/server/hive_api/community.py +++ b/hive/server/hive_api/community.py @@ -353,8 +353,16 @@ async def top_community_muted(context, community): async def _top_community_posts(db, community, limit=50): # TODO: muted equivalent - sql = """SELECT author, votes, payout FROM hive_posts_cache - WHERE category = :community AND is_paidout = '0' - AND post_id IN (SELECT id FROM hive_posts WHERE is_muted = '0') - ORDER BY payout DESC LIMIT :limit""" + sql = """ + SELECT ha_a.name as author, + hpd.votes as votes, + payout + FROM hive_posts hp + INNER JOIN hive_accounts ha_a ON ha_a.id = hp.author_id + LEFT JOIN hive_post_data hpd ON hpd.id = hp.id + LEFT JOIN hive_category_data hcd ON hcd.id = hp.category_id + WHERE hcdcategory = :community AND is_paidout = '0' + AND post_id IN (SELECT id FROM hive_posts WHERE is_muted = '0') + ORDER BY payout DESC LIMIT :limit""" + return await db.query_all(sql, community=community, limit=limit) diff --git a/hive/server/hive_api/notify.py b/hive/server/hive_api/notify.py index 3b1a19e1ac18c5a5d678fb1de7f0d488bfc2b177..192cde0366ba0f84cf5a22065961aade18e65ec1 100644 --- a/hive/server/hive_api/notify.py +++ b/hive/server/hive_api/notify.py @@ -90,7 +90,9 @@ async def post_notifications(context, author, permlink, min_score=25, last_id=No def _notifs_sql(where): sql = """SELECT hn.id, hn.type_id, hn.score, hn.created_at, src.name src, dst.name dst, - hp.author, hp.permlink, hc.name community, + (SELECT name FROM hive_accounts WHERE id = hp.author_id), + (SELECT permlink FROM hive_permlink_data WHERE id = hp.permlink_id), + hc.name community, hc.title community_title, payload FROM hive_notifs hn LEFT JOIN hive_accounts src ON hn.src_id = src.id diff --git a/hive/server/hive_api/objects.py b/hive/server/hive_api/objects.py index c8c74a37ed8f28951d1f9243b31d60e90b893a2f..d74cdac4407934506c5d3d90df890d1b9276e7a0 100644 --- a/hive/server/hive_api/objects.py +++ b/hive/server/hive_api/objects.py @@ -68,10 +68,22 @@ async def comments_by_id(db, ids, observer=None): """Given an array of post ids, returns comment objects keyed by id.""" assert ids, 'no ids passed to comments_by_id' - sql = """SELECT post_id, author, permlink, body, depth, - payout, payout_at, is_paidout, created_at, updated_at, - rshares, is_hidden, is_grayed, votes - FROM hive_posts_cache WHERE post_id IN :ids""" #votes + sql = """SELECT hp.id, + (SELECT name FROM hive_accounts ha WHERE ha.id = hp.author_id) as author, + (SELECT permlink FROM hive_permlink_data hpd WHERE hpd.id = 
diff --git a/hive/server/hive_api/objects.py b/hive/server/hive_api/objects.py
index c8c74a37ed8f28951d1f9243b31d60e90b893a2f..d74cdac4407934506c5d3d90df890d1b9276e7a0 100644
--- a/hive/server/hive_api/objects.py
+++ b/hive/server/hive_api/objects.py
@@ -68,10 +68,22 @@ async def comments_by_id(db, ids, observer=None):
     """Given an array of post ids, returns comment objects keyed by id."""
     assert ids, 'no ids passed to comments_by_id'
-    sql = """SELECT post_id, author, permlink, body, depth,
-                    payout, payout_at, is_paidout, created_at, updated_at,
-                    rshares, is_hidden, is_grayed, votes
-               FROM hive_posts_cache WHERE post_id IN :ids""" #votes
+    sql = """SELECT hp.id,
+                    (SELECT name FROM hive_accounts ha WHERE ha.id = hp.author_id) as author,
+                    (SELECT permlink FROM hive_permlink_data hpd WHERE hpd.id = hp.permlink_id) as permlink,
+                    (SELECT body FROM hive_post_data hpa WHERE hpa.id = hp.id) as body,
+                    hp.depth,
+                    hp.payout,
+                    hp.payout_at,
+                    hp.is_paidout,
+                    hp.created_at,
+                    hp.updated_at,
+                    hp.rshares,
+                    hp.is_hidden,
+                    hp.is_grayed,
+                    hp.votes
+               FROM hive_posts hp
+               WHERE hp.id IN :ids""" #votes
 
     result = await db.query_all(sql, ids=tuple(ids))
     authors = set()
@@ -79,7 +91,7 @@
     for row in result:
         top_votes, observer_vote = _top_votes(row, 5, observer)
         post = {
-            'id': row['post_id'],
+            'id': row['id'],
             'author': row['author'],
             'url': row['author'] + '/' + row['permlink'],
             'depth': row['depth'],
@@ -108,11 +120,29 @@ async def posts_by_id(db, ids, observer=None, lite=True):
     """Given a list of post ids, returns lite post objects in the same order."""
     # pylint: disable=too-many-locals
-    sql = """SELECT post_id, author, permlink, title, img_url, payout, promoted,
-                    created_at, payout_at, is_nsfw, rshares, votes,
-                    is_muted, is_invalid, %s
-               FROM hive_posts_cache WHERE post_id IN :ids"""
-    fields = ['preview'] if lite else ['body', 'updated_at', 'json']
+    sql = """
+        SELECT
+            hp.id,
+            ha_a.name as author,
+            hpd_p.permlink as permlink,
+            hpd.title as title,
+            hpd.img_url,
+            hp.payout,
+            hp.promoted,
+            hp.created_at,
+            hp.payout_at,
+            hp.is_nsfw,
+            hp.rshares,
+            hp.votes,
+            hp.is_muted,
+            hp.is_invalid,
+            %s
+        FROM hive_posts hp
+        INNER JOIN hive_accounts ha_a ON ha_a.id = hp.author_id
+        INNER JOIN hive_permlink_data hpd_p ON hpd_p.id = hp.permlink_id
+        LEFT JOIN hive_post_data hpd ON hpd.id = hp.id
+        WHERE hp.id IN :ids"""
+    fields = ['hpd.preview'] if lite else ['hpd.body', 'updated_at', 'hpd.json']
     sql = sql % (', '.join(fields))
 
     reblogged_ids = await _reblogged_ids(db, observer, ids) if observer else []
@@ -125,7 +155,7 @@ async def posts_by_id(db, ids, observer=None, lite=True):
     for row in await db.query_all(sql, ids=tuple(ids)):
         assert not row['is_muted']
         assert not row['is_invalid']
-        pid = row['post_id']
+        pid = row['id']
         top_votes, observer_vote = _top_votes(row, 5, observer)
 
         obj = {
@@ -158,7 +188,7 @@ async def posts_by_id(db, ids, observer=None, lite=True):
         }
         authors.add(obj['author'])
-        by_id[row['post_id']] = obj
+        by_id[row['id']] = obj
 
     # in rare cases of cache inconsistency, recover and warn
     missed = set(ids) - by_id.keys()
@@ -172,8 +202,12 @@ async def posts_by_id(db, ids, observer=None, lite=True):
             'accounts': await accounts_by_name(db, authors, observer, lite=True)}
 
 async def _append_flags(db, posts):
-    sql = """SELECT id, parent_id, community_id, category, is_muted, is_valid
-               FROM hive_posts WHERE id IN :ids"""
+    sql = """
+        SELECT
+            hp.id, parent_id, community_id, hcd.category as category, is_muted, is_valid
+        FROM hive_posts hp
+        LEFT JOIN hive_category_data hcd ON hcd.id = hp.category_id
+        WHERE hp.id IN :ids"""
     for row in await db.query_all(sql, ids=tuple(posts.keys())):
         post = posts[row['id']]
         post['parent_id'] = row['parent_id']
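posts_by_id keeps a single base query and swaps only the trailing column list depending on `lite`; a condensed sketch of that templating (`BASE` abridged here):

    BASE = "SELECT hp.id, ha_a.name as author, %s FROM hive_posts hp ..."  # abridged
    def _posts_sql(lite: bool) -> str:
        # lite objects carry a preview; full objects carry body, updated_at and json
        fields = ['hpd.preview'] if lite else ['hpd.body', 'updated_at', 'hpd.json']
        return BASE % ', '.join(fields)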
diff --git a/hive/server/hive_api/thread.py b/hive/server/hive_api/thread.py
index 27d98423528e2874e31e9c827ee013d99a155ee1..b3f03f7c96deb08faf22ea6e029b9adbfe28d391 100644
--- a/hive/server/hive_api/thread.py
+++ b/hive/server/hive_api/thread.py
@@ -29,7 +29,7 @@ async def fetch_more_children(context, root_id, last_sibling_id, sort='top',
                                  valid_limit(limit, 50), observer)
 
-_SORTS = dict(hot='sc_hot', top='payout', new='post_id')
+_SORTS = dict(hot='sc_hot', top='payout', new='id')
 async def _fetch_children(db, root_id, start_id, sort, limit, observer=None):
     """Fetch truncated children from tree."""
     mutes = set()
@@ -41,10 +41,10 @@ async def _fetch_children(db, root_id, start_id, sort, limit, observer=None):
     # find most relevant ids in subset
     seek = ''
     if start_id:
-        seek = """AND %s < (SELECT %s FROM hive_posts_cache
-                             WHERE post_id = :start_id)""" % (field, field)
-    sql = """SELECT post_id FROM hive_posts_cache
-              WHERE post_id IN :ids %s ORDER BY %s DESC
+        seek = """AND %s < (SELECT %s FROM hive_posts
+                             WHERE id = :start_id)""" % (field, field)
+    sql = """SELECT id FROM hive_posts
+              WHERE id IN :ids %s ORDER BY %s DESC
               LIMIT :limit""" % (seek, field)
     relevant_ids = await db.query_col(sql, ids=tuple(parent.keys()),
                                       start_id=start_id, limit=limit)
diff --git a/hive/server/serve.py b/hive/server/serve.py
index c83053f749c4fd70b9bb991e3799f1f39bd26558..ad75d2c0d6634e08e762cef19a314e23b79bb609 100644
--- a/hive/server/serve.py
+++ b/hive/server/serve.py
@@ -26,6 +26,11 @@ from hive.server.hive_api import community as hive_api_community
 from hive.server.hive_api import notify as hive_api_notify
 from hive.server.hive_api import stats as hive_api_stats
 
+from hive.server.follow_api import methods as follow_api
+from hive.server.tags_api import methods as tags_api
+
+from hive.server.database_api import methods as database_api
+
 from hive.server.db import Db
 
 # pylint: disable=too-many-lines
@@ -73,6 +78,7 @@ def build_methods():
         condenser_api.get_blog_entries,
         condenser_api.get_account_reputations,
         condenser_api.get_reblogged_by,
+        condenser_api.get_accounts,
     )})
 
     # dummy methods -- serve informational error
@@ -90,6 +96,9 @@ def build_methods():
         'follow_api.get_blog': condenser_api.get_blog,
         'follow_api.get_blog_entries': condenser_api.get_blog_entries,
         'follow_api.get_reblogged_by': condenser_api.get_reblogged_by,
+        'follow_api.get_feed_entries': follow_api.get_feed_entries,
+        'follow_api.get_feed': follow_api.get_feed,
+        'follow_api.get_blog_authors': follow_api.get_blog_authors,
     })
 
     # tags_api aliases
@@ -105,6 +114,12 @@ def build_methods():
         'tags_api.get_discussions_by_author_before_date': condenser_api.get_discussions_by_author_before_date,
         'tags_api.get_post_discussions_by_payout': condenser_api.get_post_discussions_by_payout,
         'tags_api.get_comment_discussions_by_payout': condenser_api.get_comment_discussions_by_payout,
+        'tags_api.get_active_votes' : tags_api.get_active_votes,
+        'tags_api.get_tags_used_by_author' : tags_api.get_tags_used_by_author,
+        'tags_api.get_discussions_by_active' : tags_api.get_discussions_by_active,
+        'tags_api.get_discussions_by_cashout' : tags_api.get_discussions_by_cashout,
+        'tags_api.get_discussions_by_votes' : tags_api.get_discussions_by_votes,
+        'tags_api.get_discussions_by_children' : tags_api.get_discussions_by_children,
     })
 
     # legacy `call` style adapter
@@ -136,6 +151,12 @@ def build_methods():
         hive_api_community.list_all_subscriptions,
     )})
 
+    # database_api methods
+    methods.add(**{
+        'database_api.list_comments' : database_api.list_comments,
+        'database_api.find_comments' : database_api.find_comments,
+    })
+
     return methods
 
 def truncate_response_log(logger):
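build_methods registers every handler under a dotted namespace, so dispatch reduces to a dictionary lookup on the JSON-RPC `method` string. A library-free sketch of that shape (the real server wires these into its RPC framework; names here are stand-ins):

    async def _list_comments_stub(context, start, limit, order):
        return []  # stand-in for database_api.list_comments

    METHODS = {'database_api.list_comments': _list_comments_stub}

    async def dispatch(context, request):
        handler = METHODS[request['method']]  # KeyError maps to "method not found"
        return await handler(context, **request.get('params', {}))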
diff --git a/hive/server/tags_api/__init__.py b/hive/server/tags_api/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e21a4b22e906d06b68cafb38173f99bad13e7d4d
--- /dev/null
+++ b/hive/server/tags_api/__init__.py
@@ -0,0 +1 @@
+""" Tags api """
\ No newline at end of file
diff --git a/hive/server/tags_api/methods.py b/hive/server/tags_api/methods.py
new file mode 100644
index 0000000000000000000000000000000000000000..3f20f52ed6c68f0ddf0e6367bb8b0517ec37feaf
--- /dev/null
+++ b/hive/server/tags_api/methods.py
@@ -0,0 +1,60 @@
+from hive.server.common.helpers import (
+    return_error_info,
+    valid_account,
+    valid_permlink)
+
+
+@return_error_info
+async def get_active_votes(context, author: str, permlink: str):
+    """ Returns all votes for the given post. """
+    valid_account(author)
+    valid_permlink(permlink)
+    db = context['db']
+    sql = """
+        SELECT
+            ha_v.name as voter,
+            ha_a.name as author,
+            hpd.permlink as permlink,
+            weight,
+            rshares,
+            vote_percent,
+            last_update,
+            num_changes
+        FROM
+            hive_votes hv
+        INNER JOIN hive_accounts ha_v ON ha_v.id = hv.voter_id
+        INNER JOIN hive_accounts ha_a ON ha_a.id = hv.author_id
+        INNER JOIN hive_permlink_data hpd ON hpd.id = hv.permlink_id
+        WHERE ha_a.name = :author AND hpd.permlink = :permlink
+    """
+    ret = await db.query_all(sql, author=author, permlink=permlink)
+    return ret
+
+@return_error_info
+async def get_tags_used_by_author(context, author: str):
+    """ Returns a list of tags used by an author. """
+    raise NotImplementedError()
+
+@return_error_info
+async def get_discussions_by_active(context, tag: str, limit: int, filter_tags: list,
+                                    select_authors: list, select_tags: list, truncate_body: int):
+    """ Returns a list of discussions based on active. """
+    raise NotImplementedError()
+
+@return_error_info
+async def get_discussions_by_cashout(context, tag: str, limit: int, filter_tags: list,
+                                     select_authors: list, select_tags: list, truncate_body: int):
+    """ Returns a list of discussions by cashout. """
+    raise NotImplementedError()
+
+@return_error_info
+async def get_discussions_by_votes(context, tag: str, limit: int, filter_tags: list,
+                                   select_authors: list, select_tags: list, truncate_body: int):
+    """ Returns a list of discussions by votes. """
+    raise NotImplementedError()
+
+@return_error_info
+async def get_discussions_by_children(context, tag: str, limit: int, filter_tags: list,
+                                      select_authors: list, select_tags: list, truncate_body: int):
+    """ Returns a list of discussions by children.
""" + raise NotImplementedError() diff --git a/hive/steem/client.py b/hive/steem/client.py index f8efb7b3cd03a28ce601a5307ef00d6556a0adb4..84a3f5ccf91bae03fe1d7dbb704b75657896c05d 100644 --- a/hive/steem/client.py +++ b/hive/steem/client.py @@ -1,4 +1,5 @@ """Tight and reliable steem API client for hive indexer.""" +import logging from time import perf_counter as perf from decimal import Decimal @@ -8,6 +9,8 @@ from hive.utils.normalize import parse_amount, steem_amount, vests_amount from hive.steem.http_client import HttpClient from hive.steem.block.stream import BlockStream +logger = logging.getLogger(__name__) + class SteemClient: """Handles upstream calls to jussi/steemd, with batching and retrying.""" # dangerous default value of url but it should be fine since we are not writting to it @@ -21,10 +24,11 @@ class SteemClient: self._max_workers = max_workers self._client = dict() for endpoint, endpoint_url in url.items(): - print("Endpoint {} will be routed to node {}".format(endpoint, endpoint_url)) + logger.info("Endpoint %s will be routed to node %s" % (endpoint, endpoint_url)) self._client[endpoint] = HttpClient(nodes=[endpoint_url]) - def get_accounts(self, accounts): + def get_accounts(self, acc): + accounts = [v for v in acc if v != ''] """Fetch multiple accounts by name.""" assert accounts, "no accounts passed to get_accounts" assert len(accounts) <= 1000, "max 1000 accounts" @@ -44,11 +48,7 @@ class SteemClient: def get_content_batch(self, tuples): """Fetch multiple comment objects.""" - posts = self.__exec_batch('get_content', tuples) - # TODO: how are we ensuring sequential results? need to set and sort id. - for post in posts: # sanity-checking jussi responses - assert 'author' in post, "invalid post: %s" % post - return posts + raise NotImplementedError("get_content is not implemented in hived") def get_block(self, num, strict=True): """Fetches a single block. 
@@ -94,7 +94,8 @@ class SteemClient:
                   'confidential_sbd_supply', 'total_reward_fund_steem',
                   'total_reward_shares2']
         for key in unused:
-            del dgpo[key]
+            if key in dgpo:
+                del dgpo[key]
 
         return {
             'dgpo': dgpo,
@@ -104,7 +105,7 @@ class SteemClient:
     @staticmethod
     def _get_steem_per_mvest(dgpo):
-        steem = steem_amount(dgpo['total_vesting_fund_steem'])
+        steem = steem_amount(dgpo['total_vesting_fund_hive'])
         mvests = vests_amount(dgpo['total_vesting_shares']) / Decimal(1e6)
         return "%.6f" % (steem / mvests)
@@ -112,15 +113,20 @@
         # TODO: add latest feed price: get_feed_history.price_history[0]
         feed = self.__exec('get_feed_history')['current_median_history']
         units = dict([parse_amount(feed[k])[::-1] for k in ['base', 'quote']])
-        price = units['HBD'] / units['HIVE']
+        if 'TBD' in units and 'TESTS' in units:
+            price = units['TBD'] / units['TESTS']
+        else:
+            price = units['HBD'] / units['HIVE']
         return "%.6f" % price
 
     def _get_steem_price(self):
         orders = self.__exec('get_order_book', [1])
-        ask = Decimal(orders['asks'][0]['real_price'])
-        bid = Decimal(orders['bids'][0]['real_price'])
-        price = (ask + bid) / 2
-        return "%.6f" % price
+        if orders['asks'] and orders['bids']:
+            ask = Decimal(orders['asks'][0]['real_price'])
+            bid = Decimal(orders['bids'][0]['real_price'])
+            price = (ask + bid) / 2
+            return "%.6f" % price
+        return "0"
 
     def get_blocks_range(self, lbound, ubound):
         """Retrieves blocks in the range of [lbound, ubound)."""
@@ -136,6 +142,56 @@
         return [blocks[x] for x in block_nums]
 
+    def get_virtual_operations(self, block):
+        """ Get virtual ops from block """
+        result = self.__exec('get_ops_in_block', {"block_num":block, "only_virtual":True})
+        tracked_ops = ['curation_reward_operation', 'author_reward_operation', 'comment_reward_operation', 'effective_comment_vote_operation']
+        ret = []
+        result = result['ops'] if 'ops' in result else []
+        for vop in result:
+            if vop['op']['type'] in tracked_ops:
+                ret.append(vop['op'])
+        return ret
+
+    def enum_virtual_ops(self, begin_block, end_block):
+        """ Get virtual ops for range of blocks """
+        ret = {}
+
+        from_block = begin_block
+
+        # According to the definition of hive::plugins::account_history::enum_vops_filter:
+
+        author_reward_operation = 0x000002
+        curation_reward_operation = 0x000004
+        comment_reward_operation = 0x000008
+        effective_comment_vote_operation = 0x400000
+
+        tracked_ops_filter = curation_reward_operation | author_reward_operation | comment_reward_operation | effective_comment_vote_operation
+        tracked_ops = ['curation_reward_operation', 'author_reward_operation', 'comment_reward_operation', 'effective_comment_vote_operation']
+
+        resume_on_operation = 0
+
+        while from_block < end_block:
+            call_result = self.__exec('enum_virtual_ops', {"block_range_begin":from_block, "block_range_end":end_block
+                , "group_by_block": True, "operation_begin": resume_on_operation, "limit": 1000, "filter": tracked_ops_filter
+                })
+
+            # merge this page into the accumulated result instead of overwriting it
+            ret.update({opb["block"] : {"timestamp":opb["timestamp"], "ops":[op["op"] for op in opb["ops"]]} for opb in call_result["ops_by_block"]})
+
+            resume_on_operation = call_result['next_operation_begin'] if 'next_operation_begin' in call_result else 0
+
+            next_block = call_result['next_block_range_begin']
+
+            # Move to next block only if operations from current one have been processed completely.
+            from_block = next_block
+
+        return ret
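+    # Usage sketch (illustrative): the filter above is a bitwise OR of the
+    # per-operation flags, and paging follows the cursor hived returns in
+    # next_block_range_begin / next_operation_begin, e.g.:
+    #   vops = client.enum_virtual_ops(50_000_000, 50_000_100)
+    #   for block_num, entry in sorted(vops.items()):
+    #       for op in entry['ops']:
+    #           print(block_num, entry['timestamp'], op['type'])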
+
+    def get_comment_pending_payouts(self, comments):
+        """ Get comment pending payout data """
+        ret = self.__exec('get_comment_pending_payouts', {'comments':comments})
+        return ret['cashout_infos']
+
     def __exec(self, method, params=None):
         """Perform a single steemd call."""
         start = perf()
diff --git a/hive/steem/http_client.py b/hive/steem/http_client.py
index c0bb7ebf99d0bf9d021b0e8318d63ef02fcf4662..63bcca4e9b26a3d1b53133088e6efa19a477e8b0 100644
--- a/hive/steem/http_client.py
+++ b/hive/steem/http_client.py
@@ -84,11 +84,13 @@ class HttpClient(object):
     METHOD_API = dict(
         lookup_accounts='condenser_api',
         get_block='block_api',
-        get_content='condenser_api',
         get_accounts='condenser_api',
         get_order_book='condenser_api',
         get_feed_history='condenser_api',
         get_dynamic_global_properties='database_api',
+        get_comment_pending_payouts='database_api',
+        get_ops_in_block='account_history_api',
+        enum_virtual_ops='account_history_api'
     )
 
     def __init__(self, nodes, **kwargs):
@@ -145,7 +147,8 @@
         body_data = json.dumps(body, ensure_ascii=False).encode('utf8')
 
         tries = 0
-        while tries < 100:
+        # retry up to 25 times before giving up
+        while tries < 25:
             tries += 1
             secs = -1
             info = None
diff --git a/hive/utils/normalize.py b/hive/utils/normalize.py
index 4c8158535a25522d9889b5fc5b3f2cb2217e9c98..5df8c885f5042ab5a6609b1e512fd399ec735731 100644
--- a/hive/utils/normalize.py
+++ b/hive/utils/normalize.py
@@ -13,6 +13,16 @@ NAI_MAP = {
     '@@000000037': 'VESTS',
 }
 
+def escape_characters(text):
+    """ Escape special characters """
+    ret = str(text)
+    ret = ret.replace("\\", "\\\\")
+    ret = ret.replace("'", "''")
+    ret = ret.replace("%", '%%')
+    ret = ret.replace("_", "\\_")
+    ret = ret.replace(":", "\\:")
+    return ret
+
 def vests_amount(value):
     """Returns a decimal amount, asserting units are VESTS"""
     return parse_amount(value, 'VESTS')
@@ -180,3 +190,13 @@ def int_log_level(str_log_level):
     if not isinstance(log_level, int):
         raise ValueError('Invalid log level: %s' % str_log_level)
     return log_level
+
+def asset_to_hbd_hive(price, asset):
+    """ Converts hive to hbd and hbd to hive based on price """
+    if asset['nai'] == price['base']['nai']:
+        result = int(asset['amount']) * int(price['quote']['amount']) / int(price['base']['amount'])
+        return {'amount' : result, 'nai' : price['quote']['nai'], 'precision' : price['quote']['precision']}
+    elif asset['nai'] == price['quote']['nai']:
+        result = int(asset['amount']) * int(price['base']['amount']) / int(price['quote']['amount'])
+        return {'amount' : result, 'nai' : price['base']['nai'], 'precision' : price['base']['precision']}
+    raise ValueError("Asset not supported")
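asset_to_hbd_hive cross-multiplies the asset against the current price quote. A worked example with illustrative figures (a price of 0.250 HBD per HIVE; the HBD and HIVE NAI codes assumed as in NAI_MAP):

    price = {'base':  {'amount': '250',  'precision': 3, 'nai': '@@000000013'},   # 0.250 HBD
             'quote': {'amount': '1000', 'precision': 3, 'nai': '@@000000021'}}   # 1.000 HIVE
    hbd = {'amount': '250', 'precision': 3, 'nai': '@@000000013'}                 # 0.250 HBD
    asset_to_hbd_hive(price, hbd)
    # -> {'amount': 1000.0, 'nai': '@@000000021', 'precision': 3}, i.e. 1.000 HIVE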
diff --git a/hive/utils/post.py b/hive/utils/post.py
index 0d2b316f94e966c680e4ad78c70e414f170b3327..9c3b8f1a176ef3aaa69e58354b4941c060c2b642 100644
--- a/hive/utils/post.py
+++ b/hive/utils/post.py
@@ -7,6 +7,7 @@ import ujson as json
 from funcy.seqs import first, distinct
 
 from hive.utils.normalize import sbd_amount, rep_log10, safe_img_url, parse_time, utc_timestamp
+from hive.indexer.votes import Votes
 
 def mentions(body):
     """Given a post body, return proper @-mentioned account names."""
@@ -31,7 +32,7 @@ def post_to_internal(post, post_id, level='insert', promoted=None):
     #post['gray'] = core['is_muted']
     #post['hide'] = not core['is_valid']
 
-    values = [('post_id', post_id)]
+    values = [('id', post_id)]
 
     # immutable; write only once (*edge case: undeleted posts)
     if level == 'insert':
@@ -44,6 +45,7 @@ def post_to_internal(post, post_id, level='insert', promoted=None):
     # always write, unless simple vote update
     if level in ['insert', 'payout', 'update']:
         basic = post_basic(post)
+        legacy_data = post_legacy(post)
         values.extend([
             ('community_id',  post['community_id']), # immutable*
             ('created_at',    post['created']),    # immutable*
@@ -58,7 +60,20 @@ def post_to_internal(post, post_id, level='insert', promoted=None):
             ('is_full_power', basic['is_full_power']),
             ('is_paidout',    basic['is_paidout']),
             ('json',          json.dumps(basic['json_metadata'])),
-            ('raw_json',      json.dumps(post_legacy(post))),
+            #('raw_json',      json.dumps(legacy_data)),
+            ('parent_author',           legacy_data['parent_author']),
+            ('parent_permlink',         legacy_data['parent_permlink']),
+            ('curator_payout_value',    legacy_data['curator_payout_value']),
+            ('root_author',             legacy_data['root_author']),
+            ('root_permlink',           legacy_data['root_permlink']),
+            ('max_accepted_payout',     legacy_data['max_accepted_payout']),
+            ('percent_hbd',             legacy_data['percent_hbd']),
+            ('allow_replies',           legacy_data['allow_replies']),
+            ('allow_votes',             legacy_data['allow_votes']),
+            ('allow_curation_rewards',  legacy_data['allow_curation_rewards']),
+            ('beneficiaries',           legacy_data['beneficiaries']),
+            ('url',                     legacy_data['url']),
+            ('root_title',              legacy_data['root_title']),
         ])
 
     # if there's a pending promoted value to write, pull it out
@@ -85,7 +100,7 @@ def post_to_internal(post, post_id, level='insert', promoted=None):
             ('sc_trend',      payout['sc_trend']),
             ('sc_hot',        payout['sc_hot']),
             ('flag_weight',   stats['flag_weight']),
-            ('total_votes',   stats['total_votes']),
+            ('total_votes',   Votes.get_vote_count(post['author'], post['permlink'])),
             ('up_votes',      stats['up_votes']),
             ('is_hidden',     stats['hide']),
             ('is_grayed',     stats['gray']),
@@ -148,7 +163,7 @@ def post_basic(post):
         is_payout_declined = True
 
     # payout entirely in SP
-    is_full_power = int(post['percent_steem_dollars']) == 0
+    is_full_power = int(post['percent_hbd']) == 0
 
     return {
         'json_metadata': md,
@@ -171,7 +186,7 @@ def post_legacy(post):
     """
     _legacy = ['id', 'url', 'root_comment', 'root_author', 'root_permlink',
                'root_title', 'parent_author', 'parent_permlink',
               'max_accepted_payout', 'percent_hbd',
               'curator_payout_value', 'allow_replies', 'allow_votes',
               'allow_curation_rewards', 'beneficiaries']
     return {k: v for k, v in post.items() if k in _legacy}
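post_legacy simply projects the condenser-style post dict onto the fields that now get their own columns; a small illustration (values made up):

    post = {'id': 42, 'author': 'alice', 'body': '...',   # author/body are not in _legacy
            'parent_author': '', 'parent_permlink': 'spam',
            'percent_hbd': 10000, 'allow_votes': True}
    post_legacy(post)
    # -> {'id': 42, 'parent_author': '', 'parent_permlink': 'spam',
    #     'percent_hbd': 10000, 'allow_votes': True}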
diff --git a/hive/utils/stats.py b/hive/utils/stats.py
index fc7d3a5543196e5d08658efeb25726a3b5a14a8a..499c34c5a9aabd14f5ed8674fb1b1bf873db9b92 100644
--- a/hive/utils/stats.py
+++ b/hive/utils/stats.py
@@ -65,8 +65,16 @@ class StatsAbstract:
         log.info('%7s %9s %9s %9s', '-pct-', '-ttl-', '-avg-', '-cnt-')
         for call, ms, reqs in self.table(40):
+            try:
+                avg = ms/reqs
+                ratio = ms/self._ms
+            except ZeroDivisionError:
+                avg = 0.0
+                ratio = 0.0
+            if reqs == 0:
+                reqs = 1
             log.info("% 6.1f%% % 7dms % 9.2f % 8dx -- %s",
-                     100 * ms/self._ms, ms, ms/reqs, reqs, call)
+                     100 * ratio, ms, avg, reqs, call)
         self.clear()
 
@@ -89,6 +97,9 @@ class SteemStats(StatsAbstract):
         'get_order_book': 20,
         'get_feed_history': 20,
         'lookup_accounts': 1000,
+        'get_comment_pending_payouts':1000,
+        'get_ops_in_block':500,
+        'enum_virtual_ops':1000
     }
 
     def __init__(self):
@@ -110,21 +121,27 @@ class SteemStats(StatsAbstract):
 class DbStats(StatsAbstract):
     """Tracks database query timings."""
     SLOW_QUERY_MS = 250
+    LOGGING_THRESHOLD = 50
 
     def __init__(self):
         super().__init__('db')
 
     def check_timing(self, call, ms, batch_size):
         """Warn if any query is slower than defined threshold."""
-        if ms > self.SLOW_QUERY_MS:
-            out = "[SQL][%dms] %s" % (ms, call[:250])
-            log.warning(colorize(out))
+        if ms > self.LOGGING_THRESHOLD:
+            log.warning("[SQL][%dms] %s", ms, call)
+        if ms > self.SLOW_QUERY_MS:
+            out = "[SQL][%dms] %s" % (ms, call[:250])
+            log.warning(colorize(out))
 
 class Stats:
     """Container for steemd and db timing data."""
     PRINT_THRESH_MINS = 1
 
+    COLLECT_DB_STATS = 0
+    COLLECT_NODE_STATS = 0
+
     _db = DbStats()
     _steemd = SteemStats()
     _secs = 0.0
@@ -134,14 +151,16 @@ class Stats:
     @classmethod
     def log_db(cls, sql, secs):
         """Log a database query. Incoming SQL is normalized."""
-        cls._db.add(_normalize_sql(sql), secs * 1000)
-        cls.add_secs(secs)
+        if cls.COLLECT_DB_STATS:
+            cls._db.add(_normalize_sql(sql), secs * 1000)
+            cls.add_secs(secs)
 
     @classmethod
     def log_steem(cls, method, secs, batch_size=1):
         """Log a steemd call."""
-        cls._steemd.add(method, secs * 1000, batch_size)
-        cls.add_secs(secs)
+        if cls.COLLECT_NODE_STATS:
+            cls._steemd.add(method, secs * 1000, batch_size)
+            cls.add_secs(secs)
 
     @classmethod
     def log_idle(cls, secs):
diff --git a/scripts/update_hivemind_db.sql b/scripts/update_hivemind_db.sql
new file mode 100644
index 0000000000000000000000000000000000000000..20bad2b3261cf1afb16d25e33d3b6d2821b092e3
--- /dev/null
+++ b/scripts/update_hivemind_db.sql
@@ -0,0 +1,391 @@
+-- This script upgrades the hivemind database to the new version
+-- Authors: Dariusz Kędzierski
+-- Created: 26-04-2020
+-- Last edit: 26-05-2020
+
+CREATE TABLE IF NOT EXISTS hive_db_version (
+  version VARCHAR(50) PRIMARY KEY,
+  notes VARCHAR(1024)
+);
+
+DO $$
+  BEGIN
+    RAISE NOTICE 'Upgrading database to version 1.0';
+    IF EXISTS (SELECT version FROM hive_db_version WHERE version = '1.0')
+    THEN
+      RAISE EXCEPTION 'Database already in version 1.0';
+    END IF;
+  END
+$$ LANGUAGE plpgsql;
+
+-- Upgrade to version 1.0
+-- in this version we move data from raw_json into separate columns,
+-- split hive_posts_cache into parts and move all data to the proper tables,
+-- and add the needed indexes and procedures
+
+-- Update version info
+INSERT INTO hive_db_version (version, notes) VALUES ('1.0', 'https://gitlab.syncad.com/blocktrades/hivemind/issues/5');
+
+-- add special author value, empty author to accounts table
+-- RAISE NOTICE 'add special author value, empty author to accounts table';
+INSERT INTO hive_accounts (name, created_at) VALUES ('', '1990-01-01T00:00:00');
+
+-- Table to hold permlink dictionary, permlink is unique
+-- RAISE NOTICE 'Table to hold permlink dictionary, permlink is unique';
+CREATE TABLE IF NOT EXISTS hive_permlink_data (
+  id BIGSERIAL PRIMARY KEY NOT NULL,
+  permlink VARCHAR(255) NOT NULL CONSTRAINT hive_permlink_data_permlink UNIQUE
+);
+-- Populate hive_permlink_data
+-- insert special permlink, empty permlink
+-- RAISE NOTICE 'insert special permlink, empty permlink';
+INSERT INTO hive_permlink_data (permlink) VALUES ('');
+-- populate from the permlink field of hive_posts
+-- RAISE NOTICE 'populate from the permlink field of hive_posts';
+INSERT INTO hive_permlink_data (permlink) SELECT permlink FROM hive_posts ON CONFLICT (permlink) DO NOTHING;
+-- we should also scan parent_permlink and root_permlink but we will do that on raw_json scan
+
+-- Table to hold category data, category is unique
+-- RAISE NOTICE 'Table to hold category data, category is unique';
+CREATE TABLE IF NOT EXISTS hive_category_data (
+  id SERIAL PRIMARY KEY NOT NULL,
+  category VARCHAR(255) NOT NULL CONSTRAINT hive_category_data_category UNIQUE
+);
+-- Populate hive_category_data
+-- insert special category, empty category
+-- RAISE NOTICE 'insert special category, empty category';
+INSERT INTO hive_category_data (category) VALUES ('');
+-- populate from the category field of hive_posts
+-- RAISE NOTICE 'populate from the category field of hive_posts';
+INSERT INTO hive_category_data (category) SELECT category FROM hive_posts ON CONFLICT (category) DO NOTHING;
+-- Create indexes
+CREATE INDEX IF NOT EXISTS hive_category_data_category_idx ON hive_category_data (category ASC);
+CREATE INDEX IF NOT EXISTS hive_category_data_category_c_idx ON hive_category_data (category COLLATE "C" ASC);
+
+-- Table to hold post data
+-- RAISE NOTICE 'Table to hold post data';
+CREATE TABLE IF NOT EXISTS hive_posts_new (
+  id INT NOT NULL,
+  parent_id INT,
+  author_id INT NOT NULL,
+  permlink_id BIGINT NOT NULL,
+  category_id INT NOT NULL,
+  community_id INT,
+  created_at DATE NOT NULL,
+  depth SMALLINT DEFAULT '0',
+  is_deleted BOOLEAN DEFAULT '0',
+  is_pinned BOOLEAN DEFAULT '0',
+  is_muted BOOLEAN DEFAULT '0',
+  is_valid BOOLEAN DEFAULT '1',
+  promoted NUMERIC(10, 3) DEFAULT '0.0',
+
+  -- important/index
+  children SMALLINT DEFAULT '0',
+
+  -- basic/extended-stats
+  author_rep NUMERIC(6) DEFAULT '0.0',
+  flag_weight NUMERIC(6) DEFAULT '0.0',
+  total_votes INT DEFAULT '0',
+  up_votes INT DEFAULT '0',
+
+  -- core stats/indexes
+  payout NUMERIC(10, 3) DEFAULT '0.0',
+  payout_at DATE DEFAULT '1970-01-01T00:00:00',
+  updated_at DATE DEFAULT '1970-01-01T00:00:00',
+  is_paidout BOOLEAN DEFAULT '0',
+
+  -- ui flags/filters
+  is_nsfw BOOLEAN DEFAULT '0',
+  is_declined BOOLEAN DEFAULT '0',
+  is_full_power BOOLEAN DEFAULT '0',
+  is_hidden BOOLEAN DEFAULT '0',
+  is_grayed BOOLEAN DEFAULT '0',
+
+  -- important indexes
+  rshares BIGINT DEFAULT '-1',
+  sc_trend NUMERIC(6) DEFAULT '0.0',
+  sc_hot NUMERIC(6) DEFAULT '0.0',
+
+  total_payout_value VARCHAR(30) DEFAULT '',
+  author_rewards BIGINT DEFAULT '0',
+
+  author_rewards_hive BIGINT DEFAULT '0',
+  author_rewards_hbd BIGINT DEFAULT '0',
+  author_rewards_vests BIGINT DEFAULT '0',
+
+  children_abs_rshares BIGINT DEFAULT '0',
+  abs_rshares BIGINT DEFAULT '0',
+  vote_rshares BIGINT DEFAULT '0',
+  net_votes INT DEFAULT '0',
+  active DATE DEFAULT '1970-01-01T00:00:00',
+  last_payout DATE DEFAULT '1970-01-01T00:00:00',
+  cashout_time DATE DEFAULT '1970-01-01T00:00:00',
+  max_cashout_time DATE DEFAULT '1970-01-01T00:00:00',
+  reward_weight INT DEFAULT '0',
+
+  -- columns from raw_json
+  parent_author_id INT DEFAULT '-1',
+  parent_permlink_id BIGINT DEFAULT '-1',
+  curator_payout_value VARCHAR(30) DEFAULT '',
+  root_author_id INT DEFAULT '-1',
+  root_permlink_id BIGINT DEFAULT '-1',
+  max_accepted_payout VARCHAR(30) DEFAULT '1000000.000 HBD',
+  percent_hbd INT DEFAULT '10000',
+  allow_replies BOOLEAN DEFAULT '1',
+  allow_votes BOOLEAN DEFAULT '1',
+  allow_curation_rewards BOOLEAN DEFAULT '1',
+  beneficiaries JSON DEFAULT '[]',
+  url TEXT DEFAULT '',
+  root_title VARCHAR(255) DEFAULT ''
+);
+
+CREATE INDEX IF NOT EXISTS hive_posts_author_id_idx ON hive_posts_new (author_id);
+CREATE INDEX IF NOT EXISTS hive_posts_permlink_id_idx ON hive_posts_new (permlink_id);
+
+-- Table to hold bulk post data
+-- RAISE NOTICE 'Table to hold bulk post data';
+CREATE TABLE IF NOT EXISTS hive_post_data (
+  id INT PRIMARY KEY NOT NULL,
+  title VARCHAR(255) NOT NULL,
+  preview VARCHAR(1024) NOT NULL,
+  img_url VARCHAR(1024) NOT NULL,
+  body TEXT,
+  json TEXT
+);
+
+CREATE TABLE IF NOT EXISTS hive_votes (
+  id BIGSERIAL PRIMARY KEY NOT NULL,
+  voter_id INT NOT NULL REFERENCES hive_accounts (id) ON DELETE
RESTRICT,
+  author_id INT NOT NULL REFERENCES hive_accounts (id) ON DELETE RESTRICT,
+  permlink_id INT NOT NULL REFERENCES hive_permlink_data (id) ON DELETE RESTRICT,
+  weight BIGINT DEFAULT '0',
+  rshares BIGINT DEFAULT '0',
+  vote_percent INT DEFAULT '0',
+  last_update DATE DEFAULT '1970-01-01T00:00:00',
+  num_changes INT DEFAULT '0'
+);
+
+CREATE INDEX IF NOT EXISTS hive_votes_voter_id_idx ON hive_votes (voter_id);
+CREATE INDEX IF NOT EXISTS hive_votes_author_id_idx ON hive_votes (author_id);
+CREATE INDEX IF NOT EXISTS hive_votes_permlink_id_idx ON hive_votes (permlink_id);
+CREATE INDEX IF NOT EXISTS hive_votes_upvote_idx ON hive_votes (vote_percent) WHERE vote_percent > 0;
+CREATE INDEX IF NOT EXISTS hive_votes_downvote_idx ON hive_votes (vote_percent) WHERE vote_percent < 0;
+
+-- Copy data from hive_posts table to new table
+-- RAISE NOTICE 'Copy data from hive_posts table to new table';
+INSERT INTO hive_posts_new (
+  id,
+  parent_id,
+  author_id,
+  permlink_id,
+  category_id,
+  community_id,
+  created_at,
+  depth,
+  is_deleted,
+  is_pinned,
+  is_muted,
+  is_valid,
+  promoted
+)
+SELECT
+  hp.id,
+  hp.parent_id,
+  (SELECT id FROM hive_accounts WHERE name = hp.author) as author_id,
+  (SELECT id FROM hive_permlink_data WHERE permlink = hp.permlink) as permlink_id,
+  (SELECT id FROM hive_category_data WHERE category = hp.category) as category_id,
+  hp.community_id,
+  hp.created_at,
+  hp.depth,
+  hp.is_deleted,
+  hp.is_pinned,
+  hp.is_muted,
+  hp.is_valid,
+  hp.promoted
+FROM
+  hive_posts hp;
+
+-- Copy standard data to new posts table
+-- RAISE NOTICE 'Copy standard data to new posts table';
+UPDATE hive_posts_new hpn SET (
+  children, author_rep, flag_weight, total_votes, up_votes, payout,
+  payout_at, updated_at, is_paidout, is_nsfw, is_declined, is_full_power,
+  is_hidden, is_grayed, rshares, sc_trend, sc_hot)
+=
+  (SELECT
+    children, author_rep, flag_weight, total_votes, up_votes, payout,
+    payout_at, updated_at, is_paidout, is_nsfw, is_declined, is_full_power,
+    is_hidden, is_grayed, rshares, sc_trend, sc_hot FROM hive_posts_cache hpc WHERE hpn.id = hpc.post_id);
+
+-- Populate table hive_post_data with bulk data from hive_posts_cache
+-- RAISE NOTICE 'Populate table hive_post_data with bulk data from hive_posts_cache';
+INSERT INTO hive_post_data (id, title, preview, img_url, body, json) SELECT post_id, title, preview, img_url, body, json FROM hive_posts_cache;
+
+-- Populate hive_votes table
+-- RAISE NOTICE 'Populate table hive_votes with bulk data from hive_posts_cache';
+INSERT INTO
+  hive_votes (voter_id, author_id, permlink_id, rshares, vote_percent)
+SELECT
+  (SELECT id from hive_accounts WHERE name = vote_data.regexp_split_to_array[1]) AS voter_id,
+  (SELECT author_id FROM hive_posts WHERE id = vote_data.id) AS author_id,
+  (SELECT permlink_id FROM hive_posts WHERE id = vote_data.id) AS permlink_id,
+  (vote_data.regexp_split_to_array[2])::bigint AS rshares,
+  (vote_data.regexp_split_to_array[3])::int AS vote_percent
+FROM
+  (SELECT
+    votes.id, regexp_split_to_array(votes.regexp_split_to_table::text, E',')
+  FROM
+    (SELECT id, regexp_split_to_table(votes::text, E'\n')
+    FROM hive_posts_cache WHERE votes IS NOT NULL AND votes != '')
+  AS votes)
+AS vote_data;
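+-- Note: the nested regexp_split calls above unpack the legacy votes blob one
+-- vote per line, each line assumed to hold 'voter,rshares,vote_percent' (the
+-- layout the array indexes above rely on). Illustrative:
+--   SELECT regexp_split_to_array(line, E',')
+--   FROM regexp_split_to_table(E'alice,1200,10000\nbob,-50,-1000', E'\n') AS line;
+-- yields {alice,1200,10000} and {bob,-50,-1000}.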
+
+-- Helper type for use with json_populate_record
+-- RAISE NOTICE 'Creating legacy_comment_data table';
+CREATE TABLE legacy_comment_data (
+  id BIGINT,
+  raw_json TEXT,
+  parent_author VARCHAR(16),
+  parent_permlink VARCHAR(255),
+  curator_payout_value VARCHAR(30),
+  root_author VARCHAR(16),
+  root_permlink VARCHAR(255),
+  max_accepted_payout VARCHAR(30),
+  percent_hbd INT,
+  allow_replies BOOLEAN,
+  allow_votes BOOLEAN,
+  allow_curation_rewards BOOLEAN,
+  beneficiaries JSON,
+  url TEXT,
+  root_title VARCHAR(255)
+);
+
+-- RAISE NOTICE 'Creating legacy_comment_type table';
+CREATE TYPE legacy_comment_type AS (
+  id BIGINT,
+  parent_author VARCHAR(16),
+  parent_permlink VARCHAR(255),
+  curator_payout_value VARCHAR(30),
+  root_author VARCHAR(16),
+  root_permlink VARCHAR(255),
+  max_accepted_payout VARCHAR(30),
+  -- raw_json still carries the legacy key name, so the record field must match it
+  percent_steem_dollars INT,
+  allow_replies BOOLEAN,
+  allow_votes BOOLEAN,
+  allow_curation_rewards BOOLEAN,
+  beneficiaries JSON,
+  url TEXT,
+  root_title VARCHAR(255)
+);
+
+-- RAISE NOTICE 'Copying raw_json data to temporary table';
+INSERT INTO legacy_comment_data (id, raw_json) SELECT post_id, raw_json FROM hive_posts_cache;
+
+UPDATE legacy_comment_data lcd SET (parent_author, parent_permlink,
+  curator_payout_value, root_author, root_permlink, max_accepted_payout,
+  percent_hbd, allow_replies, allow_votes, allow_curation_rewards,
+  beneficiaries, url, root_title)
+=
+(SELECT parent_author, parent_permlink,
+  curator_payout_value, root_author, root_permlink, max_accepted_payout,
+  percent_steem_dollars, allow_replies, allow_votes, allow_curation_rewards,
+  beneficiaries, url, root_title FROM json_populate_record(null::legacy_comment_type, lcd.raw_json::json)
+);
+
+-- RAISE NOTICE 'Copying parent_permlink data to proper columns';
+INSERT INTO hive_permlink_data (permlink) SELECT parent_permlink FROM legacy_comment_data ON CONFLICT (permlink) DO NOTHING;
+
+-- RAISE NOTICE 'Copying root_permlink data to proper columns';
+INSERT INTO hive_permlink_data (permlink) SELECT root_permlink FROM legacy_comment_data ON CONFLICT (permlink) DO NOTHING;
+
+-- RAISE NOTICE 'Moving raw json data to proper columns in hive_posts';
+UPDATE hive_posts_new hpn SET
+  parent_author_id = (SELECT id FROM hive_accounts WHERE name = lcd.parent_author),
+  parent_permlink_id = (SELECT id FROM hive_permlink_data WHERE permlink = lcd.parent_permlink),
+  curator_payout_value = lcd.curator_payout_value,
+  root_author_id = (SELECT id FROM hive_accounts WHERE name = lcd.root_author),
+  root_permlink_id = (SELECT id FROM hive_permlink_data WHERE permlink = lcd.root_permlink),
+  max_accepted_payout = lcd.max_accepted_payout,
+  percent_hbd = lcd.percent_hbd,
+  allow_replies = lcd.allow_replies,
+  allow_votes = lcd.allow_votes,
+  allow_curation_rewards = lcd.allow_curation_rewards,
+  beneficiaries = lcd.beneficiaries,
+  url = lcd.url,
+  root_title = lcd.root_title
+FROM (SELECT id, parent_author, parent_permlink, curator_payout_value, root_author, root_permlink,
+  max_accepted_payout, percent_hbd, allow_replies, allow_votes, allow_curation_rewards,
+  beneficiaries, url, root_title FROM legacy_comment_data) AS lcd
+WHERE lcd.id = hpn.id;
+
+-- Drop and rename tables after data migration
+-- RAISE NOTICE 'Dropping tables';
+DROP TYPE IF EXISTS legacy_comment_type;
+DROP TABLE IF EXISTS legacy_comment_data;
+DROP TABLE IF EXISTS hive_posts_cache;
+-- before deleting hive_posts we need to remove constraints
+ALTER TABLE hive_payments DROP CONSTRAINT hive_payments_fk3;
+ALTER TABLE hive_reblogs DROP CONSTRAINT hive_reblogs_fk2;
+DROP TABLE IF EXISTS hive_posts;
+-- now rename table
+-- RAISE NOTICE 'Renaming hive_posts_new to hive_posts';
+ALTER TABLE hive_posts_new RENAME TO hive_posts;
+-- in order to make id column a primary key we will need a sequence
+CREATE SEQUENCE hive_posts_serial OWNED BY hive_posts.id;
+-- and init that sequence from largest id + 1
+SELECT setval('hive_posts_serial', (SELECT max(id)+1 FROM hive_posts), false);
+-- now set that sequence as id sequence for hive_posts
+ALTER TABLE hive_posts ALTER COLUMN id SET DEFAULT nextval('hive_posts_serial');
+-- finally add primary key
+ALTER TABLE hive_posts ADD PRIMARY KEY (id);
+-- put constraints back
+ALTER TABLE hive_payments ADD CONSTRAINT hive_payments_fk3 FOREIGN KEY (post_id) REFERENCES hive_posts(id);
+ALTER TABLE hive_reblogs ADD CONSTRAINT hive_reblogs_fk2 FOREIGN KEY (post_id) REFERENCES hive_posts(id);
+
+ALTER TABLE hive_posts ADD CONSTRAINT hive_posts_fk1 FOREIGN KEY (author_id) REFERENCES hive_accounts(id);
+ALTER TABLE hive_posts ADD CONSTRAINT hive_posts_fk3 FOREIGN KEY (parent_id) REFERENCES hive_posts(id);
+ALTER TABLE hive_posts ADD CONSTRAINT hive_posts_fk4 FOREIGN KEY (permlink_id) REFERENCES hive_permlink_data(id);
+ALTER TABLE hive_posts ADD CONSTRAINT hive_posts_ux1 UNIQUE (author_id, permlink_id);
+
+-- Make indexes in hive_posts
+-- RAISE NOTICE 'Creating indexes';
+
+CREATE INDEX IF NOT EXISTS hive_posts_depth_idx ON hive_posts (depth);
+CREATE INDEX IF NOT EXISTS hive_posts_parent_id_idx ON hive_posts (parent_id);
+CREATE INDEX IF NOT EXISTS hive_posts_community_id_idx ON hive_posts (community_id);
+
+CREATE INDEX IF NOT EXISTS hive_posts_category_id_idx ON hive_posts (category_id);
+CREATE INDEX IF NOT EXISTS hive_posts_payout_at_idx ON hive_posts (payout_at);
+CREATE INDEX IF NOT EXISTS hive_posts_payout_at_idx2 ON hive_posts (payout_at) WHERE is_paidout = '0';
+
+CREATE INDEX IF NOT EXISTS hive_posts_payout_idx ON hive_posts (payout);
+
+CREATE INDEX IF NOT EXISTS hive_posts_promoted_idx ON hive_posts (promoted);
+
+CREATE INDEX IF NOT EXISTS hive_posts_sc_trend_idx ON hive_posts (sc_trend);
+CREATE INDEX IF NOT EXISTS hive_posts_sc_hot_idx ON hive_posts (sc_hot);
+
+CREATE INDEX IF NOT EXISTS hive_posts_created_at_idx ON hive_posts (created_at);
+
+-- Create a materialized view and associated index to significantly speed up queries against hive_posts
+DROP MATERIALIZED VIEW IF EXISTS hive_posts_a_p;
+
+CREATE MATERIALIZED VIEW hive_posts_a_p
+AS
+SELECT hp.id AS id,
+       ha_a.name AS author,
+       hpd_p.permlink AS permlink
+FROM hive_posts hp
+INNER JOIN hive_accounts ha_a ON ha_a.id = hp.author_id
+INNER JOIN hive_permlink_data hpd_p ON hpd_p.id = hp.permlink_id
+WITH DATA;
+
+DROP INDEX IF EXISTS hive_posts_a_p_idx;
+
+CREATE UNIQUE INDEX hive_posts_a_p_idx
+ON hive_posts_a_p
+(author COLLATE "C", permlink COLLATE "C");
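+-- Maintenance note: the view is populated once here (WITH DATA) and will go
+-- stale as indexing continues; the unique index above is what allows a
+-- non-blocking periodic refresh, e.g.:
+--   REFRESH MATERIALIZED VIEW CONCURRENTLY hive_posts_a_p;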
diff --git a/tests/manual_tests/__init__.py b/tests/manual_tests/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/tests/manual_tests/list_comments_by_author_last_update_test.py b/tests/manual_tests/list_comments_by_author_last_update_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..d7c950f92f1488b3cf4ad5c91165a30f765371b0
--- /dev/null
+++ b/tests/manual_tests/list_comments_by_author_last_update_test.py
@@ -0,0 +1,20 @@
+#!/usr/bin/python3
+
+from .test_base import run_test
+
+if __name__ == '__main__':
+    reference_hive_node_url = 'https://api.hive.blog'
+    test_hive_node_url = 'http://127.0.0.1:8080'
+
+    payload = {
+        "jsonrpc":"2.0",
+        "method":"database_api.list_comments",
+        "params" : {
+            "start" : ['steemit', '1970-01-01T00:00:00', '', ''],
+            "limit" : 10,
+            "order" : 'by_author_last_update'
+        },
+        "id":1
+    }
+
+    run_test(reference_hive_node_url, test_hive_node_url, payload, ['author', 'permlink', 'updated_at'])
diff --git a/tests/manual_tests/list_comments_by_cashout_test.py b/tests/manual_tests/list_comments_by_cashout_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..69847e06e03cbdc5f137bbd22d38437477186dd3
--- /dev/null
+++ b/tests/manual_tests/list_comments_by_cashout_test.py
@@ -0,0 +1,19 @@
+#!/usr/bin/python3
+from .test_base import run_test
+
+if __name__ == '__main__':
+    reference_hive_node_url = 'https://api.hive.blog'
+    test_hive_node_url = 'http://127.0.0.1:8080'
+
+    payload = {
+        "jsonrpc":"2.0",
+        "method":"database_api.list_comments",
+        "params" : {
+            "start" : ['1970-01-01T00:00:00', '', ''],
+            "limit" : 10,
+            "order" : 'by_cashout_time'
+        },
+        "id":1
+    }
+
+    run_test(reference_hive_node_url, test_hive_node_url, payload, ['author', 'permlink', 'parent_author', 'parent_permlink', 'created'])
diff --git a/tests/manual_tests/list_comments_by_parent_test.py b/tests/manual_tests/list_comments_by_parent_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..00a0ce719728e4df0cee8c6150ff6a4f87696325
--- /dev/null
+++ b/tests/manual_tests/list_comments_by_parent_test.py
@@ -0,0 +1,19 @@
+#!/usr/bin/python3
+from .test_base import run_test
+
+if __name__ == '__main__':
+    reference_hive_node_url = 'https://api.hive.blog'
+    test_hive_node_url = 'http://127.0.0.1:8080'
+
+    payload = {
+        "jsonrpc":"2.0",
+        "method":"database_api.list_comments",
+        "params" : {
+            "start" : ['steemit', 'firstpost', '', ''],
+            "limit" : 10,
+            "order" : 'by_parent'
+        },
+        "id":1
+    }
+
+    run_test(reference_hive_node_url, test_hive_node_url, payload, ['author', 'permlink', 'parent_author', 'parent_permlink', 'created'])
diff --git a/tests/manual_tests/list_comments_by_permlink.py b/tests/manual_tests/list_comments_by_permlink.py
new file mode 100644
index 0000000000000000000000000000000000000000..8822f581b0634803c3b234a65c896c34570d4c65
--- /dev/null
+++ b/tests/manual_tests/list_comments_by_permlink.py
@@ -0,0 +1,19 @@
+#!/usr/bin/python3
+from .test_base import run_test
+
+if __name__ == '__main__':
+    reference_hive_node_url = 'https://api.hive.blog'
+    test_hive_node_url = 'http://127.0.0.1:8080'
+
+    payload = {
+        "jsonrpc" : "2.0",
+        "method" : "database_api.list_comments",
+        "params" : {
+            "start" : ['', ''],
+            "limit" : 10,
+            "order" : 'by_permlink'
+        },
+        "id" : 1
+    }
+
+    run_test(reference_hive_node_url, test_hive_node_url, payload, ['author', 'permlink'])
diff --git a/tests/manual_tests/list_comments_by_root_test.py b/tests/manual_tests/list_comments_by_root_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..5b9a6a0926141ecbd3d81d7f317953037bc4f1e4
--- /dev/null
+++ b/tests/manual_tests/list_comments_by_root_test.py
@@ -0,0 +1,20 @@
+#!/usr/bin/python3
+from .test_base import run_test
+
+if __name__ == '__main__':
+    reference_hive_node_url = 'https://api.hive.blog'
+    test_hive_node_url = 'http://127.0.0.1:8080'
+
+    payload = {
+        "jsonrpc" : "2.0",
+        "method" : "database_api.list_comments",
+        "params" : {
+            "start" : ['steemit', 'firstpost', '', ''],
+            "limit" : 20,
+            "order" : 'by_root'
+        },
+        "id":1
+    }
+
+    run_test(reference_hive_node_url, test_hive_node_url, payload, ['author', 'permlink', 'root_author', 'root_permlink', 'created'])
+
diff --git a/tests/manual_tests/list_comments_by_update_test.py b/tests/manual_tests/list_comments_by_update_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..5c5bcd2742be431cbfd17f78599de43923a866da
--- /dev/null
+++
b/tests/manual_tests/list_comments_by_update_test.py
@@ -0,0 +1,19 @@
+#!/usr/bin/python3
+from .test_base import run_test
+
+if __name__ == '__main__':
+    reference_hive_node_url = 'https://api.hive.blog'
+    test_hive_node_url = 'http://127.0.0.1:8080'
+
+    payload = {
+        "jsonrpc":"2.0",
+        "method":"database_api.list_comments",
+        "params" : {
+            "start" : ['steemit', '1970-01-01T00:00:00', '', ''],
+            "limit" : 10,
+            "order" : 'by_update'
+        },
+        "id":1
+    }
+
+    run_test(reference_hive_node_url, test_hive_node_url, payload, ['author', 'permlink', 'parent_author', 'parent_permlink', 'updated_at'])
diff --git a/tests/manual_tests/test_base.py b/tests/manual_tests/test_base.py
new file mode 100644
index 0000000000000000000000000000000000000000..0e35eb29f1319d0850fc9160ebec8b326a5bb615
--- /dev/null
+++ b/tests/manual_tests/test_base.py
@@ -0,0 +1,24 @@
+def run_test(reference_node_url, test_node_url, payload, table_keys):
+    import prettytable
+    from requests import post
+    from json import dumps
+
+    print("Querying reference node")
+    resp = post(reference_node_url, dumps(payload))
+
+    json = resp.json()
+    table = prettytable.PrettyTable()
+    table.field_names = table_keys
+    for row in json['result']['comments']:
+        table.add_row([row[key] for key in table_keys])
+    print(table)
+
+    print("Querying test node")
+    resp = post(test_node_url, dumps(payload))
+
+    json = resp.json()
+    table = prettytable.PrettyTable()
+    table.field_names = table_keys
+    for row in json['result']:
+        table.add_row([row[key] for key in table_keys])
+    print(table)
diff --git a/tests/server/test_server_database_api.py b/tests/server/test_server_database_api.py
new file mode 100644
index 0000000000000000000000000000000000000000..3bcb65bd289ddff068464de1b95ef22383528f73
--- /dev/null
+++ b/tests/server/test_server_database_api.py
@@ -0,0 +1,52 @@
+import pytest
+from hive.server.database_api.methods import list_comments
+from hive.steem.client import SteemClient
+
+@pytest.fixture
+def client():
+    return SteemClient(url='https://api.hive.blog')
+
+@pytest.mark.asyncio
+async def test_list_comments_by_cashout_time(client):
+    reference_data = await client.list_comments({"start":["1990-01-01T00:00:00","steemit","firstpost"],"limit":10,"order":"by_cashout_time"})
+    test_data = await list_comments(["1990-01-01T00:00:00","steemit","firstpost"],10,"by_cashout_time")
+    assert reference_data
+    assert test_data
+    assert len(reference_data) == len(test_data)
+    to_compare = ['author','permlink']
+    for idx in range(len(reference_data)):
+        for key in to_compare:
+            assert reference_data[idx][key] == test_data[idx][key]
+        assert reference_data[idx]['cashout_time'] == test_data[idx]['payout_at']
+
+@pytest.mark.asyncio
+async def test_list_comments_by_permlink(client):
+    reference_data = await client.list_comments({"start":["steemit","firstpost"],"limit":10,"order":"by_permlink"})
+    test_data = await list_comments(["steemit","firstpost"],10,"by_permlink")
+    assert reference_data
+    assert test_data
+    assert len(reference_data) == len(test_data)
+    to_compare = ['author','permlink']
+    for idx in range(len(reference_data)):
+        for key in to_compare:
+            assert reference_data[idx][key] == test_data[idx][key]
+
+@pytest.mark.asyncio
+async def test_list_comments_by_root(client):
+    reference_data = await client.list_comments({"start":["steemit","firstpost","",""],"limit":10,"order":"by_root"})
+    test_data = await list_comments(["steemit","firstpost","",""],10,"by_root")
+    assert reference_data
+    assert test_data
+    assert len(reference_data) == len(test_data)
+    to_compare = ['author','permlink','root_author','root_permlink']
+    for idx in
range(len(reference_data)):
+        for key in to_compare:
+            assert reference_data[idx][key] == test_data[idx][key]
+
+@pytest.mark.asyncio
+async def test_list_comments_by_parent(client):
+    reference_data = await client.list_comments({"start":["steemit","firstpost","",""],"limit":10,"order":"by_parent"})
+    test_data = await list_comments(["steemit","firstpost","",""],10,"by_parent")
+    assert reference_data
+    assert test_data
+    assert len(reference_data) == len(test_data)
+    to_compare = ['author','permlink','parent_author','parent_permlink']
+    for idx in range(len(reference_data)):
+        for key in to_compare:
+            assert reference_data[idx][key] == test_data[idx][key]
diff --git a/tests/utils/test_utils_post.py b/tests/utils/test_utils_post.py
index be5159db1abc33056a71eae0b0b8cbc0e09e714f..b4621f9b62c82fe05d1d46e9b42195b673d731ea 100644
--- a/tests/utils/test_utils_post.py
+++ b/tests/utils/test_utils_post.py
@@ -73,7 +73,7 @@ POST_1 = {
     "parent_author": "",
     "parent_permlink": "spam",
     "pending_payout_value": "0.000 HBD",
-    "percent_steem_dollars": 10000,
+    "percent_hbd": 10000,
     "permlink": "june-spam",
     "promoted": "0.000 HBD",
     "reblogged_by": [],
@@ -121,7 +121,7 @@ POST_2 = {
     "parent_author": "",
     "parent_permlink": "spam",
     "pending_payout_value": "0.000 HBD",
-    "percent_steem_dollars": 10000,
+    "percent_hbd": 10000,
     "permlink": "june-spam",
     "promoted": "0.000 HBD",
     "reblogged_by": [],
@@ -182,7 +182,7 @@ def test_post_legacy():
         'max_accepted_payout': '1000000.000 HBD',
         'parent_author': '',
         'parent_permlink': 'spam',
-        'percent_steem_dollars': 10000,
+        'percent_hbd': 10000,
         'root_author': 'test-safari',
         'root_permlink': 'june-spam',
         'root_title': 'June Spam',