diff --git a/hive/db/schema.py b/hive/db/schema.py index b6caeea5baa8314ed2abde5fd2aae7f8c71acbd1..a85383af39580a4ca1187e09860b3c68f2ea23ed 100644 --- a/hive/db/schema.py +++ b/hive/db/schema.py @@ -40,7 +40,7 @@ def build_metadata(): sa.Column('display_name', sa.String(20)), sa.Column('about', sa.String(160)), sa.Column('location', sa.String(30)), - sa.Column('website', sa.String(100)), + sa.Column('website', sa.String(1024)), sa.Column('profile_image', sa.String(1024), nullable=False, server_default=''), sa.Column('cover_image', sa.String(1024), nullable=False, server_default=''), @@ -197,7 +197,22 @@ def build_metadata(): sa.Column('body', TEXT), sa.Column('votes', TEXT), sa.Column('json', sa.Text), - sa.Column('raw_json', sa.Text), + #sa.Column('raw_json', sa.Text), + + sa.Column('legacy_id', sa.Integer, nullable=False, server_default='-1'), + sa.Column('parent_author', sa.String(16), nullable=False, server_default=''), + sa.Column('parent_permlink', sa.String(255), nullable=False, server_default=''), + sa.Column('curator_payout_value', sa.String(16), nullable=False, server_default=''), + sa.Column('root_author', sa.String(16), nullable=False, server_default=''), + sa.Column('root_permlink', sa.String(255), nullable=False, server_default=''), + sa.Column('max_accepted_payout', sa.String(16), nullable=False, server_default=''), + sa.Column('percent_steem_dollars', sa.Integer, nullable=False, server_default='-1'), + sa.Column('allow_replies', BOOLEAN, nullable=False, server_default='1'), + sa.Column('allow_votes', BOOLEAN, nullable=False, server_default='1'), + sa.Column('allow_curation_rewards', BOOLEAN, nullable=False, server_default='1'), + sa.Column('beneficiaries', sa.JSON, nullable=False, server_default='[]'), + sa.Column('url', sa.Text, nullable=False, server_default=''), + sa.Column('root_title', sa.String(255), nullable=False, server_default=''), # index: misc sa.Index('hive_posts_cache_ix3', 'payout_at', 'post_id', postgresql_where=sql_text("is_paidout = 
'0'")), # core: payout sweep diff --git a/hive/indexer/cached_post.py b/hive/indexer/cached_post.py index 25f731827daa6cd9eed3dbd72f1757791f30da04..fbcd5e03b23608b732f3121f27652bb53f8f18fe 100644 --- a/hive/indexer/cached_post.py +++ b/hive/indexer/cached_post.py @@ -487,6 +487,7 @@ class CachedPost: # always write, unless simple vote update if level in ['insert', 'payout', 'update']: basic = post_basic(post) + legacy_data = post_legacy(post) values.extend([ ('community_id', post['community_id']), # immutable* ('created_at', post['created']), # immutable* @@ -501,7 +502,21 @@ class CachedPost: ('is_full_power', basic['is_full_power']), ('is_paidout', basic['is_paidout']), ('json', json.dumps(basic['json_metadata'])), - ('raw_json', json.dumps(post_legacy(post))), + #('raw_json', json.dumps(legacy_data)), + ('legacy_id', legacy_data['id']), + ('parent_author', legacy_data['parent_author']), + ('parent_permlink', legacy_data['parent_permlink']), + ('curator_payout_value', legacy_data['curator_payout_value']), + ('root_author', legacy_data['root_author']), + ('root_permlink', legacy_data['root_permlink']), + ('max_accepted_payout', legacy_data['max_accepted_payout']), + ('percent_steem_dollars', legacy_data['percent_steem_dollars']), + ('allow_replies', legacy_data['allow_replies']), + ('allow_votes', legacy_data['allow_votes']), + ('allow_curation_rewards', legacy_data['allow_curation_rewards']), + ('beneficiaries', json.dumps(legacy_data['beneficiaries'])), + ('url', legacy_data['url']), + ('root_title', legacy_data['root_title']), ]) # if there's a pending promoted value to write, pull it out diff --git a/hive/server/bridge_api/objects.py b/hive/server/bridge_api/objects.py index 9846e3f4f2308c54b4b5aa256f6701deb717ef64..204078ca11a7e7df5fa591c3561c670e71ff895f 100644 --- a/hive/server/bridge_api/objects.py +++ b/hive/server/bridge_api/objects.py @@ -45,8 +45,11 @@ async def load_posts_keyed(db, ids, truncate_body=0): # fetch posts and associated author reps sql = 
"""SELECT post_id, community_id, author, permlink, title, body, category, depth, promoted, payout, payout_at, is_paidout, children, votes, - created_at, updated_at, rshares, raw_json, json, - is_hidden, is_grayed, total_votes, flag_weight + created_at, updated_at, rshares, json, + is_hidden, is_grayed, total_votes, flag_weight, + legacy_id, parent_author, parent_permlink, curator_payout_value, + root_author, root_permlink, max_accepted_payout, percent_steem_dollars, + allow_replies, allow_votes, allow_curation_rewards, beneficiaries, url, root_title FROM hive_posts_cache WHERE post_id IN :ids""" result = await db.query_all(sql, ids=tuple(ids)) author_map = await _query_author_map(db, result) @@ -216,29 +219,37 @@ def _condenser_post_object(row, truncate_body=0): 'total_votes': row['total_votes'], 'flag_weight': row['flag_weight']} # TODO: down_weight - # import fields from legacy object - assert row['raw_json'] - assert len(row['raw_json']) > 32 - raw_json = json.loads(row['raw_json']) - # TODO: move to core, or payout_details - post['beneficiaries'] = raw_json['beneficiaries'] - post['max_accepted_payout'] = raw_json['max_accepted_payout'] - post['percent_steem_dollars'] = raw_json['percent_steem_dollars'] # TODO: systag? 
+ #post['author_reputation'] = rep_to_raw(row['author_rep']) + + post['legacy_id'] = row['legacy_id'] + + post['root_author'] = row['root_author'] + post['root_permlink'] = row['root_permlink'] + + post['allow_replies'] = row['allow_replies'] + post['allow_votes'] = row['allow_votes'] + post['allow_curation_rewards'] = row['allow_curation_rewards'] + + post['url'] = row['url'] + post['root_title'] = row['root_title'] + post['beneficiaries'] = json.loads(row['beneficiaries']) + post['max_accepted_payout'] = row['max_accepted_payout'] + post['percent_steem_dollars'] = row['percent_steem_dollars'] + if paid: - curator_payout = sbd_amount(raw_json['curator_payout_value']) - post['author_payout_value'] = _amount(row['payout'] - curator_payout) + curator_payout = sbd_amount(row['curator_payout_value']) post['curator_payout_value'] = _amount(curator_payout) + post['total_payout_value'] = _amount(row['payout'] - curator_payout) + + # not used by condenser, but may be useful + # post['net_votes'] = post['total_votes'] - row['up_votes'] # TODO: re-evaluate if row['depth'] > 0: - post['parent_author'] = raw_json['parent_author'] - post['parent_permlink'] = raw_json['parent_permlink'] - post['title'] = 'RE: ' + raw_json['root_title'] # PostSummary & comment context - #else: - # post['parent_author'] = '' - # post['parent_permlink'] = '' - post['url'] = raw_json['url'] + post['parent_author'] = row['parent_author'] + post['parent_permlink'] = row['parent_permlink'] + post['title'] = 'RE: ' + row['root_title'] # PostSummary & comment context return post diff --git a/hive/server/common/objects.py b/hive/server/common/objects.py new file mode 100644 index 0000000000000000000000000000000000000000..1e731d960d6f2520247bf56c4325e6b6a37d01e2 --- /dev/null +++ b/hive/server/common/objects.py @@ -0,0 +1,96 @@ +from hive.server.common.helpers import json_date +from hive.utils.normalize import sbd_amount, rep_to_raw +import ujson as json + +def _amount(amount, asset='HBD'): + """Return a 
steem-style amount string given a (numeric, asset-str).""" + assert asset == 'HBD', 'unhandled asset %s' % asset + return "%.3f HBD" % amount + +def _hydrate_active_votes(vote_csv): + """Convert minimal CSV representation into steemd-style object.""" + if not vote_csv: + return [] + votes = [] + for line in vote_csv.split("\n"): + voter, rshares, percent, reputation = line.split(',') + votes.append(dict(voter=voter, + rshares=rshares, + percent=percent, + reputation=rep_to_raw(reputation))) + return votes + +async def query_author_map(db, posts): + """Given a list of posts, returns an author->reputation map.""" + if not posts: return {} + names = tuple({post['author'] for post in posts}) + sql = "SELECT id, name, reputation FROM hive_accounts WHERE name IN :names" + return {r['name']: r for r in await db.query_all(sql, names=names)} + +def condenser_post_object(row, truncate_body=0): + """Given a hive_posts_cache row, create a legacy-style post object.""" + paid = row['is_paidout'] + + # condenser#3424 mitigation + if not row['category']: + row['category'] = 'undefined' + + post = {} + post['post_id'] = row['post_id'] + post['author'] = row['author'] + post['permlink'] = row['permlink'] + post['category'] = row['category'] + + post['title'] = row['title'] + post['body'] = row['body'][0:truncate_body] if truncate_body else row['body'] + post['json_metadata'] = row['json'] + + post['created'] = json_date(row['created_at']) + post['last_update'] = json_date(row['updated_at']) + post['depth'] = row['depth'] + post['children'] = row['children'] + post['net_rshares'] = row['rshares'] + + post['last_payout'] = json_date(row['payout_at'] if paid else None) + post['cashout_time'] = json_date(None if paid else row['payout_at']) + post['total_payout_value'] = _amount(row['payout'] if paid else 0) + post['curator_payout_value'] = _amount(0) + post['pending_payout_value'] = _amount(0 if paid else row['payout']) + post['promoted'] = _amount(row['promoted']) + + post['replies'] = 
[] + post['body_length'] = len(row['body']) + post['active_votes'] = _hydrate_active_votes(row['votes']) + #post['author_reputation'] = rep_to_raw(row['author_rep']) + + post['legacy_id'] = row['legacy_id'] + + post['root_author'] = row['root_author'] + post['root_permlink'] = row['root_permlink'] + + post['allow_replies'] = row['allow_replies'] + post['allow_votes'] = row['allow_votes'] + post['allow_curation_rewards'] = row['allow_curation_rewards'] + + if row['depth'] > 0: + post['parent_author'] = row['parent_author'] + post['parent_permlink'] = row['parent_permlink'] + else: + post['parent_author'] = '' + post['parent_permlink'] = row['category'] + + post['url'] = row['url'] + post['root_title'] = row['root_title'] + post['beneficiaries'] = row['beneficiaries'] + post['max_accepted_payout'] = row['max_accepted_payout'] + post['percent_steem_dollars'] = row['percent_steem_dollars'] + + if paid: + curator_payout = sbd_amount(row['curator_payout_value']) + post['curator_payout_value'] = _amount(curator_payout) + post['total_payout_value'] = _amount(row['payout'] - curator_payout) + + # not used by condenser, but may be useful + # post['net_votes'] = post['total_votes'] - row['up_votes'] + + return post diff --git a/hive/server/condenser_api/cursor.py b/hive/server/condenser_api/cursor.py index 2e3b42ba437f74b92b74d46987daad292bb51928..56a79b632c85dc33b72c0f11a19f4c8c8a83287d 100644 --- a/hive/server/condenser_api/cursor.py +++ b/hive/server/condenser_api/cursor.py @@ -4,6 +4,7 @@ from datetime import datetime from dateutil.relativedelta import relativedelta from hive.utils.normalize import rep_to_raw +from json import loads # pylint: disable=too-many-lines @@ -374,3 +375,39 @@ async def pids_by_replies_to_account(db, start_author: str, start_permlink: str """ % seek return await db.query_col(sql, parent=parent_account, start_id=start_id, limit=limit) + +async def get_accounts(db, accounts: list): + """Returns accounts data for accounts given in list""" + ret = [] 
+ + names = tuple(accounts) + sql = """SELECT created_at, reputation, display_name, about, + location, website, profile_image, cover_image, followers, following, + proxy, post_count, proxy_weight, vote_weight, rank, + lastread_at, active_at, cached_at, raw_json + FROM hive_accounts WHERE name IN :names""" + + result = await db.query_all(sql, names=names) + for row in result: + account_data = dict(loads(row.raw_json)) + account_data['created_at'] = row.created_at.isoformat() + account_data['reputation'] = row.reputation + account_data['display_name'] = row.display_name + account_data['about'] = row.about + account_data['location'] = row.location + account_data['website'] = row.website + account_data['profile_image'] = row.profile_image + account_data['cover_image'] = row.cover_image + account_data['followers'] = row.followers + account_data['following'] = row.following + account_data['proxy'] = row.proxy + account_data['post_count'] = row.post_count + account_data['proxy_weight'] = row.proxy_weight + account_data['vote_weight'] = row.vote_weight + account_data['rank'] = row.rank + account_data['lastread_at'] = row.lastread_at.isoformat() + account_data['active_at'] = row.active_at.isoformat() + account_data['cached_at'] = row.cached_at.isoformat() + ret.append(account_data) + + return ret diff --git a/hive/server/condenser_api/methods.py b/hive/server/condenser_api/methods.py index fe0a5bb9cf730659f0430aa8c888eca5e98795f2..64f8815d354a18e0d6aec622b61ba34621465dc7 100644 --- a/hive/server/condenser_api/methods.py +++ b/hive/server/condenser_api/methods.py @@ -1,5 +1,5 @@ """Steemd/condenser_api compatibility layer API methods.""" - +from json import loads from functools import wraps import hive.server.condenser_api.cursor as cursor @@ -118,7 +118,10 @@ async def get_content_replies(context, author: str, permlink: str): sql = """SELECT post_id, author, permlink, title, body, category, depth, promoted, payout, payout_at, is_paidout, 
children, votes, - created_at, updated_at, rshares, raw_json, json + created_at, updated_at, rshares, json, + legacy_id, parent_author, parent_permlink, curator_payout_value, + root_author, root_permlink, max_accepted_payout, percent_steem_dollars, + allow_replies, allow_votes, allow_curation_rewards, beneficiaries, url, root_title FROM hive_posts_cache WHERE post_id IN ( SELECT hp2.id FROM hive_posts hp2 WHERE hp2.is_deleted = '0' AND @@ -404,3 +407,12 @@ async def _get_blog(db, account: str, start_index: int, limit: int = None): idx -= 1 return out + +@return_error_info +async def get_accounts(context, accounts: list): + """Returns accounts data for accounts given in list""" + print("Hivemind native get_accounts") + assert accounts, "Empty parameters are not supported" + assert len(accounts) < 1000, "Query exceeds limit" + + return await cursor.get_accounts(context['db'], accounts) diff --git a/hive/server/condenser_api/objects.py b/hive/server/condenser_api/objects.py index 725d804bbafae4f4eec459beccd3a9e0511de3c6..838df98db0d7ba729ad302862fd1c8a645b871d8 100644 --- a/hive/server/condenser_api/objects.py +++ b/hive/server/condenser_api/objects.py @@ -42,7 +42,10 @@ async def load_posts_keyed(db, ids, truncate_body=0): # fetch posts and associated author reps sql = """SELECT post_id, author, permlink, title, body, category, depth, promoted, payout, payout_at, is_paidout, children, votes, - created_at, updated_at, rshares, raw_json, json + created_at, updated_at, rshares, json, + legacy_id, parent_author, parent_permlink, curator_payout_value, + root_author, root_permlink, max_accepted_payout, percent_steem_dollars, + allow_replies, allow_votes, allow_curation_rewards, beneficiaries, url, root_title FROM hive_posts_cache WHERE post_id IN :ids""" result = await db.query_all(sql, ids=tuple(ids)) author_reps = await _query_author_rep_map(db, result) @@ -164,34 +167,35 @@ def _condenser_post_object(row, truncate_body=0): post['active_votes'] = _hydrate_active_votes(row['votes']) 
post['author_reputation'] = rep_to_raw(row['author_rep']) - # import fields from legacy object - assert row['raw_json'] - assert len(row['raw_json']) > 32 - raw_json = json.loads(row['raw_json']) + post['legacy_id'] = row['legacy_id'] + + post['root_author'] = row['root_author'] + post['root_permlink'] = row['root_permlink'] + + post['allow_replies'] = row['allow_replies'] + post['allow_votes'] = row['allow_votes'] + post['allow_curation_rewards'] = row['allow_curation_rewards'] if row['depth'] > 0: - post['parent_author'] = raw_json['parent_author'] - post['parent_permlink'] = raw_json['parent_permlink'] + post['parent_author'] = row['parent_author'] + post['parent_permlink'] = row['parent_permlink'] else: post['parent_author'] = '' post['parent_permlink'] = row['category'] - post['url'] = raw_json['url'] - post['root_title'] = raw_json['root_title'] - post['beneficiaries'] = raw_json['beneficiaries'] - post['max_accepted_payout'] = raw_json['max_accepted_payout'] - post['percent_steem_dollars'] = raw_json['percent_steem_dollars'] + post['url'] = row['url'] + post['root_title'] = row['root_title'] + post['beneficiaries'] = json.loads(row['beneficiaries']) + post['max_accepted_payout'] = row['max_accepted_payout'] + post['percent_steem_dollars'] = row['percent_steem_dollars'] if paid: - curator_payout = sbd_amount(raw_json['curator_payout_value']) + curator_payout = sbd_amount(row['curator_payout_value']) post['curator_payout_value'] = _amount(curator_payout) post['total_payout_value'] = _amount(row['payout'] - curator_payout) # not used by condenser, but may be useful - #post['net_votes'] = post['total_votes'] - row['up_votes'] - #post['allow_replies'] = raw_json['allow_replies'] - #post['allow_votes'] = raw_json['allow_votes'] - #post['allow_curation_rewards'] = raw_json['allow_curation_rewards'] + # post['net_votes'] = post['total_votes'] - row['up_votes'] return post diff --git a/hive/server/database_api/__init__.py b/hive/server/database_api/__init__.py new 
file mode 100644 index 0000000000000000000000000000000000000000..609e45795e4f2fd34c029573df6487bd9d366dec --- /dev/null +++ b/hive/server/database_api/__init__.py @@ -0,0 +1 @@ +"""Hive database_api methods and support.""" diff --git a/hive/server/database_api/methods.py b/hive/server/database_api/methods.py new file mode 100644 index 0000000000000000000000000000000000000000..9aef5a97a293807d48165fa19091aa5e9a66a8c7 --- /dev/null +++ b/hive/server/database_api/methods.py @@ -0,0 +1,90 @@ +# pylint: disable=too-many-arguments,line-too-long,too-many-lines +from hive.server.common.helpers import return_error_info, valid_limit +from hive.server.common.objects import condenser_post_object + +@return_error_info +async def list_comments(context, start: list, limit: int, order: str): + """Returns all comments, starting with the specified options.""" + print("Hivemind native list_comments") + supported_order_list = ['by_cashout_time', 'by_permlink', 'by_root', 'by_parent'] + assert order in supported_order_list, "Unsupported order, valid orders {}".format(supported_order_list) + limit = valid_limit(limit, 1000) + db = context['db'] + + comments = [] + if order == 'by_cashout_time': + assert len(start) == 3, "Expecting three arguments" + payout_time = start[0] + author = start[1] + permlink = start[2] + post_id = 0 + if author or permlink: + sql = """SELECT post_id FROM hive_posts_cache WHERE author >= :author AND permlink >= :permlink LIMIT 1""" + result = await db.query_row(sql, author=author, permlink=permlink) + post_id = result.post_id + sql = """SELECT post_id, community_id, author, permlink, title, body, category, depth, + promoted, payout, payout_at, is_paidout, children, votes, + created_at, updated_at, rshares, json, + is_hidden, is_grayed, total_votes, flag_weight, + legacy_id, parent_author, parent_permlink, curator_payout_value, + root_author, root_permlink, max_accepted_payout, percent_steem_dollars, + allow_replies, allow_votes, allow_curation_rewards, beneficiaries, url, 
root_title + FROM hive_posts_cache WHERE payout_at >= :start AND post_id >= :post_id ORDER BY payout_at ASC, post_id ASC LIMIT :limit""" + result = await db.query_all(sql, start=payout_time, limit=limit, post_id=post_id) + for row in result: + comments.append(condenser_post_object(dict(row))) + elif order == 'by_permlink': + assert len(start) == 2, "Expecting two arguments" + author = start[0] + permlink = start[1] + sql = """SELECT post_id, community_id, author, permlink, title, body, category, depth, + promoted, payout, payout_at, is_paidout, children, votes, + created_at, updated_at, rshares, json, + is_hidden, is_grayed, total_votes, flag_weight, + legacy_id, parent_author, parent_permlink, curator_payout_value, + root_author, root_permlink, max_accepted_payout, percent_steem_dollars, + allow_replies, allow_votes, allow_curation_rewards, beneficiaries, url, root_title + FROM hive_posts_cache WHERE author >= :author AND permlink >= :permlink ORDER BY author ASC, permlink ASC, post_id ASC LIMIT :limit""" + result = await db.query_all(sql, author=author, permlink=permlink, limit=limit) + for row in result: + comments.append(condenser_post_object(dict(row))) + elif order == 'by_root': + assert len(start) == 4, "Expecting 4 arguments" + root_author = start[0] + root_permlink = start[1] + + child_author = start[2] + child_permlink = start[3] + + sql = """SELECT post_id, community_id, author, permlink, title, body, category, depth, + promoted, payout, payout_at, is_paidout, children, votes, + created_at, updated_at, rshares, json, + is_hidden, is_grayed, total_votes, flag_weight, + legacy_id, parent_author, parent_permlink, curator_payout_value, + root_author, root_permlink, max_accepted_payout, percent_steem_dollars, + allow_replies, allow_votes, allow_curation_rewards, beneficiaries, url, root_title + FROM get_rows_by_root(:root_author, :root_permlink, :child_author, :child_permlink) ORDER BY post_id ASC LIMIT :limit""" + result = await db.query_all(sql, root_author=root_author, 
root_permlink=root_permlink, child_author=child_author, child_permlink=child_permlink, limit=limit) + for row in result: + comments.append(condenser_post_object(dict(row))) + elif order == 'by_parent': + assert len(start) == 4, "Expecting 4 arguments" + + parent_author = start[0] + parent_permlink = start[1] + + child_author = start[2] + child_permlink = start[3] + + sql = """SELECT post_id, community_id, author, permlink, title, body, category, depth, + promoted, payout, payout_at, is_paidout, children, votes, + created_at, updated_at, rshares, json, + is_hidden, is_grayed, total_votes, flag_weight, + legacy_id, parent_author, parent_permlink, curator_payout_value, + root_author, root_permlink, max_accepted_payout, percent_steem_dollars, + allow_replies, allow_votes, allow_curation_rewards, beneficiaries, url, root_title + FROM get_rows_by_parent(:parent_author, :parent_permlink, :child_author, :child_permlink) LIMIT :limit""" + result = await db.query_all(sql, parent_author=parent_author, parent_permlink=parent_permlink, child_author=child_author, child_permlink=child_permlink, limit=limit) + for row in result: + comments.append(condenser_post_object(dict(row))) + return comments diff --git a/hive/server/serve.py b/hive/server/serve.py index a43c8e8da1ecb40a9c8b95be30038f3e9b9020e2..873387dba2df1a3b9759d0d1242e10f783697474 100644 --- a/hive/server/serve.py +++ b/hive/server/serve.py @@ -26,6 +26,8 @@ from hive.server.hive_api import community as hive_api_community from hive.server.hive_api import notify as hive_api_notify from hive.server.hive_api import stats as hive_api_stats +from hive.server.database_api import methods as database_api + from hive.server.db import Db # pylint: disable=too-many-lines @@ -73,6 +75,7 @@ def build_methods(): condenser_api.get_blog_entries, condenser_api.get_account_reputations, condenser_api.get_reblogged_by, + condenser_api.get_accounts )}) # dummy methods -- serve informational error @@ -135,6 +138,11 @@ def build_methods(): 
hive_api_community.list_all_subscriptions, )}) + # database_api methods + methods.add(**{ + 'database_api.list_comments' : database_api.list_comments + }) + return methods def truncate_response_log(logger): diff --git a/hive/utils/post.py b/hive/utils/post.py index 0d2b316f94e966c680e4ad78c70e414f170b3327..9f299710f34a957d826f6ed2f8101cecc9023c68 100644 --- a/hive/utils/post.py +++ b/hive/utils/post.py @@ -44,6 +44,7 @@ def post_to_internal(post, post_id, level='insert', promoted=None): # always write, unless simple vote update if level in ['insert', 'payout', 'update']: basic = post_basic(post) + legacy_data = post_legacy(post) values.extend([ ('community_id', post['community_id']), # immutable* ('created_at', post['created']), # immutable* @@ -58,7 +59,21 @@ def post_to_internal(post, post_id, level='insert', promoted=None): ('is_full_power', basic['is_full_power']), ('is_paidout', basic['is_paidout']), ('json', json.dumps(basic['json_metadata'])), - ('raw_json', json.dumps(post_legacy(post))), + #('raw_json', json.dumps(legacy_data)), + ('legacy_id', legacy_data['id']), + ('parent_author', legacy_data['parent_author']), + ('parent_permlink', legacy_data['parent_permlink']), + ('curator_payout_value', legacy_data['curator_payout_value']), + ('root_author', legacy_data['root_author']), + ('root_permlink', legacy_data['root_permlink']), + ('max_accepted_payout', legacy_data['max_accepted_payout']), + ('percent_steem_dollars', legacy_data['percent_steem_dollars']), + ('allow_replies', legacy_data['allow_replies']), + ('allow_votes', legacy_data['allow_votes']), + ('allow_curation_rewards', legacy_data['allow_curation_rewards']), + ('beneficiaries', json.dumps(legacy_data['beneficiaries'])), + ('url', legacy_data['url']), + ('root_title', legacy_data['root_title']), ]) # if there's a pending promoted value to write, pull it out diff --git a/scripts/update_hivemind_db.sql b/scripts/update_hivemind_db.sql new file mode 100644 index 
0000000000000000000000000000000000000000..d15a676dfbeb95b93ccb7ae9c4c0e3178ac91d29 --- /dev/null +++ b/scripts/update_hivemind_db.sql @@ -0,0 +1,134 @@ +-- This script will upgrade hivemind database to new version +-- Authors: Dariusz Kędzierski +-- Created: 26-04-2020 + +CREATE TABLE IF NOT EXISTS hive_db_version ( + version VARCHAR(50) PRIMARY KEY, + notes VARCHAR(1024) +); + +-- Upgrade to version 1.0 +-- in this version we will move data from raw_json into separate columns +DO $$ + DECLARE + -- We will perform our operations in batches to conserve memory and CPU + batch_size INTEGER := 100000; + + -- Get last id from hive_posts_cache + last_id INTEGER := 0; + + current_id INTEGER := 0; + + row RECORD; + BEGIN + RAISE NOTICE 'Upgrading database to version 1.0'; + IF NOT EXISTS (SELECT version FROM hive_db_version WHERE version = '1.0') + THEN + -- Update version info + INSERT INTO hive_db_version (version, notes) VALUES ('1.0', 'https://gitlab.syncad.com/blocktrades/hivemind/issues/5'); + -- Alter hive_posts_cache and add columns originally stored in raw_json + RAISE NOTICE 'Attempting to alter table hive_posts_cache'; + ALTER TABLE hive_posts_cache + ADD COLUMN legacy_id INT NOT NULL DEFAULT -1, + ADD COLUMN parent_author VARCHAR(16) NOT NULL DEFAULT '', + ADD COLUMN parent_permlink VARCHAR(255) NOT NULL DEFAULT '', + ADD COLUMN curator_payout_value VARCHAR(16) NOT NULL DEFAULT '', + ADD COLUMN root_author VARCHAR(16) NOT NULL DEFAULT '', + ADD COLUMN root_permlink VARCHAR(255) NOT NULL DEFAULT '', + ADD COLUMN max_accepted_payout VARCHAR(16) NOT NULL DEFAULT '', + ADD COLUMN percent_steem_dollars INT NOT NULL DEFAULT -1, + ADD COLUMN allow_replies BOOLEAN NOT NULL DEFAULT TRUE, + ADD COLUMN allow_votes BOOLEAN NOT NULL DEFAULT TRUE, + ADD COLUMN allow_curation_rewards BOOLEAN NOT NULL DEFAULT TRUE, + ADD COLUMN beneficiaries JSON NOT NULL DEFAULT '[]', + ADD COLUMN url TEXT NOT NULL DEFAULT '', + ADD COLUMN root_title VARCHAR(255) NOT NULL DEFAULT ''; + 
RAISE NOTICE 'Done...'; + + -- Helper type for use with json_populate_record + CREATE TYPE legacy_comment_type AS ( + id INT, + parent_author VARCHAR(16), + parent_permlink VARCHAR(255), + curator_payout_value VARCHAR(16), + root_author VARCHAR(16), + root_permlink VARCHAR(255), + max_accepted_payout VARCHAR(16), + percent_steem_dollars INT, + allow_replies BOOLEAN, + allow_votes BOOLEAN, + allow_curation_rewards BOOLEAN, + beneficiaries JSON, + url TEXT, + root_title VARCHAR(255) + ); + + SELECT INTO last_id post_id FROM hive_posts_cache ORDER BY post_id DESC LIMIT 1; + + RAISE NOTICE 'Attempting to parse % rows in batches %', last_id, batch_size; + + WHILE current_id < last_id LOOP + RAISE NOTICE 'Processing batch: % <= post_id < % (of %)', current_id, current_id + batch_size, last_id; + FOR row IN SELECT post_id, raw_json FROM hive_posts_cache WHERE post_id >= current_id AND post_id < current_id + batch_size ORDER BY post_id ASC LOOP + UPDATE hive_posts_cache SET ( + legacy_id, parent_author, parent_permlink, curator_payout_value, + root_author, root_permlink, max_accepted_payout, percent_steem_dollars, + allow_replies, allow_votes, allow_curation_rewards, beneficiaries, url, root_title + ) = ( + SELECT id, parent_author, parent_permlink, curator_payout_value, + root_author, root_permlink, max_accepted_payout, percent_steem_dollars, + allow_replies, allow_votes, allow_curation_rewards, beneficiaries, url, root_title + FROM json_populate_record(null::legacy_comment_type, row.raw_json::json) + ) + WHERE post_id = row.post_id; + current_id := row.post_id; + END LOOP; + END LOOP; + RAISE NOTICE 'Done...'; + -- Creating indexes + RAISE NOTICE 'Creating author_permlink_idx'; + CREATE INDEX IF NOT EXISTS author_permlink_idx ON hive_posts_cache (author ASC, permlink ASC); + RAISE NOTICE 'Creating root_author_permlink_idx'; + CREATE INDEX IF NOT EXISTS root_author_permlink_idx ON hive_posts_cache (root_author ASC, root_permlink ASC); + RAISE NOTICE 'Creating parent_author_permlink_idx'; + CREATE INDEX IF NOT EXISTS 
parent_author_permlink_idx ON hive_posts_cache (parent_author ASC, parent_permlink ASC); + RAISE NOTICE 'Creating author_permlink_post_id_idx'; + CREATE INDEX IF NOT EXISTS author_permlink_post_id_idx ON hive_posts_cache (author ASC, permlink ASC, post_id ASC); + RAISE NOTICE 'Creating post_id_author_permlink_idx'; + CREATE INDEX IF NOT EXISTS post_id_author_permlink_idx ON hive_posts_cache (post_id ASC, author ASC, permlink ASC); + + -- Creating functions + -- for list_comments by_root + CREATE OR REPLACE FUNCTION get_rows_by_root(root_a VARCHAR, root_p VARCHAR, child_a VARCHAR, child_p VARCHAR, query_limit INT DEFAULT 1000) RETURNS SETOF hive_posts_cache AS $$ + DECLARE + root_row hive_posts_cache; + child_row hive_posts_cache; + query_count INT := 0; + BEGIN + FOR root_row IN SELECT * FROM hive_posts_cache WHERE author >= root_a AND permlink >= root_p ORDER BY post_id ASC, author ASC, permlink ASC + LOOP + EXIT WHEN query_count >= query_limit; + FOR child_row IN SELECT * FROM hive_posts_cache WHERE author >= child_a AND permlink >= child_p AND root_author = root_row.root_author AND root_permlink = root_row.root_permlink ORDER BY post_id ASC, author ASC, permlink ASC + LOOP + EXIT WHEN query_count >= query_limit; + RETURN NEXT child_row; + query_count := query_count + 1; + END LOOP; + END LOOP; + RETURN; + END + $$ LANGUAGE plpgsql; + -- for list_comments by_parent + CREATE OR REPLACE FUNCTION get_rows_by_parent(parent_a VARCHAR, parent_p VARCHAR, child_a VARCHAR, child_p VARCHAR, query_limit INT DEFAULT 1000) RETURNS SETOF hive_posts_cache AS $$ + DECLARE + child_id INT := 0; + BEGIN + SELECT INTO child_id post_id FROM hive_posts_cache WHERE author >= child_a AND permlink >= child_p ORDER BY post_id ASC LIMIT 1; + RETURN QUERY SELECT * FROM hive_posts_cache WHERE parent_author = parent_a AND parent_permlink = parent_p AND post_id >= child_id ORDER BY post_id ASC, author ASC, permlink ASC LIMIT query_limit; + END + $$ LANGUAGE plpgsql; + ELSE + RAISE NOTICE 
'Database already in version 1.0'; + END IF; + END +$$; diff --git a/tests/server/test_server_database_api.py b/tests/server/test_server_database_api.py new file mode 100644 index 0000000000000000000000000000000000000000..9e8c1691db94931b604106c2f8343463df189cea --- /dev/null +++ b/tests/server/test_server_database_api.py @@ -0,0 +1,52 @@ +import pytest +from hive.server.database_api.methods import list_comments +from hive.steem.client import SteemClient + +@pytest.fixture +def client(): + return SteemClient(url='https://api.hive.blog') + +def test_list_comments_by_cashout_time(client): + reference_data = await client.list_comments({"start":["1990-01-01T00:00:00","steemit","firstpost"],"limit":10,"order":"by_cashout_time"}) + test_data = await list_comments(["1990-01-01T00:00:00","steemit","firstpost"],10,"by_cashout_time") + assert reference_data + assert test_data + assert len(reference_data) == len(test_data) + to_compare = keys = ['author','permlink'] + for idx in range(len(reference_data)): + for key in to_compare: + assert reference_data[idx][key] == test_data[idx][key] + assert reference_data[idx]['cashout_time'] == test_data[idx]['payout_at'] + +def test_list_comments_by_permlink(client): + reference_data = await client.list_comments({"start":["steemit","firstpost"],"limit":10,"order":"by_permlink"}) + test_data = await list_comments(["steemit","firstpost"],10,"by_permlink") + assert reference_data + assert test_data + assert len(reference_data) == len(test_data) + to_compare = keys = ['author','permlink'] + for idx in range(len(reference_data)): + for key in to_compare: + assert reference_data[idx][key] == test_data[idx][key] + +def test_list_comments_by_root(client): + reference_data = await client.list_comments({"start":["steemit","firstpost","",""],"limit":10,"order":"by_root"}) + test_data = await list_comments(["steemit","firstpost","",""],10,"by_root") + assert reference_data + assert test_data + assert len(reference_data) == len(test_data) + 
to_compare = keys = ['author','permlink','root_author','root_permlink'] + for idx in range(len(reference_data)): + for key in to_compare: + assert reference_data[idx][key] == test_data[idx][key] + +def test_list_comments_by_parent(client): + reference_data = await client.list_comments({"start":["steemit","firstpost","",""],"limit":10,"order":"by_parent"}) + test_data = await list_comments(["steemit","firstpost","",""],10,"by_parent") + assert reference_data + assert test_data + assert len(reference_data) == len(test_data) + to_compare = keys = ['author','permlink','parent_author','parent_permlink'] + for idx in range(len(reference_data)): + for key in to_compare: + assert reference_data[idx][key] == test_data[idx][key]