[WIP] list_comments implementation: added native database_api.list_comments,...

[WIP] list_comments implementation: added native database_api.list_comments; raw_json data moved to proper table columns in hive_posts_cache; engine changed to support the new columns. Added a script to perform the database upgrade.

[WIP] list_comments implementation: added native database_api.list_comments,...
e85b1a61 · Dariusz Kędzierski · 0c851a07 · e85b1a61 · e85b1a61 · e85b1a61
Commit e85b1a61 authored 4 years ago by Dariusz Kędzierski
--- a/hive/db/schema.py
+++ b/hive/db/schema.py
@@ -40,7 +40,7 @@ def build_metadata():
        sa.Column('display_name', sa.String(20)),
        sa.Column('about', sa.String(160)),
        sa.Column('location', sa.String(30)),
-        sa.Column('website', sa.String(100)),
+        sa.Column('website', sa.String(1024)),
        sa.Column('profile_image', sa.String(1024), nullable=False, server_default=''),
        sa.Column('cover_image', sa.String(1024), nullable=False, server_default=''),

@@ -197,7 +197,22 @@ def build_metadata():
        sa.Column('body', TEXT),
        sa.Column('votes', TEXT),
        sa.Column('json', sa.Text),
-        sa.Column('raw_json', sa.Text),
+        #sa.Column('raw_json', sa.Text),
+
+        sa.Column('legacy_id', sa.Integer, nullable=False, server_default='-1'),
+        sa.Column('parent_author', sa.String(16), nullable=False, server_default=''),
+        sa.Column('parent_permlink',  sa.String(255), nullable=False, server_default=''),
+        sa.Column('curator_payout_value', sa.String(16), nullable=False, server_default=''),
+        sa.Column('root_author',  sa.String(16), nullable=False, server_default=''),
+        sa.Column('root_permlink',  sa.String(255), nullable=False, server_default=''),
+        sa.Column('max_accepted_payout',  sa.String(16), nullable=False, server_default=''),
+        sa.Column('percent_steem_dollars', sa.Integer, nullable=False, server_default='-1'),
+        sa.Column('allow_replies', BOOLEAN, nullable=False, server_default='1'),
+        sa.Column('allow_votes', BOOLEAN, nullable=False, server_default='1'),
+        sa.Column('allow_curation_rewards', BOOLEAN, nullable=False, server_default='1'),
+        sa.Column('beneficiaries',  sa.JSON, nullable=False, server_default=''),
+        sa.Column('url', sa.Text, nullable=False, server_default=''),
+        sa.Column('root_title', sa.String(255), nullable=False, server_default=''),

        # index: misc
        sa.Index('hive_posts_cache_ix3',  'payout_at', 'post_id',           postgresql_where=sql_text("is_paidout = '0'")),         # core: payout sweep

--- a/hive/indexer/cached_post.py
+++ b/hive/indexer/cached_post.py
@@ -487,6 +487,7 @@ class CachedPost:
        # always write, unless simple vote update
        if level in ['insert', 'payout', 'update']:
            basic = post_basic(post)
+            legacy_data = post_legacy(post)
            values.extend([
                ('community_id',  post['community_id']), # immutable*
                ('created_at',    post['created']),    # immutable*
@@ -501,7 +502,21 @@ class CachedPost:
                ('is_full_power', basic['is_full_power']),
                ('is_paidout',    basic['is_paidout']),
                ('json',          json.dumps(basic['json_metadata'])),
-                ('raw_json',      json.dumps(post_legacy(post))),
+                #('raw_json',      json.dumps(legacy_data)),
+                ('legacy_id',               legacy_data['id']),
+                ('parent_author',           legacy_data['parent_author']),
+                ('parent_permlink',         legacy_data['parent_permlink']),
+                ('curator_payout_value',    legacy_data['curator_payout_value']),
+                ('root_author',             legacy_data['root_author']),
+                ('root_permlink',           legacy_data['root_permlink']),
+                ('max_accepted_payout',     legacy_data['max_accepted_payout']),
+                ('percent_steem_dollars',   legacy_data['percent_steem_dollars']),
+                ('allow_replies',           legacy_data['allow_replies']),
+                ('allow_votes',             legacy_data['allow_votes']),
+                ('allow_curation_rewards',  legacy_data['allow_curation_rewards']),
+                ('beneficiaries',           json.dumps(legacy_data['beneficiaries'])),
+                ('url',                     legacy_data['url']),
+                ('root_title',              legacy_data['root_title']),
            ])

        # if there's a pending promoted value to write, pull it out

--- a/hive/server/bridge_api/objects.py
+++ b/hive/server/bridge_api/objects.py
@@ -45,8 +45,11 @@ async def load_posts_keyed(db, ids, truncate_body=0):
    # fetch posts and associated author reps
    sql = """SELECT post_id, community_id, author, permlink, title, body, category, depth,
                    promoted, payout, payout_at, is_paidout, children, votes,
-                    created_at, updated_at, rshares, raw_json, json,
-                    is_hidden, is_grayed, total_votes, flag_weight
+                    created_at, updated_at, rshares, json,
+                    is_hidden, is_grayed, total_votes, flag_weight,
+                    legacy_id, parent_author, parent_permlink, curator_payout_value, 
+                    root_author, root_permlink, max_accepted_payout, percent_steem_dollars, 
+                    allow_replies, allow_votes, allow_curation_rewards, url, root_title 
               FROM hive_posts_cache WHERE post_id IN :ids"""
    result = await db.query_all(sql, ids=tuple(ids))
    author_map = await _query_author_map(db, result)
@@ -216,29 +219,37 @@ def _condenser_post_object(row, truncate_body=0):
        'total_votes': row['total_votes'],
        'flag_weight': row['flag_weight']} # TODO: down_weight

-    # import fields from legacy object
-    assert row['raw_json']
-    assert len(row['raw_json']) > 32
-    raw_json = json.loads(row['raw_json'])

-    # TODO: move to core, or payout_details
-    post['beneficiaries'] = raw_json['beneficiaries']
-    post['max_accepted_payout'] = raw_json['max_accepted_payout']
-    post['percent_steem_dollars'] = raw_json['percent_steem_dollars'] # TODO: systag?
+    #post['author_reputation'] = rep_to_raw(row['author_rep'])
+
+    post['legacy_id'] = row['legacy_id']
+
+    post['root_author'] = row['root_author']
+    post['root_permlink'] = row['root_permlink']
+
+    post['allow_replies'] = row['allow_replies']
+    post['allow_votes'] = row['allow_votes']
+    post['allow_curation_rewards'] = row['allow_curation_rewards']
+
+    post['url'] = row['url']
+    post['root_title'] = row['root_title']
+    post['beneficiaries'] = json.loads(row['beneficiaries'])
+    post['max_accepted_payout'] = row['max_accepted_payout']
+    post['percent_steem_dollars'] = row['percent_steem_dollars']
+
    if paid:
-        curator_payout = sbd_amount(raw_json['curator_payout_value'])
-        post['author_payout_value'] = _amount(row['payout'] - curator_payout)
+        curator_payout = sbd_amount(row['curator_payout_value'])
        post['curator_payout_value'] = _amount(curator_payout)
+        post['total_payout_value'] = _amount(row['payout'] - curator_payout)
+
+    # not used by condenser, but may be useful
+    # post['net_votes'] = post['total_votes'] - row['up_votes']

    # TODO: re-evaluate
    if row['depth'] > 0:
-        post['parent_author'] = raw_json['parent_author']
-        post['parent_permlink'] = raw_json['parent_permlink']
-        post['title'] = 'RE: ' + raw_json['root_title'] # PostSummary & comment context
-    #else:
-    #    post['parent_author'] = ''
-    #    post['parent_permlink'] = ''
-    post['url'] = raw_json['url']
+        post['parent_author'] = row['parent_author']
+        post['parent_permlink'] = row['parent_permlink']
+        post['title'] = 'RE: ' + row['root_title'] # PostSummary & comment context

    return post


--- a/hive/server/common/objects.py
+++ b/hive/server/common/objects.py
+from hive.server.common.helpers import json_date
+from hive.utils.normalize import sbd_amount, rep_to_raw
+import ujson as json
+
+def _amount(amount, asset='HBD'):
+    """Format a numeric amount as a steem-style string with three decimals.
+
+    Only the 'HBD' asset is handled; any other asset fails the assertion.
+    """
+    assert asset == 'HBD', 'unhandled asset %s' % asset
+    formatted = "%.3f HBD" % amount
+    return formatted
+
+def _hydrate_active_votes(vote_csv):
+    """Expand the compact vote CSV into a list of steemd-style vote dicts.
+
+    Each newline-separated entry must hold exactly four comma-separated
+    fields: voter, rshares, percent, reputation. A falsy input yields [].
+    """
+    if not vote_csv:
+        return []
+
+    def _to_vote(entry):
+        # unpacking enforces the four-field layout of every entry
+        voter, rshares, percent, reputation = entry.split(',')
+        return {'voter': voter,
+                'rshares': rshares,
+                'percent': percent,
+                'reputation': rep_to_raw(reputation)}
+
+    return [_to_vote(entry) for entry in vote_csv.split("\n")]
+
+async def query_author_map(db, posts):
+    """Map each distinct post author name to its hive_accounts row.
+
+    The row carries `id`, `name` and `reputation`. An empty `posts`
+    list short-circuits to an empty dict without touching the database.
+    """
+    if not posts:
+        return {}
+
+    # de-duplicate author names before binding them to the IN clause
+    unique_authors = tuple({post['author'] for post in posts})
+    sql = "SELECT id, name, reputation FROM hive_accounts WHERE name IN :names"
+    accounts = await db.query_all(sql, names=unique_authors)
+
+    author_map = {}
+    for account in accounts:
+        author_map[account['name']] = account
+    return author_map
+
+def condenser_post_object(row, truncate_body=0):
+    """Given a hive_posts_cache row, create a legacy-style post object.
+
+    `row` must support item assignment: an empty category is overwritten
+    in place with 'undefined'. When `truncate_body` is non-zero the body is
+    sliced to its first `truncate_body` characters, while `body_length`
+    always reports the length of the full body.
+
+    Returns a new dict keyed by the legacy field names.
+    """
+    paid = row['is_paidout']
+
+    # condenser#3424 mitigation
+    if not row['category']:
+        row['category'] = 'undefined'
+
+    post = {}
+    post['post_id'] = row['post_id']
+    post['author'] = row['author']
+    post['permlink'] = row['permlink']
+    post['category'] = row['category']
+
+    post['title'] = row['title']
+    post['body'] = row['body'][0:truncate_body] if truncate_body else row['body']
+    post['json_metadata'] = row['json']
+
+    post['created'] = json_date(row['created_at'])
+    post['last_update'] = json_date(row['updated_at'])
+    post['depth'] = row['depth']
+    post['children'] = row['children']
+    post['net_rshares'] = row['rshares']
+
+    # payout_at is reported as last_payout once paid, as cashout_time before
+    post['last_payout'] = json_date(row['payout_at'] if paid else None)
+    post['cashout_time'] = json_date(None if paid else row['payout_at'])
+    # defaults; total/curator values are overwritten below when paid
+    post['total_payout_value'] = _amount(row['payout'] if paid else 0)
+    post['curator_payout_value'] = _amount(0)
+    post['pending_payout_value'] = _amount(0 if paid else row['payout'])
+    post['promoted'] = _amount(row['promoted'])
+
+    post['replies'] = []
+    post['body_length'] = len(row['body'])
+    post['active_votes'] = _hydrate_active_votes(row['votes'])
+    #post['author_reputation'] = rep_to_raw(row['author_rep'])
+
+    post['legacy_id'] = row['legacy_id']
+
+    post['root_author'] = row['root_author']
+    post['root_permlink'] = row['root_permlink']
+
+    post['allow_replies'] = row['allow_replies']
+    post['allow_votes'] = row['allow_votes']
+    post['allow_curation_rewards'] = row['allow_curation_rewards']
+
+    # top-level posts (depth 0) have no parent author and use the category as parent permlink
+    if row['depth'] > 0:
+        post['parent_author'] = row['parent_author']
+        post['parent_permlink'] = row['parent_permlink']
+    else:
+        post['parent_author'] = ''
+        post['parent_permlink'] = row['category']
+
+    post['url'] = row['url']
+    post['root_title'] = row['root_title']
+    # NOTE(review): passed through exactly as stored, without JSON decoding —
+    # confirm the db driver already returns a decoded value for this column
+    post['beneficiaries'] = row['beneficiaries']
+    post['max_accepted_payout'] = row['max_accepted_payout']
+    post['percent_steem_dollars'] = row['percent_steem_dollars']
+
+    if paid:
+        # split the stored payout: curator share is reported separately and
+        # subtracted from the total
+        curator_payout = sbd_amount(row['curator_payout_value'])
+        post['curator_payout_value'] = _amount(curator_payout)
+        post['total_payout_value'] = _amount(row['payout'] - curator_payout)
+
+    # not used by condenser, but may be useful
+    # post['net_votes'] = post['total_votes'] - row['up_votes']
+
+    return post
--- a/hive/server/condenser_api/cursor.py
+++ b/hive/server/condenser_api/cursor.py
@@ -4,6 +4,7 @@ from datetime import datetime
 from dateutil.relativedelta import relativedelta

 from hive.utils.normalize import rep_to_raw
+from json import loads

 # pylint: disable=too-many-lines

@@ -374,3 +375,39 @@ async def pids_by_replies_to_account(db, start_author: str, start_permlink: str
    """ % seek

    return await db.query_col(sql, parent=parent_account, start_id=start_id, limit=limit)
+
+async def get_accounts(db, accounts: list):
+    """Return account data for the account names given in `accounts`.
+
+    Each result starts from the account's decoded `raw_json` and is then
+    overlaid with the dedicated hive_accounts columns; timestamp columns
+    are rendered with `isoformat()`.
+
+    The names are passed to the database as a bound parameter instead of
+    being formatted into the SQL text, so a name containing a quote can
+    neither break nor alter the query. An empty `accounts` list returns an
+    empty result without querying.
+    """
+    if not accounts:
+        return []
+
+    sql = """SELECT created_at, reputation, display_name, about,
+        location, website, profile_image, cover_image, followers, following,
+        proxy, post_count, proxy_weight, vote_weight, rank,
+        lastread_at, active_at, cached_at, raw_json
+        FROM hive_accounts WHERE name IN :names"""
+
+    # columns copied verbatim, listed in the order they are written to the result
+    plain_columns = ('reputation', 'display_name', 'about', 'location',
+                     'website', 'profile_image', 'cover_image', 'followers',
+                     'following', 'proxy', 'post_count', 'proxy_weight',
+                     'vote_weight', 'rank')
+    # timestamp columns written after the plain ones
+    trailing_dates = ('lastread_at', 'active_at', 'cached_at')
+
+    ret = []
+    for row in await db.query_all(sql, names=tuple(accounts)):
+        account_data = dict(loads(row.raw_json))
+        account_data['created_at'] = row.created_at.isoformat()
+        for column in plain_columns:
+            account_data[column] = getattr(row, column)
+        for column in trailing_dates:
+            account_data[column] = getattr(row, column).isoformat()
+        ret.append(account_data)
+
+    return ret
--- a/hive/server/condenser_api/methods.py
+++ b/hive/server/condenser_api/methods.py
 """Steemd/condenser_api compatibility layer API methods."""
-
+from json import loads
 from functools import wraps

 import hive.server.condenser_api.cursor as cursor
@@ -118,7 +118,10 @@ async def get_content_replies(context, author: str, permlink: str):

    sql = """SELECT post_id, author, permlink, title, body, category, depth,
             promoted, payout, payout_at, is_paidout, children, votes,
-             created_at, updated_at, rshares, raw_json, json
+             created_at, updated_at, rshares, json,
+             legacy_id, parent_author, parent_permlink, curator_payout_value, 
+             root_author, root_permlink, max_accepted_payout, percent_steem_dollars, 
+             allow_replies, allow_votes, allow_curation_rewards, url, root_title 
             FROM hive_posts_cache WHERE post_id IN (
             SELECT hp2.id FROM hive_posts hp2
             WHERE hp2.is_deleted = '0' AND
@@ -404,3 +407,12 @@ async def _get_blog(db, account: str, start_index: int, limit: int = None):
        idx -= 1

    return out
+
+@return_error_info
+async def get_accounts(context, accounts: list):
+    """Return account data for every account name in `accounts`.
+
+    An empty list, or a list of 1000 or more names, fails an assertion
+    before the lookup is delegated to `cursor.get_accounts`.
+    """
+    print("Hivemind native get_accounts")
+    assert accounts, "Empty parameters are not supported"
+    assert len(accounts) < 1000, "Query exceeds limit"
+
+    db = context['db']
+    account_rows = await cursor.get_accounts(db, accounts)
+    return account_rows
--- a/hive/server/condenser_api/objects.py
+++ b/hive/server/condenser_api/objects.py
@@ -42,7 +42,10 @@ async def load_posts_keyed(db, ids, truncate_body=0):
    # fetch posts and associated author reps
    sql = """SELECT post_id, author, permlink, title, body, category, depth,
                    promoted, payout, payout_at, is_paidout, children, votes,
-                    created_at, updated_at, rshares, raw_json, json
+                    created_at, updated_at, rshares, json,
+                    legacy_id, parent_author, parent_permlink, curator_payout_value, 
+                    root_author, root_permlink, max_accepted_payout, percent_steem_dollars, 
+                    allow_replies, allow_votes, allow_curation_rewards, url, root_title 
               FROM hive_posts_cache WHERE post_id IN :ids"""
    result = await db.query_all(sql, ids=tuple(ids))
    author_reps = await _query_author_rep_map(db, result)
@@ -164,34 +167,35 @@ def _condenser_post_object(row, truncate_body=0):
    post['active_votes'] = _hydrate_active_votes(row['votes'])
    post['author_reputation'] = rep_to_raw(row['author_rep'])

-    # import fields from legacy object
-    assert row['raw_json']
-    assert len(row['raw_json']) > 32
-    raw_json = json.loads(row['raw_json'])
+    post['legacy_id'] = row['legacy_id']
+
+    post['root_author'] = row['root_author']
+    post['root_permlink'] = row['root_permlink']
+
+    post['allow_replies'] = row['allow_replies']
+    post['allow_votes'] = row['allow_votes']
+    post['allow_curation_rewards'] = row['allow_curation_rewards']

    if row['depth'] > 0:
-        post['parent_author'] = raw_json['parent_author']
-        post['parent_permlink'] = raw_json['parent_permlink']
+        post['parent_author'] = row['parent_author']
+        post['parent_permlink'] = row['parent_permlink']
    else:
        post['parent_author'] = ''
        post['parent_permlink'] = row['category']

-    post['url'] = raw_json['url']
-    post['root_title'] = raw_json['root_title']
-    post['beneficiaries'] = raw_json['beneficiaries']
-    post['max_accepted_payout'] = raw_json['max_accepted_payout']
-    post['percent_steem_dollars'] = raw_json['percent_steem_dollars']
+    post['url'] = row['url']
+    post['root_title'] = row['root_title']
+    post['beneficiaries'] = json.loads(row['beneficiaries'])
+    post['max_accepted_payout'] = row['max_accepted_payout']
+    post['percent_steem_dollars'] = row['percent_steem_dollars']

    if paid:
-        curator_payout = sbd_amount(raw_json['curator_payout_value'])
+        curator_payout = sbd_amount(row['curator_payout_value'])
        post['curator_payout_value'] = _amount(curator_payout)
        post['total_payout_value'] = _amount(row['payout'] - curator_payout)

    # not used by condenser, but may be useful
-    #post['net_votes'] = post['total_votes'] - row['up_votes']
-    #post['allow_replies'] = raw_json['allow_replies']
-    #post['allow_votes'] = raw_json['allow_votes']
-    #post['allow_curation_rewards'] = raw_json['allow_curation_rewards']
+    # post['net_votes'] = post['total_votes'] - row['up_votes']

    return post


--- a/hive/server/database_api/__init__.py
+++ b/hive/server/database_api/__init__.py
+"""Hive database_api methods and support."""
--- a/hive/server/database_api/methods.py
+++ b/hive/server/database_api/methods.py
+# pylint: disable=too-many-arguments,line-too-long,too-many-lines
+from hive.server.common.helpers import return_error_info, valid_limit
+from hive.server.common.objects import condenser_post_object
+
+# Column list shared by every list_comments query.
+_LIST_COMMENTS_COLUMNS = """post_id, community_id, author, permlink, title, body, category, depth,
+                    promoted, payout, payout_at, is_paidout, children, votes,
+                    created_at, updated_at, rshares, json,
+                    is_hidden, is_grayed, total_votes, flag_weight,
+                    legacy_id, parent_author, parent_permlink, curator_payout_value,
+                    root_author, root_permlink, max_accepted_payout, percent_steem_dollars,
+                    allow_replies, allow_votes, allow_curation_rewards, url, root_title"""
+
+@return_error_info
+async def list_comments(context, start: list, limit: int, order: str):
+    """Returns all comments, starting with the specified options.
+
+    `start` depends on `order`:
+      - 'by_cashout_time': [payout_time, author, permlink]
+      - 'by_permlink':     [author, permlink]
+      - 'by_root':         [root_author, root_permlink, child_author, child_permlink]
+      - 'by_parent':       [parent_author, parent_permlink, child_author, child_permlink]
+
+    `limit` is validated with `valid_limit(limit, 1000)`. An unsupported
+    `order` or a `start` list of the wrong length fails an assertion.
+
+    Returns a list of legacy-style post objects. For 'by_cashout_time', when
+    an author/permlink cursor is given but no post matches it, the result is
+    an empty list (previously this raised AttributeError on the missing row).
+    """
+    print("Hivemind native list_comments")
+    supported_order_list = ['by_cashout_time', 'by_permlink', 'by_root', 'by_parent']
+    assert order in supported_order_list, "Unsupported order, valid orders {}".format(supported_order_list)
+    limit = valid_limit(limit, 1000)
+    db = context['db']
+
+    # each branch only decides the FROM/WHERE/ORDER part and its parameters
+    if order == 'by_cashout_time':
+        assert len(start) == 3, "Expecting three arguments"
+        payout_time, author, permlink = start
+        post_id = 0
+        if author or permlink:
+            # resolve the author/permlink cursor to a post_id lower bound
+            sql = """SELECT post_id FROM hive_posts_cache WHERE author >= :author AND permlink >= :permlink LIMIT 1"""
+            result = await db.query_row(sql, author=author, permlink=permlink)
+            if not result:
+                # nothing matches the cursor, so there is nothing to list
+                return []
+            post_id = result.post_id
+        source = "hive_posts_cache WHERE payout_at >= :start AND post_id >= :post_id ORDER BY payout_at ASC, post_id ASC"
+        params = dict(start=payout_time, post_id=post_id)
+    elif order == 'by_permlink':
+        assert len(start) == 2, "Expecting two arguments"
+        author, permlink = start
+        source = "hive_posts_cache WHERE author >= :author AND permlink >= :permlink ORDER BY author ASC, permlink ASC, post_id ASC"
+        params = dict(author=author, permlink=permlink)
+    elif order == 'by_root':
+        assert len(start) == 4, "Expecting 4 arguments"
+        root_author, root_permlink, child_author, child_permlink = start
+        source = "get_rows_by_root(:root_author, :root_permlink, :child_author, :child_permlink) ORDER BY post_id ASC"
+        params = dict(root_author=root_author, root_permlink=root_permlink,
+                      child_author=child_author, child_permlink=child_permlink)
+    elif order == 'by_parent':
+        assert len(start) == 4, "Expecting 4 arguments"
+        parent_author, parent_permlink, child_author, child_permlink = start
+        # NOTE(review): unlike by_root, this query has no ORDER BY — confirm
+        # get_rows_by_parent already returns rows in the intended order
+        source = "get_rows_by_parent(:parent_author, :parent_permlink, :child_author, :child_permlink)"
+        params = dict(parent_author=parent_author, parent_permlink=parent_permlink,
+                      child_author=child_author, child_permlink=child_permlink)
+    else:
+        # only reachable when assertions are disabled
+        return []
+
+    sql = "SELECT " + _LIST_COMMENTS_COLUMNS + " FROM " + source + " LIMIT :limit"
+    result = await db.query_all(sql, limit=limit, **params)
+    # condenser_post_object may write to the row, so hand it a plain dict copy
+    return [condenser_post_object(dict(row)) for row in result]
--- a/hive/server/serve.py
+++ b/hive/server/serve.py
@@ -26,6 +26,8 @@ from hive.server.hive_api import community as hive_api_community
 from hive.server.hive_api import notify as hive_api_notify
 from hive.server.hive_api import stats as hive_api_stats

+from hive.server.database_api import methods as database_api
+
 from hive.server.db import Db

 # pylint: disable=too-many-lines
@@ -73,6 +75,7 @@ def build_methods():
        condenser_api.get_blog_entries,
        condenser_api.get_account_reputations,
        condenser_api.get_reblogged_by,
+        condenser_api.get_accounts
    )})

    # dummy methods -- serve informational error
@@ -135,6 +138,11 @@ def build_methods():
        hive_api_community.list_all_subscriptions,
    )})

+    # database_api methods
+    methods.add(**{
+        'database_api.list_comments' : database_api.list_comments
+    })
+
    return methods

 def truncate_response_log(logger):

--- a/hive/utils/post.py
+++ b/hive/utils/post.py
@@ -44,6 +44,7 @@ def post_to_internal(post, post_id, level='insert', promoted=None):
    # always write, unless simple vote update
    if level in ['insert', 'payout', 'update']:
        basic = post_basic(post)
+        legacy_data = post_legacy(post)
        values.extend([
            ('community_id',  post['community_id']), # immutable*
            ('created_at',    post['created']),    # immutable*
@@ -58,7 +59,21 @@ def post_to_internal(post, post_id, level='insert', promoted=None):
            ('is_full_power', basic['is_full_power']),
            ('is_paidout',    basic['is_paidout']),
            ('json',          json.dumps(basic['json_metadata'])),
-            ('raw_json',      json.dumps(post_legacy(post))),
+            #('raw_json',      json.dumps(legacy_data)),
+            ('legacy_id',               legacy_data['id']),
+            ('parent_author',           legacy_data['parent_author']),
+            ('parent_permlink',         legacy_data['parent_permlink']),
+            ('curator_payout_value',    legacy_data['curator_payout_value']),
+            ('root_author',             legacy_data['root_author']),
+            ('root_permlink',           legacy_data['root_permlink']),
+            ('max_accepted_payout',     legacy_data['max_accepted_payout']),
+            ('percent_steem_dollars',   legacy_data['percent_steem_dollars']),
+            ('allow_replies',           legacy_data['allow_replies']),
+            ('allow_votes',             legacy_data['allow_votes']),
+            ('allow_curation_rewards',   legacy_data['allow_curation_rewards']),
+            ('beneficiaries',           legacy_data['beneficiaries']),
+            ('url',                     legacy_data['url']),
+            ('root_title',              legacy_data['root_title']),
        ])

    # if there's a pending promoted value to write, pull it out

--- a/scripts/update_hivemind_db.sql
+++ b/scripts/update_hivemind_db.sql
+-- This script will upgrade hivemind database to new version
+-- Authors: Dariusz Kędzierski
+-- Created: 26-04-2020
+
+-- Bookkeeping table holding one row per schema version that has been applied.
+-- The upgrade block below looks up its own version here before doing any work.
+CREATE TABLE IF NOT EXISTS hive_db_version (
+  -- version label; the 1.0 upgrade below stores '1.0'
+  version VARCHAR(50) PRIMARY KEY,
+  -- free-form note; the 1.0 upgrade below stores an issue URL
+  notes VARCHAR(1024)
+);
+
+-- Upgrade to version 1.0
+-- in this version we will move data from raw_json into separate columns
+DO $$
+  DECLARE
+    -- Rows are migrated in fixed-size post_id windows so that no single
+    -- UPDATE statement has to touch the whole table.
+    batch_size INTEGER := 100000;
+
+    -- Highest post_id present in hive_posts_cache (-1 when the table is empty)
+    last_id INTEGER := -1;
+
+    -- Inclusive lower bound of the post_id window currently being processed
+    current_id INTEGER := 0;
+  BEGIN
+    RAISE NOTICE 'Upgrading database to version 1.0';
+    IF NOT EXISTS (SELECT version FROM hive_db_version WHERE version = '1.0')
+    THEN
+      -- Update version info
+      INSERT INTO hive_db_version (version, notes) VALUES ('1.0', 'https://gitlab.syncad.com/blocktrades/hivemind/issues/5');
+      -- Alter hive_posts_cache and add columns originally stored in raw_json
+      RAISE NOTICE 'Attempting to alter table hive_posts_cache';
+      ALTER TABLE hive_posts_cache
+        ADD COLUMN legacy_id INT NOT NULL DEFAULT -1,
+        ADD COLUMN parent_author VARCHAR(16) NOT NULL DEFAULT '',
+        ADD COLUMN parent_permlink VARCHAR(255) NOT NULL DEFAULT '',
+        ADD COLUMN curator_payout_value VARCHAR(16) NOT NULL DEFAULT '',
+        ADD COLUMN root_author VARCHAR(16) NOT NULL DEFAULT '',
+        ADD COLUMN root_permlink VARCHAR(255) NOT NULL DEFAULT '',
+        ADD COLUMN max_accepted_payout VARCHAR(16) NOT NULL DEFAULT '',
+        ADD COLUMN percent_steem_dollars INT NOT NULL DEFAULT -1,
+        ADD COLUMN allow_replies BOOLEAN NOT NULL DEFAULT TRUE,
+        ADD COLUMN allow_votes BOOLEAN NOT NULL DEFAULT TRUE,
+        ADD COLUMN allow_curation_rewards BOOLEAN NOT NULL DEFAULT TRUE,
+        ADD COLUMN beneficiaries JSON NOT NULL DEFAULT '[]',
+        ADD COLUMN url TEXT NOT NULL DEFAULT '',
+        ADD COLUMN root_title VARCHAR(255) NOT NULL DEFAULT '';
+      RAISE NOTICE 'Done...';
+
+      -- Helper type for use with json_populate_record: one field per key
+      -- that is extracted from raw_json.
+      CREATE TYPE legacy_comment_type AS (
+        id INT,
+        parent_author VARCHAR(16),
+        parent_permlink VARCHAR(255),
+        curator_payout_value VARCHAR(16),
+        root_author VARCHAR(16),
+        root_permlink VARCHAR(255),
+        max_accepted_payout VARCHAR(16),
+        percent_steem_dollars INT,
+        allow_replies BOOLEAN,
+        allow_votes BOOLEAN,
+        allow_curation_rewards BOOLEAN,
+        beneficiaries JSON,
+        url TEXT,
+        root_title VARCHAR(255)
+      );
+
+      -- COALESCE keeps the loop below from running on an empty table.
+      SELECT COALESCE(MAX(post_id), -1) INTO last_id FROM hive_posts_cache;
+
+      RAISE NOTICE 'Attempting to parse % rows in batches %', last_id, batch_size;
+
+      -- The window always advances by exactly batch_size, so the loop
+      -- terminates even when a window is empty or ids have large gaps.
+      WHILE current_id <= last_id LOOP
+        RAISE NOTICE 'Processing batch: % <= post_id < % (of %)', current_id, current_id + batch_size, last_id;
+        -- One set-based UPDATE per window. Keys missing from raw_json fall
+        -- back to the column defaults declared above, so the NOT NULL
+        -- constraints cannot abort the migration.
+        UPDATE hive_posts_cache AS hpc SET (
+          legacy_id, parent_author, parent_permlink, curator_payout_value,
+          root_author, root_permlink, max_accepted_payout, percent_steem_dollars,
+          allow_replies, allow_votes, allow_curation_rewards, beneficiaries,
+          url, root_title
+        ) = (
+          SELECT COALESCE(lc.id, -1),
+            COALESCE(lc.parent_author, ''),
+            COALESCE(lc.parent_permlink, ''),
+            COALESCE(lc.curator_payout_value, ''),
+            COALESCE(lc.root_author, ''),
+            COALESCE(lc.root_permlink, ''),
+            COALESCE(lc.max_accepted_payout, ''),
+            COALESCE(lc.percent_steem_dollars, -1),
+            COALESCE(lc.allow_replies, TRUE),
+            COALESCE(lc.allow_votes, TRUE),
+            COALESCE(lc.allow_curation_rewards, TRUE),
+            COALESCE(lc.beneficiaries, '[]'::json),
+            COALESCE(lc.url, ''),
+            COALESCE(lc.root_title, '')
+          FROM json_populate_record(null::legacy_comment_type, hpc.raw_json::json) AS lc
+        )
+        WHERE hpc.post_id >= current_id AND hpc.post_id < current_id + batch_size;
+        current_id := current_id + batch_size;
+      END LOOP;
+      RAISE NOTICE 'Done...';
+      -- Creating indexes
+      RAISE NOTICE 'Creating author_permlink_idx';
+      CREATE INDEX IF NOT EXISTS author_permlink_idx ON hive_posts_cache (author ASC, permlink ASC);
+      RAISE NOTICE 'Creating root_author_permlink_idx';
+      CREATE INDEX IF NOT EXISTS root_author_permlink_idx ON hive_posts_cache (root_author ASC, root_permlink ASC);
+      RAISE NOTICE 'Creating parent_author_permlink_idx';
+      CREATE INDEX IF NOT EXISTS parent_author_permlink_idx ON hive_posts_cache (parent_author ASC, parent_permlink ASC);
+      RAISE NOTICE 'Creating author_permlink_post_id_idx';
+      CREATE INDEX IF NOT EXISTS author_permlink_post_id_idx ON hive_posts_cache (author ASC, permlink ASC, post_id ASC);
+      RAISE NOTICE 'Creating post_id_author_permlink_idx';
+      CREATE INDEX IF NOT EXISTS post_id_author_permlink_idx ON hive_posts_cache (post_id ASC, author ASC, permlink ASC);
+
+      -- Creating functions
+      -- The function bodies use the $function$ tag: re-using the bare $$ tag
+      -- here would close the enclosing DO body early.
+
+      -- for list_comments by_root
+      -- Walks posts with author >= root_a AND permlink >= root_p in post_id
+      -- order; for each one, returns the posts that share its root and have
+      -- author >= child_a AND permlink >= child_p. Stops after query_limit rows.
+      -- NOTE(review): the >= filters compare each column independently rather
+      -- than row-wise, and a root shared by several outer rows is emitted
+      -- once per outer row - confirm both are intended.
+      CREATE OR REPLACE FUNCTION get_rows_by_root(root_a VARCHAR, root_p VARCHAR, child_a VARCHAR, child_p VARCHAR, query_limit INT DEFAULT 1000) RETURNS SETOF hive_posts_cache AS $function$
+      DECLARE
+        root_row hive_posts_cache;
+        child_row hive_posts_cache;
+        query_count INT := 0;
+      BEGIN
+        FOR root_row IN SELECT * FROM hive_posts_cache WHERE author >= root_a AND permlink >= root_p ORDER BY post_id ASC, author ASC, permlink ASC
+        LOOP
+          EXIT WHEN query_count >= query_limit;
+          FOR child_row IN SELECT * FROM hive_posts_cache WHERE author >= child_a AND permlink >= child_p AND root_author = root_row.root_author AND root_permlink = root_row.root_permlink ORDER BY post_id ASC, author ASC, permlink ASC
+          LOOP
+            EXIT WHEN query_count >= query_limit;
+            RETURN NEXT child_row;
+            query_count := query_count + 1;
+          END LOOP;
+        END LOOP;
+        RETURN;
+      END
+      $function$ LANGUAGE plpgsql;
+      -- for list_comments by_parent
+      -- Finds the lowest post_id with author >= child_a AND permlink >= child_p,
+      -- then returns up to query_limit direct children of
+      -- (parent_a, parent_p) from that post_id onwards.
+      CREATE OR REPLACE FUNCTION get_rows_by_parent(parent_a VARCHAR, parent_p VARCHAR, child_a VARCHAR, child_p VARCHAR, query_limit INT DEFAULT 1000) RETURNS SETOF hive_posts_cache AS $function$
+      DECLARE
+        child_id INT := 0;
+      BEGIN
+        SELECT INTO child_id post_id FROM hive_posts_cache WHERE author >= child_a AND permlink >= child_p ORDER BY post_id ASC LIMIT 1;
+        RETURN QUERY SELECT * FROM hive_posts_cache WHERE parent_author = parent_a AND parent_permlink = parent_p AND post_id >= child_id ORDER BY post_id ASC, author ASC, permlink ASC LIMIT query_limit;
+      END
+      $function$ LANGUAGE plpgsql;
+    ELSE
+      RAISE NOTICE 'Database already in version 1.0';
+    END IF;
+  END
+$$;
--- a/tests/server/test_server_database_api.py
+++ b/tests/server/test_server_database_api.py
+import pytest
+from hive.server.database_api.methods import list_comments
+from hive.steem.client import SteemClient
+
+@pytest.fixture
+def client():
+  """Provide a SteemClient pointed at the public api.hive.blog endpoint."""
+  node_url = 'https://api.hive.blog'
+  return SteemClient(url=node_url)
+
+def test_list_comments_by_cashout_time(client):
+  """Check that the by_cashout_time listing matches what the `client` node returns."""
+  import asyncio
+
+  async def fetch():
+    # NOTE(review): assumes both calls return awaitables, as the original
+    # `await` expressions imply - confirm against the client and the method.
+    reference = await client.list_comments({"start":["1990-01-01T00:00:00","steemit","firstpost"],"limit":10,"order":"by_cashout_time"})
+    tested = await list_comments(["1990-01-01T00:00:00","steemit","firstpost"],10,"by_cashout_time")
+    return reference, tested
+
+  # `await` is only legal inside a coroutine, so it is driven from a private
+  # event loop instead of the body of a plain `def`.
+  loop = asyncio.new_event_loop()
+  try:
+    reference_data, test_data = loop.run_until_complete(fetch())
+  finally:
+    loop.close()
+
+  assert reference_data
+  assert test_data
+  assert len(reference_data) == len(test_data)
+  for expected, actual in zip(reference_data, test_data):
+    for key in ('author', 'permlink'):
+      assert expected[key] == actual[key]
+    # The two sides expose the cashout time under different keys.
+    assert expected['cashout_time'] == actual['payout_at']
+
+def test_list_comments_by_permlink(client):
+  """Check that the by_permlink listing matches what the `client` node returns."""
+  import asyncio
+
+  async def fetch():
+    # NOTE(review): assumes both calls return awaitables, as the original
+    # `await` expressions imply - confirm against the client and the method.
+    reference = await client.list_comments({"start":["steemit","firstpost"],"limit":10,"order":"by_permlink"})
+    tested = await list_comments(["steemit","firstpost"],10,"by_permlink")
+    return reference, tested
+
+  # `await` is only legal inside a coroutine, so it is driven from a private
+  # event loop instead of the body of a plain `def`.
+  loop = asyncio.new_event_loop()
+  try:
+    reference_data, test_data = loop.run_until_complete(fetch())
+  finally:
+    loop.close()
+
+  assert reference_data
+  assert test_data
+  assert len(reference_data) == len(test_data)
+  for expected, actual in zip(reference_data, test_data):
+    for key in ('author', 'permlink'):
+      assert expected[key] == actual[key]
+
+def test_list_comments_by_root(client):
+  """Check that the by_root listing matches what the `client` node returns."""
+  import asyncio
+
+  async def fetch():
+    # NOTE(review): assumes both calls return awaitables, as the original
+    # `await` expressions imply - confirm against the client and the method.
+    reference = await client.list_comments({"start":["steemit","firstpost","",""],"limit":10,"order":"by_root"})
+    tested = await list_comments(["steemit","firstpost","",""],10,"by_root")
+    return reference, tested
+
+  # `await` is only legal inside a coroutine, so it is driven from a private
+  # event loop instead of the body of a plain `def`.
+  loop = asyncio.new_event_loop()
+  try:
+    reference_data, test_data = loop.run_until_complete(fetch())
+  finally:
+    loop.close()
+
+  assert reference_data
+  assert test_data
+  assert len(reference_data) == len(test_data)
+  for expected, actual in zip(reference_data, test_data):
+    for key in ('author', 'permlink', 'root_author', 'root_permlink'):
+      assert expected[key] == actual[key]
+
+def test_list_comments_by_parent(client):
+  """Check that the by_parent listing matches what the `client` node returns."""
+  import asyncio
+
+  async def fetch():
+    # NOTE(review): assumes both calls return awaitables, as the original
+    # `await` expressions imply - confirm against the client and the method.
+    reference = await client.list_comments({"start":["steemit","firstpost","",""],"limit":10,"order":"by_parent"})
+    tested = await list_comments(["steemit","firstpost","",""],10,"by_parent")
+    return reference, tested
+
+  # `await` is only legal inside a coroutine, so it is driven from a private
+  # event loop instead of the body of a plain `def`.
+  loop = asyncio.new_event_loop()
+  try:
+    reference_data, test_data = loop.run_until_complete(fetch())
+  finally:
+    loop.close()
+
+  assert reference_data
+  assert test_data
+  assert len(reference_data) == len(test_data)
+  for expected, actual in zip(reference_data, test_data):
+    for key in ('author', 'permlink', 'parent_author', 'parent_permlink'):
+      assert expected[key] == actual[key]