From ca0189b433abb77f4b1dd074b57d34f6b609cf02 Mon Sep 17 00:00:00 2001
From: Dariusz Kedzierski <dkedzierski@syncad.com>
Date: Wed, 29 Apr 2020 16:01:36 +0200
Subject: [PATCH] [WIP] list_comments implementation: added native
 database_api.list_comments, raw_json data moved to proper table columns in
 hive_posts_cache, engine changed to support new columns. Added script to
 perform database upgrade.

---
 hive/db/schema.py                        |  19 +++-
 hive/indexer/cached_post.py              |  17 ++-
 hive/server/bridge_api/objects.py        |  49 +++++----
 hive/server/common/objects.py            |  96 ++++++++++++++++
 hive/server/condenser_api/cursor.py      |  37 +++++++
 hive/server/condenser_api/methods.py     |  16 ++-
 hive/server/condenser_api/objects.py     |  38 ++++---
 hive/server/database_api/__init__.py     |   1 +
 hive/server/database_api/methods.py      |  90 +++++++++++++++
 hive/server/serve.py                     |   8 ++
 hive/utils/post.py                       |  17 ++-
 scripts/update_hivemind_db.sql           | 134 +++++++++++++++++++++++
 tests/server/test_server_database_api.py |  52 +++++++++
 13 files changed, 532 insertions(+), 42 deletions(-)
 create mode 100644 hive/server/common/objects.py
 create mode 100644 hive/server/database_api/__init__.py
 create mode 100644 hive/server/database_api/methods.py
 create mode 100644 scripts/update_hivemind_db.sql
 create mode 100644 tests/server/test_server_database_api.py

diff --git a/hive/db/schema.py b/hive/db/schema.py
index b6caeea5b..a85383af3 100644
--- a/hive/db/schema.py
+++ b/hive/db/schema.py
@@ -40,7 +40,7 @@ def build_metadata():
         sa.Column('display_name', sa.String(20)),
         sa.Column('about', sa.String(160)),
         sa.Column('location', sa.String(30)),
-        sa.Column('website', sa.String(100)),
+        sa.Column('website', sa.String(1024)),
         sa.Column('profile_image', sa.String(1024), nullable=False, server_default=''),
         sa.Column('cover_image', sa.String(1024), nullable=False, server_default=''),
 
@@ -197,7 +197,22 @@ def build_metadata():
         sa.Column('body', TEXT),
         sa.Column('votes', TEXT),
         sa.Column('json', sa.Text),
-        sa.Column('raw_json', sa.Text),
+        #sa.Column('raw_json', sa.Text),
+
+        sa.Column('legacy_id', sa.Integer, nullable=False, server_default='-1'),
+        sa.Column('parent_author', sa.String(16), nullable=False, server_default=''),
+        sa.Column('parent_permlink',  sa.String(255), nullable=False, server_default=''),
+        sa.Column('curator_payout_value', sa.String(16), nullable=False, server_default=''),
+        sa.Column('root_author',  sa.String(16), nullable=False, server_default=''),
+        sa.Column('root_permlink',  sa.String(255), nullable=False, server_default=''),
+        sa.Column('max_accepted_payout',  sa.String(16), nullable=False, server_default=''),
+        sa.Column('percent_steem_dollars', sa.Integer, nullable=False, server_default='-1'),
+        sa.Column('allow_replies', BOOLEAN, nullable=False, server_default='1'),
+        sa.Column('allow_votes', BOOLEAN, nullable=False, server_default='1'),
+        sa.Column('allow_curation_rewards', BOOLEAN, nullable=False, server_default='1'),
+        sa.Column('beneficiaries',  sa.JSON, nullable=False, server_default=''),
+        sa.Column('url', sa.Text, nullable=False, server_default=''),
+        sa.Column('root_title', sa.String(255), nullable=False, server_default=''),
 
         # index: misc
         sa.Index('hive_posts_cache_ix3',  'payout_at', 'post_id',           postgresql_where=sql_text("is_paidout = '0'")),         # core: payout sweep
diff --git a/hive/indexer/cached_post.py b/hive/indexer/cached_post.py
index 25f731827..fbcd5e03b 100644
--- a/hive/indexer/cached_post.py
+++ b/hive/indexer/cached_post.py
@@ -487,6 +487,7 @@ class CachedPost:
         # always write, unless simple vote update
         if level in ['insert', 'payout', 'update']:
             basic = post_basic(post)
+            legacy_data = post_legacy(post)
             values.extend([
                 ('community_id',  post['community_id']), # immutable*
                 ('created_at',    post['created']),    # immutable*
@@ -501,7 +502,21 @@ class CachedPost:
                 ('is_full_power', basic['is_full_power']),
                 ('is_paidout',    basic['is_paidout']),
                 ('json',          json.dumps(basic['json_metadata'])),
-                ('raw_json',      json.dumps(post_legacy(post))),
+                #('raw_json',      json.dumps(legacy_data)),
+                ('legacy_id',               legacy_data['id']),
+                ('parent_author',           legacy_data['parent_author']),
+                ('parent_permlink',         legacy_data['parent_permlink']),
+                ('curator_payout_value',    legacy_data['curator_payout_value']),
+                ('root_author',             legacy_data['root_author']),
+                ('root_permlink',           legacy_data['root_permlink']),
+                ('max_accepted_payout',     legacy_data['max_accepted_payout']),
+                ('percent_steem_dollars',   legacy_data['percent_steem_dollars']),
+                ('allow_replies',           legacy_data['allow_replies']),
+                ('allow_votes',             legacy_data['allow_votes']),
+                ('allow_curation_rewards',  legacy_data['allow_curation_rewards']),
+                ('beneficiaries',           json.dumps(legacy_data['beneficiaries'])),
+                ('url',                     legacy_data['url']),
+                ('root_title',              legacy_data['root_title']),
             ])
 
         # if there's a pending promoted value to write, pull it out
diff --git a/hive/server/bridge_api/objects.py b/hive/server/bridge_api/objects.py
index 9846e3f4f..204078ca1 100644
--- a/hive/server/bridge_api/objects.py
+++ b/hive/server/bridge_api/objects.py
@@ -45,8 +45,11 @@ async def load_posts_keyed(db, ids, truncate_body=0):
     # fetch posts and associated author reps
     sql = """SELECT post_id, community_id, author, permlink, title, body, category, depth,
                     promoted, payout, payout_at, is_paidout, children, votes,
-                    created_at, updated_at, rshares, raw_json, json,
-                    is_hidden, is_grayed, total_votes, flag_weight
+                    created_at, updated_at, rshares, json,
+                    is_hidden, is_grayed, total_votes, flag_weight,
+                    legacy_id, parent_author, parent_permlink, curator_payout_value, 
+                    root_author, root_permlink, max_accepted_payout, percent_steem_dollars, 
+                    allow_replies, allow_votes, allow_curation_rewards, url, root_title 
                FROM hive_posts_cache WHERE post_id IN :ids"""
     result = await db.query_all(sql, ids=tuple(ids))
     author_map = await _query_author_map(db, result)
@@ -216,29 +219,37 @@ def _condenser_post_object(row, truncate_body=0):
         'total_votes': row['total_votes'],
         'flag_weight': row['flag_weight']} # TODO: down_weight
 
-    # import fields from legacy object
-    assert row['raw_json']
-    assert len(row['raw_json']) > 32
-    raw_json = json.loads(row['raw_json'])
 
-    # TODO: move to core, or payout_details
-    post['beneficiaries'] = raw_json['beneficiaries']
-    post['max_accepted_payout'] = raw_json['max_accepted_payout']
-    post['percent_steem_dollars'] = raw_json['percent_steem_dollars'] # TODO: systag?
+    #post['author_reputation'] = rep_to_raw(row['author_rep'])
+
+    post['legacy_id'] = row['legacy_id']
+
+    post['root_author'] = row['root_author']
+    post['root_permlink'] = row['root_permlink']
+
+    post['allow_replies'] = row['allow_replies']
+    post['allow_votes'] = row['allow_votes']
+    post['allow_curation_rewards'] = row['allow_curation_rewards']
+
+    post['url'] = row['url']
+    post['root_title'] = row['root_title']
+    post['beneficiaries'] = json.loads(row['beneficiaries'])
+    post['max_accepted_payout'] = row['max_accepted_payout']
+    post['percent_steem_dollars'] = row['percent_steem_dollars']
+
     if paid:
-        curator_payout = sbd_amount(raw_json['curator_payout_value'])
-        post['author_payout_value'] = _amount(row['payout'] - curator_payout)
+        curator_payout = sbd_amount(row['curator_payout_value'])
         post['curator_payout_value'] = _amount(curator_payout)
+        post['total_payout_value'] = _amount(row['payout'] - curator_payout)
+
+    # not used by condenser, but may be useful
+    # post['net_votes'] = post['total_votes'] - row['up_votes']
 
     # TODO: re-evaluate
     if row['depth'] > 0:
-        post['parent_author'] = raw_json['parent_author']
-        post['parent_permlink'] = raw_json['parent_permlink']
-        post['title'] = 'RE: ' + raw_json['root_title'] # PostSummary & comment context
-    #else:
-    #    post['parent_author'] = ''
-    #    post['parent_permlink'] = ''
-    post['url'] = raw_json['url']
+        post['parent_author'] = row['parent_author']
+        post['parent_permlink'] = row['parent_permlink']
+        post['title'] = 'RE: ' + row['root_title'] # PostSummary & comment context
 
     return post
 
diff --git a/hive/server/common/objects.py b/hive/server/common/objects.py
new file mode 100644
index 000000000..1e731d960
--- /dev/null
+++ b/hive/server/common/objects.py
@@ -0,0 +1,96 @@
+from hive.server.common.helpers import json_date
+from hive.utils.normalize import sbd_amount, rep_to_raw
+import ujson as json
+
+def _amount(amount, asset='HBD'):
+    """Format numeric `amount` as a three-decimal 'HBD' string; only asset 'HBD' is handled."""
+    assert asset == 'HBD', 'unhandled asset %s' % asset
+    return "%.3f HBD" % amount
+
+def _hydrate_active_votes(vote_csv):
+    """Expand newline-separated `voter,rshares,percent,reputation` rows into vote dicts."""
+    if not vote_csv:
+        return []
+
+    def _as_vote(entry):
+        # each row must hold exactly four comma-separated fields
+        voter, rshares, percent, reputation = entry.split(',')
+        return dict(voter=voter, rshares=rshares, percent=percent,
+                    reputation=rep_to_raw(reputation))
+
+    return [_as_vote(entry) for entry in vote_csv.split("\n")]
+
+async def query_author_map(db, posts):
+    """Map each distinct post author name to its hive_accounts row (id, name, reputation)."""
+    if not posts: return {}
+    sql = "SELECT id, name, reputation FROM hive_accounts WHERE name IN :names"
+    accounts = await db.query_all(sql, names=tuple({entry['author'] for entry in posts}))
+    return {account['name']: account for account in accounts}
+
+def condenser_post_object(row, truncate_body=0):
+    """Build a legacy-style post dict from a hive_posts_cache row.
+
+    `row` must be a mutable mapping: an empty category is replaced in place
+    with 'undefined'. A non-zero `truncate_body` caps the returned body to
+    that many characters; `body_length` always reflects the full body.
+    Returns the assembled post dict.
+    """
+    paid = row['is_paidout']
+
+    # condenser#3424 mitigation
+    if not row['category']:
+        row['category'] = 'undefined'
+
+    post = {
+        'post_id': row['post_id'],
+        'author': row['author'],
+        'permlink': row['permlink'],
+        'category': row['category'],
+        'title': row['title'],
+        'body': row['body'][0:truncate_body] if truncate_body else row['body'],
+        'json_metadata': row['json'],
+        'created': json_date(row['created_at']),
+        'last_update': json_date(row['updated_at']),
+        'depth': row['depth'],
+        'children': row['children'],
+        'net_rshares': row['rshares'],
+        # payout_at feeds last_payout once paid out, cashout_time before that
+        'last_payout': json_date(row['payout_at'] if paid else None),
+        'cashout_time': json_date(None if paid else row['payout_at']),
+        'total_payout_value': _amount(row['payout'] if paid else 0),
+        'curator_payout_value': _amount(0),
+        'pending_payout_value': _amount(0 if paid else row['payout']),
+        'promoted': _amount(row['promoted']),
+        'replies': [],
+        'body_length': len(row['body']),
+        'active_votes': _hydrate_active_votes(row['votes']),
+        # 'author_reputation': rep_to_raw(row['author_rep']),
+        'legacy_id': row['legacy_id'],
+        'root_author': row['root_author'],
+        'root_permlink': row['root_permlink'],
+        'allow_replies': row['allow_replies'],
+        'allow_votes': row['allow_votes'],
+        'allow_curation_rewards': row['allow_curation_rewards'],
+    }
+
+    # top-level posts get an empty parent author and their category as parent permlink
+    is_reply = row['depth'] > 0
+    post['parent_author'] = row['parent_author'] if is_reply else ''
+    post['parent_permlink'] = row['parent_permlink'] if is_reply else row['category']
+
+    # columns copied through unchanged
+    passthrough = ('url', 'root_title', 'beneficiaries',
+                   'max_accepted_payout', 'percent_steem_dollars')
+    for column in passthrough:
+        post[column] = row[column]
+
+    if paid:
+        # paid out: report the curator share and the payout minus that share
+        curator_payout = sbd_amount(row['curator_payout_value'])
+        post['curator_payout_value'] = _amount(curator_payout)
+        post['total_payout_value'] = _amount(row['payout'] - curator_payout)
+
+    # not used by condenser, but may be useful
+    # post['net_votes'] = post['total_votes'] - row['up_votes']
+
+    return post
diff --git a/hive/server/condenser_api/cursor.py b/hive/server/condenser_api/cursor.py
index 1b3fa5caa..04d65b452 100644
--- a/hive/server/condenser_api/cursor.py
+++ b/hive/server/condenser_api/cursor.py
@@ -4,6 +4,7 @@ from datetime import datetime
 from dateutil.relativedelta import relativedelta
 
 from hive.utils.normalize import rep_to_raw
+from json import loads
 
 # pylint: disable=too-many-lines
 
@@ -376,3 +377,39 @@ async def pids_by_replies_to_account(db, start_author: str, start_permlink: str
     """ % seek
 
     return await db.query_col(sql, parent=parent_account, start_id=start_id, limit=limit)
+
+async def get_accounts(db, accounts: list):
+    """Return each named account's stored raw_json merged with its hive_accounts columns."""
+    ret = []
+    if not accounts: return ret  # nothing to look up; also avoids an empty IN clause
+    # names are bound as a query parameter, never formatted into the SQL text
+    sql = """SELECT created_at, reputation, display_name, about,
+        location, website, profile_image, cover_image, followers, following,
+        proxy, post_count, proxy_weight, vote_weight, rank,
+        lastread_at, active_at, cached_at, raw_json
+        FROM hive_accounts WHERE name IN :names"""
+
+    result = await db.query_all(sql, names=tuple(accounts))
+    for row in result:
+        account_data = dict(loads(row.raw_json))
+        account_data['created_at'] = row.created_at.isoformat()
+        account_data['reputation'] = row.reputation
+        account_data['display_name'] = row.display_name
+        account_data['about'] = row.about
+        account_data['location'] = row.location
+        account_data['website'] = row.website
+        account_data['profile_image'] = row.profile_image
+        account_data['cover_image'] = row.cover_image
+        account_data['followers'] = row.followers
+        account_data['following'] = row.following
+        account_data['proxy'] = row.proxy
+        account_data['post_count'] = row.post_count
+        account_data['proxy_weight'] = row.proxy_weight
+        account_data['vote_weight'] = row.vote_weight
+        account_data['rank'] = row.rank
+        account_data['lastread_at'] = row.lastread_at.isoformat()
+        account_data['active_at'] = row.active_at.isoformat()
+        account_data['cached_at'] = row.cached_at.isoformat()
+        ret.append(account_data)
+
+    return ret
diff --git a/hive/server/condenser_api/methods.py b/hive/server/condenser_api/methods.py
index fd76750ee..be188cbd7 100644
--- a/hive/server/condenser_api/methods.py
+++ b/hive/server/condenser_api/methods.py
@@ -1,5 +1,5 @@
 """Steemd/condenser_api compatibility layer API methods."""
-
+from json import loads
 from functools import wraps
 
 import hive.server.condenser_api.cursor as cursor
@@ -144,7 +144,10 @@ async def get_content_replies(context, author: str, permlink: str):
              --get_content_replies
              SELECT post_id, author, permlink, title, body, category, depth,
              promoted, payout, payout_at, is_paidout, children, votes,
-             created_at, updated_at, rshares, raw_json, json
+             created_at, updated_at, rshares, json,
+             legacy_id, parent_author, parent_permlink, curator_payout_value, 
+             root_author, root_permlink, max_accepted_payout, percent_steem_dollars, 
+             allow_replies, allow_votes, allow_curation_rewards, url, root_title 
              FROM hive_posts_cache WHERE post_id IN (
              SELECT hp2.id FROM hive_posts hp2
              WHERE hp2.is_deleted = '0' AND
@@ -524,3 +527,12 @@ async def _get_blog(db, account: str, start_index: int, limit: int = None):
         idx -= 1
 
     return out
+
+@return_error_info
+async def get_accounts(context, accounts: list):
+    """Fetch account data for the given account names (1 to 999 names per call)."""
+    print("Hivemind native get_accounts")
+    assert accounts, "Empty parameters are not supported"
+    assert len(accounts) < 1000, "Query exceeds limit"
+    db = context['db']
+    return await cursor.get_accounts(db, accounts)
diff --git a/hive/server/condenser_api/objects.py b/hive/server/condenser_api/objects.py
index 725d804bb..838df98db 100644
--- a/hive/server/condenser_api/objects.py
+++ b/hive/server/condenser_api/objects.py
@@ -42,7 +42,10 @@ async def load_posts_keyed(db, ids, truncate_body=0):
     # fetch posts and associated author reps
     sql = """SELECT post_id, author, permlink, title, body, category, depth,
                     promoted, payout, payout_at, is_paidout, children, votes,
-                    created_at, updated_at, rshares, raw_json, json
+                    created_at, updated_at, rshares, json,
+                    legacy_id, parent_author, parent_permlink, curator_payout_value, 
+                    root_author, root_permlink, max_accepted_payout, percent_steem_dollars, 
+                    allow_replies, allow_votes, allow_curation_rewards, url, root_title 
                FROM hive_posts_cache WHERE post_id IN :ids"""
     result = await db.query_all(sql, ids=tuple(ids))
     author_reps = await _query_author_rep_map(db, result)
@@ -164,34 +167,35 @@ def _condenser_post_object(row, truncate_body=0):
     post['active_votes'] = _hydrate_active_votes(row['votes'])
     post['author_reputation'] = rep_to_raw(row['author_rep'])
 
-    # import fields from legacy object
-    assert row['raw_json']
-    assert len(row['raw_json']) > 32
-    raw_json = json.loads(row['raw_json'])
+    post['legacy_id'] = row['legacy_id']
+
+    post['root_author'] = row['root_author']
+    post['root_permlink'] = row['root_permlink']
+
+    post['allow_replies'] = row['allow_replies']
+    post['allow_votes'] = row['allow_votes']
+    post['allow_curation_rewards'] = row['allow_curation_rewards']
 
     if row['depth'] > 0:
-        post['parent_author'] = raw_json['parent_author']
-        post['parent_permlink'] = raw_json['parent_permlink']
+        post['parent_author'] = row['parent_author']
+        post['parent_permlink'] = row['parent_permlink']
     else:
         post['parent_author'] = ''
         post['parent_permlink'] = row['category']
 
-    post['url'] = raw_json['url']
-    post['root_title'] = raw_json['root_title']
-    post['beneficiaries'] = raw_json['beneficiaries']
-    post['max_accepted_payout'] = raw_json['max_accepted_payout']
-    post['percent_steem_dollars'] = raw_json['percent_steem_dollars']
+    post['url'] = row['url']
+    post['root_title'] = row['root_title']
+    post['beneficiaries'] = json.loads(row['beneficiaries'])
+    post['max_accepted_payout'] = row['max_accepted_payout']
+    post['percent_steem_dollars'] = row['percent_steem_dollars']
 
     if paid:
-        curator_payout = sbd_amount(raw_json['curator_payout_value'])
+        curator_payout = sbd_amount(row['curator_payout_value'])
         post['curator_payout_value'] = _amount(curator_payout)
         post['total_payout_value'] = _amount(row['payout'] - curator_payout)
 
     # not used by condenser, but may be useful
-    #post['net_votes'] = post['total_votes'] - row['up_votes']
-    #post['allow_replies'] = raw_json['allow_replies']
-    #post['allow_votes'] = raw_json['allow_votes']
-    #post['allow_curation_rewards'] = raw_json['allow_curation_rewards']
+    # post['net_votes'] = post['total_votes'] - row['up_votes']
 
     return post
 
diff --git a/hive/server/database_api/__init__.py b/hive/server/database_api/__init__.py
new file mode 100644
index 000000000..609e45795
--- /dev/null
+++ b/hive/server/database_api/__init__.py
@@ -0,0 +1 @@
+"""Hive database_api methods and support."""
diff --git a/hive/server/database_api/methods.py b/hive/server/database_api/methods.py
new file mode 100644
index 000000000..9aef5a97a
--- /dev/null
+++ b/hive/server/database_api/methods.py
@@ -0,0 +1,90 @@
+# pylint: disable=too-many-arguments,line-too-long,too-many-lines
+from hive.server.common.helpers import return_error_info, valid_limit
+from hive.server.common.objects import condenser_post_object
+
+# Columns selected by every list_comments query. condenser_post_object reads
+# row['beneficiaries'], so that column must be part of the projection.
+# NOTE(review): assumes get_rows_by_root/get_rows_by_parent expose it too -- confirm.
+_COMMENT_COLUMNS = """post_id, community_id, author, permlink, title, body, category, depth,
+                    promoted, payout, payout_at, is_paidout, children, votes,
+                    created_at, updated_at, rshares, json,
+                    is_hidden, is_grayed, total_votes, flag_weight,
+                    legacy_id, parent_author, parent_permlink, curator_payout_value,
+                    root_author, root_permlink, max_accepted_payout, percent_steem_dollars,
+                    allow_replies, allow_votes, allow_curation_rewards, beneficiaries,
+                    url, root_title"""
+
+async def _select_comments(db, source, limit, **params):
+    """Select _COMMENT_COLUMNS from `source` (at most `limit` rows) and build post objects."""
+    sql = "SELECT " + _COMMENT_COLUMNS + " FROM " + source + " LIMIT :limit"
+    result = await db.query_all(sql, limit=limit, **params)
+    return [condenser_post_object(dict(row)) for row in result]
+
+@return_error_info
+async def list_comments(context, start: list, limit: int, order: str):
+    """Returns all comments, starting with the specified options.
+
+    `order` selects the listing and the expected layout of `start`:
+      by_cashout_time: [payout_time, author, permlink]
+      by_permlink:     [author, permlink]
+      by_root:         [root_author, root_permlink, child_author, child_permlink]
+      by_parent:       [parent_author, parent_permlink, child_author, child_permlink]
+
+    `limit` is checked with valid_limit(limit, 1000). The result is a list of
+    post objects produced by condenser_post_object. An unsupported order, a
+    wrongly sized `start`, or a by_cashout_time start post that cannot be found
+    fails an assertion.
+    """
+    print("Hivemind native list_comments")
+    supported_order_list = ['by_cashout_time', 'by_permlink', 'by_root', 'by_parent']
+    assert order in supported_order_list, "Unsupported order, valid orders {}".format(supported_order_list)
+    limit = valid_limit(limit, 1000)
+    db = context['db']
+
+    if order == 'by_cashout_time':
+        assert len(start) == 3, "Expecting three arguments"
+        payout_time = start[0]
+        author = start[1]
+        permlink = start[2]
+        post_id = 0
+        if author or permlink:
+            # NOTE(review): LIMIT 1 without ORDER BY picks an arbitrary matching row
+            sql = """SELECT post_id FROM hive_posts_cache WHERE author >= :author AND permlink >= :permlink LIMIT 1"""
+            result = await db.query_row(sql, author=author, permlink=permlink)
+            # a missing start post used to crash on `result.post_id`; fail clearly instead
+            assert result is not None, "Start post not found for given author and permlink"
+            post_id = result.post_id
+        source = """hive_posts_cache WHERE payout_at >= :start AND post_id >= :post_id
+                    ORDER BY payout_at ASC, post_id ASC"""
+        return await _select_comments(db, source, limit, start=payout_time, post_id=post_id)
+
+    if order == 'by_permlink':
+        assert len(start) == 2, "Expecting two arguments"
+        author = start[0]
+        permlink = start[1]
+        source = """hive_posts_cache WHERE author >= :author AND permlink >= :permlink
+                    ORDER BY author ASC, permlink ASC, post_id ASC"""
+        return await _select_comments(db, source, limit, author=author, permlink=permlink)
+
+    if order == 'by_root':
+        assert len(start) == 4, "Expecting 4 arguments"
+        root_author = start[0]
+        root_permlink = start[1]
+        child_author = start[2]
+        child_permlink = start[3]
+        source = """get_rows_by_root(:root_author, :root_permlink, :child_author, :child_permlink)
+                    ORDER BY post_id ASC"""
+        return await _select_comments(db, source, limit, root_author=root_author, root_permlink=root_permlink,
+                                      child_author=child_author, child_permlink=child_permlink)
+
+    if order == 'by_parent':
+        assert len(start) == 4, "Expecting 4 arguments"
+        parent_author = start[0]
+        parent_permlink = start[1]
+        child_author = start[2]
+        child_permlink = start[3]
+        source = "get_rows_by_parent(:parent_author, :parent_permlink, :child_author, :child_permlink)"
+        return await _select_comments(db, source, limit, parent_author=parent_author, parent_permlink=parent_permlink,
+                                      child_author=child_author, child_permlink=child_permlink)
+
+    return []
diff --git a/hive/server/serve.py b/hive/server/serve.py
index a43c8e8da..873387dba 100644
--- a/hive/server/serve.py
+++ b/hive/server/serve.py
@@ -26,6 +26,8 @@ from hive.server.hive_api import community as hive_api_community
 from hive.server.hive_api import notify as hive_api_notify
 from hive.server.hive_api import stats as hive_api_stats
 
+from hive.server.database_api import methods as database_api
+
 from hive.server.db import Db
 
 # pylint: disable=too-many-lines
@@ -73,6 +75,7 @@ def build_methods():
         condenser_api.get_blog_entries,
         condenser_api.get_account_reputations,
         condenser_api.get_reblogged_by,
+        condenser_api.get_accounts
     )})
 
     # dummy methods -- serve informational error
@@ -135,6 +138,11 @@ def build_methods():
         hive_api_community.list_all_subscriptions,
     )})
 
+    # database_api methods
+    methods.add(**{
+        'database_api.list_comments' : database_api.list_comments
+    })
+
     return methods
 
 def truncate_response_log(logger):
diff --git a/hive/utils/post.py b/hive/utils/post.py
index 0d2b316f9..9f299710f 100644
--- a/hive/utils/post.py
+++ b/hive/utils/post.py
@@ -44,6 +44,7 @@ def post_to_internal(post, post_id, level='insert', promoted=None):
     # always write, unless simple vote update
     if level in ['insert', 'payout', 'update']:
         basic = post_basic(post)
+        legacy_data = post_legacy(post)
         values.extend([
             ('community_id',  post['community_id']), # immutable*
             ('created_at',    post['created']),    # immutable*
@@ -58,7 +59,21 @@ def post_to_internal(post, post_id, level='insert', promoted=None):
             ('is_full_power', basic['is_full_power']),
             ('is_paidout',    basic['is_paidout']),
             ('json',          json.dumps(basic['json_metadata'])),
-            ('raw_json',      json.dumps(post_legacy(post))),
+            #('raw_json',      json.dumps(legacy_data)),
+            ('legacy_id',               legacy_data['id']),
+            ('parent_author',           legacy_data['parent_author']),
+            ('parent_permlink',         legacy_data['parent_permlink']),
+            ('curator_payout_value',    legacy_data['curator_payout_value']),
+            ('root_author',             legacy_data['root_author']),
+            ('root_permlink',           legacy_data['root_permlink']),
+            ('max_accepted_payout',     legacy_data['max_accepted_payout']),
+            ('percent_steem_dollars',   legacy_data['percent_steem_dollars']),
+            ('allow_replies',           legacy_data['allow_replies']),
+            ('allow_votes',             legacy_data['allow_votes']),
+            ('allow_curation_rewards',   legacy_data['allow_curation_rewards']),
+            ('beneficiaries',           legacy_data['beneficiaries']),
+            ('url',                     legacy_data['url']),
+            ('root_title',              legacy_data['root_title']),
         ])
 
     # if there's a pending promoted value to write, pull it out
diff --git a/scripts/update_hivemind_db.sql b/scripts/update_hivemind_db.sql
new file mode 100644
index 000000000..d15a676df
--- /dev/null
+++ b/scripts/update_hivemind_db.sql
@@ -0,0 +1,134 @@
+-- This script will upgrade hivemind database to new version
+-- Authors: Dariusz Kędzierski
+-- Created: 26-04-2020
+
+CREATE TABLE IF NOT EXISTS hive_db_version (
+  version VARCHAR(50) PRIMARY KEY, -- version tag of an applied upgrade (this script records '1.0')
+  notes VARCHAR(1024) -- free-form notes; the 1.0 upgrade stores its issue URL here
+);
+
+-- Upgrade to version 1.0
+-- in this version we will move data from raw_json into separate columns
+DO $$
+  DECLARE
+    -- Rows are migrated in consecutive post_id ranges, each batch_size ids wide
+      batch_size INTEGER := 100000;
+
+      -- Highest post_id in hive_posts_cache; fetched below, NULL when the table is empty
+      last_id INTEGER := 0;
+      -- Inclusive lower bound of the post_id range currently being migrated
+      current_id INTEGER := 0;
+      -- Row of the current batch (only post_id and raw_json are selected)
+      post_row RECORD;
+  BEGIN
+    RAISE NOTICE 'Upgrading database to version 1.0';
+    IF NOT EXISTS (SELECT version FROM hive_db_version WHERE version = '1.0')
+    THEN
+      -- Update version info
+      INSERT INTO hive_db_version (version, notes) VALUES ('1.0', 'https://gitlab.syncad.com/blocktrades/hivemind/issues/5');
+      -- Alter hive_posts_cache and add columns originally stored in raw_json
+      RAISE NOTICE 'Attempting to alter table hive_posts_cache';
+      ALTER TABLE hive_posts_cache
+        ADD COLUMN legacy_id INT NOT NULL DEFAULT -1,
+        ADD COLUMN parent_author VARCHAR(16) NOT NULL DEFAULT '',
+        ADD COLUMN parent_permlink VARCHAR(255) NOT NULL DEFAULT '',
+        ADD COLUMN curator_payout_value VARCHAR(16) NOT NULL DEFAULT '',
+        ADD COLUMN root_author VARCHAR(16) NOT NULL DEFAULT '',
+        ADD COLUMN root_permlink VARCHAR(255) NOT NULL DEFAULT '',
+        ADD COLUMN max_accepted_payout VARCHAR(16) NOT NULL DEFAULT '',
+        ADD COLUMN percent_steem_dollars INT NOT NULL DEFAULT -1,
+        ADD COLUMN allow_replies BOOLEAN NOT NULL DEFAULT TRUE,
+        ADD COLUMN allow_votes BOOLEAN NOT NULL DEFAULT TRUE,
+        ADD COLUMN allow_curation_rewards BOOLEAN NOT NULL DEFAULT TRUE,
+        ADD COLUMN beneficiaries JSON NOT NULL DEFAULT '[]',
+        ADD COLUMN url TEXT NOT NULL DEFAULT '',
+        ADD COLUMN root_title VARCHAR(255) NOT NULL DEFAULT '';
+      RAISE NOTICE 'Done...';
+
+      -- Helper type for use with json_populate_record (field names match the keys read from raw_json)
+      CREATE TYPE legacy_comment_type AS (
+        id INT,
+        parent_author VARCHAR(16),
+        parent_permlink VARCHAR(255),
+        curator_payout_value VARCHAR(16),
+        root_author VARCHAR(16),
+        root_permlink VARCHAR(255),
+        max_accepted_payout VARCHAR(16),
+        percent_steem_dollars INT,
+        allow_replies BOOLEAN,
+        allow_votes BOOLEAN,
+        allow_curation_rewards BOOLEAN,
+        beneficiaries JSON,
+        url TEXT,
+        root_title VARCHAR(255)
+      );
+
+      SELECT INTO last_id post_id FROM hive_posts_cache ORDER BY post_id DESC LIMIT 1;
+
+      RAISE NOTICE 'Attempting to parse % rows in batches %', last_id, batch_size;
+      -- Advance by whole post_id ranges so that gaps in post_id (or an empty range) cannot stall the loop
+      WHILE current_id <= last_id LOOP
+        RAISE NOTICE 'Processing batch: % <= post_id < % (of %)', current_id, current_id + batch_size, last_id;
+        FOR post_row IN SELECT post_id, raw_json FROM hive_posts_cache WHERE post_id >= current_id AND post_id < current_id + batch_size LOOP
+          UPDATE hive_posts_cache SET (
+            legacy_id, parent_author, parent_permlink, curator_payout_value,
+            root_author, root_permlink, max_accepted_payout, percent_steem_dollars,
+            allow_replies, allow_votes, allow_curation_rewards, beneficiaries, url, root_title
+          ) = (
+            SELECT id, parent_author, parent_permlink, curator_payout_value,
+              root_author, root_permlink, max_accepted_payout, percent_steem_dollars,
+              allow_replies, allow_votes, allow_curation_rewards, COALESCE(beneficiaries, '[]'::json), url, root_title
+            FROM json_populate_record(null::legacy_comment_type, post_row.raw_json::json)
+          )
+          WHERE post_id = post_row.post_id;
+        END LOOP;
+        current_id := current_id + batch_size;
+      END LOOP;
+      RAISE NOTICE 'Done...';
+      -- Creating indexes
+      RAISE NOTICE 'Creating author_permlink_idx';
+      CREATE INDEX IF NOT EXISTS author_permlink_idx ON hive_posts_cache (author ASC, permlink ASC);
+      RAISE NOTICE 'Creating root_author_permlink_idx';
+      CREATE INDEX IF NOT EXISTS root_author_permlink_idx ON hive_posts_cache (root_author ASC, root_permlink ASC);
+      RAISE NOTICE 'Creating parent_author_permlink_idx';
+      CREATE INDEX IF NOT EXISTS parent_author_permlink_idx ON hive_posts_cache (parent_author ASC, parent_permlink ASC);
+      RAISE NOTICE 'Creating author_permlink_post_id_idx';
+      CREATE INDEX IF NOT EXISTS author_permlink_post_id_idx ON hive_posts_cache (author ASC, permlink ASC, post_id ASC);
+      RAISE NOTICE 'Creating post_id_author_permlink_idx';
+      CREATE INDEX IF NOT EXISTS post_id_author_permlink_idx ON hive_posts_cache (post_id ASC, author ASC, permlink ASC);
+
+      -- Creating functions (bodies use the $function$ tag because a nested $$ would terminate the enclosing DO body)
+      -- for list_comments by_root: walks rows matching the root cursor (root_a, root_p) and returns rows sharing each one's root that also match the child cursor; stops after query_limit rows
+      CREATE OR REPLACE FUNCTION get_rows_by_root(root_a VARCHAR, root_p VARCHAR, child_a VARCHAR, child_p VARCHAR, query_limit INT DEFAULT 1000) RETURNS SETOF hive_posts_cache AS $function$
+      DECLARE
+        root_row hive_posts_cache;
+        child_row hive_posts_cache;
+        query_count INT := 0;
+      BEGIN
+        FOR root_row IN SELECT * FROM hive_posts_cache WHERE author >= root_a AND permlink >= root_p ORDER BY post_id ASC, author ASC, permlink ASC
+        LOOP
+          EXIT WHEN query_count >= query_limit;
+          FOR child_row IN SELECT * FROM hive_posts_cache WHERE author >= child_a AND permlink >= child_p AND root_author = root_row.root_author AND root_permlink = root_row.root_permlink ORDER BY post_id ASC, author ASC, permlink ASC
+          LOOP
+            EXIT WHEN query_count >= query_limit;
+            RETURN NEXT child_row;
+            query_count := query_count + 1;
+          END LOOP;
+        END LOOP;
+        RETURN;
+      END
+      $function$ LANGUAGE plpgsql;
+      -- for list_comments by_parent: returns up to query_limit rows whose parent is (parent_a, parent_p), starting at the lowest post_id that matches the child cursor (child_a, child_p)
+      CREATE OR REPLACE FUNCTION get_rows_by_parent(parent_a VARCHAR, parent_p VARCHAR, child_a VARCHAR, child_p VARCHAR, query_limit INT DEFAULT 1000) RETURNS SETOF hive_posts_cache AS $function$
+      DECLARE
+        child_id INT := 0;
+      BEGIN
+        SELECT INTO child_id post_id FROM hive_posts_cache WHERE author >= child_a AND permlink >= child_p ORDER BY post_id ASC LIMIT 1;
+        RETURN QUERY SELECT * FROM hive_posts_cache WHERE parent_author = parent_a AND parent_permlink = parent_p AND post_id >= child_id ORDER BY post_id ASC, author ASC, permlink ASC LIMIT query_limit;
+      END
+      $function$ LANGUAGE plpgsql;
+    ELSE
+      RAISE NOTICE 'Database already in version 1.0';
+    END IF;
+  END
+$$;
diff --git a/tests/server/test_server_database_api.py b/tests/server/test_server_database_api.py
new file mode 100644
index 000000000..9e8c1691d
--- /dev/null
+++ b/tests/server/test_server_database_api.py
@@ -0,0 +1,52 @@
+import pytest
+from hive.server.database_api.methods import list_comments
+from hive.steem.client import SteemClient
+
+@pytest.fixture
+def client():  # fixture: client the tests query for reference results
+  return SteemClient(url='https://api.hive.blog')  # SteemClient bound to the api.hive.blog endpoint
+
+@pytest.mark.asyncio
+async def test_list_comments_by_cashout_time(client):
+  """Native list_comments (by_cashout_time) must match the reference node row by row."""
+  reference_data = await client.list_comments({"start":["1990-01-01T00:00:00","steemit","firstpost"],"limit":10,"order":"by_cashout_time"})
+  test_data = await list_comments(["1990-01-01T00:00:00","steemit","firstpost"],10,"by_cashout_time")
+  assert reference_data and test_data
+  assert len(reference_data) == len(test_data)
+  for reference, tested in zip(reference_data, test_data):
+    for key in ('author', 'permlink'):
+      assert reference[key] == tested[key]
+    assert reference['cashout_time'] == tested['payout_at']  # same value, different key name on each side
+
+@pytest.mark.asyncio
+async def test_list_comments_by_permlink(client):
+  """Native list_comments (by_permlink) must match the reference node on author/permlink."""
+  reference_data = await client.list_comments({"start":["steemit","firstpost"],"limit":10,"order":"by_permlink"})
+  test_data = await list_comments(["steemit","firstpost"],10,"by_permlink")
+  assert reference_data and test_data
+  assert len(reference_data) == len(test_data)
+  for reference, tested in zip(reference_data, test_data):
+    for key in ('author', 'permlink'):
+      assert reference[key] == tested[key]
+
+@pytest.mark.asyncio
+async def test_list_comments_by_root(client):
+  """Native list_comments (by_root) must match the reference node on author/permlink and root keys."""
+  reference_data = await client.list_comments({"start":["steemit","firstpost","",""],"limit":10,"order":"by_root"})
+  test_data = await list_comments(["steemit","firstpost","",""],10,"by_root")
+  assert reference_data and test_data
+  assert len(reference_data) == len(test_data)
+  for reference, tested in zip(reference_data, test_data):
+    for key in ('author', 'permlink', 'root_author', 'root_permlink'):
+      assert reference[key] == tested[key]
+
+@pytest.mark.asyncio
+async def test_list_comments_by_parent(client):
+  """Native list_comments (by_parent) must match the reference node on author/permlink and parent keys."""
+  reference_data = await client.list_comments({"start":["steemit","firstpost","",""],"limit":10,"order":"by_parent"})
+  test_data = await list_comments(["steemit","firstpost","",""],10,"by_parent")
+  assert reference_data and test_data
+  assert len(reference_data) == len(test_data)
+  for reference, tested in zip(reference_data, test_data):
+    for key in ('author', 'permlink', 'parent_author', 'parent_permlink'):
+      assert reference[key] == tested[key]
-- 
GitLab