Skip to content
Snippets Groups Projects
Commit 94af6155 authored by Bartek Wrona's avatar Bartek Wrona
Browse files

Improved post importing into DB - eliminated most of additional queries.

parent 689ffc0d
No related branches found
No related tags found
5 merge requests!456Release candidate v1 24,!230Setup monitoring with pghero,!135Enable postgres monitoring on CI server,!16Dk issue 3 concurrent block query rebase,!15Dk issue 3 concurrent block query
......@@ -374,19 +374,113 @@ def setup(db):
sqls = [
"INSERT INTO hive_state (block_num, db_version, steem_per_mvest, usd_per_steem, sbd_per_steem, dgpo) VALUES (0, %d, 0, 0, 0, '')" % DB_VERSION,
"INSERT INTO hive_blocks (num, hash, created_at) VALUES (0, '0000000000000000000000000000000000000000', '2016-03-24 16:04:57')",
"INSERT INTO hive_permlink_data (id, permlink) VALUES (0, '')",
"INSERT INTO hive_category_data (id, category) VALUES (0, '')",
"INSERT INTO hive_accounts (id, name, created_at) VALUES (0, '', '1990-01-01T00:00:00')",
"INSERT INTO hive_accounts (name, created_at) VALUES ('miners', '2016-03-24 16:05:00')",
"INSERT INTO hive_accounts (name, created_at) VALUES ('null', '2016-03-24 16:05:00')",
"INSERT INTO hive_accounts (name, created_at) VALUES ('temp', '2016-03-24 16:05:00')",
"INSERT INTO hive_accounts (name, created_at) VALUES ('initminer', '2016-03-24 16:05:00')",
"INSERT INTO hive_accounts (name, created_at) VALUES ('', '1990-01-01T00:00:00') ON CONFLICT (name) DO NOTHING",
"INSERT INTO hive_permlink_data (permlink) VALUES ('') ON CONFLICT (permlink) DO NOTHING",
"INSERT INTO hive_category_data (category) VALUES ('') ON CONFLICT (category) DO NOTHING"]
"""INSERT INTO public.hive_posts(
id, parent_id, author_id, permlink_id, category_id, community_id, parent_author_id, parent_permlink_id, root_author_id, root_permlink_id, created_at, depth)
VALUES (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, now(), 0);"""]
for sql in sqls:
db.query(sql)
sql = "CREATE INDEX hive_communities_ft1 ON hive_communities USING GIN (to_tsvector('english', title || ' ' || about))"
db.query(sql)
sql = """
DROP FUNCTION if exists add_hive_post(character varying,character varying,character varying,character varying,timestamp without time zone,timestamp without time zone)
;
CREATE OR REPLACE FUNCTION add_hive_post(
in _author hive_accounts.name%TYPE,
in _permlink hive_permlink_data.permlink%TYPE,
in _parent_author hive_accounts.name%TYPE,
in _parent_permlink hive_permlink_data.permlink%TYPE,
in _date hive_posts.created_at%TYPE,
in _community_support_start_date hive_posts.created_at%TYPE)
RETURNS TABLE (id hive_posts.id%TYPE, author_id hive_posts.author_id%TYPE, permlink_id hive_posts.permlink_id%TYPE,
parent_id hive_posts.parent_id%TYPE, community_id hive_posts.community_id%TYPE,
is_valid hive_posts.is_valid%TYPE, is_muted hive_posts.is_muted%TYPE, depth hive_posts.depth%TYPE)
LANGUAGE plpgsql
AS
$function$
BEGIN
INSERT INTO hive_permlink_data
(permlink)
values
(
_permlink
)
ON CONFLICT DO NOTHING
;
if _parent_author != '' THEN
RETURN QUERY INSERT INTO hive_posts
(parent_id, parent_author_id, parent_permlink_id, depth, community_id,
category_id,
root_author_id, root_permlink_id,
is_muted, is_valid,
author_id, permlink_id, created_at)
SELECT php.id AS parent_id, php.author_id as parent_author_id,
php.permlink_id as parent_permlink_id, php.depth + 1 as depth,
(CASE
WHEN _date > _community_support_start_date THEN
COALESCE(php.community_id, (select hc.id from hive_communities hc where hc.name = _parent_permlink))
ELSE NULL
END) as community_id,
COALESCE(php.category_id, (select hcg.id from hive_category_data hcg where hcg.category = _parent_permlink)) as category_id,
php.root_author_id as root_author_id,
php.root_permlink_id as root_permlink_id,
php.is_muted as is_muted, php.is_valid as is_valid,
ha.id as author_id, hpd.id as permlink_id, _date as created_at
FROM hive_accounts ha,
hive_permlink_data hpd,
hive_posts php
INNER JOIN hive_accounts pha ON pha.id = php.author_id
INNER JOIN hive_permlink_data phpd ON phpd.id = php.permlink_id
WHERE pha.name = _parent_author and phpd.permlink = _parent_permlink AND
ha.name = _author and hpd.permlink = _permlink
RETURNING hive_posts.id, hive_posts.author_id, hive_posts.permlink_id, hive_posts.parent_id, hive_posts.community_id, hive_posts.is_valid, hive_posts.is_muted, hive_posts.depth
;
ELSE
INSERT INTO hive_category_data
(category)
VALUES (_parent_permlink)
ON CONFLICT (category) DO NOTHING
;
RETURN QUERY INSERT INTO hive_posts
(parent_id, parent_author_id, parent_permlink_id, depth, community_id,
category_id,
root_author_id, root_permlink_id,
is_muted, is_valid,
author_id, permlink_id, created_at)
SELECT 0 AS parent_id, 0 as parent_author_id, 0 as parent_permlink_id, 0 as depth,
(CASE
WHEN _date > _community_support_start_date THEN
(select hc.id from hive_communities hc where hc.name = _parent_permlink)
ELSE NULL
END) as community_id,
(select hcg.id from hive_category_data hcg where hcg.category = _parent_permlink) as category_id,
ha.id as root_author_id, -- use author_id as root one if no parent
hpd.id as root_permlink_id, -- use perlink_id as root one if no parent
false as is_muted, true as is_valid,
ha.id as author_id, hpd.id as permlink_id, _date as created_at
FROM hive_accounts ha,
hive_permlink_data hpd
WHERE ha.name = _author and hpd.permlink = _permlink
RETURNING hive_posts.id, hive_posts.author_id, hive_posts.permlink_id, hive_posts.parent_id, hive_posts.community_id, hive_posts.is_valid, hive_posts.is_muted, hive_posts.depth;
END IF;
END
$function$
"""
# db.query(sql)
def reset_autovac(db):
"""Initializes/resets per-table autovacuum/autoanalyze params.
......
......@@ -39,12 +39,13 @@ class Posts:
(
SELECT id, parent_id, 1 AS level from hive_posts WHERE id = (SELECT hp.id
FROM hive_posts hp
LEFT JOIN hive_accounts ha_a ON ha_a.id = hp.author_id
LEFT JOIN hive_permlink_data hpd_p ON hpd_p.id = hp.permlink_id
INNER JOIN hive_accounts ha_a ON ha_a.id = hp.author_id
INNER JOIN hive_permlink_data hpd_p ON hpd_p.id = hp.permlink_id
WHERE ha_a.name = :a AND hpd_p.permlink = :p)
UNION ALL
SELECT t.id, t.parent_id, level + 1 FROM parent
INNER JOIN hive_posts t ON t.id = parent.parent_id
WHERE parent.parent_id != 0
)
SELECT id FROM parent ORDER BY level DESC LIMIT 1"""
_id = DB.query_one(sql, a=author, p=permlink)
......@@ -63,8 +64,8 @@ class Posts:
sql = """
SELECT hp.id
FROM hive_posts hp
LEFT JOIN hive_accounts ha_a ON ha_a.id = hp.author_id
LEFT JOIN hive_permlink_data hpd_p ON hpd_p.id = hp.permlink_id
INNER JOIN hive_accounts ha_a ON ha_a.id = hp.author_id
INNER JOIN hive_permlink_data hpd_p ON hpd_p.id = hp.permlink_id
WHERE ha_a.name = :a AND hpd_p.permlink = :p
"""
_id = DB.query_one(sql, a=author, p=permlink)
......@@ -199,62 +200,38 @@ class Posts:
# * obtain id
# * insert post content data
# add permlinks to permlink table
for permlink in ['permlink', 'parent_permlink', 'root_permlink']:
if permlink in op:
sql = """
INSERT INTO hive_permlink_data (permlink)
VALUES (:permlink)
ON CONFLICT (permlink) DO NOTHING"""
DB.query(sql, permlink=op[permlink])
sql = """
SELECT id, author_id, permlink_id, parent_id, community_id, is_valid, is_muted, depth
FROM add_hive_post(:author, :permlink, :parent_author, :parent_permlink, :date, :community_support_start_date);
"""
post = cls._build_post(op, date)
row = DB.query_row(sql, author=op['author'], permlink=op['permlink'], parent_author=op['parent_author'],
parent_permlink=op['parent_permlink'], date=date, community_support_start_date=START_DATE)
# add category to category table
sql = """
INSERT INTO hive_category_data (category)
VALUES (:category)
ON CONFLICT (category) DO NOTHING"""
DB.query(sql, category=post['category'])
result = dict(row)
sql = """
INSERT INTO hive_posts (parent_id, author_id, permlink_id,
category_id, community_id, created_at, depth, is_muted,
is_valid, parent_author_id, parent_permlink_id, root_author_id, root_permlink_id)
VALUES (:parent_id,
(SELECT id FROM hive_accounts WHERE name = :author),
(SELECT id FROM hive_permlink_data WHERE permlink = :permlink),
(SELECT id FROM hive_category_data WHERE category = :category),
:community_id, :date, :depth,
:is_muted, :is_valid,
(SELECT id FROM hive_accounts WHERE name = :parent_author),
(SELECT id FROM hive_permlink_data WHERE permlink = :parent_permlink),
(SELECT id FROM hive_accounts WHERE name = :root_author),
(SELECT id FROM hive_permlink_data WHERE permlink = :root_permlink)
)"""
sql += ";SELECT currval(pg_get_serial_sequence('hive_posts','id'))"
result = DB.query(sql, **post)
post['id'] = int(list(result)[0][0])
cls._set_id(op['author']+'/'+op['permlink'], post['id'])
# TODO we need to enhance checking related community post validation and honor is_muted.
error = cls._verify_post_against_community(op, result['community_id'], result['is_valid'], result['is_muted'])
cls._set_id(op['author']+'/'+op['permlink'], result['id'])
# add content data to hive_post_data
sql = """
INSERT INTO hive_post_data (id, title, preview, img_url, body, json)
VALUES (:id, :title, :preview, :img_url, :body, :json)"""
DB.query(sql, id=post['id'], title=op['title'],
DB.query(sql, id=result['id'], title=op['title'],
preview=op['preview'] if 'preview' in op else "",
img_url=op['img_url'] if 'img_url' in op else "",
body=op['body'], json=op['json_metadata'] if op['json_metadata'] else '{}')
if not DbState.is_initial_sync():
if post['error']:
author_id = Accounts.get_id(post['author'])
if error:
author_id = result.author_id
Notify('error', dst_id=author_id, when=date,
post_id=post['id'], payload=post['error']).write()
post_id=result['id'], payload=error).write()
if op['parent_author']: # update parent's child count
cls.update_child_count(post['parent_id'])
cls._insert_feed_cache(post)
cls.update_child_count(result['parent_id'])
cls._insert_feed_cache4(result['depth'], result['id'], result['author_id'], date)
@classmethod
def update_child_count(cls, parent_id, op='+'):
......@@ -455,8 +432,8 @@ class Posts:
hpd_p.permlink as permlink
FROM
hive_posts hp
LEFT JOIN hive_accounts ha_a ON ha_a.id = hp.author_id
LEFT JOIN hive_permlink_data hpd_p ON hpd_p.id = hp.permlink_id
INNER JOIN hive_accounts ha_a ON ha_a.id = hp.author_id
INNER JOIN hive_permlink_data hpd_p ON hpd_p.id = hp.permlink_id
WHERE
hp.id = (SELECT parent_id FROM hive_posts WHERE id = :child_id)"""
result = DB.query_row(sql, child_id=child_id)
......@@ -468,7 +445,23 @@ class Posts:
"""Insert the new post into feed cache if it's not a comment."""
if not post['depth']:
account_id = Accounts.get_id(post['author'])
FeedCache.insert(post['id'], account_id, post['date'])
_insert_feed_cache4(cls, post['depth'], post['id'], account_id, post['date'])
@classmethod
def _insert_feed_cache4(cls, post_depth, post_id, author_id, post_date):
"""Insert the new post into feed cache if it's not a comment."""
if not post_depth:
FeedCache.insert(post_id, account_id, post_date)
@classmethod
def _verify_post_against_community(cls, op, community_id, is_valid, is_muted):
error = None
if community_id and is_valid and not Community.is_post_valid(community_id, op):
error = 'not authorized'
#is_valid = False # TODO: reserved for future blacklist status?
is_muted = True
return error
@classmethod
def _build_post(cls, op, date, pid=None):
......@@ -503,7 +496,7 @@ class Posts:
sql = """
SELECT depth, hcd.category as category, community_id, is_valid, is_muted
FROM hive_posts hp
LEFT JOIN hive_category_data hcd ON hcd.id = hp.category_id
INNER JOIN hive_category_data hcd ON hcd.id = hp.category_id
WHERE hp.id = :id"""
(parent_depth, category, community_id, is_valid,
is_muted) = DB.query_row(sql, id=parent_id)
......@@ -517,8 +510,8 @@ class Posts:
ha_a.name as author, hpd_p.permlink as permlink
FROM
hive_posts hp
LEFT JOIN hive_accounts ha_a ON ha_a.id = hp.author_id
LEFT JOIN hive_permlink_data hpd_p ON hpd_p.id = hp.permlink_id
INNER JOIN hive_accounts ha_a ON ha_a.id = hp.author_id
INNER JOIN hive_permlink_data hpd_p ON hpd_p.id = hp.permlink_id
WHERE
hp.id = :id"""
root_author, root_permlink = DB.query_row(sql, id=root_id)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment