Skip to content
Snippets Groups Projects
Commit 3e8b1b76 authored by Marcin's avatar Marcin
Browse files

indexing tags only in sql

parent 829e86e8
No related branches found
No related tags found
No related merge requests found
This commit is part of merge request !377. Comments created here will be created in the context of that merge request.
DROP FUNCTION if exists process_hive_post_operation(character varying,character varying,character varying,character varying,timestamp without time zone,timestamp without time zone)
DROP FUNCTION IF EXISTS prepare_tags;
CREATE OR REPLACE FUNCTION prepare_tags( in _raw_tags VARCHAR[] )
RETURNS SETOF hive_tag_data.id%TYPE
LANGUAGE 'plpgsql'
VOLATILE
AS
$function$
DECLARE
__i INTEGER;
__tags VARCHAR[];
__tag TEXT;
BEGIN
FOR __i IN 1 .. ARRAY_UPPER( _raw_tags, 1)
LOOP
__tag = LEFT(LOWER(REGEXP_REPLACE( _raw_tags[ __i ], '[#\s]', '', 'g' )),32);
CONTINUE WHEN __tag = '' OR __tags @> ARRAY_APPEND(ARRAY[]::VARCHAR[], CAST(__tag as VARCHAR) );
__tags = ARRAY_APPEND( __tags, CAST(__tag as VARCHAR) );
END LOOP;
RETURN QUERY INSERT INTO
hive_tag_data AS htd(tag)
SELECT UNNEST( __tags )
ON CONFLICT("tag") DO UPDATE SET tag=EXCLUDED.tag --trick to always return id
RETURNING htd.id;
END
$function$;
DROP FUNCTION IF EXISTS process_hive_post_operation;
;
CREATE OR REPLACE FUNCTION process_hive_post_operation(
in _author hive_accounts.name%TYPE,
......@@ -7,7 +34,9 @@ CREATE OR REPLACE FUNCTION process_hive_post_operation(
in _parent_permlink hive_permlink_data.permlink%TYPE,
in _date hive_posts.created_at%TYPE,
in _community_support_start_date hive_posts.created_at%TYPE,
in _block_num hive_posts.block_num%TYPE)
in _block_num hive_posts.block_num%TYPE,
in _metadata_tags VARCHAR[]
)
RETURNS TABLE (is_new_post boolean, id hive_posts.id%TYPE, author_id hive_posts.author_id%TYPE, permlink_id hive_posts.permlink_id%TYPE,
post_category hive_category_data.category%TYPE, parent_id hive_posts.parent_id%TYPE, community_id hive_posts.community_id%TYPE,
is_valid hive_posts.is_valid%TYPE, is_muted hive_posts.is_muted%TYPE, depth hive_posts.depth%TYPE)
......@@ -28,7 +57,9 @@ if _parent_author != '' THEN
RETURN QUERY INSERT INTO hive_posts as hp
(parent_id, depth, community_id, category_id,
root_id, is_muted, is_valid,
author_id, permlink_id, created_at, updated_at, sc_hot, sc_trend, active, payout_at, cashout_time, counter_deleted, block_num, block_num_created)
author_id, permlink_id, created_at, updated_at, sc_hot, sc_trend,
active, payout_at, cashout_time, counter_deleted, block_num, block_num_created
)
SELECT php.id AS parent_id, php.depth + 1 AS depth,
(CASE
WHEN _date > _community_support_start_date THEN
......@@ -74,7 +105,9 @@ ELSE
RETURN QUERY INSERT INTO hive_posts as hp
(parent_id, depth, community_id, category_id,
root_id, is_muted, is_valid,
author_id, permlink_id, created_at, updated_at, sc_hot, sc_trend, active, payout_at, cashout_time, counter_deleted, block_num, block_num_created)
author_id, permlink_id, created_at, updated_at, sc_hot, sc_trend,
active, payout_at, cashout_time, counter_deleted, block_num, block_num_created,
tags_ids)
SELECT 0 AS parent_id, 0 AS depth,
(CASE
WHEN _date > _community_support_start_date THEN
......@@ -90,8 +123,12 @@ ELSE
calculate_time_part_of_trending(_date) AS sc_trend,
_date AS active, (_date + INTERVAL '7 days') AS payout_at, (_date + INTERVAL '7 days') AS cashout_time, 0
, _block_num as block_num, _block_num as block_num_created
, (
SELECT ARRAY_AGG( prepare_tags )
FROM prepare_tags( ARRAY_APPEND(_metadata_tags, _parent_permlink ) )
) as tags_ids
FROM hive_accounts ha,
hive_permlink_data hpd
hive_permlink_data hpd
WHERE ha.name = _author and hpd.permlink = _permlink
ON CONFLICT ON CONSTRAINT hive_posts_ux1 DO UPDATE SET
......@@ -100,7 +137,8 @@ ELSE
--- post edit part
updated_at = _date,
active = _date,
block_num = _block_num
block_num = _block_num,
tags_ids = EXCLUDED.tags_ids
RETURNING (xmax = 0) as is_new_post, hp.id, hp.author_id, hp.permlink_id, _parent_permlink as post_category, hp.parent_id, hp.community_id, hp.is_valid, hp.is_muted, hp.depth
;
......@@ -138,29 +176,3 @@ BEGIN
END
$function$
;
DROP FUNCTION IF EXISTS add_tags;
CREATE FUNCTION add_tags( in _post_id hive_posts.id%TYPE, in _tags VARCHAR[] )
RETURNS void
LANGUAGE 'plpgsql'
VOLATILE
AS
$function$
DECLARE
__tags_ids INTEGER[];
BEGIN
WITH tags_ids(id) AS
(
INSERT INTO
hive_tag_data AS htd(tag)
SELECT UNNEST( _tags )
ON CONFLICT("tag") DO UPDATE SET tag=EXCLUDED.tag --trick to always return id
RETURNING htd.id
)
SELECT ARRAY_AGG( id ) INTO __tags_ids FROM tags_ids;
UPDATE hive_posts hp
SET tags_ids = __tags_ids
WHERE hp.id = _post_id;
END
$function$
......@@ -89,13 +89,27 @@ class Posts(DbAdapterHolder):
def comment_op(cls, op, block_date):
"""Register new/edited/undeleted posts; insert into feed cache."""
md = {}
# At least one case where jsonMetadata was double-encoded: condenser#895
# jsonMetadata = JSON.parse(jsonMetadata);
try:
md = loads(op['json_metadata'])
if not isinstance(md, dict):
md = {}
except Exception:
pass
tags = []
if md and 'tags' in md and isinstance(md['tags'], list):
tags = md['tags']
sql = """
SELECT is_new_post, id, author_id, permlink_id, post_category, parent_id, community_id, is_valid, is_muted, depth
FROM process_hive_post_operation((:author)::varchar, (:permlink)::varchar, (:parent_author)::varchar, (:parent_permlink)::varchar, (:date)::timestamp, (:community_support_start_date)::timestamp, (:block_num)::integer);
FROM process_hive_post_operation((:author)::varchar, (:permlink)::varchar, (:parent_author)::varchar, (:parent_permlink)::varchar, (:date)::timestamp, (:community_support_start_date)::timestamp, (:block_num)::integer, (:tags)::VARCHAR[]);
"""
row = DB.query_row(sql, author=op['author'], permlink=op['permlink'], parent_author=op['parent_author'],
parent_permlink=op['parent_permlink'], date=block_date, community_support_start_date=START_DATE, block_num=op['block_num'])
parent_permlink=op['parent_permlink'], date=block_date, community_support_start_date=START_DATE, block_num=op['block_num'], tags=tags)
result = dict(row)
......@@ -104,16 +118,6 @@ class Posts(DbAdapterHolder):
cls._set_id(op['author']+'/'+op['permlink'], result['id'])
md = {}
# At least one case where jsonMetadata was double-encoded: condenser#895
# jsonMetadata = JSON.parse(jsonMetadata);
try:
md = loads(op['json_metadata'])
if not isinstance(md, dict):
md = {}
except Exception:
pass
img_url = None
if 'image' in md:
img_url = md['image']
......@@ -142,18 +146,6 @@ class Posts(DbAdapterHolder):
# log.info("Adding author: {} permlink: {}".format(op['author'], op['permlink']))
PostDataCache.add_data(result['id'], post_data, is_new_post)
if not result['depth']:
tags = [result['post_category']]
if md and 'tags' in md and isinstance(md['tags'], list):
tags = tags + md['tags']
tags = map(lambda tag: (str(tag) or '').strip('# ').lower()[:32], tags)
tags = filter(None, tags)
from funcy.seqs import distinct
tags = list(distinct(tags))[:5]
sql = """SELECT add_tags( (:post_id)::INTEGER, (:tags)::VARCHAR[] )"""
DB.query_row( sql, post_id = result['id'], tags=tags );
if not DbState.is_initial_sync():
if error:
author_id = result['author_id']
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment