Skip to content
Snippets Groups Projects
Commit 0f191353 authored by Marcin's avatar Marcin
Browse files

GIN index is used to find posts by tag

-remove hive_post_tags table
-extend hive_posts with a column holding an array of tag ids
-tags are added right after a post, not in parallel as previously
-fix bug where tags were not removed from a post whose author removed them
while editing
parent 317ba694
No related branches found
No related tags found
No related merge requests found
This commit is part of merge request !377. Comments created here will be created in the context of that merge request.
...@@ -119,6 +119,7 @@ class DbState: ...@@ -119,6 +119,7 @@ class DbState:
'hive_posts_updated_at_idx', 'hive_posts_updated_at_idx',
'hive_posts_payout_plus_pending_payout_id_idx', 'hive_posts_payout_plus_pending_payout_id_idx',
'hive_posts_category_id_payout_plus_pending_payout_depth_idx', 'hive_posts_category_id_payout_plus_pending_payout_depth_idx',
'hive_posts_tags_ids_idx',
'hive_posts_api_helper_author_s_permlink_idx', 'hive_posts_api_helper_author_s_permlink_idx',
...@@ -130,8 +131,6 @@ class DbState: ...@@ -130,8 +131,6 @@ class DbState:
'hive_communities_block_num_idx', 'hive_communities_block_num_idx',
'hive_reblogs_created_at_idx', 'hive_reblogs_created_at_idx',
'hive_post_tags_tag_id_idx',
'hive_votes_voter_id_post_id_idx', 'hive_votes_voter_id_post_id_idx',
'hive_votes_post_id_voter_id_idx', 'hive_votes_post_id_voter_id_idx',
......
...@@ -128,6 +128,7 @@ def build_metadata(): ...@@ -128,6 +128,7 @@ def build_metadata():
sa.Column('beneficiaries', sa.JSON, nullable=False, server_default='[]'), sa.Column('beneficiaries', sa.JSON, nullable=False, server_default='[]'),
sa.Column('block_num', sa.Integer, nullable=False ), sa.Column('block_num', sa.Integer, nullable=False ),
sa.Column('block_num_created', sa.Integer, nullable=False ), sa.Column('block_num_created', sa.Integer, nullable=False ),
sa.Column('tags_ids', sa.ARRAY(sa.Integer), nullable=True ),
sa.ForeignKeyConstraint(['author_id'], ['hive_accounts.id'], name='hive_posts_fk1'), sa.ForeignKeyConstraint(['author_id'], ['hive_accounts.id'], name='hive_posts_fk1'),
sa.ForeignKeyConstraint(['root_id'], ['hive_posts.id'], name='hive_posts_fk2'), sa.ForeignKeyConstraint(['root_id'], ['hive_posts.id'], name='hive_posts_fk2'),
...@@ -152,8 +153,9 @@ def build_metadata(): ...@@ -152,8 +153,9 @@ def build_metadata():
sa.Index('hive_posts_cashout_time_id_idx', 'cashout_time', 'id'), sa.Index('hive_posts_cashout_time_id_idx', 'cashout_time', 'id'),
sa.Index('hive_posts_updated_at_idx', sa.text('updated_at DESC')), sa.Index('hive_posts_updated_at_idx', sa.text('updated_at DESC')),
sa.Index('hive_posts_payout_plus_pending_payout_id_idx', sa.text('(payout+pending_payout), id, is_paidout'), postgresql_where=sql_text("counter_deleted = 0 AND NOT is_paidout")), sa.Index('hive_posts_payout_plus_pending_payout_id_idx', sa.text('(payout+pending_payout), id, is_paidout'), postgresql_where=sql_text("counter_deleted = 0 AND NOT is_paidout")),
sa.Index('hive_posts_category_id_payout_plus_pending_payout_depth_idx', sa.text('category_id, (payout+pending_payout), depth'), postgresql_where=sql_text("NOT is_paidout AND counter_deleted = 0")) sa.Index('hive_posts_category_id_payout_plus_pending_payout_depth_idx', sa.text('category_id, (payout+pending_payout), depth'), postgresql_where=sql_text("NOT is_paidout AND counter_deleted = 0")),
) sa.Index('hive_posts_tags_ids_idx', 'tags_ids', postgresql_using="gin", postgresql_ops={'tags_ids': 'gin__int_ops'})
)
sa.Table( sa.Table(
'hive_post_data', metadata, 'hive_post_data', metadata,
...@@ -215,18 +217,6 @@ def build_metadata(): ...@@ -215,18 +217,6 @@ def build_metadata():
sa.UniqueConstraint('tag', name='hive_tag_data_ux1') sa.UniqueConstraint('tag', name='hive_tag_data_ux1')
) )
sa.Table(
'hive_post_tags', metadata,
sa.Column('post_id', sa.Integer, nullable=False),
sa.Column('tag_id', sa.Integer, nullable=False),
sa.PrimaryKeyConstraint('post_id', 'tag_id', name='hive_post_tags_pk1'),
sa.ForeignKeyConstraint(['post_id'], ['hive_posts.id'], name='hive_post_tags_fk1'),
sa.ForeignKeyConstraint(['tag_id'], ['hive_tag_data.id'], name='hive_post_tags_fk2'),
sa.Index('hive_post_tags_tag_id_idx', 'tag_id')
)
sa.Table( sa.Table(
'hive_follows', metadata, 'hive_follows', metadata,
sa.Column('id', sa.Integer, primary_key=True ), sa.Column('id', sa.Integer, primary_key=True ),
...@@ -457,6 +447,9 @@ def create_fk(db): ...@@ -457,6 +447,9 @@ def create_fk(db):
def setup(db): def setup(db):
"""Creates all tables and seed data""" """Creates all tables and seed data"""
sql = """SELECT * FROM pg_extension WHERE extname='intarray'"""
assert db.query_row( sql ), "The database requires created 'intarray' extension"
# initialize schema # initialize schema
build_metadata().create_all(db.engine()) build_metadata().create_all(db.engine())
...@@ -615,8 +608,8 @@ def setup(db): ...@@ -615,8 +608,8 @@ def setup(db):
dir_path = dirname(realpath(__file__)) dir_path = dirname(realpath(__file__))
for script in sql_scripts: for script in sql_scripts:
execute_sql_script(db.query_no_return, "{}/sql_scripts/{}".format(dir_path, script)) execute_sql_script(db.query_no_return, "{}/sql_scripts/{}".format(dir_path, script))
...@@ -629,7 +622,6 @@ def reset_autovac(db): ...@@ -629,7 +622,6 @@ def reset_autovac(db):
autovac_config = { # vacuum analyze autovac_config = { # vacuum analyze
'hive_accounts': (50000, 100000), 'hive_accounts': (50000, 100000),
'hive_posts': (2500, 10000), 'hive_posts': (2500, 10000),
'hive_post_tags': (5000, 10000),
'hive_follows': (5000, 5000), 'hive_follows': (5000, 5000),
'hive_feed_cache': (5000, 5000), 'hive_feed_cache': (5000, 5000),
'hive_blocks': (5000, 25000), 'hive_blocks': (5000, 25000),
...@@ -665,7 +657,6 @@ def set_logged_table_attribute(db, logged): ...@@ -665,7 +657,6 @@ def set_logged_table_attribute(db, logged):
logged_config = [ logged_config = [
'hive_accounts', 'hive_accounts',
'hive_permlink_data', 'hive_permlink_data',
'hive_post_tags',
'hive_posts', 'hive_posts',
'hive_post_data', 'hive_post_data',
'hive_votes', 'hive_votes',
......
...@@ -5,10 +5,10 @@ AS ...@@ -5,10 +5,10 @@ AS
$function$ $function$
DECLARE DECLARE
__post_id INT; __post_id INT;
__hive_tag INT; __hive_tag INT[];
BEGIN BEGIN
__post_id = find_comment_id( _author, _permlink, True ); __post_id = find_comment_id( _author, _permlink, True );
__hive_tag = find_tag_id( _tag, True ); __hive_tag = ARRAY_APPEND( __hive_tag, find_tag_id( _tag, True ));
RETURN QUERY SELECT RETURN QUERY SELECT
hp.id, hp.id,
hp.author, hp.author,
...@@ -52,10 +52,9 @@ BEGIN ...@@ -52,10 +52,9 @@ BEGIN
SELECT SELECT
hp1.id hp1.id
FROM FROM
hive_post_tags hpt hive_posts hp1
JOIN hive_posts hp1 ON hp1.id = hpt.post_id
JOIN hive_accounts_view ha ON hp1.author_id = ha.id JOIN hive_accounts_view ha ON hp1.author_id = ha.id
WHERE hpt.tag_id = __hive_tag AND hp1.counter_deleted = 0 AND hp1.depth = 0 AND NOT ha.is_grayed AND ( __post_id = 0 OR hp1.id < __post_id ) WHERE hp1.tags_ids @> __hive_tag AND hp1.counter_deleted = 0 AND hp1.depth = 0 AND NOT ha.is_grayed AND ( __post_id = 0 OR hp1.id < __post_id )
--ORDER BY hp1.id + 0 DESC -- this workaround helped the query to better choose indexes, but after some time it started to significally slow down --ORDER BY hp1.id + 0 DESC -- this workaround helped the query to better choose indexes, but after some time it started to significally slow down
ORDER BY hp1.id DESC ORDER BY hp1.id DESC
LIMIT _limit LIMIT _limit
...@@ -76,13 +75,13 @@ $function$ ...@@ -76,13 +75,13 @@ $function$
DECLARE DECLARE
__post_id INT; __post_id INT;
__hot_limit FLOAT; __hot_limit FLOAT;
__hive_tag INT; __hive_tag INT[];
BEGIN BEGIN
__post_id = find_comment_id( _author, _permlink, True ); __post_id = find_comment_id( _author, _permlink, True );
IF __post_id <> 0 THEN IF __post_id <> 0 THEN
SELECT hp.sc_hot INTO __hot_limit FROM hive_posts hp WHERE hp.id = __post_id; SELECT hp.sc_hot INTO __hot_limit FROM hive_posts hp WHERE hp.id = __post_id;
END IF; END IF;
__hive_tag = find_tag_id( _tag, True ); __hive_tag = ARRAY_APPEND( __hive_tag, find_tag_id( _tag, True ));
RETURN QUERY SELECT RETURN QUERY SELECT
hp.id, hp.id,
hp.author, hp.author,
...@@ -127,9 +126,8 @@ BEGIN ...@@ -127,9 +126,8 @@ BEGIN
hp1.id hp1.id
, hp1.sc_hot as hot , hp1.sc_hot as hot
FROM FROM
hive_post_tags hpt hive_posts hp1
JOIN hive_posts hp1 ON hp1.id = hpt.post_id WHERE hp1.tags_ids @> __hive_tag AND hp1.counter_deleted = 0 AND NOT hp1.is_paidout AND hp1.depth = 0
WHERE hpt.tag_id = __hive_tag AND hp1.counter_deleted = 0 AND NOT hp1.is_paidout AND hp1.depth = 0
AND ( __post_id = 0 OR hp1.sc_hot < __hot_limit OR ( hp1.sc_hot = __hot_limit AND hp1.id < __post_id ) ) AND ( __post_id = 0 OR hp1.sc_hot < __hot_limit OR ( hp1.sc_hot = __hot_limit AND hp1.id < __post_id ) )
ORDER BY hp1.sc_hot DESC, hp1.id DESC ORDER BY hp1.sc_hot DESC, hp1.id DESC
LIMIT _limit LIMIT _limit
...@@ -150,13 +148,13 @@ $function$ ...@@ -150,13 +148,13 @@ $function$
DECLARE DECLARE
__post_id INT; __post_id INT;
__payout_limit hive_posts.payout%TYPE; __payout_limit hive_posts.payout%TYPE;
__hive_tag INT; __hive_tag INT[];
BEGIN BEGIN
__post_id = find_comment_id( _author, _permlink, True ); __post_id = find_comment_id( _author, _permlink, True );
IF __post_id <> 0 THEN IF __post_id <> 0 THEN
SELECT ( hp.payout + hp.pending_payout ) INTO __payout_limit FROM hive_posts hp WHERE hp.id = __post_id; SELECT ( hp.payout + hp.pending_payout ) INTO __payout_limit FROM hive_posts hp WHERE hp.id = __post_id;
END IF; END IF;
__hive_tag = find_tag_id( _tag, True ); __hive_tag = ARRAY_APPEND( __hive_tag, find_tag_id( _tag, True ) );
RETURN QUERY SELECT RETURN QUERY SELECT
hp.id, hp.id,
hp.author, hp.author,
...@@ -202,9 +200,8 @@ BEGIN ...@@ -202,9 +200,8 @@ BEGIN
, ( hp1.payout + hp1.pending_payout ) as all_payout , ( hp1.payout + hp1.pending_payout ) as all_payout
FROM FROM
hive_posts hp1 hive_posts hp1
JOIN hive_post_tags hpt ON hp1.id = hpt.post_id
JOIN hive_accounts_view ha ON hp1.author_id = ha.id JOIN hive_accounts_view ha ON hp1.author_id = ha.id
WHERE hpt.tag_id = __hive_tag AND hp1.counter_deleted = 0 AND NOT hp1.is_paidout AND ha.is_grayed AND ( hp1.payout + hp1.pending_payout ) > 0 WHERE hp1.tags_ids @> __hive_tag AND hp1.counter_deleted = 0 AND NOT hp1.is_paidout AND ha.is_grayed AND ( hp1.payout + hp1.pending_payout ) > 0
AND ( __post_id = 0 OR ( hp1.payout + hp1.pending_payout ) < __payout_limit OR ( ( hp1.payout + hp1.pending_payout ) = __payout_limit AND hp1.id < __post_id ) ) AND ( __post_id = 0 OR ( hp1.payout + hp1.pending_payout ) < __payout_limit OR ( ( hp1.payout + hp1.pending_payout ) = __payout_limit AND hp1.id < __post_id ) )
ORDER BY ( hp1.payout + hp1.pending_payout ) DESC, hp1.id DESC ORDER BY ( hp1.payout + hp1.pending_payout ) DESC, hp1.id DESC
LIMIT _limit LIMIT _limit
...@@ -374,13 +371,13 @@ $function$ ...@@ -374,13 +371,13 @@ $function$
DECLARE DECLARE
__post_id INT; __post_id INT;
__promoted_limit hive_posts.promoted%TYPE; __promoted_limit hive_posts.promoted%TYPE;
__hive_tag INT; __hive_tag INT[];
BEGIN BEGIN
__post_id = find_comment_id( _author, _permlink, True ); __post_id = find_comment_id( _author, _permlink, True );
IF __post_id <> 0 THEN IF __post_id <> 0 THEN
SELECT hp.promoted INTO __promoted_limit FROM hive_posts hp WHERE hp.id = __post_id; SELECT hp.promoted INTO __promoted_limit FROM hive_posts hp WHERE hp.id = __post_id;
END IF; END IF;
__hive_tag = find_tag_id( _tag, True ); __hive_tag = ARRAY_APPEND( __hive_tag, find_tag_id( _tag, True ) );
RETURN QUERY SELECT RETURN QUERY SELECT
hp.id, hp.id,
hp.author, hp.author,
...@@ -425,9 +422,8 @@ BEGIN ...@@ -425,9 +422,8 @@ BEGIN
hp1.id hp1.id
, hp1.promoted as promoted , hp1.promoted as promoted
FROM FROM
hive_post_tags hpt hive_posts hp1
JOIN hive_posts hp1 ON hp1.id = hpt.post_id WHERE hp1.tags_ids @> __hive_tag AND hp1.counter_deleted = 0 AND NOT hp1.is_paidout AND hp1.promoted > 0
WHERE hpt.tag_id = __hive_tag AND hp1.counter_deleted = 0 AND NOT hp1.is_paidout AND hp1.promoted > 0
AND ( __post_id = 0 OR hp1.promoted < __promoted_limit OR ( hp1.promoted = __promoted_limit AND hp1.id < __post_id ) ) AND ( __post_id = 0 OR hp1.promoted < __promoted_limit OR ( hp1.promoted = __promoted_limit AND hp1.id < __post_id ) )
ORDER BY hp1.promoted DESC, hp1.id DESC ORDER BY hp1.promoted DESC, hp1.id DESC
LIMIT _limit LIMIT _limit
...@@ -448,13 +444,13 @@ $function$ ...@@ -448,13 +444,13 @@ $function$
DECLARE DECLARE
__post_id INT; __post_id INT;
__trending_limit FLOAT; __trending_limit FLOAT;
__hive_tag INT; __hive_tag INT[];
BEGIN BEGIN
__post_id = find_comment_id( _author, _permlink, True ); __post_id = find_comment_id( _author, _permlink, True );
IF __post_id <> 0 THEN IF __post_id <> 0 THEN
SELECT hp.sc_trend INTO __trending_limit FROM hive_posts hp WHERE hp.id = __post_id; SELECT hp.sc_trend INTO __trending_limit FROM hive_posts hp WHERE hp.id = __post_id;
END IF; END IF;
__hive_tag = find_tag_id( _tag, True ); __hive_tag = ARRAY_APPEND( __hive_tag, find_tag_id( _tag, True ));
RETURN QUERY SELECT RETURN QUERY SELECT
hp.id, hp.id,
hp.author, hp.author,
...@@ -499,9 +495,8 @@ BEGIN ...@@ -499,9 +495,8 @@ BEGIN
hp1.id hp1.id
, hp1.sc_trend as trend , hp1.sc_trend as trend
FROM FROM
hive_post_tags hpt hive_posts hp1
JOIN hive_posts hp1 ON hp1.id = hpt.post_id WHERE hp1.tags_ids @> __hive_tag AND hp1.counter_deleted = 0 AND NOT hp1.is_paidout AND hp1.depth = 0
WHERE hpt.tag_id = __hive_tag AND hp1.counter_deleted = 0 AND NOT hp1.is_paidout AND hp1.depth = 0
AND ( __post_id = 0 OR hp1.sc_trend < __trending_limit OR ( hp1.sc_trend = __trending_limit AND hp1.id < __post_id ) ) AND ( __post_id = 0 OR hp1.sc_trend < __trending_limit OR ( hp1.sc_trend = __trending_limit AND hp1.id < __post_id ) )
ORDER BY hp1.sc_trend DESC, hp1.id DESC ORDER BY hp1.sc_trend DESC, hp1.id DESC
LIMIT _limit LIMIT _limit
......
...@@ -138,3 +138,29 @@ BEGIN ...@@ -138,3 +138,29 @@ BEGIN
END END
$function$ $function$
; ;
-- add_tags( _post_id, _tags )
--
-- Stores the tag set of a single post in hive_posts.tags_ids.
--   _post_id  id of the hive_posts row to update
--   _tags     tag names to attach to that post
--
-- Every name in _tags is upserted into hive_tag_data and the resulting ids are
-- written to hive_posts.tags_ids, overwriting whatever was stored before.
-- When _tags is empty no ids are collected (ARRAY_AGG over zero rows yields
-- NULL), so tags_ids is set to NULL.
DROP FUNCTION IF EXISTS add_tags;
CREATE FUNCTION add_tags( in _post_id hive_posts.id%TYPE, in _tags VARCHAR[] )
RETURNS void
LANGUAGE 'plpgsql'
VOLATILE
AS
$function$
DECLARE
  __tags_ids INTEGER[];
BEGIN
  WITH tags_ids(id) AS
  (
    INSERT INTO
      hive_tag_data AS htd(tag)
    -- DISTINCT is required: ON CONFLICT DO UPDATE refuses to touch the same row
    -- twice within one statement, so a repeated tag name would abort the call.
    -- NOTE(review): the order of ids inside tags_ids is not tied to the order
    -- of _tags; the containment (@>) lookups visible in this change do not
    -- depend on it -- confirm no other reader does.
    SELECT DISTINCT t.tag FROM UNNEST( _tags ) AS t(tag)
    ON CONFLICT("tag") DO UPDATE SET tag=EXCLUDED.tag --trick to always return id
    RETURNING htd.id
  )
  SELECT ARRAY_AGG( id ) INTO __tags_ids FROM tags_ids;

  -- Overwrite (not merge) so tags dropped by the author disappear as well.
  UPDATE hive_posts hp
  SET tags_ids = __tags_ids
  WHERE hp.id = _post_id;
END
$function$
;
-- Guard: stop the script right away when the 'intarray' extension is missing
-- from the current database (it is looked up in pg_extension by name).
-- The gin__int_ops operator class is provided by that extension.
do $$
BEGIN
ASSERT EXISTS (SELECT * FROM pg_extension WHERE extname='intarray'), 'The database requires created "intarray" extension';
END$$;
CREATE TABLE IF NOT EXISTS hive_db_patch_level CREATE TABLE IF NOT EXISTS hive_db_patch_level
( (
level SERIAL NOT NULL PRIMARY KEY, level SERIAL NOT NULL PRIMARY KEY,
...@@ -213,6 +218,15 @@ IF NOT EXISTS (SELECT data_type FROM information_schema.columns ...@@ -213,6 +218,15 @@ IF NOT EXISTS (SELECT data_type FROM information_schema.columns
ELSE ELSE
RAISE NOTICE 'SKIPPING hive_posts upgrade - adding total_votes and net_votes columns'; RAISE NOTICE 'SKIPPING hive_posts upgrade - adding total_votes and net_votes columns';
END IF; END IF;
IF NOT EXISTS(SELECT data_type FROM information_schema.columns
WHERE table_name = 'hive_posts' AND column_name = 'tags_ids') THEN
ALTER TABLE ONLY hive_posts
ADD COLUMN tags_ids INTEGER[];
ELSE
RAISE NOTICE 'SKIPPING hive_posts upgrade - adding a tags_ids column';
END IF;
END END
$BODY$ $BODY$
...@@ -383,3 +397,6 @@ DROP INDEX IF EXISTS hive_posts_promoted_idx; ...@@ -383,3 +397,6 @@ DROP INDEX IF EXISTS hive_posts_promoted_idx;
CREATE INDEX IF NOT EXISTS hive_posts_promoted_id_idx ON hive_posts (promoted, id) CREATE INDEX IF NOT EXISTS hive_posts_promoted_id_idx ON hive_posts (promoted, id)
WHERE NOT is_paidout AND counter_deleted = 0 WHERE NOT is_paidout AND counter_deleted = 0
; ;
-- GIN index over the per-post array of tag ids, built with the intarray
-- operator class gin__int_ops; created only if it does not exist yet.
CREATE INDEX IF NOT EXISTS hive_posts_tags_ids_idx ON hive_posts USING gin(tags_ids gin__int_ops);
...@@ -14,7 +14,6 @@ from hive.indexer.payments import Payments ...@@ -14,7 +14,6 @@ from hive.indexer.payments import Payments
from hive.indexer.follow import Follow from hive.indexer.follow import Follow
from hive.indexer.votes import Votes from hive.indexer.votes import Votes
from hive.indexer.post_data_cache import PostDataCache from hive.indexer.post_data_cache import PostDataCache
from hive.indexer.tags import Tags
from hive.indexer.reputations import Reputations from hive.indexer.reputations import Reputations
from hive.indexer.reblog import Reblog from hive.indexer.reblog import Reblog
from hive.indexer.notify import Notify from hive.indexer.notify import Notify
...@@ -49,7 +48,6 @@ class Blocks: ...@@ -49,7 +48,6 @@ class Blocks:
('PostDataCache', PostDataCache.flush, PostDataCache), ('PostDataCache', PostDataCache.flush, PostDataCache),
('Reputations', Reputations.flush, Reputations), ('Reputations', Reputations.flush, Reputations),
('Votes', Votes.flush, Votes), ('Votes', Votes.flush, Votes),
('Tags', Tags.flush, Tags),
('Follow', Follow.flush, Follow), ('Follow', Follow.flush, Follow),
('Reblog', Reblog.flush, Reblog), ('Reblog', Reblog.flush, Reblog),
('Notify', Notify.flush, Notify), ('Notify', Notify.flush, Notify),
...@@ -70,7 +68,6 @@ class Blocks: ...@@ -70,7 +68,6 @@ class Blocks:
PostDataCache.setup_own_db_access(sharedDbAdapter) PostDataCache.setup_own_db_access(sharedDbAdapter)
Reputations.setup_own_db_access(sharedDbAdapter) Reputations.setup_own_db_access(sharedDbAdapter)
Votes.setup_own_db_access(sharedDbAdapter) Votes.setup_own_db_access(sharedDbAdapter)
Tags.setup_own_db_access(sharedDbAdapter)
Follow.setup_own_db_access(sharedDbAdapter) Follow.setup_own_db_access(sharedDbAdapter)
Posts.setup_own_db_access(sharedDbAdapter) Posts.setup_own_db_access(sharedDbAdapter)
Reblog.setup_own_db_access(sharedDbAdapter) Reblog.setup_own_db_access(sharedDbAdapter)
...@@ -413,7 +410,6 @@ class Blocks: ...@@ -413,7 +410,6 @@ class Blocks:
# remove posts: core, tags, cache entries # remove posts: core, tags, cache entries
if post_ids: if post_ids:
DB.query("DELETE FROM hive_post_tags WHERE post_id IN :ids", ids=post_ids)
DB.query("DELETE FROM hive_posts WHERE id IN :ids", ids=post_ids) DB.query("DELETE FROM hive_posts WHERE id IN :ids", ids=post_ids)
DB.query("DELETE FROM hive_post_data WHERE id IN :ids", ids=post_ids) DB.query("DELETE FROM hive_post_data WHERE id IN :ids", ids=post_ids)
......
...@@ -14,7 +14,6 @@ from hive.indexer.feed_cache import FeedCache ...@@ -14,7 +14,6 @@ from hive.indexer.feed_cache import FeedCache
from hive.indexer.community import Community, START_DATE from hive.indexer.community import Community, START_DATE
from hive.indexer.notify import Notify from hive.indexer.notify import Notify
from hive.indexer.post_data_cache import PostDataCache from hive.indexer.post_data_cache import PostDataCache
from hive.indexer.tags import Tags
from hive.indexer.db_adapter_holder import DbAdapterHolder from hive.indexer.db_adapter_holder import DbAdapterHolder
from hive.utils.misc import chunks from hive.utils.misc import chunks
...@@ -152,8 +151,8 @@ class Posts(DbAdapterHolder): ...@@ -152,8 +151,8 @@ class Posts(DbAdapterHolder):
from funcy.seqs import distinct from funcy.seqs import distinct
tags = list(distinct(tags))[:5] tags = list(distinct(tags))[:5]
for tag in tags: sql = """SELECT add_tags( (:post_id)::INTEGER, (:tags)::VARCHAR[] )"""
Tags.add_tag(result['id'], tag) DB.query_row( sql, post_id = result['id'], tags=tags );
if not DbState.is_initial_sync(): if not DbState.is_initial_sync():
if error: if error:
......
import logging
from hive.indexer.db_adapter_holder import DbAdapterHolder
log = logging.getLogger(__name__)
from hive.utils.normalize import escape_characters
class Tags(DbAdapterHolder):
    """Queue of (post id, tag) pairs that `flush` writes out in bulk.

    Pairs are collected with `add_tag`. `flush` first registers the tag names
    in `hive_tag_data` and then links posts to tags in `hive_post_tags`.

    `db`, `beginTx` and `commitTx` are not defined in this class; they are
    expected to be supplied by `DbAdapterHolder`.
    """

    # Pending (post_id, tag) pairs. Class-level on purpose: every caller
    # appends to, and `flush` drains, the same list.
    _tags = []

    # Upper bound on the number of VALUES rows sent in a single statement.
    _BATCH_LIMIT = 1000

    # Registers tag names; `{}` receives comma-separated "(tag)" rows.
    _INSERT_TAGS_SQL = (
        "\n"
        "INSERT INTO\n"
        "hive_tag_data (tag)\n"
        "VALUES {}\n"
        "ON CONFLICT DO NOTHING\n"
    )

    # Links posts to tags by resolving each tag name to its hive_tag_data id;
    # `{}` receives comma-separated "(post_id, tag)" rows.
    _INSERT_POST_TAGS_SQL = (
        "\n"
        "INSERT INTO\n"
        "hive_post_tags (post_id, tag_id)\n"
        "SELECT\n"
        "data_source.post_id, data_source.tag_id\n"
        "FROM\n"
        "(\n"
        "SELECT\n"
        "post_id, htd.id\n"
        "FROM\n"
        "(\n"
        "VALUES\n"
        "{}\n"
        ") AS T(post_id, tag)\n"
        "INNER JOIN hive_tag_data htd ON htd.tag = T.tag\n"
        ") AS data_source(post_id, tag_id)\n"
        "ON CONFLICT DO NOTHING\n"
    )

    @classmethod
    def add_tag(cls, tid, tag):
        """Queue `tag` for the post whose id is `tid`."""
        cls._tags.append((tid, tag))

    @classmethod
    def _query_in_batches(cls, sql, rows):
        """Execute `sql` once per batch of at most `_BATCH_LIMIT` rows.

        `sql` must contain exactly one `{}` placeholder; it is filled with the
        comma-joined rows of the batch. `rows` is consumed lazily, so rendering
        and querying interleave. Nothing is executed when `rows` is empty.
        """
        batch = []
        for row in rows:
            batch.append(row)
            if len(batch) >= cls._BATCH_LIMIT:
                cls.db.query(sql.format(','.join(batch)))
                batch.clear()
        # Send whatever did not fill a whole batch.
        if batch:
            cls.db.query(sql.format(','.join(batch)))

    @classmethod
    def flush(cls):
        """Write all queued pairs to the database and empty the queue.

        Returns the number of pairs that were queued when the call started.
        With an empty queue no transaction is opened and 0 is returned.
        """
        if cls._tags:
            cls.beginTx()

            # Tag rows must exist before the link table can resolve their ids.
            # NOTE(review): escape_characters presumably renders a quoted SQL
            # literal, since its result is spliced into the statement as-is.
            cls._query_in_batches(
                cls._INSERT_TAGS_SQL,
                ("({})".format(escape_characters(tag)) for _, tag in cls._tags))

            cls._query_in_batches(
                cls._INSERT_POST_TAGS_SQL,
                ("({}, {})".format(post_id, escape_characters(tag))
                 for post_id, tag in cls._tags))

            cls.commitTx()

        n = len(cls._tags)
        cls._tags.clear()
        return n
Subproject commit e8f2e1810071df5659edb45edf3d59af4b9be48e Subproject commit 1ffd591d38c5e764e8a3910af2d5548d8b28a55b
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment