diff --git a/hive/indexer/tags.py b/hive/indexer/tags.py index f60165cda35aa972fa679bae4958a7d12a879ce9..e63d198e0ac1b9b6088a3ef67cf955dd69d9a165 100644 --- a/hive/indexer/tags.py +++ b/hive/indexer/tags.py @@ -19,27 +19,53 @@ class Tags(object): def flush(cls): """ Flush tags to table """ if cls._tags: + limit = 1000 + sql = """ INSERT INTO hive_tag_data (tag) - VALUES + VALUES {} + ON CONFLICT DO NOTHING """ values = [] for tag in cls._tags: values.append("('{}')".format(escape_characters(tag[1]))) - sql += ",".join(values) - sql += " ON CONFLICT DO NOTHING" - DB.query(sql) + if len(values) >= limit: + tag_query = str(sql) + DB.query(tag_query.format(','.join(values))) + values.clear() + if len(values) > 0: + tag_query = str(sql) + DB.query(tag_query.format(','.join(values))) + values.clear() sql = """ INSERT INTO hive_post_tags (post_id, tag_id) - VALUES + SELECT + data_source.post_id, data_source.tag_id + FROM + ( + SELECT + post_id, htd.id + FROM + ( + VALUES + {} + ) AS T(post_id, tag) + INNER JOIN hive_tag_data htd ON htd.tag = T.tag + ) AS data_source(post_id, tag_id) + ON CONFLICT DO NOTHING """ values = [] for tag in cls._tags: - values.append("({}, (SELECT id FROM hive_tag_data WHERE tag='{}'))".format(tag[0], escape_characters(tag[1]))) - sql += ",".join(values) - sql += " ON CONFLICT DO NOTHING" - DB.query(sql) + values.append("({}, '{}')".format(tag[0], escape_characters(tag[1]))) + if len(values) >= limit: + tag_query = str(sql) + DB.query(tag_query.format(','.join(values))) + values.clear() + if len(values) > 0: + tag_query = str(sql) + DB.query(tag_query.format(','.join(values))) + values.clear() cls._tags.clear() diff --git a/hive/server/bridge_api/cursor.py b/hive/server/bridge_api/cursor.py index c65baf4163fa499774cef44dfad5e3b94dc7db3b..6a0fc86bb4150aee30bb67d04890c5d02ce6d9e9 100644 --- a/hive/server/bridge_api/cursor.py +++ b/hive/server/bridge_api/cursor.py @@ -180,7 +180,14 @@ async def pids_by_category(db, tag, sort, last_id, limit): if 
sort in ['payout', 'payout_comments']: where.append('category_id = (SELECT id FROM hive_category_data WHERE category = :tag)') else: - sql = "SELECT post_id FROM hive_post_tags WHERE tag_id = (SELECT id FROM hive_tag_data WHERE tag = :tag)" + sql = """ + SELECT + post_id + FROM + hive_post_tags hpt + INNER JOIN hive_tag_data htd ON hpt.tag_id=htd.id + WHERE htd.tag = :tag + """ where.append("id IN (%s)" % sql) if last_id: diff --git a/hive/server/bridge_api/methods.py b/hive/server/bridge_api/methods.py index d473ff1cb0f8dd3c5e8ba73df9bf5f7fca47706f..8f10fd0fbec09cf691ffa27cf9ac7ed96915fb87 100644 --- a/hive/server/bridge_api/methods.py +++ b/hive/server/bridge_api/methods.py @@ -191,7 +191,15 @@ async def get_ranked_posts(context, sort, start_author='', start_permlink='', if sort in ['payout', 'payout_comments']: sql = sql % """ AND hp.category = :tag """ else: - sql = sql % """ AND hp.post_id IN (SELECT post_id FROM hive_post_tags WHERE tag_id = (SELECT id FROM hive_tag_data WHERE tag = :tag))""" + sql = sql % """ AND hp.post_id IN + (SELECT + post_id + FROM + hive_post_tags hpt + INNER JOIN hive_tag_data htd ON hpt.tag_id=htd.id + WHERE htd.tag = :tag + ) + """ if not observer: observer = '' diff --git a/hive/server/condenser_api/cursor.py b/hive/server/condenser_api/cursor.py index 980fbc1c8f10857f80efc7eb2aad7e8ac390cad5..006a47d4ef5a89071ea0efefc398d8f4dbe3e67b 100644 --- a/hive/server/condenser_api/cursor.py +++ b/hive/server/condenser_api/cursor.py @@ -175,7 +175,14 @@ async def pids_by_query(db, sort, start_author, start_permlink, limit, tag): where.append('category_id = (SELECT id FROM hive_category_data WHERE category = :tag)') if sort in ('trending', 'hot'): where.append('depth = 0') - sql = "SELECT post_id FROM hive_post_tags WHERE tag_id = (SELECT id FROM hive_tag_data WHERE tag = :tag)" + sql = """ + SELECT + post_id + FROM + hive_post_tags hpt + INNER JOIN hive_tag_data htd ON hpt.tag_id=htd.id + WHERE htd.tag = :tag + """ where.append("id IN (%s)" 
% sql) start_id = None diff --git a/hive/server/condenser_api/methods.py b/hive/server/condenser_api/methods.py index 00afdeba603399fe4a9bbbd8ee7681ff8483b2ab..a6a0ba05d2b15c87fc828e4bedfdb5ed48f4d60e 100644 --- a/hive/server/condenser_api/methods.py +++ b/hive/server/condenser_api/methods.py @@ -251,7 +251,14 @@ async def get_discussions_by(discussion_type, context, start_author: str = '', if tag[:5] == 'hive-': sql = sql % """ %s AND hp.category = :tag """ else: - sql = sql % """ %s AND hp.post_id IN (SELECT post_id FROM hive_post_tags WHERE tag_id = (SELECT id FROM hive_tag_data WHERE tag = :tag)) """ + sql = sql % """ %s AND hp.post_id IN + (SELECT + post_id + FROM + hive_post_tags hpt + INNER JOIN hive_tag_data htd ON hpt.tag_id=htd.id + WHERE htd.tag = :tag + ) """ if start_author and start_permlink: if discussion_type == 'trending':