From 76ef6deaae9163c932f18734273a8107cd1a4b05 Mon Sep 17 00:00:00 2001
From: Dariusz Kedzierski <dkedzierski@syncad.com>
Date: Wed, 24 Jun 2020 19:44:40 +0200
Subject: [PATCH] - In tags:   * added query limit to avoid SQL overflow
 errors,   * fixed inefficient insert query, previous was using selects for
 each row, - In apis:   * subqueries were changed to inner joins

---
 hive/indexer/tags.py                 | 44 ++++++++++++++++++++++------
 hive/server/bridge_api/cursor.py     |  9 +++++-
 hive/server/bridge_api/methods.py    | 10 ++++++-
 hive/server/condenser_api/cursor.py  |  9 +++++-
 hive/server/condenser_api/methods.py |  9 +++++-
 5 files changed, 68 insertions(+), 13 deletions(-)

diff --git a/hive/indexer/tags.py b/hive/indexer/tags.py
index f60165cda..e63d198e0 100644
--- a/hive/indexer/tags.py
+++ b/hive/indexer/tags.py
@@ -19,27 +19,53 @@ class Tags(object):
     def flush(cls):
         """ Flush tags to table """
         if cls._tags:
+            limit = 1000
+
             sql = """
                 INSERT INTO
                     hive_tag_data (tag)
-                VALUES 
+                VALUES {} 
+                ON CONFLICT DO NOTHING
             """
             values = []
             for tag in cls._tags:
                 values.append("('{}')".format(escape_characters(tag[1])))
-            sql += ",".join(values)
-            sql += " ON CONFLICT DO NOTHING"
-            DB.query(sql)
+                if len(values) >= limit:
+                    tag_query = str(sql)
+                    DB.query(tag_query.format(','.join(values)))
+                    values.clear()
+            if len(values) > 0:
+                tag_query = str(sql)
+                DB.query(tag_query.format(','.join(values)))
+                values.clear()
 
             sql = """
                 INSERT INTO
                     hive_post_tags (post_id, tag_id)
-                VALUES 
+                SELECT 
+                    data_source.post_id, data_source.tag_id
+                FROM
+                (
+                    SELECT 
+                        post_id, htd.id
+                    FROM
+                    (
+                        VALUES 
+                            {}
+                    ) AS T(post_id, tag)
+                    INNER JOIN hive_tag_data htd ON htd.tag = T.tag
+                ) AS data_source(post_id, tag_id)
+                ON CONFLICT DO NOTHING
             """
             values = []
             for tag in cls._tags:
-                values.append("({}, (SELECT id FROM hive_tag_data WHERE tag='{}'))".format(tag[0], escape_characters(tag[1])))
-            sql += ",".join(values)
-            sql += " ON CONFLICT DO NOTHING"
-            DB.query(sql)
+                values.append("({}, '{}')".format(tag[0], escape_characters(tag[1])))
+                if len(values) >= limit:
+                    tag_query = str(sql)
+                    DB.query(tag_query.format(','.join(values)))
+                    values.clear()
+            if len(values) > 0:
+                tag_query = str(sql)
+                DB.query(tag_query.format(','.join(values)))
+                values.clear()
             cls._tags.clear()
diff --git a/hive/server/bridge_api/cursor.py b/hive/server/bridge_api/cursor.py
index c65baf416..6a0fc86bb 100644
--- a/hive/server/bridge_api/cursor.py
+++ b/hive/server/bridge_api/cursor.py
@@ -180,7 +180,14 @@ async def pids_by_category(db, tag, sort, last_id, limit):
         if sort in ['payout', 'payout_comments']:
             where.append('category_id = (SELECT id FROM hive_category_data WHERE category = :tag)')
         else:
-            sql = "SELECT post_id FROM hive_post_tags WHERE tag_id = (SELECT id FROM hive_tag_data WHERE tag = :tag)"
+            sql = """
+                SELECT 
+                    post_id 
+                FROM 
+                    hive_post_tags hpt
+                INNER JOIN hive_tag_data htd ON hpt.tag_id=htd.id
+                WHERE htd.tag = :tag
+            """
             where.append("id IN (%s)" % sql)
 
     if last_id:
diff --git a/hive/server/bridge_api/methods.py b/hive/server/bridge_api/methods.py
index d473ff1cb..8f10fd0fb 100644
--- a/hive/server/bridge_api/methods.py
+++ b/hive/server/bridge_api/methods.py
@@ -191,7 +191,15 @@ async def get_ranked_posts(context, sort, start_author='', start_permlink='',
         if sort in ['payout', 'payout_comments']:
             sql = sql % """ AND hp.category = :tag """
         else:
-            sql = sql % """ AND hp.post_id IN (SELECT post_id FROM hive_post_tags WHERE tag_id = (SELECT id FROM hive_tag_data WHERE tag = :tag))"""
+            sql = sql % """ AND hp.post_id IN 
+                (SELECT 
+                    post_id 
+                FROM 
+                    hive_post_tags hpt
+                INNER JOIN hive_tag_data htd ON hpt.tag_id=htd.id
+                WHERE htd.tag = :tag
+                )
+            """
 
     if not observer:
         observer = ''
diff --git a/hive/server/condenser_api/cursor.py b/hive/server/condenser_api/cursor.py
index 980fbc1c8..006a47d4e 100644
--- a/hive/server/condenser_api/cursor.py
+++ b/hive/server/condenser_api/cursor.py
@@ -175,7 +175,14 @@ async def pids_by_query(db, sort, start_author, start_permlink, limit, tag):
                 where.append('category_id = (SELECT id FROM hive_category_data WHERE category = :tag)')
                 if sort in ('trending', 'hot'):
                     where.append('depth = 0')
-            sql = "SELECT post_id FROM hive_post_tags WHERE tag_id = (SELECT id FROM hive_tag_data WHERE tag = :tag)"
+            sql = """
+                SELECT 
+                    post_id 
+                FROM 
+                    hive_post_tags hpt
+                INNER JOIN hive_tag_data htd ON hpt.tag_id=htd.id
+                WHERE htd.tag = :tag
+            """
             where.append("id IN (%s)" % sql)
 
     start_id = None
diff --git a/hive/server/condenser_api/methods.py b/hive/server/condenser_api/methods.py
index 00afdeba6..a6a0ba05d 100644
--- a/hive/server/condenser_api/methods.py
+++ b/hive/server/condenser_api/methods.py
@@ -251,7 +251,14 @@ async def get_discussions_by(discussion_type, context, start_author: str = '',
         if tag[:5] == 'hive-':
             sql = sql % """ %s AND hp.category = :tag """
         else:
-            sql = sql % """ %s AND hp.post_id IN (SELECT post_id FROM hive_post_tags WHERE tag_id = (SELECT id FROM hive_tag_data WHERE tag = :tag)) """
+            sql = sql % """ %s AND hp.post_id IN 
+                (SELECT 
+                    post_id 
+                FROM 
+                    hive_post_tags hpt
+                INNER JOIN hive_tag_data htd ON hpt.tag_id=htd.id
+                WHERE htd.tag = :tag
+            ) """
 
     if start_author and start_permlink:
         if discussion_type == 'trending':
-- 
GitLab