@classmethod
def add_data(cls, pid, post_data, is_new_post):
    """Add post data to the cache, merging into an entry already cached for `pid`.

    Args:
        pid: post id used as the cache key.
        post_data: mapping of field name to value. When an entry for `pid`
            is already cached, fields whose value is None are left as cached.
        is_new_post: flag stored on a fresh entry under the 'is_new_post' key.

    Raises:
        AssertionError: if `pid` is already cached and `is_new_post` is true.
    """
    if not cls.is_cached(pid):
        # Store a copy: the cache must not alias the caller's dict, and the
        # caller's dict must not silently gain the 'is_new_post' key.
        cls._data[pid] = dict(post_data, is_new_post=is_new_post)
        return

    # Raised explicitly instead of via `assert`, so the invariant is still
    # enforced when Python runs with -O (which strips assert statements).
    if is_new_post:
        raise AssertionError(
            "post {} is already cached and cannot be flagged as new".format(pid))

    # Merge: only fields that carry a value overwrite the cached ones.
    cls._data[pid].update(
        (field, value) for field, value in post_data.items() if value is not None)
PostDataCache(object): def flush(cls, print_query = False): """ Flush data from cache to db """ if cls._data: - sql = """ - INSERT INTO - hive_post_data (id, title, preview, img_url, body, json) - VALUES - """ - values = [] + values_insert = [] + values_update = [] for k, data in cls._data.items(): - title = "''" if not data['title'] else "{}".format(escape_characters(data['title'])) - preview = "''" if not data['preview'] else "{}".format(escape_characters(data['preview'])) - img_url = "''" if not data['img_url'] else "{}".format(escape_characters(data['img_url'])) - body = "''" if not data['body'] else "{}".format(escape_characters(data['body'])) - json = "'{}'" if not data['json'] else "{}".format(escape_characters(data['json'])) - values.append("({},{},{},{},{},{})".format(k, title, preview, img_url, body, json)) - sql += ','.join(values) - sql += """ - ON CONFLICT (id) - DO - UPDATE SET - title = EXCLUDED.title, - preview = EXCLUDED.preview, - img_url = EXCLUDED.img_url, - body = EXCLUDED.body, - json = EXCLUDED.json - WHERE - hive_post_data.id = EXCLUDED.id - """ + title = 'NULL' if data['title'] is None else "{}".format(escape_characters(data['title'])) + body = 'NULL' if data['body'] is None else "{}".format(escape_characters(data['body'])) + preview = 'NULL' if data['body'] is None else "{}".format(escape_characters(data['body'][0:1024])) + json = 'NULL' if data['json'] is None else "{}".format(escape_characters(data['json'])) + img_url = 'NULL' if data['img_url'] is None else "{}".format(escape_characters(data['img_url'])) + value = "({},{},{},{},{},{})".format(k, title, preview, img_url, body, json) + if data['is_new_post']: + values_insert.append(value) + else: + values_update.append(value) + + if values_insert: + sql = """ + INSERT INTO + hive_post_data (id, title, preview, img_url, body, json) + VALUES + """ + sql += ','.join(values_insert) + if print_query: + log.info("Executing query:\n{}".format(sql)) + DB.query(sql) - if(print_query): - 
log.info("Executing query:\n{}".format(sql)) + if values_update: + sql = """ + UPDATE hive_post_data AS hpd SET + title = COALESCE( data_source.title, hpd.title ), + preview = COALESCE( data_source.preview, hpd.preview ), + img_url = COALESCE( data_source.img_url, hpd.img_url ), + body = COALESCE( data_source.body, hpd.body ), + json = COALESCE( data_source.json, hpd.json ) + FROM + ( SELECT * FROM + ( VALUES + """ + sql += ','.join(values_update) + sql += """ + ) AS T(id, title, preview, img_url, body, json) + ) AS data_source + WHERE hpd.id = data_source.id + """ + if print_query: + log.info("Executing query:\n{}".format(sql)) + DB.query(sql) - DB.query(sql) n = len(cls._data.keys()) cls._data.clear() return n diff --git a/hive/indexer/posts.py b/hive/indexer/posts.py index ba63521efa0f968534f39981e33047dfdf550220..f2560d54a90fb4f28cf31fdd4bd5c23e5e755082 100644 --- a/hive/indexer/posts.py +++ b/hive/indexer/posts.py @@ -3,7 +3,7 @@ import logging import collections -from json import dumps, loads +from ujson import dumps, loads from diff_match_patch import diff_match_patch @@ -16,7 +16,7 @@ from hive.indexer.community import Community, START_DATE from hive.indexer.notify import Notify from hive.indexer.post_data_cache import PostDataCache from hive.indexer.tags import Tags -from hive.utils.normalize import sbd_amount, legacy_amount, asset_to_hbd_hive +from hive.utils.normalize import sbd_amount, legacy_amount, asset_to_hbd_hive, safe_img_url log = logging.getLogger(__name__) DB = Db.instance() @@ -103,23 +103,6 @@ class Posts: cls._set_id(op['author']+'/'+op['permlink'], result['id']) - if result['is_new_post']: - # add content data to hive_post_data - post_data = dict(title=op['title'], preview=op['preview'] if 'preview' in op else "", - img_url=op['img_url'] if 'img_url' in op else "", body=op['body'], - json=op['json_metadata'] if op['json_metadata'] else '{}') - else: - # edit case. Now we need to (potentially) apply patch to the post body. 
- new_body = cls._merge_post_body(id=result['id'], new_body_def=op['body']) - post_data = dict(title=op['title'], preview=op['preview'] if 'preview' in op else "", - img_url=op['img_url'] if 'img_url' in op else "", body=new_body, - json=op['json_metadata'] if op['json_metadata'] else '{}') - -# log.info("Adding author: {} permlink: {}".format(op['author'], op['permlink'])) - - printQuery = False # op['author'] == 'xeroc' and op['permlink'] == 're-piston-20160818t080811' - PostDataCache.add_data(result['id'], post_data, printQuery) - md = {} # At least one case where jsonMetadata was double-encoded: condenser#895 # jsonMetadata = JSON.parse(jsonMetadata); @@ -130,6 +113,34 @@ class Posts: except Exception: pass + img_url = None + if 'image' in md: + img_url = md['image'] + if isinstance(img_url, list) and img_url: + img_url = img_url[0] + if img_url: + img_url = safe_img_url(img_url) + + is_new_post = result['is_new_post'] + if is_new_post: + # add content data to hive_post_data + post_data = dict(title=op['title'] if op['title'] else '', + img_url=img_url if img_url else '', + body=op['body'] if op['body'] else '', + json=op['json_metadata'] if op['json_metadata'] else '') + else: + # edit case. Now we need to (potentially) apply patch to the post body. 
+ # empty new body means no body edit, not clear (same with other data) + new_body = cls._merge_post_body(id=result['id'], new_body_def=op['body']) if op['body'] else None + new_title = op['title'] if op['title'] else None + new_json = op['json_metadata'] if op['json_metadata'] else None + # when 'new_json' is not empty, 'img_url' should be overwritten even if it is itself empty + new_img = img_url if img_url else '' if new_json else None + post_data = dict(title=new_title, img_url=new_img, body=new_body, json=new_json) + +# log.info("Adding author: {} permlink: {}".format(op['author'], op['permlink'])) + PostDataCache.add_data(result['id'], post_data, is_new_post) + if not result['depth']: tags = [result['post_category']] if md and 'tags' in md and isinstance(md['tags'], list): diff --git a/hive/server/bridge_api/objects.py b/hive/server/bridge_api/objects.py index ae9969d144a457011da270e74f3af7cb3e4d0f29..c99adcfd9d84cc946bbf230db388bab4dbfe8393 100644 --- a/hive/server/bridge_api/objects.py +++ b/hive/server/bridge_api/objects.py @@ -231,7 +231,10 @@ def _bridge_post_object(row, truncate_body=0): post['title'] = row['title'] post['body'] = row['body'][0:truncate_body] if truncate_body else row['body'] - post['json_metadata'] = json.loads(row['json']) + try: + post['json_metadata'] = json.loads(row['json']) + except Exception: + post['json_metadata'] = {} post['created'] = json_date(row['created_at']) post['updated'] = json_date(row['updated_at']) diff --git a/tests/tests_api b/tests/tests_api index 263b3b14421f52394e344bdfb6bf0c345c5dad5a..9f6058b31adec6378ead1b15ae6c1e7bb75823f7 160000 --- a/tests/tests_api +++ b/tests/tests_api @@ -1 +1 @@ -Subproject commit 263b3b14421f52394e344bdfb6bf0c345c5dad5a +Subproject commit 9f6058b31adec6378ead1b15ae6c1e7bb75823f7