Commit a7d14ad6 authored by Andrzej Lisak

[ABW]: Fixed filling of preview and img_url in hive_post_data

Fixed the edit case with an empty title, body, or json_metadata
Fixed use of json: ujson is now used everywhere
If json_metadata in a comment_operation is not actually valid JSON, HiveMind no longer crashes on access
Since bad JSON is handled anyway, there is no longer a need to fill empty json_metadata with '{}'; the original empty string is restored
parent c5bf3fd6
@@ -147,8 +147,8 @@ def build_metadata():
         'hive_post_data', metadata,
         sa.Column('id', sa.Integer, primary_key=True, autoincrement=False),
         sa.Column('title', VARCHAR(512), nullable=False, server_default=''),
-        sa.Column('preview', VARCHAR(1024), nullable=False, server_default=''),
-        sa.Column('img_url', VARCHAR(1024), nullable=False, server_default=''),
+        sa.Column('preview', VARCHAR(1024), nullable=False, server_default=''), # first 1k of 'body'
+        sa.Column('img_url', VARCHAR(1024), nullable=False, server_default=''), # first 'image' from 'json'
         sa.Column('body', TEXT, nullable=False, server_default=''),
         sa.Column('json', TEXT, nullable=False, server_default='')
     )
...
@@ -2,7 +2,6 @@
 from hive.indexer.reblog import Reblog
 import logging
-import json
 from hive.db.adapter import Db
...
@@ -15,9 +15,16 @@ class PostDataCache(object):
         return pid in cls._data

     @classmethod
-    def add_data(cls, pid, post_data, print_query = False):
+    def add_data(cls, pid, post_data, is_new_post):
         """ Add data to cache """
-        cls._data[pid] = post_data
+        if not cls.is_cached(pid):
+            cls._data[pid] = post_data
+            cls._data[pid]['is_new_post'] = is_new_post
+        else:
+            assert not is_new_post
+            for k, data in post_data.items():
+                if data is not None:
+                    cls._data[pid][k] = data

     @classmethod
     def get_post_body(cls, pid):
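
For reference, a standalone sketch (not part of the diff) of the merge rule the new add_data implements: the first call for a post id caches the whole row and records is_new_post, while later calls only overwrite fields that are not None, i.e. fields that were actually edited.

# Standalone sketch of the add_data merge rule (hypothetical names, simplified cache).
# None marks a field that was not edited and therefore keeps its cached value.
_cache = {}

def add_data(pid, post_data, is_new_post):
    if pid not in _cache:
        post_data['is_new_post'] = is_new_post
        _cache[pid] = post_data
    else:
        assert not is_new_post
        for key, value in post_data.items():
            if value is not None:
                _cache[pid][key] = value

add_data(10, dict(title='hello', img_url='', body='first version', json=''), True)
add_data(10, dict(title=None, img_url=None, body='second version', json=None), False)
assert _cache[10]['title'] == 'hello'          # untouched field kept
assert _cache[10]['body'] == 'second version'  # edited field replaced
assert _cache[10]['is_new_post'] is True       # still flushed through the INSERT path

The real method lives on PostDataCache and is keyed by the post id passed in from Posts.
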
@@ -36,37 +43,53 @@ class PostDataCache(object):
     def flush(cls, print_query = False):
         """ Flush data from cache to db """
         if cls._data:
-            sql = """
-                INSERT INTO
-                    hive_post_data (id, title, preview, img_url, body, json)
-                VALUES
-            """
-            values = []
+            values_insert = []
+            values_update = []
             for k, data in cls._data.items():
-                title = "''" if not data['title'] else "{}".format(escape_characters(data['title']))
-                preview = "''" if not data['preview'] else "{}".format(escape_characters(data['preview']))
-                img_url = "''" if not data['img_url'] else "{}".format(escape_characters(data['img_url']))
-                body = "''" if not data['body'] else "{}".format(escape_characters(data['body']))
-                json = "'{}'" if not data['json'] else "{}".format(escape_characters(data['json']))
-                values.append("({},{},{},{},{},{})".format(k, title, preview, img_url, body, json))
-            sql += ','.join(values)
-            sql += """
-                ON CONFLICT (id)
-                    DO
-                        UPDATE SET
-                            title = EXCLUDED.title,
-                            preview = EXCLUDED.preview,
-                            img_url = EXCLUDED.img_url,
-                            body = EXCLUDED.body,
-                            json = EXCLUDED.json
-                        WHERE
-                            hive_post_data.id = EXCLUDED.id
-            """
-
-            if(print_query):
-                log.info("Executing query:\n{}".format(sql))
-
-            DB.query(sql)
+                title = 'NULL' if data['title'] is None else "{}".format(escape_characters(data['title']))
+                body = 'NULL' if data['body'] is None else "{}".format(escape_characters(data['body']))
+                preview = 'NULL' if data['body'] is None else "{}".format(escape_characters(data['body'][0:1024]))
+                json = 'NULL' if data['json'] is None else "{}".format(escape_characters(data['json']))
+                img_url = 'NULL' if data['img_url'] is None else "{}".format(escape_characters(data['img_url']))
+                value = "({},{},{},{},{},{})".format(k, title, preview, img_url, body, json)
+                if data['is_new_post']:
+                    values_insert.append(value)
+                else:
+                    values_update.append(value)
+
+            if values_insert:
+                sql = """
+                    INSERT INTO
+                        hive_post_data (id, title, preview, img_url, body, json)
+                    VALUES
+                """
+                sql += ','.join(values_insert)
+                if print_query:
+                    log.info("Executing query:\n{}".format(sql))
+                DB.query(sql)
+
+            if values_update:
+                sql = """
+                    UPDATE hive_post_data AS hpd SET
+                        title = COALESCE( data_source.title, hpd.title ),
+                        preview = COALESCE( data_source.preview, hpd.preview ),
+                        img_url = COALESCE( data_source.img_url, hpd.img_url ),
+                        body = COALESCE( data_source.body, hpd.body ),
+                        json = COALESCE( data_source.json, hpd.json )
+                    FROM
+                    ( SELECT * FROM
+                    ( VALUES
+                """
+                sql += ','.join(values_update)
+                sql += """
+                    ) AS T(id, title, preview, img_url, body, json)
+                    ) AS data_source
+                    WHERE hpd.id = data_source.id
+                """
+                if print_query:
+                    log.info("Executing query:\n{}".format(sql))
+                DB.query(sql)
+
         n = len(cls._data.keys())
         cls._data.clear()
         return n
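
As an illustration of the new flush split, a hedged sketch (made-up values; quote and render_row are hypothetical names, quote standing in for the real escape_characters helper) of how one cached row is rendered into a VALUES tuple: fields left at None become NULL, so the COALESCE in the UPDATE path keeps the stored value, and preview is always derived from the first 1024 characters of the new body.

# Hypothetical illustration of the per-row rendering in flush().
def quote(text):
    return "'" + text.replace("'", "''") + "'"   # simplified stand-in for escape_characters

def render_row(pid, data):
    title   = 'NULL' if data['title'] is None else quote(data['title'])
    body    = 'NULL' if data['body'] is None else quote(data['body'])
    preview = 'NULL' if data['body'] is None else quote(data['body'][0:1024])  # first 1k of body
    json    = 'NULL' if data['json'] is None else quote(data['json'])
    img_url = 'NULL' if data['img_url'] is None else quote(data['img_url'])
    return "({},{},{},{},{},{})".format(pid, title, preview, img_url, body, json)

# An edit that only changed the body: title, img_url and json stay NULL in the tuple,
# so the UPDATE leaves their stored values alone.
print(render_row(10, dict(title=None, body='edited body', img_url=None, json=None)))
# (10,NULL,'edited body',NULL,'edited body',NULL)
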
@@ -3,7 +3,7 @@
 import logging
 import collections
-from json import dumps, loads
+from ujson import dumps, loads
 from diff_match_patch import diff_match_patch
@@ -16,7 +16,7 @@ from hive.indexer.community import Community, START_DATE
 from hive.indexer.notify import Notify
 from hive.indexer.post_data_cache import PostDataCache
 from hive.indexer.tags import Tags
-from hive.utils.normalize import sbd_amount, legacy_amount, asset_to_hbd_hive
+from hive.utils.normalize import sbd_amount, legacy_amount, asset_to_hbd_hive, safe_img_url

 log = logging.getLogger(__name__)
 DB = Db.instance()
@@ -103,23 +103,6 @@ class Posts:
         cls._set_id(op['author']+'/'+op['permlink'], result['id'])

-        if result['is_new_post']:
-            # add content data to hive_post_data
-            post_data = dict(title=op['title'], preview=op['preview'] if 'preview' in op else "",
-                             img_url=op['img_url'] if 'img_url' in op else "", body=op['body'],
-                             json=op['json_metadata'] if op['json_metadata'] else '{}')
-        else:
-            # edit case. Now we need to (potentially) apply patch to the post body.
-            new_body = cls._merge_post_body(id=result['id'], new_body_def=op['body'])
-            post_data = dict(title=op['title'], preview=op['preview'] if 'preview' in op else "",
-                             img_url=op['img_url'] if 'img_url' in op else "", body=new_body,
-                             json=op['json_metadata'] if op['json_metadata'] else '{}')
-
-        # log.info("Adding author: {} permlink: {}".format(op['author'], op['permlink']))
-        printQuery = False # op['author'] == 'xeroc' and op['permlink'] == 're-piston-20160818t080811'
-        PostDataCache.add_data(result['id'], post_data, printQuery)
-
         md = {}
         # At least one case where jsonMetadata was double-encoded: condenser#895
         # jsonMetadata = JSON.parse(jsonMetadata);
@@ -130,6 +113,34 @@ class Posts:
         except Exception:
             pass

+        img_url = None
+        if 'image' in md:
+            img_url = md['image']
+            if isinstance(img_url, list) and img_url:
+                img_url = img_url[0]
+        if img_url:
+            img_url = safe_img_url(img_url)
+
+        is_new_post = result['is_new_post']
+        if is_new_post:
+            # add content data to hive_post_data
+            post_data = dict(title=op['title'] if op['title'] else '',
+                             img_url=img_url if img_url else '',
+                             body=op['body'] if op['body'] else '',
+                             json=op['json_metadata'] if op['json_metadata'] else '')
+        else:
+            # edit case. Now we need to (potentially) apply patch to the post body.
+            # empty new body means no body edit, not clear (same with other data)
+            new_body = cls._merge_post_body(id=result['id'], new_body_def=op['body']) if op['body'] else None
+            new_title = op['title'] if op['title'] else None
+            new_json = op['json_metadata'] if op['json_metadata'] else None
+            # when 'new_json' is not empty, 'img_url' should be overwritten even if it is itself empty
+            new_img = img_url if img_url else '' if new_json else None
+            post_data = dict(title=new_title, img_url=new_img, body=new_body, json=new_json)
+
+        # log.info("Adding author: {} permlink: {}".format(op['author'], op['permlink']))
+        PostDataCache.add_data(result['id'], post_data, is_new_post)
+
         if not result['depth']:
             tags = [result['post_category']]
             if md and 'tags' in md and isinstance(md['tags'], list):
...
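
The densest line in the edit path of the Posts hunk above is the img_url decision. A small sketch of its three outcomes (new_img_for_edit is a hypothetical name mirroring that one expression, with made-up values):

# Mirrors: new_img = img_url if img_url else '' if new_json else None
def new_img_for_edit(img_url, new_json):
    return img_url if img_url else '' if new_json else None

assert new_img_for_edit('https://example.com/a.png', '{"app":"x"}') == 'https://example.com/a.png'
assert new_img_for_edit(None, '{"app":"x"}') == ''   # metadata edited but has no image: clear stored img_url
assert new_img_for_edit(None, None) is None          # metadata untouched: keep stored img_url
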
@@ -231,7 +231,10 @@ def _bridge_post_object(row, truncate_body=0):
     post['title'] = row['title']
     post['body'] = row['body'][0:truncate_body] if truncate_body else row['body']
-    post['json_metadata'] = json.loads(row['json'])
+    try:
+        post['json_metadata'] = json.loads(row['json'])
+    except Exception:
+        post['json_metadata'] = {}

     post['created'] = json_date(row['created_at'])
     post['updated'] = json_date(row['updated_at'])
...
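
Finally, a sketch of the fallback added to _bridge_post_object, assuming json in that module refers to ujson as the commit message states (load_metadata is a hypothetical name):

import ujson as json

def load_metadata(raw):
    # Mirrors the new try/except: malformed or empty 'json' column data degrades to {}.
    try:
        return json.loads(raw)
    except Exception:
        return {}

assert load_metadata('{"tags": ["hive"]}') == {'tags': ['hive']}
assert load_metadata('') == {}                   # empty string is stored as-is now, not as '{}'
assert load_metadata('definitely not json') == {}
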
-Subproject commit 263b3b14421f52394e344bdfb6bf0c345c5dad5a
+Subproject commit 9f6058b31adec6378ead1b15ae6c1e7bb75823f7