Skip to content
Snippets Groups Projects
Commit 8f3240f2 authored by Bartek Wrona
Browse files

- Implemented comment body patching using diff_match_patch library to follow...

- Implemented comment body patching using diff_match_patch library to follow original hived node behavior
- extended string literal normalization/escaping to support unicode strings containing nonprintable characters
parent d2dc1f46
No related branches found
No related tags found
4 merge requests: !456 Release candidate v1 24, !230 Setup monitoring with pghero, !135 Enable postgres monitoring on CI server, !49 - Implemented comment body patching using diff_match_patch library to follow...
......@@ -15,7 +15,7 @@ jsonrpcserver = "4.1.3+8f3437a"
aiohttp = "*"
aiopg = "*"
"psycopg2-binary" = "*"
"diff-match-patch" = "*"
[dev-packages]
......
......@@ -424,7 +424,7 @@ def setup(db):
in _parent_permlink hive_permlink_data.permlink%TYPE,
in _date hive_posts.created_at%TYPE,
in _community_support_start_date hive_posts.created_at%TYPE)
RETURNS TABLE (id hive_posts.id%TYPE, author_id hive_posts.author_id%TYPE, permlink_id hive_posts.permlink_id%TYPE,
RETURNS TABLE (is_new_post boolean, id hive_posts.id%TYPE, author_id hive_posts.author_id%TYPE, permlink_id hive_posts.permlink_id%TYPE,
post_category hive_category_data.category%TYPE, parent_id hive_posts.parent_id%TYPE, community_id hive_posts.community_id%TYPE,
is_valid hive_posts.is_valid%TYPE, is_muted hive_posts.is_muted%TYPE, depth hive_posts.depth%TYPE,
is_edited boolean)
......@@ -487,7 +487,7 @@ def setup(db):
END
)
RETURNING hp.id, hp.author_id, hp.permlink_id, (SELECT hcd.category FROM hive_category_data hcd WHERE hcd.id = hp.category_id) as post_category, hp.parent_id, hp.community_id, hp.is_valid, hp.is_muted, hp.depth, (hp.updated_at > hp.created_at) as is_edited
RETURNING (xmax = 0) as is_new_post, hp.id, hp.author_id, hp.permlink_id, (SELECT hcd.category FROM hive_category_data hcd WHERE hcd.id = hp.category_id) as post_category, hp.parent_id, hp.community_id, hp.is_valid, hp.is_muted, hp.depth, (hp.updated_at > hp.created_at) as is_edited
;
ELSE
INSERT INTO hive_category_data
......@@ -536,7 +536,7 @@ def setup(db):
END
)
RETURNING hp.id, hp.author_id, hp.permlink_id, _parent_permlink as post_category, hp.parent_id, hp.community_id, hp.is_valid, hp.is_muted, hp.depth, (hp.updated_at > hp.created_at) as is_edited
RETURNING (xmax = 0) as is_new_post, hp.id, hp.author_id, hp.permlink_id, _parent_permlink as post_category, hp.parent_id, hp.community_id, hp.is_valid, hp.is_muted, hp.depth, (hp.updated_at > hp.created_at) as is_edited
;
END IF;
END
......
......@@ -15,12 +15,25 @@ class PostDataCache(object):
return pid in cls._data
@classmethod
def add_data(cls, pid, post_data):
def add_data(cls, pid, post_data, print_query=False):
    """Store `post_data` in the cache under `pid`, replacing any earlier entry.

    `print_query` is accepted for callers that pass it, but this method
    does not read it.
    """
    cls._data.update({pid: post_data})
@classmethod
def flush(cls):
def get_post_body(cls, pid):
    """Return the body of post `pid`.

    The in-memory cache (`cls._data`) is consulted first; only on a miss is
    the body read from the hive_post_data table.
    """
    # Fast path: the post data is still waiting in the cache.
    if pid in cls._data:
        return cls._data[pid]['body']

    # Cache miss: fetch the stored body from the database.
    query = "\nSELECT hpd.body FROM hive_post_data hpd WHERE hpd.id = :post_id;\n"
    fetched = DB.query_row(query, post_id=pid)
    return dict(fetched)['body']
@classmethod
def flush(cls, print_query = False):
""" Flush data from cache to db """
if cls._data:
sql = """
......@@ -49,5 +62,9 @@ class PostDataCache(object):
WHERE
hive_post_data.id = EXCLUDED.id
"""
if(print_query):
log.info("Executing query:\n{}".format(sql))
DB.query(sql)
cls._data.clear()
......@@ -5,6 +5,8 @@ import collections
from json import dumps, loads
from diff_match_patch import diff_match_patch
from hive.db.adapter import Db
from hive.db.db_state import DbState
......@@ -94,7 +96,7 @@ class Posts:
"""Register new/edited/undeleted posts; insert into feed cache."""
sql = """
SELECT id, author_id, permlink_id, post_category, parent_id, community_id, is_valid, is_muted, depth, is_edited
SELECT is_new_post, id, author_id, permlink_id, post_category, parent_id, community_id, is_valid, is_muted, depth, is_edited
FROM process_hive_post_operation((:author)::varchar, (:permlink)::varchar, (:parent_author)::varchar, (:parent_permlink)::varchar, (:date)::timestamp, (:community_support_start_date)::timestamp);
"""
......@@ -108,11 +110,22 @@ class Posts:
cls._set_id(op['author']+'/'+op['permlink'], result['id'])
# add content data to hive_post_data
post_data = dict(title=op['title'], preview=op['preview'] if 'preview' in op else "",
img_url=op['img_url'] if 'img_url' in op else "", body=op['body'],
json=op['json_metadata'] if op['json_metadata'] else '{}')
PostDataCache.add_data(result['id'], post_data)
if result['is_new_post']:
# add content data to hive_post_data
post_data = dict(title=op['title'], preview=op['preview'] if 'preview' in op else "",
img_url=op['img_url'] if 'img_url' in op else "", body=op['body'],
json=op['json_metadata'] if op['json_metadata'] else '{}')
else:
# edit case. Now we need to (potentially) apply patch to the post body.
new_body = cls._merge_post_body(id=result['id'], new_body_def=op['body'])
post_data = dict(title=op['title'], preview=op['preview'] if 'preview' in op else "",
img_url=op['img_url'] if 'img_url' in op else "", body=new_body,
json=op['json_metadata'] if op['json_metadata'] else '{}')
# log.info("Adding author: {} permlink: {}".format(op['author'], op['permlink']))
printQuery = False # op['author'] == 'xeroc' and op['permlink'] == 're-piston-20160818t080811'
PostDataCache.add_data(result['id'], post_data, printQuery)
md = {}
# At least one case where jsonMetadata was double-encoded: condenser#895
......@@ -402,3 +415,30 @@ class Posts:
is_muted = True
return error
@classmethod
def _merge_post_body(cls, id, new_body_def):
    """Work out the resulting body of an edited post.

    `new_body_def` is first parsed as diff_match_patch patch text. When that
    yields a non-empty patch, the patch is applied to the body currently
    held for post `id` (looked up through PostDataCache.get_post_body) and
    the patched text is returned. In every other case -- an empty patch, or
    any exception raised along the way -- `new_body_def` itself is returned
    unchanged as the complete new body.

    The parameter keeps the name `id` because it is passed by keyword.
    """
    previous_body = ''
    try:
        differ = diff_match_patch()
        hunks = differ.patch_fromText(new_body_def)
        if hunks is None or len(hunks) == 0:
            # Nothing to apply: treat the text as a full replacement body.
            return new_body_def

        previous_body = PostDataCache.get_post_body(id)
        # patch_apply returns a pair; only the patched text is used, the
        # second element is ignored.
        patched_body, _ = differ.patch_apply(hunks, previous_body)
        return patched_body
    except ValueError:
        # Deliberately not logged. Presumably raised when the text is not
        # patch syntax, i.e. a plain full body -- confirm against the
        # diff_match_patch documentation.
        return new_body_def
    except Exception as ex:
        # Best effort: report the failure and fall back to the raw text.
        log.info("Merging a body post id: {} caused an unknown exception {}".format(id, ex))
        log.info("New body definition: {}".format(new_body_def))
        log.info("Old body definition: {}".format(previous_body))
        return new_body_def
......@@ -20,6 +20,10 @@ dct={'0':'a','1':'b','2':'c','3':'d','4':'e',
# convert special chars into their octal formats recognized by sql
special_chars={
"\r":"\\015",
"\n":"\\012",
"\v":"\\013",
"\f": "\\014",
"\\":"\\134",
"'":"\\047",
"%":"\\045",
......@@ -35,11 +39,21 @@ def escape_characters(text):
ret = "E'"
for ch in text:
try:
dw=special_chars[ch]
ret=ret+dw
except KeyError as k:
ret=ret+ch
if ch.isprintable() or ch in special_chars:
try:
dw=special_chars[ch]
ret=ret+dw
except KeyError as k:
ret=ret+ch
else:
ordinal = ord(ch)
if ordinal == 0 or ordinal >= 0x80:
escaped_value = 'u' + hex(ordinal)[2:]
# logging.info("Encoded unicode escape: {}".format(escaped_value))
else:
escaped_value = ch.encode('unicode-escape').decode('utf-8')
ret = ret + escaped_value
ret = ret + "'"
return ret
......
......@@ -39,3 +39,4 @@ tzlocal==2.0.0
ujson==2.0.3
urllib3==1.25.9
yarl==1.4.2
diff-match-patch==20200713
......@@ -46,6 +46,7 @@ setup(
'aiocache',
'configargparse',
'pdoc',
'diff-match-patch'
],
extras_require={'test': tests_require},
entry_points={
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment