From 4894f005f0d484d5203c8d5e141fa1bfd15a71c4 Mon Sep 17 00:00:00 2001 From: roadscape <roadscape@users.noreply.github.com> Date: Wed, 18 Sep 2019 10:13:34 -0500 Subject: [PATCH] test basic mentions match --- hive/indexer/cached_post.py | 6 ++++- hive/utils/post.py | 42 ++++++++-------------------------- tests/utils/test_utils_post.py | 13 ++++------- 3 files changed, 20 insertions(+), 41 deletions(-) diff --git a/hive/indexer/cached_post.py b/hive/indexer/cached_post.py index 48fb797ef..afa15c91c 100644 --- a/hive/indexer/cached_post.py +++ b/hive/indexer/cached_post.py @@ -560,7 +560,11 @@ class CachedPost: Notify(notif_type, src_id=author_id, dst_id=parent_id, post_id=pid, when=post['last_update'], score=score).write() if level in ('insert', 'update'): - mentions(post) + accts = mentions(post['body']) + for acct in accts: + if not Accounts.exists(acct): + url = '@' + post['author'] + '/' + post['permlink'] + log.warning("bad mention [%s] in %s", acct, url) @classmethod diff --git a/hive/utils/post.py b/hive/utils/post.py index 6a7ffd130..c91e0f03c 100644 --- a/hive/utils/post.py +++ b/hive/utils/post.py @@ -11,39 +11,17 @@ from hive.utils.normalize import sbd_amount, rep_log10, safe_img_url, parse_time log = logging.getLogger(__name__) -def mentions(post): - """Given a post, return proper @-mentioned account names.""" - # pylint: disable=invalid-name - detected = text_mentions(post['body']) - provided = _post_users(post) - - d1 = detected - provided - d2 = provided - detected - - url = '@' + post['author'] + '/' + post['permlink'] - if d1: log.warning("%s detected - provided: %s", url, d1) - if d2: log.warning("%s provided - detected: %s", url, d2) - - return detected & provided - -def text_mentions(body): +def mentions(body): """Given a post body, return proper @-mentioned account names.""" - matches = re.findall('(?:^|[^a-zA-Z0-9_!#$%&*@])(:?@)([a-z\\d\\-.]+)', body) - return {grp[1] for grp in matches} - -def _post_users(post): - """Retrieve `users` key from 
json_metadata.""" - md = {} - try: - md = json.loads(post['json_metadata']) - if not isinstance(md, dict): - md = {} - except Exception: - pass - - if 'users' in md and isinstance(md['users'], list): - return {user.strip('.@') for user in md['users'] if user and isinstance(user, str)} - return set() + # condenser: + # /(^|[^a-zA-Z0-9_!#$%&*@＠\/]|(^|[^a-zA-Z0-9_+~.-\/#]))[@＠]([a-z][-\.a-z\d]+[a-z\d])/gi, + + matches = re.findall( + '(?:^|[^a-zA-Z0-9_!#$%&*@\\/])' + '(:?@)' + '([a-zA-Z0-9][a-zA-Z0-9\\-.]{1,14}[a-zA-Z0-9])' + '(?![a-z])', body) + return {grp[1].lower() for grp in matches} def post_basic(post): """Basic post normalization: json-md, tags, and flags.""" diff --git a/tests/utils/test_utils_post.py b/tests/utils/test_utils_post.py index a604cbe27..a17a6bd45 100644 --- a/tests/utils/test_utils_post.py +++ b/tests/utils/test_utils_post.py @@ -3,7 +3,6 @@ from decimal import Decimal from hive.utils.post import ( mentions, - text_mentions, post_basic, post_legacy, post_payout, @@ -140,19 +139,17 @@ POST_2 = { } def test_mentions(): - post = {'body': 'Who is @abc and @foo and @bar', - 'json_metadata': '{"users":["foo","bar"]}'} - assert mentions(post) == {'foo', 'bar'} - -def test_text_mentions(): # pylint: disable=invalid-name - m = text_mentions + m = mentions assert m('Hi @abc, meet @bob') == {'abc', 'bob'} assert m('Hi @abc, meet @abc') == {'abc'} assert not m('') assert not m('@') - assert m('steemit.com/@apple') == {'apple'} + assert not m('steemit.com/@apple') assert not m('joe@apple.com') + assert m('@longestokaccount') == {'longestokaccount'} + assert not m('@longestokaccountx') + assert m('@abc- @-foo @bar.') == {'abc', 'bar'} def test_post_basic(): ret = post_basic(POST_1) -- GitLab