Merge branch 'dk-follow-name-escaping' into 'develop'

Escape characters in user names provided in follow_op. See merge request !346

Merge branch 'dk-follow-name-escaping' into 'develop'
6e925e63 · Bartek Wrona · 0d5073dc · cf31b17b · 6e925e63 · 6e925e63
Commit 6e925e63 authored 4 years ago by Bartek Wrona
--- a/hive/indexer/follow.py
+++ b/hive/indexer/follow.py
@@ -10,6 +10,7 @@ from hive.utils.misc import chunks
 from hive.indexer.accounts import Accounts
 from hive.indexer.db_adapter_holder import DbAdapterHolder
+from hive.utils.normalize import escape_characters
 log = logging.getLogger(__name__)
@@ -124,8 +125,8 @@ class Follow(DbAdapterHolder):
        if non_existent_names:
            log.warning("Follow op validation, following names does not exists in database: {}".format(non_existent_names))
-        return dict(flr=op['follower'],
+        return dict(flr=escape_characters(op['follower']),
-                    flg=op['following'],
+                    flg=[escape_characters(following) for following in op['following']],
                    state=defs[what],
                    at=date)
@@ -226,7 +227,7 @@ class Follow(DbAdapterHolder):
            cls.beginTx()
            for _, follow_item in cls.follow_items_to_flush.items():
                if count < limit:
-                    values.append("({}, '{}', '{}', '{}'::timestamp, {}, {}, {}, {}, {})".format(follow_item['idx'],
+                    values.append("({}, {}, {}, '{}'::timestamp, {}, {}, {}, {}, {})".format(follow_item['idx'],
                                                                          follow_item['flr'],
                                                                          follow_item['flg'],
                                                                          follow_item['at'],
@@ -241,7 +242,7 @@ class Follow(DbAdapterHolder):
                    query += sql_postfix
                    cls.db.query(query)
                    values.clear()
-                    values.append("({}, '{}', '{}', '{}'::timestamp, {}, {}, {}, {}, {})".format(follow_item['idx'],
+                    values.append("({}, {}, {}, '{}'::timestamp, {}, {}, {}, {}, {})".format(follow_item['idx'],
                                                                          follow_item['flr'],
                                                                          follow_item['flg'],
                                                                          follow_item['at'],
@@ -269,7 +270,7 @@ class Follow(DbAdapterHolder):
            for state, update_flush_items in cls.follow_update_items_to_flush.items():
                for chunk in chunks(update_flush_items, 1000):
                    sql = None
-                    query_values = ','.join(["('{}')".format(account) for account in chunk])
+                    query_values = ','.join(["({})".format(account) for account in chunk])
                    # [DK] probaly not a bad idea to move that logic to SQL function
                    if state == 9:
                        #reset blacklists for follower
@@ -360,7 +361,7 @@ class Follow(DbAdapterHolder):
                            UPDATE
                                hive_follows hf
                            SET
-                                hf.follow_blacklists = true
+                                follow_blacklists = true
                            FROM
                            (
                                SELECT
@@ -481,7 +482,7 @@ class Follow(DbAdapterHolder):
        for col, deltas in cls._delta.items():
            for delta, names in _flip_dict(deltas).items():
                updated += len(names)
-                query_values = ','.join(["('{}')".format(account) for account in names])
+                query_values = ','.join(["({})".format(account) for account in names])
                sql = """
                    UPDATE
                        hive_accounts ha
@@ -518,7 +519,7 @@ class Follow(DbAdapterHolder):
        """
        names = set([*cls._delta[FOLLOWERS].keys(),
                   *cls._delta[FOLLOWING].keys()])
-        query_values = ','.join(["('{}')".format(account) for account in names])
+        query_values = ','.join(["({})".format(account) for account in names])
        sql = """
            UPDATE
                hive_accounts ha

--- a/hive/steem/client.py
+++ b/hive/steem/client.py
@@ -153,16 +153,19 @@ class SteemClient:
        batch_params = [{'block_num': i} for i in block_nums]
        for result in self.__exec_batch('get_block', batch_params):
-            assert 'block' in result, "result w/o block key: %s" % result
+            if 'block' in result:
-            block = result['block']
+                block = result['block']
-            num = int(block['block_id'][:8], base=16)
+                num = int(block['block_id'][:8], base=16)
-            blocks[num] = block
+                blocks[num] = block
        for block_num in block_nums:
            data = MockBlockProvider.get_block_data(block_num, True)
            if data is not None:
-                blocks[block_num]["transactions"].extend(data["transactions"])
+                if block_num in blocks:
-                blocks[block_num]["transaction_ids"].extend(data["transaction_ids"])
+                    blocks[block_num]["transactions"].extend(data["transactions"])
+                    blocks[block_num]["transaction_ids"].extend(data["transaction_ids"])
+                else:
+                    blocks[block_num] = data
        return [blocks[x] for x in block_nums]

--- a/mock_data/block_data/follow_op/mock_block_data_follow.json
+++ b/mock_data/block_data/follow_op/mock_block_data_follow.json
--- a/scripts/ci_sync.sh
+++ b/scripts/ci_sync.sh
@@ -58,5 +58,5 @@ fi
 echo Attempting to starting hive sync using hived node: $HIVEMIND_SOURCE_HIVED_URL . Max sync block is: $HIVEMIND_MAX_BLOCK
 echo Attempting to access database $DB_URL
-./$HIVE_NAME sync --pid-file hive_sync.pid --test-max-block=$HIVEMIND_MAX_BLOCK --exit-after-sync --test-profile=False --steemd-url "$HIVEMIND_SOURCE_HIVED_URL" --prometheus-port 11011 --database-url $DB_URL 2>&1 | tee -i hivemind-sync.log
+./$HIVE_NAME sync --pid-file hive_sync.pid --test-max-block=$HIVEMIND_MAX_BLOCK --exit-after-sync --test-profile=False --steemd-url "$HIVEMIND_SOURCE_HIVED_URL" --prometheus-port 11011 --database-url $DB_URL --mock-block-data-path mock_data/block_data/follow_op/mock_block_data_follow.json 2>&1 | tee -i hivemind-sync.log
 rm hive_sync.pid
--- a/scripts/operation_extractor.py
+++ b/scripts/operation_extractor.py
+#!/usr/bin/python3
+"""
+Scan blocks in the inclusive range `from_block`..`to_block` and save them to `output_file`.
+
+Operations whose type is not listed in `operations` are stripped from every transaction,
+so blocks without any matching operation are saved with empty operation lists.
+By default the output is a single JSON object mapping block numbers to block data.
+There is an option to save only operations data, without blocks data: use `--dump-ops-only True`;
+matching operations are then written one JSON array per line.
+You can pass multiple operations.
+Example:
+./operation_extractor.py https://api.hive.blog 20000000 25000000 dump.json custom_json_operation --dump-ops-only True
+"""
+import argparse
+from json import dumps
+
+from hive.steem.client import SteemClient
+
+# Number of blocks requested per get_blocks_range call.
+BLOCKS_PER_REQUEST = 1000
+
+
+def _str_to_bool(value):
+    """Parse a command-line boolean; plain `bool` would turn the string "False" into True."""
+    normalized = value.strip().lower()
+    if normalized in ("true", "t", "yes", "y", "1"):
+        return True
+    if normalized in ("false", "f", "no", "n", "0", ""):
+        return False
+    raise argparse.ArgumentTypeError("expected a boolean value, got: {!r}".format(value))
+
+
+def main():
+    """Parse the command line, fetch the requested blocks and write the filtered dump."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("hived_url", type=str, help="Url address of hived instance")
+    parser.add_argument("from_block", type=int, help="Scan from block")
+    parser.add_argument("to_block", type=int, help="Scan to block")
+    parser.add_argument("output_file", type=str, help="Prepared blocks will be saved in this file")
+    parser.add_argument("operations", type=str, nargs='+', help="Save selected operations")
+    parser.add_argument("--dump-ops-only", type=_str_to_bool, default=False, help="Dump only selected ops, without block data")
+    args = parser.parse_args()
+
+    wanted_operations = set(args.operations)
+    client = SteemClient({"default":args.hived_url})
+    from_block = args.from_block
+    # Tracks whether a separator is needed, so the block dump never ends with a trailing comma.
+    first_entry = True
+    with open(args.output_file, "w", encoding="utf-8") as output_file:
+        if not args.dump_ops_only:
+            output_file.write("{\n")
+        # `<=` so that a single-block range (from_block == to_block) is still scanned.
+        while from_block <= args.to_block:
+            # The bound passed on is at most to_block + 1, so that to_block itself is fetched
+            # (assumes get_blocks_range excludes its upper bound - TODO confirm).
+            to_block = min(from_block + BLOCKS_PER_REQUEST, args.to_block + 1)
+            print("Processing range from: ", from_block, " to: ", to_block)
+            blocks = client.get_blocks_range(from_block, to_block)
+            for block in blocks:
+                # The block number is read from the first 8 hex digits of the block id.
+                block_num = int(block['block_id'][:8], base=16)
+                block_data = dict(block)
+                for transaction in block_data['transactions']:
+                    transaction['operations'] = [op for op in transaction['operations'] if op['type'] in wanted_operations]
+                    if args.dump_ops_only and transaction['operations']:
+                        output_file.write("{}\n".format(dumps(transaction['operations'])))
+                if not args.dump_ops_only:
+                    if not first_entry:
+                        output_file.write(",\n")
+                    output_file.write('"{}":{}'.format(block_num, dumps(block_data)))
+                    first_entry = False
+            from_block = to_block
+        if not args.dump_ops_only:
+            output_file.write("\n}\n")
+
+
+if __name__ == "__main__":
+    main()
--- a/tests_api @ 46409462
+++ b/tests_api @ 46409462
-Subproject commit 819563bf5c43f0d7620b4be6e2a33df86dd168e4
+Subproject commit 4640946229bce481c03f64bea01d97588340a213