Skip to content
Snippets Groups Projects
Commit 6e925e63 authored by Bartek Wrona's avatar Bartek Wrona
Browse files

Merge branch 'dk-follow-name-escaping' into 'develop'

Escape characters in user names provided in follow_op

See merge request !346
parents 0d5073dc cf31b17b
No related branches found
No related tags found
2 merge requests!456Release candidate v1 24,!346Escape characters in user names provided in follow_op
...@@ -10,6 +10,7 @@ from hive.utils.misc import chunks ...@@ -10,6 +10,7 @@ from hive.utils.misc import chunks
from hive.indexer.accounts import Accounts from hive.indexer.accounts import Accounts
from hive.indexer.db_adapter_holder import DbAdapterHolder from hive.indexer.db_adapter_holder import DbAdapterHolder
from hive.utils.normalize import escape_characters
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
...@@ -124,8 +125,8 @@ class Follow(DbAdapterHolder): ...@@ -124,8 +125,8 @@ class Follow(DbAdapterHolder):
if non_existent_names: if non_existent_names:
log.warning("Follow op validation, following names does not exists in database: {}".format(non_existent_names)) log.warning("Follow op validation, following names does not exists in database: {}".format(non_existent_names))
return dict(flr=op['follower'], return dict(flr=escape_characters(op['follower']),
flg=op['following'], flg=[escape_characters(following) for following in op['following']],
state=defs[what], state=defs[what],
at=date) at=date)
...@@ -226,7 +227,7 @@ class Follow(DbAdapterHolder): ...@@ -226,7 +227,7 @@ class Follow(DbAdapterHolder):
cls.beginTx() cls.beginTx()
for _, follow_item in cls.follow_items_to_flush.items(): for _, follow_item in cls.follow_items_to_flush.items():
if count < limit: if count < limit:
values.append("({}, '{}', '{}', '{}'::timestamp, {}, {}, {}, {}, {})".format(follow_item['idx'], values.append("({}, {}, {}, '{}'::timestamp, {}, {}, {}, {}, {})".format(follow_item['idx'],
follow_item['flr'], follow_item['flr'],
follow_item['flg'], follow_item['flg'],
follow_item['at'], follow_item['at'],
...@@ -241,7 +242,7 @@ class Follow(DbAdapterHolder): ...@@ -241,7 +242,7 @@ class Follow(DbAdapterHolder):
query += sql_postfix query += sql_postfix
cls.db.query(query) cls.db.query(query)
values.clear() values.clear()
values.append("({}, '{}', '{}', '{}'::timestamp, {}, {}, {}, {}, {})".format(follow_item['idx'], values.append("({}, {}, {}, '{}'::timestamp, {}, {}, {}, {}, {})".format(follow_item['idx'],
follow_item['flr'], follow_item['flr'],
follow_item['flg'], follow_item['flg'],
follow_item['at'], follow_item['at'],
...@@ -269,7 +270,7 @@ class Follow(DbAdapterHolder): ...@@ -269,7 +270,7 @@ class Follow(DbAdapterHolder):
for state, update_flush_items in cls.follow_update_items_to_flush.items(): for state, update_flush_items in cls.follow_update_items_to_flush.items():
for chunk in chunks(update_flush_items, 1000): for chunk in chunks(update_flush_items, 1000):
sql = None sql = None
query_values = ','.join(["('{}')".format(account) for account in chunk]) query_values = ','.join(["({})".format(account) for account in chunk])
# [DK] probably not a bad idea to move that logic to SQL function # [DK] probably not a bad idea to move that logic to SQL function
if state == 9: if state == 9:
#reset blacklists for follower #reset blacklists for follower
...@@ -360,7 +361,7 @@ class Follow(DbAdapterHolder): ...@@ -360,7 +361,7 @@ class Follow(DbAdapterHolder):
UPDATE UPDATE
hive_follows hf hive_follows hf
SET SET
hf.follow_blacklists = true follow_blacklists = true
FROM FROM
( (
SELECT SELECT
...@@ -481,7 +482,7 @@ class Follow(DbAdapterHolder): ...@@ -481,7 +482,7 @@ class Follow(DbAdapterHolder):
for col, deltas in cls._delta.items(): for col, deltas in cls._delta.items():
for delta, names in _flip_dict(deltas).items(): for delta, names in _flip_dict(deltas).items():
updated += len(names) updated += len(names)
query_values = ','.join(["('{}')".format(account) for account in names]) query_values = ','.join(["({})".format(account) for account in names])
sql = """ sql = """
UPDATE UPDATE
hive_accounts ha hive_accounts ha
...@@ -518,7 +519,7 @@ class Follow(DbAdapterHolder): ...@@ -518,7 +519,7 @@ class Follow(DbAdapterHolder):
""" """
names = set([*cls._delta[FOLLOWERS].keys(), names = set([*cls._delta[FOLLOWERS].keys(),
*cls._delta[FOLLOWING].keys()]) *cls._delta[FOLLOWING].keys()])
query_values = ','.join(["('{}')".format(account) for account in names]) query_values = ','.join(["({})".format(account) for account in names])
sql = """ sql = """
UPDATE UPDATE
hive_accounts ha hive_accounts ha
......
...@@ -153,16 +153,19 @@ class SteemClient: ...@@ -153,16 +153,19 @@ class SteemClient:
batch_params = [{'block_num': i} for i in block_nums] batch_params = [{'block_num': i} for i in block_nums]
for result in self.__exec_batch('get_block', batch_params): for result in self.__exec_batch('get_block', batch_params):
assert 'block' in result, "result w/o block key: %s" % result if 'block' in result:
block = result['block'] block = result['block']
num = int(block['block_id'][:8], base=16) num = int(block['block_id'][:8], base=16)
blocks[num] = block blocks[num] = block
for block_num in block_nums: for block_num in block_nums:
data = MockBlockProvider.get_block_data(block_num, True) data = MockBlockProvider.get_block_data(block_num, True)
if data is not None: if data is not None:
blocks[block_num]["transactions"].extend(data["transactions"]) if block_num in blocks:
blocks[block_num]["transaction_ids"].extend(data["transaction_ids"]) blocks[block_num]["transactions"].extend(data["transactions"])
blocks[block_num]["transaction_ids"].extend(data["transaction_ids"])
else:
blocks[block_num] = data
return [blocks[x] for x in block_nums] return [blocks[x] for x in block_nums]
......
This diff is collapsed.
...@@ -58,5 +58,5 @@ fi ...@@ -58,5 +58,5 @@ fi
echo Attempting to starting hive sync using hived node: $HIVEMIND_SOURCE_HIVED_URL . Max sync block is: $HIVEMIND_MAX_BLOCK echo Attempting to starting hive sync using hived node: $HIVEMIND_SOURCE_HIVED_URL . Max sync block is: $HIVEMIND_MAX_BLOCK
echo Attempting to access database $DB_URL echo Attempting to access database $DB_URL
./$HIVE_NAME sync --pid-file hive_sync.pid --test-max-block=$HIVEMIND_MAX_BLOCK --exit-after-sync --test-profile=False --steemd-url "$HIVEMIND_SOURCE_HIVED_URL" --prometheus-port 11011 --database-url $DB_URL 2>&1 | tee -i hivemind-sync.log ./$HIVE_NAME sync --pid-file hive_sync.pid --test-max-block=$HIVEMIND_MAX_BLOCK --exit-after-sync --test-profile=False --steemd-url "$HIVEMIND_SOURCE_HIVED_URL" --prometheus-port 11011 --database-url $DB_URL --mock-block-data-path mock_data/block_data/follow_op/mock_block_data_follow.json 2>&1 | tee -i hivemind-sync.log
rm hive_sync.pid rm hive_sync.pid
#!/usr/bin/python3
"""
Extract selected operations from a range of blocks served by a hived node.

The script scans blocks from `from_block` to `to_block` (upper bound included).
Every operation whose type is listed in `operations` is kept; all other
operations are dropped. By default each block is saved to `output_file` as an
entry of one JSON object keyed by block number, so blocks that contain none of
the selected operations are saved with empty operation lists.

With `--dump-ops-only True` only the matching operations are written, one JSON
list per transaction per line, without any block data.

You can pass multiple operations.

Example:
    ./operation_extractor.py https://api.hive.blog 20000000 25000000 dump.json custom_json_operation --dump-ops-only True
"""
import argparse
from json import dumps

# Number of blocks requested from the node in a single get_blocks_range call.
BLOCKS_PER_REQUEST = 1000


def str_to_bool(value):
    """Parse a command-line boolean such as "True", "false", "1" or "no".

    argparse's `type=bool` cannot be used for this: bool("False") is True
    because every non-empty string is truthy.

    Raises:
        argparse.ArgumentTypeError: when `value` is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    normalized = str(value).strip().lower()
    if normalized in ("true", "t", "yes", "y", "1"):
        return True
    if normalized in ("false", "f", "no", "n", "0"):
        return False
    raise argparse.ArgumentTypeError("expected a boolean value, got: {!r}".format(value))


def filter_operations(block, operations):
    """Return a copy of `block` that keeps only operations of the given types.

    Args:
        block: mapping with a 'transactions' list; every transaction is a
            mapping with an 'operations' list of mappings carrying a 'type' key.
        operations: iterable of operation type names to keep.

    Returns:
        A new dict. Its transactions are new dicts too, so `block` itself is
        left unmodified.
    """
    wanted = frozenset(operations)
    block_data = dict(block)
    block_data['transactions'] = [
        dict(transaction,
             operations=[op for op in transaction['operations'] if op['type'] in wanted])
        for transaction in block['transactions']
    ]
    return block_data


def main():
    """Parse the command line, scan the block range and write the output file."""
    # Imported here so that the helpers above stay importable without the
    # hive package being installed.
    from hive.steem.client import SteemClient

    parser = argparse.ArgumentParser()
    parser.add_argument("hived_url", type=str, help="Url address of hived instance")
    parser.add_argument("from_block", type=int, help="Scan from block")
    parser.add_argument("to_block", type=int, help="Scan to block")
    parser.add_argument("output_file", type=str, help="Prepared blocks will be saved in this file")
    parser.add_argument("operations", type=str, nargs='+', help="Save selected operations")
    parser.add_argument("--dump-ops-only", type=str_to_bool, default=False,
                        help="Dump only selected ops, without block data")
    args = parser.parse_args()

    client = SteemClient({"default": args.hived_url})
    from_block = args.from_block
    # The range end passed to the client is one past the last wanted block,
    # so that args.to_block itself gets scanned (as the original +1 intended).
    end_block = args.to_block + 1

    with open(args.output_file, "w", encoding="utf-8") as output_file:
        if not args.dump_ops_only:
            output_file.write("{\n")
        first_entry = True

        while from_block < end_block:
            to_block = min(from_block + BLOCKS_PER_REQUEST, end_block)
            print("Processing range from: ", from_block, " to: ", to_block)
            blocks = client.get_blocks_range(from_block, to_block)
            for block in blocks:
                # The first 8 hex digits of the block id are parsed as the block number.
                block_num = int(block['block_id'][:8], base=16)
                block_data = filter_operations(block, args.operations)
                if args.dump_ops_only:
                    for transaction in block_data['transactions']:
                        if transaction['operations']:
                            output_file.write("{}\n".format(dumps(transaction['operations'])))
                else:
                    # Separate entries with a comma instead of terminating each
                    # one with it: a trailing comma before '}' is invalid JSON.
                    if not first_entry:
                        output_file.write(",\n")
                    output_file.write('"{}":{}'.format(block_num, dumps(block_data)))
                    first_entry = False
            from_block = to_block

        if not args.dump_ops_only:
            output_file.write("\n}\n")


if __name__ == "__main__":
    main()
Subproject commit 819563bf5c43f0d7620b4be6e2a33df86dd168e4 Subproject commit 4640946229bce481c03f64bea01d97588340a213
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment