From d018f2a0577172ccef8f28ca2e43f15be3e26616 Mon Sep 17 00:00:00 2001 From: roadscape <roadscape@users.noreply.github.com> Date: Thu, 3 May 2018 15:29:02 -0500 Subject: [PATCH] bump up LRU post map to 2M entries --- README.md | 2 +- hive/indexer/posts.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 0f91f2c77..f1464b8f6 100644 --- a/README.md +++ b/README.md @@ -68,7 +68,7 @@ Precedence: CLI over ENV over hive.conf. Check `hive --help` for details. ### Hardware - Focus on Postgres performance - - 2GB of memory for hive itself (TODO: verify/limit max usage during initial sync) + - 2.5GB of memory for `hive sync` process - 200GB storage for database diff --git a/hive/indexer/posts.py b/hive/indexer/posts.py index f8f312dc4..58c3a7371 100644 --- a/hive/indexer/posts.py +++ b/hive/indexer/posts.py @@ -18,6 +18,7 @@ class Posts: """Handles critical/core post ops and data.""" # LRU cache for (author-permlink -> id) lookup + CACHE_SIZE = 2000000 _ids = collections.OrderedDict() _hits = 0 _miss = 0 @@ -56,7 +57,7 @@ class Posts: def _set_id(cls, url, pid): """Add an entry to the LRU, maintaining max size.""" assert pid, "no pid provided for %s" % url - if len(cls._ids) > 1000000: + if len(cls._ids) > cls.CACHE_SIZE: cls._ids.popitem(last=False) cls._ids[url] = pid -- GitLab