diff --git a/README.md b/README.md index 0f91f2c77acebb8a8cd5d9aa4b838463146d4cc0..f1464b8f6aa7246a7d25513e9afeacf7df5097ae 100644 --- a/README.md +++ b/README.md @@ -68,7 +68,7 @@ Precedence: CLI over ENV over hive.conf. Check `hive --help` for details. ### Hardware - Focus on Postgres performance - - 2GB of memory for hive itself (TODO: verify/limit max usage during initial sync) + - 2.5GB of memory for `hive sync` process - 200GB storage for database diff --git a/hive/indexer/posts.py b/hive/indexer/posts.py index f8f312dc4a0de1510c3c22b75a4b1ca9521b002e..58c3a7371f0712c224536ad9917d1a69daa719c6 100644 --- a/hive/indexer/posts.py +++ b/hive/indexer/posts.py @@ -18,6 +18,7 @@ class Posts: """Handles critical/core post ops and data.""" # LRU cache for (author-permlink -> id) lookup + CACHE_SIZE = 2000000 _ids = collections.OrderedDict() _hits = 0 _miss = 0 @@ -56,7 +57,7 @@ class Posts: def _set_id(cls, url, pid): """Add an entry to the LRU, maintaining max size.""" assert pid, "no pid provided for %s" % url - if len(cls._ids) > 1000000: + if len(cls._ids) > cls.CACHE_SIZE: cls._ids.popitem(last=False) cls._ids[url] = pid