From 6458a8ac2baaf15308b1ba5d3d77c9208fe09886 Mon Sep 17 00:00:00 2001 From: Eric Long Date: Tue, 31 Oct 2023 18:23:01 +0800 Subject: [PATCH] lmdb: Platform-specific default map size The current code works fine on x86 platforms since x86-64 uses at least 48 bits of virtual address space. On other 64-bit platforms like aarch64 or riscv64, the virtual address space can be as small as 39 bits [1] [2], so the current 2**40 allocation fails: ``` zict/tests/test_lmdb.py:53: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ self = directory = '/tmp/test_lmdb-sfepfnw8' def __init__(self, directory: str): import lmdb # map_size is the maximum database size but shouldn't fill up the # virtual address space map_size = 1 << 40 if sys.maxsize >= 2**32 else 1 << 28 # writemap requires sparse file support otherwise the whole # `map_size` may be reserved up front on disk writemap = sys.platform.startswith("linux") > self.db = lmdb.open( directory, subdir=True, map_size=map_size, sync=False, writemap=writemap, ) E lmdb.Error: /tmp/test_lmdb-sfepfnw8: Operation not supported zict/lmdb.py:43: Error ``` Switching to 2**37 on aarch64 and riscv64 should fix the issue. [1]: https://www.kernel.org/doc/html/v5.8/arm64/memory.html [2]: https://www.kernel.org/doc/html/latest/riscv/vm-layout.html --- zict/lmdb.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/zict/lmdb.py b/zict/lmdb.py index 71b1005..8ff4dc7 100644 --- a/zict/lmdb.py +++ b/zict/lmdb.py @@ -2,6 +2,7 @@ import pathlib import sys +import platform from collections.abc import ItemsView, Iterable, Iterator, ValuesView from zict.common import ZictBase @@ -25,7 +26,8 @@ class LMDB(ZictBase[str, bytes]): directory: str map_size: int On Linux and MacOS, maximum size of the database file on disk. - Defaults to 1 TiB on 64 bit systems and 1 GiB on 32 bit ones. + Defaults to 128 GiB on aarch64 and riscv64, 1 TiB on other 64 bit systems like + x86-64 and 1 GiB on 32 bit ones. 
On Windows, preallocated total size of the database file on disk. Defaults to 10 MiB to encourage explicitly setting it. @@ -48,10 +50,17 @@ def __init__(self, directory: str | pathlib.Path, map_size: int | None = None): super().__init__() if map_size is None: - if sys.platform != "win32": - map_size = min(2**40, sys.maxsize // 4) - else: + machine = platform.machine() + if sys.platform == "win32": map_size = 10 * 2**20 + elif machine in ["x86_64", "x64"]: + map_size = 2**40 + elif machine in ["i386", "i686", "x86"]: + map_size = 2**30 + elif machine.startswith("aarch64") or machine.startswith("armv8") or machine.startswith("riscv64"): + map_size = 2**37 + else: + map_size = min(2**40, sys.maxsize // 4) self.db = lmdb.open( str(directory),