Skip to content

Commit

Permalink
Use most-significant bits to compute distance-0 position from hash (#22)
Browse files Browse the repository at this point in the history
  • Loading branch information
dpdani authored Apr 19, 2024
1 parent 8470dcf commit c855772
Show file tree
Hide file tree
Showing 16 changed files with 237 additions and 132 deletions.
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ dependencies = []
dev = [
"build==1.0.3",
"pytest==7.4.2",
"pytest-reraise==2.1.2",
"black==23.9.1",
"ruff==0.0.292",
]
Expand Down
1 change: 1 addition & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ set(PY_BUILD_CMAKE_MODULE_NAME "cereggii")

# Ask CMake to write compile_commands.json into the build tree (used by editors / static analysis tools).
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
# x86-64-specific code generation flags. add_compile_options() is directory-scoped, so these
# apply to every target defined after this point in this directory and below.
#   -mcx16    allows the compiler to emit CMPXCHG16B (16-byte compare-and-swap).
#   -msse4.2  enables the SSE4.2 instruction set, which provides the crc32 builtins.
# NOTE(review): both flags are passed unconditionally; presumably only GCC/Clang on x86-64
# are supported build configurations -- confirm.
add_compile_options("-mcx16")
add_compile_options("-msse4.2")

# NOTE(review): this prints whatever value Python3_INCLUDE_DIR holds *before* the
# execute_process() call below overwrites it -- confirm that is the value meant to be logged.
message("Python3_INCLUDE_DIR=" ${Python3_INCLUDE_DIR})
# Query the include directory of whichever `python` is first on PATH at configure time and
# store it in Python3_INCLUDE_DIR (end='' avoids a trailing newline in the variable).
# NOTE(review): the exit status is not checked, so a missing `python` leaves the variable empty.
execute_process(COMMAND python -c "import sysconfig; print(sysconfig.get_path('include'), end='')" OUTPUT_VARIABLE Python3_INCLUDE_DIR)
Expand Down
1 change: 1 addition & 0 deletions src/cereggii/_cereggii.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ class AtomicDict:
# """
def compact(self) -> None: ...
def debug(self) -> dict: ...
def rehash(self, o: object) -> int: ...

class AtomicRef:
"""An object reference that may be updated atomically."""
Expand Down
29 changes: 14 additions & 15 deletions src/cereggii/atomic_dict/atomic_dict.c
Original file line number Diff line number Diff line change
Expand Up @@ -198,8 +198,15 @@ AtomicDict_init(AtomicDict *self, PyObject *args, PyObject *kwargs)
if (hash == -1)
goto fail;

self->len++;
int inserted = AtomicDict_UnsafeInsert(self, key, hash, value, self->len); // we want to avoid pos = 0
self->len++; // we want to avoid pos = 0
AtomicDict_Entry *entry = AtomicDict_GetEntryAt(self->len, meta);
Py_INCREF(key);
Py_INCREF(value);
entry->flags = ENTRY_FLAGS_RESERVED;
entry->hash = hash;
entry->key = key;
entry->value = value;
int inserted = AtomicDict_UnsafeInsert(meta, hash, self->len);
if (inserted == -1) {
Py_DECREF(meta);
log_size++;
Expand Down Expand Up @@ -271,6 +278,9 @@ AtomicDict_init(AtomicDict *self, PyObject *args, PyObject *kwargs)
fail:
Py_XDECREF(meta);
Py_XDECREF(init_dict);
if (!PyErr_Occurred()) {
PyErr_SetString(PyExc_RuntimeError, "error during initialization.");
}
return -1;
}

Expand Down Expand Up @@ -317,17 +327,9 @@ AtomicDict_traverse(AtomicDict *self, visitproc visit, void *arg)
* calls to this function don't try to insert the same key into the same AtomicDict.
**/
int
AtomicDict_UnsafeInsert(AtomicDict *self, PyObject *key, Py_hash_t hash, PyObject *value, Py_ssize_t pos)
AtomicDict_UnsafeInsert(AtomicDict_Meta *meta, Py_hash_t hash, uint64_t pos)
{
AtomicDict_Meta *meta = NULL;
meta = (AtomicDict_Meta *) AtomicRef_Get(self->metadata);
// pos === node_index
AtomicDict_Entry *entry = AtomicDict_GetEntryAt(pos, meta);
entry->flags = ENTRY_FLAGS_RESERVED;
entry->hash = hash;
entry->key = key;
entry->value = value;

AtomicDict_Node temp;
AtomicDict_Node node = {
.index = pos,
Expand All @@ -348,18 +350,15 @@ AtomicDict_UnsafeInsert(AtomicDict *self, PyObject *key, Py_hash_t hash, PyObjec
// non-atomic robin hood
node.distance = probe;
AtomicDict_WriteNodeAt(ix + probe, &node, meta);
// ix = ix + probe - temp.distance;
ix -= temp.distance;
probe = temp.distance;
node = temp;
}
}
// probes exhausted
Py_DECREF(meta);
return -1;
done:
Py_DECREF(meta);
Py_INCREF(key);
Py_INCREF(value);
return 0;
}

Expand Down
4 changes: 2 additions & 2 deletions src/cereggii/atomic_dict/blocks.c
Original file line number Diff line number Diff line change
Expand Up @@ -129,10 +129,10 @@ AtomicDict_GetEmptyEntry(AtomicDict *self, AtomicDict_Meta *meta, AtomicDict_Res
return -1;
}

inline uint64_t
inline int64_t
AtomicDict_BlockOf(uint64_t entry_ix)
{
return entry_ix >> ATOMIC_DICT_LOG_ENTRIES_IN_BLOCK;
return (int64_t) entry_ix >> ATOMIC_DICT_LOG_ENTRIES_IN_BLOCK;
}

inline uint64_t
Expand Down
7 changes: 6 additions & 1 deletion src/cereggii/atomic_dict/insert.c
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,10 @@ AtomicDict_ExpectedInsertOrUpdateCloseToDistance0(AtomicDict_Meta *meta,
AtomicDict_RobinHoodInsert(meta, temp, to_insert, (int) (distance_0 % meta->nodes_in_zone));

if (rhr == grow) {
if (skip_entry_check) {
return 0;
}

*must_grow = 1;
goto fail;
}
Expand Down Expand Up @@ -179,7 +183,8 @@ AtomicDict_ExpectedInsertOrUpdate(AtomicDict_Meta *meta, PyObject *key, Py_hash_
done = 0;
expectation = 1;
uint64_t distance_0 = AtomicDict_Distance0Of(hash, meta);
uint64_t distance = distance_0 % meta->nodes_in_zone; // shorter distances handled by the fast-path
// uint64_t distance = distance_0 % meta->nodes_in_zone; // shorter distances handled by the fast-path
uint64_t distance = 0;
AtomicDict_BufferedNodeReader reader;
reader.zone = -1;
PyObject *current = NULL;
Expand Down
8 changes: 4 additions & 4 deletions src/cereggii/atomic_dict/lookup.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ AtomicDict_Lookup(AtomicDict_Meta *meta, PyObject *key, Py_hash_t hash,
if (
is_compact && (
(ix + probe + reservations - node.distance > ix)
|| (probe >= meta->log_size)
|| (probe >= meta->max_distance)
)) {
goto not_found;
}
Expand Down Expand Up @@ -85,7 +85,7 @@ AtomicDict_Lookup(AtomicDict_Meta *meta, PyObject *key, Py_hash_t hash,
found:
result->error = 0;
result->found = 1;
result->position = ix + probe + reservations;
result->position = (ix + probe + reservations) & (meta->size - 1);
result->node = node;
}

Expand Down Expand Up @@ -126,7 +126,7 @@ AtomicDict_LookupEntry(AtomicDict_Meta *meta, uint64_t entry_ix, Py_hash_t hash,
if (
is_compact && (
(ix + probe + reservations - node.distance > ix)
|| (probe >= meta->log_size)
|| (probe >= meta->max_distance)
)) {
goto not_found;
}
Expand All @@ -147,7 +147,7 @@ AtomicDict_LookupEntry(AtomicDict_Meta *meta, uint64_t entry_ix, Py_hash_t hash,
found:
result->error = 0;
result->found = 1;
result->position = ix + probe + reservations;
result->position = (ix + probe + reservations) & (meta->size - 1);
result->node = node;
}

Expand Down
1 change: 1 addition & 0 deletions src/cereggii/atomic_dict/meta.c
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ AtomicDictMeta_New(uint8_t log_size)
meta->index_mask = ((1UL << log_size) - 1) << (node_sizes.node_size - log_size);
meta->distance_mask = ((1UL << node_sizes.distance_size) - 1) << node_sizes.tag_size;
meta->tag_mask = (Py_hash_t) (1UL << node_sizes.tag_size) - 1;
meta->d0_shift = SIZEOF_PY_HASH_T * CHAR_BIT - meta->log_size;
switch (node_sizes.node_size) {
case 8:
meta->shift_mask = 8 - 1;
Expand Down
21 changes: 17 additions & 4 deletions src/cereggii/atomic_dict/node_ops.c
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,23 @@ AtomicDict_ZoneOf(uint64_t ix, AtomicDict_Meta *meta)
return AtomicDict_RegionOf(ix, meta) & (ULONG_MAX - 1UL);
}

// Branch-free absolute value for a signed integer as wide as Py_hash_t:
// with m = x >> (width - 1), the result is (x ^ m) - m.
// Relies on >> of a negative value being an arithmetic shift (implementation-defined in C).
// The argument is expanded three times, so it must not have side effects.
#define ABS(x) (((x) ^ ((x) >> (SIZEOF_PY_HASH_T * CHAR_BIT - 1))) - ((x) >> (SIZEOF_PY_HASH_T * CHAR_BIT - 1)))

// Two fixed 64-bit constants, one per CRC32 computation in REHASH below.
#define UPPER_SEED 12923598712359872066ull
#define LOWER_SEED 7467732452331123588ull
// Re-hash x into a uint64_t: the CRC32 result computed with LOWER_SEED is OR-ed with the
// CRC32 result computed with UPPER_SEED shifted left by 32 bits.
// __builtin_ia32_crc32di is a GCC/Clang x86 builtin that requires SSE4.2.
// NOTE(review): GCC documents the builtin's first argument as the running CRC and the second
// as the data, and the underlying instruction only reads the low 32 bits of the CRC operand;
// as written, x is the CRC operand and the seed is the data -- confirm this order is intended.
#define REHASH(x) (uint64_t) (__builtin_ia32_crc32di((x), LOWER_SEED) | (__builtin_ia32_crc32di((x), UPPER_SEED) << 32))

/**
 * Return REHASH(hash(ob)) as a Python int.
 *
 * self is unused; ob is the object whose Python-level hash is re-hashed.
 *
 * Returns a new reference to a Python int on success. Returns NULL with an
 * exception set when ob cannot be hashed (e.g. it is unhashable) or when the
 * result object cannot be allocated.
 **/
PyObject *
AtomicDict_ReHash(AtomicDict *Py_UNUSED(self), PyObject *ob)
{
    Py_hash_t hash = PyObject_Hash(ob);
    if (hash == -1) {
        // PyObject_Hash reports failure with -1 and leaves the exception set;
        // propagate it instead of returning a value alongside a pending error.
        return NULL;
    }

    return PyLong_FromUnsignedLongLong(REHASH(hash));
}

inline uint64_t
AtomicDict_Distance0Of(Py_hash_t hash, AtomicDict_Meta *meta)
{
return hash & (meta->size - 1);
return REHASH(hash) >> meta->d0_shift;
}

inline uint64_t
Expand Down Expand Up @@ -116,10 +129,10 @@ AtomicDict_ComputeBeginEndWrite(AtomicDict_Meta *meta, AtomicDict_Node *read_buf
}
assert(*begin_write != -1);
*end_write = -1;
for (j = *begin_write + 1; j < meta->nodes_in_zone; ++j) {
for (j = meta->nodes_in_zone - 1; j > *begin_write; --j) {
AtomicDict_ComputeRawNode(&temp[j], meta);
if (temp[j].node == read_buffer[j].node) {
*end_write = j;
if (temp[j].node != read_buffer[j].node) {
*end_write = j + 1;
break;
}
}
Expand Down
10 changes: 5 additions & 5 deletions src/cereggii/atomic_dict/robin_hood.c
Original file line number Diff line number Diff line change
Expand Up @@ -83,11 +83,11 @@ AtomicDict_RobinHoodDelete(AtomicDict_Meta *meta, AtomicDict_Node *nodes, int to
nodes[probe].distance--;
}

if (probe + 1 < meta->nodes_in_zone && (
nodes[probe + 1].node == 0 || nodes[probe + 1].distance == 0
)) {
AtomicDict_ParseNodeFromRaw(0, &nodes[probe], meta);
}
// if (probe + 1 < meta->nodes_in_zone && (
// nodes[probe + 1].node == 0 || nodes[probe + 1].distance == 0
// )) {
// AtomicDict_ParseNodeFromRaw(0, &nodes[probe], meta);
// }

return ok;
}
1 change: 1 addition & 0 deletions src/cereggii/cereggii.c
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,7 @@ PyTypeObject AtomicRef_Type = {

static PyMethodDef AtomicDict_methods[] = {
{"debug", (PyCFunction) AtomicDict_Debug, METH_NOARGS, NULL},
{"rehash", (PyCFunction) AtomicDict_ReHash, METH_O, NULL},
{"compact", (PyCFunction) AtomicDict_Compact_callable, METH_NOARGS, NULL},
{"get", (PyCFunction) AtomicDict_GetItemOrDefaultVarargs, METH_VARARGS | METH_KEYWORDS, NULL},
{"len_bounds", (PyCFunction) AtomicDict_LenBounds, METH_NOARGS, NULL},
Expand Down
2 changes: 2 additions & 0 deletions src/include/atomic_dict.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,5 +72,7 @@ int AtomicDict_traverse(AtomicDict *self, visitproc visit, void *arg);

void AtomicDict_dealloc(AtomicDict *self);

PyObject *AtomicDict_ReHash(AtomicDict *self, PyObject *ob);


#endif //CEREGGII_ATOMIC_DICT_H
5 changes: 3 additions & 2 deletions src/include/atomic_dict_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ struct AtomicDict_Meta {
uint64_t distance_mask;
Py_hash_t tag_mask;
uint64_t shift_mask;
uint64_t d0_shift;

AtomicDict_Node tombstone;
AtomicDict_Node zero;
Expand Down Expand Up @@ -137,7 +138,7 @@ void AtomicDictMeta_ShrinkBlocks(AtomicDict *self, AtomicDict_Meta *from_meta, A

AtomicDict_Block *AtomicDictBlock_New(AtomicDict_Meta *meta);

uint64_t AtomicDict_BlockOf(uint64_t entry_ix);
int64_t AtomicDict_BlockOf(uint64_t entry_ix);

uint64_t AtomicDict_PositionInBlockOf(uint64_t entry_ix);

Expand Down Expand Up @@ -346,7 +347,7 @@ void AtomicDict_LookupEntry(AtomicDict_Meta *meta, uint64_t entry_ix, Py_hash_t

int AtomicDict_Delete(AtomicDict_Meta *meta, PyObject *key, Py_hash_t hash);

int AtomicDict_UnsafeInsert(AtomicDict *self, PyObject *key, Py_hash_t hash, PyObject *value, Py_ssize_t pos);
int AtomicDict_UnsafeInsert(AtomicDict_Meta *meta, Py_hash_t hash, uint64_t pos);

PyObject *AtomicDict_ExpectedInsertOrUpdate(AtomicDict_Meta *meta, PyObject *key, Py_hash_t hash,
PyObject *expected, PyObject *desired,
Expand Down
1 change: 1 addition & 0 deletions tests/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
keys_for_hash_for_log_size.pickle
30 changes: 30 additions & 0 deletions tests/atomic_dict_hashing_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import pickle
from pathlib import Path

from cereggii import AtomicDict


# Cache file (git-ignored) holding, for each table log_size, the candidate integer keys
# grouped by the position obtained from the most-significant bits of their re-hash.
stored_hashes = Path(__file__).parent / "keys_for_hash_for_log_size.pickle"

if not stored_hashes.exists():
    d = AtomicDict()

    # log sizes 6 .. max_search_log_size - 1 are precomputed
    max_search_log_size = 10
    keys_for_hash_for_log_size = {}

    for log_size in range(6, max_search_log_size):
        # Create every position up front, in ascending order, so that positions which no
        # candidate key maps to are still present (with an empty list).
        keys_for_pos = {pos: [] for pos in range(1 << log_size)}

        # Single pass over the candidate keys: each key is re-hashed once and appended to
        # the bucket of its position. Keys are visited in ascending order, so every bucket
        # ends up sorted, exactly as if each position had scanned the whole key range itself.
        # The shift assumes rehash() yields a 64-bit value -- TODO confirm for 32-bit builds.
        for key in range((1 << log_size) * 32):
            keys_for_pos[d.rehash(key) >> (64 - log_size)].append(key)

        keys_for_hash_for_log_size[log_size] = keys_for_pos

    with open(stored_hashes, "wb") as f:
        pickle.dump(keys_for_hash_for_log_size, f)
else:
    # The pickle is only ever the file written by the branch above; never point
    # stored_hashes at data from an untrusted source.
    with open(stored_hashes, "rb") as f:
        keys_for_hash_for_log_size = pickle.load(f)  # noqa: S301
Loading

0 comments on commit c855772

Please sign in to comment.