From 98c8204a11ca0a1f354cbffeb821881e4441b59e Mon Sep 17 00:00:00 2001 From: Alex Sharov Date: Tue, 10 Oct 2023 09:36:47 +0700 Subject: [PATCH] mdbx v13 release (#120) --- mdbx/mdbx.c | 234 ++++++++++++++++++++++++++++++------------- mdbx/mdbx.h | 43 ++++++-- mdbxdist/mdbx.c | 234 ++++++++++++++++++++++++++++++------------- mdbxdist/mdbx_chk.c | 3 +- mdbxdist/mdbx_copy.c | 3 +- mdbxdist/mdbx_drop.c | 3 +- mdbxdist/mdbx_dump.c | 3 +- mdbxdist/mdbx_load.c | 3 +- mdbxdist/mdbx_stat.c | 3 +- 9 files changed, 372 insertions(+), 157 deletions(-) diff --git a/mdbx/mdbx.c b/mdbx/mdbx.c index e973925..634fcc2 100644 --- a/mdbx/mdbx.c +++ b/mdbx/mdbx.c @@ -12,7 +12,7 @@ * . */ #define xMDBX_ALLOY 1 -#define MDBX_BUILD_SOURCERY a0e7c54f688eecaf45ddd7493b737f88a97e4e8b0fdaa55c9d3b00d69e0c8548_v0_12_6_0_gc019631a +#define MDBX_BUILD_SOURCERY 748ccee885a921bfe8ef7b24e71957dd3922fe37083ceb8048cb89c28c5d8f9b_v0_12_7_20_g2b0eae08 #ifdef MDBX_CONFIG_H #include MDBX_CONFIG_H #endif @@ -3734,6 +3734,7 @@ struct MDBX_env { int me_valgrind_handle; #endif #if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__) + MDBX_atomic_uint32_t me_ignore_EDEADLK; pgno_t me_poison_edge; #endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */ @@ -4875,6 +4876,9 @@ atomic_store64(MDBX_atomic_uint64_t *p, const uint64_t value, enum MDBX_memory_order order) { STATIC_ASSERT(sizeof(MDBX_atomic_uint64_t) == 8); #if MDBX_64BIT_ATOMIC +#if __GNUC_PREREQ(11, 0) + STATIC_ASSERT(__alignof__(MDBX_atomic_uint64_t) >= sizeof(uint64_t)); +#endif /* GNU C >= 11 */ #ifdef MDBX_HAVE_C11ATOMICS assert(atomic_is_lock_free(MDBX_c11a_rw(uint64_t, p))); atomic_store_explicit(MDBX_c11a_rw(uint64_t, p), value, mo_c11_store(order)); @@ -7667,18 +7671,15 @@ const char *mdbx_dump_val(const MDBX_val *key, char *const buf, char *const detent = buf + bufsize - 2; char *ptr = buf; *ptr++ = '<'; - for (size_t i = 0; i < key->iov_len; i++) { - const ptrdiff_t left = detent - ptr; - assert(left > 0); - int len = snprintf(ptr, left, "%02x", data[i]); - if (len < 0 || len >= left) - break; - ptr += len; - } - if (ptr < detent) { - ptr[0] = '>'; - ptr[1] = '\0'; + for (size_t i = 0; i < key->iov_len && ptr < detent; i++) { + const char hex[16] = {'0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'}; + *ptr++ = hex[data[i] >> 4]; + *ptr++ = hex[data[i] & 15]; } + if (ptr < detent) + *ptr++ = '>'; + *ptr = '\0'; } return buf; } @@ -10505,27 +10506,47 @@ MDBX_MAYBE_UNUSED static __always_inline size_t __builtin_clzl(size_t value) { #define MDBX_ATTRIBUTE_TARGET(target) __attribute__((__target__(target))) #endif /* MDBX_ATTRIBUTE_TARGET */ -#if defined(__SSE2__) +#ifndef MDBX_GCC_FASTMATH_i686_SIMD_WORKAROUND +/* Workaround for GCC's bug with `-m32 -march=i686 -Ofast` + * gcc/i686-buildroot-linux-gnu/12.2.0/include/xmmintrin.h:814:1: + * error: inlining failed in call to 'always_inline' '_mm_movemask_ps': + * target specific option mismatch */ +#if !defined(__FAST_MATH__) || !__FAST_MATH__ || !defined(__GNUC__) || \ + defined(__e2k__) || defined(__clang__) || defined(__amd64__) || \ + defined(__SSE2__) +#define MDBX_GCC_FASTMATH_i686_SIMD_WORKAROUND 0 +#else +#define MDBX_GCC_FASTMATH_i686_SIMD_WORKAROUND 1 +#endif +#endif /* MDBX_GCC_FASTMATH_i686_SIMD_WORKAROUND */ + +#if defined(__SSE2__) && defined(__SSE__) #define MDBX_ATTRIBUTE_TARGET_SSE2 /* nope */ #elif (defined(_M_IX86_FP) && _M_IX86_FP >= 2) || defined(__amd64__) #define __SSE2__ #define MDBX_ATTRIBUTE_TARGET_SSE2 /* nope */ -#elif defined(MDBX_ATTRIBUTE_TARGET) && defined(__ia32__) -#define MDBX_ATTRIBUTE_TARGET_SSE2 MDBX_ATTRIBUTE_TARGET("sse2") +#elif defined(MDBX_ATTRIBUTE_TARGET) && defined(__ia32__) && \ + !MDBX_GCC_FASTMATH_i686_SIMD_WORKAROUND +#define MDBX_ATTRIBUTE_TARGET_SSE2 MDBX_ATTRIBUTE_TARGET("sse,sse2") #endif /* __SSE2__ */ #if defined(__AVX2__) #define MDBX_ATTRIBUTE_TARGET_AVX2 /* nope */ -#elif defined(MDBX_ATTRIBUTE_TARGET) && defined(__ia32__) -#define MDBX_ATTRIBUTE_TARGET_AVX2 MDBX_ATTRIBUTE_TARGET("avx2") +#elif defined(MDBX_ATTRIBUTE_TARGET) && defined(__ia32__) && \ + !MDBX_GCC_FASTMATH_i686_SIMD_WORKAROUND +#define MDBX_ATTRIBUTE_TARGET_AVX2 MDBX_ATTRIBUTE_TARGET("sse,sse2,avx,avx2") #endif /* __AVX2__ */ +#if defined(MDBX_ATTRIBUTE_TARGET_AVX2) #if defined(__AVX512BW__) #define MDBX_ATTRIBUTE_TARGET_AVX512BW /* nope */ #elif defined(MDBX_ATTRIBUTE_TARGET) && defined(__ia32__) && \ + !MDBX_GCC_FASTMATH_i686_SIMD_WORKAROUND && \ (__GNUC_PREREQ(6, 0) || __CLANG_PREREQ(5, 0)) -#define MDBX_ATTRIBUTE_TARGET_AVX512BW MDBX_ATTRIBUTE_TARGET("avx512bw") +#define MDBX_ATTRIBUTE_TARGET_AVX512BW \ + MDBX_ATTRIBUTE_TARGET("sse,sse2,avx,avx2,avx512bw") #endif /* __AVX512BW__ */ +#endif /* MDBX_ATTRIBUTE_TARGET_AVX2 for MDBX_ATTRIBUTE_TARGET_AVX512BW */ #ifdef MDBX_ATTRIBUTE_TARGET_SSE2 MDBX_ATTRIBUTE_TARGET_SSE2 static __always_inline unsigned @@ -10599,6 +10620,15 @@ diffcmp2mask_avx2(const pgno_t *const ptr, const ptrdiff_t offset, return _mm256_movemask_ps(*(const __m256 *)&cmp); } +MDBX_ATTRIBUTE_TARGET_AVX2 static __always_inline unsigned +diffcmp2mask_sse2avx(const pgno_t *const ptr, const ptrdiff_t offset, + const __m128i pattern) { + const __m128i f = _mm_loadu_si128((const __m128i *)ptr); + const __m128i l = _mm_loadu_si128((const __m128i *)(ptr + offset)); + const __m128i cmp = _mm_cmpeq_epi32(_mm_sub_epi32(f, l), pattern); + return _mm_movemask_ps(*(const __m128 *)&cmp); +} + MDBX_MAYBE_UNUSED __hot MDBX_ATTRIBUTE_TARGET_AVX2 static pgno_t * scan4seq_avx2(pgno_t *range, const size_t len, const size_t seq) { assert(seq > 0 && len > seq); @@ -10644,7 +10674,7 @@ scan4seq_avx2(pgno_t *range, const size_t len, const size_t seq) { } #endif /* __SANITIZE_ADDRESS__ */ if (range - 3 > detent) { - mask = diffcmp2mask_sse2(range - 3, offset, *(const __m128i *)&pattern); + mask = diffcmp2mask_sse2avx(range - 3, offset, *(const __m128i *)&pattern); if (mask) return range + 28 - __builtin_clz(mask); range -= 4; @@ -10718,7 +10748,7 @@ scan4seq_avx512bw(pgno_t *range, const size_t len, const size_t seq) { range -= 8; } if (range - 3 > detent) { - mask = diffcmp2mask_sse2(range - 3, offset, *(const __m128i *)&pattern); + mask = diffcmp2mask_sse2avx(range - 3, offset, *(const __m128i *)&pattern); if (mask) return range + 28 - __builtin_clz(mask); range -= 4; @@ -11248,7 +11278,7 @@ static __inline pgr_t page_alloc_finalize(MDBX_env *const env, #if MDBX_ENABLE_PROFGC size_t majflt_after; prof->xtime_cpu += osal_cputime(&majflt_after) - cputime_before; - prof->majflt += majflt_after - majflt_before; + prof->majflt += (uint32_t)(majflt_after - majflt_before); #endif /* MDBX_ENABLE_PROFGC */ return ret; } @@ -12122,13 +12152,9 @@ retry:; } const bool inside_txn = (env->me_txn0->mt_owner == osal_thread_self()); - meta_ptr_t head; - if (inside_txn | locked) - head = meta_recent(env, &env->me_txn0->tw.troika); - else { - const meta_troika_t troika = meta_tap(env); - head = meta_recent(env, &troika); - } + const meta_troika_t troika = + (inside_txn | locked) ? env->me_txn0->tw.troika : meta_tap(env); + const meta_ptr_t head = meta_recent(env, &troika); const uint64_t unsynced_pages = atomic_load64(&env->me_lck->mti_unsynced_pages, mo_Relaxed); if (unsynced_pages == 0) { @@ -12141,10 +12167,19 @@ retry:; if (!inside_txn && locked && (env->me_flags & MDBX_WRITEMAP) && unlikely(head.ptr_c->mm_geo.next > bytes2pgno(env, env->me_dxb_mmap.current))) { - rc = dxb_resize(env, head.ptr_c->mm_geo.next, head.ptr_c->mm_geo.now, - head.ptr_c->mm_geo.upper, implicit_grow); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; + + if (unlikely(env->me_stuck_meta >= 0) && + troika.recent != (uint8_t)env->me_stuck_meta) { + NOTICE("skip %s since wagering meta-page (%u) is mispatch the recent " + "meta-page (%u)", + "sync datafile", env->me_stuck_meta, troika.recent); + rc = MDBX_RESULT_TRUE; + } else { + rc = dxb_resize(env, head.ptr_c->mm_geo.next, head.ptr_c->mm_geo.now, + head.ptr_c->mm_geo.upper, implicit_grow); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + } } const size_t autosync_threshold = @@ -12223,6 +12258,14 @@ retry:; eASSERT(env, inside_txn || locked); eASSERT(env, !inside_txn || (flags & MDBX_SHRINK_ALLOWED) == 0); + if (!head.is_steady && unlikely(env->me_stuck_meta >= 0) && + troika.recent != (uint8_t)env->me_stuck_meta) { + NOTICE("skip %s since wagering meta-page (%u) is mispatch the recent " + "meta-page (%u)", + "sync datafile", env->me_stuck_meta, troika.recent); + rc = MDBX_RESULT_TRUE; + goto bailout; + } if (!head.is_steady || ((flags & MDBX_SAFE_NOSYNC) == 0 && unsynced_pages)) { DEBUG("meta-head %" PRIaPGNO ", %s, sync_pending %" PRIu64, data_page(head.ptr_c)->mp_pgno, durable_caption(head.ptr_c), @@ -13122,7 +13165,7 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { } #if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__) txn_valgrind(env, txn); -#endif +#endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */ txn->mt_owner = tid; return MDBX_SUCCESS; } @@ -13190,7 +13233,7 @@ int mdbx_txn_renew(MDBX_txn *txn) { rc = txn_renew(txn, MDBX_TXN_RDONLY); if (rc == MDBX_SUCCESS) { - txn->mt_owner = osal_thread_self(); + tASSERT(txn, txn->mt_owner == osal_thread_self()); DEBUG("renew txn %" PRIaTXN "%c %p on env %p, root page %" PRIaPGNO "/%" PRIaPGNO, txn->mt_txnid, (txn->mt_flags & MDBX_TXN_RDONLY) ? 'r' : 'w', @@ -13804,8 +13847,10 @@ static int txn_end(MDBX_txn *txn, const unsigned mode) { txn->mt_txnid == slot->mr_txnid.weak && slot->mr_txnid.weak >= env->me_lck->mti_oldest_reader.weak); #if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__) + atomic_add32(&env->me_ignore_EDEADLK, 1); txn_valgrind(env, nullptr); -#endif + atomic_sub32(&env->me_ignore_EDEADLK, 1); +#endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */ atomic_store32(&slot->mr_snapshot_pages_used, 0, mo_Relaxed); safe64_reset(&slot->mr_txnid, false); atomic_store32(&env->me_lck->mti_readers_refresh_flag, true, @@ -13834,7 +13879,7 @@ static int txn_end(MDBX_txn *txn, const unsigned mode) { #if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__) if (txn == env->me_txn0) txn_valgrind(env, nullptr); -#endif +#endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */ txn->mt_flags = MDBX_TXN_FINISHED; txn->mt_owner = 0; @@ -14249,6 +14294,14 @@ static int gcu_prepare_backlog(MDBX_txn *txn, gcu_context_t *ctx) { } static __inline void gcu_clean_reserved(MDBX_env *env, MDBX_val pnl) { +#if MDBX_DEBUG && (defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__)) + /* Для предотвращения предупреждения Valgrind из mdbx_dump_val() + * вызванное через макрос DVAL_DEBUG() на выходе + * из cursor_set(MDBX_SET_KEY), которая вызывается ниже внутри update_gc() в + * цикле очистки и цикле заполнения зарезервированных элементов. */ + memset(pnl.iov_base, 0xBB, pnl.iov_len); +#endif /* MDBX_DEBUG && (MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__) */ + /* PNL is initially empty, zero out at least the length */ memset(pnl.iov_base, 0, sizeof(pgno_t)); if ((env->me_flags & (MDBX_WRITEMAP | MDBX_NOMEMINIT)) == 0) @@ -14564,6 +14617,15 @@ static int update_gc(MDBX_txn *txn, gcu_context_t *ctx) { if (unlikely(rc != MDBX_SUCCESS)) goto bailout; +#if MDBX_DEBUG && (defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__)) + /* Для предотвращения предупреждения Valgrind из mdbx_dump_val() + * вызванное через макрос DVAL_DEBUG() на выходе + * из cursor_set(MDBX_SET_KEY), которая вызывается как выше в цикле + * очистки, так и ниже в цикле заполнения зарезервированных элементов. + */ + memset(data.iov_base, 0xBB, data.iov_len); +#endif /* MDBX_DEBUG && (MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__) */ + if (retired_pages_before == MDBX_PNL_GETSIZE(txn->tw.retired_pages)) { const size_t at = (ctx->lifo == MDBX_PNL_ASCENDING) ? left - chunk @@ -14601,6 +14663,16 @@ static int update_gc(MDBX_txn *txn, gcu_context_t *ctx) { rc = cursor_put_nochecklen(&ctx->cursor, &key, &data, MDBX_RESERVE); if (unlikely(rc != MDBX_SUCCESS)) goto bailout; + +#if MDBX_DEBUG && (defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__)) + /* Для предотвращения предупреждения Valgrind из mdbx_dump_val() + * вызванное через макрос DVAL_DEBUG() на выходе + * из cursor_set(MDBX_SET_KEY), которая вызывается как выше в цикле + * очистки, так и ниже в цикле заполнения зарезервированных элементов. + */ + memset(data.iov_base, 0xBB, data.iov_len); +#endif /* MDBX_DEBUG && (MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__) */ + /* Retry if tw.retired_pages[] grew during the Put() */ } while (data.iov_len < MDBX_PNL_SIZEOF(txn->tw.retired_pages)); @@ -15085,7 +15157,7 @@ static int update_gc(MDBX_txn *txn, gcu_context_t *ctx) { MDBX_PNL_SETSIZE(txn->tw.relist, 0); #if MDBX_ENABLE_PROFGC - env->me_lck->mti_pgop_stat.gc_prof.wloops += ctx->loop; + env->me_lck->mti_pgop_stat.gc_prof.wloops += (uint32_t)ctx->loop; #endif /* MDBX_ENABLE_PROFGC */ TRACE("<<< %zu loops, rc = %d", ctx->loop, rc); return rc; @@ -15920,6 +15992,7 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { (size_t)(commit_txnid - txn->mt_txnid)); } #endif + meta.unsafe_sign = MDBX_DATASIGN_NONE; meta_set_txnid(env, &meta, commit_txnid); rc = sync_locked(env, env->me_flags | txn->mt_flags | MDBX_SHRINK_ALLOWED, @@ -17768,8 +17841,9 @@ __cold static int setup_dxb(MDBX_env *env, const int lck_rc, mdbx_is_readahead_reasonable(used_bytes, 0) == MDBX_RESULT_TRUE; #endif /* MDBX_ENABLE_MADVISE */ - err = osal_mmap(env->me_flags, &env->me_dxb_mmap, env->me_dbgeo.now, - env->me_dbgeo.upper, lck_rc ? MMAP_OPTION_TRUNCATE : 0); + err = osal_mmap( + env->me_flags, &env->me_dxb_mmap, env->me_dbgeo.now, env->me_dbgeo.upper, + (lck_rc && env->me_stuck_meta < 0) ? MMAP_OPTION_TRUNCATE : 0); if (unlikely(err != MDBX_SUCCESS)) return err; @@ -17969,7 +18043,12 @@ __cold static int setup_dxb(MDBX_env *env, const int lck_rc, } const meta_ptr_t recent = meta_recent(env, &troika); - if (memcmp(&header.mm_geo, &recent.ptr_c->mm_geo, sizeof(header.mm_geo))) { + if (/* не учитываем различия в geo.next */ + header.mm_geo.grow_pv != recent.ptr_c->mm_geo.grow_pv || + header.mm_geo.shrink_pv != recent.ptr_c->mm_geo.shrink_pv || + header.mm_geo.lower != recent.ptr_c->mm_geo.lower || + header.mm_geo.upper != recent.ptr_c->mm_geo.upper || + header.mm_geo.now != recent.ptr_c->mm_geo.now) { if ((env->me_flags & MDBX_RDONLY) != 0 || /* recovery mode */ env->me_stuck_meta >= 0) { WARNING("skipped update meta.geo in %s mode: from l%" PRIaPGNO @@ -18419,8 +18498,12 @@ __cold static int __must_check_result override_meta(MDBX_env *env, if (unlikely(MDBX_IS_ERROR(rc))) return MDBX_PROBLEM; - if (shape && memcmp(model, shape, sizeof(MDBX_meta)) == 0) + if (shape && memcmp(model, shape, sizeof(MDBX_meta)) == 0) { + NOTICE("skip overriding meta-%zu since no changes " + "for txnid #%" PRIaTXN, + target, txnid); return MDBX_SUCCESS; + } if (env->me_flags & MDBX_WRITEMAP) { #if MDBX_ENABLE_PGOP_STAT @@ -18474,14 +18557,16 @@ __cold int mdbx_env_turn_for_recovery(MDBX_env *env, unsigned target) { MDBX_EXCLUSIVE)) return MDBX_EPERM; - const MDBX_meta *target_meta = METAPAGE(env, target); - txnid_t new_txnid = safe64_txnid_next(constmeta_txnid(target_meta)); - for (size_t n = 0; n < NUM_METAS; ++n) { + const MDBX_meta *const target_meta = METAPAGE(env, target); + txnid_t new_txnid = constmeta_txnid(target_meta); + if (new_txnid < MIN_TXNID) + new_txnid = MIN_TXNID; + for (unsigned n = 0; n < NUM_METAS; ++n) { if (n == target) continue; - MDBX_meta meta = *METAPAGE(env, target); - if (validate_meta(env, &meta, pgno2page(env, n), (pgno_t)n, nullptr) != - MDBX_SUCCESS) { + MDBX_page *const page = pgno2page(env, n); + MDBX_meta meta = *page_meta(page); + if (validate_meta(env, &meta, page, n, nullptr) != MDBX_SUCCESS) { int err = override_meta(env, n, 0, nullptr); if (unlikely(err != MDBX_SUCCESS)) return err; @@ -19270,7 +19355,7 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, } else { #if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__) txn_valgrind(env, nullptr); -#endif +#endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */ } osal_free(env_pathname.buffer_for_free); return rc; @@ -21194,9 +21279,6 @@ static __hot int cursor_touch(MDBX_cursor *const mc, const MDBX_val *key, static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, unsigned flags) { - MDBX_page *sub_root = nullptr; - MDBX_val xdata, *rdata, dkey, olddata; - MDBX_db nested_dupdb; int err; DKBUF_DEBUG; MDBX_env *const env = mc->mc_txn->mt_env; @@ -21204,7 +21286,6 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, DDBI(mc), DKEY_DEBUG(key), key->iov_len, DVAL_DEBUG((flags & MDBX_RESERVE) ? nullptr : data), data->iov_len); - int dupdata_flag = 0; if ((flags & MDBX_CURRENT) != 0 && (mc->mc_flags & C_SUB) == 0) { if (unlikely(flags & (MDBX_APPEND | MDBX_NOOVERWRITE))) return MDBX_EINVAL; @@ -21263,10 +21344,11 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, rc = MDBX_NO_ROOT; } else if ((flags & MDBX_CURRENT) == 0) { bool exact = false; + MDBX_val lastkey, olddata; if ((flags & MDBX_APPEND) && mc->mc_db->md_entries > 0) { - rc = cursor_last(mc, &dkey, &olddata); + rc = cursor_last(mc, &lastkey, &olddata); if (likely(rc == MDBX_SUCCESS)) { - const int cmp = mc->mc_dbx->md_cmp(key, &dkey); + const int cmp = mc->mc_dbx->md_cmp(key, &lastkey); if (likely(cmp > 0)) { mc->mc_ki[mc->mc_top]++; /* step forward for appending */ rc = MDBX_NOTFOUND; @@ -21331,7 +21413,7 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, } mc->mc_flags &= ~C_DEL; - rdata = data; + MDBX_val xdata, *rdata = data; size_t mcount = 0, dcount = 0; if (unlikely(flags & MDBX_MULTIPLE)) { dcount = data[1].iov_len; @@ -21376,11 +21458,15 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, mc->mc_flags |= C_INITIALIZED; } - bool insert_key, insert_data, do_sub = false; - insert_key = insert_data = (rc != MDBX_SUCCESS); + MDBX_val dkey, olddata; + MDBX_db nested_dupdb; + MDBX_page *sub_root = nullptr; + bool insert_key, insert_data; uint16_t fp_flags = P_LEAF; MDBX_page *fp = env->me_pbuf; fp->mp_txnid = mc->mc_txn->mt_front; + insert_key = insert_data = (rc != MDBX_SUCCESS); + dkey.iov_base = nullptr; if (insert_key) { /* The key does not exist */ DEBUG("inserting key at index %i", mc->mc_ki[mc->mc_top]); @@ -21555,7 +21641,6 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, /* Back up original data item */ memcpy(dkey.iov_base = fp + 1, olddata.iov_base, dkey.iov_len = olddata.iov_len); - dupdata_flag = 1; /* Make sub-page header for the dup items, with dummy body */ fp->mp_flags = P_LEAF | P_SUBP; @@ -21659,11 +21744,10 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, } } - rdata = &xdata; - flags |= F_DUPDATA; - do_sub = true; if (!insert_key) node_del(mc, 0); + rdata = &xdata; + flags |= F_DUPDATA; goto new_sub; } @@ -21748,8 +21832,8 @@ new_sub:; * storing the user data in the keys field, so there are strict * size limits on dupdata. The actual data fields of the child * DB are all zero size. */ - if (do_sub) { - int xflags; + if (flags & F_DUPDATA) { + unsigned xflags; size_t ecount; put_sub: xdata.iov_len = 0; @@ -21770,13 +21854,11 @@ new_sub:; if (sub_root) mc->mc_xcursor->mx_cursor.mc_pg[0] = sub_root; /* converted, write the original data first */ - if (dupdata_flag) { + if (dkey.iov_base) { rc = cursor_put_nochecklen(&mc->mc_xcursor->mx_cursor, &dkey, &xdata, xflags); if (unlikely(rc)) goto bad_sub; - /* we've done our job */ - dkey.iov_len = 0; } if (!(node_flags(node) & F_SUBDATA) || sub_root) { /* Adjust other cursors pointing to mp */ @@ -21793,7 +21875,7 @@ new_sub:; continue; if (m2->mc_pg[i] == mp) { if (m2->mc_ki[i] == mc->mc_ki[i]) { - err = cursor_xinit2(m2, mx, dupdata_flag); + err = cursor_xinit2(m2, mx, dkey.iov_base != nullptr); if (unlikely(err != MDBX_SUCCESS)) return err; } else if (!insert_key && m2->mc_ki[i] < nkeys) { @@ -21837,6 +21919,7 @@ new_sub:; if (mcount < dcount) { data[0].iov_base = ptr_disp(data[0].iov_base, data[0].iov_len); insert_key = insert_data = false; + dkey.iov_base = nullptr; goto more; } } @@ -25091,6 +25174,10 @@ int mdbx_put(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, MDBX_val *data, tASSERT(txn, XCURSOR_INITED(&cx.outer) && cx.outer.mc_xcursor->mx_db.md_entries > 1); rc = MDBX_EMULTIVAL; + if ((flags & MDBX_NOOVERWRITE) == 0) { + flags -= MDBX_CURRENT; + rc = cursor_del(&cx.outer, MDBX_ALLDUPS); + } } } } @@ -33276,10 +33363,10 @@ __dll_export const struct MDBX_version_info mdbx_version = { 0, 12, - 6, - 0, - {"2023-04-29T21:30:35+03:00", "44de01dd81ac366a7d37111eaf72726edebe5528", "c019631a8c88a98a11d814e4111a2a9ae8cb4099", - "v0.12.6-0-gc019631a"}, + 7, + 20, + {"2023-10-09T22:12:06+03:00", "1f76e0a48d39074b6ca2a30b74c31b858d09cb2b", "2b0eae08f565b55d035d08cb87dea89566cf0747", + "v0.12.7-20-g2b0eae08"}, sourcery}; __dll_export @@ -35027,6 +35114,11 @@ __cold static int mdbx_ipclock_failed(MDBX_env *env, osal_ipclock_t *ipc, #error "FIXME" #endif /* MDBX_LOCKING */ +#if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__) + if (rc == EDEADLK && atomic_load32(&env->me_ignore_EDEADLK, mo_Relaxed) > 0) + return rc; +#endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */ + ERROR("mutex (un)lock failed, %s", mdbx_strerror(err)); if (rc != EDEADLK) env->me_flags |= MDBX_FATAL_ERROR; diff --git a/mdbx/mdbx.h b/mdbx/mdbx.h index cc42298..575961b 100644 --- a/mdbx/mdbx.h +++ b/mdbx/mdbx.h @@ -2846,9 +2846,9 @@ LIBMDBX_INLINE_API(int, mdbx_env_get_syncperiod, * * Only a single thread may call this function. All transactions, databases, * and cursors must already be closed before calling this function. Attempts - * to use any such handles after calling this function will cause a `SIGSEGV`. - * The environment handle will be freed and must not be used again after this - * call. + * to use any such handles after calling this function is UB and would cause + * a `SIGSEGV`. The environment handle will be freed and must not be used again + * after this call. * * \param [in] env An environment handle returned by * \ref mdbx_env_create(). @@ -4031,7 +4031,7 @@ LIBMDBX_API int mdbx_txn_renew(MDBX_txn *txn); /** \brief The fours integers markers (aka "canary") associated with the * environment. * \ingroup c_crud - * \see mdbx_canary_set() + * \see mdbx_canary_put() * \see mdbx_canary_get() * * The `x`, `y` and `z` values could be set by \ref mdbx_canary_put(), while the @@ -4069,10 +4069,10 @@ LIBMDBX_API int mdbx_canary_put(MDBX_txn *txn, const MDBX_canary *canary); /** \brief Returns fours integers markers (aka "canary") associated with the * environment. * \ingroup c_crud - * \see mdbx_canary_set() + * \see mdbx_canary_put() * * \param [in] txn A transaction handle returned by \ref mdbx_txn_begin(). - * \param [in] canary The address of an MDBX_canary structure where the + * \param [in] canary The address of an \ref MDBX_canary structure where the * information will be copied. * * \returns A non-zero error value on failure and 0 on success. */ @@ -4398,9 +4398,14 @@ LIBMDBX_API int mdbx_drop(MDBX_txn *txn, MDBX_dbi dbi, bool del); * items requires the use of \ref mdbx_cursor_get(). * * \note The memory pointed to by the returned values is owned by the - * database. The caller need not dispose of the memory, and may not - * modify it in any way. For values returned in a read-only transaction - * any modification attempts will cause a `SIGSEGV`. + * database. The caller MUST not dispose of the memory, and MUST not modify it + * in any way regardless in a read-only nor read-write transactions! + * For case a database opened without the \ref MDBX_WRITEMAP modification + * attempts likely will cause a `SIGSEGV`. However, when a database opened with + * the \ref MDBX_WRITEMAP or in case values returned inside read-write + * transaction are located on a "dirty" (modified and pending to commit) pages, + * such modification will silently accepted and likely will lead to DB and/or + * data corruption. * * \note Values returned from the database are valid only until a * subsequent update operation, or the end of the transaction. @@ -4834,6 +4839,16 @@ LIBMDBX_API int mdbx_cursor_copy(const MDBX_cursor *src, MDBX_cursor *dest); * to which data refers. * \see mdbx_get() * + * \note The memory pointed to by the returned values is owned by the + * database. The caller MUST not dispose of the memory, and MUST not modify it + * in any way regardless in a read-only nor read-write transactions! + * For case a database opened without the \ref MDBX_WRITEMAP modification + * attempts likely will cause a `SIGSEGV`. However, when a database opened with + * the \ref MDBX_WRITEMAP or in case values returned inside read-write + * transaction are located on a "dirty" (modified and pending to commit) pages, + * such modification will silently accepted and likely will lead to DB and/or + * data corruption. + * * \param [in] cursor A cursor handle returned by \ref mdbx_cursor_open(). * \param [in,out] key The key for a retrieved item. * \param [in,out] data The data of a retrieved item. @@ -4860,6 +4875,16 @@ LIBMDBX_API int mdbx_cursor_get(MDBX_cursor *cursor, MDBX_val *key, * array to which `pairs` refers. * \see mdbx_cursor_get() * + * \note The memory pointed to by the returned values is owned by the + * database. The caller MUST not dispose of the memory, and MUST not modify it + * in any way regardless in a read-only nor read-write transactions! + * For case a database opened without the \ref MDBX_WRITEMAP modification + * attempts likely will cause a `SIGSEGV`. However, when a database opened with + * the \ref MDBX_WRITEMAP or in case values returned inside read-write + * transaction are located on a "dirty" (modified and pending to commit) pages, + * such modification will silently accepted and likely will lead to DB and/or + * data corruption. + * * \param [in] cursor A cursor handle returned by \ref mdbx_cursor_open(). * \param [out] count The number of key and value item returned, on success * it always be the even because the key-value diff --git a/mdbxdist/mdbx.c b/mdbxdist/mdbx.c index e973925..634fcc2 100644 --- a/mdbxdist/mdbx.c +++ b/mdbxdist/mdbx.c @@ -12,7 +12,7 @@ * . */ #define xMDBX_ALLOY 1 -#define MDBX_BUILD_SOURCERY a0e7c54f688eecaf45ddd7493b737f88a97e4e8b0fdaa55c9d3b00d69e0c8548_v0_12_6_0_gc019631a +#define MDBX_BUILD_SOURCERY 748ccee885a921bfe8ef7b24e71957dd3922fe37083ceb8048cb89c28c5d8f9b_v0_12_7_20_g2b0eae08 #ifdef MDBX_CONFIG_H #include MDBX_CONFIG_H #endif @@ -3734,6 +3734,7 @@ struct MDBX_env { int me_valgrind_handle; #endif #if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__) + MDBX_atomic_uint32_t me_ignore_EDEADLK; pgno_t me_poison_edge; #endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */ @@ -4875,6 +4876,9 @@ atomic_store64(MDBX_atomic_uint64_t *p, const uint64_t value, enum MDBX_memory_order order) { STATIC_ASSERT(sizeof(MDBX_atomic_uint64_t) == 8); #if MDBX_64BIT_ATOMIC +#if __GNUC_PREREQ(11, 0) + STATIC_ASSERT(__alignof__(MDBX_atomic_uint64_t) >= sizeof(uint64_t)); +#endif /* GNU C >= 11 */ #ifdef MDBX_HAVE_C11ATOMICS assert(atomic_is_lock_free(MDBX_c11a_rw(uint64_t, p))); atomic_store_explicit(MDBX_c11a_rw(uint64_t, p), value, mo_c11_store(order)); @@ -7667,18 +7671,15 @@ const char *mdbx_dump_val(const MDBX_val *key, char *const buf, char *const detent = buf + bufsize - 2; char *ptr = buf; *ptr++ = '<'; - for (size_t i = 0; i < key->iov_len; i++) { - const ptrdiff_t left = detent - ptr; - assert(left > 0); - int len = snprintf(ptr, left, "%02x", data[i]); - if (len < 0 || len >= left) - break; - ptr += len; - } - if (ptr < detent) { - ptr[0] = '>'; - ptr[1] = '\0'; + for (size_t i = 0; i < key->iov_len && ptr < detent; i++) { + const char hex[16] = {'0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'}; + *ptr++ = hex[data[i] >> 4]; + *ptr++ = hex[data[i] & 15]; } + if (ptr < detent) + *ptr++ = '>'; + *ptr = '\0'; } return buf; } @@ -10505,27 +10506,47 @@ MDBX_MAYBE_UNUSED static __always_inline size_t __builtin_clzl(size_t value) { #define MDBX_ATTRIBUTE_TARGET(target) __attribute__((__target__(target))) #endif /* MDBX_ATTRIBUTE_TARGET */ -#if defined(__SSE2__) +#ifndef MDBX_GCC_FASTMATH_i686_SIMD_WORKAROUND +/* Workaround for GCC's bug with `-m32 -march=i686 -Ofast` + * gcc/i686-buildroot-linux-gnu/12.2.0/include/xmmintrin.h:814:1: + * error: inlining failed in call to 'always_inline' '_mm_movemask_ps': + * target specific option mismatch */ +#if !defined(__FAST_MATH__) || !__FAST_MATH__ || !defined(__GNUC__) || \ + defined(__e2k__) || defined(__clang__) || defined(__amd64__) || \ + defined(__SSE2__) +#define MDBX_GCC_FASTMATH_i686_SIMD_WORKAROUND 0 +#else +#define MDBX_GCC_FASTMATH_i686_SIMD_WORKAROUND 1 +#endif +#endif /* MDBX_GCC_FASTMATH_i686_SIMD_WORKAROUND */ + +#if defined(__SSE2__) && defined(__SSE__) #define MDBX_ATTRIBUTE_TARGET_SSE2 /* nope */ #elif (defined(_M_IX86_FP) && _M_IX86_FP >= 2) || defined(__amd64__) #define __SSE2__ #define MDBX_ATTRIBUTE_TARGET_SSE2 /* nope */ -#elif defined(MDBX_ATTRIBUTE_TARGET) && defined(__ia32__) -#define MDBX_ATTRIBUTE_TARGET_SSE2 MDBX_ATTRIBUTE_TARGET("sse2") +#elif defined(MDBX_ATTRIBUTE_TARGET) && defined(__ia32__) && \ + !MDBX_GCC_FASTMATH_i686_SIMD_WORKAROUND +#define MDBX_ATTRIBUTE_TARGET_SSE2 MDBX_ATTRIBUTE_TARGET("sse,sse2") #endif /* __SSE2__ */ #if defined(__AVX2__) #define MDBX_ATTRIBUTE_TARGET_AVX2 /* nope */ -#elif defined(MDBX_ATTRIBUTE_TARGET) && defined(__ia32__) -#define MDBX_ATTRIBUTE_TARGET_AVX2 MDBX_ATTRIBUTE_TARGET("avx2") +#elif defined(MDBX_ATTRIBUTE_TARGET) && defined(__ia32__) && \ + !MDBX_GCC_FASTMATH_i686_SIMD_WORKAROUND +#define MDBX_ATTRIBUTE_TARGET_AVX2 MDBX_ATTRIBUTE_TARGET("sse,sse2,avx,avx2") #endif /* __AVX2__ */ +#if defined(MDBX_ATTRIBUTE_TARGET_AVX2) #if defined(__AVX512BW__) #define MDBX_ATTRIBUTE_TARGET_AVX512BW /* nope */ #elif defined(MDBX_ATTRIBUTE_TARGET) && defined(__ia32__) && \ + !MDBX_GCC_FASTMATH_i686_SIMD_WORKAROUND && \ (__GNUC_PREREQ(6, 0) || __CLANG_PREREQ(5, 0)) -#define MDBX_ATTRIBUTE_TARGET_AVX512BW MDBX_ATTRIBUTE_TARGET("avx512bw") +#define MDBX_ATTRIBUTE_TARGET_AVX512BW \ + MDBX_ATTRIBUTE_TARGET("sse,sse2,avx,avx2,avx512bw") #endif /* __AVX512BW__ */ +#endif /* MDBX_ATTRIBUTE_TARGET_AVX2 for MDBX_ATTRIBUTE_TARGET_AVX512BW */ #ifdef MDBX_ATTRIBUTE_TARGET_SSE2 MDBX_ATTRIBUTE_TARGET_SSE2 static __always_inline unsigned @@ -10599,6 +10620,15 @@ diffcmp2mask_avx2(const pgno_t *const ptr, const ptrdiff_t offset, return _mm256_movemask_ps(*(const __m256 *)&cmp); } +MDBX_ATTRIBUTE_TARGET_AVX2 static __always_inline unsigned +diffcmp2mask_sse2avx(const pgno_t *const ptr, const ptrdiff_t offset, + const __m128i pattern) { + const __m128i f = _mm_loadu_si128((const __m128i *)ptr); + const __m128i l = _mm_loadu_si128((const __m128i *)(ptr + offset)); + const __m128i cmp = _mm_cmpeq_epi32(_mm_sub_epi32(f, l), pattern); + return _mm_movemask_ps(*(const __m128 *)&cmp); +} + MDBX_MAYBE_UNUSED __hot MDBX_ATTRIBUTE_TARGET_AVX2 static pgno_t * scan4seq_avx2(pgno_t *range, const size_t len, const size_t seq) { assert(seq > 0 && len > seq); @@ -10644,7 +10674,7 @@ scan4seq_avx2(pgno_t *range, const size_t len, const size_t seq) { } #endif /* __SANITIZE_ADDRESS__ */ if (range - 3 > detent) { - mask = diffcmp2mask_sse2(range - 3, offset, *(const __m128i *)&pattern); + mask = diffcmp2mask_sse2avx(range - 3, offset, *(const __m128i *)&pattern); if (mask) return range + 28 - __builtin_clz(mask); range -= 4; @@ -10718,7 +10748,7 @@ scan4seq_avx512bw(pgno_t *range, const size_t len, const size_t seq) { range -= 8; } if (range - 3 > detent) { - mask = diffcmp2mask_sse2(range - 3, offset, *(const __m128i *)&pattern); + mask = diffcmp2mask_sse2avx(range - 3, offset, *(const __m128i *)&pattern); if (mask) return range + 28 - __builtin_clz(mask); range -= 4; @@ -11248,7 +11278,7 @@ static __inline pgr_t page_alloc_finalize(MDBX_env *const env, #if MDBX_ENABLE_PROFGC size_t majflt_after; prof->xtime_cpu += osal_cputime(&majflt_after) - cputime_before; - prof->majflt += majflt_after - majflt_before; + prof->majflt += (uint32_t)(majflt_after - majflt_before); #endif /* MDBX_ENABLE_PROFGC */ return ret; } @@ -12122,13 +12152,9 @@ retry:; } const bool inside_txn = (env->me_txn0->mt_owner == osal_thread_self()); - meta_ptr_t head; - if (inside_txn | locked) - head = meta_recent(env, &env->me_txn0->tw.troika); - else { - const meta_troika_t troika = meta_tap(env); - head = meta_recent(env, &troika); - } + const meta_troika_t troika = + (inside_txn | locked) ? env->me_txn0->tw.troika : meta_tap(env); + const meta_ptr_t head = meta_recent(env, &troika); const uint64_t unsynced_pages = atomic_load64(&env->me_lck->mti_unsynced_pages, mo_Relaxed); if (unsynced_pages == 0) { @@ -12141,10 +12167,19 @@ retry:; if (!inside_txn && locked && (env->me_flags & MDBX_WRITEMAP) && unlikely(head.ptr_c->mm_geo.next > bytes2pgno(env, env->me_dxb_mmap.current))) { - rc = dxb_resize(env, head.ptr_c->mm_geo.next, head.ptr_c->mm_geo.now, - head.ptr_c->mm_geo.upper, implicit_grow); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; + + if (unlikely(env->me_stuck_meta >= 0) && + troika.recent != (uint8_t)env->me_stuck_meta) { + NOTICE("skip %s since wagering meta-page (%u) is mispatch the recent " + "meta-page (%u)", + "sync datafile", env->me_stuck_meta, troika.recent); + rc = MDBX_RESULT_TRUE; + } else { + rc = dxb_resize(env, head.ptr_c->mm_geo.next, head.ptr_c->mm_geo.now, + head.ptr_c->mm_geo.upper, implicit_grow); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + } } const size_t autosync_threshold = @@ -12223,6 +12258,14 @@ retry:; eASSERT(env, inside_txn || locked); eASSERT(env, !inside_txn || (flags & MDBX_SHRINK_ALLOWED) == 0); + if (!head.is_steady && unlikely(env->me_stuck_meta >= 0) && + troika.recent != (uint8_t)env->me_stuck_meta) { + NOTICE("skip %s since wagering meta-page (%u) is mispatch the recent " + "meta-page (%u)", + "sync datafile", env->me_stuck_meta, troika.recent); + rc = MDBX_RESULT_TRUE; + goto bailout; + } if (!head.is_steady || ((flags & MDBX_SAFE_NOSYNC) == 0 && unsynced_pages)) { DEBUG("meta-head %" PRIaPGNO ", %s, sync_pending %" PRIu64, data_page(head.ptr_c)->mp_pgno, durable_caption(head.ptr_c), @@ -13122,7 +13165,7 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { } #if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__) txn_valgrind(env, txn); -#endif +#endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */ txn->mt_owner = tid; return MDBX_SUCCESS; } @@ -13190,7 +13233,7 @@ int mdbx_txn_renew(MDBX_txn *txn) { rc = txn_renew(txn, MDBX_TXN_RDONLY); if (rc == MDBX_SUCCESS) { - txn->mt_owner = osal_thread_self(); + tASSERT(txn, txn->mt_owner == osal_thread_self()); DEBUG("renew txn %" PRIaTXN "%c %p on env %p, root page %" PRIaPGNO "/%" PRIaPGNO, txn->mt_txnid, (txn->mt_flags & MDBX_TXN_RDONLY) ? 'r' : 'w', @@ -13804,8 +13847,10 @@ static int txn_end(MDBX_txn *txn, const unsigned mode) { txn->mt_txnid == slot->mr_txnid.weak && slot->mr_txnid.weak >= env->me_lck->mti_oldest_reader.weak); #if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__) + atomic_add32(&env->me_ignore_EDEADLK, 1); txn_valgrind(env, nullptr); -#endif + atomic_sub32(&env->me_ignore_EDEADLK, 1); +#endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */ atomic_store32(&slot->mr_snapshot_pages_used, 0, mo_Relaxed); safe64_reset(&slot->mr_txnid, false); atomic_store32(&env->me_lck->mti_readers_refresh_flag, true, @@ -13834,7 +13879,7 @@ static int txn_end(MDBX_txn *txn, const unsigned mode) { #if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__) if (txn == env->me_txn0) txn_valgrind(env, nullptr); -#endif +#endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */ txn->mt_flags = MDBX_TXN_FINISHED; txn->mt_owner = 0; @@ -14249,6 +14294,14 @@ static int gcu_prepare_backlog(MDBX_txn *txn, gcu_context_t *ctx) { } static __inline void gcu_clean_reserved(MDBX_env *env, MDBX_val pnl) { +#if MDBX_DEBUG && (defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__)) + /* Для предотвращения предупреждения Valgrind из mdbx_dump_val() + * вызванное через макрос DVAL_DEBUG() на выходе + * из cursor_set(MDBX_SET_KEY), которая вызывается ниже внутри update_gc() в + * цикле очистки и цикле заполнения зарезервированных элементов. */ + memset(pnl.iov_base, 0xBB, pnl.iov_len); +#endif /* MDBX_DEBUG && (MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__) */ + /* PNL is initially empty, zero out at least the length */ memset(pnl.iov_base, 0, sizeof(pgno_t)); if ((env->me_flags & (MDBX_WRITEMAP | MDBX_NOMEMINIT)) == 0) @@ -14564,6 +14617,15 @@ static int update_gc(MDBX_txn *txn, gcu_context_t *ctx) { if (unlikely(rc != MDBX_SUCCESS)) goto bailout; +#if MDBX_DEBUG && (defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__)) + /* Для предотвращения предупреждения Valgrind из mdbx_dump_val() + * вызванное через макрос DVAL_DEBUG() на выходе + * из cursor_set(MDBX_SET_KEY), которая вызывается как выше в цикле + * очистки, так и ниже в цикле заполнения зарезервированных элементов. + */ + memset(data.iov_base, 0xBB, data.iov_len); +#endif /* MDBX_DEBUG && (MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__) */ + if (retired_pages_before == MDBX_PNL_GETSIZE(txn->tw.retired_pages)) { const size_t at = (ctx->lifo == MDBX_PNL_ASCENDING) ? left - chunk @@ -14601,6 +14663,16 @@ static int update_gc(MDBX_txn *txn, gcu_context_t *ctx) { rc = cursor_put_nochecklen(&ctx->cursor, &key, &data, MDBX_RESERVE); if (unlikely(rc != MDBX_SUCCESS)) goto bailout; + +#if MDBX_DEBUG && (defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__)) + /* Для предотвращения предупреждения Valgrind из mdbx_dump_val() + * вызванное через макрос DVAL_DEBUG() на выходе + * из cursor_set(MDBX_SET_KEY), которая вызывается как выше в цикле + * очистки, так и ниже в цикле заполнения зарезервированных элементов. + */ + memset(data.iov_base, 0xBB, data.iov_len); +#endif /* MDBX_DEBUG && (MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__) */ + /* Retry if tw.retired_pages[] grew during the Put() */ } while (data.iov_len < MDBX_PNL_SIZEOF(txn->tw.retired_pages)); @@ -15085,7 +15157,7 @@ static int update_gc(MDBX_txn *txn, gcu_context_t *ctx) { MDBX_PNL_SETSIZE(txn->tw.relist, 0); #if MDBX_ENABLE_PROFGC - env->me_lck->mti_pgop_stat.gc_prof.wloops += ctx->loop; + env->me_lck->mti_pgop_stat.gc_prof.wloops += (uint32_t)ctx->loop; #endif /* MDBX_ENABLE_PROFGC */ TRACE("<<< %zu loops, rc = %d", ctx->loop, rc); return rc; @@ -15920,6 +15992,7 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { (size_t)(commit_txnid - txn->mt_txnid)); } #endif + meta.unsafe_sign = MDBX_DATASIGN_NONE; meta_set_txnid(env, &meta, commit_txnid); rc = sync_locked(env, env->me_flags | txn->mt_flags | MDBX_SHRINK_ALLOWED, @@ -17768,8 +17841,9 @@ __cold static int setup_dxb(MDBX_env *env, const int lck_rc, mdbx_is_readahead_reasonable(used_bytes, 0) == MDBX_RESULT_TRUE; #endif /* MDBX_ENABLE_MADVISE */ - err = osal_mmap(env->me_flags, &env->me_dxb_mmap, env->me_dbgeo.now, - env->me_dbgeo.upper, lck_rc ? MMAP_OPTION_TRUNCATE : 0); + err = osal_mmap( + env->me_flags, &env->me_dxb_mmap, env->me_dbgeo.now, env->me_dbgeo.upper, + (lck_rc && env->me_stuck_meta < 0) ? MMAP_OPTION_TRUNCATE : 0); if (unlikely(err != MDBX_SUCCESS)) return err; @@ -17969,7 +18043,12 @@ __cold static int setup_dxb(MDBX_env *env, const int lck_rc, } const meta_ptr_t recent = meta_recent(env, &troika); - if (memcmp(&header.mm_geo, &recent.ptr_c->mm_geo, sizeof(header.mm_geo))) { + if (/* не учитываем различия в geo.next */ + header.mm_geo.grow_pv != recent.ptr_c->mm_geo.grow_pv || + header.mm_geo.shrink_pv != recent.ptr_c->mm_geo.shrink_pv || + header.mm_geo.lower != recent.ptr_c->mm_geo.lower || + header.mm_geo.upper != recent.ptr_c->mm_geo.upper || + header.mm_geo.now != recent.ptr_c->mm_geo.now) { if ((env->me_flags & MDBX_RDONLY) != 0 || /* recovery mode */ env->me_stuck_meta >= 0) { WARNING("skipped update meta.geo in %s mode: from l%" PRIaPGNO @@ -18419,8 +18498,12 @@ __cold static int __must_check_result override_meta(MDBX_env *env, if (unlikely(MDBX_IS_ERROR(rc))) return MDBX_PROBLEM; - if (shape && memcmp(model, shape, sizeof(MDBX_meta)) == 0) + if (shape && memcmp(model, shape, sizeof(MDBX_meta)) == 0) { + NOTICE("skip overriding meta-%zu since no changes " + "for txnid #%" PRIaTXN, + target, txnid); return MDBX_SUCCESS; + } if (env->me_flags & MDBX_WRITEMAP) { #if MDBX_ENABLE_PGOP_STAT @@ -18474,14 +18557,16 @@ __cold int mdbx_env_turn_for_recovery(MDBX_env *env, unsigned target) { MDBX_EXCLUSIVE)) return MDBX_EPERM; - const MDBX_meta *target_meta = METAPAGE(env, target); - txnid_t new_txnid = safe64_txnid_next(constmeta_txnid(target_meta)); - for (size_t n = 0; n < NUM_METAS; ++n) { + const MDBX_meta *const target_meta = METAPAGE(env, target); + txnid_t new_txnid = constmeta_txnid(target_meta); + if (new_txnid < MIN_TXNID) + new_txnid = MIN_TXNID; + for (unsigned n = 0; n < NUM_METAS; ++n) { if (n == target) continue; - MDBX_meta meta = *METAPAGE(env, target); - if (validate_meta(env, &meta, pgno2page(env, n), (pgno_t)n, nullptr) != - MDBX_SUCCESS) { + MDBX_page *const page = pgno2page(env, n); + MDBX_meta meta = *page_meta(page); + if (validate_meta(env, &meta, page, n, nullptr) != MDBX_SUCCESS) { int err = override_meta(env, n, 0, nullptr); if (unlikely(err != MDBX_SUCCESS)) return err; @@ -19270,7 +19355,7 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, } else { #if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__) txn_valgrind(env, nullptr); -#endif +#endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */ } osal_free(env_pathname.buffer_for_free); return rc; @@ -21194,9 +21279,6 @@ static __hot int cursor_touch(MDBX_cursor *const mc, const MDBX_val *key, static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, unsigned flags) { - MDBX_page *sub_root = nullptr; - MDBX_val xdata, *rdata, dkey, olddata; - MDBX_db nested_dupdb; int err; DKBUF_DEBUG; MDBX_env *const env = mc->mc_txn->mt_env; @@ -21204,7 +21286,6 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, DDBI(mc), DKEY_DEBUG(key), key->iov_len, DVAL_DEBUG((flags & MDBX_RESERVE) ? nullptr : data), data->iov_len); - int dupdata_flag = 0; if ((flags & MDBX_CURRENT) != 0 && (mc->mc_flags & C_SUB) == 0) { if (unlikely(flags & (MDBX_APPEND | MDBX_NOOVERWRITE))) return MDBX_EINVAL; @@ -21263,10 +21344,11 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, rc = MDBX_NO_ROOT; } else if ((flags & MDBX_CURRENT) == 0) { bool exact = false; + MDBX_val lastkey, olddata; if ((flags & MDBX_APPEND) && mc->mc_db->md_entries > 0) { - rc = cursor_last(mc, &dkey, &olddata); + rc = cursor_last(mc, &lastkey, &olddata); if (likely(rc == MDBX_SUCCESS)) { - const int cmp = mc->mc_dbx->md_cmp(key, &dkey); + const int cmp = mc->mc_dbx->md_cmp(key, &lastkey); if (likely(cmp > 0)) { mc->mc_ki[mc->mc_top]++; /* step forward for appending */ rc = MDBX_NOTFOUND; @@ -21331,7 +21413,7 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, } mc->mc_flags &= ~C_DEL; - rdata = data; + MDBX_val xdata, *rdata = data; size_t mcount = 0, dcount = 0; if (unlikely(flags & MDBX_MULTIPLE)) { dcount = data[1].iov_len; @@ -21376,11 +21458,15 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, mc->mc_flags |= C_INITIALIZED; } - bool insert_key, insert_data, do_sub = false; - insert_key = insert_data = (rc != MDBX_SUCCESS); + MDBX_val dkey, olddata; + MDBX_db nested_dupdb; + MDBX_page *sub_root = nullptr; + bool insert_key, insert_data; uint16_t fp_flags = P_LEAF; MDBX_page *fp = env->me_pbuf; fp->mp_txnid = mc->mc_txn->mt_front; + insert_key = insert_data = (rc != MDBX_SUCCESS); + dkey.iov_base = nullptr; if (insert_key) { /* The key does not exist */ DEBUG("inserting key at index %i", mc->mc_ki[mc->mc_top]); @@ -21555,7 +21641,6 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, /* Back up original data item */ memcpy(dkey.iov_base = fp + 1, olddata.iov_base, dkey.iov_len = olddata.iov_len); - dupdata_flag = 1; /* Make sub-page header for the dup items, with dummy body */ fp->mp_flags = P_LEAF | P_SUBP; @@ -21659,11 +21744,10 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, } } - rdata = &xdata; - flags |= F_DUPDATA; - do_sub = true; if (!insert_key) node_del(mc, 0); + rdata = &xdata; + flags |= F_DUPDATA; goto new_sub; } @@ -21748,8 +21832,8 @@ new_sub:; * storing the user data in the keys field, so there are strict * size limits on dupdata. The actual data fields of the child * DB are all zero size. */ - if (do_sub) { - int xflags; + if (flags & F_DUPDATA) { + unsigned xflags; size_t ecount; put_sub: xdata.iov_len = 0; @@ -21770,13 +21854,11 @@ new_sub:; if (sub_root) mc->mc_xcursor->mx_cursor.mc_pg[0] = sub_root; /* converted, write the original data first */ - if (dupdata_flag) { + if (dkey.iov_base) { rc = cursor_put_nochecklen(&mc->mc_xcursor->mx_cursor, &dkey, &xdata, xflags); if (unlikely(rc)) goto bad_sub; - /* we've done our job */ - dkey.iov_len = 0; } if (!(node_flags(node) & F_SUBDATA) || sub_root) { /* Adjust other cursors pointing to mp */ @@ -21793,7 +21875,7 @@ new_sub:; continue; if (m2->mc_pg[i] == mp) { if (m2->mc_ki[i] == mc->mc_ki[i]) { - err = cursor_xinit2(m2, mx, dupdata_flag); + err = cursor_xinit2(m2, mx, dkey.iov_base != nullptr); if (unlikely(err != MDBX_SUCCESS)) return err; } else if (!insert_key && m2->mc_ki[i] < nkeys) { @@ -21837,6 +21919,7 @@ new_sub:; if (mcount < dcount) { data[0].iov_base = ptr_disp(data[0].iov_base, data[0].iov_len); insert_key = insert_data = false; + dkey.iov_base = nullptr; goto more; } } @@ -25091,6 +25174,10 @@ int mdbx_put(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, MDBX_val *data, tASSERT(txn, XCURSOR_INITED(&cx.outer) && cx.outer.mc_xcursor->mx_db.md_entries > 1); rc = MDBX_EMULTIVAL; + if ((flags & MDBX_NOOVERWRITE) == 0) { + flags -= MDBX_CURRENT; + rc = cursor_del(&cx.outer, MDBX_ALLDUPS); + } } } } @@ -33276,10 +33363,10 @@ __dll_export const struct MDBX_version_info mdbx_version = { 0, 12, - 6, - 0, - {"2023-04-29T21:30:35+03:00", "44de01dd81ac366a7d37111eaf72726edebe5528", "c019631a8c88a98a11d814e4111a2a9ae8cb4099", - "v0.12.6-0-gc019631a"}, + 7, + 20, + {"2023-10-09T22:12:06+03:00", "1f76e0a48d39074b6ca2a30b74c31b858d09cb2b", "2b0eae08f565b55d035d08cb87dea89566cf0747", + "v0.12.7-20-g2b0eae08"}, sourcery}; __dll_export @@ -35027,6 +35114,11 @@ __cold static int mdbx_ipclock_failed(MDBX_env *env, osal_ipclock_t *ipc, #error "FIXME" #endif /* MDBX_LOCKING */ +#if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__) + if (rc == EDEADLK && atomic_load32(&env->me_ignore_EDEADLK, mo_Relaxed) > 0) + return rc; +#endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */ + ERROR("mutex (un)lock failed, %s", mdbx_strerror(err)); if (rc != EDEADLK) env->me_flags |= MDBX_FATAL_ERROR; diff --git a/mdbxdist/mdbx_chk.c b/mdbxdist/mdbx_chk.c index 74bde38..45bbb71 100644 --- a/mdbxdist/mdbx_chk.c +++ b/mdbxdist/mdbx_chk.c @@ -34,7 +34,7 @@ * top-level directory of the distribution or, alternatively, at * . */ -#define MDBX_BUILD_SOURCERY a0e7c54f688eecaf45ddd7493b737f88a97e4e8b0fdaa55c9d3b00d69e0c8548_v0_12_6_0_gc019631a +#define MDBX_BUILD_SOURCERY 748ccee885a921bfe8ef7b24e71957dd3922fe37083ceb8048cb89c28c5d8f9b_v0_12_7_20_g2b0eae08 #ifdef MDBX_CONFIG_H #include MDBX_CONFIG_H #endif @@ -3756,6 +3756,7 @@ struct MDBX_env { int me_valgrind_handle; #endif #if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__) + MDBX_atomic_uint32_t me_ignore_EDEADLK; pgno_t me_poison_edge; #endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */ diff --git a/mdbxdist/mdbx_copy.c b/mdbxdist/mdbx_copy.c index b9bf2d9..fdfc2b8 100644 --- a/mdbxdist/mdbx_copy.c +++ b/mdbxdist/mdbx_copy.c @@ -34,7 +34,7 @@ * top-level directory of the distribution or, alternatively, at * . */ -#define MDBX_BUILD_SOURCERY a0e7c54f688eecaf45ddd7493b737f88a97e4e8b0fdaa55c9d3b00d69e0c8548_v0_12_6_0_gc019631a +#define MDBX_BUILD_SOURCERY 748ccee885a921bfe8ef7b24e71957dd3922fe37083ceb8048cb89c28c5d8f9b_v0_12_7_20_g2b0eae08 #ifdef MDBX_CONFIG_H #include MDBX_CONFIG_H #endif @@ -3756,6 +3756,7 @@ struct MDBX_env { int me_valgrind_handle; #endif #if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__) + MDBX_atomic_uint32_t me_ignore_EDEADLK; pgno_t me_poison_edge; #endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */ diff --git a/mdbxdist/mdbx_drop.c b/mdbxdist/mdbx_drop.c index 3f23262..d742c85 100644 --- a/mdbxdist/mdbx_drop.c +++ b/mdbxdist/mdbx_drop.c @@ -36,7 +36,7 @@ * top-level directory of the distribution or, alternatively, at * . */ -#define MDBX_BUILD_SOURCERY a0e7c54f688eecaf45ddd7493b737f88a97e4e8b0fdaa55c9d3b00d69e0c8548_v0_12_6_0_gc019631a +#define MDBX_BUILD_SOURCERY 748ccee885a921bfe8ef7b24e71957dd3922fe37083ceb8048cb89c28c5d8f9b_v0_12_7_20_g2b0eae08 #ifdef MDBX_CONFIG_H #include MDBX_CONFIG_H #endif @@ -3758,6 +3758,7 @@ struct MDBX_env { int me_valgrind_handle; #endif #if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__) + MDBX_atomic_uint32_t me_ignore_EDEADLK; pgno_t me_poison_edge; #endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */ diff --git a/mdbxdist/mdbx_dump.c b/mdbxdist/mdbx_dump.c index 5cc90c8..fbf3101 100644 --- a/mdbxdist/mdbx_dump.c +++ b/mdbxdist/mdbx_dump.c @@ -34,7 +34,7 @@ * top-level directory of the distribution or, alternatively, at * . */ -#define MDBX_BUILD_SOURCERY a0e7c54f688eecaf45ddd7493b737f88a97e4e8b0fdaa55c9d3b00d69e0c8548_v0_12_6_0_gc019631a +#define MDBX_BUILD_SOURCERY 748ccee885a921bfe8ef7b24e71957dd3922fe37083ceb8048cb89c28c5d8f9b_v0_12_7_20_g2b0eae08 #ifdef MDBX_CONFIG_H #include MDBX_CONFIG_H #endif @@ -3756,6 +3756,7 @@ struct MDBX_env { int me_valgrind_handle; #endif #if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__) + MDBX_atomic_uint32_t me_ignore_EDEADLK; pgno_t me_poison_edge; #endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */ diff --git a/mdbxdist/mdbx_load.c b/mdbxdist/mdbx_load.c index 0cca771..98b89ac 100644 --- a/mdbxdist/mdbx_load.c +++ b/mdbxdist/mdbx_load.c @@ -34,7 +34,7 @@ * top-level directory of the distribution or, alternatively, at * . */ -#define MDBX_BUILD_SOURCERY a0e7c54f688eecaf45ddd7493b737f88a97e4e8b0fdaa55c9d3b00d69e0c8548_v0_12_6_0_gc019631a +#define MDBX_BUILD_SOURCERY 748ccee885a921bfe8ef7b24e71957dd3922fe37083ceb8048cb89c28c5d8f9b_v0_12_7_20_g2b0eae08 #ifdef MDBX_CONFIG_H #include MDBX_CONFIG_H #endif @@ -3756,6 +3756,7 @@ struct MDBX_env { int me_valgrind_handle; #endif #if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__) + MDBX_atomic_uint32_t me_ignore_EDEADLK; pgno_t me_poison_edge; #endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */ diff --git a/mdbxdist/mdbx_stat.c b/mdbxdist/mdbx_stat.c index 0aabb57..0ddaca9 100644 --- a/mdbxdist/mdbx_stat.c +++ b/mdbxdist/mdbx_stat.c @@ -34,7 +34,7 @@ * top-level directory of the distribution or, alternatively, at * . */ -#define MDBX_BUILD_SOURCERY a0e7c54f688eecaf45ddd7493b737f88a97e4e8b0fdaa55c9d3b00d69e0c8548_v0_12_6_0_gc019631a +#define MDBX_BUILD_SOURCERY 748ccee885a921bfe8ef7b24e71957dd3922fe37083ceb8048cb89c28c5d8f9b_v0_12_7_20_g2b0eae08 #ifdef MDBX_CONFIG_H #include MDBX_CONFIG_H #endif @@ -3756,6 +3756,7 @@ struct MDBX_env { int me_valgrind_handle; #endif #if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__) + MDBX_atomic_uint32_t me_ignore_EDEADLK; pgno_t me_poison_edge; #endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */