
Collapse write barrier function for HV and SHV #1501

Open · wants to merge 9 commits into base: static_h
536 changes: 294 additions & 242 deletions include/hermes/VM/AlignedHeapSegment.h

Large diffs are not rendered by default.

212 changes: 164 additions & 48 deletions include/hermes/VM/CardTableNC.h
@@ -22,10 +22,14 @@ namespace hermes {
namespace vm {

/// The card table optimizes young gen collections by restricting the amount of
/// heap belonging to the old gen that must be scanned. The card table expects
/// to be constructed inside an AlignedHeapSegment's storage, at some position
/// before the allocation region, and covers the extent of that storage's
/// memory.
/// heap belonging to the old gen that must be scanned. The card table expects
/// to be constructed at the beginning of a segment's storage, and covers the
/// extent of that storage's memory. There are two cases:
/// 1. For FixedSizeHeapSegment, the inline CardStatus array and Boundary array
/// in the card table are large enough.
/// 2. For JumboHeapSegment, the two arrays are allocated separately.
/// In either case, the pointers to the CardStatus array and Boundary array are
/// stored in the \c cards and \c boundaries fields of SHSegmentInfo.
///
/// Also supports the following query: Given a card in the heap that intersects
/// with the used portion of its segment, find its "crossing object" -- the
@@ -58,16 +62,19 @@ class CardTable {
const char *address_{nullptr};
};

enum class CardStatus : char { Clean = 0, Dirty = 1 };

/// The size (and base-two log of the size) of cards used in the card table.
static constexpr size_t kLogCardSize = 9; // ==> 512-byte cards.
static constexpr size_t kCardSize = 1 << kLogCardSize; // ==> 512-byte cards.
static constexpr size_t kSegmentSize = 1 << HERMESVM_LOG_HEAP_SEGMENT_SIZE;

/// The number of valid indices into the card table.
static constexpr size_t kValidIndices = kSegmentSize >> kLogCardSize;
/// Maximum size of a segment that can have inline card and boundary arrays.
static constexpr size_t kSegmentUnitSize = 1
<< HERMESVM_LOG_HEAP_SEGMENT_SIZE;

/// The size of the card table.
static constexpr size_t kCardTableSize = kValidIndices;
/// The size of the maximum inline card table. The CardStatus and boundary
/// arrays for larger segments are larger and stored separately.
static constexpr size_t kInlineCardTableSize =
kSegmentUnitSize >> kLogCardSize;

/// For convenience, this is a conversion factor to determine how many bytes
/// in the heap correspond to a single byte in the card table. This is
@@ -77,29 +84,58 @@ class CardTable {
/// guaranteed by a static_assert below.
static constexpr size_t kHeapBytesPerCardByte = kCardSize;
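
As a rough illustration of the conversion factor (a sketch; the segment base
address below is made up, and the real addressToIndex body is collapsed in
this diff):

#include <cassert>
#include <cstddef>
#include <cstdint>

constexpr size_t kLogCardSize = 9;
constexpr size_t kCardSize = size_t{1} << kLogCardSize; // 512-byte cards

int main() {
  // Hypothetical segment base; an address 3 cards plus 100 bytes past it.
  const uintptr_t base = 0x10000000;
  const uintptr_t addr = base + 3 * kCardSize + 100;
  // One card-table byte covers kCardSize heap bytes, so the card index of an
  // address is just its byte offset shifted right by kLogCardSize.
  const size_t index = (addr - base) >> kLogCardSize;
  assert(index == 3);
  return 0;
}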

/// A prefix of every segment is occupied by auxilary data
/// structures. The card table is the first such data structure.
/// The card table maps to the segment. Only the suffix of the card
/// table that maps to the suffix of entire segment that is used for
/// allocation is ever used; the prefix that maps to the card table
/// itself is not used. (Nor is the portion that of the card table
/// that maps to the other auxiliary data structure, the mark bit
/// array, but we don't attempt to calculate that here.)
/// It is useful to know the size of this unused region of
/// the card table, so it can be used for other purposes.
/// Note that the total size of the card table is 2 times
/// kCardTableSize, since the CardTable contains two byte arrays of
/// that size (cards_ and _boundaries_).
static constexpr size_t kFirstUsedIndex =
(2 * kCardTableSize) >> kLogCardSize;

CardTable() = default;
/// A prefix of every segment is occupied by auxiliary data structures. The
/// card table is the first such data structure. The card table maps to the
/// segment. Only the suffix of the card table that maps to the suffix of the
/// segment used for allocation is ever used; the prefix that maps to the card
/// table itself is not used, nor are the portions of the card table that map
/// to the other auxiliary data structures: the mark bit array and guard
/// pages. This small space can be used for other purposes, such as storing
/// the SHSegmentInfo (we assert in AlignedHeapSegment that its size won't
/// exceed this unused space). The actual first used index should take all
/// these structures into account; here we only calculate for the CardTable
/// and the size of SHSegmentInfo. It's only used as a starting index for
/// clearing/dirtying ranges of bits.
/// Note that the total size of the card table is 2 times
/// kInlineCardTableSize, since the CardTable contains two byte arrays of that
/// size (inlineCardStatusArray_ and inlineBoundaryArray_). And this index
/// must be larger than the size of SHSegmentInfo to avoid corrupting it when
/// clearing/dirtying bits.
static constexpr size_t kFirstUsedIndex = std::max(
sizeof(SHSegmentInfo),
(2 * kInlineCardTableSize) >> kLogCardSize);
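
A worked example of these constants (a sketch assuming
HERMESVM_LOG_HEAP_SEGMENT_SIZE == 22, i.e. 4 MiB segment units; the stand-in
value for sizeof(SHSegmentInfo) is hypothetical):

#include <algorithm>
#include <cstddef>

constexpr size_t kLogCardSize = 9;
constexpr size_t kSegmentUnitSize = size_t{1} << 22; // assumed 4 MiB units
constexpr size_t kInlineCardTableSize =
    kSegmentUnitSize >> kLogCardSize; // 8192 entries, one byte each

// The two inline byte arrays occupy 2 * 8192 bytes = 16 KiB at the start of
// the segment, which maps onto the first 16 KiB >> 9 = 32 card indices.
constexpr size_t kSegmentInfoSize = 24; // hypothetical sizeof(SHSegmentInfo)
constexpr size_t kFirstUsedIndex =
    std::max(kSegmentInfoSize, (2 * kInlineCardTableSize) >> kLogCardSize);

static_assert(kInlineCardTableSize == 8192, "4 MiB / 512 B cards");
static_assert(kFirstUsedIndex == 32, "first 32 card bytes are never dirtied");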

CardTable(size_t segmentSize) {
assert(
segmentSize && segmentSize % kSegmentUnitSize == 0 &&
"segmentSize must be a multiple of kSegmentUnitSize");

segmentInfo_.shiftedSegmentSize =
segmentSize >> HERMESVM_LOG_HEAP_SEGMENT_SIZE;
if (segmentSize == kSegmentUnitSize) {
// Just use the inline storage.
setCards(inlineCardStatusArray_);
setBoundaries(inlineBoundaryArray_);
} else {
size_t cardTableSize = segmentSize >> kLogCardSize;
// The default CardStatus is Clean (0), so zero-initialize the array.
setCards(new AtomicIfConcurrentGC<CardStatus>[cardTableSize] {});
setBoundaries(new int8_t[cardTableSize]);
}
}
/// CardTable is not copyable or movable: It must be constructed in-place.
CardTable(const CardTable &) = delete;
CardTable(CardTable &&) = delete;
CardTable &operator=(const CardTable &) = delete;
CardTable &operator=(CardTable &&) = delete;

~CardTable() {
// If CardStatus/Boundary array is allocated separately, free them.
if (cards() != inlineCardStatusArray_) {
delete[] cards();
delete[] boundaries();
}
}
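
Since the table is neither copyable nor movable and must live at the start of
segment storage (see base() below), construction presumably happens via
placement new; a minimal sketch with hypothetical names:

#include <cstddef>
#include <new>

// `storage` points at the start of a segment's aligned storage and
// `segmentSize` is a multiple of kSegmentUnitSize; both names are made up.
CardTable *initCardTable(void *storage, size_t segmentSize) {
  // Constructed at the very start of storage so that CardTable::base() can
  // reuse `this` as the base address the table covers.
  return new (storage) CardTable(segmentSize);
}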

/// Returns the card table index corresponding to a byte at the given address.
/// \pre \p addr must be within the bounds of the segment owning this card
/// table or at most 1 card after it, that is to say
@@ -112,18 +148,25 @@
/// of how this is used.
inline size_t addressToIndex(const void *addr) const LLVM_NO_SANITIZE("null");

/// Returns the address corresponding to the given card table
/// index.
/// Returns the address corresponding to the given card table index.
///
/// \pre \p index is bounded:
///
/// 0 <= index <= kValidIndices
/// 0 <= index <= getEndIndex()
inline const char *indexToAddress(size_t index) const;

/// Make the card table entry for the given address dirty.
/// \pre \p addr is required to be an address covered by the card table.
/// This only works for memory in normal objects (i.e., it does not support
/// large allocations) and is more efficient than the version for large
/// objects.
inline void dirtyCardForAddress(const void *addr);

/// Make the card table entry for the given address dirty.
/// \pre \p addr is required to be an address covered by the card table.
/// This reads the cards array pointer from SHSegmentInfo, so it works for
/// both normal and large objects.
inline void dirtyCardForAddressInLargeObj(const void *addr);
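
A sketch of how a caller might choose between the two variants (the helper
and flag below are hypothetical; the real dispatch lives in the GC's write
barriers):

void dirtyCardFor(CardTable &table, const void *addr, bool inJumboSegment) {
  if (inJumboSegment) {
    // Jumbo segments keep their CardStatus array out-of-line, so the entry
    // must be reached through the pointer stored in SHSegmentInfo.
    table.dirtyCardForAddressInLargeObj(addr);
  } else {
    // Fixed-size segments can index the inline array directly, which saves
    // the extra load of the cards pointer.
    table.dirtyCardForAddress(addr);
  }
}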

/// Make the card table entries for cards that intersect the given address
/// range dirty. The range is a closed interval [low, high].
/// \pre \p low and \p high are required to be addresses covered by the card
@@ -132,18 +175,27 @@

/// Returns whether the card table entry for the given address is dirty.
/// \pre \p addr is required to be an address covered by the card table.
/// This only works for memory in normal objects (i.e., it does not support
/// large allocations) and is more efficient than the version for large
/// objects.
inline bool isCardForAddressDirty(const void *addr) const;

/// Returns whether the card table entry for the given index is dirty.
/// \pre \p index is required to be a valid card table index.
/// This only works for memory in normal objects (i.e., it does not support
/// large allocations) and is more efficient than the version for large
/// objects.
inline bool isCardForIndexDirty(const size_t index) const;

/// Versions of isCardForAddressDirty()/isCardForIndexDirty() that work for
/// normal and large objects.
inline bool isCardForAddressDirtyInLargeObj(const void *addr) const;
inline bool isCardForIndexDirtyInLargeObj(const size_t index) const;

/// If there is a dirty card at or after \p fromIndex, at an index less than
/// \p endIndex, returns the index of the dirty card, else returns none.
inline OptValue<size_t> findNextDirtyCard(size_t fromIndex, size_t endIndex)
const;

/// If there is a card card at or after \p fromIndex, at an index less than
/// If there is a clean card at or after \p fromIndex, at an index less than
/// \p endIndex, returns the index of the clean card, else returns none.
inline OptValue<size_t> findNextCleanCard(size_t fromIndex, size_t endIndex)
const;
@@ -184,12 +236,24 @@
/// is the first object.)
GCCell *firstObjForCard(unsigned index) const;

/// Get the segment size from SHSegmentInfo. This is only used in debug code
/// or when clearing the entire card table.
size_t getSegmentSize() const {
return (size_t)segmentInfo_.shiftedSegmentSize
<< HERMESVM_LOG_HEAP_SEGMENT_SIZE;
}

/// The end index of the card table (all valid indices should be smaller).
size_t getEndIndex() const {
return getSegmentSize() >> kLogCardSize;
}
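
For example (a sketch, again assuming 4 MiB segment units): an 8 MiB jumbo
segment stores shiftedSegmentSize == 2, so the size and end index come back
out as:

#include <cstddef>

constexpr size_t kShiftedSegmentSize = 2; // stored in SHSegmentInfo
constexpr size_t kSegSize = kShiftedSegmentSize << 22; // getSegmentSize(): 8 MiB
constexpr size_t kEndIdx = kSegSize >> 9; // getEndIndex(): 16384 cards
static_assert(kEndIdx == 16384, "all valid card indices are below this");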

#ifdef HERMES_EXTRA_DEBUG
/// Temporary debugging hack: yield the numeric value of the boundaries_ array
/// for the given \p index.
/// TODO(T48709128): remove this when the problem is diagnosed.
int8_t cardObjectTableValue(unsigned index) const {
return boundaries_[index];
return boundaries()[index];
}

/// These methods protect and unprotect, respectively, the memory
@@ -215,12 +279,28 @@

private:
#ifndef NDEBUG
/// Returns the pointer to the end of the storage containing \p ptr
/// (exclusive).
static void *storageEnd(const void *ptr);
/// Returns the pointer to the end of the storage starting at \p lowLim.
void *storageEnd(const void *lowLim) const {
return reinterpret_cast<char *>(
reinterpret_cast<uintptr_t>(lowLim) + getSegmentSize());
}
#endif

enum class CardStatus : char { Clean = 0, Dirty = 1 };
void setCards(AtomicIfConcurrentGC<CardStatus> *cards) {
segmentInfo_.cards = cards;
}

AtomicIfConcurrentGC<CardStatus> *cards() const {
return static_cast<AtomicIfConcurrentGC<CardStatus> *>(segmentInfo_.cards);
}

void setBoundaries(int8_t *boundaries) {
segmentInfo_.boundaries = boundaries;
}

int8_t *boundaries() const {
return segmentInfo_.boundaries;
}

/// \return The lowest address whose card can be dirtied in this array. i.e.
/// The smallest address such that
@@ -255,14 +335,27 @@

void cleanOrDirtyRange(size_t from, size_t to, CardStatus cleanOrDirty);

/// This needs to be atomic so that the background thread in Hades can safely
/// dirty cards when compacting.
std::array<AtomicIfConcurrentGC<CardStatus>, kCardTableSize> cards_{};
union {
/// The bytes occupied by segmentInfo_ are guaranteed not to be overwritten
/// by writes to the inline CardStatus array; see the static assertions in
/// AlignedHeapSegment. Pointers to the underlying CardStatus array and
/// boundary array are stored in it. Note that we could also store the
/// boundary-array pointer in a union with inlineBoundaryArray_, since that
/// array has unused prefix bytes as well, saving 8 bytes here. But that
/// makes the size check more complex, as we would need to ensure the
/// segment size is large enough that inlineBoundaryArray_ has enough unused
/// prefix bytes to store the pointer.
SHSegmentInfo segmentInfo_;
/// This needs to be atomic so that the background thread in Hades can
/// safely dirty cards when compacting.
AtomicIfConcurrentGC<CardStatus>
inlineCardStatusArray_[kInlineCardTableSize]{};
};

/// See the comment at kHeapBytesPerCardByte above to see why this is
/// necessary.
static_assert(
sizeof(cards_[0]) == 1,
sizeof(inlineCardStatusArray_[0]) == 1,
"Validate assumption that card table entries are one byte");

/// Each card has a corresponding signed byte in the boundaries_ table. A
@@ -275,7 +368,7 @@
/// time: If we allocate a large object that crosses many cards, the first
/// crossed card gets a non-negative value, and each subsequent one uses the
/// maximum exponent that stays within the card range for the object.
int8_t boundaries_[kCardTableSize];
int8_t inlineBoundaryArray_[kInlineCardTableSize];
};
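
Based on the encoding described above, the crossing-object lookup can walk
backwards, treating negative entries as exponents; a sketch only (the real
firstObjForCard body is not shown in this diff, kLogHeapAlign is an assumed
name, and the debug-only accessor cardObjectTableValue is borrowed purely for
illustration):

#include <cstddef>
#include <cstdint>

constexpr unsigned kLogHeapAlign = 3; // assumed log2 of heap alignment

const char *crossingObjectSketch(const CardTable &t, unsigned index) {
  int8_t val = t.cardObjectTableValue(index); // debug-only accessor
  // A negative entry encodes an exponent: step back 2^(-val) cards and retry
  // until a non-negative entry is found.
  while (val < 0) {
    index -= 1u << static_cast<unsigned>(-val);
    val = t.cardObjectTableValue(index);
  }
  // A non-negative entry gives the distance, in heap-alignment units, from
  // the card boundary back to the start of the crossing object.
  return t.indexToAddress(index) -
      (static_cast<size_t>(val) << kLogHeapAlign);
}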

/// Implementations of inlines.
@@ -305,7 +398,7 @@ inline size_t CardTable::addressToIndex(const void *addr) const {
}

inline const char *CardTable::indexToAddress(size_t index) const {
assert(index <= kValidIndices && "index must be within the index range");
assert(index <= getEndIndex() && "index must be within the index range");
const char *res = base() + (index << kLogCardSize);
assert(
base() <= res && res <= storageEnd(base()) &&
@@ -314,7 +407,16 @@ inline const char *CardTable::indexToAddress(size_t index) const {
}

inline void CardTable::dirtyCardForAddress(const void *addr) {
cards_[addressToIndex(addr)].store(
// Make sure that this is not called on a large segment.
assert(
inlineCardStatusArray_ == cards() &&
"CardStatus array for this CardTable is allocated separately");
inlineCardStatusArray_[addressToIndex(addr)].store(
CardStatus::Dirty, std::memory_order_relaxed);
}

inline void CardTable::dirtyCardForAddressInLargeObj(const void *addr) {
cards()[addressToIndex(addr)].store(
CardStatus::Dirty, std::memory_order_relaxed);
}

@@ -323,8 +425,22 @@ inline bool CardTable::isCardForAddressDirty(const void *addr) const {
}

inline bool CardTable::isCardForIndexDirty(size_t index) const {
assert(index < kValidIndices && "index is required to be in range.");
return cards_[index].load(std::memory_order_relaxed) == CardStatus::Dirty;
assert(index < getEndIndex() && "index is required to be in range.");
// Make sure that this is not called on a large segment.
assert(
inlineCardStatusArray_ == cards() &&
"CardStatus array for this CardTable is allocated separately");
return inlineCardStatusArray_[index].load(std::memory_order_relaxed) ==
CardStatus::Dirty;
}

inline bool CardTable::isCardForAddressDirtyInLargeObj(const void *addr) const {
return isCardForIndexDirtyInLargeObj(addressToIndex(addr));
}

inline bool CardTable::isCardForIndexDirtyInLargeObj(size_t index) const {
assert(index < getEndIndex() && "index is required to be in range.");
return cards()[index].load(std::memory_order_relaxed) == CardStatus::Dirty;
}

inline OptValue<size_t> CardTable::findNextDirtyCard(
@@ -348,9 +464,9 @@ inline CardTable::Boundary CardTable::nextBoundary(const char *level) const {
}

inline const char *CardTable::base() const {
// As we know the card table is laid out inline before the allocation region
// of its aligned heap segment, we can use its own this pointer as the base
// address.
// As we know the card table is laid out inline at the beginning of the
// segment storage, which is before the allocation region, we can use its own
// this pointer as the base address.
return reinterpret_cast<const char *>(this);
}

28 changes: 12 additions & 16 deletions include/hermes/VM/GCBase.h
@@ -226,7 +226,7 @@ enum XorPtrKeyID {
/// Return the maximum amount of bytes holdable by this heap.
/// gcheapsize_t max() const;
/// Return the total amount of bytes of storage this GC will require.
/// This will be a multiple of AlignedHeapSegment::storageSize().
/// This will be a multiple of FixedSizeHeapSegment::storageSize().
/// gcheapsize_t storageFootprint() const;
///
class GCBase {
@@ -1152,29 +1152,25 @@ class GCBase {

#ifdef HERMESVM_GC_RUNTIME
/// Default implementations for read and write barriers: do nothing.
void writeBarrier(const GCHermesValue *loc, HermesValue value);
void writeBarrier(const GCSmallHermesValue *loc, SmallHermesValue value);
template <typename HVType>
void writeBarrier(const GCHermesValueBase<HVType> *loc, HVType value);
void writeBarrier(const GCPointerBase *loc, const GCCell *value);
void constructorWriteBarrier(const GCHermesValue *loc, HermesValue value);
template <typename HVType>
void constructorWriteBarrier(
const GCSmallHermesValue *loc,
SmallHermesValue value);
const GCHermesValueBase<HVType> *loc,
HVType value);
void constructorWriteBarrier(const GCPointerBase *loc, const GCCell *value);
void writeBarrierRange(const GCHermesValue *start, uint32_t numHVs);
void writeBarrierRange(const GCSmallHermesValue *start, uint32_t numHVs);
void constructorWriteBarrierRange(
const GCHermesValue *start,
uint32_t numHVs);
template <typename HVType>
void constructorWriteBarrierRange(
const GCSmallHermesValue *start,
const GCHermesValueBase<HVType> *start,
uint32_t numHVs);
void snapshotWriteBarrier(const GCHermesValue *loc);
void snapshotWriteBarrier(const GCSmallHermesValue *loc);
template <typename HVType>
void snapshotWriteBarrier(const GCHermesValueBase<HVType> *loc);
void snapshotWriteBarrier(const GCPointerBase *loc);
void snapshotWriteBarrier(const GCSymbolID *symbol);
void snapshotWriteBarrierRange(const GCHermesValue *start, uint32_t numHVs);
template <typename HVType>
void snapshotWriteBarrierRange(
const GCSmallHermesValue *start,
const GCHermesValueBase<HVType> *start,
uint32_t numHVs);
void weakRefReadBarrier(HermesValue value);
void weakRefReadBarrier(GCCell *value);
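
To see what the collapse buys: the old interface needed one overload per value
type, while the templated form deduces it. A self-contained sketch with
stand-in types (GCHermesValueBase, HermesValue, and SmallHermesValue are the
real names in the diff above; everything else here is made up):

// Stand-in for GCHermesValueBase<HVType> from the diff.
template <typename HVType>
struct GCHermesValueBaseSketch {};

struct GCSketch {
  // One template subsumes the former HermesValue/SmallHermesValue overloads.
  template <typename HVType>
  void writeBarrier(const GCHermesValueBaseSketch<HVType> *loc, HVType value) {
    (void)loc;
    (void)value; // default barrier: do nothing, matching the diff
  }
};

int main() {
  GCSketch gc;
  GCHermesValueBaseSketch<int> hvSlot;    // int stands in for HermesValue
  GCHermesValueBaseSketch<short> shvSlot; // short stands in for SmallHermesValue
  gc.writeBarrier(&hvSlot, 1);            // HVType deduced as int
  gc.writeBarrier(&shvSlot, short{2});    // HVType deduced as short
  return 0;
}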