Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[GC] Add option G1BarrierSimple to use simple g1 post barrier #829

Merged
merged 1 commit into from
May 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,29 @@ void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm

void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
Register start, Register count, Register scratch, RegSet saved_regs) {
if (G1BarrierSimple) {

Label L_loop, L_done;
const Register end = count;

__ cbz(count, L_done); // zero count - nothing to do

__ lea(end, Address(start, count, Address::lsl(LogBytesPerHeapOop))); // end = start + count << LogBytesPerHeapOop
__ sub(end, end, BytesPerHeapOop); // last element address to make inclusive
__ lsr(start, start, CardTable::card_shift);
__ lsr(end, end, CardTable::card_shift);
__ sub(count, end, start); // number of bytes to copy

__ load_byte_map_base(scratch);
__ add(start, start, scratch);
__ bind(L_loop);
__ strb(zr, Address(start, count));
__ subs(count, count, 1);
__ br(Assembler::GE, L_loop);
__ bind(L_done);
return;
}

__ push(saved_regs, sp);
assert_different_registers(start, count, scratch);
assert_different_registers(c_rarg0, count);
Expand Down Expand Up @@ -209,6 +232,15 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
Label done;
Label runtime;

if (G1BarrierSimple) {
const Register card_addr = tmp;
__ lsr(card_addr, store_addr, CardTable::card_shift);
__ load_byte_map_base(tmp2);
__ add(card_addr, card_addr, tmp2);
__ strb(zr, Address(card_addr));
__ bind(done);
return;
}
// Does store cross heap regions?

__ eor(tmp, store_addr, new_val);
Expand Down Expand Up @@ -449,6 +481,13 @@ void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler*
__ load_parameter(0, card_offset);
__ lsr(card_offset, card_offset, CardTable::card_shift);
__ load_byte_map_base(byte_map_base);

if (G1BarrierSimple) {
__ strb(zr, Address(byte_map_base, card_offset));
__ bind(done);
__ epilogue();
return;
}
__ ldrb(rscratch1, Address(byte_map_base, card_offset));
__ cmpw(rscratch1, (int)G1CardTable::g1_young_card_val());
__ br(Assembler::EQ, done);
Expand Down
48 changes: 48 additions & 0 deletions src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@

#define __ masm->

// Address scale factor selected by UseCompressedOops: times_4 when it is set,
// times_8 otherwise. Used below as the index scale when computing
// addr + count * oop_size.
#define TIMES_OOP (UseCompressedOops ? Address::times_4 : Address::times_8)

void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators,
Register addr, Register count) {
bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;
Expand Down Expand Up @@ -97,6 +99,35 @@ void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm

void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
Register addr, Register count, Register tmp) {
if (G1BarrierSimple) {
CardTableBarrierSet* ct =
barrier_set_cast<CardTableBarrierSet>(BarrierSet::barrier_set());
intptr_t disp = (intptr_t) ct->card_table()->byte_map_base();

Label L_loop, L_done;
const Register end = count;
assert_different_registers(addr, end);

__ testl(count, count);
__ jcc(Assembler::zero, L_done); // zero count - nothing to do

__ leaq(end, Address(addr, count, TIMES_OOP, 0)); // end == addr+count*oop_size
__ subptr(end, BytesPerHeapOop); // end - 1 to make inclusive
__ shrptr(addr, CardTable::card_shift);
__ shrptr(end, CardTable::card_shift);
__ subptr(end, addr); // end --> cards count

__ mov64(tmp, disp);
__ addptr(addr, tmp);
__ bind(L_loop);
__ movb(Address(addr, count, Address::times_1), 0);
__ decrement(count);
__ jcc(Assembler::greaterEqual, L_loop);
__ bind(L_done);

return;
}

__ pusha(); // push registers (overkill)
#ifdef _LP64
if (c_rarg0 == count) { // On win64 c_rarg0 == rcx
Expand Down Expand Up @@ -302,6 +333,12 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
__ movptr(cardtable, (intptr_t)ct->card_table()->byte_map_base());
__ addptr(card_addr, cardtable);

if (G1BarrierSimple) {
__ movb(Address(card_addr, 0), (int)G1CardTable::dirty_card_val());
__ bind(done);
return;
}

__ cmpb(Address(card_addr, 0), (int)G1CardTable::g1_young_card_val());
__ jcc(Assembler::equal, done);

Expand Down Expand Up @@ -551,6 +588,17 @@ void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler*

NOT_LP64(__ get_thread(thread);)

if (G1BarrierSimple) {
__ movb(Address(card_addr, 0), (int)G1CardTable::dirty_card_val());
__ bind(done);
__ pop(rcx);
__ pop(rax);

__ epilogue();

return;
}

__ cmpb(Address(card_addr, 0), (int)G1CardTable::g1_young_card_val());
__ jcc(Assembler::equal, done);

Expand Down
40 changes: 40 additions & 0 deletions src/hotspot/share/gc/g1/c2/g1BarrierSetC2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -447,6 +447,13 @@ void G1BarrierSetC2::post_barrier(GraphKit* kit,
// Combine card table base and card offset
Node* card_adr = __ AddP(no_base, byte_map_base_node(kit), card_offset );

if (G1BarrierSimple) {
__ store(__ ctrl(), card_adr, dirty_card, T_BYTE, Compile::AliasIdxRaw, MemNode::unordered);
// Final sync IdealKit and GraphKit.
kit->final_sync(ideal);
return;
}

// If we know the value being stored does it cross regions?

if (val != NULL) {
Expand Down Expand Up @@ -658,6 +665,39 @@ bool G1BarrierSetC2::is_gc_barrier_node(Node* node) const {

void G1BarrierSetC2::eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const {
assert(node->Opcode() == Op_CastP2X, "ConvP2XNode required");

mmyxym marked this conversation as resolved.
Show resolved Hide resolved
if (G1BarrierSimple) {
Node* this_region = node->in(0);
// Search "if (marking != 0)" check and set it to "false".
// There is no G1 pre barrier if previous stored value is NULL
// (for example, after initialization).
if (this_region->is_Region() && this_region->req() == 3) {
int ind = 1;
if (!this_region->in(ind)->is_IfFalse()) {
ind = 2;
}
if (this_region->in(ind)->is_IfFalse() &&
this_region->in(ind)->in(0)->Opcode() == Op_If) {
Node* bol = this_region->in(ind)->in(0)->in(1);
assert(bol->is_Bool(), "");
Node* cmpx = bol->in(1);
if (bol->as_Bool()->_test._test == BoolTest::ne &&
cmpx->is_Cmp() && cmpx->in(2) == macro->intcon(0) &&
cmpx->in(1)->is_Load()) {
Node* adr = cmpx->in(1)->as_Load()->in(MemNode::Address);
const int marking_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset());
if (adr->is_AddP() && adr->in(AddPNode::Base) == macro->top() &&
adr->in(AddPNode::Address)->Opcode() == Op_ThreadLocal &&
adr->in(AddPNode::Offset) == macro->MakeConX(marking_offset)) {
macro->replace_node(cmpx, macro->makecon(TypeInt::CC_EQ));
}
}
}
}
CardTableBarrierSetC2::eliminate_gc_barrier(macro, node);
return;
}

assert(node->outcnt() <= 2, "expects 1 or 2 users: Xor and URShift nodes");
// It could be only one user, URShift node, in Object.clone() intrinsic
// but the new allocation is passed to arraycopy stub and it could not
Expand Down
11 changes: 10 additions & 1 deletion src/hotspot/share/gc/g1/g1Arguments.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,16 @@ void G1Arguments::initialize() {
vm_exit_during_initialization("The flag -XX:+UseG1GC can not be combined with -XX:ParallelGCThreads=0", NULL);
}

if (FLAG_IS_DEFAULT(G1ConcRefinementThreads)) {
if (G1BarrierSimple) {
#if !defined(_LP64) || !(defined(X86) || defined(AARCH64))
warning("G1BarrierSimple is not supported with current platform"
"; ignoring G1BarrierSimple flag.");
FLAG_SET_DEFAULT(G1BarrierSimple, false);
#else
FLAG_SET_DEFAULT(G1ConcRefinementThreads, 0);
FLAG_SET_DEFAULT(G1ConcRSLogCacheSize, 0);
#endif
} else if (FLAG_IS_DEFAULT(G1ConcRefinementThreads)) {
FLAG_SET_ERGO(uint, G1ConcRefinementThreads, ParallelGCThreads);
}

Expand Down
5 changes: 5 additions & 0 deletions src/hotspot/share/gc/g1/g1BarrierSet.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ void G1BarrierSet::write_ref_array_pre(narrowOop* dst, size_t count, bool dest_u
}

void G1BarrierSet::write_ref_field_post_slow(volatile jbyte* byte) {
assert(!G1BarrierSimple, "sanity");
// In the slow path, we know a card is not young
assert(*byte != G1CardTable::g1_young_card_val(), "slow path invoked without filtering");
OrderAccess::storeload();
Expand All @@ -120,6 +121,10 @@ void G1BarrierSet::invalidate(MemRegion mr) {
volatile jbyte* byte = _card_table->byte_for(mr.start());
jbyte* last_byte = _card_table->byte_for(mr.last());
Thread* thr = Thread::current();
mmyxym marked this conversation as resolved.
Show resolved Hide resolved
if (G1BarrierSimple) {
memset((void*)byte, G1CardTable::dirty_card_val(), last_byte - byte + 1);
return;
}
// skip all consecutive young cards
for (; byte <= last_byte && *byte == G1CardTable::g1_young_card_val(); byte++);

Expand Down
4 changes: 4 additions & 0 deletions src/hotspot/share/gc/g1/g1BarrierSet.inline.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,10 @@ inline void G1BarrierSet::write_ref_field_pre(T* field) {
template <DecoratorSet decorators, typename T>
inline void G1BarrierSet::write_ref_field_post(T* field, oop new_val) {
volatile jbyte* byte = _card_table->byte_for(field);
if (G1BarrierSimple) {
*byte = G1CardTable::dirty_card_val();
return;
}
if (*byte != G1CardTable::g1_young_card_val()) {
// Take a slow path for cards in old
write_ref_field_post_slow(byte);
Expand Down
9 changes: 8 additions & 1 deletion src/hotspot/share/gc/g1/g1CardTable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ bool G1CardTable::mark_card_deferred(size_t card_index) {
}

void G1CardTable::g1_mark_as_young(const MemRegion& mr) {
assert(!G1BarrierSimple, "Should not be called with G1BarrierSimple");

jbyte *const first = byte_for(mr.start());
jbyte *const last = byte_after(mr.last());

Expand All @@ -61,7 +63,9 @@ void G1CardTable::g1_mark_as_young(const MemRegion& mr) {

#ifndef PRODUCT
// Non-product verification hook for the cards covering a young region.
//
// Calls verify_region(mr, g1_young_gen, true), except when G1BarrierSimple is
// enabled. In that mode young cards are never marked (g1_mark_as_young()
// asserts it is not called), so the check is skipped entirely.
// NOTE(review): verify_region() is defined outside this view; presumably it
// checks that the cards in mr hold the g1_young_gen value - confirm.
void G1CardTable::verify_g1_young_region(MemRegion mr) {
  if (G1BarrierSimple) {
    // No young card marking happens in this mode; nothing to verify.
    return;
  }
  verify_region(mr, g1_young_gen, true);
}
#endif

Expand Down Expand Up @@ -97,6 +101,9 @@ void G1CardTable::initialize(G1RegionToSpaceMapper* mapper) {
}

// Reports whether obj is considered young.
//
// Default mode: reads the card byte for obj and compares it with the young
// card value. With G1BarrierSimple the card is not consulted; the answer is
// taken from the heap region that contains obj.
bool G1CardTable::is_in_young(oop obj) const {
  if (!G1BarrierSimple) {
    volatile jbyte* card = byte_for(obj);
    return *card == G1CardTable::g1_young_card_val();
  }
  return G1CollectedHeap::heap()->heap_region_containing(obj)->is_young();
}
24 changes: 19 additions & 5 deletions src/hotspot/share/gc/g1/g1CollectedHeap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2181,6 +2181,12 @@ void G1CollectedHeap::heap_region_par_iterate_from_start(HeapRegionClosure* cl,
_hrm.par_iterate(cl, hrclaimer, 0);
}

// Forwards to _hrm.par_iterate() with the given closure and chunk claimer.
// The start offset passed to par_iterate() is whatever
// chunk_claimer->offset_for_worker(worker_id) returns for this worker.
void G1CollectedHeap::heap_region_par_iterate_chunk_based(HeapRegionClosure* cl,
HeapRegionChunkClaimer* chunk_claimer,
uint worker_id) const {
_hrm.par_iterate(cl, chunk_claimer, chunk_claimer->offset_for_worker(worker_id));
}

void G1CollectedHeap::collection_set_iterate(HeapRegionClosure* cl) {
_collection_set.iterate(cl);
}
Expand Down Expand Up @@ -2671,7 +2677,9 @@ class RegisterHumongousWithInCSetFastTestClosure : public HeapRegionClosure {
if (g1h->is_in_closed_subset(ct->addr_for(card_ptr))) {
if (*card_ptr != G1CardTable::dirty_card_val()) {
*card_ptr = G1CardTable::dirty_card_val();
_dcq.enqueue(card_ptr);
if (!G1BarrierSimple) {
_dcq.enqueue(card_ptr);
}
mmyxym marked this conversation as resolved.
Show resolved Hide resolved
}
}
}
Expand Down Expand Up @@ -3188,6 +3196,7 @@ class G1ParTask : public AbstractGangTask {
G1RootProcessor* _root_processor;
TaskTerminator _terminator;
uint _n_workers;
HeapRegionChunkClaimer _chunk_claimer;

public:
G1ParTask(G1CollectedHeap* g1h, G1ParScanThreadStateSet* per_thread_states, RefToScanQueueSet *task_queues, G1RootProcessor* root_processor, uint n_workers)
Expand All @@ -3197,7 +3206,8 @@ class G1ParTask : public AbstractGangTask {
_queues(task_queues),
_root_processor(root_processor),
_terminator(n_workers, _queues),
_n_workers(n_workers)
_n_workers(n_workers),
_chunk_claimer(n_workers)
{}

void work(uint worker_id) {
Expand All @@ -3223,7 +3233,7 @@ class G1ParTask : public AbstractGangTask {
// treating the nmethods visited to act as roots for concurrent marking.
// We only want to make sure that the oops in the nmethods are adjusted with regard to the
// objects copied by the current evacuation.
_g1h->g1_rem_set()->oops_into_collection_set_do(pss, worker_id);
_g1h->g1_rem_set()->oops_into_collection_set_do(pss, worker_id, &_chunk_claimer);

double strong_roots_sec = os::elapsedTime() - start_strong_roots_sec;

Expand Down Expand Up @@ -3718,8 +3728,12 @@ void G1CollectedHeap::redirty_logged_cards() {
dirty_card_queue_set().reset_for_par_iteration();
workers()->run_task(&redirty_task);

DirtyCardQueueSet& dcq = G1BarrierSet::dirty_card_queue_set();
dcq.merge_bufferlists(&dirty_card_queue_set());
if (G1BarrierSimple) {
dirty_card_queue_set().clear();
} else {
DirtyCardQueueSet& dcq = G1BarrierSet::dirty_card_queue_set();
dcq.merge_bufferlists(&dirty_card_queue_set());
}
assert(dirty_card_queue_set().completed_buffers_num() == 0, "All should be consumed");

g1_policy()->phase_times()->record_redirty_logged_cards_time_ms((os::elapsedTime() - redirty_logged_cards_start) * 1000.0);
Expand Down
5 changes: 5 additions & 0 deletions src/hotspot/share/gc/g1/g1CollectedHeap.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ class G1CollectedHeap : public CollectedHeap {

// Other related classes.
friend class HeapRegionClaimer;
friend class HeapRegionChunkClaimer;

// Testing classes.
friend class G1CheckCSetFastTableClosure;
Expand Down Expand Up @@ -1167,6 +1168,10 @@ class G1CollectedHeap : public CollectedHeap {
void heap_region_par_iterate_from_start(HeapRegionClosure* cl,
HeapRegionClaimer* hrclaimer) const;

// Region iteration that takes a HeapRegionChunkClaimer instead of a
// HeapRegionClaimer. worker_id is used to ask the chunk claimer for this
// worker's starting offset.
void heap_region_par_iterate_chunk_based(HeapRegionClosure* cl,
HeapRegionChunkClaimer* chunk_claimer,
uint worker_id) const;

// Iterate over the regions (if any) in the current collection set.
void collection_set_iterate(HeapRegionClosure* blk);

Expand Down
5 changes: 5 additions & 0 deletions src/hotspot/share/gc/g1/g1CollectedHeap.inline.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,11 @@ inline void
G1CollectedHeap::dirty_young_block(HeapWord* start, size_t word_size) {
assert_heap_not_locked();

if (G1BarrierSimple) {
// Not necessary to mark card young
return;
}

// Assign the containing region to containing_hr so that we don't
// have to keep calling heap_region_containing() in the
// asserts below.
Expand Down
1 change: 1 addition & 0 deletions src/hotspot/share/gc/g1/g1ConcurrentRefineThread.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ void G1ConcurrentRefineThread::deactivate() {
}

void G1ConcurrentRefineThread::run_service() {
assert(!G1BarrierSimple, "Concurrent Refinement should be disabled");
_vtime_start = os::elapsedVTime();

while (!should_terminate()) {
Expand Down
Loading
Loading