Skip to content

Commit

Permalink
writeback cache fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
tinebp committed Jul 31, 2024
1 parent fc50b66 commit 3fe8f96
Show file tree
Hide file tree
Showing 5 changed files with 92 additions and 76 deletions.
60 changes: 34 additions & 26 deletions hw/rtl/cache/VX_bank_flush.sv
Original file line number Diff line number Diff line change
Expand Up @@ -27,41 +27,49 @@ module VX_bank_flush #(
) (
input wire clk,
input wire reset,
input wire flush_in_valid,
output wire flush_in_ready,
output wire flush_out_init,
output wire flush_out_valid,
output wire [`CS_LINE_SEL_BITS-1:0] flush_out_line,
output wire [NUM_WAYS-1:0] flush_out_way,
input wire flush_out_ready,
input wire flush_begin,
output wire flush_end,
output wire flush_init,
output wire flush_valid,
output wire [`CS_LINE_SEL_BITS-1:0] flush_line,
output wire [NUM_WAYS-1:0] flush_way,
input wire flush_ready,
input wire mshr_empty
);
// ways iteration is only needed when eviction is enabled
localparam CTR_WIDTH = `CS_LINE_SEL_BITS + (WRITEBACK ? `CS_WAY_SEL_BITS : 0);

localparam STATE_IDLE = 2'd0;
localparam STATE_INIT = 2'd1;
localparam STATE_FLUSH = 2'd2;
localparam STATE_DONE = 2'd3;
localparam STATE_IDLE = 0;
localparam STATE_INIT = 1;
localparam STATE_WAIT = 2;
localparam STATE_FLUSH = 3;
localparam STATE_DONE = 4;

reg [2:0] state_r, state_n;

reg [CTR_WIDTH-1:0] counter_r;
reg [1:0] state_r, state_n;

always @(*) begin
state_n = state_r;
case (state_r)
STATE_IDLE: begin
if (flush_in_valid && mshr_empty) begin
state_n = STATE_FLUSH;
if (flush_begin) begin
state_n = STATE_WAIT;
end
end
STATE_INIT: begin
if (counter_r == ((2 ** `CS_LINE_SEL_BITS)-1)) begin
state_n = STATE_IDLE;
end
end
STATE_WAIT: begin
// wait for pending requests to complete
if (mshr_empty) begin
state_n = STATE_FLUSH;
end
end
STATE_FLUSH: begin
if (counter_r == ((2 ** CTR_WIDTH)-1) && flush_out_ready) begin
if (counter_r == ((2 ** CTR_WIDTH)-1) && flush_ready) begin
state_n = STATE_DONE;
end
end
Expand All @@ -79,7 +87,8 @@ module VX_bank_flush #(
end else begin
state_r <= state_n;
if (state_r != STATE_IDLE) begin
if ((state_r == STATE_INIT) || flush_out_ready) begin
if ((state_r == STATE_INIT)
|| ((state_r == STATE_FLUSH) && flush_ready)) begin
counter_r <= counter_r + CTR_WIDTH'(1);
end
end else begin
Expand All @@ -88,21 +97,20 @@ module VX_bank_flush #(
end
end

assign flush_in_ready = (state_r == STATE_DONE);

assign flush_out_init = (state_r == STATE_INIT);
assign flush_out_valid = (state_r == STATE_FLUSH);
assign flush_out_line = counter_r[`CS_LINE_SEL_BITS-1:0];
assign flush_end = (state_r == STATE_DONE);
assign flush_init = (state_r == STATE_INIT);
assign flush_valid = (state_r == STATE_FLUSH);
assign flush_line = counter_r[`CS_LINE_SEL_BITS-1:0];

if (WRITEBACK && `CS_WAY_SEL_BITS > 0) begin
reg [NUM_WAYS-1:0] flush_out_way_r;
reg [NUM_WAYS-1:0] flush_way_r;
always @(*) begin
flush_out_way_r = '0;
flush_out_way_r[counter_r[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS]] = 1;
flush_way_r = '0;
flush_way_r[counter_r[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS]] = 1;
end
assign flush_out_way = flush_out_way_r;
assign flush_way = flush_way_r;
end else begin
assign flush_out_way = {NUM_WAYS{1'b1}};
assign flush_way = {NUM_WAYS{1'b1}};
end

endmodule
16 changes: 9 additions & 7 deletions hw/rtl/cache/VX_cache.sv
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,8 @@ module VX_cache import VX_gpu_pkg::*; #(
.TAG_WIDTH (TAG_WIDTH)
) core_bus2_if[NUM_REQS]();

wire [NUM_BANKS-1:0] per_bank_flush_valid;
wire [NUM_BANKS-1:0] per_bank_flush_ready;
wire [NUM_BANKS-1:0] per_bank_flush_begin;
wire [NUM_BANKS-1:0] per_bank_flush_end;

wire [NUM_BANKS-1:0] per_bank_core_req_fire;

Expand All @@ -127,8 +127,8 @@ module VX_cache import VX_gpu_pkg::*; #(
.core_bus_in_if (core_bus_if),
.core_bus_out_if (core_bus2_if),
.bank_req_fire (per_bank_core_req_fire),
.flush_valid (per_bank_flush_valid),
.flush_ready (per_bank_flush_ready)
.flush_begin (per_bank_flush_begin),
.flush_end (per_bank_flush_end)
);

///////////////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -324,6 +324,7 @@ module VX_cache import VX_gpu_pkg::*; #(
.NUM_OUTPUTS (NUM_BANKS),
.DATAW (CORE_REQ_DATAW),
.PERF_CTR_BITS (`PERF_CTR_BITS),
.ARBITER ("F"),
.OUT_BUF (REQ_XBAR_BUF)
) req_xbar (
.clk (clk),
Expand Down Expand Up @@ -432,8 +433,8 @@ module VX_cache import VX_gpu_pkg::*; #(
.mem_rsp_id (`CS_MEM_TAG_TO_REQ_ID(mem_rsp_tag_s)),
.mem_rsp_ready (per_bank_mem_rsp_ready[bank_id]),

.flush_valid (per_bank_flush_valid[bank_id]),
.flush_ready (per_bank_flush_ready[bank_id])
.flush_begin (per_bank_flush_begin[bank_id]),
.flush_end (per_bank_flush_end[bank_id])
);

if (NUM_BANKS == 1) begin
Expand All @@ -457,7 +458,8 @@ module VX_cache import VX_gpu_pkg::*; #(
VX_stream_xbar #(
.NUM_INPUTS (NUM_BANKS),
.NUM_OUTPUTS (NUM_REQS),
.DATAW (CORE_RSP_DATAW)
.DATAW (CORE_RSP_DATAW),
.ARBITER ("F")
) rsp_xbar (
.clk (clk),
.reset (rsp_xbar_reset),
Expand Down
66 changes: 33 additions & 33 deletions hw/rtl/cache/VX_cache_bank.sv
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,8 @@ module VX_cache_bank #(
output wire mem_rsp_ready,

// flush
input wire flush_valid,
output wire flush_ready
input wire flush_begin,
output wire flush_end
);

localparam PIPELINE_STAGES = 2;
Expand Down Expand Up @@ -162,11 +162,11 @@ module VX_cache_bank #(
wire mshr_pending_st0, mshr_pending_st1;
wire mshr_empty;

wire line_flush_valid;
wire line_flush_init;
wire [`CS_LINE_SEL_BITS-1:0] line_flush_sel;
wire [NUM_WAYS-1:0] line_flush_way;
wire line_flush_ready;
wire flush_valid;
wire init_valid;
wire [`CS_LINE_SEL_BITS-1:0] flush_sel;
wire [NUM_WAYS-1:0] flush_way;
wire flush_ready;

// flush unit
VX_bank_flush #(
Expand All @@ -176,16 +176,16 @@ module VX_cache_bank #(
.NUM_WAYS (NUM_WAYS),
.WRITEBACK (WRITEBACK)
) flush_unit (
.clk (clk),
.reset (reset),
.flush_in_valid (flush_valid),
.flush_in_ready (flush_ready),
.flush_out_init (line_flush_init),
.flush_out_valid (line_flush_valid),
.flush_out_line (line_flush_sel),
.flush_out_way (line_flush_way),
.flush_out_ready (line_flush_ready),
.mshr_empty (mshr_empty)
.clk (clk),
.reset (reset),
.flush_begin (flush_begin),
.flush_end (flush_end),
.flush_init (init_valid),
.flush_valid (flush_valid),
.flush_line (flush_sel),
.flush_way (flush_way),
.flush_ready (flush_ready),
.mshr_empty (mshr_empty)
);

wire rdw_hazard1_sel;
Expand All @@ -198,16 +198,16 @@ module VX_cache_bank #(
// mshr replay has highest priority to maximize utilization since there is no miss.
// handle memory responses next to prevent deadlock with potential memory request from a miss.
// flush has precedence over core requests to ensure that the cache is in a consistent state.
wire replay_grant = ~line_flush_init;
wire replay_grant = ~init_valid;
wire replay_enable = replay_grant && replay_valid;

wire fill_grant = ~line_flush_init && ~replay_enable;
wire fill_grant = ~init_valid && ~replay_enable;
wire fill_enable = fill_grant && mem_rsp_valid;

wire flush_grant = ~line_flush_init && ~replay_enable && ~fill_enable;
wire flush_enable = flush_grant && line_flush_valid;
wire flush_grant = ~init_valid && ~replay_enable && ~fill_enable;
wire flush_enable = flush_grant && flush_valid;

wire creq_grant = ~line_flush_init && ~replay_enable && ~fill_enable && ~flush_enable;
wire creq_grant = ~init_valid && ~replay_enable && ~fill_enable && ~flush_enable;
wire creq_enable = creq_grant && core_req_valid;

assign replay_ready = replay_grant
Expand All @@ -219,31 +219,31 @@ module VX_cache_bank #(
&& ~rdw_hazard2_sel
&& ~pipe_stall;

assign line_flush_ready = flush_grant
&& (!WRITEBACK || ~mreq_queue_alm_full) // needed for evictions
&& ~rdw_hazard2_sel
&& ~pipe_stall;
assign flush_ready = flush_grant
&& (!WRITEBACK || ~mreq_queue_alm_full) // needed for evictions
&& ~rdw_hazard2_sel
&& ~pipe_stall;

assign core_req_ready = creq_grant
&& ~mreq_queue_alm_full
&& ~mshr_alm_full
&& ~pipe_stall;

wire init_fire = line_flush_init;
wire init_fire = init_valid;
wire replay_fire = replay_valid && replay_ready;
wire mem_rsp_fire = mem_rsp_valid && mem_rsp_ready;
wire line_flush_fire = line_flush_valid && line_flush_ready;
wire flush_fire = flush_valid && flush_ready;
wire core_req_fire = core_req_valid && core_req_ready;

assign valid_sel = init_fire || replay_fire || mem_rsp_fire || line_flush_fire || core_req_fire;
assign valid_sel = init_fire || replay_fire || mem_rsp_fire || flush_fire || core_req_fire;
assign rw_sel = replay_valid ? replay_rw : core_req_rw;
assign byteen_sel = replay_valid ? replay_byteen : core_req_byteen;
assign wsel_sel = replay_valid ? replay_wsel : core_req_wsel;
assign req_idx_sel = replay_valid ? replay_idx : core_req_idx;
assign tag_sel = replay_valid ? replay_tag : core_req_tag;
assign creq_flush_sel = core_req_valid && core_req_flush;

assign addr_sel = (line_flush_init | line_flush_valid) ? `CS_LINE_ADDR_WIDTH'(line_flush_sel) :
assign addr_sel = (init_valid | flush_valid) ? `CS_LINE_ADDR_WIDTH'(flush_sel) :
(replay_valid ? replay_addr : (mem_rsp_valid ? mem_rsp_addr : core_req_addr));

if (WRITE_ENABLE) begin
Expand All @@ -270,7 +270,7 @@ module VX_cache_bank #(
.clk (clk),
.reset (reset),
.enable (~pipe_stall),
.data_in ({valid_sel, line_flush_init, replay_enable, fill_enable, flush_enable, creq_enable, creq_flush_sel, line_flush_way, addr_sel, data_sel, rw_sel, byteen_sel, wsel_sel, req_idx_sel, tag_sel, replay_id}),
.data_in ({valid_sel, init_valid, replay_enable, fill_enable, flush_enable, creq_enable, creq_flush_sel, flush_way, addr_sel, data_sel, rw_sel, byteen_sel, wsel_sel, req_idx_sel, tag_sel, replay_id}),
.data_out ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_flush_st0, is_creq_st0, creq_flush_st0, flush_way_st0, addr_st0, data_st0, rw_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, replay_id_st0})
);

Expand Down Expand Up @@ -663,8 +663,8 @@ module VX_cache_bank #(

`ifdef DBG_TRACE_CACHE
wire crsp_queue_fire = crsp_queue_valid && crsp_queue_ready;
wire input_stall = (replay_valid || mem_rsp_valid || core_req_valid || line_flush_valid)
&& ~(replay_fire || mem_rsp_fire || core_req_fire || line_flush_fire);
wire input_stall = (replay_valid || mem_rsp_valid || core_req_valid || flush_valid)
&& ~(replay_fire || mem_rsp_fire || core_req_fire || flush_fire);
always @(posedge clk) begin
if (input_stall || pipe_stall) begin
`TRACE(3, ("%d: *** %s stall: crsq=%b, mreq=%b, mshr=%b, rdw1=%b, rdw2=%b, rdw3=%b\n", $time, INSTANCE_ID, crsp_queue_stall, mreq_queue_alm_full, mshr_alm_full, rdw_hazard1_sel, rdw_hazard2_sel, rdw_hazard3_st1));
Expand Down
2 changes: 1 addition & 1 deletion hw/rtl/cache/VX_cache_data.sv
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ module VX_cache_data #(
end

// order the data layout to perform ways multiplexing last.
// this allows converting way index to binary in parallel with BRAM read.
// this allows converting way index to binary in parallel with BRAM read access and way selection.

wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] line_wdata;
wire [BYTEENW-1:0] line_wren;
Expand Down
24 changes: 15 additions & 9 deletions hw/rtl/cache/VX_cache_flush.sv
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,16 @@ module VX_cache_flush #(
VX_mem_bus_if.slave core_bus_in_if [NUM_REQS],
VX_mem_bus_if.master core_bus_out_if [NUM_REQS],
input wire [NUM_BANKS-1:0] bank_req_fire,
output wire [NUM_BANKS-1:0] flush_valid,
input wire [NUM_BANKS-1:0] flush_ready
output wire [NUM_BANKS-1:0] flush_begin,
input wire [NUM_BANKS-1:0] flush_end
);
localparam STATE_IDLE = 0;
localparam STATE_WAIT = 1;
localparam STATE_WAIT1 = 1;
localparam STATE_FLUSH = 2;
localparam STATE_DONE = 3;
localparam STATE_WAIT2 = 3;
localparam STATE_DONE = 4;

reg [2:0] state, state_n;

// track in-flight core requests

Expand Down Expand Up @@ -76,7 +79,6 @@ module VX_cache_flush #(
`UNUSED_VAR (bank_req_fire)
end

reg [1:0] state, state_n;
reg [NUM_BANKS-1:0] flush_done, flush_done_n;

wire [NUM_REQS-1:0] flush_req_mask;
Expand Down Expand Up @@ -112,17 +114,21 @@ module VX_cache_flush #(
case (state)
STATE_IDLE: begin
if (flush_req_enable) begin
state_n = (BANK_SEL_LATENCY != 0) ? STATE_WAIT : STATE_FLUSH;
state_n = (BANK_SEL_LATENCY != 0) ? STATE_WAIT1 : STATE_FLUSH;
end
end
STATE_WAIT: begin
STATE_WAIT1: begin
if (no_inflight_reqs) begin
state_n = STATE_FLUSH;
end
end
STATE_FLUSH: begin
// generate a flush request pulse
state_n = STATE_WAIT2;
end
STATE_WAIT2: begin
// wait for all banks to finish flushing
flush_done_n = flush_done | flush_ready;
flush_done_n = flush_done | flush_end;
if (flush_done_n == {NUM_BANKS{1'b1}}) begin
state_n = STATE_DONE;
flush_done_n = '0;
Expand Down Expand Up @@ -154,6 +160,6 @@ module VX_cache_flush #(
end
end

assign flush_valid = {NUM_BANKS{state == STATE_FLUSH}};
assign flush_begin = {NUM_BANKS{state == STATE_FLUSH}};

endmodule

0 comments on commit 3fe8f96

Please sign in to comment.