Skip to content

Commit

Permalink
local memory area optimization
Browse files Browse the repository at this point in the history
  • Loading branch information
tinebp committed Aug 3, 2024
1 parent 52c5f1f commit 4c1b3fd
Showing 1 changed file with 34 additions and 19 deletions.
53 changes: 34 additions & 19 deletions hw/rtl/mem/VX_local_mem.sv
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ module VX_local_mem import VX_gpu_pkg::*; #(
wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] per_bank_req_idx;
wire [NUM_BANKS-1:0] per_bank_req_ready;

wire [NUM_BANKS-1:0][REQ_DATAW-1:0] per_bank_req_data_all;
wire [NUM_BANKS-1:0][REQ_DATAW-1:0] per_bank_req_data_aos;

wire [NUM_REQS-1:0] req_valid_in;
wire [NUM_REQS-1:0][REQ_DATAW-1:0] req_data_in;
Expand All @@ -111,7 +111,8 @@ module VX_local_mem import VX_gpu_pkg::*; #(
req_bank_addr[i],
mem_bus_if[i].req_data.byteen,
mem_bus_if[i].req_data.data,
mem_bus_if[i].req_data.tag};
mem_bus_if[i].req_data.tag
};
assign mem_bus_if[i].req_ready = req_ready_in[i];
end

Expand All @@ -135,7 +136,7 @@ module VX_local_mem import VX_gpu_pkg::*; #(
.sel_in (req_bank_idx),
.ready_in (req_ready_in),
.valid_out (per_bank_req_valid),
.data_out (per_bank_req_data_all),
.data_out (per_bank_req_data_aos),
.sel_out (per_bank_req_idx),
.ready_out (per_bank_req_ready)
);
Expand All @@ -146,7 +147,8 @@ module VX_local_mem import VX_gpu_pkg::*; #(
per_bank_req_addr[i],
per_bank_req_byteen[i],
per_bank_req_data[i],
per_bank_req_tag[i]} = per_bank_req_data_all[i];
per_bank_req_tag[i]
} = per_bank_req_data_aos[i];
end

// banks access
Expand All @@ -160,37 +162,50 @@ module VX_local_mem import VX_gpu_pkg::*; #(
`RESET_RELAY (bank_reset, reset);

for (genvar i = 0; i < NUM_BANKS; ++i) begin
wire bank_req_valid, bank_req_ready;
wire bank_rsp_valid, bank_rsp_ready;
wire [WORD_WIDTH-1:0] bank_rsp_data;

VX_sp_ram #(
.DATAW (WORD_WIDTH),
.SIZE (WORDS_PER_BANK),
.WRENW (WORD_SIZE)
.WRENW (WORD_SIZE),
.NO_RWCHECK (1)
) data_store (
.clk (clk),
.reset (1'b0),
.read (1'b1),
.reset (reset),
.read (per_bank_req_valid[i] && per_bank_req_ready[i] && ~per_bank_req_rw[i]),
.write (per_bank_req_valid[i] && per_bank_req_ready[i] && per_bank_req_rw[i]),
.wren (per_bank_req_byteen[i]),
.addr (per_bank_req_addr[i]),
.wdata (per_bank_req_data[i]),
.rdata (bank_rsp_data)
);

// drop write response
assign bank_req_valid = per_bank_req_valid[i] && ~per_bank_req_rw[i];
assign per_bank_req_ready[i] = bank_req_ready || per_bank_req_rw[i];
// read-during-write hazard detection
// Flags a read request that targets the same bank address as a write that
// was accepted on this bank in the immediately preceding cycle.
// NOTE(review): presumably required because data_store is instantiated with
// NO_RWCHECK(1), i.e. the RAM itself does not resolve this case - confirm
// against VX_sp_ram.
// last_wr_addr: bank address presented on the request port in the previous
// cycle. It is captured every cycle (reads and writes alike) and is not reset;
// it is only meaningful when qualified by last_wr_valid.
reg [BANK_ADDR_WIDTH-1:0] last_wr_addr;
// last_wr_valid: high for one cycle after a write handshake
// (valid && ready && rw) completed on this bank.
reg last_wr_valid;
always @(posedge clk) begin
if (reset) begin
last_wr_valid <= 0;
end else begin
last_wr_valid <= per_bank_req_valid[i] && per_bank_req_ready[i] && per_bank_req_rw[i];
end
last_wr_addr <= per_bank_req_addr[i];
end
// The hazard term is not qualified by per_bank_req_valid; it is only used to
// gate the valid/ready signals below. It can only be high for a read (~rw),
// so it never blocks a write. While it is high, per_bank_req_ready is low, so
// no write handshake can complete and last_wr_valid clears on the next clock
// edge: the stall lasts a single cycle.
wire is_rdw_hazard = last_wr_valid && ~per_bank_req_rw[i] && (per_bank_req_addr[i] == last_wr_addr);

// drop write response and stall on read-during-write hazard
// bank_rsp_valid is asserted for reads only, so writes never produce a
// response; a read that hits the hazard is held back until the hazard clears.
assign bank_rsp_valid = per_bank_req_valid[i] && ~per_bank_req_rw[i] && ~is_rdw_hazard;
// Writes are accepted regardless of bank_rsp_ready (they generate no response),
// while reads additionally wait for bank_rsp_ready and for the hazard to clear.
assign per_bank_req_ready[i] = (bank_rsp_ready || per_bank_req_rw[i]) && ~is_rdw_hazard;

// register BRAM output
VX_elastic_buffer #(
.DATAW (REQ_SEL_WIDTH + WORD_WIDTH + TAG_WIDTH),
.SIZE (1)
VX_pipe_buffer #(
.DATAW (REQ_SEL_WIDTH + WORD_WIDTH + TAG_WIDTH)
) bank_buf (
.clk (clk),
.reset (bank_reset),
.valid_in (bank_req_valid),
.ready_in (bank_req_ready),
.valid_in (bank_rsp_valid),
.ready_in (bank_rsp_ready),
.data_in ({per_bank_req_idx[i], bank_rsp_data, per_bank_req_tag[i]}),
.data_out ({per_bank_rsp_idx[i], per_bank_rsp_data[i], per_bank_rsp_tag[i]}),
.valid_out (per_bank_rsp_valid[i]),
Expand All @@ -200,10 +215,10 @@ module VX_local_mem import VX_gpu_pkg::*; #(

// gather bank responses

wire [NUM_BANKS-1:0][RSP_DATAW-1:0] per_bank_rsp_data2;
wire [NUM_BANKS-1:0][RSP_DATAW-1:0] per_bank_rsp_data_aos;

for (genvar i = 0; i < NUM_BANKS; ++i) begin
assign per_bank_rsp_data2[i] = {per_bank_rsp_data[i], per_bank_rsp_tag[i]};
assign per_bank_rsp_data_aos[i] = {per_bank_rsp_data[i], per_bank_rsp_tag[i]};
end

wire [NUM_REQS-1:0] rsp_valid_out;
Expand All @@ -222,7 +237,7 @@ module VX_local_mem import VX_gpu_pkg::*; #(
`UNUSED_PIN (collisions),
.sel_in (per_bank_rsp_idx),
.valid_in (per_bank_rsp_valid),
.data_in (per_bank_rsp_data2),
.data_in (per_bank_rsp_data_aos),
.ready_in (per_bank_rsp_ready),
.valid_out (rsp_valid_out),
.data_out (rsp_data_out),
Expand Down

0 comments on commit 4c1b3fd

Please sign in to comment.