Skip to content

Commit

Permalink
SimX multiports support fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
tinebp committed Dec 3, 2024
1 parent 24ca4f0 commit 30b0daf
Show file tree
Hide file tree
Showing 7 changed files with 143 additions and 180 deletions.
68 changes: 32 additions & 36 deletions sim/simx/cache_cluster.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,68 +21,64 @@ class CacheCluster : public SimObject<CacheCluster> {
public:
std::vector<std::vector<SimPort<MemReq>>> CoreReqPorts;
std::vector<std::vector<SimPort<MemRsp>>> CoreRspPorts;
SimPort<MemReq> MemReqPort;
SimPort<MemRsp> MemRspPort;
std::vector<SimPort<MemReq>> MemReqPorts;
std::vector<SimPort<MemRsp>> MemRspPorts;

CacheCluster(const SimContext& ctx,
const char* name,
uint32_t num_inputs,
uint32_t num_caches,
uint32_t num_requests,
uint32_t num_units,
const CacheSim::Config& cache_config)
: SimObject(ctx, name)
, CoreReqPorts(num_inputs, std::vector<SimPort<MemReq>>(num_requests, this))
, CoreRspPorts(num_inputs, std::vector<SimPort<MemRsp>>(num_requests, this))
, MemReqPort(this)
, MemRspPort(this)
, caches_(MAX(num_caches, 0x1)) {
, CoreReqPorts(num_inputs, std::vector<SimPort<MemReq>>(cache_config.num_inputs, this))
, CoreRspPorts(num_inputs, std::vector<SimPort<MemRsp>>(cache_config.num_inputs, this))
, MemReqPorts(cache_config.mem_ports, this)
, MemRspPorts(cache_config.mem_ports, this)
, caches_(MAX(num_units, 0x1)) {

CacheSim::Config cache_config2(cache_config);
if (0 == num_caches) {
num_caches = 1;
if (0 == num_units) {
num_units = 1;
cache_config2.bypass = true;
}

char sname[100];

std::vector<MemArbiter::Ptr> input_arbs(num_inputs);
for (uint32_t j = 0; j < num_inputs; ++j) {
snprintf(sname, 100, "%s-input-arb%d", name, j);
input_arbs.at(j) = MemArbiter::Create(sname, ArbiterType::RoundRobin, num_requests, cache_config.num_inputs);
for (uint32_t i = 0; i < num_requests; ++i) {
this->CoreReqPorts.at(j).at(i).bind(&input_arbs.at(j)->ReqIn.at(i));
input_arbs.at(j)->RspIn.at(i).bind(&this->CoreRspPorts.at(j).at(i));
}
}

std::vector<MemArbiter::Ptr> mem_arbs(cache_config.num_inputs);
// Arbitrate incoming core interfaces
std::vector<MemArbiter::Ptr> input_arbs(cache_config.num_inputs);
for (uint32_t i = 0; i < cache_config.num_inputs; ++i) {
snprintf(sname, 100, "%s-mem-arb%d", name, i);
mem_arbs.at(i) = MemArbiter::Create(sname, ArbiterType::RoundRobin, num_inputs, num_caches);
snprintf(sname, 100, "%s-input-arb%d", name, i);
input_arbs.at(i) = MemArbiter::Create(sname, ArbiterType::RoundRobin, num_inputs, num_units);
for (uint32_t j = 0; j < num_inputs; ++j) {
input_arbs.at(j)->ReqOut.at(i).bind(&mem_arbs.at(i)->ReqIn.at(j));
mem_arbs.at(i)->RspIn.at(j).bind(&input_arbs.at(j)->RspOut.at(i));
this->CoreReqPorts.at(j).at(i).bind(&input_arbs.at(i)->ReqIn.at(j));
input_arbs.at(i)->RspIn.at(j).bind(&this->CoreRspPorts.at(j).at(i));
}
}

snprintf(sname, 100, "%s-cache-arb", name);
auto cache_arb = MemArbiter::Create(sname, ArbiterType::RoundRobin, num_caches, 1);
// Arbitrate outgoing memory interfaces
std::vector<MemArbiter::Ptr> mem_arbs(cache_config.mem_ports);
for (uint32_t i = 0; i < cache_config.mem_ports; ++i) {
snprintf(sname, 100, "%s-mem-arb%d", name, i);
mem_arbs.at(i) = MemArbiter::Create(sname, ArbiterType::RoundRobin, num_units, 1);
mem_arbs.at(i)->ReqOut.at(0).bind(&this->MemReqPorts.at(i));
this->MemRspPorts.at(i).bind(&mem_arbs.at(i)->RspOut.at(0));
}

for (uint32_t i = 0; i < num_caches; ++i) {
// Connect caches
for (uint32_t i = 0; i < num_units; ++i) {
snprintf(sname, 100, "%s-cache%d", name, i);
caches_.at(i) = CacheSim::Create(sname, cache_config2);

for (uint32_t j = 0; j < cache_config.num_inputs; ++j) {
mem_arbs.at(j)->ReqOut.at(i).bind(&caches_.at(i)->CoreReqPorts.at(j));
caches_.at(i)->CoreRspPorts.at(j).bind(&mem_arbs.at(j)->RspOut.at(i));
input_arbs.at(j)->ReqOut.at(i).bind(&caches_.at(i)->CoreReqPorts.at(j));
caches_.at(i)->CoreRspPorts.at(j).bind(&input_arbs.at(j)->RspOut.at(i));
}

caches_.at(i)->MemReqPorts.at(0).bind(&cache_arb->ReqIn.at(i));
cache_arb->RspIn.at(i).bind(&caches_.at(i)->MemRspPorts.at(0));
for (uint32_t j = 0; j < cache_config.mem_ports; ++j) {
caches_.at(i)->MemReqPorts.at(j).bind(&mem_arbs.at(j)->ReqIn.at(i));
mem_arbs.at(j)->RspIn.at(i).bind(&caches_.at(i)->MemRspPorts.at(j));
}
}

cache_arb->ReqOut.at(0).bind(&this->MemReqPort);
this->MemRspPort.bind(&cache_arb->RspOut.at(0));
}

~CacheCluster() {}
Expand Down
111 changes: 37 additions & 74 deletions sim/simx/cache_sim.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
#include <vector>
#include <list>
#include <queue>
#include <string.h>

using namespace vortex;

Expand Down Expand Up @@ -306,7 +305,7 @@ class CacheSim::Impl {
params_t params_;
std::vector<bank_t> banks_;
MemArbiter::Ptr bank_arb_;
MemArbiter::Ptr bypass_arb_;
std::vector<MemArbiter::Ptr> nc_arbs_;
std::vector<SimPort<MemReq>> mem_req_ports_;
std::vector<SimPort<MemRsp>> mem_rsp_ports_;
std::vector<bank_req_t> pipeline_reqs_;
Expand All @@ -322,88 +321,51 @@ class CacheSim::Impl {
, config_(config)
, params_(config)
, banks_((1 << config.B), {config, params_})
, nc_arbs_(config.mem_ports)
, mem_req_ports_((1 << config.B), simobject)
, mem_rsp_ports_((1 << config.B), simobject)
, pipeline_reqs_((1 << config.B), config.ports_per_bank)
{
char sname[100];
snprintf(sname, 100, "%s-bypass-arb", simobject->name().c_str());

if (config_.bypass) {
bypass_arb_ = MemArbiter::Create(sname, ArbiterType::RoundRobin, config_.num_inputs);
snprintf(sname, 100, "%s-bypass-arb", simobject->name().c_str());
auto bypass_arb = MemArbiter::Create(sname, ArbiterType::RoundRobin, config_.num_inputs, config_.mem_ports);
for (uint32_t i = 0; i < config_.num_inputs; ++i) {
simobject->CoreReqPorts.at(i).bind(&bypass_arb_->ReqIn.at(i));
bypass_arb_->RspIn.at(i).bind(&simobject->CoreRspPorts.at(i));
simobject->CoreReqPorts.at(i).bind(&bypass_arb->ReqIn.at(i));
bypass_arb->RspIn.at(i).bind(&simobject->CoreRspPorts.at(i));
}
for (uint32_t i = 0; i < config_.mem_ports; ++i) {
bypass_arb->ReqOut.at(i).bind(&simobject->MemReqPorts.at(i));
simobject->MemRspPorts.at(i).bind(&bypass_arb->RspOut.at(i));
}
bypass_arb_->ReqOut.at(0).bind(&simobject->MemReqPorts.at(0));
simobject->MemRspPorts.at(0).bind(&bypass_arb_->RspOut.at(0));
return;
}

if (strcmp(simobject->name().c_str(), "l3cache")) {
bypass_arb_ = MemArbiter::Create(sname, ArbiterType::Priority, 2);
bypass_arb_->ReqOut.at(0).bind(&simobject->MemReqPorts.at(0));
simobject->MemRspPorts.at(0).bind(&bypass_arb_->RspOut.at(0));

if (config.B != 0) {
snprintf(sname, 100, "%s-bank-arb", simobject->name().c_str());
bank_arb_ = MemArbiter::Create(sname, ArbiterType::RoundRobin, (1 << config.B));
for (uint32_t i = 0, n = (1 << config.B); i < n; ++i) {
mem_req_ports_.at(i).bind(&bank_arb_->ReqIn.at(i));
bank_arb_->RspIn.at(i).bind(&mem_rsp_ports_.at(i));
}
bank_arb_->ReqOut.at(0).bind(&bypass_arb_->ReqIn.at(0));
bypass_arb_->RspIn.at(0).bind(&bank_arb_->RspOut.at(0));
} else {
mem_req_ports_.at(0).bind(&bypass_arb_->ReqIn.at(0));
bypass_arb_->RspIn.at(0).bind(&mem_rsp_ports_.at(0));
}
} else {
// TODO: Change this into a crossbar
uint32_t max = MAX(2, config_.num_inputs);
//printf("%s connecting\n", simobject_->name().c_str());
//3
if (config.B != 0) {
bypass_arb_ = MemArbiter::Create(sname, ArbiterType::Priority, max, max);
for (uint32_t i = 0; i < max; ++i) {
//printf("%s connecting input=%d to MemPorts\n", simobject_->name().c_str(), i);
bypass_arb_->ReqOut.at(i).bind(&simobject->MemReqPorts.at(i % (1 << config.B)));
simobject->MemRspPorts.at(i % (1 << config.B)).bind(&bypass_arb_->RspOut.at(i));
}
} else {
bypass_arb_ = MemArbiter::Create(sname, ArbiterType::Priority, 2);
bypass_arb_->ReqOut.at(0).bind(&simobject->MemReqPorts.at(0));
simobject->MemRspPorts.at(0).bind(&bypass_arb_->RspOut.at(0));
}
// Create one non-cacheable arbiter per memory port
for (uint32_t i = 0; i < config_.mem_ports; ++i) {
snprintf(sname, 100, "%s-nc-arb%d", simobject->name().c_str(), i);
nc_arbs_.at(i) = MemArbiter::Create(sname, ArbiterType::Priority, 2, 1);
}

if (config.B != 0)
{
snprintf(sname, 100, "%s-bank-arb", simobject->name().c_str());
bank_arb_ = MemArbiter::Create(sname, ArbiterType::RoundRobin, (1 << config.B), (1 << config.B));
for (uint32_t i = 0, n = (1 << config.B); i < n; ++i)
{
//1
//printf("%s Connecting memory ports to bank=%d\n", simobject_->name().c_str(), i);
mem_req_ports_.at(i).bind(&bank_arb_->ReqIn.at(i));
bank_arb_->RspIn.at(i).bind(&mem_rsp_ports_.at(i));
}
//2
if (config_.num_inputs > 1) {
for (uint32_t i = 0; i < max; ++i) {
//printf("%s connecting bank and bypass port=%d\n", simobject_->name().c_str(), i);
bank_arb_->ReqOut.at(i % (1 << config.B)).bind(&bypass_arb_->ReqIn.at(i));
bypass_arb_->RspIn.at(i).bind(&bank_arb_->RspOut.at(i % (1 << config.B)));
}
} else {
bank_arb_->ReqOut.at(0).bind(&bypass_arb_->ReqIn.at(0));
bypass_arb_->RspIn.at(0).bind(&bank_arb_->RspOut.at(0));
}
}
else
{
mem_req_ports_.at(0).bind(&bypass_arb_->ReqIn.at(0));
bypass_arb_->RspIn.at(0).bind(&mem_rsp_ports_.at(0));
}
// Connect non-cacheable arbiter output to outgoing memory ports
for (uint32_t i = 0; i < config_.mem_ports; ++i) {
nc_arbs_.at(i)->ReqOut.at(0).bind(&simobject->MemReqPorts.at(i));
simobject->MemRspPorts.at(i).bind(&nc_arbs_.at(i)->RspOut.at(0));
}

// Create bank's memory arbiter
snprintf(sname, 100, "%s-bank-arb", simobject->name().c_str());
auto bank_mem_arb = MemArbiter::Create(sname, ArbiterType::RoundRobin, (1 << config.B), config_.mem_ports);
for (uint32_t i = 0, n = (1 << config.B); i < n; ++i) {
mem_req_ports_.at(i).bind(&bank_mem_arb->ReqIn.at(i));
bank_mem_arb->RspIn.at(i).bind(&mem_rsp_ports_.at(i));
}

// Connect bank's memory arbiter to non-cacheable arbiter's input 0
for (uint32_t i = 0; i < config_.mem_ports; ++i) {
bank_mem_arb->ReqOut.at(i).bind(&nc_arbs_.at(i)->ReqIn.at(0));
nc_arbs_.at(i)->RspIn.at(0).bind(&bank_mem_arb->RspOut.at(i));
}

// calculate cache initialization cycles
Expand Down Expand Up @@ -434,8 +396,8 @@ class CacheSim::Impl {
}

// handle cache bypass responses
{
auto& bypass_port = bypass_arb_->RspIn.at(1);
for (uint32_t i = 0, n = config_.mem_ports; i < n; ++i) {
auto& bypass_port = nc_arbs_.at(i)->RspIn.at(1);
if (!bypass_port.empty()) {
auto& mem_rsp = bypass_port.front();
this->processBypassResponse(mem_rsp);
Expand Down Expand Up @@ -568,7 +530,8 @@ class CacheSim::Impl {
{
MemReq mem_req(core_req);
mem_req.tag = (core_req.tag << params_.log2_num_inputs) + req_id;
bypass_arb_->ReqIn.at(1).push(mem_req, 1);
uint32_t mem_port = req_id % config_.mem_ports;
nc_arbs_.at(mem_port)->ReqIn.at(1).push(mem_req, 1);
DT(3, simobject_->name() << " bypass-dram-req: " << mem_req);
}

Expand Down
42 changes: 15 additions & 27 deletions sim/simx/cluster.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ Cluster::Cluster(const SimContext& ctx,
const Arch &arch,
const DCRS &dcrs)
: SimObject(ctx, "cluster")
, mem_req_port(this)
, mem_rsp_port(this)
, mem_req_ports(L2_MEM_PORTS, this)
, mem_rsp_ports(L2_MEM_PORTS, this)
, cluster_id_(cluster_id)
, processor_(processor)
, sockets_(NUM_SOCKETS)
Expand All @@ -35,26 +35,9 @@ Cluster::Cluster(const SimContext& ctx,

// create sockets

snprintf(sname, 100, "cluster%d-icache-arb", cluster_id);
auto icache_arb = MemArbiter::Create(sname, ArbiterType::RoundRobin, sockets_per_cluster);

snprintf(sname, 100, "cluster%d-dcache-arb", cluster_id);
auto dcache_arb = MemArbiter::Create(sname, ArbiterType::RoundRobin, sockets_per_cluster);

for (uint32_t i = 0; i < sockets_per_cluster; ++i) {
uint32_t socket_id = cluster_id * sockets_per_cluster + i;
auto socket = Socket::Create(socket_id,
this,
arch,
dcrs);

socket->icache_mem_req_port.bind(&icache_arb->ReqIn.at(i));
icache_arb->RspIn.at(i).bind(&socket->icache_mem_rsp_port);

socket->dcache_mem_req_port.bind(&dcache_arb->ReqIn.at(i));
dcache_arb->RspIn.at(i).bind(&socket->dcache_mem_rsp_port);

sockets_.at(i) = socket;
sockets_.at(i) = Socket::Create(socket_id, this, arch, dcrs);
}

// Create l2cache
Expand All @@ -77,14 +60,19 @@ Cluster::Cluster(const SimContext& ctx,
2, // pipeline latency
});

l2cache_->MemReqPorts.at(0).bind(&this->mem_req_port);
this->mem_rsp_port.bind(&l2cache_->MemRspPorts.at(0));

icache_arb->ReqOut.at(0).bind(&l2cache_->CoreReqPorts.at(0));
l2cache_->CoreRspPorts.at(0).bind(&icache_arb->RspOut.at(0));
// connect l2cache core interfaces
for (uint32_t i = 0; i < sockets_per_cluster; ++i) {
for (uint32_t j = 0; j < L1_MEM_PORTS; ++j) {
sockets_.at(i)->mem_req_ports.at(j).bind(&l2cache_->CoreReqPorts.at(i * L1_MEM_PORTS + j));
l2cache_->CoreRspPorts.at(i * L1_MEM_PORTS + j).bind(&sockets_.at(i)->mem_rsp_ports.at(j));
}
}

dcache_arb->ReqOut.at(0).bind(&l2cache_->CoreReqPorts.at(1));
l2cache_->CoreRspPorts.at(1).bind(&dcache_arb->RspOut.at(0));
// connect l2cache memory interfaces
for (uint32_t i = 0; i < L2_MEM_PORTS; ++i) {
l2cache_->MemReqPorts.at(i).bind(&this->mem_req_ports.at(i));
this->mem_rsp_ports.at(i).bind(&l2cache_->MemRspPorts.at(i));
}
}

Cluster::~Cluster() {
Expand Down
20 changes: 10 additions & 10 deletions sim/simx/cluster.h
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
Expand Down Expand Up @@ -32,13 +32,13 @@ class Cluster : public SimObject<Cluster> {
CacheSim::PerfStats l2cache;
};

SimPort<MemReq> mem_req_port;
SimPort<MemRsp> mem_rsp_port;
std::vector<SimPort<MemReq>> mem_req_ports;
std::vector<SimPort<MemRsp>> mem_rsp_ports;

Cluster(const SimContext& ctx,
Cluster(const SimContext& ctx,
uint32_t cluster_id,
ProcessorImpl* processor,
const Arch &arch,
ProcessorImpl* processor,
const Arch &arch,
const DCRS &dcrs);

~Cluster();
Expand All @@ -63,16 +63,16 @@ class Cluster : public SimObject<Cluster> {

bool running() const;

int get_exitcode() const;
int get_exitcode() const;

void barrier(uint32_t bar_id, uint32_t count, uint32_t core_id);

PerfStats perf_stats() const;

private:
uint32_t cluster_id_;
ProcessorImpl* processor_;
std::vector<Socket::Ptr> sockets_;
std::vector<Socket::Ptr> sockets_;
std::vector<CoreMask> barriers_;
CacheSim::Ptr l2cache_;
uint32_t cores_per_socket_;
Expand Down
Loading

0 comments on commit 30b0daf

Please sign in to comment.