From 607e32ccd07398dd510f353309f019ace6efc5bf Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Thu, 28 Nov 2024 16:30:47 +0100 Subject: [PATCH 1/6] update perfmon dependency Change-Id: Ie710cb70353546e79ca3dc8b421984dd41671273 --- perfmon | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/perfmon b/perfmon index eef1d0d8..d8859e3a 160000 --- a/perfmon +++ b/perfmon @@ -1 +1 @@ -Subproject commit eef1d0d88523a183ca514755b295daee5acb303a +Subproject commit d8859e3a5480721a13651cf20a35a1727afcd570 From e8233e3d399f06643bc31541716d1fd0abc2fed4 Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Mon, 2 Dec 2024 12:27:08 +0100 Subject: [PATCH 2/6] make c-state output more compact Change-Id: Iec5a3d899e3defa4bf83720e969927b0dfa3b513 --- src/pcm.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/pcm.cpp b/src/pcm.cpp index ba14b620..ddce890e 100644 --- a/src/pcm.cpp +++ b/src/pcm.cpp @@ -334,19 +334,20 @@ void print_output(PCM * m, cout << setNextColor() << " N/A\n"; cout << resetColor(); - cout << setNextColor() <<"\n Instructions retired: " << unit_format(getInstructionsRetired(sstate1, sstate2)) << " ;" - << setNextColor() <<" Active cycles: " << unit_format(getCycles(sstate1, sstate2)) << " ;" - << setNextColor() <<" Time (TSC): " << unit_format(getInvariantTSC(cstates1[0], cstates2[0])) << "ticks ;" - << setNextColor() << " C0 (active,non-halted) core residency: " << (getCoreCStateResidency(0, sstate1, sstate2)*100.) << " %\n"; - cout << "\n"; + cout << setNextColor() << "\n Instructions retired: " << unit_format(getInstructionsRetired(sstate1, sstate2)) << " ;" + << setNextColor() << " Active cycles: " << unit_format(getCycles(sstate1, sstate2)) << " ;" + << setNextColor() << " Time (TSC): " << unit_format(getInvariantTSC(cstates1[0], cstates2[0])) << "ticks;\n\n"; + + cout << resetColor() << setNextColor() << " Core C-state residencies: "<< setNextColor() << "C0 (active,non-halted): " << (getCoreCStateResidency(0, sstate1, sstate2)*100.) << " %;"; for (int s = 1; s <= PCM::MAX_C_STATE; ++s) { if (m->isCoreCStateResidencySupported(s)) { - std::cout << setNextColor() << " C" << s << " core residency: " << (getCoreCStateResidency(s, sstate1, sstate2)*100.) << " %;"; + std::cout << setNextColor() << " C" << s << ": " << (getCoreCStateResidency(s, sstate1, sstate2)*100.) << " %;"; } } cout << "\n" ; + cout << resetColor() << setNextColor() << " Package C-state residencies: "; std::vector CoreCStateStackedBar, PackageCStateStackedBar; for (int s = 0; s <= PCM::MAX_C_STATE; ++s) { @@ -359,7 +360,7 @@ void print_output(PCM * m, } if (m->isPackageCStateResidencySupported(s)) { - std::cout << setNextColor() << " C" << s << " package residency: " << (getPackageCStateResidency(s, sstate1, sstate2)*100.) << " %;"; + std::cout << setNextColor() << " C" << s << ": " << (getPackageCStateResidency(s, sstate1, sstate2)*100.) << " %;"; PackageCStateStackedBar.push_back(StackedBarItem(getPackageCStateResidency(s, sstate1, sstate2), "", fill)); } } From 55cf22ae30168fd6c0de37430c7d5793772e502d Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Mon, 2 Dec 2024 15:53:35 +0100 Subject: [PATCH 3/6] support SYS energy API Change-Id: I2313b939f9cc85f66fa90fbea873c2365f67582d --- src/cpucounters.cpp | 11 +++++++ src/cpucounters.h | 71 +++++++++++++++++++++++++++++++++++++++++++- src/pcm.cpp | 7 ++++- src/types.h | 1 + src/width_extender.h | 12 ++++++-- 5 files changed, 98 insertions(+), 4 deletions(-) diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp index 3b186164..02bb2e56 100644 --- a/src/cpucounters.cpp +++ b/src/cpucounters.cpp @@ -1749,6 +1749,12 @@ void PCM::initEnergyMonitoring() pp_energy_status.push_back(std::make_shared( new CounterWidthExtender::MsrHandleCounter(MSR[socketRefCore[0]], MSR_PP1_ENERGY_STATUS), 32, 10000)); } + + if (systemEnergyMetricAvailable() && MSR.size() && (system_energy_status.get() == nullptr)) + { + system_energy_status = std::make_shared( + new CounterWidthExtender::MsrHandleCounter(MSR[socketRefCore[0]], MSR_SYS_ENERGY_STATUS, 0x00000000FFFFFFFF), 32, 10000); + } } static const uint32 UBOX0_DEV_IDS[] = { @@ -6964,6 +6970,11 @@ void PCM::getAllCounterStates(SystemCounterState & systemState, std::vectorread(); + } } void PCM::getUncoreCounterStates(SystemCounterState & systemState, std::vector & socketStates) diff --git a/src/cpucounters.h b/src/cpucounters.h index 6dc8043f..e09be6ec 100644 --- a/src/cpucounters.h +++ b/src/cpucounters.h @@ -810,6 +810,7 @@ class PCM_API PCM std::vector > energy_status; std::vector > dram_energy_status; std::vector > pp_energy_status; + std::shared_ptr system_energy_status; std::vector>> cxlPMUs; // socket X CXL ports X UNIT {0,1} std::vector > memory_bw_local; @@ -2510,6 +2511,25 @@ class PCM_API PCM ); } + bool systemEnergyMetricAvailable() const + { + return ( + useSKLPath() + || cpu_family_model == PCM::SKX + || cpu_family_model == PCM::ICX + || cpu_family_model == PCM::ADL + || cpu_family_model == PCM::RPL + || cpu_family_model == PCM::MTL + || cpu_family_model == PCM::LNL + || cpu_family_model == PCM::ARL + || cpu_family_model == PCM::SPR + || cpu_family_model == PCM::EMR + || cpu_family_model == PCM::GNR + || cpu_family_model == PCM::SRF + || cpu_family_model == PCM::GRR + ); + } + bool packageThermalMetricsAvailable() const { return packageEnergyMetricsAvailable(); @@ -3368,6 +3388,25 @@ uint64 getConsumedEnergy(const int powerPlane, const CounterStateType& before, c return after.PPEnergyStatus[powerPlane] - before.PPEnergyStatus[powerPlane]; } +/*! \brief Returns energy consumed by system + \param before CPU counter state before the experiment + \param after CPU counter state after the experiment +*/ +template +uint64 getSystemConsumedEnergy(const CounterStateType& before, const CounterStateType& after) +{ + return after.systemEnergyStatus - before.systemEnergyStatus; +} + +/*! \brief Checks is systemEnergyStatusValid is valid in the state +* \param s CPU counter state +*/ +template +bool systemEnergyStatusValid(const CounterStateType& s) +{ + return s.systemEnergyStatus != 0; +} + /*! \brief Returns energy consumed by DRAM (measured in internal units) \param before CPU counter state before the experiment \param after CPU counter state after the experiment @@ -3435,6 +3474,31 @@ double getConsumedJoules(const int powerPlane, const CounterStateType& before, c return double(getConsumedEnergy(powerPlane, before, after)) * m->getJoulesPerEnergyUnit(); } +/*! \brief Returns Joules consumed by system + \param before CPU counter state before the experiment + \param after CPU counter state after the experiment +*/ +template +double getSystemConsumedJoules(const CounterStateType& before, const CounterStateType& after) +{ + PCM* m = PCM::getInstance(); + if (!m) return -1.; + + auto unit = m->getJoulesPerEnergyUnit(); + + switch (m->getCPUFamilyModel()) + { + case PCM::SPR: + case PCM::EMR: + case PCM::GNR: + case PCM::SRF: + unit = 1.0; + break; + } + + return double(getSystemConsumedEnergy(before, after)) * unit; +} + /*! \brief Returns Joules consumed by DRAM \param before CPU counter state before the experiment \param after CPU counter state after the experiment @@ -3860,11 +3924,14 @@ class SystemCounterState : public SocketCounterState friend std::vector getPCICFGEvent(const PCM::RawEventEncoding& eventEnc, const SystemCounterState& before, const SystemCounterState& after); friend std::vector getMMIOEvent(const PCM::RawEventEncoding& eventEnc, const SystemCounterState& before, const SystemCounterState& after); friend std::vector getPMTEvent(const PCM::RawEventEncoding& eventEnc, const SystemCounterState& before, const SystemCounterState& after); + template friend bool systemEnergyStatusValid(const CounterStateType& s); + template friend uint64 getSystemConsumedEnergy(const CounterStateType& before, const CounterStateType& after); std::vector > incomingQPIPackets; // each 64 byte std::vector > outgoingQPIFlits; // idle or data/non-data flits depending on the architecture std::vector > TxL0Cycles; uint64 uncoreTSC; + uint64 systemEnergyStatus; std::unordered_map , PCM::PCICFGRegisterEncodingHash, PCM::PCICFGRegisterEncodingCmp> PCICFGValues{}; std::unordered_map, PCM::MMIORegisterEncodingHash, PCM::MMIORegisterEncodingCmp> MMIOValues{}; std::unordered_map, PCM::PMTRegisterEncodingHash2> PMTValues{}; @@ -3890,7 +3957,8 @@ class SystemCounterState : public SocketCounterState friend uint64 getOutgoingQPILinkBytes(uint32 socketNr, uint32 linkNr, const SystemCounterState & now); SystemCounterState() : - uncoreTSC(0) + uncoreTSC(0), + systemEnergyStatus(0) { PCM * m = PCM::getInstance(); accel_counters.resize(m->getNumberofAccelCounters()); @@ -3922,6 +3990,7 @@ class SystemCounterState : public SocketCounterState return *this; } + virtual ~ SystemCounterState() {} }; diff --git a/src/pcm.cpp b/src/pcm.cpp index ddce890e..c57d9a3a 100644 --- a/src/pcm.cpp +++ b/src/pcm.cpp @@ -336,7 +336,12 @@ void print_output(PCM * m, cout << resetColor(); cout << setNextColor() << "\n Instructions retired: " << unit_format(getInstructionsRetired(sstate1, sstate2)) << " ;" << setNextColor() << " Active cycles: " << unit_format(getCycles(sstate1, sstate2)) << " ;" - << setNextColor() << " Time (TSC): " << unit_format(getInvariantTSC(cstates1[0], cstates2[0])) << "ticks;\n\n"; + << setNextColor() << " Time (TSC): " << unit_format(getInvariantTSC(cstates1[0], cstates2[0])) << "ticks;"; + if (m->systemEnergyMetricAvailable() && systemEnergyStatusValid(sstate1) && systemEnergyStatusValid(sstate2)) + { + cout << setNextColor() << " SYS energy: " << getSystemConsumedJoules(sstate1, sstate2) << " J;"; + } + cout << "\n\n"; cout << resetColor() << setNextColor() << " Core C-state residencies: "<< setNextColor() << "C0 (active,non-halted): " << (getCoreCStateResidency(0, sstate1, sstate2)*100.) << " %;"; for (int s = 1; s <= PCM::MAX_C_STATE; ++s) diff --git a/src/types.h b/src/types.h index f6f33ce2..7a986ea4 100644 --- a/src/types.h +++ b/src/types.h @@ -506,6 +506,7 @@ constexpr auto MSR_SMI_COUNT = 0x34; */ constexpr auto MSR_PKG_ENERGY_STATUS = 0x611; +constexpr auto MSR_SYS_ENERGY_STATUS = 0x64D; constexpr auto MSR_RAPL_POWER_UNIT = 0x606; constexpr auto MSR_PKG_POWER_INFO = 0x614; diff --git a/src/width_extender.h b/src/width_extender.h index 7a78f11d..1043667c 100644 --- a/src/width_extender.h +++ b/src/width_extender.h @@ -40,12 +40,20 @@ class CounterWidthExtender { std::shared_ptr msr; uint64 msr_addr; - MsrHandleCounter(std::shared_ptr msr_, uint64 msr_addr_) : msr(msr_), msr_addr(msr_addr_) { } + uint64 msr_mask; + MsrHandleCounter( std::shared_ptr msr_, + const uint64 msr_addr_, + const uint64 msr_mask_ = ~uint64(0ULL)) : + msr(msr_), + msr_addr(msr_addr_), + msr_mask(msr_mask_) + { + } uint64 operator () () { uint64 value = 0; msr->read(msr_addr, &value); - return value; + return value & msr_mask; } }; From febf701b4c7d97f3201805deb50dad76da006fcc Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Tue, 3 Dec 2024 19:56:12 +0100 Subject: [PATCH 4/6] pcm: add csv output for system power Change-Id: I7a8ad160746f1b84562a153f2fb76b30a9aa8277 --- src/pcm.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/pcm.cpp b/src/pcm.cpp index c57d9a3a..fb1d8458 100644 --- a/src/pcm.cpp +++ b/src/pcm.cpp @@ -766,6 +766,8 @@ void print_csv_header(PCM * m, print_csv_header_helper(header, 2); if (m->dramEnergyMetricsAvailable()) print_csv_header_helper(header); + if (m->systemEnergyMetricAvailable()) + print_csv_header_helper(header); if (m->LLCReadMissLatencyMetricsAvailable()) print_csv_header_helper(header); if (m->uncoreFrequencyMetricAvailable()) @@ -946,6 +948,8 @@ void print_csv_header(PCM * m, } if (m->dramEnergyMetricsAvailable()) cout << "DRAM Energy (Joules),"; + if (m->systemEnergyMetricAvailable()) + cout << "SYSTEM Energy (Joules),"; if (m->LLCReadMissLatencyMetricsAvailable()) cout << "LLCRDMISSLAT (ns),"; if (m->uncoreFrequencyMetricAvailable()) @@ -1200,6 +1204,8 @@ void print_csv(PCM * m, cout << getConsumedJoules(0, sstate1, sstate2) << "," << getConsumedJoules(1, sstate1, sstate2) << ","; if (m->dramEnergyMetricsAvailable()) cout << getDRAMConsumedJoules(sstate1, sstate2) << ","; + if (m->systemEnergyMetricAvailable()) + cout << getSystemConsumedJoules(sstate1, sstate2) << ","; if (m->LLCReadMissLatencyMetricsAvailable()) cout << getLLCReadMissLatency(sstate1, sstate2) << ","; if (m->uncoreFrequencyMetricAvailable()) From e5ba86dfab927d336507d8ff708da937d87afa7b Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Fri, 6 Dec 2024 12:21:33 +0100 Subject: [PATCH 5/6] update to simdjson v3.11.0 Change-Id: Iac0aeeb2359e6b8664409a5c9997b6f07a490940 --- src/simdjson | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/simdjson b/src/simdjson index e341c8b4..b4242d3b 160000 --- a/src/simdjson +++ b/src/simdjson @@ -1 +1 @@ -Subproject commit e341c8b43861b43de29c48ab65f292d997096953 +Subproject commit b4242d3b4ffb97854b035175be077aab712a2d46 From 6b52bbec7adf300aab458d52440268d19641c486 Mon Sep 17 00:00:00 2001 From: Otto Bruggeman Date: Tue, 10 Dec 2024 12:48:22 +0100 Subject: [PATCH 6/6] create unique core ids and fix threads_per_core --- src/cpucounters.cpp | 34 +++--- src/pcm-sensor-server.cpp | 24 +++- src/topology.cpp | 16 ++- src/topology.h | 236 ++++++++++++++++++++++++++++++-------- src/topologyentry.h | 18 +++ 5 files changed, 262 insertions(+), 66 deletions(-) diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp index 02bb2e56..f683b380 100644 --- a/src/cpucounters.cpp +++ b/src/cpucounters.cpp @@ -1444,29 +1444,19 @@ bool PCM::discoverSystemTopology() } if(num_sockets == 0) { num_sockets = (int32)(std::max)(socketIdMap.size(), (size_t)1); + // std::cerr << " num_sockets = " << num_sockets << "\n"; } socketIdMap_type::iterator s = socketIdMap.begin(); for (uint32 sid = 0; s != socketIdMap.end(); ++s) { s->second = sid++; - // first is apic id, second is logical socket id - systemTopology->addSocket( s->first, s->second ); } - for (int32 cid = 0; cid < num_cores; ++cid) - { - //std::cerr << "Cid: " << cid << "\n"; - systemTopology->addThread( cid, topology[cid] ); - } - - // All threads are here now so we can set the refCore for a socket - for ( auto& socket : systemTopology->sockets() ) - socket->setRefCore(); - // use map to change apic socket id to the logical socket id for (int i = 0; (i < (int)num_cores) && (!socketIdMap.empty()); ++i) { + // std::cerr << "socket_id: " << topology[i].socket_id << ", socketIdMap tells me: " << socketIdMap[topology[i].socket_id] << "\n"; if(isCoreOnline((int32)i)) topology[i].socket_id = socketIdMap[topology[i].socket_id]; } @@ -1483,7 +1473,7 @@ bool PCM::discoverSystemTopology() { for (int i = 0; i < (int)num_cores; ++i) { - if (topology[i].socket_id == topology[0].socket_id && topology[i].core_id == topology[0].core_id) + if (topology[i].isSameCore( topology[0] )) ++threads_per_core; } assert(threads_per_core != 0); @@ -1491,6 +1481,22 @@ bool PCM::discoverSystemTopology() if(num_phys_cores_per_socket == 0 && num_cores == num_online_cores) num_phys_cores_per_socket = num_cores / num_sockets / threads_per_core; if(num_online_cores == 0) num_online_cores = num_cores; + s = socketIdMap.begin(); + for (; s != socketIdMap.end(); ++s) + { + systemTopology->addSocket( s->second ); + } + + for (int32 cid = 0; cid < num_cores; ++cid) + { + //std::cerr << "Cid: " << cid << "\n"; + systemTopology->addThread( cid, topology[cid] ); + } + + // All threads are here now so we can set the refCore for a socket + for ( auto& socket : systemTopology->sockets() ) + socket->setRefCore(); + int32 i = 0; socketRefCore.resize(num_sockets, -1); @@ -3421,11 +3427,11 @@ void PCM::destroyMSR() PCM::~PCM() { + deleteAndNullify(systemTopology); if (instance) { destroyMSR(); instance = NULL; - deleteAndNullify(systemTopology); } } diff --git a/src/pcm-sensor-server.cpp b/src/pcm-sensor-server.cpp index 4845c01c..f8c3c3b4 100644 --- a/src/pcm-sensor-server.cpp +++ b/src/pcm-sensor-server.cpp @@ -645,7 +645,7 @@ class PrometheusPrinter : Visitor } virtual void dispatch( Core* c ) override { - addToHierarchy( std::string( "core=\"" ) + std::to_string( c->coreID() ) + "\"" ); + addToHierarchy( std::string( "core=\"" ) + std::to_string( c->dieGroupID()*256 + c->dieID()*64 + c->tileID()*16 + c->moduleID()*4 + c->coreID() ) + "\"" ); auto vec = c->threads(); iterateVectorAndCallAccept( vec ); @@ -801,6 +801,7 @@ class PrometheusPrinter : Visitor } removeFromHierarchy(); } + void printSystemCounterState( SystemCounterState const& before, SystemCounterState const& after ) { addToHierarchy( "source=\"uncore\"" ); PCM* pcm = PCM::getInstance(); @@ -3759,6 +3760,7 @@ int mainThrows(int argc, char * argv[]) { bool useRealtimePriority = false; #endif bool forceRTMAbortMode = false; + bool printTopology = false; unsigned short port = 0; unsigned short debug_level = 0; std::string certificateFile; @@ -3774,7 +3776,12 @@ int mainThrows(int argc, char * argv[]) { MainLoop mainLoop; std::string ev_file_name; - if ( argc > 1 ) { + const char* PPTEnv = std::getenv( "PCMSENSORSERVER_PRINT_TOPOLOGY" ); + if ( PPTEnv ) { + if ( *PPTEnv == '1' ) { + printTopology = true; + } + } else if ( argc > 1 ) { std::string arg_value; for ( int i=1; i < argc; ++i ) { @@ -3901,7 +3908,7 @@ int mainThrows(int argc, char * argv[]) { } } - #ifdef __linux__ +#ifdef __linux__ // check kernel version for driver dependency. if (accel != ACCEL_NOCONFIG) { @@ -4035,6 +4042,17 @@ int mainThrows(int argc, char * argv[]) { accs_->programAccelCounters(); } + if ( printTopology ) { + TopologyPrinter* tp = new TopologyPrinter(); + tp->dispatch( PCM::getInstance()->getSystemTopology() ); + std::vector & tpData = tp->topologyDataStrings(); + std::sort( tpData.begin(), tpData.end(), TopologyStringCompare ); + for( auto& line: tpData ) { + std::cout << line << "\n"; + } + deleteAndNullify( tp ); + exit( 0 ); + } #if defined (USE_SSL) if ( useSSL ) { if ( port == 0 ) diff --git a/src/topology.cpp b/src/topology.cpp index 7368d82f..4572606e 100644 --- a/src/topology.cpp +++ b/src/topology.cpp @@ -30,8 +30,8 @@ UncoreCounterState ClientUncore::uncoreCounterState( void ) const return ucs; } -Socket::Socket( PCM* m, int32 apicID, int32 logicalID ) - : pcm_(m), refCore_(nullptr), apicID_(apicID), logicalID_(logicalID) +Socket::Socket( PCM* m, int32 logicalID ) + : pcm_(m), refCore_(nullptr), logicalID_(logicalID) { if ( pcm_->isServerCPU() ) uncore_ = new ServerUncore( pcm_, logicalID ); @@ -102,4 +102,16 @@ void Aggregator::dispatch( SystemRoot const& syp ) { readAccelCounters(sycs_); } +bool TopologyStringCompare( const std::string& topology1, const std::string& topology2 ) { + if ( topology1.size() == 0 ) return true; + if ( topology2.size() == 0 ) return false; + + int topo1asint, topo2asint; + std::stringstream ss1(topology1); + std::stringstream ss2(topology2); + ss1 >> topo1asint; + ss2 >> topo2asint; + return topo1asint < topo2asint; +} + }// namespace pcm diff --git a/src/topology.h b/src/topology.h index 47432377..62529a07 100644 --- a/src/topology.h +++ b/src/topology.h @@ -63,7 +63,7 @@ enum Status { class HyperThread : public SystemObject { public: - HyperThread( PCM* m, int32 threadID, int32 osID, enum Status status ) : pcm_(m), threadID_(threadID), osID_(osID), status_(status) {} + HyperThread( PCM* m, int32 osID, TopologyEntry te, enum Status status ) : pcm_(m), osID_(osID), te_(te), status_(status) {} virtual ~HyperThread() { pcm_ = nullptr; } virtual void accept( Visitor& v ) override { @@ -77,16 +77,50 @@ class HyperThread : public SystemObject return ccs; } + std::string topologyDataString() const { + std::stringstream ss; + ss << osID_ << "\t" << te_.socket_id << "\t" << te_.die_grp_id << "\t" << te_.die_id << "\t" << te_.tile_id << "\t" << te_.core_id << "\t" << te_.thread_id << "\t"; + return ss.str(); + } + + TopologyEntry topologyEntry() const { + return te_; + } + void addMSRHandle( std::shared_ptr handle ) { msrHandle_ = handle; } + int32 osID() const { + return osID_; + } + int32 threadID() const { - return threadID_; + return te_.thread_id; } - int32 osID() const { - return osID_; + int32 coreID() const { + return te_.core_id; + } + + int32 moduleID() const { + return te_.module_id; + } + + int32 tileID() const { + return te_.tile_id; + } + + int32 dieID() const { + return te_.die_id; + } + + int32 dieGroupID() const { + return te_.die_grp_id; + } + + int32 socketID() const { + return te_.socket_id; } // We simply pass by value, this way the refcounting works best and as expected @@ -99,23 +133,22 @@ class HyperThread : public SystemObject } private: - PCM* pcm_; + PCM* pcm_; std::shared_ptr msrHandle_; - int32 threadID_; - int32 osID_; - enum Status status_; + // osID is the expected osID, offlined cores have te.os_id == -1 + int32 osID_; + TopologyEntry te_; + enum Status status_; }; class Core : public SystemObject { - constexpr static int32 MAX_THREADS_PER_CORE = 4; - public: - Core( PCM* m, int32 coreID, int32 tileID, int32 socketID ) { - pcm_ = m; - coreID_ = coreID; - tileID_ = tileID; - socketID_ = socketID; + Core( PCM* m ) : pcm_(m) { + // PCM* m is not 0, we're being called from the PCM constructor + // Just before this Core object is constructed, the value for + // threads_per_core is determined + MAX_THREADS_PER_CORE = pcm_->getThreadsPerCore(); } virtual ~Core() { pcm_ = nullptr; @@ -136,10 +169,10 @@ class Core : public SystemObject return ccs; } - void addHyperThreadInfo( int32 threadID, int32 osID ) { - if ( threadID >= MAX_THREADS_PER_CORE ) { + void addHyperThreadInfo( int32 osID, TopologyEntry te ) { + if ( te.thread_id >= MAX_THREADS_PER_CORE ) { std::stringstream ss; - ss << "ERROR: Core: threadID cannot be larger than " << MAX_THREADS_PER_CORE << ".\n"; + ss << "ERROR: Core: thread_id cannot be larger than " << MAX_THREADS_PER_CORE << ".\n"; throw std::runtime_error( ss.str() ); } if ( threads_.size() == 0 || @@ -149,8 +182,8 @@ class Core : public SystemObject } ) == threads_.end() ) { - // std::cerr << "Core::addHyperThreadInfo: " << threadID << ", " << osID << "\n"; - threads_.push_back( new HyperThread( pcm_, threadID, osID, Status::Online ) ); + // std::cerr << "Core::addHyperThreadInfo: " << te.thread_id << ", " << te.os_id << "\n"; + threads_.push_back( new HyperThread( pcm_, osID, te, Status::Online ) ); } } @@ -179,15 +212,39 @@ class Core : public SystemObject } int32 coreID() const { - return coreID_; + if ( 0 == threads_.size() ) + throw std::runtime_error("BUG: No threads yet but asking for a coreID!"); + return threads_.front()->coreID(); + } + + int32 moduleID() const { + if ( 0 == threads_.size() ) + throw std::runtime_error("BUG: No threads yet but asking for a moduleID!"); + return threads_.front()->moduleID(); } int32 tileID() const { - return tileID_; + if ( 0 == threads_.size() ) + throw std::runtime_error("BUG: No threads yet but asking for a tileID!"); + return threads_.front()->tileID(); + } + + int32 dieID() const { + if ( 0 == threads_.size() ) + throw std::runtime_error("BUG: No threads yet but asking for a tileID!"); + return threads_.front()->dieID(); + } + + int32 dieGroupID() const { + if ( 0 == threads_.size() ) + throw std::runtime_error("BUG: No threads yet but asking for a tileID!"); + return threads_.front()->dieGroupID(); } int32 socketID() const { - return socketID_; + if ( 0 == threads_.size() ) + throw std::runtime_error("BUG: No threads yet but asking for a socketID!"); + return threads_.front()->socketID(); } bool isOnline() const { @@ -200,9 +257,7 @@ class Core : public SystemObject private: PCM* pcm_; std::vector threads_; - int32 coreID_; - int32 tileID_; - int32 socketID_; + int32 MAX_THREADS_PER_CORE; }; class Uncore : public SystemObject @@ -268,12 +323,12 @@ class Socket : public SystemObject { Socket(const Socket &) = delete; Socket & operator = (const Socket &) = delete; public: - Socket( PCM* m, int32 apicID, int32 logicalID ); + Socket( PCM* m, int32 logicalID ); virtual ~Socket() { pcm_ = nullptr; + refCore_ = nullptr; // cores_ is owner, set it to null before deleting it one below for ( auto& core : cores_ ) deleteAndNullify(core); - refCore_ = nullptr; // cores_ is owner so it is already deleted by here deleteAndNullify(uncore_); } @@ -305,11 +360,10 @@ class Socket : public SystemObject { SocketCounterState socketCounterState( void ) const; - Core* findCoreByTileID( int32 tileID ) { - for ( auto& core : cores_ ) { - if ( core->tileID() == tileID ) + Core* findCoreByTopologyEntry( TopologyEntry te ) { + for ( auto& core : cores_ ) + if ( core->hyperThread( 0 )->topologyEntry().isSameCore( te ) ) return core; - } return nullptr; } @@ -321,10 +375,6 @@ class Socket : public SystemObject { return uncore_; } - int32 apicId() const { - return apicID_; - } - int32 socketID() const { return logicalID_; } @@ -338,7 +388,6 @@ class Socket : public SystemObject { PCM* pcm_; Core* refCore_; Uncore* uncore_; - int32 apicID_; int32 logicalID_; }; @@ -361,29 +410,29 @@ class SystemRoot : public SystemObject { v.dispatch( *this ); } - void addSocket( int32 apic_id, int32 logical_id ) { - Socket* s = new Socket( pcm_, apic_id, logical_id ); + void addSocket( int32 logical_id ) { + Socket* s = new Socket( pcm_, logical_id ); sockets_.push_back( s ); } // osID is the expected os_id, this is used in case te.os_id = -1 (offlined core) void addThread( int32 osID, TopologyEntry& te ) { + // std::cerr << "SystemRoot::addThread: coreid: " << te.core_id << ", module_id: " << te.module_id << ", tile_id: " << te.tile_id << ", die_id: " << te.die_id << ", die_grp_id: " << te.die_grp_id << ", socket_id: " << te.socket_id << ", os_id: " << osID << "\n"; // quick check during development to see if expected osId == te.os_id for onlined cores // assert( te.os_id != -1 && osID == te.os_id ); bool entryAdded = false; for ( auto& socket : sockets_ ) { - if ( socket->apicId() == te.socket_id ) { + if ( socket->socketID() == te.socket_id ) { Core* core = nullptr; - if ( (core = socket->findCoreByTileID( te.tile_id )) == nullptr ) { - // std::cerr << "SystemRoot::addThread: " << te.tile_id << ", " << osID << "\n"; - core = new Core( pcm_, te.core_id, te.tile_id, te.socket_id ); + if ( (core = socket->findCoreByTopologyEntry( te )) == nullptr ) { + core = new Core( pcm_ ); // std::cerr << "new Core ThreadID: " << te.thread_id << "\n"; - core->addHyperThreadInfo( te.thread_id, osID ); + core->addHyperThreadInfo( osID, te ); socket->addCore( core ); // std::cerr << "Added core " << te.core_id << " with os_id " << osID << ", threadid " << te.thread_id << " and tileid " << te.tile_id << " to socket " << te.socket_id << ".\n"; } else { // std::cerr << "existing Core ThreadID: " << te.thread_id << "\n"; - core->addHyperThreadInfo( te.thread_id, osID ); + core->addHyperThreadInfo( osID, te ); // std::cerr << "Augmented core " << te.core_id << " with osID " << osID << " and threadid " << te.thread_id << " for the hyperthread to socket " << te.socket_id << ".\n"; } entryAdded = true; @@ -393,7 +442,7 @@ class SystemRoot : public SystemObject { if ( !entryAdded ) { // if ( te.os_id == -1 ) // std::cerr << "TE not added because os_id == -1, core is offline\n"; - offlinedThreadsAtStart_.push_back( new HyperThread( pcm_, -1, osID, Status::Offline ) ); + offlinedThreadsAtStart_.push_back( new HyperThread( pcm_, osID, te, Status::Offline ) ); } } @@ -446,7 +495,7 @@ class SystemRoot : public SystemObject { /* Method used here: while walking the tree and iterating the vector * elements, collect the counters. Once all elements have been walked - * the vectors are filled with the aggregates. + * the vectors contain the aggregates. */ class Aggregator : Visitor { @@ -547,4 +596,97 @@ class Aggregator : Visitor std::chrono::steady_clock::time_point dispatchedAt_{}; }; +/* Method used here: while walking the cores in the tree and iterating the + * vector elements, print the core related ids into a large string. Once all + * cores have been walked the vector of strings contains all ids. + */ +class TopologyPrinter : Visitor +{ +public: + TopologyPrinter() : wq_( WorkQueue::getInstance() ) + { + PCM* const pcm = PCM::getInstance(); + // Resize user provided vectors to the right size + threadIDsVector_.resize( pcm->getNumCores() ); + // Internal use only, need to be the same size as the user provided vectors + threadIDsFutures_.resize( pcm->getNumCores() ); + } + + virtual ~TopologyPrinter() { + wq_ = nullptr; + } + +public: + virtual void dispatch( SystemRoot const& syp ) override { + // std::cerr << "TopologyPrinter::dispatch( SystemRoot )\n"; + for ( auto* socket : syp.sockets() ) + socket->accept( *this ); + + auto tidFuturesIter = threadIDsFutures_.begin(); + auto tidIter = threadIDsVector_.begin(); + // int i; + // i = 0; + for ( ; tidFuturesIter != threadIDsFutures_.end() && tidIter != threadIDsVector_.end(); ++tidFuturesIter, ++tidIter ) { + // std::cerr << "Works tidFuture: " << ++i << "\n"; + (*tidIter) = (*tidFuturesIter).get(); + } + } + + virtual void dispatch( Socket* sop ) override { + // std::cerr << "TopologyPrinter::dispatch( Socket )\n"; + // Fetch Topology Data + for ( auto* core : sop->cores() ) + core->accept( *this ); + } + + virtual void dispatch( Core* cop ) override { + // std::cerr << "TopologyPrinter::dispatch( Core )\n"; + // Loop each HyperThread + for ( auto* thread : cop->threads() ) { + // Fetch the Topology Data + thread->accept( *this ); + } + } + + virtual void dispatch( HyperThread* htp ) override { + // std::cerr << "TopologyPrinter::dispatch( HyperThread )\n"; + // std::cerr << "Dispatch htp with osID=" << htp->osID() << "\n"; + auto job = new LambdaJob( + []( HyperThread* h ) -> std::string { + DBG( 5, "Lambda fetching Topology Data async" ); + std::string s; + if ( !h->isOnline() ) + return s; + return h->topologyDataString(); + }, htp + ); + threadIDsFutures_[ htp->osID() ] = job->getFuture(); + wq_->addWork( job ); + } + + virtual void dispatch( ServerUncore* /*sup*/ ) override { + // std::cerr << "TopologyPrinter::dispatch( ServerUncore )\n"; + } + + virtual void dispatch( ClientUncore* /*cup*/ ) override { + // std::cerr << "TopologyPrinter::dispatch( ClientUncore )\n"; + } + + std::vector & topologyDataStrings( void ) { + return threadIDsVector_; + } + + std::chrono::steady_clock::time_point dispatchedAt( void ) const { + return dispatchedAt_; + } + +private: + WorkQueue* wq_; + std::vector threadIDsVector_; + std::vector> threadIDsFutures_; + std::chrono::steady_clock::time_point dispatchedAt_{}; +}; + +bool TopologyStringCompare( const std::string& topology1, const std::string& topology2 ); + } // namespace pcm diff --git a/src/topologyentry.h b/src/topologyentry.h index d86f9f9b..43608107 100644 --- a/src/topologyentry.h +++ b/src/topologyentry.h @@ -67,6 +67,24 @@ struct PCM_API TopologyEntry // describes a core } return "unknown"; } + bool isSameSocket( TopologyEntry& te ) { + return this->socket_id == te.socket_id; + } + bool isSameDieGroup( TopologyEntry& te ) { + return this->die_grp_id == te.die_grp_id && isSameSocket(te); + } + bool isSameDie( TopologyEntry& te ) { + return this->die_id == te.die_id && isSameDieGroup(te); + } + bool isSameTile( TopologyEntry& te ) { + return this->tile_id == te.tile_id && isSameDie(te); + } + bool isSameModule( TopologyEntry& te ) { + return this->module_id == te.module_id && isSameTile (te); + } + bool isSameCore( TopologyEntry& te ) { + return this->core_id == te.core_id && isSameModule(te); + } }; inline void fillEntry(TopologyEntry & entry, const uint32 & smtMaskWidth, const uint32 & coreMaskWidth, const uint32 & l2CacheMaskShift, const int apic_id)