diff --git a/src/mmio.cpp b/src/mmio.cpp index 9e4c406c..bceba05d 100644 --- a/src/mmio.cpp +++ b/src/mmio.cpp @@ -87,7 +87,8 @@ WinPmemMMIORange::WinPmemMMIORange(uint64 baseAddr_, uint64 /* size_ */, bool re mutex.unlock(); } -MMIORange::MMIORange(uint64 baseAddr_, uint64 size_, bool readonly_, bool silent) +MMIORange::MMIORange(const uint64 baseAddr_, const uint64 size_, const bool readonly_, const bool silent_, const int core) : + silent(silent_) { auto hDriver = openMSRDriver(); if (hDriver != INVALID_HANDLE_VALUE) @@ -98,7 +99,7 @@ MMIORange::MMIORange(uint64 baseAddr_, uint64 size_, bool readonly_, bool silent CloseHandle(hDriver); if (status == TRUE && reslength == sizeof(uint64) && result == 1) { - impl = std::make_shared(baseAddr_, size_, readonly_); + impl = std::make_shared(baseAddr_, size_, readonly_, core); return; } else @@ -109,11 +110,18 @@ MMIORange::MMIORange(uint64 baseAddr_, uint64 size_, bool readonly_, bool silent } } } - + if (core >= 0) + { + throw std::runtime_error("WinPmem does not support core affinity"); + } impl = std::make_shared(baseAddr_, size_, readonly_); } -OwnMMIORange::OwnMMIORange(uint64 baseAddr_, uint64 size_, bool /* readonly_ */) +OwnMMIORange::OwnMMIORange( const uint64 baseAddr_, + const uint64 size_, + const bool /* readonly_ */, + const int core_) : + core(core_) { hDriver = openMSRDriver(); MMAP_Request req{}; @@ -132,20 +140,24 @@ OwnMMIORange::OwnMMIORange(uint64 baseAddr_, uint64 size_, bool /* readonly_ */) uint32 OwnMMIORange::read32(uint64 offset) { + CoreAffinityScope _(core); return *((uint32*)(mmapAddr + offset)); } uint64 OwnMMIORange::read64(uint64 offset) { + CoreAffinityScope _(core); return *((uint64*)(mmapAddr + offset)); } void OwnMMIORange::write32(uint64 offset, uint32 val) { + CoreAffinityScope _(core); *((uint32*)(mmapAddr + offset)) = val; } void OwnMMIORange::write64(uint64 offset, uint64 val) { + CoreAffinityScope _(core); *((uint64*)(mmapAddr + offset)) = val; } @@ -164,10 +176,16 @@ OwnMMIORange::~OwnMMIORange() #include "PCIDriverInterface.h" -MMIORange::MMIORange(uint64 physical_address, uint64 size_, bool, bool silent) : +MMIORange::MMIORange(const uint64 physical_address, const uint64 size_, const bool, const bool silent_, const int core_) : mmapAddr(NULL), - size(size_) + size(size_), + silent(silent_), + core(core_) { + if (core_ >= 0) + { + throw std::runtime_error("MMIORange on MacOSX does not support core affinity"); + } if (size > 4096) { if (!silent) @@ -183,6 +201,7 @@ MMIORange::MMIORange(uint64 physical_address, uint64 size_, bool, bool silent) : uint32 MMIORange::read32(uint64 offset) { + warnAlignment<4>("MMIORange::read32", silent, offset); uint32 val = 0; PCIDriver_readMemory32((uint8_t *)mmapAddr + offset, &val); return val; @@ -190,6 +209,7 @@ uint32 MMIORange::read32(uint64 offset) uint64 MMIORange::read64(uint64 offset) { + warnAlignment<8>("MMIORange::read64", silent, offset); uint64 val = 0; PCIDriver_readMemory64((uint8_t *)mmapAddr + offset, &val); return val; @@ -211,11 +231,13 @@ MMIORange::~MMIORange() #elif defined(__linux__) || defined(__FreeBSD__) || defined(__DragonFly__) -MMIORange::MMIORange(uint64 baseAddr_, uint64 size_, bool readonly_, bool silent) : +MMIORange::MMIORange(const uint64 baseAddr_, const uint64 size_, const bool readonly_, const bool silent_, const int core_) : fd(-1), mmapAddr(NULL), size(size_), - readonly(readonly_) + readonly(readonly_), + silent(silent_), + core(core_) { const int oflag = readonly ? O_RDONLY : O_RDWR; int handle = ::open("/dev/mem", oflag); @@ -252,16 +274,22 @@ MMIORange::MMIORange(uint64 baseAddr_, uint64 size_, bool readonly_, bool silent uint32 MMIORange::read32(uint64 offset) { + warnAlignment<4>("MMIORange::read32", silent, offset); + CoreAffinityScope _(core); return *((uint32 *)(mmapAddr + offset)); } uint64 MMIORange::read64(uint64 offset) { + warnAlignment<8>("MMIORange::read64", silent, offset); + CoreAffinityScope _(core); return *((uint64 *)(mmapAddr + offset)); } void MMIORange::write32(uint64 offset, uint32 val) { + warnAlignment<4>("MMIORange::write32", silent, offset); + CoreAffinityScope _(core); if (readonly) { std::cerr << "PCM Error: attempting to write to a read-only MMIORange\n"; @@ -271,6 +299,8 @@ void MMIORange::write32(uint64 offset, uint32 val) } void MMIORange::write64(uint64 offset, uint64 val) { + warnAlignment<8>("MMIORange::write64", silent, offset); + CoreAffinityScope _(core); if (readonly) { std::cerr << "PCM Error: attempting to write to a read-only MMIORange\n"; diff --git a/src/mmio.h b/src/mmio.h index 2d42535b..d2c10281 100644 --- a/src/mmio.h +++ b/src/mmio.h @@ -22,10 +22,23 @@ #endif #include "mutex.h" +#include "utils.h" #include namespace pcm { + class CoreAffinityScope // sets core affinity if core >= 0, nop otherwise + { + std::shared_ptr affinity{nullptr}; + CoreAffinityScope(const CoreAffinityScope&) = delete; + CoreAffinityScope& operator = (const CoreAffinityScope&) = delete; + public: + CoreAffinityScope(const int core) + : affinity((core >= 0) ? std::make_shared(core) : nullptr) + { + } + }; + #ifdef _MSC_VER class MMIORangeInterface @@ -98,10 +111,14 @@ class OwnMMIORange : public MMIORangeInterface { HANDLE hDriver; char * mmapAddr; + const int core; OwnMMIORange(const OwnMMIORange&) = delete; OwnMMIORange& operator = (const OwnMMIORange&) = delete; public: - OwnMMIORange(uint64 baseAddr_, uint64 size_, bool readonly_ = true); + OwnMMIORange( const uint64 baseAddr_, + const uint64 size_, + const bool readonly_ = true, + const int core_ = -1); uint32 read32(uint64 offset); uint64 read64(uint64 offset); void write32(uint64 offset, uint32 val); @@ -112,24 +129,33 @@ class OwnMMIORange : public MMIORangeInterface class MMIORange { std::shared_ptr impl; + const bool silent; MMIORange(const MMIORange &) = delete; MMIORange & operator = (const MMIORange &) = delete; public: - MMIORange(uint64 baseAddr_, uint64 size_, bool readonly_ = true, bool silent = false); + MMIORange( const uint64 baseAddr_, + const uint64 size_, + const bool readonly_ = true, + const bool silent_ = false, + const int core = -1); uint32 read32(uint64 offset) { + warnAlignment<4>("MMIORange::read32", silent, offset); return impl->read32(offset); } uint64 read64(uint64 offset) { + warnAlignment<8>("MMIORange::read64", silent, offset); return impl->read64(offset); } void write32(uint64 offset, uint32 val) { + warnAlignment<4>("MMIORange::write32", silent, offset); impl->write32(offset, val); } void write64(uint64 offset, uint64 val) { + warnAlignment<8>("MMIORange::write64", silent, offset); impl->write64(offset, val); } }; @@ -146,10 +172,16 @@ class MMIORange #ifndef __APPLE__ const bool readonly; #endif + const bool silent; + const int core; MMIORange(const MMIORange &) = delete; MMIORange & operator = (const MMIORange &) = delete; public: - MMIORange(uint64 baseAddr_, uint64 size_, bool readonly_ = true, bool silent = false); + MMIORange( const uint64 baseAddr_, + const uint64 size_, + const bool readonly_ = true, + const bool silent_ = false, + const int core_ = -1); uint32 read32(uint64 offset); uint64 read64(uint64 offset); void write32(uint64 offset, uint32 val); diff --git a/src/pci.cpp b/src/pci.cpp index f2e30306..d7f485df 100644 --- a/src/pci.cpp +++ b/src/pci.cpp @@ -25,9 +25,10 @@ #include "Winmsrdriver\msrstruct.h" #include "winring0/OlsDef.h" #include "winring0/OlsApiInitExt.h" -#include "utils.h" #endif +#include "utils.h" + #if defined (__FreeBSD__) || defined(__DragonFly__) #include #endif @@ -83,6 +84,7 @@ bool PciHandle::exists(uint32 groupnr_, uint32 bus_, uint32 device_, uint32 func int32 PciHandle::read32(uint64 offset, uint32 * value) { + warnAlignment<4>("PciHandle::read32", false, offset); if (hDriver != INVALID_HANDLE_VALUE) { PCICFG_Request req; @@ -113,6 +115,7 @@ int32 PciHandle::read32(uint64 offset, uint32 * value) int32 PciHandle::write32(uint64 offset, uint32 value) { + warnAlignment<4>("PciHandle::write32", false, offset); if (hDriver != INVALID_HANDLE_VALUE) { PCICFG_Request req; @@ -139,6 +142,7 @@ int32 PciHandle::write32(uint64 offset, uint32 value) int32 PciHandle::read64(uint64 offset, uint64 * value) { + warnAlignment<4>("PciHandle::read64", false, offset); if (hDriver != INVALID_HANDLE_VALUE) { PCICFG_Request req; @@ -208,18 +212,21 @@ bool PciHandle::exists(uint32 groupnr_, uint32 bus_, uint32 device_, uint32 func int32 PciHandle::read32(uint64 offset, uint32 * value) { + warnAlignment<4>("PciHandle::read32", false, offset); uint32_t pci_address = FORM_PCI_ADDR(bus, device, function, (uint32_t)offset); return PCIDriver_read32(pci_address, value); } int32 PciHandle::write32(uint64 offset, uint32 value) { + warnAlignment<4>("PciHandle::write32", false, offset); uint32_t pci_address = FORM_PCI_ADDR(bus, device, function, (uint32_t)offset); return PCIDriver_write32(pci_address, value); } int32 PciHandle::read64(uint64 offset, uint64 * value) { + warnAlignment<4>("PciHandle::read64", false, offset); uint32_t pci_address = FORM_PCI_ADDR(bus, device, function, (uint32_t)offset); return PCIDriver_read64(pci_address, value); } @@ -289,6 +296,7 @@ bool PciHandle::exists(uint32 groupnr_, uint32 bus_, uint32 device_, uint32 func int32 PciHandle::read32(uint64 offset, uint32 * value) { + warnAlignment<4>("PciHandle::read32", false, offset); struct pci_io pi; int ret; @@ -308,6 +316,7 @@ int32 PciHandle::read32(uint64 offset, uint32 * value) int32 PciHandle::write32(uint64 offset, uint32 value) { + warnAlignment<4>("PciHandle::write32", false, offset); struct pci_io pi; int ret; @@ -327,6 +336,7 @@ int32 PciHandle::write32(uint64 offset, uint32 value) int32 PciHandle::read64(uint64 offset, uint64 * value) { + warnAlignment<4>("PciHandle::read64", false, offset); struct pci_io pi; int32 ret; @@ -415,16 +425,19 @@ bool PciHandle::exists(uint32 groupnr_, uint32 bus_, uint32 device_, uint32 func int32 PciHandle::read32(uint64 offset, uint32 * value) { + warnAlignment<4>("PciHandle::read32", false, offset); return ::pread(fd, (void *)value, sizeof(uint32), offset); } int32 PciHandle::write32(uint64 offset, uint32 value) { + warnAlignment<4>("PciHandle::write32", false, offset); return ::pwrite(fd, (const void *)&value, sizeof(uint32), offset); } int32 PciHandle::read64(uint64 offset, uint64 * value) { + warnAlignment<4>("PciHandle::read64", false, offset); size_t res = ::pread(fd, (void *)value, sizeof(uint64), offset); if(res != sizeof(uint64)) { @@ -532,16 +545,19 @@ bool PciHandleM::exists(uint32 /*groupnr_*/, uint32 /* bus_*/, uint32 /* device_ int32 PciHandleM::read32(uint64 offset, uint32 * value) { + warnAlignment<4>("PciHandleM::read32", false, offset); return ::pread(fd, (void *)value, sizeof(uint32), offset + base_addr); } int32 PciHandleM::write32(uint64 offset, uint32 value) { + warnAlignment<4>("PciHandleM::write32", false, offset); return ::pwrite(fd, (const void *)&value, sizeof(uint32), offset + base_addr); } int32 PciHandleM::read64(uint64 offset, uint64 * value) { + warnAlignment<4>("PciHandleM::read64", false, offset); return ::pread(fd, (void *)value, sizeof(uint64), offset + base_addr); } @@ -682,6 +698,7 @@ bool PciHandleMM::exists(uint32 /*groupnr_*/, uint32 /*bus_*/, uint32 /*device_* int32 PciHandleMM::read32(uint64 offset, uint32 * value) { + warnAlignment<4>("PciHandleMM::read32", false, offset); *value = *((uint32 *)(mmapAddr + offset)); return sizeof(uint32); @@ -689,6 +706,7 @@ int32 PciHandleMM::read32(uint64 offset, uint32 * value) int32 PciHandleMM::write32(uint64 offset, uint32 value) { + warnAlignment<4>("PciHandleMM::write32", false, offset); *((uint32 *)(mmapAddr + offset)) = value; return sizeof(uint32); @@ -696,6 +714,7 @@ int32 PciHandleMM::write32(uint64 offset, uint32 value) int32 PciHandleMM::read64(uint64 offset, uint64 * value) { + warnAlignment<4>("PciHandleMM::read64", false, offset); read32(offset, (uint32 *)value); read32(offset + sizeof(uint32), ((uint32 *)value) + 1); diff --git a/src/pci.h b/src/pci.h index ec44e99f..d7c03264 100644 --- a/src/pci.h +++ b/src/pci.h @@ -261,8 +261,8 @@ void processDVSEC(MatchFunc matchFunc, ProcessFunc processFunc) // std::cerr << "Intel device scan. found " << std::hex << group << ":" << bus << ":" << device << ":" << function << " " << device_id << std::dec; uint32 status{0}; PciHandleType h(group, bus, device, function); - h.read32(6, &status); // read status - if (status & 0x10) // has capability list + h.read32(4, &status); // read status + if (status & 0x100000) // has capability list { // std::cerr << "Intel device scan. found "<< std::hex << group << ":" << bus << ":" << device << ":" << function << " " << device_id << " with capability list\n" << std::dec; VSEC header; diff --git a/src/pcm-mmio.cpp b/src/pcm-mmio.cpp index bc720d18..ba942dc6 100644 --- a/src/pcm-mmio.cpp +++ b/src/pcm-mmio.cpp @@ -25,20 +25,35 @@ using namespace pcm; void print_usage(const char* progname) { - std::cout << "Usage " << progname << " [-w value] [-q] [-d] address\n\n"; + std::cout << "Usage " << progname << " [-w value] [-q] [-d] [-c core] address\n\n"; std::cout << " Reads/writes MMIO (memory mapped) register in the specified address\n"; std::cout << " -w value : write the value before reading \n"; std::cout << " -b low:high : read or write only low..high bits of the register\n"; std::cout << " -q : read/write 64-bit quad word (default is 32-bit double word)\n"; std::cout << " -d : output all numbers in dec (default is hex)\n"; std::cout << " -n size : number of bytes read from specified address(batch read mode), max bytes=" << MAX_BATCH_OPERATE_BYTES << "\n"; + std::cout << " -c core : perform the operation from specified core\n"; std::cout << " --version : print application version\n"; std::cout << "\n"; } template -void doOp(const std::pair & bits, const uint64 address, const uint64 offset, const uint32 batch_bytes, const bool write, T value, RD readOp, WR writeOp, const bool dec) +void doOp( const std::pair & bits, + const uint64 address, const uint64 offset, + const uint32 batch_bytes, const bool write, + T value, + RD readOp, + WR writeOp, + const bool dec, + const int core) { + auto printCoreEndl = [&]() { + if (core >= 0) + { + std::cout << " on core " << core; + } + std::cout << "\n\n"; + }; if (batch_bytes == 0) //single mode { if (!dec) std::cout << std::hex << std::showbase; @@ -55,13 +70,15 @@ void doOp(const std::pair & bits, const uint64 address, const uint6 extractBitsPrintHelper(bits, value, dec); std::cout << " from " << std::dec << bit; if (!dec) std::cout << std::hex << std::showbase; - std::cout << "-bit MMIO register " << address << "\n\n"; + std::cout << "-bit MMIO register " << address; + printCoreEndl(); } else //batch mode { uint32 i = 0, j= 0; - std::cout << std::hex << " Dumping MMIO register range from 0x" << address << - ", number of bytes=0x" << batch_bytes << "\n\n"; + std::cout << std::hex << " Dumping MMIO register range from 0x" << address << + ", number of bytes=0x" << batch_bytes; + printCoreEndl(); for(i = 0; i < batch_bytes; i+=MAX_BATCH_READ_ROW_DISPLAY_BYTES) { std::ostringstream row_disp_str(std::ostringstream::out); @@ -95,9 +112,10 @@ int mainThrows(int argc, char * argv[]) bool quad = false; uint32 batch_bytes = 0; std::pair bits{-1, -1}; + int core = -1; int my_opt = -1; - while ((my_opt = getopt(argc, argv, "w:dqn:b:")) != -1) + while ((my_opt = getopt(argc, argv, "w:dqn:b:c:")) != -1) { switch (my_opt) { @@ -121,6 +139,9 @@ int mainThrows(int argc, char * argv[]) batch_bytes = MAX_BATCH_OPERATE_BYTES; } break; + case 'c': + core = read_number(optarg); + break; default: print_usage(argv[0]); return -1; @@ -151,16 +172,18 @@ int mainThrows(int argc, char * argv[]) batch_bytes = (rangeSize - offset); //limit the boundary } - MMIORange mmio(baseAddr, rangeSize, !write); + MMIORange mmio(baseAddr, rangeSize, !write, false, core); using namespace std::placeholders; if (quad) { - doOp(bits, address, offset, batch_bytes, write, (uint64)value, std::bind(&MMIORange::read64, &mmio, _1), std::bind(&MMIORange::write64, &mmio, _1, _2), dec); + doOp(bits, address, offset, batch_bytes, write, (uint64)value, + std::bind(&MMIORange::read64, &mmio, _1), std::bind(&MMIORange::write64, &mmio, _1, _2), dec, core); } else { - doOp(bits, address, offset, batch_bytes, write, (uint32)value, std::bind(&MMIORange::read32, &mmio, _1), std::bind(&MMIORange::write32, &mmio, _1, _2), dec); + doOp(bits, address, offset, batch_bytes, write, (uint32)value, + std::bind(&MMIORange::read32, &mmio, _1), std::bind(&MMIORange::write32, &mmio, _1, _2), dec, core); } } catch (std::exception & e) diff --git a/src/utils.cpp b/src/utils.cpp index d442a3b8..7fe9b998 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -330,16 +330,11 @@ void sigINT_handler(int signum) } } -/** - * \brief handles SIGSEGV signals that lead to termination of the program - * this function specifically works when the client application launched - * by pcm -- terminates - */ constexpr auto BACKTRACE_MAX_STACK_FRAME = 30; -void sigSEGV_handler(int signum) +void printBacktrace() { - void *backtrace_buffer[BACKTRACE_MAX_STACK_FRAME] = {0}; - char **backtrace_strings = NULL; + void* backtrace_buffer[BACKTRACE_MAX_STACK_FRAME] = { 0 }; + char** backtrace_strings = NULL; size_t backtrace_size = 0; backtrace_size = backtrace(backtrace_buffer, BACKTRACE_MAX_STACK_FRAME); @@ -357,7 +352,16 @@ void sigSEGV_handler(int signum) } freeAndNullify(backtrace_strings); } +} +/** + * \brief handles SIGSEGV signals that lead to termination of the program + * this function specifically works when the client application launched + * by pcm -- terminates + */ +void sigSEGV_handler(int signum) +{ + printBacktrace(); sigINT_handler(signum); } diff --git a/src/utils.h b/src/utils.h index 63f7e570..11e7dd41 100644 --- a/src/utils.h +++ b/src/utils.h @@ -140,6 +140,7 @@ void set_signal_handlers(void); void set_real_time_priority(const bool & silent); void restore_signal_handlers(void); #ifndef _MSC_VER +void printBacktrace(); void sigINT_handler(int signum); void sigHUP_handler(int signum); void sigUSR_handler(int signum); @@ -248,6 +249,27 @@ inline std::string unit_format(IntType n) void print_cpu_details(); + +inline void printDebugCallstack() +{ +#ifndef _MSC_VER + if (safe_getenv("PCM_PRINT_DEBUG_CALLSTACK") == "1") + { + printBacktrace(); + } +#endif +} + +template +inline void warnAlignment(const char* call, const bool silent, const uint64 offset) +{ + if (silent == false && (offset % Bytes) != 0) + { + std::cerr << "PCM Warning: " << call << " offset " << offset << " is not " << Bytes << "-byte aligned\n"; + printDebugCallstack(); + } +} + #define PCM_UNUSED(x) (void)(x) #define PCM_COMPILE_ASSERT(condition) \ @@ -678,7 +700,7 @@ void restrictDriverAccessNative(LPCTSTR path); std::vector findPathsFromPattern(const char* pattern); #endif -class TemporalThreadAffinity // speedup trick for Linux, FreeBSD, DragonFlyBSD, Windows +class TemporalThreadAffinity { TemporalThreadAffinity(); // forbidden #if defined(__FreeBSD__) || (defined(__DragonFly__) && __DragonFly_version >= 400707)