Skip to content

Commit

Permalink
Merge branch 'master' into i7046-add-linux-support-to-dr_create_memor…
Browse files Browse the repository at this point in the history
…y_dump
  • Loading branch information
ivankyluk committed Oct 18, 2024
2 parents 162c902 + e9a983a commit 971b2d1
Show file tree
Hide file tree
Showing 53 changed files with 1,711 additions and 320 deletions.
5 changes: 3 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# **********************************************************
# Copyright (c) 2010-2023 Google, Inc. All rights reserved.
# Copyright (c) 2010-2024 Google, Inc. All rights reserved.
# Copyright (c) 2009-2010 VMware, Inc. All rights reserved.
# Copyright (c) 2018 Arm Limited All rights reserved.
# **********************************************************
Expand Down Expand Up @@ -1840,7 +1840,8 @@ endfunction ()
# TODO i#5767: Install an explicit zlib package on our Windows GA CI images
# (this find_package finds a strawberry perl zlib which causes 32-bit build
# and 64-bit private loader issues).
if (WIN32 AND AUTOMATED_TESTING)
option(DISABLE_ZLIB "Disable looking for and using zlib" OFF)
if (WIN32 AND NOT DISABLE_ZLIB)
set(ZLIB_FOUND OFF)
else ()
find_package(ZLIB)
Expand Down
10 changes: 10 additions & 0 deletions api/docs/release.dox
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,16 @@ Further non-compatibility-affecting changes include:
- Added -trace_instr_intervals_file option to the drmemtrace trace analysis tools
framework. The file must be in CSV format containing a <start,duration> tracing
interval per line where start and duration are expressed in number of instructions.
- Added modify_marker_value_filter_t to #dynamorio::drmemtrace::record_filter_t to modify
the value of TRACE_MARKER_TYPE_ markers. This filter takes a list of
<TRACE_MARKER_TYPE_,new_value> and changes every listed marker in the trace to its
corresponding new_value.
- Added trace_analysis_tool::preferred_shard_type() to the drmemtrace framework to
allow switching to core-sharded by default if all tools prefer that mode.
- For the drmemtrace framework, if only core-sharded-preferring tools are enabled
(these include cache and TLB simulators and the schedule_stats tool), -core_sharded or
-core_serial is automatically turned on for offline analysis to enable more
representative simulated software thread scheduling onto virtual cores.

**************************************************
<hr>
Expand Down
12 changes: 12 additions & 0 deletions clients/drcachesim/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,7 @@ add_exported_library(drmemtrace_record_filter STATIC
tools/filter/type_filter.h
tools/filter/encodings2regdeps_filter.h
tools/filter/func_id_filter.h
tools/filter/modify_marker_value_filter.h
tools/filter/null_filter.h)
target_link_libraries(drmemtrace_record_filter drmemtrace_simulator
drmemtrace_schedule_file)
Expand Down Expand Up @@ -1086,6 +1087,17 @@ if (BUILD_TESTS)
use_DynamoRIO_drmemtrace_tracer(tool.drcacheoff.burst_syscall_inject)
endif ()

# Builds the burst_syscall_pt test executable. It is only configured on Linux
# when both the PT post-processor and the PT tracer are being built.
# NOTE(review): the _SUDO suffix presumably marks a test that needs elevated
# privileges to run -- confirm against the test-suite naming conventions.
if (LINUX AND BUILD_PT_POST_PROCESSOR AND BUILD_PT_TRACER)
add_executable(tool.drcacheoff.burst_syscall_pt_SUDO tests/burst_syscall_pt.cpp)
# The test is a statically-linked DynamoRIO app with the drmemtrace client.
configure_DynamoRIO_static(tool.drcacheoff.burst_syscall_pt_SUDO)
use_DynamoRIO_static_client(tool.drcacheoff.burst_syscall_pt_SUDO drmemtrace_static)
# Post-processing (raw2trace) and analysis (analyzer + basic_counts) libraries
# are linked directly into the test binary.
target_link_libraries(tool.drcacheoff.burst_syscall_pt_SUDO drmemtrace_raw2trace
drmemtrace_analyzer test_helpers drmemtrace_basic_counts)
add_win32_flags(tool.drcacheoff.burst_syscall_pt_SUDO)
use_DynamoRIO_drmemtrace_tracer(tool.drcacheoff.burst_syscall_pt_SUDO)
link_with_pthread(tool.drcacheoff.burst_syscall_pt_SUDO)
endif ()

if (UNIX)
if (X86 AND NOT APPLE) # This test is x86-specific.
# uses ptrace and looks for linux-specific syscalls
Expand Down
13 changes: 13 additions & 0 deletions clients/drcachesim/analysis_tool.h
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,19 @@ template <typename RecordType> class analysis_tool_tmpl_t {
{
return "";
}
/**
 * Identifies the preferred shard type for this analysis.
 *
 * The preference is only consulted for offline analysis and only when the user
 * does not specify a shard type for the run. In that case, if every tool being
 * run prefers #SHARD_BY_CORE, the framework switches to a core-sharded mode
 * (-core_sharded, or -core_serial when parallel operation is not available).
 * If the tools disagree then an error is raised and the user must pick a mode
 * explicitly. The preference is ignored if the user specifies a shard type via
 * one of -core_sharded, -core_serial, -no_core_sharded, -no_core_serial, or
 * -cpu_scheduling.
 *
 * The default implementation returns #SHARD_BY_THREAD; a tool that prefers
 * core-sharded operation overrides this to return #SHARD_BY_CORE.
 *
 * @return The shard type this tool prefers.
 */
virtual shard_type_t
preferred_shard_type()
{
return SHARD_BY_THREAD;
}
/** Returns whether the tool was created successfully. */
virtual bool
operator!()
Expand Down
15 changes: 14 additions & 1 deletion clients/drcachesim/analyzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,19 @@ analyzer_tmpl_t<RecordType, ReaderType>::init_scheduler_common(
uint64_t filetype = scheduler_.get_stream(i)->get_filetype();
VPRINT(this, 2, "Worker %d filetype %" PRIx64 "\n", i, filetype);
if (TESTANY(OFFLINE_FILE_TYPE_CORE_SHARDED, filetype)) {
if (i == 0 && shard_type_ == SHARD_BY_CORE) {
// This is almost certainly user error.
// Better to exit than risk user confusion.
// XXX i#7045: Ideally this could be reported as an error by the
// scheduler, and also detected early in analyzer_multi to auto-fix
// (when no mode is specified: if the user specifies core-sharding
// there could be config differences and this should be an error),
// but neither is simple so today the user has to re-run.
error_string_ =
"Re-scheduling a core-sharded-on-disk trace is generally a "
"mistake; re-run with -no_core_sharded.\n";
return false;
}
shard_type_ = SHARD_BY_CORE;
}
}
Expand Down Expand Up @@ -524,7 +537,7 @@ analyzer_tmpl_t<RecordType, ReaderType>::process_serial(analyzer_worker_data_t &
while (true) {
RecordType record;
// The current time is used for time quanta; for instr quanta, it's ignored and
// we pass 0.
// we pass 0 and let the scheduler use instruction + idle counts.
uint64_t cur_micros = sched_by_time_ ? get_current_microseconds() : 0;
typename sched_type_t::stream_status_t status =
worker.stream->next_record(record, cur_micros);
Expand Down
61 changes: 58 additions & 3 deletions clients/drcachesim/analyzer_multi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -339,7 +339,8 @@ record_analyzer_multi_t::create_analysis_tool_from_options(const std::string &to
op_filter_cache_size.get_value(), op_filter_trace_types.get_value(),
op_filter_marker_types.get_value(), op_trim_before_timestamp.get_value(),
op_trim_after_timestamp.get_value(), op_encodings2regdeps.get_value(),
op_filter_func_ids.get_value(), op_verbose.get_value());
op_filter_func_ids.get_value(), op_modify_marker_value.get_value(),
op_verbose.get_value());
}
ERRMSG("Usage error: unsupported record analyzer type \"%s\". Only " RECORD_FILTER
" is supported.\n",
Expand Down Expand Up @@ -461,6 +462,7 @@ analyzer_multi_tmpl_t<RecordType, ReaderType>::analyzer_multi_tmpl_t()
if (!error.empty()) {
this->success_ = false;
this->error_string_ = "raw2trace failed: " + error;
return;
}
}
}
Expand All @@ -472,8 +474,54 @@ analyzer_multi_tmpl_t<RecordType, ReaderType>::analyzer_multi_tmpl_t()
return;
}

bool sharding_specified = op_core_sharded.specified() || op_core_serial.specified() ||
// -cpu_scheduling implies thread-sharded.
op_cpu_scheduling.get_value();
// TODO i#7040: Add core-sharded support for online tools.
bool offline = !op_indir.get_value().empty() || !op_infile.get_value().empty();
if (offline && !sharding_specified) {
bool all_prefer_thread_sharded = true;
bool all_prefer_core_sharded = true;
for (int i = 0; i < this->num_tools_; ++i) {
if (this->tools_[i]->preferred_shard_type() == SHARD_BY_THREAD) {
all_prefer_core_sharded = false;
} else if (this->tools_[i]->preferred_shard_type() == SHARD_BY_CORE) {
all_prefer_thread_sharded = false;
}
if (this->parallel_ && !this->tools_[i]->parallel_shard_supported()) {
this->parallel_ = false;
}
}
if (all_prefer_core_sharded) {
// XXX i#6949: Ideally we could detect a core-sharded-on-disk input
// here and avoid this but that's not simple so currently we have a
// fatal error from the analyzer and the user must re-run with
// -no_core_sharded for such inputs.
if (this->parallel_) {
if (op_verbose.get_value() > 0)
fprintf(stderr, "Enabling -core_sharded as all tools prefer it\n");
op_core_sharded.set_value(true);
} else {
if (op_verbose.get_value() > 0)
fprintf(stderr, "Enabling -core_serial as all tools prefer it\n");
op_core_serial.set_value(true);
}
} else if (!all_prefer_thread_sharded) {
this->success_ = false;
this->error_string_ = "Selected tools differ in preferred sharding: please "
"re-run with -[no_]core_sharded or -[no_]core_serial";
return;
}
}

typename sched_type_t::scheduler_options_t sched_ops;
if (op_core_sharded.get_value() || op_core_serial.get_value()) {
if (!offline) {
// TODO i#7040: Add core-sharded support for online tools.
this->success_ = false;
this->error_string_ = "Core-sharded is not yet supported for online analysis";
return;
}
if (op_core_serial.get_value()) {
this->parallel_ = false;
}
Expand Down Expand Up @@ -501,8 +549,10 @@ analyzer_multi_tmpl_t<RecordType, ReaderType>::analyzer_multi_tmpl_t()
return;
}
if (!this->init_scheduler(tracedir, only_threads, only_shards,
op_verbose.get_value(), std::move(sched_ops)))
op_verbose.get_value(), std::move(sched_ops))) {
this->success_ = false;
return;
}
} else if (op_infile.get_value().empty()) {
// XXX i#3323: Add parallel analysis support for online tools.
this->parallel_ = false;
Expand All @@ -519,12 +569,15 @@ analyzer_multi_tmpl_t<RecordType, ReaderType>::analyzer_multi_tmpl_t()
if (!this->init_scheduler(std::move(reader), std::move(end),
op_verbose.get_value(), std::move(sched_ops))) {
this->success_ = false;
return;
}
} else {
// Legacy file.
if (!this->init_scheduler(op_infile.get_value(), {}, {}, op_verbose.get_value(),
std::move(sched_ops)))
std::move(sched_ops))) {
this->success_ = false;
return;
}
}
if (!init_analysis_tools()) {
this->success_ = false;
Expand Down Expand Up @@ -574,6 +627,8 @@ analyzer_multi_tmpl_t<RecordType, ReaderType>::init_dynamic_schedule()
sched_ops.rebalance_period_us = op_sched_rebalance_period_us.get_value();
sched_ops.randomize_next_input = op_sched_randomize.get_value();
sched_ops.honor_direct_switches = !op_sched_disable_direct_switches.get_value();
sched_ops.exit_if_fraction_inputs_left =
op_sched_exit_if_fraction_inputs_left.get_value();
#ifdef HAS_ZIP
if (!op_record_file.get_value().empty()) {
record_schedule_zip_.reset(new zipfile_ostream_t(op_record_file.get_value()));
Expand Down
70 changes: 53 additions & 17 deletions clients/drcachesim/common/options.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -299,13 +299,19 @@ droption_t<std::string> op_v2p_file(
droption_t<bool> op_cpu_scheduling(
DROPTION_SCOPE_CLIENT, "cpu_scheduling", false,
"Map threads to cores matching recorded cpu execution",
"By default, the simulator schedules threads to simulated cores in a static "
"By default for online analysis, the simulator schedules threads to simulated cores "
"in a static "
"round-robin fashion. This option causes the scheduler to instead use the recorded "
"cpu that each thread executed on (at a granularity of the trace buffer size) "
"for scheduling, mapping traced cpu's to cores and running each segment of each "
"thread on the core that owns the recorded cpu for that segment. "
"This option is not supported with -core_serial; use "
"-cpu_schedule_file with -core_serial instead.");
"-cpu_schedule_file with -core_serial instead. For offline analysis, the "
"recommendation is to not recreate the as-traced schedule (as it is not accurate due "
"to overhead) and instead use a dynamic schedule via -core_serial. If only "
"core-sharded-preferring tools are enabled (e.g., " CPU_CACHE ", " TLB
", " SCHEDULE_STATS
"), -core_serial is automatically turned on for offline analysis.");

droption_t<bytesize_t> op_max_trace_size(
DROPTION_SCOPE_CLIENT, "max_trace_size", 0,
Expand Down Expand Up @@ -890,35 +896,43 @@ droption_t<int> op_kernel_trace_buffer_size_shift(
// Core-oriented analysis.
droption_t<bool> op_core_sharded(
DROPTION_SCOPE_ALL, "core_sharded", false, "Analyze per-core in parallel.",
"By default, the input trace is analyzed in parallel across shards equal to "
"software threads. This option instead schedules those threads onto virtual cores "
"By default, the sharding mode is determined by the preferred shard type of the "
"tools selected (unless overridden, the default preferred type is thread-sharded). "
"This option enables core-sharded, overriding tool defaults. Core-sharded "
"analysis schedules the input software threads onto virtual cores "
"and analyzes each core in parallel. Thus, each shard consists of pieces from "
"many software threads. How the scheduling is performed is controlled by a set "
"of options with the prefix \"sched_\" along with -cores.");
"of options with the prefix \"sched_\" along with -cores. If only "
"core-sharded-preferring tools are enabled (e.g., " CPU_CACHE ", " TLB
", " SCHEDULE_STATS ") and they all support parallel operation, -core_sharded is "
"automatically turned on for offline analysis.");

droption_t<bool> op_core_serial(
DROPTION_SCOPE_ALL, "core_serial", false, "Analyze per-core in serial.",
"In this mode, scheduling is performed just like for -core_sharded. "
"However, the resulting schedule is acted upon by a single analysis thread "
"which walks the N cores in lockstep in round robin fashion. "
"How the scheduling is performed is controlled by a set "
"of options with the prefix \"sched_\" along with -cores.");
"of options with the prefix \"sched_\" along with -cores. If only "
"core-sharded-preferring tools are enabled (e.g., " CPU_CACHE ", " TLB
", " SCHEDULE_STATS ") and not all of them support parallel operation, "
"-core_serial is automatically turned on for offline analysis.");

droption_t<int64_t>
// We pick 10 million to match 2 instructions per nanosecond with a 5ms quantum.
op_sched_quantum(DROPTION_SCOPE_ALL, "sched_quantum", 10 * 1000 * 1000,
"Scheduling quantum",
"Applies to -core_sharded and -core_serial. "
"Scheduling quantum in instructions, unless -sched_time is set in "
"which case this value is multiplied by -sched_time_per_us to "
"produce a quantum in wall-clock microseconds.");
"Applies to -core_sharded and -core_serial. Scheduling quantum in "
"instructions, unless -sched_time is set in which case this value "
"is the quantum in simulated microseconds (equal to wall-clock "
"microseconds multiplied by -sched_time_per_us).");

droption_t<bool>
op_sched_time(DROPTION_SCOPE_ALL, "sched_time", false,
"Whether to use time for the scheduling quantum",
"Applies to -core_sharded and -core_serial. "
"Whether to use wall-clock time for the scheduling quantum, with a "
"value equal to -sched_quantum in microseconds of wall-clock time.");
"Applies to -core_sharded and -core_serial. Whether to use wall-clock "
"time (multiplied by -sched_time_per_us) for measuring idle time and "
"for the scheduling quantum (see -sched_quantum).");

droption_t<bool> op_sched_order_time(DROPTION_SCOPE_ALL, "sched_order_time", true,
"Whether to honor recorded timestamps for ordering",
Expand Down Expand Up @@ -1016,11 +1030,12 @@ droption_t<bool> op_sched_infinite_timeouts(
"(set to false).");

droption_t<double> op_sched_time_units_per_us(
DROPTION_SCOPE_ALL, "sched_time_units_per_us", 100.,
DROPTION_SCOPE_ALL, "sched_time_units_per_us", 1000.,
"Time units per simulated microsecond",
"Time units (currently wall-clock time) per simulated microsecond. This scales all "
"of the -sched_*_us values as it converts wall-clock time into the simulated "
"microseconds measured by those options.");
"Time units per simulated microsecond. The units are either the instruction count "
"plus the idle count (the default) or if -sched_time is selected wall-clock "
"microseconds. This option value scales all of the -sched_*_us values as it "
"converts time units into the simulated microseconds measured by those options.");

droption_t<uint64_t> op_sched_migration_threshold_us(
DROPTION_SCOPE_ALL, "sched_migration_threshold_us", 500,
Expand All @@ -1034,6 +1049,18 @@ droption_t<uint64_t> op_sched_rebalance_period_us(
"The period in simulated microseconds at which per-core run queues are re-balanced "
"to redistribute load.");

// Early-exit threshold for the dynamic scheduler, expressed as a fraction of the
// original input count (default 0.1). The value is copied into
// sched_ops.exit_if_fraction_inputs_left when the dynamic schedule is initialized.
// NOTE(review): this uses DROPTION_SCOPE_FRONTEND while the neighboring sched_
// options use DROPTION_SCOPE_ALL -- confirm that the narrower scope is intended.
droption_t<double> op_sched_exit_if_fraction_inputs_left(
DROPTION_SCOPE_FRONTEND, "sched_exit_if_fraction_inputs_left", 0.1,
"Exit if non-EOF inputs left are <= this fraction of the total",
"Applies to -core_sharded and -core_serial. When an input reaches EOF, if the "
"number of non-EOF inputs left as a fraction of the original inputs is equal to or "
"less than this value then the scheduler exits (sets all outputs to EOF) rather than "
"finishing off the final inputs. This helps avoid long sequences of idles during "
"staggered endings with fewer inputs left than cores and only a small fraction of "
"the total instructions left in those inputs. Since the remaining instruction "
"count is not considered (as it is not available), use discretion when raising "
"this value on uneven inputs.");

// Schedule_stats options.
droption_t<uint64_t>
op_schedule_stats_print_every(DROPTION_SCOPE_ALL, "schedule_stats_print_every",
Expand Down Expand Up @@ -1099,6 +1126,15 @@ droption_t<std::string>
"for the listed function IDs and removes those belonging to "
"unlisted function IDs.");

// Record-filter option holding a flat, comma-separated list of
// <marker type, new value> integer pairs (empty by default, i.e., no markers are
// rewritten). Note that the user-visible option name carries a "filter_" prefix
// ("-filter_modify_marker_value") while the variable name does not.
droption_t<std::string> op_modify_marker_value(
DROPTION_SCOPE_FRONTEND, "filter_modify_marker_value", "",
"Comma-separated pairs of integers representing <TRACE_MARKER_TYPE_, new_value>.",
"This option is for -tool " RECORD_FILTER ". It modifies the value of all listed "
"TRACE_MARKER_TYPE_ markers in the trace with their corresponding new_value. "
"The list must have an even size. Example: -filter_modify_marker_value 3,24,18,2048 "
"sets all TRACE_MARKER_TYPE_CPU_ID == 3 in the trace to core 24 and "
"TRACE_MARKER_TYPE_PAGE_SIZE == 18 to 2k.");

droption_t<uint64_t> op_trim_before_timestamp(
DROPTION_SCOPE_ALL, "trim_before_timestamp", 0, 0,
(std::numeric_limits<uint64_t>::max)(),
Expand Down
Loading

0 comments on commit 971b2d1

Please sign in to comment.