Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add per-Task cpu time metric (#56320) #194

Merged
merged 2 commits into from
Dec 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 23 additions & 5 deletions base/boot.jl
Original file line number Diff line number Diff line change
Expand Up @@ -156,15 +156,33 @@
#end

#mutable struct Task
# parent::Task
# next::Any
# queue::Any
# storage::Any
# state::Symbol
# donenotify::Any
# result::Any
# exception::Any
# backtrace::Any
# logstate::Any
# scope::Any
# code::Any
# @atomic _state::UInt8
# sticky::UInt8
# priority::UInt16
# @atomic _isexception::UInt8
# pad00::UInt8
# pad01::UInt8
# pad02::UInt8
# rngState0::UInt64
# rngState1::UInt64
# rngState2::UInt64
# rngState3::UInt64
# rngState4::UInt64
# const metrics_enabled::Bool
# pad10::UInt8
# pad11::UInt8
# pad12::UInt8
# @atomic first_enqueued_at::UInt64
# @atomic last_started_running_at::UInt64
# @atomic running_time_ns::UInt64
# @atomic finished_at::UInt64
#end

export
Expand Down
74 changes: 74 additions & 0 deletions base/experimental.jl
Original file line number Diff line number Diff line change
Expand Up @@ -368,4 +368,78 @@ adding them to the global method table.
"""
:@MethodTable

### Task metrics

"""
Base.Experimental.task_metrics(::Bool)

Enable or disable the collection of per-task metrics.
A `Task` created when `Base.Experimental.task_metrics(true)` is in effect will have
[`Base.Experimental.task_running_time_ns`](@ref) and [`Base.Experimental.task_wall_time_ns`](@ref)
timing information available.

!!! note
Task metrics can be enabled at start-up via the `--task-metrics=yes` command line option.
"""
function task_metrics(b::Bool)
if b
ccall(:jl_task_metrics_enable, Cvoid, ())
else
ccall(:jl_task_metrics_disable, Cvoid, ())
end
return nothing
end

"""
Base.Experimental.task_running_time_ns(t::Task) -> Union{UInt64, Nothing}

Return the total nanoseconds that the task `t` has spent running.
This metric is only updated when `t` yields or completes unless `t` is the current task, in
which it will be updated continuously.
See also [`Base.Experimental.task_wall_time_ns`](@ref).

Returns `nothing` if task timings are not enabled.
See [`Base.Experimental.task_metrics`](@ref).

!!! note "This metric is from the Julia scheduler"
A task may be running on an OS thread that is descheduled by the OS
scheduler, this time still counts towards the metric.

!!! compat "Julia 1.12"
This method was added in Julia 1.12.
"""
function task_running_time_ns(t::Task=current_task())
t.metrics_enabled || return nothing
if t == current_task()
# These metrics fields can't update while we're running.
# But since we're running we need to include the time since we last started running!
return t.running_time_ns + (time_ns() - t.last_started_running_at)
else
return t.running_time_ns
end
end

"""
Base.Experimental.task_wall_time_ns(t::Task) -> Union{UInt64, Nothing}

Return the total nanoseconds that the task `t` was runnable.
This is the time since the task first entered the run queue until the time at which it
completed, or until the current time if the task has not yet completed.
See also [`Base.Experimental.task_running_time_ns`](@ref).

Returns `nothing` if task timings are not enabled.
See [`Base.Experimental.task_metrics`](@ref).

!!! compat "Julia 1.12"
This method was added in Julia 1.12.
"""
function task_wall_time_ns(t::Task=current_task())
t.metrics_enabled || return nothing
start_at = t.first_enqueued_at
start_at == 0 && return UInt64(0)
end_at = t.finished_at
end_at == 0 && return time_ns() - start_at
return end_at - start_at
end

end # module
1 change: 1 addition & 0 deletions base/options.jl
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ struct JLOptions
heap_size_hint::UInt64
trace_compile_timing::Int8
safe_crash_log_file::Ptr{UInt8}
task_metrics::Int8
end

# This runs early in the sysimage != is not defined yet
Expand Down
53 changes: 50 additions & 3 deletions base/task.jl
Original file line number Diff line number Diff line change
Expand Up @@ -810,7 +810,11 @@ function enq_work(t::Task)
return t
end

schedule(t::Task) = enq_work(t)
function schedule(t::Task)
# [task] created -scheduled-> wait_time
maybe_record_enqueued!(t)
enq_work(t)
end

"""
schedule(t::Task, [val]; error=false)
Expand Down Expand Up @@ -857,6 +861,8 @@ function schedule(t::Task, @nospecialize(arg); error=false)
t.queue === nothing || Base.error("schedule: Task not runnable")
setfield!(t, :result, arg)
end
# [task] created -scheduled-> wait_time
maybe_record_enqueued!(t)
enq_work(t)
return t
end
Expand Down Expand Up @@ -888,9 +894,15 @@ A fast, unfair-scheduling version of `schedule(t, arg); yield()` which
immediately yields to `t` before calling the scheduler.
"""
function yield(t::Task, @nospecialize(x=nothing))
(t._state === task_state_runnable && t.queue === nothing) || error("yield: Task not runnable")
ct = current_task()
t === ct && throw(ConcurrencyViolationError("Cannot yield to currently running task!"))
(t._state === task_state_runnable && t.queue === nothing) || throw(ConcurrencyViolationError("yield: Task not runnable"))
# [task] user_time -yield-> wait_time
record_running_time!(ct)
# [task] created -scheduled-> wait_time
maybe_record_enqueued!(t)
t.result = x
enq_work(current_task())
enq_work(ct)
set_next_task(t)
return try_yieldto(ensure_rescheduled)
end
Expand All @@ -904,13 +916,18 @@ call to `yieldto`. This is a low-level call that only switches tasks, not consid
or scheduling in any way. Its use is discouraged.
"""
function yieldto(t::Task, @nospecialize(x=nothing))
ct = current_task()
# TODO: these are legacy behaviors; these should perhaps be a scheduler
# state error instead.
if t._state === task_state_done
return x
elseif t._state === task_state_failed
throw(t.result)
end
# [task] user_time -yield-> wait_time
record_running_time!(ct)
# [task] created -scheduled-unfairly-> wait_time
maybe_record_enqueued!(t)
t.result = x
set_next_task(t)
return try_yieldto(identity)
Expand All @@ -924,6 +941,10 @@ function try_yieldto(undo)
rethrow()
end
ct = current_task()
# [task] wait_time -(re)started-> user_time
if ct.metrics_enabled
@atomic :monotonic ct.last_started_running_at = time_ns()
end
if ct._isexception
exc = ct.result
ct.result = nothing
Expand All @@ -937,6 +958,11 @@ end

# yield to a task, throwing an exception in it
function throwto(t::Task, @nospecialize exc)
ct = current_task()
# [task] user_time -yield-> wait_time
record_running_time!(ct)
# [task] created -scheduled-unfairly-> wait_time
maybe_record_enqueued!(t)
t.result = exc
t._isexception = true
set_next_task(t)
Expand Down Expand Up @@ -989,6 +1015,9 @@ checktaskempty = Partr.multiq_check_empty
end

function wait()
ct = current_task()
# [task] user_time -yield-or-done-> wait_time
record_running_time!(ct)
GC.safepoint()
W = workqueue_for(Threads.threadid())
poptask(W)
Expand All @@ -1003,3 +1032,21 @@ if Sys.iswindows()
else
pause() = ccall(:pause, Cvoid, ())
end

# update the `running_time_ns` field of `t` to include the time since it last started running.
function record_running_time!(t::Task)
if t.metrics_enabled && !istaskdone(t)
@atomic :monotonic t.running_time_ns += time_ns() - t.last_started_running_at
end
return t
end

# if this is the first time `t` has been added to the run queue
# (or the first time it has been unfairly yielded to without being added to the run queue)
# then set the `first_enqueued_at` field to the current time.
function maybe_record_enqueued!(t::Task)
if t.metrics_enabled && t.first_enqueued_at == 0
@atomic :monotonic t.first_enqueued_at = time_ns()
end
return t
end
4 changes: 4 additions & 0 deletions doc/man/julia.1
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,10 @@ Print precompile statements for methods compiled during execution or save to a p
--trace-compile-timing=
If --trace-compile is enabled show how long each took to compile in ms

.TP
--task-metrics={yes|no*}
Enable the collection of per-task metrics.

.TP
-image-codegen
Force generate code in imaging mode
Expand Down
8 changes: 8 additions & 0 deletions doc/src/base/multi-threading.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,3 +71,11 @@ These building blocks are used to create the regular synchronization objects.
```@docs
Base.Threads.SpinLock
```

## Task metrics (Experimental)

```@docs
Base.Experimental.task_metrics
Base.Experimental.task_running_time_ns
Base.Experimental.task_wall_time_ns
```
1 change: 1 addition & 0 deletions doc/src/manual/command-line-interface.md
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ The following is a complete list of command-line switches available when launchi
|`--code-coverage=tracefile.info` |Append coverage information to the LCOV tracefile (filename supports format tokens).|
|`--track-allocation[={none*\|user\|all}]` |Count bytes allocated by each source line (omitting setting is equivalent to "user")|
|`--track-allocation=@<path>` |Count bytes but only in files that fall under the given file path/directory. The `@` prefix is required to select this option. A `@` with no path will track the current directory.|
|`--task-metrics={yes\|no*}` |Enable the collection of per-task metrics|
|`--bug-report=KIND` |Launch a bug report session. It can be used to start a REPL, run a script, or evaluate expressions. It first tries to use BugReporting.jl installed in current environment and falls back to the latest compatible BugReporting.jl if not. For more information, see `--bug-report=help`.|
|`--compile={yes*\|no\|all\|min}` |Enable or disable JIT compiler, or request exhaustive or minimal compilation|
|`--output-o <name>` |Generate an object file (including system image data)|
Expand Down
4 changes: 4 additions & 0 deletions src/init.c
Original file line number Diff line number Diff line change
Expand Up @@ -851,6 +851,10 @@ JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel)
#if defined(_COMPILER_GCC_) && __GNUC__ >= 12
#pragma GCC diagnostic ignored "-Wdangling-pointer"
#endif
if (jl_options.task_metrics == JL_OPTIONS_TASK_METRICS_ON) {
// enable before creating the root task so it gets timings too.
jl_atomic_fetch_add(&jl_task_metrics_enabled, 1);
}
// warning: this changes `jl_current_task`, so be careful not to call that from this function
jl_task_t *ct = jl_init_root_task(ptls, stack_lo, stack_hi);
#pragma GCC diagnostic pop
Expand Down
30 changes: 30 additions & 0 deletions src/jlapi.c
Original file line number Diff line number Diff line change
Expand Up @@ -509,6 +509,36 @@ JL_DLLEXPORT uint64_t jl_cumulative_recompile_time_ns(void)
return jl_atomic_load_relaxed(&jl_cumulative_recompile_time);
}

/**
* @brief Enable per-task timing.
*/
JL_DLLEXPORT void jl_task_metrics_enable(void)
{
// Increment the flag to allow reentrant callers.
jl_atomic_fetch_add(&jl_task_metrics_enabled, 1);
}

/**
* @brief Disable per-task timing.
*/
JL_DLLEXPORT void jl_task_metrics_disable(void)
{
// Prevent decrementing the counter below zero
uint8_t enabled = jl_atomic_load_relaxed(&jl_task_metrics_enabled);
while (enabled > 0) {
if (jl_atomic_cmpswap(&jl_task_metrics_enabled, &enabled, enabled-1))
break;
}
}

/**
* @brief Retrieve floating-point environment constants.
*
* Populates an array with constants related to the floating-point environment,
* such as rounding modes and exception flags.
*
* @param ret An array of integers to be populated with floating-point environment constants.
*/
JL_DLLEXPORT void jl_get_fenv_consts(int *ret)
{
ret[0] = FE_INEXACT;
Expand Down
38 changes: 27 additions & 11 deletions src/jloptions.c
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ JL_DLLEXPORT void jl_init_options(void)
0, // heap-size-hint
0, // trace_compile_timing
NULL, // safe_crash_log_file
0, // task_metrics
};
jl_options_initialized = 1;
}
Expand Down Expand Up @@ -206,17 +207,22 @@ static const char opts_hidden[] =
" --strip-metadata Remove docstrings and source location info from system image\n"
" --strip-ir Remove IR (intermediate representation) of compiled functions\n\n"

// compiler debugging (see the devdocs for tips on using these options)
" --output-unopt-bc <name> Generate unoptimized LLVM bitcode (.bc)\n"
" --output-bc <name> Generate LLVM bitcode (.bc)\n"
" --output-asm <name> Generate an assembly file (.s)\n"
" --output-incremental={yes|no*}\n"
" Generate an incremental output file (rather than complete)\n"
" --trace-compile={stderr,name}\n"
" Print precompile statements for methods compiled during execution or save to a path\n"
" --trace-compile-timing If --trace-compile is enabled show how long each took to compile in ms\n"
" --image-codegen Force generate code in imaging mode\n"
" --permalloc-pkgimg={yes|no*} Copy the data section of package images into memory\n"
// compiler debugging and experimental (see the devdocs for tips on using these options)
" --experimental Enable the use of experimental (alpha) features\n"
" --output-unopt-bc <name> Generate unoptimized LLVM bitcode (.bc)\n"
" --output-bc <name> Generate LLVM bitcode (.bc)\n"
" --output-asm <name> Generate an assembly file (.s)\n"
" --output-incremental={yes|no*} Generate an incremental output file (rather than\n"
" complete)\n"
" --trace-compile={stderr|name} Print precompile statements for methods compiled\n"
" during execution or save to stderr or a path. Methods that\n"
" were recompiled are printed in yellow or with a trailing\n"
" comment if color is not supported\n"
" --trace-compile-timing If --trace-compile is enabled show how long each took to\n"
" compile in ms\n"
" --task-metrics={yes|no*} Enable collection of per-task timing data.\n"
" --image-codegen Force generate code in imaging mode\n"
" --permalloc-pkgimg={yes|no*} Copy the data section of package images into memory\n"
;

JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
Expand All @@ -239,6 +245,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
opt_trace_compile,
opt_trace_compile_timing,
opt_trace_dispatch,
opt_task_metrics,
opt_math_mode,
opt_worker,
opt_bind_to,
Expand Down Expand Up @@ -317,6 +324,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
{ "trace-compile", required_argument, 0, opt_trace_compile },
{ "trace-compile-timing", no_argument, 0, opt_trace_compile_timing },
{ "trace-dispatch", required_argument, 0, opt_trace_dispatch },
{ "task-metrics", required_argument, 0, opt_task_metrics },
{ "math-mode", required_argument, 0, opt_math_mode },
{ "handle-signals", required_argument, 0, opt_handle_signals },
// hidden command line options
Expand Down Expand Up @@ -873,6 +881,14 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
if (jl_options.safe_crash_log_file == NULL)
jl_error("julia: failed to allocate memory for --safe-crash-log-file");
break;
case opt_task_metrics:
if (!strcmp(optarg, "no"))
jl_options.task_metrics = JL_OPTIONS_TASK_METRICS_OFF;
else if (!strcmp(optarg, "yes"))
jl_options.task_metrics = JL_OPTIONS_TASK_METRICS_ON;
else
jl_errorf("julia: invalid argument to --task-metrics={yes|no} (%s)", optarg);
break;
default:
jl_errorf("julia: unhandled option -- %c\n"
"This is a bug, please report it.", c);
Expand Down
1 change: 1 addition & 0 deletions src/jloptions.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ typedef struct {
uint64_t heap_size_hint;
int8_t trace_compile_timing;
const char *safe_crash_log_file;
int8_t task_metrics;
} jl_options_t;

#endif
Loading
Loading