Skip to content

Commit

Permalink
Do not count profiler initialization in the total profile time
Browse files (browse the repository at this point in the history)
  • Loading branch information
LouisChourakiSonos committed Dec 9, 2024
1 parent 2c18740 commit 66c131e
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 15 deletions.
21 changes: 11 additions & 10 deletions libcli/src/profile.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,15 +96,14 @@ pub fn profile(
&state.session_state.resolved_symbols,
)?;
session_handler.before_plan_eval(&mut state.session_state)?;

let start = crate::time::now();
while iters < bench_limits.max_loops && start.elapsed() < bench_limits.max_time {
rec_profiler_metal(&mut state, dg, inputs, &prefix)?;
let mut entire = Duration::default();
while iters < bench_limits.max_loops && entire < bench_limits.max_time {
entire += rec_profiler_metal(&mut state, dg, inputs, &prefix)?.1;

iters += 1;
}

let entire = start.elapsed();

session_handler.after_plan_eval(&mut state.session_state)?;
entire
}
Expand Down Expand Up @@ -141,13 +140,14 @@ pub fn rec_profiler_metal(
dg: &mut Annotations,
inputs: &TVec<TValue>,
prefix: &[(usize, String)],
) -> TractResult<TVec<TValue>> {
) -> TractResult<(TVec<TValue>, Duration)> {
tract_metal::METAL_CONTEXT.with_borrow(|ctxt| {
let (mut cpu_start, mut gpu_start): (u64, u64) = (0, 0);
ctxt.device().sample_timestamps(&mut cpu_start, &mut gpu_start);

let n_nodes = state.plan().model().nodes_len();
let (result, profiler) = ctxt.profile(n_nodes, || {
let (result, eval_dur, profiler) = ctxt.profile(n_nodes, || {
let profile_start = crate::time::now();
let r = state.run_plan_with_eval(
inputs.clone(),
|session_state, mut node_state, node, input| {
Expand All @@ -166,7 +166,8 @@ pub fn rec_profiler_metal(
res
},
)?;
Ok(r)

Ok((r, profile_start.elapsed()))
})?;

let (mut cpu_end, mut gpu_end): (u64, u64) = (0, 0);
Expand All @@ -180,7 +181,7 @@ pub fn rec_profiler_metal(
));
});

Ok(result)
Ok((result, eval_dur))
})
}

Expand Down
10 changes: 5 additions & 5 deletions metal/src/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ use std::path::Path;
use std::rc::Rc;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::{Arc, OnceLock, RwLock};
use std::time::Duration;

use anyhow::{anyhow, Context, Result};
use metal::{
Expand Down Expand Up @@ -323,9 +324,9 @@ impl MetalContext {
&self,
num_nodes: usize,
eval: EvalCallback,
) -> TractResult<(TVec<TValue>, Vec<u64>)>
) -> TractResult<(TVec<TValue>, Duration, Vec<u64>)>
where
EvalCallback: FnOnce() -> TractResult<TVec<TValue>>,
EvalCallback: FnOnce() -> TractResult<(TVec<TValue>, Duration)>,
{
self.wait_until_completed()?;

Expand All @@ -336,14 +337,13 @@ impl MetalContext {

self.profiler.replace(Some(profiler.clone()));

let output = eval()?;

let (output, eval_duration) = eval()?;
let profile_buffers = profiler.borrow_mut().get_profile_data();

self.profiler.replace(None);
self.wait_until_completed()?;

Ok((output, profile_buffers))
Ok((output, eval_duration, profile_buffers))
}
}

Expand Down

0 comments on commit 66c131e

Please sign in to comment.