Skip to content

Commit

Permalink
Fixes draios#1696 - Sysdig capture running endless while using filters
Browse files Browse the repository at this point in the history
The timeout check was being triggered only by filter-matched events
which meant a timeout could possibly never be observed. This was changed
to introduce an optional system that allows people to specify whether
they want the clock to be triggered by any packet (a) or a filtered
match (f). They can specify this optionally after the -M argument.

If a user uses a number with no argument, backwards compatibility is
maintained (which is 'ff'). Otherwise they can specify either 1 or
2 letters such as, for example "15aa" or "12af" or just "11a".

The help text has been modified to talk about these new options.

sysdig-CLA-1.0-signed-off-by: Christopher McKenzie
<[email protected]>
  • Loading branch information
kristopolous committed Oct 18, 2020
1 parent c4f0960 commit 4c5149b
Showing 1 changed file with 81 additions and 14 deletions.
95 changes: 81 additions & 14 deletions userspace/sysdig/sysdig.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,10 @@ static void usage()
" Marathon url is optional and defaults to Mesos address, port 8080.\n"
" The API servers can also be specified via the environment variable\n"
" SYSDIG_MESOS_API.\n"
" -M <num_seconds> Stop collecting after <num_seconds> reached.\n"
" -M <num_seconds>[a|f][a|f]\n"
" Stop collecting after <num_seconds> reached. The two optional arguments\n"
" are whether to start and end the clock after any event (a) or the first\n"
" event that matches the filter (f). The default when unspecified is ff.\n"
" -n <num>, --numevents=<num>\n"
" Stop capturing after <num> events\n"
" --page-faults Capture user/kernel major/minor page faults\n"
Expand Down Expand Up @@ -575,6 +578,7 @@ vector<string> split_nextrun_args(string na)
captureinfo do_inspect(sinsp* inspector,
uint64_t cnt,
uint64_t duration_to_tot_ns,
char *duration_flags,
bool quiet,
bool json,
bool do_flush,
Expand Down Expand Up @@ -613,13 +617,44 @@ captureinfo do_inspect(sinsp* inspector,

if(res == SCAP_TIMEOUT)
{
if(ev != NULL && ev->is_filtered_out())
if(ev != NULL)
{
//
// The event has been dropped by the filtering system.
// Give the chisels a chance to run their timeout logic.
// See #1696 - Sysdig capture running endless while using filters
// Previously this check was done after a filter which meant that
// we did not observe the duration rule.
//
chisels_do_timeout(ev);
// The choice of this compound if statement is intentional. We want
// this to effectively only "have cost" if we're using the -M option
//
// Note that we may set duration_start here or below depending on
// the invocation of the argument.
//
if(duration_to_tot_ns > 0)
{
if(duration_flags[0] == 'a' && duration_start == 0)
{
duration_start = ev->get_ts();
}

if(duration_flags[1] == 'a' && duration_start != 0)
{
if(ev->get_ts() - duration_start >= duration_to_tot_ns)
{
handle_end_of_file(print_progress, formatter);
break;
}
}
}

if(ev->is_filtered_out())
{
//
// The event has been dropped by the filtering system.
// Give the chisels a chance to run their timeout logic.
//
chisels_do_timeout(ev);
}
}

continue;
Expand All @@ -640,17 +675,37 @@ captureinfo do_inspect(sinsp* inspector,
throw sinsp_exception(inspector->getlasterr().c_str());
}

if (duration_start == 0)
{
duration_start = ev->get_ts();
} else if(duration_to_tot_ns > 0)
//
// Prior to the patch for #1696 - Sysdig capture running endless while using filters
// The duration_start would be set at the first match. This previously undocumented
// feature (it is now documented in the help text that can be seen in the commit hunk
// associated with this line of text) is useful because it allows someone to start up
// sysdig, then run their tests and not have to worry about the timing delay between
// those two events before the -M countdown timer begins.
//
// This likely unintended side-effect of the initial implementation has been preserved
// to accommodate people who have likely built workflows based on this.
//
// Additionally the user can override this by adorning the number after the -M argument
// with 2 letters (as documented) to specify either 'a'll or 'f'irst match with respect
// to the beginning and end of the duration specified.
//
if(duration_to_tot_ns > 0)
{
if(ev->get_ts() - duration_start >= duration_to_tot_ns)
if(duration_flags[0] == 'f' && duration_start == 0)
{
handle_end_of_file(print_progress, formatter);
break;
duration_start = ev->get_ts();
}
if(duration_flags[1] == 'f' && duration_start != 0)
{
if(ev->get_ts() - duration_start >= duration_to_tot_ns)
{
handle_end_of_file(print_progress, formatter);
break;
}
}
}

retval.m_nevts++;

if(print_progress)
Expand Down Expand Up @@ -773,6 +828,7 @@ sysdig_init_res sysdig_init(int argc, char **argv)
sinsp_filter* display_filter = NULL;
double duration = 1;
int duration_to_tot = 0;
char duration_flags[] = "ff";
captureinfo cinfo;
string output_format;
uint32_t snaplen = 0;
Expand Down Expand Up @@ -1034,13 +1090,23 @@ sysdig_init_res sysdig_init(int argc, char **argv)
break;
#endif // MINIMAL_BUILD
case 'M':
duration_to_tot = atoi(optarg);
sscanf(optarg, "%d%2c", &duration_to_tot, duration_flags);
if(duration_to_tot <= 0)
{
throw sinsp_exception(string("invalid duration") + optarg);
throw sinsp_exception(string("invalid duration ") + optarg);
res.m_res = EXIT_FAILURE;
goto exit;
}

if( !(duration_flags[0] == 'a' || duration_flags[0] == 'f') ||
!(duration_flags[1] == 'a' || duration_flags[1] == 'f')
)
{
throw sinsp_exception(string("invalid duration positioning ") + optarg);
res.m_res = EXIT_FAILURE;
goto exit;
}

break;
case 'n':
try
Expand Down Expand Up @@ -1627,6 +1693,7 @@ sysdig_init_res sysdig_init(int argc, char **argv)
cinfo = do_inspect(inspector,
cnt,
uint64_t(duration_to_tot*ONE_SECOND_IN_NS),
duration_flags,
quiet,
jflag,
unbuf_flag,
Expand Down

0 comments on commit 4c5149b

Please sign in to comment.