Skip to content

Commit

Permalink
Move all regex building into App::Ack
Browse files Browse the repository at this point in the history
  • Loading branch information
petdance committed Nov 30, 2024
1 parent 9d80d0b commit bb68114
Show file tree
Hide file tree
Showing 3 changed files with 124 additions and 118 deletions.
122 changes: 6 additions & 116 deletions ack
Original file line number Diff line number Diff line change
Expand Up @@ -162,10 +162,10 @@ MAIN: {
}

if ( $opt_range_start ) {
($opt_range_start, undef) = build_regex( $opt_range_start, {} );
($opt_range_start, undef) = App::Ack::build_regex( $opt_range_start, {} );
}
if ( $opt_range_end ) {
($opt_range_end, undef) = build_regex( $opt_range_end, {} );
($opt_range_end, undef) = App::Ack::build_regex( $opt_range_end, {} );
}
$using_ranges = $opt_range_start || $opt_range_end;

Expand Down Expand Up @@ -211,8 +211,8 @@ MAIN: {
if ( $App::Ack::is_filter_mode && !$opt->{files_from} ) { # probably -x
$files = App::Ack::Files->from_stdin();
$opt_regex //= shift @ARGV;
($search_re, $scan_re) = build_regex( $opt_regex, $opt );
$search_not_re = _build_search_not_re( $opt );
($search_re, $scan_re) = App::Ack::build_regex( $opt_regex, $opt );
$search_not_re = App::Ack::build_search_not_re( $opt );
$stats{search_re} = $search_re;
$stats{scan_re} = $scan_re;
$stats{search_not_re} = $search_not_re;
Expand All @@ -223,8 +223,8 @@ MAIN: {
}
else {
$opt_regex //= shift @ARGV;
($search_re, $scan_re) = build_regex( $opt_regex, $opt );
$search_not_re = _build_search_not_re( $opt );
($search_re, $scan_re) = App::Ack::build_regex( $opt_regex, $opt );
$search_not_re = App::Ack::build_search_not_re( $opt );
$stats{search_re} = $search_re;
$stats{scan_re} = $scan_re;
$stats{search_not_re} = $search_not_re;
Expand Down Expand Up @@ -528,98 +528,6 @@ sub get_file_id {
}


# Returns a regex object based on a string and command-line options.
# Dies when the regex $str is undefined (i.e. not given on command line).

sub build_regex {
my $str = shift;
my $opt = shift;

defined $str or App::Ack::die( 'No regular expression found.' );

if ( !$opt->{Q} ) {
# Compile the regex to see if it dies or throws warnings.
local $SIG{__WARN__} = sub { die @_ }; # Anything that warns becomes a die.
my $scratch_regex = eval { qr/$str/ };
if ( not $scratch_regex ) {
my $err = $@;
chomp $err;

if ( $err =~ m{^(.+?); marked by <-- HERE in m/(.+?) <-- HERE} ) {
my ($why, $where) = ($1,$2);
my $pointy = ' ' x (6+length($where)) . '^---HERE';
App::Ack::die( "Invalid regex '$str'\nRegex: $str\n$pointy $why" );
}
else {
App::Ack::die( "Invalid regex '$str'\n$err" );
}
}
}

# Check for lowercaseness before we do any modifications.
my $regex_is_lc = App::Ack::is_lowercase( $str );

$str = quotemeta( $str ) if $opt->{Q};

my $scan_str = $str;

# Whole words only.
if ( $opt->{w} ) {
my $ok = 1;

if ( $str =~ /^\\[wd]/ ) {
# Explicit \w is good.
}
else {
# Can start with \w, (, [ or dot.
if ( $str !~ /^[\w\(\[\.]/ ) {
$ok = 0;
}
}

# Can end with \w, }, ), ], +, *, or dot.
if ( $str !~ /[\w\}\)\]\+\*\?\.]$/ ) {
$ok = 0;
}
# ... unless it's escaped.
elsif ( $str =~ /\\[\}\)\]\+\*\?\.]$/ ) {
$ok = 0;
}

if ( !$ok ) {
App::Ack::die( '-w will not do the right thing if your regex does not begin and end with a word character.' );
}

if ( $str =~ /^\w+$/ ) {
# No need for fancy regex if it's a simple word.
$str = sprintf( '\b(?:%s)\b', $str );
}
else {
$str = sprintf( '(?:^|\b|\s)\K(?:%s)(?=\s|\b|$)', $str );
}
}

if ( $opt->{i} || ($opt->{S} && $regex_is_lc) ) {
$_ = "(?i)$_" for ( $str, $scan_str );
}

my $scan_regex = undef;
my $regex = eval { qr/$str/ };
if ( $regex ) {
if ( $scan_str !~ /\$/ ) {
# No line_scan is possible if there's a $ in the regex.
$scan_regex = eval { qr/$scan_str/m };
}
}
else {
my $err = $@;
chomp $err;
App::Ack::die( "Invalid regex '$str':\n $err" );
}

return ($regex, $scan_regex);
}

my $match_colno;

{
Expand Down Expand Up @@ -1198,24 +1106,6 @@ sub range_setup {
}


sub _build_search_not_re {
my $opt = shift;

my @not = @{$opt->{not}};

if ( @not ) {
my @built;
for my $re ( @not ) {
my ($built,undef) = build_regex( $re, $opt );
push( @built, $built );
}
return join( '|', @built );
}

return;
}


=pod
=encoding UTF-8
Expand Down
112 changes: 112 additions & 0 deletions lib/App/Ack.pm
Original file line number Diff line number Diff line change
Expand Up @@ -734,4 +734,116 @@ sub is_lowercase {
}


# Returns a regex object based on a string and command-line options.
# Dies when the regex $str is undefined (i.e. not given on command line).

sub build_regex {
my $str = shift;
my $opt = shift;

defined $str or App::Ack::die( 'No regular expression found.' );

if ( !$opt->{Q} ) {
# Compile the regex to see if it dies or throws warnings.
local $SIG{__WARN__} = sub { App::Ack::die @_ }; # Anything that warns becomes a die.
my $scratch_regex = eval { qr/$str/ };
if ( not $scratch_regex ) {
my $err = $@;
chomp $err;

if ( $err =~ m{^(.+?); marked by <-- HERE in m/(.+?) <-- HERE} ) {
my ($why, $where) = ($1,$2);
my $pointy = ' ' x (6+length($where)) . '^---HERE';
App::Ack::die( "Invalid regex '$str'\nRegex: $str\n$pointy $why" );
}
else {
App::Ack::die( "Invalid regex '$str'\n$err" );
}
}
}

# Check for lowercaseness before we do any modifications.
my $regex_is_lc = App::Ack::is_lowercase( $str );

$str = quotemeta( $str ) if $opt->{Q};

my $scan_str = $str;

# Whole words only.
if ( $opt->{w} ) {
my $ok = 1;

if ( $str =~ /^\\[wd]/ ) {
# Explicit \w is good.
}
else {
# Can start with \w, (, [ or dot.
if ( $str !~ /^[\w\(\[\.]/ ) {
$ok = 0;
}
}

# Can end with \w, }, ), ], +, *, or dot.
if ( $str !~ /[\w\}\)\]\+\*\?\.]$/ ) {
$ok = 0;
}
# ... unless it's escaped.
elsif ( $str =~ /\\[\}\)\]\+\*\?\.]$/ ) {
$ok = 0;
}

if ( !$ok ) {
App::Ack::die( '-w will not do the right thing if your regex does not begin and end with a word character.' );
}

if ( $str =~ /^\w+$/ ) {
# No need for fancy regex if it's a simple word.
$str = sprintf( '\b(?:%s)\b', $str );
}
else {
$str = sprintf( '(?:^|\b|\s)\K(?:%s)(?=\s|\b|$)', $str );
}
}

if ( $opt->{i} || ($opt->{S} && $regex_is_lc) ) {
$_ = "(?i)$_" for ( $str, $scan_str );
}

my $scan_regex = undef;
my $regex = eval { qr/$str/ };
if ( $regex ) {
if ( $scan_str !~ /\$/ ) {
# No line_scan is possible if there's a $ in the regex.
$scan_regex = eval { qr/$scan_str/m };
}
}
else {
my $err = $@;
chomp $err;
App::Ack::die( "Invalid regex '$str':\n $err" );
}

return ($regex, $scan_regex);
}


sub build_search_not_re {
my $opt = shift;

my @not = @{$opt->{not}};

if ( @not ) {
my @built;
for my $re ( @not ) {
my ($built,undef) = App::Ack::build_regex( $re, $opt );
push( @built, $built );
}
return join( '|', @built );
}

return;
}



1; # End of App::Ack
8 changes: 6 additions & 2 deletions tags
Original file line number Diff line number Diff line change
Expand Up @@ -150,11 +150,14 @@ MY Makefile.PL /^package MY;$/;" p
MY Makefile.PL /^sub MY::postamble {$/;" s
MY Makefile.PL /^sub MY::test {$/;" s
NEEDS_LINE_SCAN t/needs-line-scan.t /^NEEDS_LINE_SCAN: {$/;" l
NEVERMORE_NOT_QUOTH t/boolean.t /^NEVERMORE_NOT_QUOTH: {$/;" l
NEVERMORE_NOT_QUOTH_NOT_THE t/boolean.t /^NEVERMORE_NOT_QUOTH_NOT_THE: {$/;" l
NOIGNORE_DIR_RELATIVE_PATHS t/ack-ignore-dir.t /^NOIGNORE_DIR_RELATIVE_PATHS: {$/;" l
NOIGNORE_SUBDIR_WINS t/ack-ignore-dir.t /^NOIGNORE_SUBDIR_WINS: {$/;" l
NORMAL_CASE t/ack-v.t /^NORMAL_CASE: {$/;" l
NORMAL_COLOR t/ack-color.t /^NORMAL_COLOR: {$/;" l
NOT t/ack-c.t /^NOT: {$/;" l
NO_BOOLEANS t/boolean.t /^NO_BOOLEANS: {$/;" l
NO_GROUPING t/ack-group.t /^NO_GROUPING: {$/;" l
NO_O t/ack-o.t /^NO_O: {$/;" l
NO_PAGER t/ack-pager.t /^NO_PAGER: {$/;" l
Expand All @@ -180,6 +183,7 @@ POSTMATCH t/ack-output.t /^POSTMATCH: {$/;" l
POSTMATCH_MULTIPLE_FILES t/ack-output.t /^POSTMATCH_MULTIPLE_FILES: {$/;" l
PREMATCH t/ack-output.t /^PREMATCH: {$/;" l
PREMATCH_MULTIPLE_FILES t/ack-output.t /^PREMATCH_MULTIPLE_FILES: {$/;" l
QUOTH_NOT_NEVERMORE t/boolean.t /^QUOTH_NOT_NEVERMORE: {$/;" l
REQUIRE_F_OR_G t/ack-show-types.t /^REQUIRE_F_OR_G: {$/;" l
RESTRICTED_DIRECTORIES t/ack-s.t /^RESTRICTED_DIRECTORIES: {$/;" l
RUBY_AND_RAKE t/ack-show-types.t /^RUBY_AND_RAKE: {$/;" l
Expand Down Expand Up @@ -220,7 +224,6 @@ WITH_SWITCHES_MULTIPLE_FILES t/basic.t /^WITH_SWITCHES_MULTIPLE_FILES: {$/;" l
WITH_SWITCHES_ONE_FILE t/ack-h.t /^WITH_SWITCHES_ONE_FILE: {$/;" l
WITH_SWITCHES_ONE_FILE t/basic.t /^WITH_SWITCHES_ONE_FILE: {$/;" l
_big_split t/lowercase.t /^sub _big_split {$/;" s
_build_search_not_re ack /^sub _build_search_not_re {$/;" s
_check_command_for_taintedness t/Util.pm /^sub _check_command_for_taintedness {$/;" s
_check_for_ackrc lib/App/Ack/ConfigFinder.pm /^sub _check_for_ackrc {$/;" s
_check_for_mutex_options lib/App/Ack/ConfigLoader.pm /^sub _check_for_mutex_options {$/;" s
Expand Down Expand Up @@ -280,7 +283,8 @@ bar t/range/rangefile.pm /^sub bar {$/;" s
basename lib/App/Ack/File.pm /^sub basename {$/;" s
bisect dev/find-minimum-dep-versions.pl /^sub bisect (&@) {$/;" s
build_ack_invocation t/Util.pm /^sub build_ack_invocation {$/;" s
build_regex ack /^sub build_regex {$/;" s
build_regex lib/App/Ack.pm /^sub build_regex {$/;" s
build_search_not_re lib/App/Ack.pm /^sub build_search_not_re {$/;" s
caret_X t/Util.pm /^sub caret_X {$/;" s
cathy lib/App/Ack.pm /^sub cathy {$/;" s
check_for_option_in_man_output xt/man.t /^sub check_for_option_in_man_output {$/;" s
Expand Down

0 comments on commit bb68114

Please sign in to comment.