From bb681149ff4fb78a5c03a8c02f8d7a8b4f44b059 Mon Sep 17 00:00:00 2001 From: Andy Lester Date: Sat, 30 Nov 2024 09:16:04 -0600 Subject: [PATCH] Move all regex building into App::Ack --- ack | 122 +++---------------------------------------------- lib/App/Ack.pm | 112 +++++++++++++++++++++++++++++++++++++++++++++ tags | 8 +++- 3 files changed, 124 insertions(+), 118 deletions(-) diff --git a/ack b/ack index e65ed440..872a85bc 100755 --- a/ack +++ b/ack @@ -162,10 +162,10 @@ MAIN: { } if ( $opt_range_start ) { - ($opt_range_start, undef) = build_regex( $opt_range_start, {} ); + ($opt_range_start, undef) = App::Ack::build_regex( $opt_range_start, {} ); } if ( $opt_range_end ) { - ($opt_range_end, undef) = build_regex( $opt_range_end, {} ); + ($opt_range_end, undef) = App::Ack::build_regex( $opt_range_end, {} ); } $using_ranges = $opt_range_start || $opt_range_end; @@ -211,8 +211,8 @@ MAIN: { if ( $App::Ack::is_filter_mode && !$opt->{files_from} ) { # probably -x $files = App::Ack::Files->from_stdin(); $opt_regex //= shift @ARGV; - ($search_re, $scan_re) = build_regex( $opt_regex, $opt ); - $search_not_re = _build_search_not_re( $opt ); + ($search_re, $scan_re) = App::Ack::build_regex( $opt_regex, $opt ); + $search_not_re = App::Ack::build_search_not_re( $opt ); $stats{search_re} = $search_re; $stats{scan_re} = $scan_re; $stats{search_not_re} = $search_not_re; @@ -223,8 +223,8 @@ MAIN: { } else { $opt_regex //= shift @ARGV; - ($search_re, $scan_re) = build_regex( $opt_regex, $opt ); - $search_not_re = _build_search_not_re( $opt ); + ($search_re, $scan_re) = App::Ack::build_regex( $opt_regex, $opt ); + $search_not_re = App::Ack::build_search_not_re( $opt ); $stats{search_re} = $search_re; $stats{scan_re} = $scan_re; $stats{search_not_re} = $search_not_re; @@ -528,98 +528,6 @@ sub get_file_id { } -# Returns a regex object based on a string and command-line options. -# Dies when the regex $str is undefined (i.e. not given on command line). - -sub build_regex { - my $str = shift; - my $opt = shift; - - defined $str or App::Ack::die( 'No regular expression found.' ); - - if ( !$opt->{Q} ) { - # Compile the regex to see if it dies or throws warnings. - local $SIG{__WARN__} = sub { die @_ }; # Anything that warns becomes a die. - my $scratch_regex = eval { qr/$str/ }; - if ( not $scratch_regex ) { - my $err = $@; - chomp $err; - - if ( $err =~ m{^(.+?); marked by <-- HERE in m/(.+?) <-- HERE} ) { - my ($why, $where) = ($1,$2); - my $pointy = ' ' x (6+length($where)) . '^---HERE'; - App::Ack::die( "Invalid regex '$str'\nRegex: $str\n$pointy $why" ); - } - else { - App::Ack::die( "Invalid regex '$str'\n$err" ); - } - } - } - - # Check for lowercaseness before we do any modifications. - my $regex_is_lc = App::Ack::is_lowercase( $str ); - - $str = quotemeta( $str ) if $opt->{Q}; - - my $scan_str = $str; - - # Whole words only. - if ( $opt->{w} ) { - my $ok = 1; - - if ( $str =~ /^\\[wd]/ ) { - # Explicit \w is good. - } - else { - # Can start with \w, (, [ or dot. - if ( $str !~ /^[\w\(\[\.]/ ) { - $ok = 0; - } - } - - # Can end with \w, }, ), ], +, *, or dot. - if ( $str !~ /[\w\}\)\]\+\*\?\.]$/ ) { - $ok = 0; - } - # ... unless it's escaped. - elsif ( $str =~ /\\[\}\)\]\+\*\?\.]$/ ) { - $ok = 0; - } - - if ( !$ok ) { - App::Ack::die( '-w will not do the right thing if your regex does not begin and end with a word character.' ); - } - - if ( $str =~ /^\w+$/ ) { - # No need for fancy regex if it's a simple word. - $str = sprintf( '\b(?:%s)\b', $str ); - } - else { - $str = sprintf( '(?:^|\b|\s)\K(?:%s)(?=\s|\b|$)', $str ); - } - } - - if ( $opt->{i} || ($opt->{S} && $regex_is_lc) ) { - $_ = "(?i)$_" for ( $str, $scan_str ); - } - - my $scan_regex = undef; - my $regex = eval { qr/$str/ }; - if ( $regex ) { - if ( $scan_str !~ /\$/ ) { - # No line_scan is possible if there's a $ in the regex. - $scan_regex = eval { qr/$scan_str/m }; - } - } - else { - my $err = $@; - chomp $err; - App::Ack::die( "Invalid regex '$str':\n $err" ); - } - - return ($regex, $scan_regex); -} - my $match_colno; { @@ -1198,24 +1106,6 @@ sub range_setup { } -sub _build_search_not_re { - my $opt = shift; - - my @not = @{$opt->{not}}; - - if ( @not ) { - my @built; - for my $re ( @not ) { - my ($built,undef) = build_regex( $re, $opt ); - push( @built, $built ); - } - return join( '|', @built ); - } - - return; -} - - =pod =encoding UTF-8 diff --git a/lib/App/Ack.pm b/lib/App/Ack.pm index 13baa1fc..1c631acb 100644 --- a/lib/App/Ack.pm +++ b/lib/App/Ack.pm @@ -734,4 +734,116 @@ sub is_lowercase { } +# Returns a regex object based on a string and command-line options. +# Dies when the regex $str is undefined (i.e. not given on command line). + +sub build_regex { + my $str = shift; + my $opt = shift; + + defined $str or App::Ack::die( 'No regular expression found.' ); + + if ( !$opt->{Q} ) { + # Compile the regex to see if it dies or throws warnings. + local $SIG{__WARN__} = sub { App::Ack::die @_ }; # Anything that warns becomes a die. + my $scratch_regex = eval { qr/$str/ }; + if ( not $scratch_regex ) { + my $err = $@; + chomp $err; + + if ( $err =~ m{^(.+?); marked by <-- HERE in m/(.+?) <-- HERE} ) { + my ($why, $where) = ($1,$2); + my $pointy = ' ' x (6+length($where)) . '^---HERE'; + App::Ack::die( "Invalid regex '$str'\nRegex: $str\n$pointy $why" ); + } + else { + App::Ack::die( "Invalid regex '$str'\n$err" ); + } + } + } + + # Check for lowercaseness before we do any modifications. + my $regex_is_lc = App::Ack::is_lowercase( $str ); + + $str = quotemeta( $str ) if $opt->{Q}; + + my $scan_str = $str; + + # Whole words only. + if ( $opt->{w} ) { + my $ok = 1; + + if ( $str =~ /^\\[wd]/ ) { + # Explicit \w is good. + } + else { + # Can start with \w, (, [ or dot. + if ( $str !~ /^[\w\(\[\.]/ ) { + $ok = 0; + } + } + + # Can end with \w, }, ), ], +, *, or dot. + if ( $str !~ /[\w\}\)\]\+\*\?\.]$/ ) { + $ok = 0; + } + # ... unless it's escaped. + elsif ( $str =~ /\\[\}\)\]\+\*\?\.]$/ ) { + $ok = 0; + } + + if ( !$ok ) { + App::Ack::die( '-w will not do the right thing if your regex does not begin and end with a word character.' ); + } + + if ( $str =~ /^\w+$/ ) { + # No need for fancy regex if it's a simple word. + $str = sprintf( '\b(?:%s)\b', $str ); + } + else { + $str = sprintf( '(?:^|\b|\s)\K(?:%s)(?=\s|\b|$)', $str ); + } + } + + if ( $opt->{i} || ($opt->{S} && $regex_is_lc) ) { + $_ = "(?i)$_" for ( $str, $scan_str ); + } + + my $scan_regex = undef; + my $regex = eval { qr/$str/ }; + if ( $regex ) { + if ( $scan_str !~ /\$/ ) { + # No line_scan is possible if there's a $ in the regex. + $scan_regex = eval { qr/$scan_str/m }; + } + } + else { + my $err = $@; + chomp $err; + App::Ack::die( "Invalid regex '$str':\n $err" ); + } + + return ($regex, $scan_regex); +} + + +sub build_search_not_re { + my $opt = shift; + + my @not = @{$opt->{not}}; + + if ( @not ) { + my @built; + for my $re ( @not ) { + my ($built,undef) = App::Ack::build_regex( $re, $opt ); + push( @built, $built ); + } + return join( '|', @built ); + } + + return; +} + + + 1; # End of App::Ack diff --git a/tags b/tags index 169c0817..4983f9c9 100644 --- a/tags +++ b/tags @@ -150,11 +150,14 @@ MY Makefile.PL /^package MY;$/;" p MY Makefile.PL /^sub MY::postamble {$/;" s MY Makefile.PL /^sub MY::test {$/;" s NEEDS_LINE_SCAN t/needs-line-scan.t /^NEEDS_LINE_SCAN: {$/;" l +NEVERMORE_NOT_QUOTH t/boolean.t /^NEVERMORE_NOT_QUOTH: {$/;" l +NEVERMORE_NOT_QUOTH_NOT_THE t/boolean.t /^NEVERMORE_NOT_QUOTH_NOT_THE: {$/;" l NOIGNORE_DIR_RELATIVE_PATHS t/ack-ignore-dir.t /^NOIGNORE_DIR_RELATIVE_PATHS: {$/;" l NOIGNORE_SUBDIR_WINS t/ack-ignore-dir.t /^NOIGNORE_SUBDIR_WINS: {$/;" l NORMAL_CASE t/ack-v.t /^NORMAL_CASE: {$/;" l NORMAL_COLOR t/ack-color.t /^NORMAL_COLOR: {$/;" l NOT t/ack-c.t /^NOT: {$/;" l +NO_BOOLEANS t/boolean.t /^NO_BOOLEANS: {$/;" l NO_GROUPING t/ack-group.t /^NO_GROUPING: {$/;" l NO_O t/ack-o.t /^NO_O: {$/;" l NO_PAGER t/ack-pager.t /^NO_PAGER: {$/;" l @@ -180,6 +183,7 @@ POSTMATCH t/ack-output.t /^POSTMATCH: {$/;" l POSTMATCH_MULTIPLE_FILES t/ack-output.t /^POSTMATCH_MULTIPLE_FILES: {$/;" l PREMATCH t/ack-output.t /^PREMATCH: {$/;" l PREMATCH_MULTIPLE_FILES t/ack-output.t /^PREMATCH_MULTIPLE_FILES: {$/;" l +QUOTH_NOT_NEVERMORE t/boolean.t /^QUOTH_NOT_NEVERMORE: {$/;" l REQUIRE_F_OR_G t/ack-show-types.t /^REQUIRE_F_OR_G: {$/;" l RESTRICTED_DIRECTORIES t/ack-s.t /^RESTRICTED_DIRECTORIES: {$/;" l RUBY_AND_RAKE t/ack-show-types.t /^RUBY_AND_RAKE: {$/;" l @@ -220,7 +224,6 @@ WITH_SWITCHES_MULTIPLE_FILES t/basic.t /^WITH_SWITCHES_MULTIPLE_FILES: {$/;" l WITH_SWITCHES_ONE_FILE t/ack-h.t /^WITH_SWITCHES_ONE_FILE: {$/;" l WITH_SWITCHES_ONE_FILE t/basic.t /^WITH_SWITCHES_ONE_FILE: {$/;" l _big_split t/lowercase.t /^sub _big_split {$/;" s -_build_search_not_re ack /^sub _build_search_not_re {$/;" s _check_command_for_taintedness t/Util.pm /^sub _check_command_for_taintedness {$/;" s _check_for_ackrc lib/App/Ack/ConfigFinder.pm /^sub _check_for_ackrc {$/;" s _check_for_mutex_options lib/App/Ack/ConfigLoader.pm /^sub _check_for_mutex_options {$/;" s @@ -280,7 +283,8 @@ bar t/range/rangefile.pm /^sub bar {$/;" s basename lib/App/Ack/File.pm /^sub basename {$/;" s bisect dev/find-minimum-dep-versions.pl /^sub bisect (&@) {$/;" s build_ack_invocation t/Util.pm /^sub build_ack_invocation {$/;" s -build_regex ack /^sub build_regex {$/;" s +build_regex lib/App/Ack.pm /^sub build_regex {$/;" s +build_search_not_re lib/App/Ack.pm /^sub build_search_not_re {$/;" s caret_X t/Util.pm /^sub caret_X {$/;" s cathy lib/App/Ack.pm /^sub cathy {$/;" s check_for_option_in_man_output xt/man.t /^sub check_for_option_in_man_output {$/;" s