Eric Sunshine | b4f25b0 | 2022-09-01 00:29:39 +0000 | [diff] [blame] | 1 | #!/usr/bin/env perl |
| 2 | # |
| 3 | # Copyright (c) 2021-2022 Eric Sunshine <sunshine@sunshineco.com> |
| 4 | # |
| 5 | # This tool scans shell scripts for test definitions and checks those tests for |
| 6 | # problems, such as broken &&-chains, which might hide bugs in the tests |
| 7 | # themselves or in behaviors being exercised by the tests. |
| 8 | # |
| 9 | # Input arguments are pathnames of shell scripts containing test definitions, |
| 10 | # or globs referencing a collection of scripts. For each problem discovered, |
| 11 | # the pathname of the script containing the test is printed along with the test |
Eric Sunshine | e44f15b | 2024-09-10 00:10:12 -0400 | [diff] [blame] | 12 | # name and the test body with a `?!LINT: ...?!` annotation at the location of |
| 13 | # each detected problem, where "..." is an explanation of the problem. Returns |
| 14 | # zero if no problems are discovered, otherwise non-zero. |
Eric Sunshine | b4f25b0 | 2022-09-01 00:29:39 +0000 | [diff] [blame] | 15 | |
| 16 | use warnings; |
| 17 | use strict; |
Eric Sunshine | 29fb2ec | 2022-09-01 00:29:44 +0000 | [diff] [blame] | 18 | use Config; |
Eric Sunshine | b4f25b0 | 2022-09-01 00:29:39 +0000 | [diff] [blame] | 19 | use File::Glob; |
| 20 | use Getopt::Long; |
| 21 | |
Eric Sunshine | 29fb2ec | 2022-09-01 00:29:44 +0000 | [diff] [blame] | 22 | my $jobs = -1; |
Eric Sunshine | b4f25b0 | 2022-09-01 00:29:39 +0000 | [diff] [blame] | 23 | my $show_stats; |
| 24 | my $emit_all; |
| 25 | |
Eric Sunshine | 7d48047 | 2022-09-01 00:29:40 +0000 | [diff] [blame] | 26 | # Lexer tokenizes POSIX shell scripts. It is roughly modeled after section 2.3 |
| 27 | # "Token Recognition" of POSIX chapter 2 "Shell Command Language". Although |
| 28 | # similar to lexical analyzers for other languages, this one differs in a few |
| 29 | # substantial ways due to quirks of the shell command language. |
| 30 | # |
| 31 | # For instance, in many languages, newline is just whitespace like space or |
| 32 | # TAB, but in shell a newline is a command separator, thus a distinct lexical |
| 33 | # token. A newline is significant and returned as a distinct token even at the |
| 34 | # end of a shell comment. |
| 35 | # |
| 36 | # In other languages, `1+2` would typically be scanned as three tokens |
| 37 | # (`1`, `+`, and `2`), but in shell it is a single token. However, the similar |
| 38 | # `1 + 2`, which embeds whitepace, is scanned as three token in shell, as well. |
| 39 | # In shell, several characters with special meaning lose that meaning when not |
| 40 | # surrounded by whitespace. For instance, the negation operator `!` is special |
| 41 | # when standing alone surrounded by whitespace; whereas in `foo!uucp` it is |
| 42 | # just a plain character in the longer token "foo!uucp". In many other |
| 43 | # languages, `"string"/foo:'string'` might be scanned as five tokens ("string", |
| 44 | # `/`, `foo`, `:`, and 'string'), but in shell, it is just a single token. |
| 45 | # |
| 46 | # The lexical analyzer for the shell command language is also somewhat unusual |
| 47 | # in that it recursively invokes the parser to handle the body of `$(...)` |
| 48 | # expressions which can contain arbitrary shell code. Such expressions may be |
| 49 | # encountered both inside and outside of double-quoted strings. |
| 50 | # |
| 51 | # The lexical analyzer is responsible for consuming shell here-doc bodies which |
| 52 | # extend from the line following a `<<TAG` operator until a line consisting |
| 53 | # solely of `TAG`. Here-doc consumption begins when a newline is encountered. |
| 54 | # It is legal for multiple here-doc `<<TAG` operators to be present on a single |
| 55 | # line, in which case their bodies must be present one following the next, and |
| 56 | # are consumed in the (left-to-right) order the `<<TAG` operators appear on the |
| 57 | # line. A special complication is that the bodies of all here-docs must be |
| 58 | # consumed when the newline is encountered even if the parse context depth has |
| 59 | # changed. For instance, in `cat <<A && x=$(cat <<B &&\n`, bodies of here-docs |
| 60 | # "A" and "B" must be consumed even though "A" was introduced outside the |
| 61 | # recursive parse context in which "B" was introduced and in which the newline |
| 62 | # is encountered. |
| 63 | package Lexer; |
| 64 | |
| 65 | sub new { |
| 66 | my ($class, $parser, $s) = @_; |
| 67 | bless { |
| 68 | parser => $parser, |
| 69 | buff => $s, |
Eric Sunshine | bf42f0a | 2022-11-11 07:34:53 +0000 | [diff] [blame] | 70 | lineno => 1, |
Eric Sunshine | 7d48047 | 2022-09-01 00:29:40 +0000 | [diff] [blame] | 71 | heretags => [] |
| 72 | } => $class; |
| 73 | } |
| 74 | |
| 75 | sub scan_heredoc_tag { |
| 76 | my $self = shift @_; |
| 77 | ${$self->{buff}} =~ /\G(-?)/gc; |
| 78 | my $indented = $1; |
Eric Sunshine | 5f0321a | 2022-11-08 19:08:29 +0000 | [diff] [blame] | 79 | my $token = $self->scan_token(); |
| 80 | return "<<$indented" unless $token; |
| 81 | my $tag = $token->[0]; |
Eric Sunshine | 7d48047 | 2022-09-01 00:29:40 +0000 | [diff] [blame] | 82 | $tag =~ s/['"\\]//g; |
Eric Sunshine | 2b61c8d | 2023-03-30 15:30:31 -0400 | [diff] [blame] | 83 | $$token[0] = $indented ? "\t$tag" : "$tag"; |
| 84 | push(@{$self->{heretags}}, $token); |
Eric Sunshine | 7d48047 | 2022-09-01 00:29:40 +0000 | [diff] [blame] | 85 | return "<<$indented$tag"; |
| 86 | } |
| 87 | |
| 88 | sub scan_op { |
| 89 | my ($self, $c) = @_; |
| 90 | my $b = $self->{buff}; |
| 91 | return $c unless $$b =~ /\G(.)/sgc; |
| 92 | my $cc = $c . $1; |
| 93 | return scan_heredoc_tag($self) if $cc eq '<<'; |
| 94 | return $cc if $cc =~ /^(?:&&|\|\||>>|;;|<&|>&|<>|>\|)$/; |
| 95 | pos($$b)--; |
| 96 | return $c; |
| 97 | } |
| 98 | |
| 99 | sub scan_sqstring { |
| 100 | my $self = shift @_; |
| 101 | ${$self->{buff}} =~ /\G([^']*'|.*\z)/sgc; |
Eric Sunshine | bf42f0a | 2022-11-11 07:34:53 +0000 | [diff] [blame] | 102 | my $s = $1; |
| 103 | $self->{lineno} += () = $s =~ /\n/sg; |
| 104 | return "'" . $s; |
Eric Sunshine | 7d48047 | 2022-09-01 00:29:40 +0000 | [diff] [blame] | 105 | } |
| 106 | |
| 107 | sub scan_dqstring { |
| 108 | my $self = shift @_; |
| 109 | my $b = $self->{buff}; |
| 110 | my $s = '"'; |
| 111 | while (1) { |
| 112 | # slurp up non-special characters |
| 113 | $s .= $1 if $$b =~ /\G([^"\$\\]+)/gc; |
| 114 | # handle special characters |
| 115 | last unless $$b =~ /\G(.)/sgc; |
| 116 | my $c = $1; |
| 117 | $s .= '"', last if $c eq '"'; |
| 118 | $s .= '$' . $self->scan_dollar(), next if $c eq '$'; |
| 119 | if ($c eq '\\') { |
| 120 | $s .= '\\', last unless $$b =~ /\G(.)/sgc; |
| 121 | $c = $1; |
Eric Sunshine | bf42f0a | 2022-11-11 07:34:53 +0000 | [diff] [blame] | 122 | $self->{lineno}++, next if $c eq "\n"; # line splice |
Eric Sunshine | 7d48047 | 2022-09-01 00:29:40 +0000 | [diff] [blame] | 123 | # backslash escapes only $, `, ", \ in dq-string |
| 124 | $s .= '\\' unless $c =~ /^[\$`"\\]$/; |
| 125 | $s .= $c; |
| 126 | next; |
| 127 | } |
| 128 | die("internal error scanning dq-string '$c'\n"); |
| 129 | } |
Eric Sunshine | bf42f0a | 2022-11-11 07:34:53 +0000 | [diff] [blame] | 130 | $self->{lineno} += () = $s =~ /\n/sg; |
Eric Sunshine | 7d48047 | 2022-09-01 00:29:40 +0000 | [diff] [blame] | 131 | return $s; |
| 132 | } |
| 133 | |
| 134 | sub scan_balanced { |
| 135 | my ($self, $c1, $c2) = @_; |
| 136 | my $b = $self->{buff}; |
| 137 | my $depth = 1; |
| 138 | my $s = $c1; |
| 139 | while ($$b =~ /\G([^\Q$c1$c2\E]*(?:[\Q$c1$c2\E]|\z))/gc) { |
| 140 | $s .= $1; |
| 141 | $depth++, next if $s =~ /\Q$c1\E$/; |
| 142 | $depth--; |
| 143 | last if $depth == 0; |
| 144 | } |
Eric Sunshine | bf42f0a | 2022-11-11 07:34:53 +0000 | [diff] [blame] | 145 | $self->{lineno} += () = $s =~ /\n/sg; |
Eric Sunshine | 7d48047 | 2022-09-01 00:29:40 +0000 | [diff] [blame] | 146 | return $s; |
| 147 | } |
| 148 | |
| 149 | sub scan_subst { |
| 150 | my $self = shift @_; |
| 151 | my @tokens = $self->{parser}->parse(qr/^\)$/); |
| 152 | $self->{parser}->next_token(); # closing ")" |
| 153 | return @tokens; |
| 154 | } |
| 155 | |
| 156 | sub scan_dollar { |
| 157 | my $self = shift @_; |
| 158 | my $b = $self->{buff}; |
| 159 | return $self->scan_balanced('(', ')') if $$b =~ /\G\((?=\()/gc; # $((...)) |
Eric Sunshine | 5f0321a | 2022-11-08 19:08:29 +0000 | [diff] [blame] | 160 | return '(' . join(' ', map {$_->[0]} $self->scan_subst()) . ')' if $$b =~ /\G\(/gc; # $(...) |
Eric Sunshine | 7d48047 | 2022-09-01 00:29:40 +0000 | [diff] [blame] | 161 | return $self->scan_balanced('{', '}') if $$b =~ /\G\{/gc; # ${...} |
| 162 | return $1 if $$b =~ /\G(\w+)/gc; # $var |
| 163 | return $1 if $$b =~ /\G([@*#?$!0-9-])/gc; # $*, $1, $$, etc. |
| 164 | return ''; |
| 165 | } |
| 166 | |
| 167 | sub swallow_heredocs { |
| 168 | my $self = shift @_; |
| 169 | my $b = $self->{buff}; |
| 170 | my $tags = $self->{heretags}; |
| 171 | while (my $tag = shift @$tags) { |
Eric Sunshine | bf42f0a | 2022-11-11 07:34:53 +0000 | [diff] [blame] | 172 | my $start = pos($$b); |
Eric Sunshine | 2b61c8d | 2023-03-30 15:30:31 -0400 | [diff] [blame] | 173 | my $indent = $$tag[0] =~ s/^\t// ? '\\s*' : ''; |
| 174 | $$b =~ /(?:\G|\n)$indent\Q$$tag[0]\E(?:\n|\z)/gc; |
| 175 | if (pos($$b) > $start) { |
| 176 | my $body = substr($$b, $start, pos($$b) - $start); |
Eric Sunshine | a4a5f28 | 2024-07-10 04:38:31 -0400 | [diff] [blame] | 177 | $self->{parser}->{heredocs}->{$$tag[0]} = { |
| 178 | content => substr($body, 0, length($body) - length($&)), |
| 179 | start_line => $self->{lineno}, |
| 180 | }; |
Eric Sunshine | 2b61c8d | 2023-03-30 15:30:31 -0400 | [diff] [blame] | 181 | $self->{lineno} += () = $body =~ /\n/sg; |
| 182 | next; |
| 183 | } |
Eric Sunshine | e44f15b | 2024-09-10 00:10:12 -0400 | [diff] [blame] | 184 | push(@{$self->{parser}->{problems}}, ['HEREDOC', $tag]); |
Eric Sunshine | 2b61c8d | 2023-03-30 15:30:31 -0400 | [diff] [blame] | 185 | $$b =~ /(?:\G|\n).*\z/gc; # consume rest of input |
Eric Sunshine | bf42f0a | 2022-11-11 07:34:53 +0000 | [diff] [blame] | 186 | my $body = substr($$b, $start, pos($$b) - $start); |
| 187 | $self->{lineno} += () = $body =~ /\n/sg; |
Eric Sunshine | 2b61c8d | 2023-03-30 15:30:31 -0400 | [diff] [blame] | 188 | last; |
Eric Sunshine | 7d48047 | 2022-09-01 00:29:40 +0000 | [diff] [blame] | 189 | } |
| 190 | } |
| 191 | |
| 192 | sub scan_token { |
| 193 | my $self = shift @_; |
| 194 | my $b = $self->{buff}; |
| 195 | my $token = ''; |
Eric Sunshine | bf42f0a | 2022-11-11 07:34:53 +0000 | [diff] [blame] | 196 | my ($start, $startln); |
Eric Sunshine | 7d48047 | 2022-09-01 00:29:40 +0000 | [diff] [blame] | 197 | RESTART: |
Eric Sunshine | bf42f0a | 2022-11-11 07:34:53 +0000 | [diff] [blame] | 198 | $startln = $self->{lineno}; |
Eric Sunshine | 7d48047 | 2022-09-01 00:29:40 +0000 | [diff] [blame] | 199 | $$b =~ /\G[ \t]+/gc; # skip whitespace (but not newline) |
Eric Sunshine | 5f0321a | 2022-11-08 19:08:29 +0000 | [diff] [blame] | 200 | $start = pos($$b) || 0; |
Eric Sunshine | bf42f0a | 2022-11-11 07:34:53 +0000 | [diff] [blame] | 201 | $self->{lineno}++, return ["\n", $start, pos($$b), $startln, $startln] if $$b =~ /\G#[^\n]*(?:\n|\z)/gc; # comment |
Eric Sunshine | 7d48047 | 2022-09-01 00:29:40 +0000 | [diff] [blame] | 202 | while (1) { |
| 203 | # slurp up non-special characters |
| 204 | $token .= $1 if $$b =~ /\G([^\\;&|<>(){}'"\$\s]+)/gc; |
| 205 | # handle special characters |
| 206 | last unless $$b =~ /\G(.)/sgc; |
| 207 | my $c = $1; |
Eric Sunshine | ca748f5 | 2022-11-08 19:08:28 +0000 | [diff] [blame] | 208 | pos($$b)--, last if $c =~ /^[ \t]$/; # whitespace ends token |
Eric Sunshine | 7d48047 | 2022-09-01 00:29:40 +0000 | [diff] [blame] | 209 | pos($$b)--, last if length($token) && $c =~ /^[;&|<>(){}\n]$/; |
| 210 | $token .= $self->scan_sqstring(), next if $c eq "'"; |
| 211 | $token .= $self->scan_dqstring(), next if $c eq '"'; |
| 212 | $token .= $c . $self->scan_dollar(), next if $c eq '$'; |
Eric Sunshine | bf42f0a | 2022-11-11 07:34:53 +0000 | [diff] [blame] | 213 | $self->{lineno}++, $self->swallow_heredocs(), $token = $c, last if $c eq "\n"; |
Eric Sunshine | 7d48047 | 2022-09-01 00:29:40 +0000 | [diff] [blame] | 214 | $token = $self->scan_op($c), last if $c =~ /^[;&|<>]$/; |
| 215 | $token = $c, last if $c =~ /^[(){}]$/; |
| 216 | if ($c eq '\\') { |
| 217 | $token .= '\\', last unless $$b =~ /\G(.)/sgc; |
| 218 | $c = $1; |
Eric Sunshine | bf42f0a | 2022-11-11 07:34:53 +0000 | [diff] [blame] | 219 | $self->{lineno}++, next if $c eq "\n" && length($token); # line splice |
| 220 | $self->{lineno}++, goto RESTART if $c eq "\n"; # line splice |
Eric Sunshine | 7d48047 | 2022-09-01 00:29:40 +0000 | [diff] [blame] | 221 | $token .= '\\' . $c; |
| 222 | next; |
| 223 | } |
| 224 | die("internal error scanning character '$c'\n"); |
| 225 | } |
Eric Sunshine | bf42f0a | 2022-11-11 07:34:53 +0000 | [diff] [blame] | 226 | return length($token) ? [$token, $start, pos($$b), $startln, $self->{lineno}] : undef; |
Eric Sunshine | 7d48047 | 2022-09-01 00:29:40 +0000 | [diff] [blame] | 227 | } |
| 228 | |
Eric Sunshine | 6594554 | 2022-09-01 00:29:41 +0000 | [diff] [blame] | 229 | # ShellParser parses POSIX shell scripts (with minor extensions for Bash). It |
| 230 | # is a recursive descent parser very roughly modeled after section 2.10 "Shell |
| 231 | # Grammar" of POSIX chapter 2 "Shell Command Language". |
| 232 | package ShellParser; |
| 233 | |
| 234 | sub new { |
| 235 | my ($class, $s) = @_; |
| 236 | my $self = bless { |
| 237 | buff => [], |
| 238 | stop => [], |
Eric Sunshine | a4a5f28 | 2024-07-10 04:38:31 -0400 | [diff] [blame] | 239 | output => [], |
| 240 | heredocs => {}, |
Eric Sunshine | e44f15b | 2024-09-10 00:10:12 -0400 | [diff] [blame] | 241 | insubshell => 0, |
Eric Sunshine | 6594554 | 2022-09-01 00:29:41 +0000 | [diff] [blame] | 242 | } => $class; |
| 243 | $self->{lexer} = Lexer->new($self, $s); |
| 244 | return $self; |
| 245 | } |
| 246 | |
| 247 | sub next_token { |
| 248 | my $self = shift @_; |
| 249 | return pop(@{$self->{buff}}) if @{$self->{buff}}; |
| 250 | return $self->{lexer}->scan_token(); |
| 251 | } |
| 252 | |
| 253 | sub untoken { |
| 254 | my $self = shift @_; |
| 255 | push(@{$self->{buff}}, @_); |
| 256 | } |
| 257 | |
| 258 | sub peek { |
| 259 | my $self = shift @_; |
| 260 | my $token = $self->next_token(); |
| 261 | return undef unless defined($token); |
| 262 | $self->untoken($token); |
| 263 | return $token; |
| 264 | } |
| 265 | |
| 266 | sub stop_at { |
| 267 | my ($self, $token) = @_; |
| 268 | return 1 unless defined($token); |
| 269 | my $stop = ${$self->{stop}}[-1] if @{$self->{stop}}; |
Eric Sunshine | 5f0321a | 2022-11-08 19:08:29 +0000 | [diff] [blame] | 270 | return defined($stop) && $token->[0] =~ $stop; |
Eric Sunshine | 6594554 | 2022-09-01 00:29:41 +0000 | [diff] [blame] | 271 | } |
| 272 | |
| 273 | sub expect { |
| 274 | my ($self, $expect) = @_; |
| 275 | my $token = $self->next_token(); |
Eric Sunshine | 5f0321a | 2022-11-08 19:08:29 +0000 | [diff] [blame] | 276 | return $token if defined($token) && $token->[0] eq $expect; |
| 277 | push(@{$self->{output}}, "?!ERR?! expected '$expect' but found '" . (defined($token) ? $token->[0] : "<end-of-input>") . "'\n"); |
Eric Sunshine | 6594554 | 2022-09-01 00:29:41 +0000 | [diff] [blame] | 278 | $self->untoken($token) if defined($token); |
| 279 | return (); |
| 280 | } |
| 281 | |
| 282 | sub optional_newlines { |
| 283 | my $self = shift @_; |
| 284 | my @tokens; |
| 285 | while (my $token = $self->peek()) { |
Eric Sunshine | 5f0321a | 2022-11-08 19:08:29 +0000 | [diff] [blame] | 286 | last unless $token->[0] eq "\n"; |
Eric Sunshine | 6594554 | 2022-09-01 00:29:41 +0000 | [diff] [blame] | 287 | push(@tokens, $self->next_token()); |
| 288 | } |
| 289 | return @tokens; |
| 290 | } |
| 291 | |
| 292 | sub parse_group { |
| 293 | my $self = shift @_; |
| 294 | return ($self->parse(qr/^}$/), |
| 295 | $self->expect('}')); |
| 296 | } |
| 297 | |
| 298 | sub parse_subshell { |
| 299 | my $self = shift @_; |
Eric Sunshine | e44f15b | 2024-09-10 00:10:12 -0400 | [diff] [blame] | 300 | $self->{insubshell}++; |
| 301 | my @tokens = ($self->parse(qr/^\)$/), |
| 302 | $self->expect(')')); |
| 303 | $self->{insubshell}--; |
| 304 | return @tokens; |
Eric Sunshine | 6594554 | 2022-09-01 00:29:41 +0000 | [diff] [blame] | 305 | } |
| 306 | |
| 307 | sub parse_case_pattern { |
| 308 | my $self = shift @_; |
| 309 | my @tokens; |
| 310 | while (defined(my $token = $self->next_token())) { |
| 311 | push(@tokens, $token); |
Eric Sunshine | 5f0321a | 2022-11-08 19:08:29 +0000 | [diff] [blame] | 312 | last if $token->[0] eq ')'; |
Eric Sunshine | 6594554 | 2022-09-01 00:29:41 +0000 | [diff] [blame] | 313 | } |
| 314 | return @tokens; |
| 315 | } |
| 316 | |
| 317 | sub parse_case { |
| 318 | my $self = shift @_; |
| 319 | my @tokens; |
| 320 | push(@tokens, |
| 321 | $self->next_token(), # subject |
| 322 | $self->optional_newlines(), |
| 323 | $self->expect('in'), |
| 324 | $self->optional_newlines()); |
| 325 | while (1) { |
| 326 | my $token = $self->peek(); |
Eric Sunshine | 5f0321a | 2022-11-08 19:08:29 +0000 | [diff] [blame] | 327 | last unless defined($token) && $token->[0] ne 'esac'; |
Eric Sunshine | 6594554 | 2022-09-01 00:29:41 +0000 | [diff] [blame] | 328 | push(@tokens, |
| 329 | $self->parse_case_pattern(), |
| 330 | $self->optional_newlines(), |
| 331 | $self->parse(qr/^(?:;;|esac)$/)); # item body |
| 332 | $token = $self->peek(); |
Eric Sunshine | 5f0321a | 2022-11-08 19:08:29 +0000 | [diff] [blame] | 333 | last unless defined($token) && $token->[0] ne 'esac'; |
Eric Sunshine | 6594554 | 2022-09-01 00:29:41 +0000 | [diff] [blame] | 334 | push(@tokens, |
| 335 | $self->expect(';;'), |
| 336 | $self->optional_newlines()); |
| 337 | } |
| 338 | push(@tokens, $self->expect('esac')); |
| 339 | return @tokens; |
| 340 | } |
| 341 | |
| 342 | sub parse_for { |
| 343 | my $self = shift @_; |
| 344 | my @tokens; |
| 345 | push(@tokens, |
| 346 | $self->next_token(), # variable |
| 347 | $self->optional_newlines()); |
| 348 | my $token = $self->peek(); |
Eric Sunshine | 5f0321a | 2022-11-08 19:08:29 +0000 | [diff] [blame] | 349 | if (defined($token) && $token->[0] eq 'in') { |
Eric Sunshine | 6594554 | 2022-09-01 00:29:41 +0000 | [diff] [blame] | 350 | push(@tokens, |
| 351 | $self->expect('in'), |
| 352 | $self->optional_newlines()); |
| 353 | } |
| 354 | push(@tokens, |
| 355 | $self->parse(qr/^do$/), # items |
| 356 | $self->expect('do'), |
| 357 | $self->optional_newlines(), |
| 358 | $self->parse_loop_body(), |
| 359 | $self->expect('done')); |
| 360 | return @tokens; |
| 361 | } |
| 362 | |
| 363 | sub parse_if { |
| 364 | my $self = shift @_; |
| 365 | my @tokens; |
| 366 | while (1) { |
| 367 | push(@tokens, |
| 368 | $self->parse(qr/^then$/), # if/elif condition |
| 369 | $self->expect('then'), |
| 370 | $self->optional_newlines(), |
| 371 | $self->parse(qr/^(?:elif|else|fi)$/)); # if/elif body |
| 372 | my $token = $self->peek(); |
Eric Sunshine | 5f0321a | 2022-11-08 19:08:29 +0000 | [diff] [blame] | 373 | last unless defined($token) && $token->[0] eq 'elif'; |
Eric Sunshine | 6594554 | 2022-09-01 00:29:41 +0000 | [diff] [blame] | 374 | push(@tokens, $self->expect('elif')); |
| 375 | } |
| 376 | my $token = $self->peek(); |
Eric Sunshine | 5f0321a | 2022-11-08 19:08:29 +0000 | [diff] [blame] | 377 | if (defined($token) && $token->[0] eq 'else') { |
Eric Sunshine | 6594554 | 2022-09-01 00:29:41 +0000 | [diff] [blame] | 378 | push(@tokens, |
| 379 | $self->expect('else'), |
| 380 | $self->optional_newlines(), |
| 381 | $self->parse(qr/^fi$/)); # else body |
| 382 | } |
| 383 | push(@tokens, $self->expect('fi')); |
| 384 | return @tokens; |
| 385 | } |
| 386 | |
| 387 | sub parse_loop_body { |
| 388 | my $self = shift @_; |
| 389 | return $self->parse(qr/^done$/); |
| 390 | } |
| 391 | |
| 392 | sub parse_loop { |
| 393 | my $self = shift @_; |
| 394 | return ($self->parse(qr/^do$/), # condition |
| 395 | $self->expect('do'), |
| 396 | $self->optional_newlines(), |
| 397 | $self->parse_loop_body(), |
| 398 | $self->expect('done')); |
| 399 | } |
| 400 | |
| 401 | sub parse_func { |
| 402 | my $self = shift @_; |
| 403 | return ($self->expect('('), |
| 404 | $self->expect(')'), |
| 405 | $self->optional_newlines(), |
| 406 | $self->parse_cmd()); # body |
| 407 | } |
| 408 | |
| 409 | sub parse_bash_array_assignment { |
| 410 | my $self = shift @_; |
| 411 | my @tokens = $self->expect('('); |
| 412 | while (defined(my $token = $self->next_token())) { |
| 413 | push(@tokens, $token); |
Eric Sunshine | 5f0321a | 2022-11-08 19:08:29 +0000 | [diff] [blame] | 414 | last if $token->[0] eq ')'; |
Eric Sunshine | 6594554 | 2022-09-01 00:29:41 +0000 | [diff] [blame] | 415 | } |
| 416 | return @tokens; |
| 417 | } |
| 418 | |
| 419 | my %compound = ( |
| 420 | '{' => \&parse_group, |
| 421 | '(' => \&parse_subshell, |
| 422 | 'case' => \&parse_case, |
| 423 | 'for' => \&parse_for, |
| 424 | 'if' => \&parse_if, |
| 425 | 'until' => \&parse_loop, |
| 426 | 'while' => \&parse_loop); |
| 427 | |
| 428 | sub parse_cmd { |
| 429 | my $self = shift @_; |
| 430 | my $cmd = $self->next_token(); |
| 431 | return () unless defined($cmd); |
Eric Sunshine | 5f0321a | 2022-11-08 19:08:29 +0000 | [diff] [blame] | 432 | return $cmd if $cmd->[0] eq "\n"; |
Eric Sunshine | 6594554 | 2022-09-01 00:29:41 +0000 | [diff] [blame] | 433 | |
| 434 | my $token; |
| 435 | my @tokens = $cmd; |
Eric Sunshine | 5f0321a | 2022-11-08 19:08:29 +0000 | [diff] [blame] | 436 | if ($cmd->[0] eq '!') { |
Eric Sunshine | 6594554 | 2022-09-01 00:29:41 +0000 | [diff] [blame] | 437 | push(@tokens, $self->parse_cmd()); |
| 438 | return @tokens; |
Eric Sunshine | 5f0321a | 2022-11-08 19:08:29 +0000 | [diff] [blame] | 439 | } elsif (my $f = $compound{$cmd->[0]}) { |
Eric Sunshine | 6594554 | 2022-09-01 00:29:41 +0000 | [diff] [blame] | 440 | push(@tokens, $self->$f()); |
Eric Sunshine | 5f0321a | 2022-11-08 19:08:29 +0000 | [diff] [blame] | 441 | } elsif (defined($token = $self->peek()) && $token->[0] eq '(') { |
| 442 | if ($cmd->[0] !~ /\w=$/) { |
Eric Sunshine | 6594554 | 2022-09-01 00:29:41 +0000 | [diff] [blame] | 443 | push(@tokens, $self->parse_func()); |
| 444 | return @tokens; |
| 445 | } |
Eric Sunshine | 5f0321a | 2022-11-08 19:08:29 +0000 | [diff] [blame] | 446 | my @array = $self->parse_bash_array_assignment(); |
| 447 | $tokens[-1]->[0] .= join(' ', map {$_->[0]} @array); |
| 448 | $tokens[-1]->[2] = $array[$#array][2] if @array; |
Eric Sunshine | 6594554 | 2022-09-01 00:29:41 +0000 | [diff] [blame] | 449 | } |
| 450 | |
| 451 | while (defined(my $token = $self->next_token())) { |
| 452 | $self->untoken($token), last if $self->stop_at($token); |
| 453 | push(@tokens, $token); |
Eric Sunshine | 5f0321a | 2022-11-08 19:08:29 +0000 | [diff] [blame] | 454 | last if $token->[0] =~ /^(?:[;&\n|]|&&|\|\|)$/; |
Eric Sunshine | 6594554 | 2022-09-01 00:29:41 +0000 | [diff] [blame] | 455 | } |
Eric Sunshine | 5f0321a | 2022-11-08 19:08:29 +0000 | [diff] [blame] | 456 | push(@tokens, $self->next_token()) if $tokens[-1]->[0] ne "\n" && defined($token = $self->peek()) && $token->[0] eq "\n"; |
Eric Sunshine | 6594554 | 2022-09-01 00:29:41 +0000 | [diff] [blame] | 457 | return @tokens; |
| 458 | } |
| 459 | |
| 460 | sub accumulate { |
| 461 | my ($self, $tokens, $cmd) = @_; |
| 462 | push(@$tokens, @$cmd); |
| 463 | } |
| 464 | |
| 465 | sub parse { |
| 466 | my ($self, $stop) = @_; |
| 467 | push(@{$self->{stop}}, $stop); |
| 468 | goto DONE if $self->stop_at($self->peek()); |
| 469 | my @tokens; |
| 470 | while (my @cmd = $self->parse_cmd()) { |
| 471 | $self->accumulate(\@tokens, \@cmd); |
| 472 | last if $self->stop_at($self->peek()); |
| 473 | } |
| 474 | DONE: |
| 475 | pop(@{$self->{stop}}); |
| 476 | return @tokens; |
| 477 | } |
| 478 | |
Eric Sunshine | 6d932e9 | 2022-09-01 00:29:42 +0000 | [diff] [blame] | 479 | # TestParser is a subclass of ShellParser which, beyond parsing shell script |
| 480 | # code, is also imbued with semantic knowledge of test construction, and checks |
| 481 | # tests for common problems (such as broken &&-chains) which might hide bugs in |
| 482 | # the tests themselves or in behaviors being exercised by the tests. As such, |
| 483 | # TestParser is only called upon to parse test bodies, not the top-level |
| 484 | # scripts in which the tests are defined. |
| 485 | package TestParser; |
| 486 | |
| 487 | use base 'ShellParser'; |
| 488 | |
Eric Sunshine | 73c768d | 2022-11-08 19:08:30 +0000 | [diff] [blame] | 489 | sub new { |
| 490 | my $class = shift @_; |
| 491 | my $self = $class->SUPER::new(@_); |
| 492 | $self->{problems} = []; |
| 493 | return $self; |
| 494 | } |
| 495 | |
Eric Sunshine | 6d932e9 | 2022-09-01 00:29:42 +0000 | [diff] [blame] | 496 | sub find_non_nl { |
| 497 | my $tokens = shift @_; |
| 498 | my $n = shift @_; |
| 499 | $n = $#$tokens if !defined($n); |
Eric Sunshine | 5f0321a | 2022-11-08 19:08:29 +0000 | [diff] [blame] | 500 | $n-- while $n >= 0 && $$tokens[$n]->[0] eq "\n"; |
Eric Sunshine | 6d932e9 | 2022-09-01 00:29:42 +0000 | [diff] [blame] | 501 | return $n; |
| 502 | } |
| 503 | |
| 504 | sub ends_with { |
| 505 | my ($tokens, $needles) = @_; |
| 506 | my $n = find_non_nl($tokens); |
| 507 | for my $needle (reverse(@$needles)) { |
| 508 | return undef if $n < 0; |
| 509 | $n = find_non_nl($tokens, $n), next if $needle eq "\n"; |
Eric Sunshine | 5f0321a | 2022-11-08 19:08:29 +0000 | [diff] [blame] | 510 | return undef if $$tokens[$n]->[0] !~ $needle; |
Eric Sunshine | 6d932e9 | 2022-09-01 00:29:42 +0000 | [diff] [blame] | 511 | $n--; |
| 512 | } |
| 513 | return 1; |
| 514 | } |
| 515 | |
Eric Sunshine | 35ebb1e | 2022-09-01 00:29:45 +0000 | [diff] [blame] | 516 | sub match_ending { |
| 517 | my ($tokens, $endings) = @_; |
| 518 | for my $needles (@$endings) { |
| 519 | next if @$tokens < scalar(grep {$_ ne "\n"} @$needles); |
| 520 | return 1 if ends_with($tokens, $needles); |
| 521 | } |
| 522 | return undef; |
| 523 | } |
| 524 | |
Eric Sunshine | fd4094c | 2022-09-01 00:29:50 +0000 | [diff] [blame] | 525 | sub parse_loop_body { |
| 526 | my $self = shift @_; |
| 527 | my @tokens = $self->SUPER::parse_loop_body(@_); |
| 528 | # did loop signal failure via "|| return" or "|| exit"? |
Eric Sunshine | 5f0321a | 2022-11-08 19:08:29 +0000 | [diff] [blame] | 529 | return @tokens if !@tokens || grep {$_->[0] =~ /^(?:return|exit|\$\?)$/} @tokens; |
Eric Sunshine | ae0c55a | 2022-09-01 00:29:51 +0000 | [diff] [blame] | 530 | # did loop upstream of a pipe signal failure via "|| echo 'impossible |
| 531 | # text'" as the final command in the loop body? |
| 532 | return @tokens if ends_with(\@tokens, [qr/^\|\|$/, "\n", qr/^echo$/, qr/^.+$/]); |
Eric Sunshine | fd4094c | 2022-09-01 00:29:50 +0000 | [diff] [blame] | 533 | # flag missing "return/exit" handling explicit failure in loop body |
| 534 | my $n = find_non_nl(\@tokens); |
Eric Sunshine | e44f15b | 2024-09-10 00:10:12 -0400 | [diff] [blame] | 535 | push(@{$self->{problems}}, [$self->{insubshell} ? 'LOOPEXIT' : 'LOOPRETURN', $tokens[$n]]); |
Eric Sunshine | fd4094c | 2022-09-01 00:29:50 +0000 | [diff] [blame] | 536 | return @tokens; |
| 537 | } |
| 538 | |
Eric Sunshine | 35ebb1e | 2022-09-01 00:29:45 +0000 | [diff] [blame] | 539 | my @safe_endings = ( |
Eric Sunshine | aabc325 | 2022-09-01 00:29:47 +0000 | [diff] [blame] | 540 | [qr/^(?:&&|\|\||\||&)$/], |
Eric Sunshine | 35ebb1e | 2022-09-01 00:29:45 +0000 | [diff] [blame] | 541 | [qr/^(?:exit|return)$/, qr/^(?:\d+|\$\?)$/], |
| 542 | [qr/^(?:exit|return)$/, qr/^(?:\d+|\$\?)$/, qr/^;$/], |
| 543 | [qr/^(?:exit|return|continue)$/], |
| 544 | [qr/^(?:exit|return|continue)$/, qr/^;$/]); |
| 545 | |
Eric Sunshine | 6d932e9 | 2022-09-01 00:29:42 +0000 | [diff] [blame] | 546 | sub accumulate { |
| 547 | my ($self, $tokens, $cmd) = @_; |
Eric Sunshine | 73c768d | 2022-11-08 19:08:30 +0000 | [diff] [blame] | 548 | my $problems = $self->{problems}; |
Eric Sunshine | c90d81f | 2022-11-08 19:08:27 +0000 | [diff] [blame] | 549 | |
| 550 | # no previous command to check for missing "&&" |
Eric Sunshine | 6d932e9 | 2022-09-01 00:29:42 +0000 | [diff] [blame] | 551 | goto DONE unless @$tokens; |
Eric Sunshine | c90d81f | 2022-11-08 19:08:27 +0000 | [diff] [blame] | 552 | |
| 553 | # new command is empty line; can't yet check if previous is missing "&&" |
Eric Sunshine | 5f0321a | 2022-11-08 19:08:29 +0000 | [diff] [blame] | 554 | goto DONE if @$cmd == 1 && $$cmd[0]->[0] eq "\n"; |
Eric Sunshine | 6d932e9 | 2022-09-01 00:29:42 +0000 | [diff] [blame] | 555 | |
Eric Sunshine | 35ebb1e | 2022-09-01 00:29:45 +0000 | [diff] [blame] | 556 | # did previous command end with "&&", "|", "|| return" or similar? |
| 557 | goto DONE if match_ending($tokens, \@safe_endings); |
Eric Sunshine | 6d932e9 | 2022-09-01 00:29:42 +0000 | [diff] [blame] | 558 | |
Eric Sunshine | a8f30ee | 2022-09-01 00:29:48 +0000 | [diff] [blame] | 559 | # if this command handles "$?" specially, then okay for previous |
| 560 | # command to be missing "&&" |
| 561 | for my $token (@$cmd) { |
Eric Sunshine | 5f0321a | 2022-11-08 19:08:29 +0000 | [diff] [blame] | 562 | goto DONE if $token->[0] =~ /\$\?/; |
Eric Sunshine | a8f30ee | 2022-09-01 00:29:48 +0000 | [diff] [blame] | 563 | } |
| 564 | |
Eric Sunshine | 832c68b | 2022-09-01 00:29:49 +0000 | [diff] [blame] | 565 | # if this command is "false", "return 1", or "exit 1" (which signal |
| 566 | # failure explicitly), then okay for all preceding commands to be |
| 567 | # missing "&&" |
Eric Sunshine | 5f0321a | 2022-11-08 19:08:29 +0000 | [diff] [blame] | 568 | if ($$cmd[0]->[0] =~ /^(?:false|return|exit)$/) { |
Eric Sunshine | 73c768d | 2022-11-08 19:08:30 +0000 | [diff] [blame] | 569 | @$problems = grep {$_->[0] ne 'AMP'} @$problems; |
Eric Sunshine | 832c68b | 2022-09-01 00:29:49 +0000 | [diff] [blame] | 570 | goto DONE; |
| 571 | } |
| 572 | |
Eric Sunshine | 6d932e9 | 2022-09-01 00:29:42 +0000 | [diff] [blame] | 573 | # flag missing "&&" at end of previous command |
| 574 | my $n = find_non_nl($tokens); |
Eric Sunshine | 73c768d | 2022-11-08 19:08:30 +0000 | [diff] [blame] | 575 | push(@$problems, ['AMP', $tokens->[$n]]) unless $n < 0; |
Eric Sunshine | 6d932e9 | 2022-09-01 00:29:42 +0000 | [diff] [blame] | 576 | |
| 577 | DONE: |
| 578 | $self->SUPER::accumulate($tokens, $cmd); |
| 579 | } |
| 580 | |
Eric Sunshine | d99ebd6 | 2022-09-01 00:29:43 +0000 | [diff] [blame] | 581 | # ScriptParser is a subclass of ShellParser which identifies individual test |
| 582 | # definitions within test scripts, and passes each test body through TestParser |
| 583 | # to identify possible problems. ShellParser detects test definitions not only |
| 584 | # at the top-level of test scripts but also within compound commands such as |
| 585 | # loops and function definitions. |
Eric Sunshine | b4f25b0 | 2022-09-01 00:29:39 +0000 | [diff] [blame] | 586 | package ScriptParser; |
| 587 | |
Eric Sunshine | d99ebd6 | 2022-09-01 00:29:43 +0000 | [diff] [blame] | 588 | use base 'ShellParser'; |
| 589 | |
Eric Sunshine | b4f25b0 | 2022-09-01 00:29:39 +0000 | [diff] [blame] | 590 | sub new { |
| 591 | my $class = shift @_; |
Eric Sunshine | d99ebd6 | 2022-09-01 00:29:43 +0000 | [diff] [blame] | 592 | my $self = $class->SUPER::new(@_); |
Eric Sunshine | b4f25b0 | 2022-09-01 00:29:39 +0000 | [diff] [blame] | 593 | $self->{ntests} = 0; |
Eric Sunshine | 588ef84 | 2024-09-10 00:10:11 -0400 | [diff] [blame] | 594 | $self->{nerrs} = 0; |
Eric Sunshine | b4f25b0 | 2022-09-01 00:29:39 +0000 | [diff] [blame] | 595 | return $self; |
| 596 | } |
| 597 | |
Eric Sunshine | d99ebd6 | 2022-09-01 00:29:43 +0000 | [diff] [blame] | 598 | # extract the raw content of a token, which may be a single string or a |
| 599 | # composition of multiple strings and non-string character runs; for instance, |
| 600 | # `"test body"` unwraps to `test body`; `word"a b"42'c d'` to `worda b42c d` |
| 601 | sub unwrap { |
Eric Sunshine | 5f0321a | 2022-11-08 19:08:29 +0000 | [diff] [blame] | 602 | my $token = (@_ ? shift @_ : $_)->[0]; |
Eric Sunshine | d99ebd6 | 2022-09-01 00:29:43 +0000 | [diff] [blame] | 603 | # simple case: 'sqstring' or "dqstring" |
| 604 | return $token if $token =~ s/^'([^']*)'$/$1/; |
| 605 | return $token if $token =~ s/^"([^"]*)"$/$1/; |
| 606 | |
| 607 | # composite case |
| 608 | my ($s, $q, $escaped); |
| 609 | while (1) { |
| 610 | # slurp up non-special characters |
| 611 | $s .= $1 if $token =~ /\G([^\\'"]*)/gc; |
| 612 | # handle special characters |
| 613 | last unless $token =~ /\G(.)/sgc; |
| 614 | my $c = $1; |
| 615 | $q = undef, next if defined($q) && $c eq $q; |
| 616 | $q = $c, next if !defined($q) && $c =~ /^['"]$/; |
| 617 | if ($c eq '\\') { |
| 618 | last unless $token =~ /\G(.)/sgc; |
| 619 | $c = $1; |
| 620 | $s .= '\\' if $c eq "\n"; # preserve line splice |
| 621 | } |
| 622 | $s .= $c; |
| 623 | } |
| 624 | return $s |
| 625 | } |
| 626 | |
Eric Sunshine | e44f15b | 2024-09-10 00:10:12 -0400 | [diff] [blame] | 627 | sub format_problem { |
| 628 | local $_ = shift; |
| 629 | /^AMP$/ && return "missing '&&'"; |
| 630 | /^LOOPRETURN$/ && return "missing '|| return 1'"; |
| 631 | /^LOOPEXIT$/ && return "missing '|| exit 1'"; |
| 632 | /^HEREDOC$/ && return 'unclosed heredoc'; |
| 633 | die("unrecognized problem type '$_'\n"); |
| 634 | } |
| 635 | |
Eric Sunshine | d99ebd6 | 2022-09-01 00:29:43 +0000 | [diff] [blame] | 636 | sub check_test { |
| 637 | my $self = shift @_; |
Eric Sunshine | a4a5f28 | 2024-07-10 04:38:31 -0400 | [diff] [blame] | 638 | my $title = unwrap(shift @_); |
| 639 | my $body = shift @_; |
| 640 | my $lineno = $body->[3]; |
| 641 | $body = unwrap($body); |
| 642 | if ($body eq '-') { |
| 643 | my $herebody = shift @_; |
| 644 | $body = $herebody->{content}; |
| 645 | $lineno = $herebody->{start_line}; |
| 646 | } |
Eric Sunshine | d99ebd6 | 2022-09-01 00:29:43 +0000 | [diff] [blame] | 647 | $self->{ntests}++; |
| 648 | my $parser = TestParser->new(\$body); |
| 649 | my @tokens = $parser->parse(); |
Eric Sunshine | 73c768d | 2022-11-08 19:08:30 +0000 | [diff] [blame] | 650 | my $problems = $parser->{problems}; |
Eric Sunshine | 588ef84 | 2024-09-10 00:10:11 -0400 | [diff] [blame] | 651 | $self->{nerrs} += @$problems; |
Eric Sunshine | 73c768d | 2022-11-08 19:08:30 +0000 | [diff] [blame] | 652 | return unless $emit_all || @$problems; |
Eric Sunshine | 7c04aa7 | 2022-09-13 04:01:47 +0000 | [diff] [blame] | 653 | my $c = main::fd_colors(1); |
Eric Sunshine | a13ff41 | 2024-09-10 00:10:13 -0400 | [diff] [blame] | 654 | my ($erropen, $errclose) = -t 1 ? ("$c->{rev}$c->{red}", $c->{reset}) : ('?!', '?!'); |
Eric Sunshine | 73c768d | 2022-11-08 19:08:30 +0000 | [diff] [blame] | 655 | my $start = 0; |
| 656 | my $checked = ''; |
| 657 | for (sort {$a->[1]->[2] <=> $b->[1]->[2]} @$problems) { |
| 658 | my ($label, $token) = @$_; |
| 659 | my $pos = $token->[2]; |
Eric Sunshine | e44f15b | 2024-09-10 00:10:12 -0400 | [diff] [blame] | 660 | my $err = format_problem($label); |
Eric Sunshine | 588ef84 | 2024-09-10 00:10:11 -0400 | [diff] [blame] | 661 | $checked .= substr($body, $start, $pos - $start); |
| 662 | $checked .= ' ' unless $checked =~ /\s$/; |
Eric Sunshine | a13ff41 | 2024-09-10 00:10:13 -0400 | [diff] [blame] | 663 | $checked .= "${erropen}LINT: $err$errclose"; |
Eric Sunshine | 588ef84 | 2024-09-10 00:10:11 -0400 | [diff] [blame] | 664 | $checked .= ' ' unless $pos >= length($body) || |
| 665 | substr($body, $pos, 1) =~ /^\s/; |
Eric Sunshine | 73c768d | 2022-11-08 19:08:30 +0000 | [diff] [blame] | 666 | $start = $pos; |
| 667 | } |
| 668 | $checked .= substr($body, $start); |
Eric Sunshine | 48d69d8 | 2022-11-11 07:34:54 +0000 | [diff] [blame] | 669 | $checked =~ s/^/$lineno++ . ' '/mge; |
| 670 | $checked =~ s/^\d+ \n//; |
Eric Sunshine | 48d69d8 | 2022-11-11 07:34:54 +0000 | [diff] [blame] | 671 | $checked =~ s/^\d+/$c->{dim}$&$c->{reset}/mg; |
Eric Sunshine | d99ebd6 | 2022-09-01 00:29:43 +0000 | [diff] [blame] | 672 | $checked .= "\n" unless $checked =~ /\n$/; |
Eric Sunshine | 7c04aa7 | 2022-09-13 04:01:47 +0000 | [diff] [blame] | 673 | push(@{$self->{output}}, "$c->{blue}# chainlint: $title$c->{reset}\n$checked"); |
Eric Sunshine | d99ebd6 | 2022-09-01 00:29:43 +0000 | [diff] [blame] | 674 | } |
| 675 | |
Eric Sunshine | b4f25b0 | 2022-09-01 00:29:39 +0000 | [diff] [blame] | 676 | sub parse_cmd { |
Eric Sunshine | d99ebd6 | 2022-09-01 00:29:43 +0000 | [diff] [blame] | 677 | my $self = shift @_; |
| 678 | my @tokens = $self->SUPER::parse_cmd(); |
Eric Sunshine | 5f0321a | 2022-11-08 19:08:29 +0000 | [diff] [blame] | 679 | return @tokens unless @tokens && $tokens[0]->[0] =~ /^test_expect_(?:success|failure)$/; |
Eric Sunshine | d99ebd6 | 2022-09-01 00:29:43 +0000 | [diff] [blame] | 680 | my $n = $#tokens; |
Eric Sunshine | 5f0321a | 2022-11-08 19:08:29 +0000 | [diff] [blame] | 681 | $n-- while $n >= 0 && $tokens[$n]->[0] =~ /^(?:[;&\n|]|&&|\|\|)$/; |
Eric Sunshine | a4a5f28 | 2024-07-10 04:38:31 -0400 | [diff] [blame] | 682 | my $herebody; |
| 683 | if ($n >= 2 && $tokens[$n-1]->[0] eq '-' && $tokens[$n]->[0] =~ /^<<-?(.+)$/) { |
| 684 | $herebody = $self->{heredocs}->{$1}; |
| 685 | $n--; |
| 686 | } |
| 687 | $self->check_test($tokens[1], $tokens[2], $herebody) if $n == 2; # title body |
| 688 | $self->check_test($tokens[2], $tokens[3], $herebody) if $n > 2; # prereq title body |
Eric Sunshine | d99ebd6 | 2022-09-01 00:29:43 +0000 | [diff] [blame] | 689 | return @tokens; |
Eric Sunshine | b4f25b0 | 2022-09-01 00:29:39 +0000 | [diff] [blame] | 690 | } |
| 691 | |
| 692 | # main contains high-level functionality for processing command-line switches, |
| 693 | # feeding input test scripts to ScriptParser, and reporting results. |
| 694 | package main; |
| 695 | |
| 696 | my $getnow = sub { return time(); }; |
| 697 | my $interval = sub { return time() - shift; }; |
| 698 | if (eval {require Time::HiRes; Time::HiRes->import(); 1;}) { |
| 699 | $getnow = sub { return [Time::HiRes::gettimeofday()]; }; |
| 700 | $interval = sub { return Time::HiRes::tv_interval(shift); }; |
| 701 | } |
| 702 | |
Eric Sunshine | 7c04aa7 | 2022-09-13 04:01:47 +0000 | [diff] [blame] | 703 | # Restore TERM if test framework set it to "dumb" so 'tput' will work; do this |
| 704 | # outside of get_colors() since under 'ithreads' all threads use %ENV of main |
| 705 | # thread and ignore %ENV changes in subthreads. |
| 706 | $ENV{TERM} = $ENV{USER_TERM} if $ENV{USER_TERM}; |
| 707 | |
Eric Sunshine | 48d69d8 | 2022-11-11 07:34:54 +0000 | [diff] [blame] | 708 | my @NOCOLORS = (bold => '', rev => '', dim => '', reset => '', blue => '', green => '', red => ''); |
Eric Sunshine | 7c04aa7 | 2022-09-13 04:01:47 +0000 | [diff] [blame] | 709 | my %COLORS = (); |
| 710 | sub get_colors { |
| 711 | return \%COLORS if %COLORS; |
Eric Sunshine | 5451877 | 2022-11-11 07:34:52 +0000 | [diff] [blame] | 712 | if (exists($ENV{NO_COLOR})) { |
Eric Sunshine | 7c04aa7 | 2022-09-13 04:01:47 +0000 | [diff] [blame] | 713 | %COLORS = @NOCOLORS; |
| 714 | return \%COLORS; |
| 715 | } |
Eric Sunshine | 5451877 | 2022-11-11 07:34:52 +0000 | [diff] [blame] | 716 | if ($ENV{TERM} =~ /xterm|xterm-\d+color|xterm-new|xterm-direct|nsterm|nsterm-\d+color|nsterm-direct/) { |
| 717 | %COLORS = (bold => "\e[1m", |
| 718 | rev => "\e[7m", |
Eric Sunshine | 48d69d8 | 2022-11-11 07:34:54 +0000 | [diff] [blame] | 719 | dim => "\e[2m", |
Eric Sunshine | 5451877 | 2022-11-11 07:34:52 +0000 | [diff] [blame] | 720 | reset => "\e[0m", |
| 721 | blue => "\e[34m", |
| 722 | green => "\e[32m", |
| 723 | red => "\e[31m"); |
| 724 | return \%COLORS; |
| 725 | } |
| 726 | if (system("tput sgr0 >/dev/null 2>&1") == 0 && |
| 727 | system("tput bold >/dev/null 2>&1") == 0 && |
| 728 | system("tput rev >/dev/null 2>&1") == 0 && |
Eric Sunshine | 48d69d8 | 2022-11-11 07:34:54 +0000 | [diff] [blame] | 729 | system("tput dim >/dev/null 2>&1") == 0 && |
Eric Sunshine | 5451877 | 2022-11-11 07:34:52 +0000 | [diff] [blame] | 730 | system("tput setaf 1 >/dev/null 2>&1") == 0) { |
| 731 | %COLORS = (bold => `tput bold`, |
| 732 | rev => `tput rev`, |
Eric Sunshine | 48d69d8 | 2022-11-11 07:34:54 +0000 | [diff] [blame] | 733 | dim => `tput dim`, |
Eric Sunshine | 5451877 | 2022-11-11 07:34:52 +0000 | [diff] [blame] | 734 | reset => `tput sgr0`, |
| 735 | blue => `tput setaf 4`, |
| 736 | green => `tput setaf 2`, |
| 737 | red => `tput setaf 1`); |
| 738 | return \%COLORS; |
| 739 | } |
| 740 | %COLORS = @NOCOLORS; |
Eric Sunshine | 7c04aa7 | 2022-09-13 04:01:47 +0000 | [diff] [blame] | 741 | return \%COLORS; |
| 742 | } |
| 743 | |
| 744 | my %FD_COLORS = (); |
| 745 | sub fd_colors { |
| 746 | my $fd = shift; |
| 747 | return $FD_COLORS{$fd} if exists($FD_COLORS{$fd}); |
| 748 | $FD_COLORS{$fd} = -t $fd ? get_colors() : {@NOCOLORS}; |
| 749 | return $FD_COLORS{$fd}; |
| 750 | } |
| 751 | |
Eric Sunshine | 29fb2ec | 2022-09-01 00:29:44 +0000 | [diff] [blame] | 752 | sub ncores { |
| 753 | # Windows |
Eric Sunshine | 037348e | 2024-05-20 15:01:29 -0400 | [diff] [blame] | 754 | if (exists($ENV{NUMBER_OF_PROCESSORS})) { |
| 755 | my $ncpu = $ENV{NUMBER_OF_PROCESSORS}; |
| 756 | return $ncpu > 0 ? $ncpu : 1; |
| 757 | } |
Eric Sunshine | 29fb2ec | 2022-09-01 00:29:44 +0000 | [diff] [blame] | 758 | # Linux / MSYS2 / Cygwin / WSL |
Eric Sunshine | 037348e | 2024-05-20 15:01:29 -0400 | [diff] [blame] | 759 | if (open my $fh, '<', '/proc/cpuinfo') { |
| 760 | my $cpuinfo = do { local $/; <$fh> }; |
| 761 | close($fh); |
Eric Sunshine | 2e7e920 | 2024-05-20 15:01:31 -0400 | [diff] [blame] | 762 | if ($cpuinfo =~ /^n?cpus active\s*:\s*(\d+)/m) { |
| 763 | return $1 if $1 > 0; |
| 764 | } |
John Paul Adrian Glaubitz | 45db5ed | 2024-05-20 15:01:30 -0400 | [diff] [blame] | 765 | my @matches = ($cpuinfo =~ /^(processor|CPU)[\s\d]*:/mg); |
Eric Sunshine | 037348e | 2024-05-20 15:01:29 -0400 | [diff] [blame] | 766 | return @matches ? scalar(@matches) : 1; |
| 767 | } |
Eric Sunshine | 29fb2ec | 2022-09-01 00:29:44 +0000 | [diff] [blame] | 768 | # macOS & BSD |
Eric Sunshine | 037348e | 2024-05-20 15:01:29 -0400 | [diff] [blame] | 769 | if ($^O =~ /(?:^darwin$|bsd)/) { |
| 770 | my $ncpu = qx/sysctl -n hw.ncpu/; |
| 771 | return $ncpu > 0 ? $ncpu : 1; |
| 772 | } |
Eric Sunshine | 29fb2ec | 2022-09-01 00:29:44 +0000 | [diff] [blame] | 773 | return 1; |
| 774 | } |
| 775 | |
Eric Sunshine | b4f25b0 | 2022-09-01 00:29:39 +0000 | [diff] [blame] | 776 | sub show_stats { |
| 777 | my ($start_time, $stats) = @_; |
| 778 | my $walltime = $interval->($start_time); |
| 779 | my ($usertime) = times(); |
| 780 | my ($total_workers, $total_scripts, $total_tests, $total_errs) = (0, 0, 0, 0); |
Eric Sunshine | 7c04aa7 | 2022-09-13 04:01:47 +0000 | [diff] [blame] | 781 | my $c = fd_colors(2); |
| 782 | print(STDERR $c->{green}); |
Eric Sunshine | b4f25b0 | 2022-09-01 00:29:39 +0000 | [diff] [blame] | 783 | for (@$stats) { |
| 784 | my ($worker, $nscripts, $ntests, $nerrs) = @$_; |
| 785 | print(STDERR "worker $worker: $nscripts scripts, $ntests tests, $nerrs errors\n"); |
| 786 | $total_workers++; |
| 787 | $total_scripts += $nscripts; |
| 788 | $total_tests += $ntests; |
| 789 | $total_errs += $nerrs; |
| 790 | } |
Eric Sunshine | 7c04aa7 | 2022-09-13 04:01:47 +0000 | [diff] [blame] | 791 | printf(STDERR "total: %d workers, %d scripts, %d tests, %d errors, %.2fs/%.2fs (wall/user)$c->{reset}\n", $total_workers, $total_scripts, $total_tests, $total_errs, $walltime, $usertime); |
Eric Sunshine | b4f25b0 | 2022-09-01 00:29:39 +0000 | [diff] [blame] | 792 | } |
| 793 | |
| 794 | sub check_script { |
| 795 | my ($id, $next_script, $emit) = @_; |
| 796 | my ($nscripts, $ntests, $nerrs) = (0, 0, 0); |
| 797 | while (my $path = $next_script->()) { |
| 798 | $nscripts++; |
| 799 | my $fh; |
Jeff King | 382f6ed | 2024-07-10 04:37:30 -0400 | [diff] [blame] | 800 | unless (open($fh, "<:unix:crlf", $path)) { |
Eric Sunshine | b4f25b0 | 2022-09-01 00:29:39 +0000 | [diff] [blame] | 801 | $emit->("?!ERR?! $path: $!\n"); |
| 802 | next; |
| 803 | } |
| 804 | my $s = do { local $/; <$fh> }; |
| 805 | close($fh); |
| 806 | my $parser = ScriptParser->new(\$s); |
| 807 | 1 while $parser->parse_cmd(); |
| 808 | if (@{$parser->{output}}) { |
Eric Sunshine | 7c04aa7 | 2022-09-13 04:01:47 +0000 | [diff] [blame] | 809 | my $c = fd_colors(1); |
Eric Sunshine | b4f25b0 | 2022-09-01 00:29:39 +0000 | [diff] [blame] | 810 | my $s = join('', @{$parser->{output}}); |
Eric Sunshine | 7c04aa7 | 2022-09-13 04:01:47 +0000 | [diff] [blame] | 811 | $emit->("$c->{bold}$c->{blue}# chainlint: $path$c->{reset}\n" . $s); |
Eric Sunshine | b4f25b0 | 2022-09-01 00:29:39 +0000 | [diff] [blame] | 812 | } |
| 813 | $ntests += $parser->{ntests}; |
Eric Sunshine | 588ef84 | 2024-09-10 00:10:11 -0400 | [diff] [blame] | 814 | $nerrs += $parser->{nerrs}; |
Eric Sunshine | b4f25b0 | 2022-09-01 00:29:39 +0000 | [diff] [blame] | 815 | } |
| 816 | return [$id, $nscripts, $ntests, $nerrs]; |
| 817 | } |
| 818 | |
| 819 | sub exit_code { |
| 820 | my $stats = shift @_; |
| 821 | for (@$stats) { |
| 822 | my ($worker, $nscripts, $ntests, $nerrs) = @$_; |
| 823 | return 1 if $nerrs; |
| 824 | } |
| 825 | return 0; |
| 826 | } |
| 827 | |
| 828 | Getopt::Long::Configure(qw{bundling}); |
| 829 | GetOptions( |
| 830 | "emit-all!" => \$emit_all, |
Eric Sunshine | 29fb2ec | 2022-09-01 00:29:44 +0000 | [diff] [blame] | 831 | "jobs|j=i" => \$jobs, |
Eric Sunshine | b4f25b0 | 2022-09-01 00:29:39 +0000 | [diff] [blame] | 832 | "stats|show-stats!" => \$show_stats) or die("option error\n"); |
Eric Sunshine | 29fb2ec | 2022-09-01 00:29:44 +0000 | [diff] [blame] | 833 | $jobs = ncores() if $jobs < 1; |
Eric Sunshine | b4f25b0 | 2022-09-01 00:29:39 +0000 | [diff] [blame] | 834 | |
| 835 | my $start_time = $getnow->(); |
| 836 | my @stats; |
| 837 | |
| 838 | my @scripts; |
| 839 | push(@scripts, File::Glob::bsd_glob($_)) for (@ARGV); |
| 840 | unless (@scripts) { |
| 841 | show_stats($start_time, \@stats) if $show_stats; |
| 842 | exit; |
| 843 | } |
Jeff King | d558509 | 2024-07-10 04:35:57 -0400 | [diff] [blame] | 844 | $jobs = @scripts if @scripts < $jobs; |
Eric Sunshine | b4f25b0 | 2022-09-01 00:29:39 +0000 | [diff] [blame] | 845 | |
Jeff King | a7c1c10 | 2024-07-10 04:35:13 -0400 | [diff] [blame] | 846 | unless ($jobs > 1 && |
| 847 | $Config{useithreads} && eval { |
Eric Sunshine | 29fb2ec | 2022-09-01 00:29:44 +0000 | [diff] [blame] | 848 | require threads; threads->import(); |
| 849 | require Thread::Queue; Thread::Queue->import(); |
| 850 | 1; |
| 851 | }) { |
| 852 | push(@stats, check_script(1, sub { shift(@scripts); }, sub { print(@_); })); |
| 853 | show_stats($start_time, \@stats) if $show_stats; |
| 854 | exit(exit_code(\@stats)); |
| 855 | } |
| 856 | |
| 857 | my $script_queue = Thread::Queue->new(); |
| 858 | my $output_queue = Thread::Queue->new(); |
| 859 | |
| 860 | sub next_script { return $script_queue->dequeue(); } |
| 861 | sub emit { $output_queue->enqueue(@_); } |
| 862 | |
| 863 | sub monitor { |
| 864 | while (my $s = $output_queue->dequeue()) { |
| 865 | print($s); |
| 866 | } |
| 867 | } |
| 868 | |
| 869 | my $mon = threads->create({'context' => 'void'}, \&monitor); |
| 870 | threads->create({'context' => 'list'}, \&check_script, $_, \&next_script, \&emit) for 1..$jobs; |
| 871 | |
| 872 | $script_queue->enqueue(@scripts); |
| 873 | $script_queue->end(); |
| 874 | |
| 875 | for (threads->list()) { |
| 876 | push(@stats, $_->join()) unless $_ == $mon; |
| 877 | } |
| 878 | |
| 879 | $output_queue->end(); |
| 880 | $mon->join(); |
| 881 | |
Eric Sunshine | b4f25b0 | 2022-09-01 00:29:39 +0000 | [diff] [blame] | 882 | show_stats($start_time, \@stats) if $show_stats; |
| 883 | exit(exit_code(\@stats)); |