blob: f0598e3934bc46e780d517b2046004c0de54f598 [file] [log] [blame]
Eric Sunshineb4f25b02022-09-01 00:29:39 +00001#!/usr/bin/env perl
2#
3# Copyright (c) 2021-2022 Eric Sunshine <sunshine@sunshineco.com>
4#
5# This tool scans shell scripts for test definitions and checks those tests for
6# problems, such as broken &&-chains, which might hide bugs in the tests
7# themselves or in behaviors being exercised by the tests.
8#
9# Input arguments are pathnames of shell scripts containing test definitions,
10# or globs referencing a collection of scripts. For each problem discovered,
11# the pathname of the script containing the test is printed along with the test
Eric Sunshinee44f15b2024-09-10 00:10:12 -040012# name and the test body with a `?!LINT: ...?!` annotation at the location of
13# each detected problem, where "..." is an explanation of the problem. Returns
14# zero if no problems are discovered, otherwise non-zero.
Eric Sunshineb4f25b02022-09-01 00:29:39 +000015
16use warnings;
17use strict;
Eric Sunshine29fb2ec2022-09-01 00:29:44 +000018use Config;
Eric Sunshineb4f25b02022-09-01 00:29:39 +000019use File::Glob;
20use Getopt::Long;
21
Eric Sunshine29fb2ec2022-09-01 00:29:44 +000022my $jobs = -1;
Eric Sunshineb4f25b02022-09-01 00:29:39 +000023my $show_stats;
24my $emit_all;
25
Eric Sunshine7d480472022-09-01 00:29:40 +000026# Lexer tokenizes POSIX shell scripts. It is roughly modeled after section 2.3
27# "Token Recognition" of POSIX chapter 2 "Shell Command Language". Although
28# similar to lexical analyzers for other languages, this one differs in a few
29# substantial ways due to quirks of the shell command language.
30#
31# For instance, in many languages, newline is just whitespace like space or
32# TAB, but in shell a newline is a command separator, thus a distinct lexical
33# token. A newline is significant and returned as a distinct token even at the
34# end of a shell comment.
35#
36# In other languages, `1+2` would typically be scanned as three tokens
37# (`1`, `+`, and `2`), but in shell it is a single token. However, the similar
38# `1 + 2`, which embeds whitepace, is scanned as three token in shell, as well.
39# In shell, several characters with special meaning lose that meaning when not
40# surrounded by whitespace. For instance, the negation operator `!` is special
41# when standing alone surrounded by whitespace; whereas in `foo!uucp` it is
42# just a plain character in the longer token "foo!uucp". In many other
43# languages, `"string"/foo:'string'` might be scanned as five tokens ("string",
44# `/`, `foo`, `:`, and 'string'), but in shell, it is just a single token.
45#
46# The lexical analyzer for the shell command language is also somewhat unusual
47# in that it recursively invokes the parser to handle the body of `$(...)`
48# expressions which can contain arbitrary shell code. Such expressions may be
49# encountered both inside and outside of double-quoted strings.
50#
51# The lexical analyzer is responsible for consuming shell here-doc bodies which
52# extend from the line following a `<<TAG` operator until a line consisting
53# solely of `TAG`. Here-doc consumption begins when a newline is encountered.
54# It is legal for multiple here-doc `<<TAG` operators to be present on a single
55# line, in which case their bodies must be present one following the next, and
56# are consumed in the (left-to-right) order the `<<TAG` operators appear on the
57# line. A special complication is that the bodies of all here-docs must be
58# consumed when the newline is encountered even if the parse context depth has
59# changed. For instance, in `cat <<A && x=$(cat <<B &&\n`, bodies of here-docs
60# "A" and "B" must be consumed even though "A" was introduced outside the
61# recursive parse context in which "B" was introduced and in which the newline
62# is encountered.
63package Lexer;
64
65sub new {
66 my ($class, $parser, $s) = @_;
67 bless {
68 parser => $parser,
69 buff => $s,
Eric Sunshinebf42f0a2022-11-11 07:34:53 +000070 lineno => 1,
Eric Sunshine7d480472022-09-01 00:29:40 +000071 heretags => []
72 } => $class;
73}
74
75sub scan_heredoc_tag {
76 my $self = shift @_;
77 ${$self->{buff}} =~ /\G(-?)/gc;
78 my $indented = $1;
Eric Sunshine5f0321a2022-11-08 19:08:29 +000079 my $token = $self->scan_token();
80 return "<<$indented" unless $token;
81 my $tag = $token->[0];
Eric Sunshine7d480472022-09-01 00:29:40 +000082 $tag =~ s/['"\\]//g;
Eric Sunshine2b61c8d2023-03-30 15:30:31 -040083 $$token[0] = $indented ? "\t$tag" : "$tag";
84 push(@{$self->{heretags}}, $token);
Eric Sunshine7d480472022-09-01 00:29:40 +000085 return "<<$indented$tag";
86}
87
88sub scan_op {
89 my ($self, $c) = @_;
90 my $b = $self->{buff};
91 return $c unless $$b =~ /\G(.)/sgc;
92 my $cc = $c . $1;
93 return scan_heredoc_tag($self) if $cc eq '<<';
94 return $cc if $cc =~ /^(?:&&|\|\||>>|;;|<&|>&|<>|>\|)$/;
95 pos($$b)--;
96 return $c;
97}
98
99sub scan_sqstring {
100 my $self = shift @_;
101 ${$self->{buff}} =~ /\G([^']*'|.*\z)/sgc;
Eric Sunshinebf42f0a2022-11-11 07:34:53 +0000102 my $s = $1;
103 $self->{lineno} += () = $s =~ /\n/sg;
104 return "'" . $s;
Eric Sunshine7d480472022-09-01 00:29:40 +0000105}
106
107sub scan_dqstring {
108 my $self = shift @_;
109 my $b = $self->{buff};
110 my $s = '"';
111 while (1) {
112 # slurp up non-special characters
113 $s .= $1 if $$b =~ /\G([^"\$\\]+)/gc;
114 # handle special characters
115 last unless $$b =~ /\G(.)/sgc;
116 my $c = $1;
117 $s .= '"', last if $c eq '"';
118 $s .= '$' . $self->scan_dollar(), next if $c eq '$';
119 if ($c eq '\\') {
120 $s .= '\\', last unless $$b =~ /\G(.)/sgc;
121 $c = $1;
Eric Sunshinebf42f0a2022-11-11 07:34:53 +0000122 $self->{lineno}++, next if $c eq "\n"; # line splice
Eric Sunshine7d480472022-09-01 00:29:40 +0000123 # backslash escapes only $, `, ", \ in dq-string
124 $s .= '\\' unless $c =~ /^[\$`"\\]$/;
125 $s .= $c;
126 next;
127 }
128 die("internal error scanning dq-string '$c'\n");
129 }
Eric Sunshinebf42f0a2022-11-11 07:34:53 +0000130 $self->{lineno} += () = $s =~ /\n/sg;
Eric Sunshine7d480472022-09-01 00:29:40 +0000131 return $s;
132}
133
134sub scan_balanced {
135 my ($self, $c1, $c2) = @_;
136 my $b = $self->{buff};
137 my $depth = 1;
138 my $s = $c1;
139 while ($$b =~ /\G([^\Q$c1$c2\E]*(?:[\Q$c1$c2\E]|\z))/gc) {
140 $s .= $1;
141 $depth++, next if $s =~ /\Q$c1\E$/;
142 $depth--;
143 last if $depth == 0;
144 }
Eric Sunshinebf42f0a2022-11-11 07:34:53 +0000145 $self->{lineno} += () = $s =~ /\n/sg;
Eric Sunshine7d480472022-09-01 00:29:40 +0000146 return $s;
147}
148
149sub scan_subst {
150 my $self = shift @_;
151 my @tokens = $self->{parser}->parse(qr/^\)$/);
152 $self->{parser}->next_token(); # closing ")"
153 return @tokens;
154}
155
156sub scan_dollar {
157 my $self = shift @_;
158 my $b = $self->{buff};
159 return $self->scan_balanced('(', ')') if $$b =~ /\G\((?=\()/gc; # $((...))
Eric Sunshine5f0321a2022-11-08 19:08:29 +0000160 return '(' . join(' ', map {$_->[0]} $self->scan_subst()) . ')' if $$b =~ /\G\(/gc; # $(...)
Eric Sunshine7d480472022-09-01 00:29:40 +0000161 return $self->scan_balanced('{', '}') if $$b =~ /\G\{/gc; # ${...}
162 return $1 if $$b =~ /\G(\w+)/gc; # $var
163 return $1 if $$b =~ /\G([@*#?$!0-9-])/gc; # $*, $1, $$, etc.
164 return '';
165}
166
167sub swallow_heredocs {
168 my $self = shift @_;
169 my $b = $self->{buff};
170 my $tags = $self->{heretags};
171 while (my $tag = shift @$tags) {
Eric Sunshinebf42f0a2022-11-11 07:34:53 +0000172 my $start = pos($$b);
Eric Sunshine2b61c8d2023-03-30 15:30:31 -0400173 my $indent = $$tag[0] =~ s/^\t// ? '\\s*' : '';
174 $$b =~ /(?:\G|\n)$indent\Q$$tag[0]\E(?:\n|\z)/gc;
175 if (pos($$b) > $start) {
176 my $body = substr($$b, $start, pos($$b) - $start);
Eric Sunshinea4a5f282024-07-10 04:38:31 -0400177 $self->{parser}->{heredocs}->{$$tag[0]} = {
178 content => substr($body, 0, length($body) - length($&)),
179 start_line => $self->{lineno},
180 };
Eric Sunshine2b61c8d2023-03-30 15:30:31 -0400181 $self->{lineno} += () = $body =~ /\n/sg;
182 next;
183 }
Eric Sunshinee44f15b2024-09-10 00:10:12 -0400184 push(@{$self->{parser}->{problems}}, ['HEREDOC', $tag]);
Eric Sunshine2b61c8d2023-03-30 15:30:31 -0400185 $$b =~ /(?:\G|\n).*\z/gc; # consume rest of input
Eric Sunshinebf42f0a2022-11-11 07:34:53 +0000186 my $body = substr($$b, $start, pos($$b) - $start);
187 $self->{lineno} += () = $body =~ /\n/sg;
Eric Sunshine2b61c8d2023-03-30 15:30:31 -0400188 last;
Eric Sunshine7d480472022-09-01 00:29:40 +0000189 }
190}
191
192sub scan_token {
193 my $self = shift @_;
194 my $b = $self->{buff};
195 my $token = '';
Eric Sunshinebf42f0a2022-11-11 07:34:53 +0000196 my ($start, $startln);
Eric Sunshine7d480472022-09-01 00:29:40 +0000197RESTART:
Eric Sunshinebf42f0a2022-11-11 07:34:53 +0000198 $startln = $self->{lineno};
Eric Sunshine7d480472022-09-01 00:29:40 +0000199 $$b =~ /\G[ \t]+/gc; # skip whitespace (but not newline)
Eric Sunshine5f0321a2022-11-08 19:08:29 +0000200 $start = pos($$b) || 0;
Eric Sunshinebf42f0a2022-11-11 07:34:53 +0000201 $self->{lineno}++, return ["\n", $start, pos($$b), $startln, $startln] if $$b =~ /\G#[^\n]*(?:\n|\z)/gc; # comment
Eric Sunshine7d480472022-09-01 00:29:40 +0000202 while (1) {
203 # slurp up non-special characters
204 $token .= $1 if $$b =~ /\G([^\\;&|<>(){}'"\$\s]+)/gc;
205 # handle special characters
206 last unless $$b =~ /\G(.)/sgc;
207 my $c = $1;
Eric Sunshineca748f52022-11-08 19:08:28 +0000208 pos($$b)--, last if $c =~ /^[ \t]$/; # whitespace ends token
Eric Sunshine7d480472022-09-01 00:29:40 +0000209 pos($$b)--, last if length($token) && $c =~ /^[;&|<>(){}\n]$/;
210 $token .= $self->scan_sqstring(), next if $c eq "'";
211 $token .= $self->scan_dqstring(), next if $c eq '"';
212 $token .= $c . $self->scan_dollar(), next if $c eq '$';
Eric Sunshinebf42f0a2022-11-11 07:34:53 +0000213 $self->{lineno}++, $self->swallow_heredocs(), $token = $c, last if $c eq "\n";
Eric Sunshine7d480472022-09-01 00:29:40 +0000214 $token = $self->scan_op($c), last if $c =~ /^[;&|<>]$/;
215 $token = $c, last if $c =~ /^[(){}]$/;
216 if ($c eq '\\') {
217 $token .= '\\', last unless $$b =~ /\G(.)/sgc;
218 $c = $1;
Eric Sunshinebf42f0a2022-11-11 07:34:53 +0000219 $self->{lineno}++, next if $c eq "\n" && length($token); # line splice
220 $self->{lineno}++, goto RESTART if $c eq "\n"; # line splice
Eric Sunshine7d480472022-09-01 00:29:40 +0000221 $token .= '\\' . $c;
222 next;
223 }
224 die("internal error scanning character '$c'\n");
225 }
Eric Sunshinebf42f0a2022-11-11 07:34:53 +0000226 return length($token) ? [$token, $start, pos($$b), $startln, $self->{lineno}] : undef;
Eric Sunshine7d480472022-09-01 00:29:40 +0000227}
228
Eric Sunshine65945542022-09-01 00:29:41 +0000229# ShellParser parses POSIX shell scripts (with minor extensions for Bash). It
230# is a recursive descent parser very roughly modeled after section 2.10 "Shell
231# Grammar" of POSIX chapter 2 "Shell Command Language".
232package ShellParser;
233
234sub new {
235 my ($class, $s) = @_;
236 my $self = bless {
237 buff => [],
238 stop => [],
Eric Sunshinea4a5f282024-07-10 04:38:31 -0400239 output => [],
240 heredocs => {},
Eric Sunshinee44f15b2024-09-10 00:10:12 -0400241 insubshell => 0,
Eric Sunshine65945542022-09-01 00:29:41 +0000242 } => $class;
243 $self->{lexer} = Lexer->new($self, $s);
244 return $self;
245}
246
247sub next_token {
248 my $self = shift @_;
249 return pop(@{$self->{buff}}) if @{$self->{buff}};
250 return $self->{lexer}->scan_token();
251}
252
253sub untoken {
254 my $self = shift @_;
255 push(@{$self->{buff}}, @_);
256}
257
258sub peek {
259 my $self = shift @_;
260 my $token = $self->next_token();
261 return undef unless defined($token);
262 $self->untoken($token);
263 return $token;
264}
265
266sub stop_at {
267 my ($self, $token) = @_;
268 return 1 unless defined($token);
269 my $stop = ${$self->{stop}}[-1] if @{$self->{stop}};
Eric Sunshine5f0321a2022-11-08 19:08:29 +0000270 return defined($stop) && $token->[0] =~ $stop;
Eric Sunshine65945542022-09-01 00:29:41 +0000271}
272
273sub expect {
274 my ($self, $expect) = @_;
275 my $token = $self->next_token();
Eric Sunshine5f0321a2022-11-08 19:08:29 +0000276 return $token if defined($token) && $token->[0] eq $expect;
277 push(@{$self->{output}}, "?!ERR?! expected '$expect' but found '" . (defined($token) ? $token->[0] : "<end-of-input>") . "'\n");
Eric Sunshine65945542022-09-01 00:29:41 +0000278 $self->untoken($token) if defined($token);
279 return ();
280}
281
282sub optional_newlines {
283 my $self = shift @_;
284 my @tokens;
285 while (my $token = $self->peek()) {
Eric Sunshine5f0321a2022-11-08 19:08:29 +0000286 last unless $token->[0] eq "\n";
Eric Sunshine65945542022-09-01 00:29:41 +0000287 push(@tokens, $self->next_token());
288 }
289 return @tokens;
290}
291
292sub parse_group {
293 my $self = shift @_;
294 return ($self->parse(qr/^}$/),
295 $self->expect('}'));
296}
297
298sub parse_subshell {
299 my $self = shift @_;
Eric Sunshinee44f15b2024-09-10 00:10:12 -0400300 $self->{insubshell}++;
301 my @tokens = ($self->parse(qr/^\)$/),
302 $self->expect(')'));
303 $self->{insubshell}--;
304 return @tokens;
Eric Sunshine65945542022-09-01 00:29:41 +0000305}
306
307sub parse_case_pattern {
308 my $self = shift @_;
309 my @tokens;
310 while (defined(my $token = $self->next_token())) {
311 push(@tokens, $token);
Eric Sunshine5f0321a2022-11-08 19:08:29 +0000312 last if $token->[0] eq ')';
Eric Sunshine65945542022-09-01 00:29:41 +0000313 }
314 return @tokens;
315}
316
317sub parse_case {
318 my $self = shift @_;
319 my @tokens;
320 push(@tokens,
321 $self->next_token(), # subject
322 $self->optional_newlines(),
323 $self->expect('in'),
324 $self->optional_newlines());
325 while (1) {
326 my $token = $self->peek();
Eric Sunshine5f0321a2022-11-08 19:08:29 +0000327 last unless defined($token) && $token->[0] ne 'esac';
Eric Sunshine65945542022-09-01 00:29:41 +0000328 push(@tokens,
329 $self->parse_case_pattern(),
330 $self->optional_newlines(),
331 $self->parse(qr/^(?:;;|esac)$/)); # item body
332 $token = $self->peek();
Eric Sunshine5f0321a2022-11-08 19:08:29 +0000333 last unless defined($token) && $token->[0] ne 'esac';
Eric Sunshine65945542022-09-01 00:29:41 +0000334 push(@tokens,
335 $self->expect(';;'),
336 $self->optional_newlines());
337 }
338 push(@tokens, $self->expect('esac'));
339 return @tokens;
340}
341
342sub parse_for {
343 my $self = shift @_;
344 my @tokens;
345 push(@tokens,
346 $self->next_token(), # variable
347 $self->optional_newlines());
348 my $token = $self->peek();
Eric Sunshine5f0321a2022-11-08 19:08:29 +0000349 if (defined($token) && $token->[0] eq 'in') {
Eric Sunshine65945542022-09-01 00:29:41 +0000350 push(@tokens,
351 $self->expect('in'),
352 $self->optional_newlines());
353 }
354 push(@tokens,
355 $self->parse(qr/^do$/), # items
356 $self->expect('do'),
357 $self->optional_newlines(),
358 $self->parse_loop_body(),
359 $self->expect('done'));
360 return @tokens;
361}
362
363sub parse_if {
364 my $self = shift @_;
365 my @tokens;
366 while (1) {
367 push(@tokens,
368 $self->parse(qr/^then$/), # if/elif condition
369 $self->expect('then'),
370 $self->optional_newlines(),
371 $self->parse(qr/^(?:elif|else|fi)$/)); # if/elif body
372 my $token = $self->peek();
Eric Sunshine5f0321a2022-11-08 19:08:29 +0000373 last unless defined($token) && $token->[0] eq 'elif';
Eric Sunshine65945542022-09-01 00:29:41 +0000374 push(@tokens, $self->expect('elif'));
375 }
376 my $token = $self->peek();
Eric Sunshine5f0321a2022-11-08 19:08:29 +0000377 if (defined($token) && $token->[0] eq 'else') {
Eric Sunshine65945542022-09-01 00:29:41 +0000378 push(@tokens,
379 $self->expect('else'),
380 $self->optional_newlines(),
381 $self->parse(qr/^fi$/)); # else body
382 }
383 push(@tokens, $self->expect('fi'));
384 return @tokens;
385}
386
387sub parse_loop_body {
388 my $self = shift @_;
389 return $self->parse(qr/^done$/);
390}
391
392sub parse_loop {
393 my $self = shift @_;
394 return ($self->parse(qr/^do$/), # condition
395 $self->expect('do'),
396 $self->optional_newlines(),
397 $self->parse_loop_body(),
398 $self->expect('done'));
399}
400
401sub parse_func {
402 my $self = shift @_;
403 return ($self->expect('('),
404 $self->expect(')'),
405 $self->optional_newlines(),
406 $self->parse_cmd()); # body
407}
408
409sub parse_bash_array_assignment {
410 my $self = shift @_;
411 my @tokens = $self->expect('(');
412 while (defined(my $token = $self->next_token())) {
413 push(@tokens, $token);
Eric Sunshine5f0321a2022-11-08 19:08:29 +0000414 last if $token->[0] eq ')';
Eric Sunshine65945542022-09-01 00:29:41 +0000415 }
416 return @tokens;
417}
418
419my %compound = (
420 '{' => \&parse_group,
421 '(' => \&parse_subshell,
422 'case' => \&parse_case,
423 'for' => \&parse_for,
424 'if' => \&parse_if,
425 'until' => \&parse_loop,
426 'while' => \&parse_loop);
427
428sub parse_cmd {
429 my $self = shift @_;
430 my $cmd = $self->next_token();
431 return () unless defined($cmd);
Eric Sunshine5f0321a2022-11-08 19:08:29 +0000432 return $cmd if $cmd->[0] eq "\n";
Eric Sunshine65945542022-09-01 00:29:41 +0000433
434 my $token;
435 my @tokens = $cmd;
Eric Sunshine5f0321a2022-11-08 19:08:29 +0000436 if ($cmd->[0] eq '!') {
Eric Sunshine65945542022-09-01 00:29:41 +0000437 push(@tokens, $self->parse_cmd());
438 return @tokens;
Eric Sunshine5f0321a2022-11-08 19:08:29 +0000439 } elsif (my $f = $compound{$cmd->[0]}) {
Eric Sunshine65945542022-09-01 00:29:41 +0000440 push(@tokens, $self->$f());
Eric Sunshine5f0321a2022-11-08 19:08:29 +0000441 } elsif (defined($token = $self->peek()) && $token->[0] eq '(') {
442 if ($cmd->[0] !~ /\w=$/) {
Eric Sunshine65945542022-09-01 00:29:41 +0000443 push(@tokens, $self->parse_func());
444 return @tokens;
445 }
Eric Sunshine5f0321a2022-11-08 19:08:29 +0000446 my @array = $self->parse_bash_array_assignment();
447 $tokens[-1]->[0] .= join(' ', map {$_->[0]} @array);
448 $tokens[-1]->[2] = $array[$#array][2] if @array;
Eric Sunshine65945542022-09-01 00:29:41 +0000449 }
450
451 while (defined(my $token = $self->next_token())) {
452 $self->untoken($token), last if $self->stop_at($token);
453 push(@tokens, $token);
Eric Sunshine5f0321a2022-11-08 19:08:29 +0000454 last if $token->[0] =~ /^(?:[;&\n|]|&&|\|\|)$/;
Eric Sunshine65945542022-09-01 00:29:41 +0000455 }
Eric Sunshine5f0321a2022-11-08 19:08:29 +0000456 push(@tokens, $self->next_token()) if $tokens[-1]->[0] ne "\n" && defined($token = $self->peek()) && $token->[0] eq "\n";
Eric Sunshine65945542022-09-01 00:29:41 +0000457 return @tokens;
458}
459
460sub accumulate {
461 my ($self, $tokens, $cmd) = @_;
462 push(@$tokens, @$cmd);
463}
464
465sub parse {
466 my ($self, $stop) = @_;
467 push(@{$self->{stop}}, $stop);
468 goto DONE if $self->stop_at($self->peek());
469 my @tokens;
470 while (my @cmd = $self->parse_cmd()) {
471 $self->accumulate(\@tokens, \@cmd);
472 last if $self->stop_at($self->peek());
473 }
474DONE:
475 pop(@{$self->{stop}});
476 return @tokens;
477}
478
Eric Sunshine6d932e92022-09-01 00:29:42 +0000479# TestParser is a subclass of ShellParser which, beyond parsing shell script
480# code, is also imbued with semantic knowledge of test construction, and checks
481# tests for common problems (such as broken &&-chains) which might hide bugs in
482# the tests themselves or in behaviors being exercised by the tests. As such,
483# TestParser is only called upon to parse test bodies, not the top-level
484# scripts in which the tests are defined.
485package TestParser;
486
487use base 'ShellParser';
488
Eric Sunshine73c768d2022-11-08 19:08:30 +0000489sub new {
490 my $class = shift @_;
491 my $self = $class->SUPER::new(@_);
492 $self->{problems} = [];
493 return $self;
494}
495
Eric Sunshine6d932e92022-09-01 00:29:42 +0000496sub find_non_nl {
497 my $tokens = shift @_;
498 my $n = shift @_;
499 $n = $#$tokens if !defined($n);
Eric Sunshine5f0321a2022-11-08 19:08:29 +0000500 $n-- while $n >= 0 && $$tokens[$n]->[0] eq "\n";
Eric Sunshine6d932e92022-09-01 00:29:42 +0000501 return $n;
502}
503
504sub ends_with {
505 my ($tokens, $needles) = @_;
506 my $n = find_non_nl($tokens);
507 for my $needle (reverse(@$needles)) {
508 return undef if $n < 0;
509 $n = find_non_nl($tokens, $n), next if $needle eq "\n";
Eric Sunshine5f0321a2022-11-08 19:08:29 +0000510 return undef if $$tokens[$n]->[0] !~ $needle;
Eric Sunshine6d932e92022-09-01 00:29:42 +0000511 $n--;
512 }
513 return 1;
514}
515
Eric Sunshine35ebb1e2022-09-01 00:29:45 +0000516sub match_ending {
517 my ($tokens, $endings) = @_;
518 for my $needles (@$endings) {
519 next if @$tokens < scalar(grep {$_ ne "\n"} @$needles);
520 return 1 if ends_with($tokens, $needles);
521 }
522 return undef;
523}
524
Eric Sunshinefd4094c2022-09-01 00:29:50 +0000525sub parse_loop_body {
526 my $self = shift @_;
527 my @tokens = $self->SUPER::parse_loop_body(@_);
528 # did loop signal failure via "|| return" or "|| exit"?
Eric Sunshine5f0321a2022-11-08 19:08:29 +0000529 return @tokens if !@tokens || grep {$_->[0] =~ /^(?:return|exit|\$\?)$/} @tokens;
Eric Sunshineae0c55a2022-09-01 00:29:51 +0000530 # did loop upstream of a pipe signal failure via "|| echo 'impossible
531 # text'" as the final command in the loop body?
532 return @tokens if ends_with(\@tokens, [qr/^\|\|$/, "\n", qr/^echo$/, qr/^.+$/]);
Eric Sunshinefd4094c2022-09-01 00:29:50 +0000533 # flag missing "return/exit" handling explicit failure in loop body
534 my $n = find_non_nl(\@tokens);
Eric Sunshinee44f15b2024-09-10 00:10:12 -0400535 push(@{$self->{problems}}, [$self->{insubshell} ? 'LOOPEXIT' : 'LOOPRETURN', $tokens[$n]]);
Eric Sunshinefd4094c2022-09-01 00:29:50 +0000536 return @tokens;
537}
538
Eric Sunshine35ebb1e2022-09-01 00:29:45 +0000539my @safe_endings = (
Eric Sunshineaabc3252022-09-01 00:29:47 +0000540 [qr/^(?:&&|\|\||\||&)$/],
Eric Sunshine35ebb1e2022-09-01 00:29:45 +0000541 [qr/^(?:exit|return)$/, qr/^(?:\d+|\$\?)$/],
542 [qr/^(?:exit|return)$/, qr/^(?:\d+|\$\?)$/, qr/^;$/],
543 [qr/^(?:exit|return|continue)$/],
544 [qr/^(?:exit|return|continue)$/, qr/^;$/]);
545
Eric Sunshine6d932e92022-09-01 00:29:42 +0000546sub accumulate {
547 my ($self, $tokens, $cmd) = @_;
Eric Sunshine73c768d2022-11-08 19:08:30 +0000548 my $problems = $self->{problems};
Eric Sunshinec90d81f2022-11-08 19:08:27 +0000549
550 # no previous command to check for missing "&&"
Eric Sunshine6d932e92022-09-01 00:29:42 +0000551 goto DONE unless @$tokens;
Eric Sunshinec90d81f2022-11-08 19:08:27 +0000552
553 # new command is empty line; can't yet check if previous is missing "&&"
Eric Sunshine5f0321a2022-11-08 19:08:29 +0000554 goto DONE if @$cmd == 1 && $$cmd[0]->[0] eq "\n";
Eric Sunshine6d932e92022-09-01 00:29:42 +0000555
Eric Sunshine35ebb1e2022-09-01 00:29:45 +0000556 # did previous command end with "&&", "|", "|| return" or similar?
557 goto DONE if match_ending($tokens, \@safe_endings);
Eric Sunshine6d932e92022-09-01 00:29:42 +0000558
Eric Sunshinea8f30ee2022-09-01 00:29:48 +0000559 # if this command handles "$?" specially, then okay for previous
560 # command to be missing "&&"
561 for my $token (@$cmd) {
Eric Sunshine5f0321a2022-11-08 19:08:29 +0000562 goto DONE if $token->[0] =~ /\$\?/;
Eric Sunshinea8f30ee2022-09-01 00:29:48 +0000563 }
564
Eric Sunshine832c68b2022-09-01 00:29:49 +0000565 # if this command is "false", "return 1", or "exit 1" (which signal
566 # failure explicitly), then okay for all preceding commands to be
567 # missing "&&"
Eric Sunshine5f0321a2022-11-08 19:08:29 +0000568 if ($$cmd[0]->[0] =~ /^(?:false|return|exit)$/) {
Eric Sunshine73c768d2022-11-08 19:08:30 +0000569 @$problems = grep {$_->[0] ne 'AMP'} @$problems;
Eric Sunshine832c68b2022-09-01 00:29:49 +0000570 goto DONE;
571 }
572
Eric Sunshine6d932e92022-09-01 00:29:42 +0000573 # flag missing "&&" at end of previous command
574 my $n = find_non_nl($tokens);
Eric Sunshine73c768d2022-11-08 19:08:30 +0000575 push(@$problems, ['AMP', $tokens->[$n]]) unless $n < 0;
Eric Sunshine6d932e92022-09-01 00:29:42 +0000576
577DONE:
578 $self->SUPER::accumulate($tokens, $cmd);
579}
580
Eric Sunshined99ebd62022-09-01 00:29:43 +0000581# ScriptParser is a subclass of ShellParser which identifies individual test
582# definitions within test scripts, and passes each test body through TestParser
583# to identify possible problems. ShellParser detects test definitions not only
584# at the top-level of test scripts but also within compound commands such as
585# loops and function definitions.
Eric Sunshineb4f25b02022-09-01 00:29:39 +0000586package ScriptParser;
587
Eric Sunshined99ebd62022-09-01 00:29:43 +0000588use base 'ShellParser';
589
Eric Sunshineb4f25b02022-09-01 00:29:39 +0000590sub new {
591 my $class = shift @_;
Eric Sunshined99ebd62022-09-01 00:29:43 +0000592 my $self = $class->SUPER::new(@_);
Eric Sunshineb4f25b02022-09-01 00:29:39 +0000593 $self->{ntests} = 0;
Eric Sunshine588ef842024-09-10 00:10:11 -0400594 $self->{nerrs} = 0;
Eric Sunshineb4f25b02022-09-01 00:29:39 +0000595 return $self;
596}
597
Eric Sunshined99ebd62022-09-01 00:29:43 +0000598# extract the raw content of a token, which may be a single string or a
599# composition of multiple strings and non-string character runs; for instance,
600# `"test body"` unwraps to `test body`; `word"a b"42'c d'` to `worda b42c d`
601sub unwrap {
Eric Sunshine5f0321a2022-11-08 19:08:29 +0000602 my $token = (@_ ? shift @_ : $_)->[0];
Eric Sunshined99ebd62022-09-01 00:29:43 +0000603 # simple case: 'sqstring' or "dqstring"
604 return $token if $token =~ s/^'([^']*)'$/$1/;
605 return $token if $token =~ s/^"([^"]*)"$/$1/;
606
607 # composite case
608 my ($s, $q, $escaped);
609 while (1) {
610 # slurp up non-special characters
611 $s .= $1 if $token =~ /\G([^\\'"]*)/gc;
612 # handle special characters
613 last unless $token =~ /\G(.)/sgc;
614 my $c = $1;
615 $q = undef, next if defined($q) && $c eq $q;
616 $q = $c, next if !defined($q) && $c =~ /^['"]$/;
617 if ($c eq '\\') {
618 last unless $token =~ /\G(.)/sgc;
619 $c = $1;
620 $s .= '\\' if $c eq "\n"; # preserve line splice
621 }
622 $s .= $c;
623 }
624 return $s
625}
626
Eric Sunshinee44f15b2024-09-10 00:10:12 -0400627sub format_problem {
628 local $_ = shift;
629 /^AMP$/ && return "missing '&&'";
630 /^LOOPRETURN$/ && return "missing '|| return 1'";
631 /^LOOPEXIT$/ && return "missing '|| exit 1'";
632 /^HEREDOC$/ && return 'unclosed heredoc';
633 die("unrecognized problem type '$_'\n");
634}
635
Eric Sunshined99ebd62022-09-01 00:29:43 +0000636sub check_test {
637 my $self = shift @_;
Eric Sunshinea4a5f282024-07-10 04:38:31 -0400638 my $title = unwrap(shift @_);
639 my $body = shift @_;
640 my $lineno = $body->[3];
641 $body = unwrap($body);
642 if ($body eq '-') {
643 my $herebody = shift @_;
644 $body = $herebody->{content};
645 $lineno = $herebody->{start_line};
646 }
Eric Sunshined99ebd62022-09-01 00:29:43 +0000647 $self->{ntests}++;
648 my $parser = TestParser->new(\$body);
649 my @tokens = $parser->parse();
Eric Sunshine73c768d2022-11-08 19:08:30 +0000650 my $problems = $parser->{problems};
Eric Sunshine588ef842024-09-10 00:10:11 -0400651 $self->{nerrs} += @$problems;
Eric Sunshine73c768d2022-11-08 19:08:30 +0000652 return unless $emit_all || @$problems;
Eric Sunshine7c04aa72022-09-13 04:01:47 +0000653 my $c = main::fd_colors(1);
Eric Sunshinea13ff412024-09-10 00:10:13 -0400654 my ($erropen, $errclose) = -t 1 ? ("$c->{rev}$c->{red}", $c->{reset}) : ('?!', '?!');
Eric Sunshine73c768d2022-11-08 19:08:30 +0000655 my $start = 0;
656 my $checked = '';
657 for (sort {$a->[1]->[2] <=> $b->[1]->[2]} @$problems) {
658 my ($label, $token) = @$_;
659 my $pos = $token->[2];
Eric Sunshinee44f15b2024-09-10 00:10:12 -0400660 my $err = format_problem($label);
Eric Sunshine588ef842024-09-10 00:10:11 -0400661 $checked .= substr($body, $start, $pos - $start);
662 $checked .= ' ' unless $checked =~ /\s$/;
Eric Sunshinea13ff412024-09-10 00:10:13 -0400663 $checked .= "${erropen}LINT: $err$errclose";
Eric Sunshine588ef842024-09-10 00:10:11 -0400664 $checked .= ' ' unless $pos >= length($body) ||
665 substr($body, $pos, 1) =~ /^\s/;
Eric Sunshine73c768d2022-11-08 19:08:30 +0000666 $start = $pos;
667 }
668 $checked .= substr($body, $start);
Eric Sunshine48d69d82022-11-11 07:34:54 +0000669 $checked =~ s/^/$lineno++ . ' '/mge;
670 $checked =~ s/^\d+ \n//;
Eric Sunshine48d69d82022-11-11 07:34:54 +0000671 $checked =~ s/^\d+/$c->{dim}$&$c->{reset}/mg;
Eric Sunshined99ebd62022-09-01 00:29:43 +0000672 $checked .= "\n" unless $checked =~ /\n$/;
Eric Sunshine7c04aa72022-09-13 04:01:47 +0000673 push(@{$self->{output}}, "$c->{blue}# chainlint: $title$c->{reset}\n$checked");
Eric Sunshined99ebd62022-09-01 00:29:43 +0000674}
675
Eric Sunshineb4f25b02022-09-01 00:29:39 +0000676sub parse_cmd {
Eric Sunshined99ebd62022-09-01 00:29:43 +0000677 my $self = shift @_;
678 my @tokens = $self->SUPER::parse_cmd();
Eric Sunshine5f0321a2022-11-08 19:08:29 +0000679 return @tokens unless @tokens && $tokens[0]->[0] =~ /^test_expect_(?:success|failure)$/;
Eric Sunshined99ebd62022-09-01 00:29:43 +0000680 my $n = $#tokens;
Eric Sunshine5f0321a2022-11-08 19:08:29 +0000681 $n-- while $n >= 0 && $tokens[$n]->[0] =~ /^(?:[;&\n|]|&&|\|\|)$/;
Eric Sunshinea4a5f282024-07-10 04:38:31 -0400682 my $herebody;
683 if ($n >= 2 && $tokens[$n-1]->[0] eq '-' && $tokens[$n]->[0] =~ /^<<-?(.+)$/) {
684 $herebody = $self->{heredocs}->{$1};
685 $n--;
686 }
687 $self->check_test($tokens[1], $tokens[2], $herebody) if $n == 2; # title body
688 $self->check_test($tokens[2], $tokens[3], $herebody) if $n > 2; # prereq title body
Eric Sunshined99ebd62022-09-01 00:29:43 +0000689 return @tokens;
Eric Sunshineb4f25b02022-09-01 00:29:39 +0000690}
691
692# main contains high-level functionality for processing command-line switches,
693# feeding input test scripts to ScriptParser, and reporting results.
694package main;
695
696my $getnow = sub { return time(); };
697my $interval = sub { return time() - shift; };
698if (eval {require Time::HiRes; Time::HiRes->import(); 1;}) {
699 $getnow = sub { return [Time::HiRes::gettimeofday()]; };
700 $interval = sub { return Time::HiRes::tv_interval(shift); };
701}
702
Eric Sunshine7c04aa72022-09-13 04:01:47 +0000703# Restore TERM if test framework set it to "dumb" so 'tput' will work; do this
704# outside of get_colors() since under 'ithreads' all threads use %ENV of main
705# thread and ignore %ENV changes in subthreads.
706$ENV{TERM} = $ENV{USER_TERM} if $ENV{USER_TERM};
707
Eric Sunshine48d69d82022-11-11 07:34:54 +0000708my @NOCOLORS = (bold => '', rev => '', dim => '', reset => '', blue => '', green => '', red => '');
Eric Sunshine7c04aa72022-09-13 04:01:47 +0000709my %COLORS = ();
710sub get_colors {
711 return \%COLORS if %COLORS;
Eric Sunshine54518772022-11-11 07:34:52 +0000712 if (exists($ENV{NO_COLOR})) {
Eric Sunshine7c04aa72022-09-13 04:01:47 +0000713 %COLORS = @NOCOLORS;
714 return \%COLORS;
715 }
Eric Sunshine54518772022-11-11 07:34:52 +0000716 if ($ENV{TERM} =~ /xterm|xterm-\d+color|xterm-new|xterm-direct|nsterm|nsterm-\d+color|nsterm-direct/) {
717 %COLORS = (bold => "\e[1m",
718 rev => "\e[7m",
Eric Sunshine48d69d82022-11-11 07:34:54 +0000719 dim => "\e[2m",
Eric Sunshine54518772022-11-11 07:34:52 +0000720 reset => "\e[0m",
721 blue => "\e[34m",
722 green => "\e[32m",
723 red => "\e[31m");
724 return \%COLORS;
725 }
726 if (system("tput sgr0 >/dev/null 2>&1") == 0 &&
727 system("tput bold >/dev/null 2>&1") == 0 &&
728 system("tput rev >/dev/null 2>&1") == 0 &&
Eric Sunshine48d69d82022-11-11 07:34:54 +0000729 system("tput dim >/dev/null 2>&1") == 0 &&
Eric Sunshine54518772022-11-11 07:34:52 +0000730 system("tput setaf 1 >/dev/null 2>&1") == 0) {
731 %COLORS = (bold => `tput bold`,
732 rev => `tput rev`,
Eric Sunshine48d69d82022-11-11 07:34:54 +0000733 dim => `tput dim`,
Eric Sunshine54518772022-11-11 07:34:52 +0000734 reset => `tput sgr0`,
735 blue => `tput setaf 4`,
736 green => `tput setaf 2`,
737 red => `tput setaf 1`);
738 return \%COLORS;
739 }
740 %COLORS = @NOCOLORS;
Eric Sunshine7c04aa72022-09-13 04:01:47 +0000741 return \%COLORS;
742}
743
744my %FD_COLORS = ();
745sub fd_colors {
746 my $fd = shift;
747 return $FD_COLORS{$fd} if exists($FD_COLORS{$fd});
748 $FD_COLORS{$fd} = -t $fd ? get_colors() : {@NOCOLORS};
749 return $FD_COLORS{$fd};
750}
751
Eric Sunshine29fb2ec2022-09-01 00:29:44 +0000752sub ncores {
753 # Windows
Eric Sunshine037348e2024-05-20 15:01:29 -0400754 if (exists($ENV{NUMBER_OF_PROCESSORS})) {
755 my $ncpu = $ENV{NUMBER_OF_PROCESSORS};
756 return $ncpu > 0 ? $ncpu : 1;
757 }
Eric Sunshine29fb2ec2022-09-01 00:29:44 +0000758 # Linux / MSYS2 / Cygwin / WSL
Eric Sunshine037348e2024-05-20 15:01:29 -0400759 if (open my $fh, '<', '/proc/cpuinfo') {
760 my $cpuinfo = do { local $/; <$fh> };
761 close($fh);
Eric Sunshine2e7e9202024-05-20 15:01:31 -0400762 if ($cpuinfo =~ /^n?cpus active\s*:\s*(\d+)/m) {
763 return $1 if $1 > 0;
764 }
John Paul Adrian Glaubitz45db5ed2024-05-20 15:01:30 -0400765 my @matches = ($cpuinfo =~ /^(processor|CPU)[\s\d]*:/mg);
Eric Sunshine037348e2024-05-20 15:01:29 -0400766 return @matches ? scalar(@matches) : 1;
767 }
Eric Sunshine29fb2ec2022-09-01 00:29:44 +0000768 # macOS & BSD
Eric Sunshine037348e2024-05-20 15:01:29 -0400769 if ($^O =~ /(?:^darwin$|bsd)/) {
770 my $ncpu = qx/sysctl -n hw.ncpu/;
771 return $ncpu > 0 ? $ncpu : 1;
772 }
Eric Sunshine29fb2ec2022-09-01 00:29:44 +0000773 return 1;
774}
775
Eric Sunshineb4f25b02022-09-01 00:29:39 +0000776sub show_stats {
777 my ($start_time, $stats) = @_;
778 my $walltime = $interval->($start_time);
779 my ($usertime) = times();
780 my ($total_workers, $total_scripts, $total_tests, $total_errs) = (0, 0, 0, 0);
Eric Sunshine7c04aa72022-09-13 04:01:47 +0000781 my $c = fd_colors(2);
782 print(STDERR $c->{green});
Eric Sunshineb4f25b02022-09-01 00:29:39 +0000783 for (@$stats) {
784 my ($worker, $nscripts, $ntests, $nerrs) = @$_;
785 print(STDERR "worker $worker: $nscripts scripts, $ntests tests, $nerrs errors\n");
786 $total_workers++;
787 $total_scripts += $nscripts;
788 $total_tests += $ntests;
789 $total_errs += $nerrs;
790 }
Eric Sunshine7c04aa72022-09-13 04:01:47 +0000791 printf(STDERR "total: %d workers, %d scripts, %d tests, %d errors, %.2fs/%.2fs (wall/user)$c->{reset}\n", $total_workers, $total_scripts, $total_tests, $total_errs, $walltime, $usertime);
Eric Sunshineb4f25b02022-09-01 00:29:39 +0000792}
793
794sub check_script {
795 my ($id, $next_script, $emit) = @_;
796 my ($nscripts, $ntests, $nerrs) = (0, 0, 0);
797 while (my $path = $next_script->()) {
798 $nscripts++;
799 my $fh;
Jeff King382f6ed2024-07-10 04:37:30 -0400800 unless (open($fh, "<:unix:crlf", $path)) {
Eric Sunshineb4f25b02022-09-01 00:29:39 +0000801 $emit->("?!ERR?! $path: $!\n");
802 next;
803 }
804 my $s = do { local $/; <$fh> };
805 close($fh);
806 my $parser = ScriptParser->new(\$s);
807 1 while $parser->parse_cmd();
808 if (@{$parser->{output}}) {
Eric Sunshine7c04aa72022-09-13 04:01:47 +0000809 my $c = fd_colors(1);
Eric Sunshineb4f25b02022-09-01 00:29:39 +0000810 my $s = join('', @{$parser->{output}});
Eric Sunshine7c04aa72022-09-13 04:01:47 +0000811 $emit->("$c->{bold}$c->{blue}# chainlint: $path$c->{reset}\n" . $s);
Eric Sunshineb4f25b02022-09-01 00:29:39 +0000812 }
813 $ntests += $parser->{ntests};
Eric Sunshine588ef842024-09-10 00:10:11 -0400814 $nerrs += $parser->{nerrs};
Eric Sunshineb4f25b02022-09-01 00:29:39 +0000815 }
816 return [$id, $nscripts, $ntests, $nerrs];
817}
818
819sub exit_code {
820 my $stats = shift @_;
821 for (@$stats) {
822 my ($worker, $nscripts, $ntests, $nerrs) = @$_;
823 return 1 if $nerrs;
824 }
825 return 0;
826}
827
828Getopt::Long::Configure(qw{bundling});
829GetOptions(
830 "emit-all!" => \$emit_all,
Eric Sunshine29fb2ec2022-09-01 00:29:44 +0000831 "jobs|j=i" => \$jobs,
Eric Sunshineb4f25b02022-09-01 00:29:39 +0000832 "stats|show-stats!" => \$show_stats) or die("option error\n");
Eric Sunshine29fb2ec2022-09-01 00:29:44 +0000833$jobs = ncores() if $jobs < 1;
Eric Sunshineb4f25b02022-09-01 00:29:39 +0000834
835my $start_time = $getnow->();
836my @stats;
837
838my @scripts;
839push(@scripts, File::Glob::bsd_glob($_)) for (@ARGV);
840unless (@scripts) {
841 show_stats($start_time, \@stats) if $show_stats;
842 exit;
843}
Jeff Kingd5585092024-07-10 04:35:57 -0400844$jobs = @scripts if @scripts < $jobs;
Eric Sunshineb4f25b02022-09-01 00:29:39 +0000845
Jeff Kinga7c1c102024-07-10 04:35:13 -0400846unless ($jobs > 1 &&
847 $Config{useithreads} && eval {
Eric Sunshine29fb2ec2022-09-01 00:29:44 +0000848 require threads; threads->import();
849 require Thread::Queue; Thread::Queue->import();
850 1;
851 }) {
852 push(@stats, check_script(1, sub { shift(@scripts); }, sub { print(@_); }));
853 show_stats($start_time, \@stats) if $show_stats;
854 exit(exit_code(\@stats));
855}
856
857my $script_queue = Thread::Queue->new();
858my $output_queue = Thread::Queue->new();
859
860sub next_script { return $script_queue->dequeue(); }
861sub emit { $output_queue->enqueue(@_); }
862
863sub monitor {
864 while (my $s = $output_queue->dequeue()) {
865 print($s);
866 }
867}
868
869my $mon = threads->create({'context' => 'void'}, \&monitor);
870threads->create({'context' => 'list'}, \&check_script, $_, \&next_script, \&emit) for 1..$jobs;
871
872$script_queue->enqueue(@scripts);
873$script_queue->end();
874
875for (threads->list()) {
876 push(@stats, $_->join()) unless $_ == $mon;
877}
878
879$output_queue->end();
880$mon->join();
881
Eric Sunshineb4f25b02022-09-01 00:29:39 +0000882show_stats($start_time, \@stats) if $show_stats;
883exit(exit_code(\@stats));