blob: ac34bfeafb3c911aa6918d4f90deeca5f8da3bee [file] [log] [blame]
Elijah Newren4f6728d2023-03-21 06:25:56 +00001#include "git-compat-util.h"
Brandon Williamsb2141fc2017-06-14 11:07:36 -07002#include "config.h"
Elijah Newrenf394e092023-03-21 06:25:54 +00003#include "gettext.h"
Junio C Hamano83b5d2f2006-09-17 16:02:52 -07004#include "grep.h"
Elijah Newren41771fa2023-02-24 00:09:27 +00005#include "hex.h"
Elijah Newrena034e912023-05-16 06:34:06 +00006#include "object-store-ll.h"
Elijah Newrend4a4f922023-04-22 20:17:26 +00007#include "pretty.h"
René Scharfe60ecac92009-07-02 00:07:24 +02008#include "userdiff.h"
Johannes Schindelin6bfce932007-06-05 03:36:11 +01009#include "xdiff-interface.h"
Jeff King335ec3b2013-05-10 17:10:15 +020010#include "diff.h"
11#include "diffcore.h"
Nguyễn Thái Ngọc Duy793dc672016-06-25 07:22:31 +020012#include "quote.h"
Nguyễn Thái Ngọc Duy3ac68a92018-05-26 15:55:24 +020013#include "help.h"
Junio C Hamano83b5d2f2006-09-17 16:02:52 -070014
Junio C Hamano07a7d652012-09-15 14:04:36 -070015static int grep_source_load(struct grep_source *gs);
Nguyễn Thái Ngọc Duyacd00ea2018-09-21 17:57:33 +020016static int grep_source_is_binary(struct grep_source *gs,
17 struct index_state *istate);
Junio C Hamano07a7d652012-09-15 14:04:36 -070018
Jeff Kingbcba4462023-08-29 19:45:27 -040019static void std_output(struct grep_opt *opt UNUSED, const void *buf, size_t size)
Martin Ågren96313422020-11-21 19:31:08 +010020{
21 fwrite(buf, size, 1, stdout);
22}
23
Nguyễn Thái Ngọc Duyfa151dc2018-05-26 15:55:22 +020024static const char *color_grep_slots[] = {
25 [GREP_COLOR_CONTEXT] = "context",
26 [GREP_COLOR_FILENAME] = "filename",
27 [GREP_COLOR_FUNCTION] = "function",
28 [GREP_COLOR_LINENO] = "lineNumber",
Junio C Hamanod036d662018-07-18 12:20:31 -070029 [GREP_COLOR_COLUMNNO] = "column",
Nguyễn Thái Ngọc Duyfa151dc2018-05-26 15:55:22 +020030 [GREP_COLOR_MATCH_CONTEXT] = "matchContext",
31 [GREP_COLOR_MATCH_SELECTED] = "matchSelected",
32 [GREP_COLOR_SELECTED] = "selected",
33 [GREP_COLOR_SEP] = "separator",
34};
35
Junio C Hamano7687a052012-10-09 16:17:50 -070036static int parse_pattern_type_arg(const char *opt, const char *arg)
37{
38 if (!strcmp(arg, "default"))
39 return GREP_PATTERN_TYPE_UNSPECIFIED;
40 else if (!strcmp(arg, "basic"))
41 return GREP_PATTERN_TYPE_BRE;
42 else if (!strcmp(arg, "extended"))
43 return GREP_PATTERN_TYPE_ERE;
44 else if (!strcmp(arg, "fixed"))
45 return GREP_PATTERN_TYPE_FIXED;
46 else if (!strcmp(arg, "perl"))
47 return GREP_PATTERN_TYPE_PCRE;
48 die("bad %s argument: %s", opt, arg);
49}
50
Nguyễn Thái Ngọc Duy3ac68a92018-05-26 15:55:24 +020051define_list_config_array_extra(color_grep_slots, {"match"});
52
Junio C Hamano7687a052012-10-09 16:17:50 -070053/*
54 * Read the configuration file once and store it in
55 * the grep_defaults template.
56 */
Glen Chooa4e7e312023-06-28 19:26:22 +000057int grep_config(const char *var, const char *value,
58 const struct config_context *ctx, void *cb)
Junio C Hamano7687a052012-10-09 16:17:50 -070059{
Ævar Arnfjörð Bjarmason72365bb2022-02-16 01:00:36 +010060 struct grep_opt *opt = cb;
Nguyễn Thái Ngọc Duyfa151dc2018-05-26 15:55:22 +020061 const char *slot;
Junio C Hamano7687a052012-10-09 16:17:50 -070062
63 if (userdiff_config(var, value) < 0)
64 return -1;
65
66 if (!strcmp(var, "grep.extendedregexp")) {
Ævar Arnfjörð Bjarmasonc7e38552017-06-29 22:22:18 +000067 opt->extended_regexp_option = git_config_bool(var, value);
Junio C Hamano7687a052012-10-09 16:17:50 -070068 return 0;
69 }
70
71 if (!strcmp(var, "grep.patterntype")) {
72 opt->pattern_type_option = parse_pattern_type_arg(var, value);
73 return 0;
74 }
75
76 if (!strcmp(var, "grep.linenumber")) {
77 opt->linenum = git_config_bool(var, value);
78 return 0;
79 }
Taylor Blau6653fec2018-06-22 10:49:49 -050080 if (!strcmp(var, "grep.column")) {
81 opt->columnnum = git_config_bool(var, value);
82 return 0;
83 }
Junio C Hamano7687a052012-10-09 16:17:50 -070084
Andreas Schwab6453f7b2014-03-17 20:16:05 +010085 if (!strcmp(var, "grep.fullname")) {
86 opt->relative = !git_config_bool(var, value);
87 return 0;
88 }
89
Junio C Hamano7687a052012-10-09 16:17:50 -070090 if (!strcmp(var, "color.grep"))
91 opt->color = git_config_colorbool(var, value);
Nguyễn Thái Ngọc Duyfa151dc2018-05-26 15:55:22 +020092 if (!strcmp(var, "color.grep.match")) {
Glen Chooa4e7e312023-06-28 19:26:22 +000093 if (grep_config("color.grep.matchcontext", value, ctx, cb) < 0)
Nguyễn Thái Ngọc Duyfa151dc2018-05-26 15:55:22 +020094 return -1;
Glen Chooa4e7e312023-06-28 19:26:22 +000095 if (grep_config("color.grep.matchselected", value, ctx, cb) < 0)
Nguyễn Thái Ngọc Duyfa151dc2018-05-26 15:55:22 +020096 return -1;
97 } else if (skip_prefix(var, "color.grep.", &slot)) {
98 int i = LOOKUP_CONFIG(color_grep_slots, slot);
99 char *color;
Junio C Hamano7687a052012-10-09 16:17:50 -0700100
Nguyễn Thái Ngọc Duyfa151dc2018-05-26 15:55:22 +0200101 if (i < 0)
102 return -1;
103 color = opt->colors[i];
Junio C Hamano7687a052012-10-09 16:17:50 -0700104 if (!value)
105 return config_error_nonbool(var);
Jeff Kingf6c5a292014-10-07 15:33:09 -0400106 return color_parse(value, color);
Junio C Hamano7687a052012-10-09 16:17:50 -0700107 }
108 return 0;
109}
110
Ævar Arnfjörð Bjarmason9725c8d2022-02-16 01:00:34 +0100111void grep_init(struct grep_opt *opt, struct repository *repo)
Junio C Hamano7687a052012-10-09 16:17:50 -0700112{
Ævar Arnfjörð Bjarmason72365bb2022-02-16 01:00:36 +0100113 struct grep_opt blank = GREP_OPT_INIT;
114 memcpy(opt, &blank, sizeof(*opt));
Martin Ågren6ba9bb72020-11-29 20:52:21 +0100115
Nguyễn Thái Ngọc Duy38bbc2e2018-09-21 17:57:23 +0200116 opt->repo = repo;
Junio C Hamano7687a052012-10-09 16:17:50 -0700117 opt->pattern_tail = &opt->pattern_list;
118 opt->header_tail = &opt->header_list;
Junio C Hamano7687a052012-10-09 16:17:50 -0700119}
Junio C Hamano07a7d652012-09-15 14:04:36 -0700120
René Scharfefc456752012-05-20 16:32:39 +0200121static struct grep_pat *create_grep_pat(const char *pat, size_t patlen,
122 const char *origin, int no,
123 enum grep_pat_token t,
124 enum grep_header_field field)
Junio C Hamanoa4d7d2c2008-09-04 22:15:02 -0700125{
126 struct grep_pat *p = xcalloc(1, sizeof(*p));
René Scharfe526a8582012-05-20 16:33:07 +0200127 p->pattern = xmemdupz(pat, patlen);
René Scharfefc456752012-05-20 16:32:39 +0200128 p->patternlen = patlen;
129 p->origin = origin;
130 p->no = no;
131 p->token = t;
Junio C Hamanoa4d7d2c2008-09-04 22:15:02 -0700132 p->field = field;
René Scharfefc456752012-05-20 16:32:39 +0200133 return p;
134}
135
René Scharfe2b3873f2012-05-20 16:32:54 +0200136static void do_append_grep_pat(struct grep_pat ***tail, struct grep_pat *p)
137{
138 **tail = p;
139 *tail = &p->next;
Junio C Hamanoa4d7d2c2008-09-04 22:15:02 -0700140 p->next = NULL;
René Scharfe526a8582012-05-20 16:33:07 +0200141
142 switch (p->token) {
143 case GREP_PATTERN: /* atom */
144 case GREP_PATTERN_HEAD:
145 case GREP_PATTERN_BODY:
146 for (;;) {
147 struct grep_pat *new_pat;
148 size_t len = 0;
149 char *cp = p->pattern + p->patternlen, *nl = NULL;
150 while (++len <= p->patternlen) {
151 if (*(--cp) == '\n') {
152 nl = cp;
153 break;
154 }
155 }
156 if (!nl)
157 break;
158 new_pat = create_grep_pat(nl + 1, len - 1, p->origin,
159 p->no, p->token, p->field);
160 new_pat->next = p->next;
161 if (!p->next)
162 *tail = &new_pat->next;
163 p->next = new_pat;
164 *nl = '\0';
165 p->patternlen -= len;
166 }
167 break;
168 default:
169 break;
170 }
René Scharfe2b3873f2012-05-20 16:32:54 +0200171}
172
René Scharfefc456752012-05-20 16:32:39 +0200173void append_header_grep_pattern(struct grep_opt *opt,
174 enum grep_header_field field, const char *pat)
175{
176 struct grep_pat *p = create_grep_pat(pat, strlen(pat), "header", 0,
177 GREP_PATTERN_HEAD, field);
Junio C Hamanobaa63782012-09-29 11:59:52 -0700178 if (field == GREP_HEADER_REFLOG)
179 opt->use_reflog_filter = 1;
René Scharfe2b3873f2012-05-20 16:32:54 +0200180 do_append_grep_pat(&opt->header_tail, p);
Junio C Hamanoa4d7d2c2008-09-04 22:15:02 -0700181}
182
Junio C Hamano83b5d2f2006-09-17 16:02:52 -0700183void append_grep_pattern(struct grep_opt *opt, const char *pat,
184 const char *origin, int no, enum grep_pat_token t)
185{
René Scharfeed40a092010-05-22 23:43:43 +0200186 append_grep_pat(opt, pat, strlen(pat), origin, no, t);
187}
188
189void append_grep_pat(struct grep_opt *opt, const char *pat, size_t patlen,
190 const char *origin, int no, enum grep_pat_token t)
191{
René Scharfefc456752012-05-20 16:32:39 +0200192 struct grep_pat *p = create_grep_pat(pat, patlen, origin, no, t, 0);
René Scharfe2b3873f2012-05-20 16:32:54 +0200193 do_append_grep_pat(&opt->pattern_tail, p);
Junio C Hamano83b5d2f2006-09-17 16:02:52 -0700194}
195
Fredrik Kuivinen5b594f42010-01-25 23:51:39 +0100196struct grep_opt *grep_opt_dup(const struct grep_opt *opt)
197{
198 struct grep_pat *pat;
199 struct grep_opt *ret = xmalloc(sizeof(struct grep_opt));
200 *ret = *opt;
201
202 ret->pattern_list = NULL;
203 ret->pattern_tail = &ret->pattern_list;
204
205 for(pat = opt->pattern_list; pat != NULL; pat = pat->next)
206 {
207 if(pat->token == GREP_PATTERN_HEAD)
208 append_header_grep_pattern(ret, pat->field,
209 pat->pattern);
210 else
René Scharfeed40a092010-05-22 23:43:43 +0200211 append_grep_pat(ret, pat->pattern, pat->patternlen,
212 pat->origin, pat->no, pat->token);
Fredrik Kuivinen5b594f42010-01-25 23:51:39 +0100213 }
214
215 return ret;
216}
217
Michał Kiedrowicza30c1482011-05-09 23:52:04 +0200218static NORETURN void compile_regexp_failed(const struct grep_pat *p,
219 const char *error)
220{
221 char where[1024];
222
223 if (p->no)
Jeff King19bdd3e2015-09-24 17:06:51 -0400224 xsnprintf(where, sizeof(where), "In '%s' at %d, ", p->origin, p->no);
Michał Kiedrowicza30c1482011-05-09 23:52:04 +0200225 else if (p->origin)
Jeff King19bdd3e2015-09-24 17:06:51 -0400226 xsnprintf(where, sizeof(where), "%s, ", p->origin);
Michał Kiedrowicza30c1482011-05-09 23:52:04 +0200227 else
228 where[0] = 0;
229
230 die("%s'%s': %s", where, p->pattern, error);
231}
232
/*
 * Return 1 if none of the first `len` bytes of `s` satisfies
 * is_regex_special(), 0 otherwise.
 */
static int is_fixed(const char *s, size_t len)
{
	const char *end = s + len;
	const char *cp;

	for (cp = s; cp != end; cp++)
		if (is_regex_special(*cp))
			return 0;

	return 1;
}
244
Ævar Arnfjörð Bjarmason94da9192017-06-01 18:20:56 +0000245#ifdef USE_LIBPCRE2
Ævar Arnfjörð Bjarmasonc1760352021-02-18 01:07:28 +0100246#define GREP_PCRE2_DEBUG_MALLOC 0
247
248static void *pcre2_malloc(PCRE2_SIZE size, MAYBE_UNUSED void *memory_data)
249{
250 void *pointer = malloc(size);
251#if GREP_PCRE2_DEBUG_MALLOC
252 static int count = 1;
253 fprintf(stderr, "PCRE2:%p -> #%02d: alloc(%lu)\n", pointer, count++, size);
254#endif
255 return pointer;
256}
257
258static void pcre2_free(void *pointer, MAYBE_UNUSED void *memory_data)
259{
260#if GREP_PCRE2_DEBUG_MALLOC
261 static int count = 1;
262 if (pointer)
263 fprintf(stderr, "PCRE2:%p -> #%02d: free()\n", pointer, count++);
264#endif
265 free(pointer);
266}
267
Mathias Krause50b6ad52023-01-31 19:56:11 +0100268static int pcre2_jit_functional(void)
269{
270 static int jit_working = -1;
271 pcre2_code *code;
272 size_t off;
273 int err;
274
275 if (jit_working != -1)
276 return jit_working;
277
278 /*
279 * Try to JIT compile a simple pattern to probe if the JIT is
280 * working in general. It might fail for systems where creating
281 * memory mappings for runtime code generation is restricted.
282 */
283 code = pcre2_compile((PCRE2_SPTR)".", 1, 0, &err, &off, NULL);
284 if (!code)
285 return 0;
286
287 jit_working = pcre2_jit_compile(code, PCRE2_JIT_COMPLETE) == 0;
288 pcre2_code_free(code);
289
290 return jit_working;
291}
292
Ævar Arnfjörð Bjarmason94da9192017-06-01 18:20:56 +0000293static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt)
294{
295 int error;
296 PCRE2_UCHAR errbuf[256];
297 PCRE2_SIZE erroffset;
298 int options = PCRE2_MULTILINE;
Ævar Arnfjörð Bjarmason94da9192017-06-01 18:20:56 +0000299 int jitret;
Ævar Arnfjörð Bjarmasona25b9082017-11-23 14:16:58 +0000300 int patinforet;
301 size_t jitsizearg;
René Scharfe32e3e8b2021-12-18 20:53:15 +0100302 int literal = !opt->ignore_case && (p->fixed || p->is_fixed);
Ævar Arnfjörð Bjarmason94da9192017-06-01 18:20:56 +0000303
Ævar Arnfjörð Bjarmasoncbe81e62021-02-18 01:07:27 +0100304 /*
305 * Call pcre2_general_context_create() before calling any
306 * other pcre2_*(). It sets up our malloc()/free() functions
307 * with which everything else is allocated.
308 */
309 p->pcre2_general_context = pcre2_general_context_create(
310 pcre2_malloc, pcre2_free, NULL);
311 if (!p->pcre2_general_context)
312 die("Couldn't allocate PCRE2 general context");
Ævar Arnfjörð Bjarmason94da9192017-06-01 18:20:56 +0000313
Ævar Arnfjörð Bjarmason94da9192017-06-01 18:20:56 +0000314 if (opt->ignore_case) {
Ævar Arnfjörð Bjarmason44570182019-06-28 01:39:05 +0200315 if (!opt->ignore_locale && has_non_ascii(p->pattern)) {
Ævar Arnfjörð Bjarmasoncbe81e62021-02-18 01:07:27 +0100316 p->pcre2_tables = pcre2_maketables(p->pcre2_general_context);
317 p->pcre2_compile_context = pcre2_compile_context_create(p->pcre2_general_context);
Carlo Marcelo Arenas Belón10da0302019-10-16 12:10:24 +0000318 pcre2_set_character_tables(p->pcre2_compile_context,
319 p->pcre2_tables);
Ævar Arnfjörð Bjarmason94da9192017-06-01 18:20:56 +0000320 }
321 options |= PCRE2_CASELESS;
322 }
René Scharfe32e3e8b2021-12-18 20:53:15 +0100323 if (!opt->ignore_locale && is_utf8_locale() && !literal)
Carlo Marcelo Arenas Belónacabd202023-01-08 07:52:17 -0800324 options |= (PCRE2_UTF | PCRE2_UCP | PCRE2_MATCH_INVALID_UTF);
Ævar Arnfjörð Bjarmason95ca1f92021-01-24 18:28:13 +0100325
Mathias Krause14b9a042023-03-23 18:25:39 +0100326#ifndef GIT_PCRE2_VERSION_10_35_OR_HIGHER
327 /*
328 * Work around a JIT bug related to invalid Unicode character handling
329 * fixed in 10.35:
330 * https://github.com/PCRE2Project/pcre2/commit/c21bd977547d
331 */
332 options &= ~PCRE2_UCP;
333#endif
334
René Scharfe97169fc2022-02-17 22:14:29 +0100335#ifndef GIT_PCRE2_VERSION_10_36_OR_HIGHER
Ævar Arnfjörð Bjarmason95ca1f92021-01-24 18:28:13 +0100336 /* Work around https://bugs.exim.org/show_bug.cgi?id=2642 fixed in 10.36 */
Ævar Arnfjörð Bjarmason797c3592021-02-18 01:07:24 +0100337 if (PCRE2_MATCH_INVALID_UTF && options & (PCRE2_UTF | PCRE2_CASELESS))
338 options |= PCRE2_NO_START_OPTIMIZE;
339#endif
Ævar Arnfjörð Bjarmason94da9192017-06-01 18:20:56 +0000340
341 p->pcre2_pattern = pcre2_compile((PCRE2_SPTR)p->pattern,
342 p->patternlen, options, &error, &erroffset,
343 p->pcre2_compile_context);
344
345 if (p->pcre2_pattern) {
Ævar Arnfjörð Bjarmasoncbe81e62021-02-18 01:07:27 +0100346 p->pcre2_match_data = pcre2_match_data_create_from_pattern(p->pcre2_pattern, p->pcre2_general_context);
Ævar Arnfjörð Bjarmason94da9192017-06-01 18:20:56 +0000347 if (!p->pcre2_match_data)
348 die("Couldn't allocate PCRE2 match data");
349 } else {
350 pcre2_get_error_message(error, errbuf, sizeof(errbuf));
351 compile_regexp_failed(p, (const char *)&errbuf);
352 }
353
354 pcre2_config(PCRE2_CONFIG_JIT, &p->pcre2_jit_on);
Ævar Arnfjörð Bjarmason04bef502019-07-26 17:08:11 +0200355 if (p->pcre2_jit_on) {
Ævar Arnfjörð Bjarmason94da9192017-06-01 18:20:56 +0000356 jitret = pcre2_jit_compile(p->pcre2_pattern, PCRE2_JIT_COMPLETE);
Mathias Krause50b6ad52023-01-31 19:56:11 +0100357 if (jitret == PCRE2_ERROR_NOMEMORY && !pcre2_jit_functional()) {
358 /*
359 * Even though pcre2_config(PCRE2_CONFIG_JIT, ...)
360 * indicated JIT support, the library might still
361 * fail to generate JIT code for various reasons,
362 * e.g. when SELinux's 'deny_execmem' or PaX's
363 * MPROTECT prevent creating W|X memory mappings.
364 *
365 * Instead of faling hard, fall back to interpreter
366 * mode, just as if the pattern was prefixed with
367 * '(*NO_JIT)'.
368 */
369 p->pcre2_jit_on = 0;
370 return;
371 } else if (jitret) {
372 int need_clip = p->patternlen > 64;
373 int clip_len = need_clip ? 64 : p->patternlen;
374 die("Couldn't JIT the PCRE2 pattern '%.*s'%s, got '%d'%s",
375 clip_len, p->pattern, need_clip ? "..." : "", jitret,
376 pcre2_jit_functional()
377 ? "\nPerhaps prefix (*NO_JIT) to your pattern?"
378 : "");
379 }
Ævar Arnfjörð Bjarmasona25b9082017-11-23 14:16:58 +0000380
381 /*
382 * The pcre2_config(PCRE2_CONFIG_JIT, ...) call just
383 * tells us whether the library itself supports JIT,
384 * but to see whether we're going to be actually using
385 * JIT we need to extract PCRE2_INFO_JITSIZE from the
386 * pattern *after* we do pcre2_jit_compile() above.
387 *
388 * This is because if the pattern contains the
389 * (*NO_JIT) verb (see pcre2syntax(3))
390 * pcre2_jit_compile() will exit early with 0. If we
391 * then proceed to call pcre2_jit_match() further down
392 * the line instead of pcre2_match() we'll either
393 * segfault (pre PCRE 10.31) or run into a fatal error
394 * (post PCRE2 10.31)
395 */
396 patinforet = pcre2_pattern_info(p->pcre2_pattern, PCRE2_INFO_JITSIZE, &jitsizearg);
397 if (patinforet)
398 BUG("pcre2_pattern_info() failed: %d", patinforet);
399 if (jitsizearg == 0) {
400 p->pcre2_jit_on = 0;
401 return;
402 }
Ævar Arnfjörð Bjarmason94da9192017-06-01 18:20:56 +0000403 }
404}
405
406static int pcre2match(struct grep_pat *p, const char *line, const char *eol,
407 regmatch_t *match, int eflags)
408{
409 int ret, flags = 0;
410 PCRE2_SIZE *ovector;
411 PCRE2_UCHAR errbuf[256];
412
413 if (eflags & REG_NOTBOL)
414 flags |= PCRE2_NOTBOL;
415
416 if (p->pcre2_jit_on)
417 ret = pcre2_jit_match(p->pcre2_pattern, (unsigned char *)line,
418 eol - line, 0, flags, p->pcre2_match_data,
419 NULL);
420 else
421 ret = pcre2_match(p->pcre2_pattern, (unsigned char *)line,
422 eol - line, 0, flags, p->pcre2_match_data,
423 NULL);
424
425 if (ret < 0 && ret != PCRE2_ERROR_NOMATCH) {
426 pcre2_get_error_message(ret, errbuf, sizeof(errbuf));
427 die("%s failed with error code %d: %s",
428 (p->pcre2_jit_on ? "pcre2_jit_match" : "pcre2_match"), ret,
429 errbuf);
430 }
431 if (ret > 0) {
432 ovector = pcre2_get_ovector_pointer(p->pcre2_match_data);
433 ret = 0;
434 match->rm_so = (int)ovector[0];
435 match->rm_eo = (int)ovector[1];
436 }
437
438 return ret;
439}
440
441static void free_pcre2_pattern(struct grep_pat *p)
442{
443 pcre2_compile_context_free(p->pcre2_compile_context);
444 pcre2_code_free(p->pcre2_pattern);
445 pcre2_match_data_free(p->pcre2_match_data);
Ævar Arnfjörð Bjarmasonb76bf272021-02-18 01:07:25 +0100446#ifdef GIT_PCRE2_VERSION_10_34_OR_HIGHER
Ævar Arnfjörð Bjarmasoncbe81e62021-02-18 01:07:27 +0100447 pcre2_maketables_free(p->pcre2_general_context, p->pcre2_tables);
Ævar Arnfjörð Bjarmasonb76bf272021-02-18 01:07:25 +0100448#else
Carlo Marcelo Arenas Belón10da0302019-10-16 12:10:24 +0000449 free((void *)p->pcre2_tables);
Ævar Arnfjörð Bjarmasonb76bf272021-02-18 01:07:25 +0100450#endif
Ævar Arnfjörð Bjarmasoncbe81e62021-02-18 01:07:27 +0100451 pcre2_general_context_free(p->pcre2_general_context);
Ævar Arnfjörð Bjarmason94da9192017-06-01 18:20:56 +0000452}
453#else /* !USE_LIBPCRE2 */
Jeff King4548b012023-08-29 19:45:34 -0400454static void compile_pcre2_pattern(struct grep_pat *p UNUSED,
455 const struct grep_opt *opt UNUSED)
Ævar Arnfjörð Bjarmason94da9192017-06-01 18:20:56 +0000456{
Ævar Arnfjörð Bjarmason94da9192017-06-01 18:20:56 +0000457 die("cannot use Perl-compatible regexes when not compiled with USE_LIBPCRE");
458}
459
Jeff King4548b012023-08-29 19:45:34 -0400460static int pcre2match(struct grep_pat *p UNUSED, const char *line UNUSED,
461 const char *eol UNUSED, regmatch_t *match UNUSED,
462 int eflags UNUSED)
Ævar Arnfjörð Bjarmason94da9192017-06-01 18:20:56 +0000463{
464 return 1;
465}
466
Jeff King4548b012023-08-29 19:45:34 -0400467static void free_pcre2_pattern(struct grep_pat *p UNUSED)
Ævar Arnfjörð Bjarmason94da9192017-06-01 18:20:56 +0000468{
469}
Ævar Arnfjörð Bjarmason94da9192017-06-01 18:20:56 +0000470
Nguyễn Thái Ngọc Duy793dc672016-06-25 07:22:31 +0200471static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt)
472{
473 struct strbuf sb = STRBUF_INIT;
474 int err;
Ævar Arnfjörð Bjarmason1ceabab2017-06-29 22:22:22 +0000475 int regflags = 0;
Nguyễn Thái Ngọc Duy793dc672016-06-25 07:22:31 +0200476
477 basic_regex_quote_buf(&sb, p->pattern);
Nguyễn Thái Ngọc Duy793dc672016-06-25 07:22:31 +0200478 if (opt->ignore_case)
479 regflags |= REG_ICASE;
480 err = regcomp(&p->regexp, sb.buf, regflags);
Nguyễn Thái Ngọc Duy793dc672016-06-25 07:22:31 +0200481 strbuf_release(&sb);
482 if (err) {
483 char errbuf[1024];
484 regerror(err, &p->regexp, errbuf, sizeof(errbuf));
Nguyễn Thái Ngọc Duy793dc672016-06-25 07:22:31 +0200485 compile_regexp_failed(p, errbuf);
486 }
487}
Ævar Arnfjörð Bjarmasonb65abca2019-07-01 23:21:00 +0200488#endif /* !USE_LIBPCRE2 */
Nguyễn Thái Ngọc Duy793dc672016-06-25 07:22:31 +0200489
Junio C Hamano83b5d2f2006-09-17 16:02:52 -0700490static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
491{
René Scharfec8222552009-01-10 00:18:34 +0100492 int err;
Ævar Arnfjörð Bjarmason07a3d412017-06-29 22:22:21 +0000493 int regflags = REG_NEWLINE;
René Scharfec8222552009-01-10 00:18:34 +0100494
Ævar Arnfjörð Bjarmason04bf0522022-02-16 01:00:39 +0100495 if (opt->pattern_type_option == GREP_PATTERN_TYPE_UNSPECIFIED)
496 opt->pattern_type_option = (opt->extended_regexp_option
497 ? GREP_PATTERN_TYPE_ERE
498 : GREP_PATTERN_TYPE_BRE);
499
René Scharfed7eb5272009-03-07 13:28:40 +0100500 p->word_regexp = opt->word_regexp;
Brian Collins5183bf62009-11-06 01:22:35 -0800501 p->ignore_case = opt->ignore_case;
Ævar Arnfjörð Bjarmason04bf0522022-02-16 01:00:39 +0100502 p->fixed = opt->pattern_type_option == GREP_PATTERN_TYPE_FIXED;
René Scharfed7eb5272009-03-07 13:28:40 +0100503
Ævar Arnfjörð Bjarmason04bf0522022-02-16 01:00:39 +0100504 if (opt->pattern_type_option != GREP_PATTERN_TYPE_PCRE &&
Ævar Arnfjörð Bjarmasonae807d72022-02-16 01:00:38 +0100505 memchr(p->pattern, 0, p->patternlen))
Ævar Arnfjörð Bjarmason45d1f372019-07-01 23:20:58 +0200506 die(_("given pattern contains NULL byte (via -f <file>). This is only supported with -P under PCRE v2"));
Fredrik Kuivinen9ecedde2011-08-21 00:42:18 +0200507
Ævar Arnfjörð Bjarmason09872f62019-07-26 17:08:15 +0200508 p->is_fixed = is_fixed(p->pattern, p->patternlen);
Ævar Arnfjörð Bjarmason8a599982019-07-26 17:08:16 +0200509#ifdef USE_LIBPCRE2
510 if (!p->fixed && !p->is_fixed) {
511 const char *no_jit = "(*NO_JIT)";
512 const int no_jit_len = strlen(no_jit);
513 if (starts_with(p->pattern, no_jit) &&
514 is_fixed(p->pattern + no_jit_len,
515 p->patternlen - no_jit_len))
516 p->is_fixed = 1;
517 }
518#endif
Ævar Arnfjörð Bjarmason09872f62019-07-26 17:08:15 +0200519 if (p->fixed || p->is_fixed) {
Ævar Arnfjörð Bjarmasonb65abca2019-07-01 23:21:00 +0200520#ifdef USE_LIBPCRE2
Ævar Arnfjörð Bjarmason09872f62019-07-26 17:08:15 +0200521 if (p->is_fixed) {
Ævar Arnfjörð Bjarmasonb65abca2019-07-01 23:21:00 +0200522 compile_pcre2_pattern(p, opt);
523 } else {
524 /*
525 * E.g. t7811-grep-open.sh relies on the
526 * pattern being restored.
527 */
528 char *old_pattern = p->pattern;
529 size_t old_patternlen = p->patternlen;
530 struct strbuf sb = STRBUF_INIT;
531
532 /*
533 * There is the PCRE2_LITERAL flag, but it's
534 * only in PCRE v2 10.30 and later. Needing to
535 * ifdef our way around that and dealing with
536 * it + PCRE2_MULTILINE being an error is more
537 * complex than just quoting this ourselves.
538 */
539 strbuf_add(&sb, "\\Q", 2);
540 strbuf_add(&sb, p->pattern, p->patternlen);
541 strbuf_add(&sb, "\\E", 2);
542
543 p->pattern = sb.buf;
544 p->patternlen = sb.len;
545 compile_pcre2_pattern(p, opt);
546 p->pattern = old_pattern;
547 p->patternlen = old_patternlen;
548 strbuf_release(&sb);
549 }
550#else /* !USE_LIBPCRE2 */
Nguyễn Thái Ngọc Duy793dc672016-06-25 07:22:31 +0200551 compile_fixed_regexp(p, opt);
Ævar Arnfjörð Bjarmasonb65abca2019-07-01 23:21:00 +0200552#endif /* !USE_LIBPCRE2 */
Nguyễn Thái Ngọc Duy793dc672016-06-25 07:22:31 +0200553 return;
Fredrik Kuivinen9ecedde2011-08-21 00:42:18 +0200554 }
René Scharfec8222552009-01-10 00:18:34 +0100555
Ævar Arnfjörð Bjarmason04bf0522022-02-16 01:00:39 +0100556 if (opt->pattern_type_option == GREP_PATTERN_TYPE_PCRE) {
Ævar Arnfjörð Bjarmason94da9192017-06-01 18:20:56 +0000557 compile_pcre2_pattern(p, opt);
558 return;
559 }
560
Ævar Arnfjörð Bjarmason07a3d412017-06-29 22:22:21 +0000561 if (p->ignore_case)
562 regflags |= REG_ICASE;
Ævar Arnfjörð Bjarmason04bf0522022-02-16 01:00:39 +0100563 if (opt->pattern_type_option == GREP_PATTERN_TYPE_ERE)
Ævar Arnfjörð Bjarmason07a3d412017-06-29 22:22:21 +0000564 regflags |= REG_EXTENDED;
565 err = regcomp(&p->regexp, p->pattern, regflags);
Junio C Hamano83b5d2f2006-09-17 16:02:52 -0700566 if (err) {
567 char errbuf[1024];
Junio C Hamano83b5d2f2006-09-17 16:02:52 -0700568 regerror(err, &p->regexp, errbuf, 1024);
Michał Kiedrowicza30c1482011-05-09 23:52:04 +0200569 compile_regexp_failed(p, errbuf);
Junio C Hamano83b5d2f2006-09-17 16:02:52 -0700570 }
571}
572
René Scharfee2b15422022-01-06 10:54:19 +0100573static struct grep_expr *grep_not_expr(struct grep_expr *expr)
574{
575 struct grep_expr *z = xcalloc(1, sizeof(*z));
576 z->node = GREP_NODE_NOT;
577 z->u.unary = expr;
578 return z;
579}
580
Taylor Blauf2d27592022-01-06 14:50:12 -0500581static struct grep_expr *grep_binexp(enum grep_expr_node kind,
582 struct grep_expr *left,
583 struct grep_expr *right)
René Scharfe9dbf00b2022-01-06 10:51:00 +0100584{
585 struct grep_expr *z = xcalloc(1, sizeof(*z));
Taylor Blauf2d27592022-01-06 14:50:12 -0500586 z->node = kind;
René Scharfe9dbf00b2022-01-06 10:51:00 +0100587 z->u.binary.left = left;
588 z->u.binary.right = right;
589 return z;
590}
591
Taylor Blauf2d27592022-01-06 14:50:12 -0500592static struct grep_expr *grep_or_expr(struct grep_expr *left, struct grep_expr *right)
593{
594 return grep_binexp(GREP_NODE_OR, left, right);
595}
596
Taylor Blau0a6adc22022-01-06 14:50:15 -0500597static struct grep_expr *grep_and_expr(struct grep_expr *left, struct grep_expr *right)
598{
599 return grep_binexp(GREP_NODE_AND, left, right);
600}
601
Junio C Hamano0ab7bef2006-09-27 17:50:52 -0700602static struct grep_expr *compile_pattern_or(struct grep_pat **);
Junio C Hamano83b5d2f2006-09-17 16:02:52 -0700603static struct grep_expr *compile_pattern_atom(struct grep_pat **list)
604{
605 struct grep_pat *p;
606 struct grep_expr *x;
607
608 p = *list;
Linus Torvaldsc922b012009-04-27 11:10:24 -0700609 if (!p)
610 return NULL;
Junio C Hamano83b5d2f2006-09-17 16:02:52 -0700611 switch (p->token) {
612 case GREP_PATTERN: /* atom */
Junio C Hamano480c1ca2006-09-20 12:39:46 -0700613 case GREP_PATTERN_HEAD:
614 case GREP_PATTERN_BODY:
René Scharfeca56dad2021-03-13 17:17:22 +0100615 CALLOC_ARRAY(x, 1);
Junio C Hamano83b5d2f2006-09-17 16:02:52 -0700616 x->node = GREP_NODE_ATOM;
617 x->u.atom = p;
618 *list = p->next;
619 return x;
620 case GREP_OPEN_PAREN:
621 *list = p->next;
Junio C Hamano0ab7bef2006-09-27 17:50:52 -0700622 x = compile_pattern_or(list);
Junio C Hamano83b5d2f2006-09-17 16:02:52 -0700623 if (!*list || (*list)->token != GREP_CLOSE_PAREN)
Ahelenia Ziemiańska0d527842024-03-23 14:18:08 +0100624 die("unmatched ( for expression group");
Junio C Hamano83b5d2f2006-09-17 16:02:52 -0700625 *list = (*list)->next;
626 return x;
627 default:
628 return NULL;
629 }
630}
631
632static struct grep_expr *compile_pattern_not(struct grep_pat **list)
633{
634 struct grep_pat *p;
635 struct grep_expr *x;
636
637 p = *list;
Linus Torvaldsc922b012009-04-27 11:10:24 -0700638 if (!p)
639 return NULL;
Junio C Hamano83b5d2f2006-09-17 16:02:52 -0700640 switch (p->token) {
641 case GREP_NOT:
642 if (!p->next)
643 die("--not not followed by pattern expression");
644 *list = p->next;
René Scharfee2b15422022-01-06 10:54:19 +0100645 x = compile_pattern_not(list);
646 if (!x)
Junio C Hamano83b5d2f2006-09-17 16:02:52 -0700647 die("--not followed by non pattern expression");
René Scharfee2b15422022-01-06 10:54:19 +0100648 return grep_not_expr(x);
Junio C Hamano83b5d2f2006-09-17 16:02:52 -0700649 default:
650 return compile_pattern_atom(list);
651 }
652}
653
654static struct grep_expr *compile_pattern_and(struct grep_pat **list)
655{
656 struct grep_pat *p;
Taylor Blau0a6adc22022-01-06 14:50:15 -0500657 struct grep_expr *x, *y;
Junio C Hamano83b5d2f2006-09-17 16:02:52 -0700658
659 x = compile_pattern_not(list);
660 p = *list;
661 if (p && p->token == GREP_AND) {
René Scharfefe7fe622021-06-30 18:12:43 +0200662 if (!x)
663 die("--and not preceded by pattern expression");
Junio C Hamano83b5d2f2006-09-17 16:02:52 -0700664 if (!p->next)
665 die("--and not followed by pattern expression");
666 *list = p->next;
667 y = compile_pattern_and(list);
668 if (!y)
669 die("--and not followed by pattern expression");
Taylor Blau0a6adc22022-01-06 14:50:15 -0500670 return grep_and_expr(x, y);
Junio C Hamano83b5d2f2006-09-17 16:02:52 -0700671 }
672 return x;
673}
674
675static struct grep_expr *compile_pattern_or(struct grep_pat **list)
676{
677 struct grep_pat *p;
René Scharfe9dbf00b2022-01-06 10:51:00 +0100678 struct grep_expr *x, *y;
Junio C Hamano83b5d2f2006-09-17 16:02:52 -0700679
680 x = compile_pattern_and(list);
681 p = *list;
682 if (x && p && p->token != GREP_CLOSE_PAREN) {
683 y = compile_pattern_or(list);
684 if (!y)
685 die("not a pattern expression %s", p->pattern);
René Scharfe9dbf00b2022-01-06 10:51:00 +0100686 return grep_or_expr(x, y);
Junio C Hamano83b5d2f2006-09-17 16:02:52 -0700687 }
688 return x;
689}
690
/* Entry point of the expression parser: delegates to the OR level. */
static struct grep_expr *compile_pattern_expr(struct grep_pat **list)
{
	struct grep_expr *expr = compile_pattern_or(list);

	return expr;
}
695
Junio C Hamano5aaeb732010-09-12 22:15:35 -0700696static struct grep_expr *grep_true_expr(void)
697{
698 struct grep_expr *z = xcalloc(1, sizeof(*z));
699 z->node = GREP_NODE_TRUE;
700 return z;
701}
702
Junio C Hamano95ce9ce2010-09-12 19:30:48 -0700703static struct grep_expr *prep_header_patterns(struct grep_opt *opt)
704{
705 struct grep_pat *p;
706 struct grep_expr *header_expr;
Junio C Hamano5aaeb732010-09-12 22:15:35 -0700707 struct grep_expr *(header_group[GREP_HEADER_FIELD_MAX]);
708 enum grep_header_field fld;
Junio C Hamano95ce9ce2010-09-12 19:30:48 -0700709
710 if (!opt->header_list)
711 return NULL;
Angus Hammond2385f242012-05-06 19:17:15 +0100712
Junio C Hamano95ce9ce2010-09-12 19:30:48 -0700713 for (p = opt->header_list; p; p = p->next) {
714 if (p->token != GREP_PATTERN_HEAD)
Johannes Schindelin033abf92018-05-02 11:38:39 +0200715 BUG("a non-header pattern in grep header list.");
Antoine Pelisse3ce3ffb2013-02-03 14:37:09 +0000716 if (p->field < GREP_HEADER_FIELD_MIN ||
717 GREP_HEADER_FIELD_MAX <= p->field)
Johannes Schindelin033abf92018-05-02 11:38:39 +0200718 BUG("unknown header field %d", p->field);
Junio C Hamano95ce9ce2010-09-12 19:30:48 -0700719 compile_regexp(p, opt);
720 }
Junio C Hamano5aaeb732010-09-12 22:15:35 -0700721
722 for (fld = 0; fld < GREP_HEADER_FIELD_MAX; fld++)
723 header_group[fld] = NULL;
724
725 for (p = opt->header_list; p; p = p->next) {
726 struct grep_expr *h;
727 struct grep_pat *pp = p;
728
729 h = compile_pattern_atom(&pp);
730 if (!h || pp != p->next)
Johannes Schindelin033abf92018-05-02 11:38:39 +0200731 BUG("malformed header expr");
Junio C Hamano5aaeb732010-09-12 22:15:35 -0700732 if (!header_group[p->field]) {
733 header_group[p->field] = h;
734 continue;
735 }
736 header_group[p->field] = grep_or_expr(h, header_group[p->field]);
737 }
738
739 header_expr = NULL;
740
741 for (fld = 0; fld < GREP_HEADER_FIELD_MAX; fld++) {
742 if (!header_group[fld])
743 continue;
744 if (!header_expr)
745 header_expr = grep_true_expr();
746 header_expr = grep_or_expr(header_group[fld], header_expr);
747 }
Junio C Hamano95ce9ce2010-09-12 19:30:48 -0700748 return header_expr;
749}
750
Junio C Hamano13e4fc72012-09-13 16:26:57 -0700751static struct grep_expr *grep_splice_or(struct grep_expr *x, struct grep_expr *y)
752{
753 struct grep_expr *z = x;
754
755 while (x) {
756 assert(x->node == GREP_NODE_OR);
757 if (x->u.binary.right &&
758 x->u.binary.right->node == GREP_NODE_TRUE) {
759 x->u.binary.right = y;
760 break;
761 }
762 x = x->u.binary.right;
763 }
764 return z;
765}
766
Ævar Arnfjörð Bjarmason15c96492021-01-26 00:36:51 +0100767void compile_grep_patterns(struct grep_opt *opt)
Junio C Hamano83b5d2f2006-09-17 16:02:52 -0700768{
769 struct grep_pat *p;
Junio C Hamano95ce9ce2010-09-12 19:30:48 -0700770 struct grep_expr *header_expr = prep_header_patterns(opt);
Ævar Arnfjörð Bjarmasondb843762022-10-11 11:48:45 +0200771 int extended = 0;
Junio C Hamano0ab7bef2006-09-27 17:50:52 -0700772
Junio C Hamano83b5d2f2006-09-17 16:02:52 -0700773 for (p = opt->pattern_list; p; p = p->next) {
Junio C Hamano480c1ca2006-09-20 12:39:46 -0700774 switch (p->token) {
775 case GREP_PATTERN: /* atom */
776 case GREP_PATTERN_HEAD:
777 case GREP_PATTERN_BODY:
René Scharfec8222552009-01-10 00:18:34 +0100778 compile_regexp(p, opt);
Junio C Hamano480c1ca2006-09-20 12:39:46 -0700779 break;
780 default:
Ævar Arnfjörð Bjarmasondb843762022-10-11 11:48:45 +0200781 extended = 1;
Junio C Hamano480c1ca2006-09-20 12:39:46 -0700782 break;
783 }
Junio C Hamano83b5d2f2006-09-17 16:02:52 -0700784 }
785
René Scharfe794c0002021-12-17 17:48:49 +0100786 if (opt->all_match || opt->no_body_match || header_expr)
Ævar Arnfjörð Bjarmasondb843762022-10-11 11:48:45 +0200787 extended = 1;
788 else if (!extended)
Junio C Hamano83b5d2f2006-09-17 16:02:52 -0700789 return;
790
Junio C Hamano83b5d2f2006-09-17 16:02:52 -0700791 p = opt->pattern_list;
Michele Ballabioba150a32009-03-18 21:53:27 +0100792 if (p)
793 opt->pattern_expression = compile_pattern_expr(&p);
Junio C Hamano83b5d2f2006-09-17 16:02:52 -0700794 if (p)
Ahelenia Ziemiańska0d527842024-03-23 14:18:08 +0100795 die("incomplete pattern expression group: %s", p->pattern);
Junio C Hamano80235ba2010-01-17 20:09:06 -0800796
René Scharfe794c0002021-12-17 17:48:49 +0100797 if (opt->no_body_match && opt->pattern_expression)
798 opt->pattern_expression = grep_not_expr(opt->pattern_expression);
799
Junio C Hamano80235ba2010-01-17 20:09:06 -0800800 if (!header_expr)
801 return;
802
Junio C Hamano5aaeb732010-09-12 22:15:35 -0700803 if (!opt->pattern_expression)
Junio C Hamano80235ba2010-01-17 20:09:06 -0800804 opt->pattern_expression = header_expr;
Junio C Hamano13e4fc72012-09-13 16:26:57 -0700805 else if (opt->all_match)
806 opt->pattern_expression = grep_splice_or(header_expr,
807 opt->pattern_expression);
Junio C Hamano5aaeb732010-09-12 22:15:35 -0700808 else
809 opt->pattern_expression = grep_or_expr(opt->pattern_expression,
810 header_expr);
Junio C Hamano80235ba2010-01-17 20:09:06 -0800811 opt->all_match = 1;
Junio C Hamano83b5d2f2006-09-17 16:02:52 -0700812}
813
Junio C Hamanob48fb5b2006-09-27 16:27:10 -0700814static void free_pattern_expr(struct grep_expr *x)
815{
816 switch (x->node) {
Junio C Hamano5aaeb732010-09-12 22:15:35 -0700817 case GREP_NODE_TRUE:
Junio C Hamanob48fb5b2006-09-27 16:27:10 -0700818 case GREP_NODE_ATOM:
819 break;
820 case GREP_NODE_NOT:
821 free_pattern_expr(x->u.unary);
822 break;
823 case GREP_NODE_AND:
824 case GREP_NODE_OR:
825 free_pattern_expr(x->u.binary.left);
826 free_pattern_expr(x->u.binary.right);
827 break;
828 }
829 free(x);
830}
831
Ævar Arnfjörð Bjarmason891c9962023-02-07 00:07:50 +0100832static void free_grep_pat(struct grep_pat *pattern)
Junio C Hamanob48fb5b2006-09-27 16:27:10 -0700833{
834 struct grep_pat *p, *n;
835
Ævar Arnfjörð Bjarmason891c9962023-02-07 00:07:50 +0100836 for (p = pattern; p; p = n) {
Junio C Hamanob48fb5b2006-09-27 16:27:10 -0700837 n = p->next;
838 switch (p->token) {
839 case GREP_PATTERN: /* atom */
840 case GREP_PATTERN_HEAD:
841 case GREP_PATTERN_BODY:
Ævar Arnfjörð Bjarmason75997302021-01-24 02:58:33 +0100842 if (p->pcre2_pattern)
Ævar Arnfjörð Bjarmason94da9192017-06-01 18:20:56 +0000843 free_pcre2_pattern(p);
Michał Kiedrowicz63e7e9d2011-05-09 23:52:05 +0200844 else
845 regfree(&p->regexp);
René Scharfe526a8582012-05-20 16:33:07 +0200846 free(p->pattern);
Junio C Hamanob48fb5b2006-09-27 16:27:10 -0700847 break;
848 default:
849 break;
850 }
851 free(p);
852 }
Ævar Arnfjörð Bjarmason891c9962023-02-07 00:07:50 +0100853}
Junio C Hamanob48fb5b2006-09-27 16:27:10 -0700854
Ævar Arnfjörð Bjarmason891c9962023-02-07 00:07:50 +0100855void free_grep_patterns(struct grep_opt *opt)
856{
857 free_grep_pat(opt->pattern_list);
Ævar Arnfjörð Bjarmasonfb2ebe72023-02-07 00:07:51 +0100858 free_grep_pat(opt->header_list);
Ævar Arnfjörð Bjarmason891c9962023-02-07 00:07:50 +0100859
860 if (opt->pattern_expression)
861 free_pattern_expr(opt->pattern_expression);
Junio C Hamanob48fb5b2006-09-27 16:27:10 -0700862}
863
/*
 * Advance from `cp` to the next '\n', looking at no more than `*left`
 * bytes.  Returns a pointer to the newline, or to the first byte past
 * the examined range if none was found.  `*left` is reduced by the
 * number of bytes skipped.
 */
static const char *end_of_line(const char *cp, unsigned long *left)
{
	unsigned long remaining;

	for (remaining = *left; remaining; remaining--, cp++) {
		if (*cp == '\n')
			break;
	}
	*left = remaining;
	return cp;
}
874
/* Return 1 if `ch` is an alphanumeric character or '_', 0 otherwise. */
static int word_char(char ch)
{
	if (ch == '_')
		return 1;
	return isalnum(ch) ? 1 : 0;
}
879
Mark Lodato55f638b2010-03-07 11:52:46 -0500880static void output_color(struct grep_opt *opt, const void *data, size_t size,
881 const char *color)
882{
Jeff Kingdaa0c3d2011-08-17 22:04:23 -0700883 if (want_color(opt->color) && color && color[0]) {
Mark Lodato55f638b2010-03-07 11:52:46 -0500884 opt->output(opt, color, strlen(color));
885 opt->output(opt, data, size);
886 opt->output(opt, GIT_COLOR_RESET, strlen(GIT_COLOR_RESET));
887 } else
888 opt->output(opt, data, size);
889}
890
891static void output_sep(struct grep_opt *opt, char sign)
892{
893 if (opt->null_following_name)
894 opt->output(opt, "\0", 1);
895 else
Nguyễn Thái Ngọc Duyfa151dc2018-05-26 15:55:22 +0200896 output_color(opt, &sign, 1, opt->colors[GREP_COLOR_SEP]);
Mark Lodato55f638b2010-03-07 11:52:46 -0500897}
898
Raphael Zimmerer83caecc2008-10-01 18:11:15 +0200899static void show_name(struct grep_opt *opt, const char *name)
900{
Nguyễn Thái Ngọc Duyfa151dc2018-05-26 15:55:22 +0200901 output_color(opt, name, strlen(name), opt->colors[GREP_COLOR_FILENAME]);
Fredrik Kuivinen5b594f42010-01-25 23:51:39 +0100902 opt->output(opt, opt->null_following_name ? "\0" : "\n", 1);
Raphael Zimmerer83caecc2008-10-01 18:11:15 +0200903}
904
Jeff King1a845fb2021-09-20 23:49:49 -0400905static int patmatch(struct grep_pat *p,
906 const char *line, const char *eol,
Michał Kiedrowicz97e77782011-05-05 00:00:19 +0200907 regmatch_t *match, int eflags)
908{
909 int hit;
910
Ævar Arnfjörð Bjarmason75997302021-01-24 02:58:33 +0100911 if (p->pcre2_pattern)
Ævar Arnfjörð Bjarmason94da9192017-06-01 18:20:56 +0000912 hit = !pcre2match(p, line, eol, match, eflags);
Michał Kiedrowicz97e77782011-05-05 00:00:19 +0200913 else
Johannes Schindelinb7d36ff2016-09-21 20:24:14 +0200914 hit = !regexec_buf(&p->regexp, line, eol - line, 1, match,
915 eflags);
Michał Kiedrowicz97e77782011-05-05 00:00:19 +0200916
917 return hit;
918}
919
/*
 * Search backwards from the end of [bol, *eol_p) for the last '>' and,
 * if one is found, move *eol_p to just past it.  The byte at `bol`
 * itself is never examined.  *eol_p is left alone when there is no '>'.
 */
static void strip_timestamp(const char *bol, const char **eol_p)
{
	const char *cur;

	for (cur = *eol_p - 1; bol < cur; cur--) {
		if (*cur == '>') {
			*eol_p = cur + 1;
			return;
		}
	}
}
931
/*
 * Literal prefixes of the header lines that header patterns apply to,
 * with their lengths.  match_one_pattern() indexes this table with a
 * pattern's `field`, so the entry order must not change.
 */
#define HEADER_FIELD_ENTRY(prefix) { (prefix), sizeof(prefix) - 1 }
static struct {
	const char *field;
	size_t len;
} header_field[] = {
	HEADER_FIELD_ENTRY("author "),
	HEADER_FIELD_ENTRY("committer "),
	HEADER_FIELD_ENTRY("reflog "),
};
#undef HEADER_FIELD_ENTRY
940
Hamza Mahfooz3f566c42021-09-29 07:57:15 -0400941static int headerless_match_one_pattern(struct grep_pat *p,
942 const char *bol, const char *eol,
943 enum grep_context ctx,
944 regmatch_t *pmatch, int eflags)
Junio C Hamano83b5d2f2006-09-17 16:02:52 -0700945{
946 int hit = 0;
René Scharfee701fad2009-05-20 23:31:53 +0200947 const char *start = bol;
Junio C Hamano83b5d2f2006-09-17 16:02:52 -0700948
Junio C Hamano480c1ca2006-09-20 12:39:46 -0700949 if ((p->token != GREP_PATTERN) &&
950 ((p->token == GREP_PATTERN_HEAD) != (ctx == GREP_CONTEXT_HEAD)))
951 return 0;
952
Junio C Hamano83b5d2f2006-09-17 16:02:52 -0700953 again:
Michał Kiedrowicz97e77782011-05-05 00:00:19 +0200954 hit = patmatch(p, bol, eol, pmatch, eflags);
Junio C Hamano83b5d2f2006-09-17 16:02:52 -0700955
René Scharfed7eb5272009-03-07 13:28:40 +0100956 if (hit && p->word_regexp) {
Junio C Hamano83b5d2f2006-09-17 16:02:52 -0700957 if ((pmatch[0].rm_so < 0) ||
René Scharfe84201ea2009-06-03 18:19:01 +0200958 (eol - bol) < pmatch[0].rm_so ||
Junio C Hamano83b5d2f2006-09-17 16:02:52 -0700959 (pmatch[0].rm_eo < 0) ||
960 (eol - bol) < pmatch[0].rm_eo)
961 die("regexp returned nonsense");
962
963 /* Match beginning must be either beginning of the
964 * line, or at word boundary (i.e. the last char must
965 * not be a word char). Similarly, match end must be
966 * either end of the line, or at word boundary
967 * (i.e. the next char must not be a word char).
968 */
René Scharfefb62eb72009-01-10 00:08:40 +0100969 if ( ((pmatch[0].rm_so == 0) ||
Junio C Hamano83b5d2f2006-09-17 16:02:52 -0700970 !word_char(bol[pmatch[0].rm_so-1])) &&
971 ((pmatch[0].rm_eo == (eol-bol)) ||
972 !word_char(bol[pmatch[0].rm_eo])) )
973 ;
974 else
975 hit = 0;
976
René Scharfe84201ea2009-06-03 18:19:01 +0200977 /* Words consist of at least one character. */
978 if (pmatch->rm_so == pmatch->rm_eo)
979 hit = 0;
980
Junio C Hamano83b5d2f2006-09-17 16:02:52 -0700981 if (!hit && pmatch[0].rm_so + bol + 1 < eol) {
982 /* There could be more than one match on the
983 * line, and the first match might not be
984 * strict word match. But later ones could be!
René Scharfefb62eb72009-01-10 00:08:40 +0100985 * Forward to the next possible start, i.e. the
986 * next position following a non-word char.
Junio C Hamano83b5d2f2006-09-17 16:02:52 -0700987 */
988 bol = pmatch[0].rm_so + bol + 1;
René Scharfefb62eb72009-01-10 00:08:40 +0100989 while (word_char(bol[-1]) && bol < eol)
990 bol++;
René Scharfedbb6a4a2009-05-23 13:45:26 +0200991 eflags |= REG_NOTBOL;
René Scharfefb62eb72009-01-10 00:08:40 +0100992 if (bol < eol)
993 goto again;
Junio C Hamano83b5d2f2006-09-17 16:02:52 -0700994 }
995 }
René Scharfee701fad2009-05-20 23:31:53 +0200996 if (hit) {
997 pmatch[0].rm_so += bol - start;
998 pmatch[0].rm_eo += bol - start;
999 }
Junio C Hamano83b5d2f2006-09-17 16:02:52 -07001000 return hit;
1001}
1002
Hamza Mahfooz3f566c42021-09-29 07:57:15 -04001003static int match_one_pattern(struct grep_pat *p,
1004 const char *bol, const char *eol,
1005 enum grep_context ctx, regmatch_t *pmatch,
1006 int eflags)
1007{
1008 const char *field;
1009 size_t len;
1010
1011 if (p->token == GREP_PATTERN_HEAD) {
1012 assert(p->field < ARRAY_SIZE(header_field));
1013 field = header_field[p->field].field;
1014 len = header_field[p->field].len;
1015 if (strncmp(bol, field, len))
1016 return 0;
1017 bol += len;
1018
1019 switch (p->field) {
1020 case GREP_HEADER_AUTHOR:
1021 case GREP_HEADER_COMMITTER:
1022 strip_timestamp(bol, &eol);
1023 break;
1024 default:
1025 break;
1026 }
1027 }
1028
1029 return headerless_match_one_pattern(p, bol, eol, ctx, pmatch, eflags);
1030}
1031
1032
Jeff King1a845fb2021-09-20 23:49:49 -04001033static int match_expr_eval(struct grep_opt *opt, struct grep_expr *x,
1034 const char *bol, const char *eol,
1035 enum grep_context ctx, ssize_t *col,
Taylor Blau68d686e2018-06-22 10:49:35 -05001036 ssize_t *icol, int collect_hits)
Junio C Hamano83b5d2f2006-09-17 16:02:52 -07001037{
Junio C Hamano0ab7bef2006-09-27 17:50:52 -07001038 int h = 0;
1039
Junio C Hamano83b5d2f2006-09-17 16:02:52 -07001040 switch (x->node) {
Junio C Hamano5aaeb732010-09-12 22:15:35 -07001041 case GREP_NODE_TRUE:
1042 h = 1;
1043 break;
Junio C Hamano83b5d2f2006-09-17 16:02:52 -07001044 case GREP_NODE_ATOM:
Taylor Blau68d686e2018-06-22 10:49:35 -05001045 {
1046 regmatch_t tmp;
1047 h = match_one_pattern(x->u.atom, bol, eol, ctx,
1048 &tmp, 0);
1049 if (h && (*col < 0 || tmp.rm_so < *col))
1050 *col = tmp.rm_so;
1051 }
René Scharfe794c0002021-12-17 17:48:49 +01001052 if (x->u.atom->token == GREP_PATTERN_BODY)
1053 opt->body_hit |= h;
Junio C Hamano83b5d2f2006-09-17 16:02:52 -07001054 break;
1055 case GREP_NODE_NOT:
Taylor Blau68d686e2018-06-22 10:49:35 -05001056 /*
1057 * Upon visiting a GREP_NODE_NOT, col and icol become swapped.
1058 */
1059 h = !match_expr_eval(opt, x->u.unary, bol, eol, ctx, icol, col,
1060 0);
Junio C Hamano0ab7bef2006-09-27 17:50:52 -07001061 break;
Junio C Hamano83b5d2f2006-09-17 16:02:52 -07001062 case GREP_NODE_AND:
Taylor Blau017c0fc2018-06-22 10:49:39 -05001063 h = match_expr_eval(opt, x->u.binary.left, bol, eol, ctx, col,
Taylor Blau68d686e2018-06-22 10:49:35 -05001064 icol, 0);
Taylor Blau017c0fc2018-06-22 10:49:39 -05001065 if (h || opt->columnnum) {
1066 /*
1067 * Don't short-circuit AND when given --column, since a
1068 * NOT earlier in the tree may turn this into an OR. In
1069 * this case, see the below comment.
1070 */
1071 h &= match_expr_eval(opt, x->u.binary.right, bol, eol,
1072 ctx, col, icol, 0);
1073 }
Junio C Hamano0ab7bef2006-09-27 17:50:52 -07001074 break;
Junio C Hamano83b5d2f2006-09-17 16:02:52 -07001075 case GREP_NODE_OR:
Taylor Blau017c0fc2018-06-22 10:49:39 -05001076 if (!(collect_hits || opt->columnnum)) {
1077 /*
1078 * Don't short-circuit OR when given --column (or
1079 * collecting hits) to ensure we don't skip a later
1080 * child that would produce an earlier match.
1081 */
Taylor Blau68d686e2018-06-22 10:49:35 -05001082 return (match_expr_eval(opt, x->u.binary.left, bol, eol,
1083 ctx, col, icol, 0) ||
1084 match_expr_eval(opt, x->u.binary.right, bol,
1085 eol, ctx, col, icol, 0));
Taylor Blau017c0fc2018-06-22 10:49:39 -05001086 }
Taylor Blau68d686e2018-06-22 10:49:35 -05001087 h = match_expr_eval(opt, x->u.binary.left, bol, eol, ctx, col,
1088 icol, 0);
Taylor Blau017c0fc2018-06-22 10:49:39 -05001089 if (collect_hits)
1090 x->u.binary.left->hit |= h;
Taylor Blau68d686e2018-06-22 10:49:35 -05001091 h |= match_expr_eval(opt, x->u.binary.right, bol, eol, ctx, col,
Taylor Blau017c0fc2018-06-22 10:49:39 -05001092 icol, collect_hits);
Junio C Hamano0ab7bef2006-09-27 17:50:52 -07001093 break;
1094 default:
Alexander Potashevd7530702009-01-04 21:38:41 +03001095 die("Unexpected node type (internal error) %d", x->node);
Junio C Hamano83b5d2f2006-09-17 16:02:52 -07001096 }
Junio C Hamano0ab7bef2006-09-27 17:50:52 -07001097 if (collect_hits)
1098 x->hit |= h;
1099 return h;
Junio C Hamano83b5d2f2006-09-17 16:02:52 -07001100}
1101
Jeff King1a845fb2021-09-20 23:49:49 -04001102static int match_expr(struct grep_opt *opt,
1103 const char *bol, const char *eol,
Taylor Blau68d686e2018-06-22 10:49:35 -05001104 enum grep_context ctx, ssize_t *col,
1105 ssize_t *icol, int collect_hits)
Junio C Hamano83b5d2f2006-09-17 16:02:52 -07001106{
1107 struct grep_expr *x = opt->pattern_expression;
Taylor Blau68d686e2018-06-22 10:49:35 -05001108 return match_expr_eval(opt, x, bol, eol, ctx, col, icol, collect_hits);
Junio C Hamano83b5d2f2006-09-17 16:02:52 -07001109}
1110
Jeff King1a845fb2021-09-20 23:49:49 -04001111static int match_line(struct grep_opt *opt,
1112 const char *bol, const char *eol,
Taylor Blau68d686e2018-06-22 10:49:35 -05001113 ssize_t *col, ssize_t *icol,
Junio C Hamano0ab7bef2006-09-27 17:50:52 -07001114 enum grep_context ctx, int collect_hits)
Junio C Hamano83b5d2f2006-09-17 16:02:52 -07001115{
1116 struct grep_pat *p;
Taylor Blau017c0fc2018-06-22 10:49:39 -05001117 int hit = 0;
René Scharfe79212772009-03-07 13:30:27 +01001118
Ævar Arnfjörð Bjarmasondb843762022-10-11 11:48:45 +02001119 if (opt->pattern_expression)
Taylor Blau68d686e2018-06-22 10:49:35 -05001120 return match_expr(opt, bol, eol, ctx, col, icol,
1121 collect_hits);
Junio C Hamano0ab7bef2006-09-27 17:50:52 -07001122
1123 /* we do not call with collect_hits without being extended */
Junio C Hamano83b5d2f2006-09-17 16:02:52 -07001124 for (p = opt->pattern_list; p; p = p->next) {
Taylor Blau68d686e2018-06-22 10:49:35 -05001125 regmatch_t tmp;
1126 if (match_one_pattern(p, bol, eol, ctx, &tmp, 0)) {
Taylor Blau017c0fc2018-06-22 10:49:39 -05001127 hit |= 1;
1128 if (!opt->columnnum) {
1129 /*
1130 * Without --column, any single match on a line
1131 * is enough to know that it needs to be
1132 * printed. With --column, scan _all_ patterns
1133 * to find the earliest.
1134 */
1135 break;
1136 }
1137 if (*col < 0 || tmp.rm_so < *col)
1138 *col = tmp.rm_so;
Taylor Blau68d686e2018-06-22 10:49:35 -05001139 }
Junio C Hamano83b5d2f2006-09-17 16:02:52 -07001140 }
Taylor Blau017c0fc2018-06-22 10:49:39 -05001141 return hit;
Junio C Hamano83b5d2f2006-09-17 16:02:52 -07001142}
1143
Jeff King1a845fb2021-09-20 23:49:49 -04001144static int match_next_pattern(struct grep_pat *p,
1145 const char *bol, const char *eol,
René Scharfe7e8f59d2009-03-07 13:32:32 +01001146 enum grep_context ctx,
1147 regmatch_t *pmatch, int eflags)
1148{
1149 regmatch_t match;
1150
Hamza Mahfooz3f566c42021-09-29 07:57:15 -04001151 if (!headerless_match_one_pattern(p, bol, eol, ctx, &match, eflags))
René Scharfe7e8f59d2009-03-07 13:32:32 +01001152 return 0;
1153 if (match.rm_so < 0 || match.rm_eo < 0)
1154 return 0;
1155 if (pmatch->rm_so >= 0 && pmatch->rm_eo >= 0) {
1156 if (match.rm_so > pmatch->rm_so)
1157 return 1;
1158 if (match.rm_so == pmatch->rm_so && match.rm_eo < pmatch->rm_eo)
1159 return 1;
1160 }
1161 pmatch->rm_so = match.rm_so;
1162 pmatch->rm_eo = match.rm_eo;
1163 return 1;
1164}
1165
Hamza Mahfooz3f566c42021-09-29 07:57:15 -04001166int grep_next_match(struct grep_opt *opt,
1167 const char *bol, const char *eol,
1168 enum grep_context ctx, regmatch_t *pmatch,
1169 enum grep_header_field field, int eflags)
René Scharfe7e8f59d2009-03-07 13:32:32 +01001170{
1171 struct grep_pat *p;
1172 int hit = 0;
1173
1174 pmatch->rm_so = pmatch->rm_eo = -1;
1175 if (bol < eol) {
Hamza Mahfooz3f566c42021-09-29 07:57:15 -04001176 for (p = ((ctx == GREP_CONTEXT_HEAD)
1177 ? opt->header_list : opt->pattern_list);
1178 p; p = p->next) {
René Scharfe7e8f59d2009-03-07 13:32:32 +01001179 switch (p->token) {
René Scharfe7e8f59d2009-03-07 13:32:32 +01001180 case GREP_PATTERN_HEAD:
Hamza Mahfooz3f566c42021-09-29 07:57:15 -04001181 if ((field != GREP_HEADER_FIELD_MAX) &&
1182 (p->field != field))
1183 continue;
1184 /* fall thru */
1185 case GREP_PATTERN: /* atom */
René Scharfe7e8f59d2009-03-07 13:32:32 +01001186 case GREP_PATTERN_BODY:
1187 hit |= match_next_pattern(p, bol, eol, ctx,
1188 pmatch, eflags);
1189 break;
1190 default:
1191 break;
1192 }
1193 }
1194 }
1195 return hit;
1196}
1197
Taylor Blauc707ded2018-07-03 16:51:56 -05001198static void show_line_header(struct grep_opt *opt, const char *name,
1199 unsigned lno, ssize_t cno, char sign)
René Scharfe7e8f59d2009-03-07 13:32:32 +01001200{
René Scharfe1d84f722011-06-05 17:24:36 +02001201 if (opt->heading && opt->last_shown == 0) {
Nguyễn Thái Ngọc Duyfa151dc2018-05-26 15:55:22 +02001202 output_color(opt, name, strlen(name), opt->colors[GREP_COLOR_FILENAME]);
René Scharfe1d84f722011-06-05 17:24:36 +02001203 opt->output(opt, "\n", 1);
1204 }
René Scharfe5dd06d32009-07-02 00:02:38 +02001205 opt->last_shown = lno;
1206
René Scharfe1d84f722011-06-05 17:24:36 +02001207 if (!opt->heading && opt->pathname) {
Nguyễn Thái Ngọc Duyfa151dc2018-05-26 15:55:22 +02001208 output_color(opt, name, strlen(name), opt->colors[GREP_COLOR_FILENAME]);
Mark Lodato55f638b2010-03-07 11:52:46 -05001209 output_sep(opt, sign);
Fredrik Kuivinen5b594f42010-01-25 23:51:39 +01001210 }
1211 if (opt->linenum) {
1212 char buf[32];
Jeff King1a168e52017-03-28 15:46:56 -04001213 xsnprintf(buf, sizeof(buf), "%d", lno);
Nguyễn Thái Ngọc Duyfa151dc2018-05-26 15:55:22 +02001214 output_color(opt, buf, strlen(buf), opt->colors[GREP_COLOR_LINENO]);
Mark Lodato55f638b2010-03-07 11:52:46 -05001215 output_sep(opt, sign);
Fredrik Kuivinen5b594f42010-01-25 23:51:39 +01001216 }
Taylor Blau89252cd2018-06-22 10:49:42 -05001217 /*
1218 * Treat 'cno' as the 1-indexed offset from the start of a non-context
1219 * line to its first match. Otherwise, 'cno' is 0 indicating that we are
1220 * being called with a context line.
1221 */
1222 if (opt->columnnum && cno) {
1223 char buf[32];
1224 xsnprintf(buf, sizeof(buf), "%"PRIuMAX, (uintmax_t)cno);
Junio C Hamanod036d662018-07-18 12:20:31 -07001225 output_color(opt, buf, strlen(buf), opt->colors[GREP_COLOR_COLUMNNO]);
Taylor Blau89252cd2018-06-22 10:49:42 -05001226 output_sep(opt, sign);
1227 }
Taylor Blauc707ded2018-07-03 16:51:56 -05001228}
1229
Jeff King1a845fb2021-09-20 23:49:49 -04001230static void show_line(struct grep_opt *opt,
1231 const char *bol, const char *eol,
Taylor Blauc707ded2018-07-03 16:51:56 -05001232 const char *name, unsigned lno, ssize_t cno, char sign)
1233{
1234 int rest = eol - bol;
Taylor Blau9d8db062018-07-09 15:33:47 -05001235 const char *match_color = NULL;
1236 const char *line_color = NULL;
Taylor Blauc707ded2018-07-03 16:51:56 -05001237
1238 if (opt->file_break && opt->last_shown == 0) {
1239 if (opt->show_hunk_mark)
1240 opt->output(opt, "\n", 1);
1241 } else if (opt->pre_context || opt->post_context || opt->funcbody) {
1242 if (opt->last_shown == 0) {
1243 if (opt->show_hunk_mark) {
Junio C Hamano87ece7c2018-08-02 15:30:44 -07001244 output_color(opt, "--", 2, opt->colors[GREP_COLOR_SEP]);
Taylor Blauc707ded2018-07-03 16:51:56 -05001245 opt->output(opt, "\n", 1);
1246 }
1247 } else if (lno > opt->last_shown + 1) {
Junio C Hamano87ece7c2018-08-02 15:30:44 -07001248 output_color(opt, "--", 2, opt->colors[GREP_COLOR_SEP]);
Taylor Blauc707ded2018-07-03 16:51:56 -05001249 opt->output(opt, "\n", 1);
1250 }
1251 }
Taylor Blau9d8db062018-07-09 15:33:47 -05001252 if (!opt->only_matching) {
1253 /*
1254 * In case the line we're being called with contains more than
1255 * one match, leave printing each header to the loop below.
1256 */
1257 show_line_header(opt, name, lno, cno, sign);
1258 }
1259 if (opt->color || opt->only_matching) {
René Scharfe7e8f59d2009-03-07 13:32:32 +01001260 regmatch_t match;
1261 enum grep_context ctx = GREP_CONTEXT_BODY;
René Scharfe7e8f59d2009-03-07 13:32:32 +01001262 int eflags = 0;
1263
Taylor Blau9d8db062018-07-09 15:33:47 -05001264 if (opt->color) {
1265 if (sign == ':')
Junio C Hamano87ece7c2018-08-02 15:30:44 -07001266 match_color = opt->colors[GREP_COLOR_MATCH_SELECTED];
Taylor Blau9d8db062018-07-09 15:33:47 -05001267 else
Junio C Hamano87ece7c2018-08-02 15:30:44 -07001268 match_color = opt->colors[GREP_COLOR_MATCH_CONTEXT];
Taylor Blau9d8db062018-07-09 15:33:47 -05001269 if (sign == ':')
Junio C Hamano87ece7c2018-08-02 15:30:44 -07001270 line_color = opt->colors[GREP_COLOR_SELECTED];
Taylor Blau9d8db062018-07-09 15:33:47 -05001271 else if (sign == '-')
Junio C Hamano87ece7c2018-08-02 15:30:44 -07001272 line_color = opt->colors[GREP_COLOR_CONTEXT];
Taylor Blau9d8db062018-07-09 15:33:47 -05001273 else if (sign == '=')
Junio C Hamano87ece7c2018-08-02 15:30:44 -07001274 line_color = opt->colors[GREP_COLOR_FUNCTION];
Taylor Blau9d8db062018-07-09 15:33:47 -05001275 }
Hamza Mahfooz3f566c42021-09-29 07:57:15 -04001276 while (grep_next_match(opt, bol, eol, ctx, &match,
1277 GREP_HEADER_FIELD_MAX, eflags)) {
René Scharfe1f5b9cc2009-06-01 23:53:05 +02001278 if (match.rm_so == match.rm_eo)
1279 break;
Fredrik Kuivinen5b594f42010-01-25 23:51:39 +01001280
Taylor Blau9d8db062018-07-09 15:33:47 -05001281 if (opt->only_matching)
1282 show_line_header(opt, name, lno, cno, sign);
1283 else
1284 output_color(opt, bol, match.rm_so, line_color);
Mark Lodato55f638b2010-03-07 11:52:46 -05001285 output_color(opt, bol + match.rm_so,
René Scharfe79a77102014-10-27 19:23:05 +01001286 match.rm_eo - match.rm_so, match_color);
Taylor Blau9d8db062018-07-09 15:33:47 -05001287 if (opt->only_matching)
1288 opt->output(opt, "\n", 1);
René Scharfe7e8f59d2009-03-07 13:32:32 +01001289 bol += match.rm_eo;
Taylor Blau9d8db062018-07-09 15:33:47 -05001290 cno += match.rm_eo;
René Scharfe7e8f59d2009-03-07 13:32:32 +01001291 rest -= match.rm_eo;
1292 eflags = REG_NOTBOL;
1293 }
René Scharfe7e8f59d2009-03-07 13:32:32 +01001294 }
Taylor Blau9d8db062018-07-09 15:33:47 -05001295 if (!opt->only_matching) {
1296 output_color(opt, bol, rest, line_color);
1297 opt->output(opt, "\n", 1);
1298 }
René Scharfe7e8f59d2009-03-07 13:32:32 +01001299}
1300
/* Non-zero when the grep_attr_lock()/grep_attr_unlock() helpers should lock. */
int grep_use_locks;

/*
 * This lock protects access to the gitattributes machinery, which is
 * not thread-safe.
 */
pthread_mutex_t grep_attr_mutex;

/* Take grep_attr_mutex, but only when locking is enabled. */
static inline void grep_attr_lock(void)
{
	if (!grep_use_locks)
		return;
	pthread_mutex_lock(&grep_attr_mutex);
}

/* Release grep_attr_mutex, but only when locking is enabled. */
static inline void grep_attr_unlock(void)
{
	if (!grep_use_locks)
		return;
	pthread_mutex_unlock(&grep_attr_mutex);
}
Jeff Kingb3aeb282012-02-02 03:18:41 -05001320
Jeff King1a845fb2021-09-20 23:49:49 -04001321static int match_funcname(struct grep_opt *opt, struct grep_source *gs,
1322 const char *bol, const char *eol)
René Scharfe2944e4e2009-07-02 00:06:34 +02001323{
René Scharfe60ecac92009-07-02 00:07:24 +02001324 xdemitconf_t *xecfg = opt->priv;
Thomas Rast0579f912011-12-12 22:16:07 +01001325 if (xecfg && !xecfg->find_func) {
Nguyễn Thái Ngọc Duyacd00ea2018-09-21 17:57:33 +02001326 grep_source_load_driver(gs, opt->repo->index);
Jeff King94ad9d92012-02-02 03:20:43 -05001327 if (gs->driver->funcname.pattern) {
1328 const struct userdiff_funcname *pe = &gs->driver->funcname;
Thomas Rast0579f912011-12-12 22:16:07 +01001329 xdiff_set_find_func(xecfg, pe->pattern, pe->cflags);
1330 } else {
1331 xecfg = opt->priv = NULL;
1332 }
1333 }
1334
1335 if (xecfg) {
René Scharfe60ecac92009-07-02 00:07:24 +02001336 char buf[1];
1337 return xecfg->find_func(bol, eol - bol, buf, 1,
1338 xecfg->find_func_priv) >= 0;
1339 }
1340
René Scharfe2944e4e2009-07-02 00:06:34 +02001341 if (bol == eol)
1342 return 0;
1343 if (isalpha(*bol) || *bol == '_' || *bol == '$')
1344 return 1;
1345 return 0;
1346}
1347
Jeff Kinge1327022012-02-02 03:19:28 -05001348static void show_funcname_line(struct grep_opt *opt, struct grep_source *gs,
Jeff King1a845fb2021-09-20 23:49:49 -04001349 const char *bol, unsigned lno)
René Scharfe2944e4e2009-07-02 00:06:34 +02001350{
Jeff Kinge1327022012-02-02 03:19:28 -05001351 while (bol > gs->buf) {
Jeff King1a845fb2021-09-20 23:49:49 -04001352 const char *eol = --bol;
René Scharfe2944e4e2009-07-02 00:06:34 +02001353
Jeff Kinge1327022012-02-02 03:19:28 -05001354 while (bol > gs->buf && bol[-1] != '\n')
René Scharfe2944e4e2009-07-02 00:06:34 +02001355 bol--;
1356 lno--;
1357
1358 if (lno <= opt->last_shown)
1359 break;
1360
Jeff Kinge1327022012-02-02 03:19:28 -05001361 if (match_funcname(opt, gs, bol, eol)) {
Taylor Blau89252cd2018-06-22 10:49:42 -05001362 show_line(opt, bol, eol, gs->name, lno, 0, '=');
René Scharfe2944e4e2009-07-02 00:06:34 +02001363 break;
1364 }
1365 }
1366}
1367
René Scharfea5dc20b2017-11-18 19:08:08 +01001368static int is_empty_line(const char *bol, const char *eol);
1369
Jeff Kinge1327022012-02-02 03:19:28 -05001370static void show_pre_context(struct grep_opt *opt, struct grep_source *gs,
Jeff King1a845fb2021-09-20 23:49:49 -04001371 const char *bol, const char *end, unsigned lno)
René Scharfe49de3212009-07-02 00:05:17 +02001372{
René Scharfe6653a012017-11-18 19:07:13 +01001373 unsigned cur = lno, from = 1, funcname_lno = 0, orig_from;
René Scharfea5dc20b2017-11-18 19:08:08 +01001374 int funcname_needed = !!opt->funcname, comment_needed = 0;
René Scharfeba8ea742011-08-01 19:20:53 +02001375
René Scharfe49de3212009-07-02 00:05:17 +02001376 if (opt->pre_context < lno)
1377 from = lno - opt->pre_context;
1378 if (from <= opt->last_shown)
1379 from = opt->last_shown + 1;
René Scharfe6653a012017-11-18 19:07:13 +01001380 orig_from = from;
René Scharfea5dc20b2017-11-18 19:08:08 +01001381 if (opt->funcbody) {
1382 if (match_funcname(opt, gs, bol, end))
1383 comment_needed = 1;
1384 else
1385 funcname_needed = 1;
René Scharfe6653a012017-11-18 19:07:13 +01001386 from = opt->last_shown + 1;
1387 }
René Scharfe49de3212009-07-02 00:05:17 +02001388
1389 /* Rewind. */
René Scharfe6653a012017-11-18 19:07:13 +01001390 while (bol > gs->buf && cur > from) {
Jeff King1a845fb2021-09-20 23:49:49 -04001391 const char *next_bol = bol;
1392 const char *eol = --bol;
René Scharfe2944e4e2009-07-02 00:06:34 +02001393
Jeff Kinge1327022012-02-02 03:19:28 -05001394 while (bol > gs->buf && bol[-1] != '\n')
René Scharfe49de3212009-07-02 00:05:17 +02001395 bol--;
1396 cur--;
René Scharfea5dc20b2017-11-18 19:08:08 +01001397 if (comment_needed && (is_empty_line(bol, eol) ||
1398 match_funcname(opt, gs, bol, eol))) {
1399 comment_needed = 0;
1400 from = orig_from;
1401 if (cur < from) {
1402 cur++;
1403 bol = next_bol;
1404 break;
1405 }
1406 }
Jeff Kinge1327022012-02-02 03:19:28 -05001407 if (funcname_needed && match_funcname(opt, gs, bol, eol)) {
René Scharfe2944e4e2009-07-02 00:06:34 +02001408 funcname_lno = cur;
1409 funcname_needed = 0;
René Scharfea5dc20b2017-11-18 19:08:08 +01001410 if (opt->funcbody)
1411 comment_needed = 1;
1412 else
1413 from = orig_from;
René Scharfe2944e4e2009-07-02 00:06:34 +02001414 }
René Scharfe49de3212009-07-02 00:05:17 +02001415 }
1416
René Scharfe2944e4e2009-07-02 00:06:34 +02001417 /* We need to look even further back to find a function signature. */
1418 if (opt->funcname && funcname_needed)
Jeff Kinge1327022012-02-02 03:19:28 -05001419 show_funcname_line(opt, gs, bol, cur);
René Scharfe2944e4e2009-07-02 00:06:34 +02001420
René Scharfe49de3212009-07-02 00:05:17 +02001421 /* Back forward. */
1422 while (cur < lno) {
Jeff King1a845fb2021-09-20 23:49:49 -04001423 const char *eol = bol, sign = (cur == funcname_lno) ? '=' : '-';
René Scharfe49de3212009-07-02 00:05:17 +02001424
1425 while (*eol != '\n')
1426 eol++;
Taylor Blau89252cd2018-06-22 10:49:42 -05001427 show_line(opt, bol, eol, gs->name, cur, 0, sign);
René Scharfe49de3212009-07-02 00:05:17 +02001428 bol = eol + 1;
1429 cur++;
1430 }
1431}
1432
Junio C Hamanoa26345b2010-01-10 22:39:36 -08001433static int should_lookahead(struct grep_opt *opt)
1434{
1435 struct grep_pat *p;
1436
Ævar Arnfjörð Bjarmasondb843762022-10-11 11:48:45 +02001437 if (opt->pattern_expression)
Junio C Hamanoa26345b2010-01-10 22:39:36 -08001438 return 0; /* punt for too complex stuff */
1439 if (opt->invert)
1440 return 0;
1441 for (p = opt->pattern_list; p; p = p->next) {
1442 if (p->token != GREP_PATTERN)
1443 return 0; /* punt for "header only" and stuff */
1444 }
1445 return 1;
1446}
1447
1448static int look_ahead(struct grep_opt *opt,
1449 unsigned long *left_p,
1450 unsigned *lno_p,
Jeff King1a845fb2021-09-20 23:49:49 -04001451 const char **bol_p)
Junio C Hamanoa26345b2010-01-10 22:39:36 -08001452{
1453 unsigned lno = *lno_p;
Jeff King1a845fb2021-09-20 23:49:49 -04001454 const char *bol = *bol_p;
Junio C Hamanoa26345b2010-01-10 22:39:36 -08001455 struct grep_pat *p;
Jeff King1a845fb2021-09-20 23:49:49 -04001456 const char *sp, *last_bol;
Junio C Hamanoa26345b2010-01-10 22:39:36 -08001457 regoff_t earliest = -1;
1458
1459 for (p = opt->pattern_list; p; p = p->next) {
1460 int hit;
1461 regmatch_t m;
1462
Michał Kiedrowicz97e77782011-05-05 00:00:19 +02001463 hit = patmatch(p, bol, bol + *left_p, &m, 0);
Junio C Hamanoa26345b2010-01-10 22:39:36 -08001464 if (!hit || m.rm_so < 0 || m.rm_eo < 0)
1465 continue;
1466 if (earliest < 0 || m.rm_so < earliest)
1467 earliest = m.rm_so;
1468 }
1469
1470 if (earliest < 0) {
1471 *bol_p = bol + *left_p;
1472 *left_p = 0;
1473 return 1;
1474 }
1475 for (sp = bol + earliest; bol < sp && sp[-1] != '\n'; sp--)
1476 ; /* find the beginning of the line */
1477 last_bol = sp;
1478
1479 for (sp = bol; sp < last_bol; sp++) {
1480 if (*sp == '\n')
1481 lno++;
1482 }
1483 *left_p -= last_bol - bol;
1484 *bol_p = last_bol;
1485 *lno_p = lno;
1486 return 0;
1487}
1488
Nguyễn Thái Ngọc Duy38bbc2e2018-09-21 17:57:23 +02001489static int fill_textconv_grep(struct repository *r,
1490 struct userdiff_driver *driver,
Jeff King335ec3b2013-05-10 17:10:15 +02001491 struct grep_source *gs)
1492{
1493 struct diff_filespec *df;
1494 char *buf;
1495 size_t size;
1496
1497 if (!driver || !driver->textconv)
1498 return grep_source_load(gs);
1499
1500 /*
1501 * The textconv interface is intimately tied to diff_filespecs, so we
1502 * have to pretend to be one. If we could unify the grep_source
1503 * and diff_filespec structs, this mess could just go away.
1504 */
1505 df = alloc_filespec(gs->path);
1506 switch (gs->type) {
Brandon Williams1c41c822017-05-30 10:30:44 -07001507 case GREP_SOURCE_OID:
Jeff King335ec3b2013-05-10 17:10:15 +02001508 fill_filespec(df, gs->identifier, 1, 0100644);
1509 break;
1510 case GREP_SOURCE_FILE:
brian m. carlson14228442021-04-26 01:02:56 +00001511 fill_filespec(df, null_oid(), 0, 0100644);
Jeff King335ec3b2013-05-10 17:10:15 +02001512 break;
1513 default:
Johannes Schindelin033abf92018-05-02 11:38:39 +02001514 BUG("attempt to textconv something without a path?");
Jeff King335ec3b2013-05-10 17:10:15 +02001515 }
1516
1517 /*
Matheus Tavares1d1729c2020-01-15 23:39:54 -03001518 * fill_textconv is not remotely thread-safe; it modifies the global
1519 * diff tempfile structure, writes to the_repo's odb and might
1520 * internally call thread-unsafe functions such as the
1521 * prepare_packed_git() lazy-initializator. Because of the last two, we
1522 * must ensure mutual exclusion between this call and the object reading
1523 * API, thus we use obj_read_lock() here.
1524 *
1525 * TODO: allowing text conversion to run in parallel with object
1526 * reading operations might increase performance in the multithreaded
1527 * non-worktreee git-grep with --textconv.
Jeff King335ec3b2013-05-10 17:10:15 +02001528 */
Matheus Tavares1d1729c2020-01-15 23:39:54 -03001529 obj_read_lock();
Nguyễn Thái Ngọc Duy38bbc2e2018-09-21 17:57:23 +02001530 size = fill_textconv(r, driver, df, &buf);
Matheus Tavares1d1729c2020-01-15 23:39:54 -03001531 obj_read_unlock();
Jeff King335ec3b2013-05-10 17:10:15 +02001532 free_filespec(df);
1533
1534 /*
1535 * The normal fill_textconv usage by the diff machinery would just keep
1536 * the textconv'd buf separate from the diff_filespec. But much of the
1537 * grep code passes around a grep_source and assumes that its "buf"
1538 * pointer is the beginning of the thing we are searching. So let's
1539 * install our textconv'd version into the grep_source, taking care not
1540 * to leak any existing buffer.
1541 */
1542 grep_source_clear_data(gs);
1543 gs->buf = buf;
1544 gs->size = size;
1545
1546 return 0;
1547}
1548
/*
 * Return 1 if the range [bol, eol) consists solely of whitespace
 * (an empty range counts), 0 otherwise.
 */
static int is_empty_line(const char *bol, const char *eol)
{
	const char *p = bol;

	for (; p < eol; p++)
		if (!isspace(*p))
			break;
	return p == eol;
}
1555
/*
 * Scan the contents of "gs" line by line against the patterns in "opt".
 *
 * With collect_hits set, each line is still handed to match_line() (which
 * receives the flag), but nothing is printed and 0 is returned once the
 * buffer has been walked; grep_source() uses this as its first pass.
 *
 * Otherwise hits and any requested context are emitted through
 * opt->output (std_output if none was set), and the return value is
 * nonzero when the source counts as a hit under the selected mode
 * (status_only, name_only, unmatch_name_only, count, or normal output),
 * 0 otherwise.  0 is also returned when the contents cannot be loaded or
 * when a binary source is skipped under GREP_BINARY_NOMATCH.
 */
static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int collect_hits)
{
	const char *bol;		/* beginning of the current line */
	const char *peek_bol = NULL;	/* how far the funcbody peek has looked */
	unsigned long left;		/* bytes remaining from bol onwards */
	unsigned lno = 1;		/* 1-based number of the current line */
	unsigned last_hit = 0;		/* line number of the last shown hit, 0 if none */
	int binary_match_only = 0;
	unsigned count = 0;		/* number of hits seen so far */
	int try_lookahead = 0;
	int show_function = 0;		/* set after a hit when opt->funcbody is on */
	struct userdiff_driver *textconv = NULL;
	enum grep_context ctx = GREP_CONTEXT_HEAD;
	xdemitconf_t xecfg;

	if (!opt->status_only && gs->name == NULL)
		BUG("grep call which could print a name requires "
		    "grep_source.name be non-NULL");

	if (!opt->output)
		opt->output = std_output;

	if (opt->pre_context || opt->post_context || opt->file_break ||
	    opt->funcbody) {
		/* Show hunk marks, except for the first file. */
		if (opt->last_shown)
			opt->show_hunk_mark = 1;
		/*
		 * If we're using threads then we can't easily identify
		 * the first file.  Always put hunk marks in that case
		 * and skip the very first one later in work_done().
		 */
		if (opt->output != std_output)
			opt->show_hunk_mark = 1;
	}
	opt->last_shown = 0;

	if (opt->allow_textconv) {
		grep_source_load_driver(gs, opt->repo->index);
		/*
		 * We might set up the shared textconv cache data here, which
		 * is not thread-safe. Also, get_oid_with_context() and
		 * parse_object() might be internally called. As they are not
		 * currently thread-safe and might be racy with object reading,
		 * obj_read_lock() must be called.
		 */
		grep_attr_lock();
		obj_read_lock();
		textconv = userdiff_get_textconv(opt->repo, gs->driver);
		obj_read_unlock();
		grep_attr_unlock();
	}

	/*
	 * We know the result of a textconv is text, so we only have to care
	 * about binary handling if we are not using it.
	 */
	if (!textconv) {
		switch (opt->binary) {
		case GREP_BINARY_DEFAULT:
			if (grep_source_is_binary(gs, opt->repo->index))
				binary_match_only = 1;
			break;
		case GREP_BINARY_NOMATCH:
			if (grep_source_is_binary(gs, opt->repo->index))
				return 0; /* Assume unmatch */
			break;
		case GREP_BINARY_TEXT:
			break;
		default:
			BUG("unknown binary handling mode");
		}
	}

	/*
	 * opt->priv points at a local for the duration of the scan; it is
	 * reset to NULL only on the paths that reach the cleanup near the
	 * end of this function, not on the early returns.
	 */
	memset(&xecfg, 0, sizeof(xecfg));
	opt->priv = &xecfg;

	try_lookahead = should_lookahead(opt);

	/* Load (and possibly textconv) the data; give up quietly on failure. */
	if (fill_textconv_grep(opt->repo, textconv, gs) < 0)
		return 0;

	bol = gs->buf;
	left = gs->size;
	while (left) {
		const char *eol;
		int hit;
		ssize_t cno;
		ssize_t col = -1, icol = -1;

		/*
		 * look_ahead() skips quickly to the line that possibly
		 * has the next hit; don't call it if we need to do
		 * something more than just skipping the current line
		 * in response to an unmatch for the current line.  E.g.
		 * inside a post-context window, we will show the current
		 * line as a context around the previous hit when it
		 * doesn't hit.
		 */
		if (try_lookahead
		    && !(last_hit
			 && (show_function ||
			     lno <= last_hit + opt->post_context))
		    && look_ahead(opt, &left, &lno, &bol))
			break;
		eol = end_of_line(bol, &left);

		/* The first empty line switches the context from head to body. */
		if ((ctx == GREP_CONTEXT_HEAD) && (eol == bol))
			ctx = GREP_CONTEXT_BODY;

		hit = match_line(opt, bol, eol, &col, &icol, ctx, collect_hits);

		/* In the collecting pass nothing below (output, counting) applies. */
		if (collect_hits)
			goto next_line;

		/* "grep -v -e foo -e bla" should list lines
		 * that do not have either, so inversion should
		 * be done outside.
		 */
		if (opt->invert)
			hit = !hit;
		if (opt->unmatch_name_only) {
			/* A single hit disqualifies the source in this mode. */
			if (hit)
				return 0;
			goto next_line;
		}
		/* A negative max_count means "no limit". */
		if (hit && (opt->max_count < 0 || count < opt->max_count)) {
			count++;
			if (opt->status_only)
				return 1;
			if (opt->name_only) {
				show_name(opt, gs->name);
				return 1;
			}
			/* Counting only: the total is printed after the loop. */
			if (opt->count)
				goto next_line;
			if (binary_match_only) {
				opt->output(opt, "Binary file ", 12);
				output_color(opt, gs->name, strlen(gs->name),
					     opt->colors[GREP_COLOR_FILENAME]);
				opt->output(opt, " matches\n", 9);
				return 1;
			}
			/* Hit at this line.  If we haven't shown the
			 * pre-context lines, we would need to show them.
			 */
			if (opt->pre_context || opt->funcbody)
				show_pre_context(opt, gs, bol, eol, lno);
			else if (opt->funcname)
				show_funcname_line(opt, gs, bol, lno);
			cno = opt->invert ? icol : col;
			if (cno < 0) {
				/*
				 * A negative cno indicates that there was no
				 * match on the line. We are thus inverted and
				 * being asked to show all lines that _don't_
				 * match a given expression. Therefore, set cno
				 * to 0 to suggest the whole line matches.
				 */
				cno = 0;
			}
			show_line(opt, bol, eol, gs->name, lno, cno + 1, ':');
			last_hit = lno;
			if (opt->funcbody)
				show_function = 1;
			goto next_line;
		}
		/* Only peek again once we have moved past the last peeked line. */
		if (show_function && (!peek_bol || peek_bol < bol)) {
			unsigned long peek_left = left;
			const char *peek_eol = eol;

			/*
			 * Trailing empty lines are not interesting.
			 * Peek past them to see if they belong to the
			 * body of the current function.
			 */
			peek_bol = bol;
			while (is_empty_line(peek_bol, peek_eol)) {
				peek_bol = peek_eol + 1;
				peek_eol = end_of_line(peek_bol, &peek_left);
			}

			/* A funcname match on the next non-empty line ends the body. */
			if (match_funcname(opt, gs, peek_bol, peek_eol))
				show_function = 0;
		}
		if (show_function ||
		    (last_hit && lno <= last_hit + opt->post_context)) {
			/* If the last hit is within the post context,
			 * we need to show this line.
			 */
			show_line(opt, bol, eol, gs->name, lno, col + 1, '-');
		}

	next_line:
		/* Step over the byte at eol, unless the buffer is exhausted. */
		bol = eol + 1;
		if (!left)
			break;
		left--;
		lno++;
	}

	if (collect_hits)
		return 0;

	/* No early return happened inside the loop. */
	if (opt->status_only)
		return opt->unmatch_name_only;
	if (opt->unmatch_name_only) {
		/* We did not see any hit, so we want to show this */
		show_name(opt, gs->name);
		return 1;
	}

	xdiff_clear_find_func(&xecfg);
	opt->priv = NULL;

	/* NEEDSWORK:
	 * The real "grep -c foo *.c" gives many "bar.c:0" lines,
	 * which feels mostly useless but sometimes useful.  Maybe
	 * make it another option?  For now suppress them.
	 */
	if (opt->count && count) {
		char buf[32];
		if (opt->pathname) {
			output_color(opt, gs->name, strlen(gs->name),
				     opt->colors[GREP_COLOR_FILENAME]);
			output_sep(opt, ':');
		}
		xsnprintf(buf, sizeof(buf), "%u\n", count);
		opt->output(opt, buf, strlen(buf));
		return 1;
	}
	return !!last_hit;
}
1789
Junio C Hamano0ab7bef2006-09-27 17:50:52 -07001790static void clr_hit_marker(struct grep_expr *x)
1791{
1792 /* All-hit markers are meaningful only at the very top level
1793 * OR node.
1794 */
1795 while (1) {
1796 x->hit = 0;
1797 if (x->node != GREP_NODE_OR)
1798 return;
1799 x->u.binary.left->hit = 0;
1800 x = x->u.binary.right;
1801 }
1802}
1803
1804static int chk_hit_marker(struct grep_expr *x)
1805{
1806 /* Top level nodes have hit markers. See if they all are hits */
1807 while (1) {
1808 if (x->node != GREP_NODE_OR)
1809 return x->hit;
1810 if (!x->u.binary.left->hit)
1811 return 0;
1812 x = x->u.binary.right;
1813 }
1814}
1815
Jeff Kinge1327022012-02-02 03:19:28 -05001816int grep_source(struct grep_opt *opt, struct grep_source *gs)
Junio C Hamano0ab7bef2006-09-27 17:50:52 -07001817{
1818 /*
1819 * we do not have to do the two-pass grep when we do not check
1820 * buffer-wide "all-match".
1821 */
René Scharfe794c0002021-12-17 17:48:49 +01001822 if (!opt->all_match && !opt->no_body_match)
Jeff Kinge1327022012-02-02 03:19:28 -05001823 return grep_source_1(opt, gs, 0);
Junio C Hamano0ab7bef2006-09-27 17:50:52 -07001824
1825 /* Otherwise the toplevel "or" terms hit a bit differently.
1826 * We first clear hit markers from them.
1827 */
1828 clr_hit_marker(opt->pattern_expression);
René Scharfe794c0002021-12-17 17:48:49 +01001829 opt->body_hit = 0;
Jeff Kinge1327022012-02-02 03:19:28 -05001830 grep_source_1(opt, gs, 1);
Junio C Hamano0ab7bef2006-09-27 17:50:52 -07001831
René Scharfe794c0002021-12-17 17:48:49 +01001832 if (opt->all_match && !chk_hit_marker(opt->pattern_expression))
1833 return 0;
1834 if (opt->no_body_match && opt->body_hit)
Junio C Hamano0ab7bef2006-09-27 17:50:52 -07001835 return 0;
1836
Jeff Kinge1327022012-02-02 03:19:28 -05001837 return grep_source_1(opt, gs, 0);
1838}
1839
Jeff King1e668712021-09-20 23:51:28 -04001840static void grep_source_init_buf(struct grep_source *gs,
1841 const char *buf,
Jonathan Tan50d92b52021-08-16 14:09:53 -07001842 unsigned long size)
1843{
1844 gs->type = GREP_SOURCE_BUF;
1845 gs->name = NULL;
1846 gs->path = NULL;
1847 gs->buf = buf;
1848 gs->size = size;
1849 gs->driver = NULL;
1850 gs->identifier = NULL;
1851}
1852
Jeff King1e668712021-09-20 23:51:28 -04001853int grep_buffer(struct grep_opt *opt, const char *buf, unsigned long size)
Jeff Kinge1327022012-02-02 03:19:28 -05001854{
1855 struct grep_source gs;
1856 int r;
1857
Jonathan Tan50d92b52021-08-16 14:09:53 -07001858 grep_source_init_buf(&gs, buf, size);
Jeff Kinge1327022012-02-02 03:19:28 -05001859
1860 r = grep_source(opt, &gs);
1861
1862 grep_source_clear(&gs);
1863 return r;
1864}
1865
Jonathan Tan50d92b52021-08-16 14:09:53 -07001866void grep_source_init_file(struct grep_source *gs, const char *name,
1867 const char *path)
Jeff Kinge1327022012-02-02 03:19:28 -05001868{
Jonathan Tan50d92b52021-08-16 14:09:53 -07001869 gs->type = GREP_SOURCE_FILE;
Jeff King8c53f072015-01-12 20:59:09 -05001870 gs->name = xstrdup_or_null(name);
1871 gs->path = xstrdup_or_null(path);
Jeff Kinge1327022012-02-02 03:19:28 -05001872 gs->buf = NULL;
1873 gs->size = 0;
Jeff King94ad9d92012-02-02 03:20:43 -05001874 gs->driver = NULL;
Jonathan Tan50d92b52021-08-16 14:09:53 -07001875 gs->identifier = xstrdup(path);
1876}
Jeff Kinge1327022012-02-02 03:19:28 -05001877
Jonathan Tan50d92b52021-08-16 14:09:53 -07001878void grep_source_init_oid(struct grep_source *gs, const char *name,
Jonathan Tan06938062021-08-16 14:09:56 -07001879 const char *path, const struct object_id *oid,
1880 struct repository *repo)
Jonathan Tan50d92b52021-08-16 14:09:53 -07001881{
1882 gs->type = GREP_SOURCE_OID;
1883 gs->name = xstrdup_or_null(name);
1884 gs->path = xstrdup_or_null(path);
1885 gs->buf = NULL;
1886 gs->size = 0;
1887 gs->driver = NULL;
1888 gs->identifier = oiddup(oid);
Jonathan Tan06938062021-08-16 14:09:56 -07001889 gs->repo = repo;
Jeff Kinge1327022012-02-02 03:19:28 -05001890}
1891
1892void grep_source_clear(struct grep_source *gs)
1893{
Ævar Arnfjörð Bjarmason88ce3ef2017-06-15 23:15:49 +00001894 FREE_AND_NULL(gs->name);
1895 FREE_AND_NULL(gs->path);
1896 FREE_AND_NULL(gs->identifier);
Jeff Kinge1327022012-02-02 03:19:28 -05001897 grep_source_clear_data(gs);
1898}
1899
1900void grep_source_clear_data(struct grep_source *gs)
1901{
1902 switch (gs->type) {
1903 case GREP_SOURCE_FILE:
Brandon Williams1c41c822017-05-30 10:30:44 -07001904 case GREP_SOURCE_OID:
Jeff King1e668712021-09-20 23:51:28 -04001905 /* these types own the buffer */
1906 free((char *)gs->buf);
1907 gs->buf = NULL;
Jeff Kinge1327022012-02-02 03:19:28 -05001908 gs->size = 0;
1909 break;
1910 case GREP_SOURCE_BUF:
1911 /* leave user-provided buf intact */
1912 break;
1913 }
1914}
1915
Brandon Williams1c41c822017-05-30 10:30:44 -07001916static int grep_source_load_oid(struct grep_source *gs)
Jeff Kinge1327022012-02-02 03:19:28 -05001917{
1918 enum object_type type;
1919
Jonathan Tan06938062021-08-16 14:09:56 -07001920 gs->buf = repo_read_object_file(gs->repo, gs->identifier, &type,
1921 &gs->size);
Jeff Kinge1327022012-02-02 03:19:28 -05001922 if (!gs->buf)
1923 return error(_("'%s': unable to read %s"),
1924 gs->name,
Brandon Williams1c41c822017-05-30 10:30:44 -07001925 oid_to_hex(gs->identifier));
Jeff Kinge1327022012-02-02 03:19:28 -05001926 return 0;
1927}
1928
1929static int grep_source_load_file(struct grep_source *gs)
1930{
1931 const char *filename = gs->identifier;
1932 struct stat st;
1933 char *data;
1934 size_t size;
1935 int i;
1936
1937 if (lstat(filename, &st) < 0) {
1938 err_ret:
1939 if (errno != ENOENT)
Nguyễn Thái Ngọc Duy7645d8f2016-05-08 16:47:47 +07001940 error_errno(_("failed to stat '%s'"), filename);
Jeff Kinge1327022012-02-02 03:19:28 -05001941 return -1;
1942 }
1943 if (!S_ISREG(st.st_mode))
1944 return -1;
1945 size = xsize_t(st.st_size);
1946 i = open(filename, O_RDONLY);
1947 if (i < 0)
1948 goto err_ret;
Jeff King3733e692016-02-22 17:44:28 -05001949 data = xmallocz(size);
Jeff Kinge1327022012-02-02 03:19:28 -05001950 if (st.st_size != read_in_full(i, data, size)) {
Nguyễn Thái Ngọc Duy7645d8f2016-05-08 16:47:47 +07001951 error_errno(_("'%s': short read"), filename);
Jeff Kinge1327022012-02-02 03:19:28 -05001952 close(i);
1953 free(data);
1954 return -1;
1955 }
1956 close(i);
Jeff Kinge1327022012-02-02 03:19:28 -05001957
1958 gs->buf = data;
1959 gs->size = size;
1960 return 0;
1961}
1962
Junio C Hamano30833012012-09-20 14:20:09 -07001963static int grep_source_load(struct grep_source *gs)
Jeff Kinge1327022012-02-02 03:19:28 -05001964{
1965 if (gs->buf)
1966 return 0;
1967
1968 switch (gs->type) {
1969 case GREP_SOURCE_FILE:
1970 return grep_source_load_file(gs);
Brandon Williams1c41c822017-05-30 10:30:44 -07001971 case GREP_SOURCE_OID:
1972 return grep_source_load_oid(gs);
Jeff Kinge1327022012-02-02 03:19:28 -05001973 case GREP_SOURCE_BUF:
1974 return gs->buf ? 0 : -1;
1975 }
Johannes Schindelin033abf92018-05-02 11:38:39 +02001976 BUG("invalid grep_source type to load");
Junio C Hamano0ab7bef2006-09-27 17:50:52 -07001977}
Jeff King94ad9d92012-02-02 03:20:43 -05001978
Nguyễn Thái Ngọc Duyacd00ea2018-09-21 17:57:33 +02001979void grep_source_load_driver(struct grep_source *gs,
1980 struct index_state *istate)
Jeff King94ad9d92012-02-02 03:20:43 -05001981{
1982 if (gs->driver)
1983 return;
1984
1985 grep_attr_lock();
Matheus Tavares1d1729c2020-01-15 23:39:54 -03001986 if (gs->path)
Nguyễn Thái Ngọc Duyacd00ea2018-09-21 17:57:33 +02001987 gs->driver = userdiff_find_by_path(istate, gs->path);
Jeff King94ad9d92012-02-02 03:20:43 -05001988 if (!gs->driver)
1989 gs->driver = userdiff_find_by_name("default");
1990 grep_attr_unlock();
1991}
Jeff King41b59bf2012-02-02 03:21:02 -05001992
Nguyễn Thái Ngọc Duyacd00ea2018-09-21 17:57:33 +02001993static int grep_source_is_binary(struct grep_source *gs,
1994 struct index_state *istate)
Jeff King41b59bf2012-02-02 03:21:02 -05001995{
Nguyễn Thái Ngọc Duyacd00ea2018-09-21 17:57:33 +02001996 grep_source_load_driver(gs, istate);
Jeff King41b59bf2012-02-02 03:21:02 -05001997 if (gs->driver->binary != -1)
1998 return gs->driver->binary;
1999
2000 if (!grep_source_load(gs))
2001 return buffer_is_binary(gs->buf, gs->size);
2002
2003 return 0;
2004}