Junio C Hamano | cf1b786 | 2007-12-06 00:14:14 -0800 | [diff] [blame] | 1 | /* |
| 2 | * Whitespace rules |
| 3 | * |
| 4 | * Copyright (c) 2007 Junio C Hamano |
| 5 | */ |
| 6 | |
| 7 | #include "cache.h" |
| 8 | #include "attr.h" |
| 9 | |
| 10 | static struct whitespace_rule { |
| 11 | const char *rule_name; |
| 12 | unsigned rule_bits; |
Junio C Hamano | a437900 | 2009-06-21 02:35:18 -0700 | [diff] [blame] | 13 | unsigned loosens_error; |
Junio C Hamano | cf1b786 | 2007-12-06 00:14:14 -0800 | [diff] [blame] | 14 | } whitespace_rule_names[] = { |
Junio C Hamano | a437900 | 2009-06-21 02:35:18 -0700 | [diff] [blame] | 15 | { "trailing-space", WS_TRAILING_SPACE, 0 }, |
| 16 | { "space-before-tab", WS_SPACE_BEFORE_TAB, 0 }, |
| 17 | { "indent-with-non-tab", WS_INDENT_WITH_NON_TAB, 0 }, |
| 18 | { "cr-at-eol", WS_CR_AT_EOL, 1 }, |
Junio C Hamano | afd9db4 | 2009-09-15 03:28:08 -0700 | [diff] [blame] | 19 | { "blank-at-eol", WS_BLANK_AT_EOL, 0 }, |
| 20 | { "blank-at-eof", WS_BLANK_AT_EOF, 0 }, |
Junio C Hamano | cf1b786 | 2007-12-06 00:14:14 -0800 | [diff] [blame] | 21 | }; |
| 22 | |
| 23 | unsigned parse_whitespace_rule(const char *string) |
| 24 | { |
| 25 | unsigned rule = WS_DEFAULT_RULE; |
| 26 | |
| 27 | while (string) { |
| 28 | int i; |
| 29 | size_t len; |
| 30 | const char *ep; |
| 31 | int negated = 0; |
| 32 | |
| 33 | string = string + strspn(string, ", \t\n\r"); |
| 34 | ep = strchr(string, ','); |
| 35 | if (!ep) |
| 36 | len = strlen(string); |
| 37 | else |
| 38 | len = ep - string; |
| 39 | |
| 40 | if (*string == '-') { |
| 41 | negated = 1; |
| 42 | string++; |
| 43 | len--; |
| 44 | } |
| 45 | if (!len) |
| 46 | break; |
| 47 | for (i = 0; i < ARRAY_SIZE(whitespace_rule_names); i++) { |
| 48 | if (strncmp(whitespace_rule_names[i].rule_name, |
| 49 | string, len)) |
| 50 | continue; |
| 51 | if (negated) |
| 52 | rule &= ~whitespace_rule_names[i].rule_bits; |
| 53 | else |
| 54 | rule |= whitespace_rule_names[i].rule_bits; |
| 55 | break; |
| 56 | } |
| 57 | string = ep; |
| 58 | } |
| 59 | return rule; |
| 60 | } |
| 61 | |
| 62 | static void setup_whitespace_attr_check(struct git_attr_check *check) |
| 63 | { |
| 64 | static struct git_attr *attr_whitespace; |
| 65 | |
| 66 | if (!attr_whitespace) |
| 67 | attr_whitespace = git_attr("whitespace", 10); |
| 68 | check[0].attr = attr_whitespace; |
| 69 | } |
| 70 | |
| 71 | unsigned whitespace_rule(const char *pathname) |
| 72 | { |
| 73 | struct git_attr_check attr_whitespace_rule; |
| 74 | |
| 75 | setup_whitespace_attr_check(&attr_whitespace_rule); |
| 76 | if (!git_checkattr(pathname, 1, &attr_whitespace_rule)) { |
| 77 | const char *value; |
| 78 | |
| 79 | value = attr_whitespace_rule.value; |
| 80 | if (ATTR_TRUE(value)) { |
| 81 | /* true (whitespace) */ |
| 82 | unsigned all_rule = 0; |
| 83 | int i; |
| 84 | for (i = 0; i < ARRAY_SIZE(whitespace_rule_names); i++) |
Junio C Hamano | a437900 | 2009-06-21 02:35:18 -0700 | [diff] [blame] | 85 | if (!whitespace_rule_names[i].loosens_error) |
| 86 | all_rule |= whitespace_rule_names[i].rule_bits; |
Junio C Hamano | cf1b786 | 2007-12-06 00:14:14 -0800 | [diff] [blame] | 87 | return all_rule; |
| 88 | } else if (ATTR_FALSE(value)) { |
| 89 | /* false (-whitespace) */ |
| 90 | return 0; |
| 91 | } else if (ATTR_UNSET(value)) { |
| 92 | /* reset to default (!whitespace) */ |
| 93 | return whitespace_rule_cfg; |
| 94 | } else { |
| 95 | /* string */ |
| 96 | return parse_whitespace_rule(value); |
| 97 | } |
| 98 | } else { |
| 99 | return whitespace_rule_cfg; |
| 100 | } |
| 101 | } |
Wincent Colaiuta | c1795bb | 2007-12-13 14:32:29 +0100 | [diff] [blame] | 102 | |
| 103 | /* The returned string should be freed by the caller. */ |
| 104 | char *whitespace_error_string(unsigned ws) |
| 105 | { |
Brandon Casey | f285a2d | 2008-10-09 14:12:12 -0500 | [diff] [blame] | 106 | struct strbuf err = STRBUF_INIT; |
Junio C Hamano | aeb84b0 | 2009-09-05 22:21:17 -0700 | [diff] [blame] | 107 | if ((ws & WS_TRAILING_SPACE) == WS_TRAILING_SPACE) |
Wincent Colaiuta | 420f4f0 | 2007-12-14 12:23:43 +0100 | [diff] [blame] | 108 | strbuf_addstr(&err, "trailing whitespace"); |
Junio C Hamano | aeb84b0 | 2009-09-05 22:21:17 -0700 | [diff] [blame] | 109 | else { |
| 110 | if (ws & WS_BLANK_AT_EOL) |
| 111 | strbuf_addstr(&err, "trailing whitespace"); |
| 112 | if (ws & WS_BLANK_AT_EOF) { |
| 113 | if (err.len) |
| 114 | strbuf_addstr(&err, ", "); |
| 115 | strbuf_addstr(&err, "new blank line at EOF"); |
| 116 | } |
| 117 | } |
Wincent Colaiuta | c1795bb | 2007-12-13 14:32:29 +0100 | [diff] [blame] | 118 | if (ws & WS_SPACE_BEFORE_TAB) { |
| 119 | if (err.len) |
| 120 | strbuf_addstr(&err, ", "); |
Wincent Colaiuta | 420f4f0 | 2007-12-14 12:23:43 +0100 | [diff] [blame] | 121 | strbuf_addstr(&err, "space before tab in indent"); |
Wincent Colaiuta | c1795bb | 2007-12-13 14:32:29 +0100 | [diff] [blame] | 122 | } |
| 123 | if (ws & WS_INDENT_WITH_NON_TAB) { |
| 124 | if (err.len) |
| 125 | strbuf_addstr(&err, ", "); |
Wincent Colaiuta | 420f4f0 | 2007-12-14 12:23:43 +0100 | [diff] [blame] | 126 | strbuf_addstr(&err, "indent with spaces"); |
Wincent Colaiuta | c1795bb | 2007-12-13 14:32:29 +0100 | [diff] [blame] | 127 | } |
| 128 | return strbuf_detach(&err, NULL); |
| 129 | } |
| 130 | |
| 131 | /* If stream is non-NULL, emits the line after checking. */ |
Junio C Hamano | 8f8841e | 2008-06-26 15:35:21 -0700 | [diff] [blame] | 132 | static unsigned ws_check_emit_1(const char *line, int len, unsigned ws_rule, |
| 133 | FILE *stream, const char *set, |
| 134 | const char *reset, const char *ws) |
Wincent Colaiuta | c1795bb | 2007-12-13 14:32:29 +0100 | [diff] [blame] | 135 | { |
| 136 | unsigned result = 0; |
J. Bruce Fields | 954ecd4 | 2007-12-16 11:31:39 -0500 | [diff] [blame] | 137 | int written = 0; |
Wincent Colaiuta | c1795bb | 2007-12-13 14:32:29 +0100 | [diff] [blame] | 138 | int trailing_whitespace = -1; |
| 139 | int trailing_newline = 0; |
Junio C Hamano | b2979ff | 2008-01-15 00:59:05 -0800 | [diff] [blame] | 140 | int trailing_carriage_return = 0; |
Wincent Colaiuta | c1795bb | 2007-12-13 14:32:29 +0100 | [diff] [blame] | 141 | int i; |
| 142 | |
| 143 | /* Logic is simpler if we temporarily ignore the trailing newline. */ |
| 144 | if (len > 0 && line[len - 1] == '\n') { |
| 145 | trailing_newline = 1; |
| 146 | len--; |
| 147 | } |
Junio C Hamano | b2979ff | 2008-01-15 00:59:05 -0800 | [diff] [blame] | 148 | if ((ws_rule & WS_CR_AT_EOL) && |
| 149 | len > 0 && line[len - 1] == '\r') { |
| 150 | trailing_carriage_return = 1; |
| 151 | len--; |
| 152 | } |
Wincent Colaiuta | c1795bb | 2007-12-13 14:32:29 +0100 | [diff] [blame] | 153 | |
| 154 | /* Check for trailing whitespace. */ |
Junio C Hamano | aeb84b0 | 2009-09-05 22:21:17 -0700 | [diff] [blame] | 155 | if (ws_rule & WS_BLANK_AT_EOL) { |
Wincent Colaiuta | c1795bb | 2007-12-13 14:32:29 +0100 | [diff] [blame] | 156 | for (i = len - 1; i >= 0; i--) { |
| 157 | if (isspace(line[i])) { |
| 158 | trailing_whitespace = i; |
Junio C Hamano | aeb84b0 | 2009-09-05 22:21:17 -0700 | [diff] [blame] | 159 | result |= WS_BLANK_AT_EOL; |
Wincent Colaiuta | c1795bb | 2007-12-13 14:32:29 +0100 | [diff] [blame] | 160 | } |
| 161 | else |
| 162 | break; |
| 163 | } |
| 164 | } |
| 165 | |
| 166 | /* Check for space before tab in initial indent. */ |
| 167 | for (i = 0; i < len; i++) { |
J. Bruce Fields | 9afa2d4 | 2007-12-16 11:31:40 -0500 | [diff] [blame] | 168 | if (line[i] == ' ') |
J. Bruce Fields | 1020999 | 2007-12-16 11:31:38 -0500 | [diff] [blame] | 169 | continue; |
J. Bruce Fields | 1020999 | 2007-12-16 11:31:38 -0500 | [diff] [blame] | 170 | if (line[i] != '\t') |
Wincent Colaiuta | c1795bb | 2007-12-13 14:32:29 +0100 | [diff] [blame] | 171 | break; |
J. Bruce Fields | ffe5688 | 2007-12-16 11:31:41 -0500 | [diff] [blame] | 172 | if ((ws_rule & WS_SPACE_BEFORE_TAB) && written < i) { |
J. Bruce Fields | 1020999 | 2007-12-16 11:31:38 -0500 | [diff] [blame] | 173 | result |= WS_SPACE_BEFORE_TAB; |
J. Bruce Fields | ffe5688 | 2007-12-16 11:31:41 -0500 | [diff] [blame] | 174 | if (stream) { |
| 175 | fputs(ws, stream); |
| 176 | fwrite(line + written, i - written, 1, stream); |
| 177 | fputs(reset, stream); |
| 178 | } |
| 179 | } else if (stream) |
| 180 | fwrite(line + written, i - written, 1, stream); |
| 181 | if (stream) |
| 182 | fwrite(line + i, 1, 1, stream); |
J. Bruce Fields | 9afa2d4 | 2007-12-16 11:31:40 -0500 | [diff] [blame] | 183 | written = i + 1; |
Wincent Colaiuta | c1795bb | 2007-12-13 14:32:29 +0100 | [diff] [blame] | 184 | } |
| 185 | |
| 186 | /* Check for indent using non-tab. */ |
J. Bruce Fields | ffe5688 | 2007-12-16 11:31:41 -0500 | [diff] [blame] | 187 | if ((ws_rule & WS_INDENT_WITH_NON_TAB) && i - written >= 8) { |
Wincent Colaiuta | c1795bb | 2007-12-13 14:32:29 +0100 | [diff] [blame] | 188 | result |= WS_INDENT_WITH_NON_TAB; |
J. Bruce Fields | ffe5688 | 2007-12-16 11:31:41 -0500 | [diff] [blame] | 189 | if (stream) { |
Wincent Colaiuta | c1795bb | 2007-12-13 14:32:29 +0100 | [diff] [blame] | 190 | fputs(ws, stream); |
J. Bruce Fields | ffe5688 | 2007-12-16 11:31:41 -0500 | [diff] [blame] | 191 | fwrite(line + written, i - written, 1, stream); |
Wincent Colaiuta | c1795bb | 2007-12-13 14:32:29 +0100 | [diff] [blame] | 192 | fputs(reset, stream); |
Wincent Colaiuta | c1795bb | 2007-12-13 14:32:29 +0100 | [diff] [blame] | 193 | } |
J. Bruce Fields | ffe5688 | 2007-12-16 11:31:41 -0500 | [diff] [blame] | 194 | written = i; |
| 195 | } |
Wincent Colaiuta | c1795bb | 2007-12-13 14:32:29 +0100 | [diff] [blame] | 196 | |
J. Bruce Fields | ffe5688 | 2007-12-16 11:31:41 -0500 | [diff] [blame] | 197 | if (stream) { |
Junio C Hamano | b2979ff | 2008-01-15 00:59:05 -0800 | [diff] [blame] | 198 | /* |
| 199 | * Now the rest of the line starts at "written". |
| 200 | * The non-highlighted part ends at "trailing_whitespace". |
| 201 | */ |
Wincent Colaiuta | c1795bb | 2007-12-13 14:32:29 +0100 | [diff] [blame] | 202 | if (trailing_whitespace == -1) |
| 203 | trailing_whitespace = len; |
| 204 | |
| 205 | /* Emit non-highlighted (middle) segment. */ |
J. Bruce Fields | 954ecd4 | 2007-12-16 11:31:39 -0500 | [diff] [blame] | 206 | if (trailing_whitespace - written > 0) { |
Wincent Colaiuta | c1795bb | 2007-12-13 14:32:29 +0100 | [diff] [blame] | 207 | fputs(set, stream); |
J. Bruce Fields | 954ecd4 | 2007-12-16 11:31:39 -0500 | [diff] [blame] | 208 | fwrite(line + written, |
| 209 | trailing_whitespace - written, 1, stream); |
Wincent Colaiuta | c1795bb | 2007-12-13 14:32:29 +0100 | [diff] [blame] | 210 | fputs(reset, stream); |
| 211 | } |
| 212 | |
| 213 | /* Highlight errors in trailing whitespace. */ |
| 214 | if (trailing_whitespace != len) { |
| 215 | fputs(ws, stream); |
| 216 | fwrite(line + trailing_whitespace, |
| 217 | len - trailing_whitespace, 1, stream); |
| 218 | fputs(reset, stream); |
| 219 | } |
Junio C Hamano | b2979ff | 2008-01-15 00:59:05 -0800 | [diff] [blame] | 220 | if (trailing_carriage_return) |
| 221 | fputc('\r', stream); |
Wincent Colaiuta | c1795bb | 2007-12-13 14:32:29 +0100 | [diff] [blame] | 222 | if (trailing_newline) |
| 223 | fputc('\n', stream); |
| 224 | } |
| 225 | return result; |
| 226 | } |
Junio C Hamano | fe3403c | 2008-02-23 16:59:16 -0800 | [diff] [blame] | 227 | |
/*
 * Run ws_check_emit_1() on the line with every argument passed
 * through unchanged; the bitmask it returns is discarded.
 */
void ws_check_emit(const char *line, int len, unsigned ws_rule,
		   FILE *stream, const char *set,
		   const char *reset, const char *ws)
{
	unsigned ignored;

	ignored = ws_check_emit_1(line, len, ws_rule, stream, set, reset, ws);
	(void)ignored;
}
| 234 | |
/*
 * Run ws_check_emit_1() on the line with a NULL stream (and NULL
 * set/reset/ws strings) and return its bitmask.
 */
unsigned ws_check(const char *line, int len, unsigned ws_rule)
{
	unsigned found;

	found = ws_check_emit_1(line, len, ws_rule, NULL, NULL, NULL, NULL);
	return found;
}
| 239 | |
/*
 * Return 1 if the first len bytes of line are all whitespace (which
 * includes the case of len <= 0), 0 otherwise.
 */
int ws_blank_line(const char *line, int len, unsigned ws_rule)
{
	int pos;

	/*
	 * ws_rule is not consulted.  CR could arguably be treated
	 * differently from other whitespace when WS_CR_AT_EOL is in
	 * effect, but for now every whitespace byte counts the same.
	 */
	for (pos = 0; pos < len; pos++) {
		if (!isspace(line[pos]))
			return 0;
	}
	return 1;
}
| 254 | |
Junio C Hamano | fe3403c | 2008-02-23 16:59:16 -0800 | [diff] [blame] | 255 | /* Copy the line to the buffer while fixing whitespaces */ |
| 256 | int ws_fix_copy(char *dst, const char *src, int len, unsigned ws_rule, int *error_count) |
| 257 | { |
| 258 | /* |
| 259 | * len is number of bytes to be copied from src, starting |
| 260 | * at src. Typically src[len-1] is '\n', unless this is |
| 261 | * the incomplete last line. |
| 262 | */ |
| 263 | int i; |
| 264 | int add_nl_to_tail = 0; |
| 265 | int add_cr_to_tail = 0; |
| 266 | int fixed = 0; |
| 267 | int last_tab_in_indent = -1; |
| 268 | int last_space_in_indent = -1; |
| 269 | int need_fix_leading_space = 0; |
| 270 | char *buf; |
| 271 | |
| 272 | /* |
| 273 | * Strip trailing whitespace |
| 274 | */ |
Junio C Hamano | afd9db4 | 2009-09-15 03:28:08 -0700 | [diff] [blame] | 275 | if (ws_rule & WS_BLANK_AT_EOL) { |
Junio C Hamano | 422a82f | 2009-07-25 01:29:20 -0700 | [diff] [blame] | 276 | if (0 < len && src[len - 1] == '\n') { |
Junio C Hamano | fe3403c | 2008-02-23 16:59:16 -0800 | [diff] [blame] | 277 | add_nl_to_tail = 1; |
| 278 | len--; |
Junio C Hamano | 422a82f | 2009-07-25 01:29:20 -0700 | [diff] [blame] | 279 | if (0 < len && src[len - 1] == '\r') { |
Junio C Hamano | fe3403c | 2008-02-23 16:59:16 -0800 | [diff] [blame] | 280 | add_cr_to_tail = !!(ws_rule & WS_CR_AT_EOL); |
| 281 | len--; |
| 282 | } |
| 283 | } |
| 284 | if (0 < len && isspace(src[len - 1])) { |
| 285 | while (0 < len && isspace(src[len-1])) |
| 286 | len--; |
| 287 | fixed = 1; |
| 288 | } |
| 289 | } |
| 290 | |
| 291 | /* |
| 292 | * Check leading whitespaces (indent) |
| 293 | */ |
| 294 | for (i = 0; i < len; i++) { |
| 295 | char ch = src[i]; |
| 296 | if (ch == '\t') { |
| 297 | last_tab_in_indent = i; |
| 298 | if ((ws_rule & WS_SPACE_BEFORE_TAB) && |
| 299 | 0 <= last_space_in_indent) |
| 300 | need_fix_leading_space = 1; |
| 301 | } else if (ch == ' ') { |
| 302 | last_space_in_indent = i; |
| 303 | if ((ws_rule & WS_INDENT_WITH_NON_TAB) && |
| 304 | 8 <= i - last_tab_in_indent) |
| 305 | need_fix_leading_space = 1; |
| 306 | } else |
| 307 | break; |
| 308 | } |
| 309 | |
| 310 | buf = dst; |
| 311 | if (need_fix_leading_space) { |
| 312 | /* Process indent ourselves */ |
| 313 | int consecutive_spaces = 0; |
| 314 | int last = last_tab_in_indent + 1; |
| 315 | |
| 316 | if (ws_rule & WS_INDENT_WITH_NON_TAB) { |
| 317 | /* have "last" point at one past the indent */ |
| 318 | if (last_tab_in_indent < last_space_in_indent) |
| 319 | last = last_space_in_indent + 1; |
| 320 | else |
| 321 | last = last_tab_in_indent + 1; |
| 322 | } |
| 323 | |
| 324 | /* |
| 325 | * between src[0..last-1], strip the funny spaces, |
| 326 | * updating them to tab as needed. |
| 327 | */ |
| 328 | for (i = 0; i < last; i++) { |
| 329 | char ch = src[i]; |
| 330 | if (ch != ' ') { |
| 331 | consecutive_spaces = 0; |
| 332 | *dst++ = ch; |
| 333 | } else { |
| 334 | consecutive_spaces++; |
| 335 | if (consecutive_spaces == 8) { |
| 336 | *dst++ = '\t'; |
| 337 | consecutive_spaces = 0; |
| 338 | } |
| 339 | } |
| 340 | } |
| 341 | while (0 < consecutive_spaces--) |
| 342 | *dst++ = ' '; |
| 343 | len -= last; |
| 344 | src += last; |
| 345 | fixed = 1; |
| 346 | } |
| 347 | |
| 348 | memcpy(dst, src, len); |
| 349 | if (add_cr_to_tail) |
| 350 | dst[len++] = '\r'; |
| 351 | if (add_nl_to_tail) |
| 352 | dst[len++] = '\n'; |
| 353 | if (fixed && error_count) |
| 354 | (*error_count)++; |
| 355 | return dst + len - buf; |
| 356 | } |