Linus Torvalds | 2744b23 | 2005-04-11 23:46:50 -0700 | [diff] [blame] | 1 | /* |
| 2 | * Totally braindamaged mbox splitter program. |
| 3 | * |
| 4 | * It just splits a mbox into a list of files: "0001" "0002" .. |
| 5 | * so you can process them further from there. |
| 6 | */ |
Junio C Hamano | 8b73edf | 2005-10-06 15:55:43 -0700 | [diff] [blame] | 7 | #include "cache.h" |
Lukas Sandström | e690e84 | 2006-06-13 22:21:46 +0200 | [diff] [blame] | 8 | #include "builtin.h" |
Fernando J. Pereda | d63bd9a | 2007-05-25 00:15:36 +0200 | [diff] [blame] | 9 | #include "path-list.h" |
Linus Torvalds | 2744b23 | 2005-04-11 23:46:50 -0700 | [diff] [blame] | 10 | |
/* Usage text handed to usage() when the command line cannot be parsed. */
static const char git_mailsplit_usage[] =
"git-mailsplit [-d<prec>] [-f<n>] [-b] -o<directory> <mbox>|<Maildir>...";
Linus Torvalds | 2744b23 | 2005-04-11 23:46:50 -0700 | [diff] [blame] | 13 | |
/*
 * Heuristic test for a Unix mbox "From " separator line.
 *
 * line: start of the candidate line
 * len:  number of bytes in the line, including the trailing newline
 *
 * Returns 1 when the line starts with "From ", contains a colon that is
 * surrounded by digits the way the last colon of "hh:mm:ss" would be, and
 * is followed by a number greater than 90 (taken to be the year).
 * Returns 0 otherwise.
 */
static int is_from_line(const char *line, int len)
{
	const char *start;
	const char *p;

	if (len < 20)
		return 0;
	if (memcmp("From ", line, 5) != 0)
		return 0;

	/* Walk backwards from the end of the line to the last ':' */
	start = line + 5;
	p = line + len - 2;
	do {
		if (p < start)
			return 0;
		p--;
	} while (*p != ':');

	/* Expect "d?dd:dd" around that colon (tail of hh:mm:ss) */
	if (!(isdigit(p[-4]) &&
	      isdigit(p[-2]) &&
	      isdigit(p[-1]) &&
	      isdigit(p[ 1]) &&
	      isdigit(p[ 2])))
		return 0;

	/* What follows the seconds must look like a plausible year */
	return strtol(p + 3, NULL, 10) > 90;
}
| 44 | |
/*
 * Line buffer shared by split_mbox(), split_maildir() and split_one():
 * the callers read the first line of a message into it and split_one()
 * continues from there.
 * Could be as small as 64, enough to hold a Unix "From " line.
 */
static char buf[4096];
Linus Torvalds | 2744b23 | 2005-04-11 23:46:50 -0700 | [diff] [blame] | 47 | |
/*
 * Read one line (or as much of it as fits) from "in" into "buf".
 *
 * We cannot use fgets() because our lines can contain NULs; the caller
 * must rely on the returned length rather than on strlen().
 *
 * buf:  destination, at least "size" bytes large
 * size: capacity of buf, including room for the terminating NUL
 * in:   stream to read from
 *
 * Reading stops after a newline has been stored, when size - 1 bytes have
 * been stored, or at EOF / read error.  The result is always NUL
 * terminated when size > 0.
 *
 * Returns the number of bytes stored, not counting the terminating NUL.
 * A return of 0 means nothing could be read (or size left no room for
 * data); use feof()/ferror() on "in" to tell the cases apart.
 */
int read_line_with_nul(char *buf, int size, FILE *in)
{
	int len = 0, c;

	/* No room even for the terminator: do not touch buf at all. */
	if (size <= 0)
		return 0;

	/*
	 * Check the capacity before storing so that a tiny buffer
	 * (size == 1) never receives a data byte it has no room for.
	 */
	while (len + 1 < size) {
		c = getc(in);
		if (c == EOF)
			break;
		buf[len++] = (char)c;
		if (c == '\n')
			break;
	}
	buf[len] = '\0';

	return len;
}
| 65 | |
Junio C Hamano | 8b73edf | 2005-10-06 15:55:43 -0700 | [diff] [blame] | 66 | /* Called with the first line (potentially partial) |
| 67 | * already in buf[] -- normally that should begin with |
| 68 | * the Unix "From " line. Write it into the specified |
| 69 | * file. |
| 70 | */ |
H. Peter Anvin | b3f041f | 2005-12-13 22:39:23 -0800 | [diff] [blame] | 71 | static int split_one(FILE *mbox, const char *name, int allow_bare) |
Junio C Hamano | 8b73edf | 2005-10-06 15:55:43 -0700 | [diff] [blame] | 72 | { |
| 73 | FILE *output = NULL; |
| 74 | int len = strlen(buf); |
| 75 | int fd; |
| 76 | int status = 0; |
H. Peter Anvin | b3f041f | 2005-12-13 22:39:23 -0800 | [diff] [blame] | 77 | int is_bare = !is_from_line(buf, len); |
Junio C Hamano | 8b73edf | 2005-10-06 15:55:43 -0700 | [diff] [blame] | 78 | |
H. Peter Anvin | b3f041f | 2005-12-13 22:39:23 -0800 | [diff] [blame] | 79 | if (is_bare && !allow_bare) |
Linus Torvalds | 2744b23 | 2005-04-11 23:46:50 -0700 | [diff] [blame] | 80 | goto corrupt; |
| 81 | |
Junio C Hamano | 8b73edf | 2005-10-06 15:55:43 -0700 | [diff] [blame] | 82 | fd = open(name, O_WRONLY | O_CREAT | O_EXCL, 0666); |
| 83 | if (fd < 0) |
| 84 | die("cannot open output file %s", name); |
| 85 | output = fdopen(fd, "w"); |
Linus Torvalds | 2744b23 | 2005-04-11 23:46:50 -0700 | [diff] [blame] | 86 | |
Junio C Hamano | 8b73edf | 2005-10-06 15:55:43 -0700 | [diff] [blame] | 87 | /* Copy it out, while searching for a line that begins with |
| 88 | * "From " and having something that looks like a date format. |
Linus Torvalds | 2744b23 | 2005-04-11 23:46:50 -0700 | [diff] [blame] | 89 | */ |
Junio C Hamano | 8b73edf | 2005-10-06 15:55:43 -0700 | [diff] [blame] | 90 | for (;;) { |
Johannes Schindelin | cce8d6f | 2008-05-16 14:03:30 +0100 | [diff] [blame] | 91 | int is_partial = len && buf[len-1] != '\n'; |
Linus Torvalds | 2744b23 | 2005-04-11 23:46:50 -0700 | [diff] [blame] | 92 | |
Johannes Schindelin | cce8d6f | 2008-05-16 14:03:30 +0100 | [diff] [blame] | 93 | if (fwrite(buf, 1, len, output) != len) |
Junio C Hamano | 8b73edf | 2005-10-06 15:55:43 -0700 | [diff] [blame] | 94 | die("cannot write output"); |
| 95 | |
Johannes Schindelin | cce8d6f | 2008-05-16 14:03:30 +0100 | [diff] [blame] | 96 | len = read_line_with_nul(buf, sizeof(buf), mbox); |
| 97 | if (len == 0) { |
Junio C Hamano | 8b73edf | 2005-10-06 15:55:43 -0700 | [diff] [blame] | 98 | if (feof(mbox)) { |
| 99 | status = 1; |
| 100 | break; |
| 101 | } |
| 102 | die("cannot read mbox"); |
| 103 | } |
H. Peter Anvin | b3f041f | 2005-12-13 22:39:23 -0800 | [diff] [blame] | 104 | if (!is_partial && !is_bare && is_from_line(buf, len)) |
Junio C Hamano | 8b73edf | 2005-10-06 15:55:43 -0700 | [diff] [blame] | 105 | break; /* done with one message */ |
| 106 | } |
| 107 | fclose(output); |
| 108 | return status; |
| 109 | |
| 110 | corrupt: |
| 111 | if (output) |
| 112 | fclose(output); |
| 113 | unlink(name); |
Linus Torvalds | 2744b23 | 2005-04-11 23:46:50 -0700 | [diff] [blame] | 114 | fprintf(stderr, "corrupt mailbox\n"); |
| 115 | exit(1); |
| 116 | } |
| 117 | |
Fernando J. Pereda | d63bd9a | 2007-05-25 00:15:36 +0200 | [diff] [blame] | 118 | static int populate_maildir_list(struct path_list *list, const char *path) |
Linus Torvalds | 2744b23 | 2005-04-11 23:46:50 -0700 | [diff] [blame] | 119 | { |
Fernando J. Pereda | d63bd9a | 2007-05-25 00:15:36 +0200 | [diff] [blame] | 120 | DIR *dir; |
| 121 | struct dirent *dent; |
Gerrit Pape | d50a4bc | 2007-11-06 08:54:18 +0000 | [diff] [blame] | 122 | char name[PATH_MAX]; |
| 123 | char *subs[] = { "cur", "new", NULL }; |
| 124 | char **sub; |
Fernando J. Pereda | d63bd9a | 2007-05-25 00:15:36 +0200 | [diff] [blame] | 125 | |
Gerrit Pape | d50a4bc | 2007-11-06 08:54:18 +0000 | [diff] [blame] | 126 | for (sub = subs; *sub; ++sub) { |
| 127 | snprintf(name, sizeof(name), "%s/%s", path, *sub); |
| 128 | if ((dir = opendir(name)) == NULL) { |
| 129 | if (errno == ENOENT) |
| 130 | continue; |
| 131 | error("cannot opendir %s (%s)", name, strerror(errno)); |
| 132 | return -1; |
| 133 | } |
| 134 | |
| 135 | while ((dent = readdir(dir)) != NULL) { |
| 136 | if (dent->d_name[0] == '.') |
| 137 | continue; |
| 138 | snprintf(name, sizeof(name), "%s/%s", *sub, dent->d_name); |
| 139 | path_list_insert(name, list); |
| 140 | } |
| 141 | |
| 142 | closedir(dir); |
Fernando J. Pereda | d63bd9a | 2007-05-25 00:15:36 +0200 | [diff] [blame] | 143 | } |
| 144 | |
Fernando J. Pereda | d63bd9a | 2007-05-25 00:15:36 +0200 | [diff] [blame] | 145 | return 0; |
| 146 | } |
| 147 | |
| 148 | static int split_maildir(const char *maildir, const char *dir, |
| 149 | int nr_prec, int skip) |
| 150 | { |
| 151 | char file[PATH_MAX]; |
Fernando J. Pereda | d63bd9a | 2007-05-25 00:15:36 +0200 | [diff] [blame] | 152 | char name[PATH_MAX]; |
Lukas Sandström | e690e84 | 2006-06-13 22:21:46 +0200 | [diff] [blame] | 153 | int ret = -1; |
Fernando J. Pereda | d63bd9a | 2007-05-25 00:15:36 +0200 | [diff] [blame] | 154 | int i; |
| 155 | struct path_list list = {NULL, 0, 0, 1}; |
Lukas Sandström | e690e84 | 2006-06-13 22:21:46 +0200 | [diff] [blame] | 156 | |
Gerrit Pape | d50a4bc | 2007-11-06 08:54:18 +0000 | [diff] [blame] | 157 | if (populate_maildir_list(&list, maildir) < 0) |
Fernando J. Pereda | d63bd9a | 2007-05-25 00:15:36 +0200 | [diff] [blame] | 158 | goto out; |
Lukas Sandström | e690e84 | 2006-06-13 22:21:46 +0200 | [diff] [blame] | 159 | |
Fernando J. Pereda | d63bd9a | 2007-05-25 00:15:36 +0200 | [diff] [blame] | 160 | for (i = 0; i < list.nr; i++) { |
| 161 | FILE *f; |
Gerrit Pape | d50a4bc | 2007-11-06 08:54:18 +0000 | [diff] [blame] | 162 | snprintf(file, sizeof(file), "%s/%s", maildir, list.items[i].path); |
Fernando J. Pereda | d63bd9a | 2007-05-25 00:15:36 +0200 | [diff] [blame] | 163 | f = fopen(file, "r"); |
| 164 | if (!f) { |
| 165 | error("cannot open mail %s (%s)", file, strerror(errno)); |
Lukas Sandström | e690e84 | 2006-06-13 22:21:46 +0200 | [diff] [blame] | 166 | goto out; |
| 167 | } |
| 168 | |
| 169 | if (fgets(buf, sizeof(buf), f) == NULL) { |
Fernando J. Pereda | d63bd9a | 2007-05-25 00:15:36 +0200 | [diff] [blame] | 170 | error("cannot read mail %s (%s)", file, strerror(errno)); |
Lukas Sandström | e690e84 | 2006-06-13 22:21:46 +0200 | [diff] [blame] | 171 | goto out; |
| 172 | } |
| 173 | |
Fernando J. Pereda | d63bd9a | 2007-05-25 00:15:36 +0200 | [diff] [blame] | 174 | sprintf(name, "%s/%0*d", dir, nr_prec, ++skip); |
| 175 | split_one(f, name, 1); |
Lukas Sandström | e690e84 | 2006-06-13 22:21:46 +0200 | [diff] [blame] | 176 | |
Fernando J. Pereda | d63bd9a | 2007-05-25 00:15:36 +0200 | [diff] [blame] | 177 | fclose(f); |
Lukas Sandström | e690e84 | 2006-06-13 22:21:46 +0200 | [diff] [blame] | 178 | } |
Fernando J. Pereda | d63bd9a | 2007-05-25 00:15:36 +0200 | [diff] [blame] | 179 | |
Lukas Sandström | e690e84 | 2006-06-13 22:21:46 +0200 | [diff] [blame] | 180 | ret = skip; |
| 181 | out: |
Gerrit Pape | d50a4bc | 2007-11-06 08:54:18 +0000 | [diff] [blame] | 182 | path_list_clear(&list, 1); |
Lukas Sandström | e690e84 | 2006-06-13 22:21:46 +0200 | [diff] [blame] | 183 | return ret; |
| 184 | } |
Fernando J. Pereda | d63bd9a | 2007-05-25 00:15:36 +0200 | [diff] [blame] | 185 | |
Junio C Hamano | fcd056a | 2007-06-08 02:22:56 -0700 | [diff] [blame] | 186 | static int split_mbox(const char *file, const char *dir, int allow_bare, |
| 187 | int nr_prec, int skip) |
Fernando J. Pereda | d63bd9a | 2007-05-25 00:15:36 +0200 | [diff] [blame] | 188 | { |
| 189 | char name[PATH_MAX]; |
| 190 | int ret = -1; |
Simon Sasburg | f88a545 | 2007-11-01 23:57:45 +0100 | [diff] [blame] | 191 | int peek; |
Fernando J. Pereda | d63bd9a | 2007-05-25 00:15:36 +0200 | [diff] [blame] | 192 | |
| 193 | FILE *f = !strcmp(file, "-") ? stdin : fopen(file, "r"); |
| 194 | int file_done = 0; |
| 195 | |
| 196 | if (!f) { |
| 197 | error("cannot open mbox %s", file); |
| 198 | goto out; |
| 199 | } |
| 200 | |
Simon Sasburg | f88a545 | 2007-11-01 23:57:45 +0100 | [diff] [blame] | 201 | do { |
| 202 | peek = fgetc(f); |
| 203 | } while (isspace(peek)); |
| 204 | ungetc(peek, f); |
| 205 | |
Fernando J. Pereda | d63bd9a | 2007-05-25 00:15:36 +0200 | [diff] [blame] | 206 | if (fgets(buf, sizeof(buf), f) == NULL) { |
| 207 | /* empty stdin is OK */ |
| 208 | if (f != stdin) { |
| 209 | error("cannot read mbox %s", file); |
| 210 | goto out; |
| 211 | } |
| 212 | file_done = 1; |
| 213 | } |
| 214 | |
| 215 | while (!file_done) { |
| 216 | sprintf(name, "%s/%0*d", dir, nr_prec, ++skip); |
| 217 | file_done = split_one(f, name, allow_bare); |
| 218 | } |
| 219 | |
| 220 | if (f != stdin) |
| 221 | fclose(f); |
| 222 | |
| 223 | ret = skip; |
| 224 | out: |
| 225 | return ret; |
| 226 | } |
| 227 | |
Linus Torvalds | a633fca | 2006-07-28 22:44:25 -0700 | [diff] [blame] | 228 | int cmd_mailsplit(int argc, const char **argv, const char *prefix) |
Lukas Sandström | e690e84 | 2006-06-13 22:21:46 +0200 | [diff] [blame] | 229 | { |
Fernando J. Pereda | d63bd9a | 2007-05-25 00:15:36 +0200 | [diff] [blame] | 230 | int nr = 0, nr_prec = 4, num = 0; |
H. Peter Anvin | b3f041f | 2005-12-13 22:39:23 -0800 | [diff] [blame] | 231 | int allow_bare = 0; |
| 232 | const char *dir = NULL; |
| 233 | const char **argp; |
| 234 | static const char *stdin_only[] = { "-", NULL }; |
Linus Torvalds | 2744b23 | 2005-04-11 23:46:50 -0700 | [diff] [blame] | 235 | |
H. Peter Anvin | b3f041f | 2005-12-13 22:39:23 -0800 | [diff] [blame] | 236 | for (argp = argv+1; *argp; argp++) { |
| 237 | const char *arg = *argp; |
Junio C Hamano | 8b73edf | 2005-10-06 15:55:43 -0700 | [diff] [blame] | 238 | |
| 239 | if (arg[0] != '-') |
| 240 | break; |
| 241 | /* do flags here */ |
H. Peter Anvin | b3f041f | 2005-12-13 22:39:23 -0800 | [diff] [blame] | 242 | if ( arg[1] == 'd' ) { |
| 243 | nr_prec = strtol(arg+2, NULL, 10); |
Junio C Hamano | 8b73edf | 2005-10-06 15:55:43 -0700 | [diff] [blame] | 244 | if (nr_prec < 3 || 10 <= nr_prec) |
| 245 | usage(git_mailsplit_usage); |
| 246 | continue; |
H. Peter Anvin | b3f041f | 2005-12-13 22:39:23 -0800 | [diff] [blame] | 247 | } else if ( arg[1] == 'f' ) { |
| 248 | nr = strtol(arg+2, NULL, 10); |
| 249 | } else if ( arg[1] == 'b' && !arg[2] ) { |
| 250 | allow_bare = 1; |
| 251 | } else if ( arg[1] == 'o' && arg[2] ) { |
| 252 | dir = arg+2; |
| 253 | } else if ( arg[1] == '-' && !arg[2] ) { |
| 254 | argp++; /* -- marks end of options */ |
Junio C Hamano | 8b73edf | 2005-10-06 15:55:43 -0700 | [diff] [blame] | 255 | break; |
H. Peter Anvin | b3f041f | 2005-12-13 22:39:23 -0800 | [diff] [blame] | 256 | } else { |
| 257 | die("unknown option: %s", arg); |
Linus Torvalds | 2744b23 | 2005-04-11 23:46:50 -0700 | [diff] [blame] | 258 | } |
Junio C Hamano | 8b73edf | 2005-10-06 15:55:43 -0700 | [diff] [blame] | 259 | } |
H. Peter Anvin | b3f041f | 2005-12-13 22:39:23 -0800 | [diff] [blame] | 260 | |
| 261 | if ( !dir ) { |
| 262 | /* Backwards compatibility: if no -o specified, accept |
| 263 | <mbox> <dir> or just <dir> */ |
| 264 | switch (argc - (argp-argv)) { |
| 265 | case 1: |
| 266 | dir = argp[0]; |
| 267 | argp = stdin_only; |
| 268 | break; |
| 269 | case 2: |
| 270 | stdin_only[0] = argp[0]; |
| 271 | dir = argp[1]; |
| 272 | argp = stdin_only; |
| 273 | break; |
| 274 | default: |
| 275 | usage(git_mailsplit_usage); |
| 276 | } |
| 277 | } else { |
| 278 | /* New usage: if no more argument, parse stdin */ |
| 279 | if ( !*argp ) |
| 280 | argp = stdin_only; |
| 281 | } |
| 282 | |
Fernando J. Pereda | d63bd9a | 2007-05-25 00:15:36 +0200 | [diff] [blame] | 283 | while (*argp) { |
| 284 | const char *arg = *argp++; |
| 285 | struct stat argstat; |
| 286 | int ret = 0; |
H. Peter Anvin | b3f041f | 2005-12-13 22:39:23 -0800 | [diff] [blame] | 287 | |
Fernando J. Pereda | d63bd9a | 2007-05-25 00:15:36 +0200 | [diff] [blame] | 288 | if (arg[0] == '-' && arg[1] == 0) { |
| 289 | ret = split_mbox(arg, dir, allow_bare, nr_prec, nr); |
| 290 | if (ret < 0) { |
| 291 | error("cannot split patches from stdin"); |
| 292 | return 1; |
| 293 | } |
Junio C Hamano | b332718 | 2007-05-28 15:48:07 -0700 | [diff] [blame] | 294 | num += (ret - nr); |
| 295 | nr = ret; |
Fernando J. Pereda | d63bd9a | 2007-05-25 00:15:36 +0200 | [diff] [blame] | 296 | continue; |
| 297 | } |
| 298 | |
| 299 | if (stat(arg, &argstat) == -1) { |
| 300 | error("cannot stat %s (%s)", arg, strerror(errno)); |
| 301 | return 1; |
| 302 | } |
| 303 | |
| 304 | if (S_ISDIR(argstat.st_mode)) |
| 305 | ret = split_maildir(arg, dir, nr_prec, nr); |
| 306 | else |
| 307 | ret = split_mbox(arg, dir, allow_bare, nr_prec, nr); |
| 308 | |
| 309 | if (ret < 0) { |
| 310 | error("cannot split patches from %s", arg); |
| 311 | return 1; |
| 312 | } |
Junio C Hamano | b332718 | 2007-05-28 15:48:07 -0700 | [diff] [blame] | 313 | num += (ret - nr); |
| 314 | nr = ret; |
Fernando J. Pereda | d63bd9a | 2007-05-25 00:15:36 +0200 | [diff] [blame] | 315 | } |
| 316 | |
| 317 | printf("%d\n", num); |
| 318 | |
| 319 | return 0; |
Linus Torvalds | 2744b23 | 2005-04-11 23:46:50 -0700 | [diff] [blame] | 320 | } |