Blame - mailinfo.c - jrn/git

blob: f289941f7e516be2f6265326188b3d8da70ef3aa [file] [log] [blame]

Junio C Hamano	c6905e4	2015-10-14 17:44:55 -0700	[diff] [blame]	1	#include "cache.h"
				2	#include "utf8.h"
				3	#include "strbuf.h"
				4	#include "mailinfo.h"
				5
				6	static void cleanup_space(struct strbuf *sb)
				7	{
				8	size_t pos, cnt;
				9	for (pos = 0; pos < sb->len; pos++) {
				10	if (isspace(sb->buf[pos])) {
				11	sb->buf[pos] = ' ';
				12	for (cnt = 0; isspace(sb->buf[pos + cnt + 1]); cnt++);
				13	strbuf_remove(sb, pos + 1, cnt);
				14	}
				15	}
				16	}
				17
				18	static void get_sane_name(struct strbuf out, struct strbuf name, struct strbuf *email)
				19	{
				20	struct strbuf *src = name;
				21	if (name->len < 3 \|\| 60 < name->len \|\| strchr(name->buf, '@') \|\|
				22	strchr(name->buf, '<') \|\| strchr(name->buf, '>'))
				23	src = email;
				24	else if (name == out)
				25	return;
				26	strbuf_reset(out);
				27	strbuf_addbuf(out, src);
				28	}
				29
				30	static void parse_bogus_from(struct mailinfo mi, const struct strbuf line)
				31	{
				32	/* John Doe <johndoe> */
				33
				34	char bra, ket;
				35	/* This is fallback, so do not bother if we already have an
				36	* e-mail address.
				37	*/
				38	if (mi->email.len)
				39	return;
				40
				41	bra = strchr(line->buf, '<');
				42	if (!bra)
				43	return;
				44	ket = strchr(bra, '>');
				45	if (!ket)
				46	return;
				47
				48	strbuf_reset(&mi->email);
				49	strbuf_add(&mi->email, bra + 1, ket - bra - 1);
				50
				51	strbuf_reset(&mi->name);
				52	strbuf_add(&mi->name, line->buf, bra - line->buf);
				53	strbuf_trim(&mi->name);
				54	get_sane_name(&mi->name, &mi->name, &mi->email);
				55	}
				56
				57	static void handle_from(struct mailinfo mi, const struct strbuf from)
				58	{
				59	char *at;
				60	size_t el;
				61	struct strbuf f;
				62
				63	strbuf_init(&f, from->len);
				64	strbuf_addbuf(&f, from);
				65
				66	at = strchr(f.buf, '@');
				67	if (!at) {
				68	parse_bogus_from(mi, from);
				69	return;
				70	}
				71
				72	/*
				73	* If we already have one email, don't take any confusing lines
				74	*/
				75	if (mi->email.len && strchr(at + 1, '@')) {
				76	strbuf_release(&f);
				77	return;
				78	}
				79
				80	/* Pick up the string around '@', possibly delimited with <>
				81	* pair; that is the email part.
				82	*/
				83	while (at > f.buf) {
				84	char c = at[-1];
				85	if (isspace(c))
				86	break;
				87	if (c == '<') {
				88	at[-1] = ' ';
				89	break;
				90	}
				91	at--;
				92	}
				93	el = strcspn(at, " \n\t\r\v\f>");
				94	strbuf_reset(&mi->email);
				95	strbuf_add(&mi->email, at, el);
				96	strbuf_remove(&f, at - f.buf, el + (at[el] ? 1 : 0));
				97
				98	/* The remainder is name. It could be
				99	*
				100	* - "John Doe <john.doe@xz>" (a), or
				101	* - "john.doe@xz (John Doe)" (b), or
				102	* - "John (zzz) Doe <john.doe@xz> (Comment)" (c)
				103	*
				104	* but we have removed the email part, so
				105	*
				106	* - remove extra spaces which could stay after email (case 'c'), and
				107	* - trim from both ends, possibly removing the () pair at the end
				108	* (cases 'a' and 'b').
				109	*/
				110	cleanup_space(&f);
				111	strbuf_trim(&f);
				112	if (f.buf[0] == '(' && f.len && f.buf[f.len - 1] == ')') {
				113	strbuf_remove(&f, 0, 1);
				114	strbuf_setlen(&f, f.len - 1);
				115	}
				116
				117	get_sane_name(&mi->name, &f, &mi->email);
				118	strbuf_release(&f);
				119	}
				120
				121	static void handle_header(struct strbuf *out, const struct strbuf line)
				122	{
				123	if (!*out) {
				124	*out = xmalloc(sizeof(struct strbuf));
				125	strbuf_init(*out, line->len);
				126	} else
				127	strbuf_reset(*out);
				128
				129	strbuf_addbuf(*out, line);
				130	}
				131
				132	/* NOTE NOTE NOTE. We do not claim we do full MIME. We just attempt
				133	* to have enough heuristics to grok MIME encoded patches often found
				134	* on our mailing lists. For example, we do not even treat header lines
				135	* case insensitively.
				136	*/
				137
				138	static int slurp_attr(const char line, const char name, struct strbuf *attr)
				139	{
				140	const char ends, ap = strcasestr(line, name);
				141	size_t sz;
				142
				143	strbuf_setlen(attr, 0);
				144	if (!ap)
				145	return 0;
				146	ap += strlen(name);
				147	if (*ap == '"') {
				148	ap++;
				149	ends = "\"";
				150	}
				151	else
				152	ends = "; \t";
				153	sz = strcspn(ap, ends);
				154	strbuf_add(attr, ap, sz);
				155	return 1;
				156	}
				157
				158	static void handle_content_type(struct mailinfo mi, struct strbuf line)
				159	{
				160	struct strbuf *boundary = xmalloc(sizeof(struct strbuf));
				161	strbuf_init(boundary, line->len);
				162
				163	if (slurp_attr(line->buf, "boundary=", boundary)) {
				164	strbuf_insert(boundary, 0, "--", 2);
				165	if (++mi->content_top >= &mi->content[MAX_BOUNDARIES]) {
Junio C Hamano	6ac617a	2015-10-14 17:45:29 -0700	[diff] [blame]	166	error("Too many boundaries to handle");
				167	mi->input_error = -1;
				168	mi->content_top = &mi->content[MAX_BOUNDARIES] - 1;
				169	return;
Junio C Hamano	c6905e4	2015-10-14 17:44:55 -0700	[diff] [blame]	170	}
				171	*(mi->content_top) = boundary;
				172	boundary = NULL;
				173	}
				174	slurp_attr(line->buf, "charset=", &mi->charset);
				175
				176	if (boundary) {
				177	strbuf_release(boundary);
				178	free(boundary);
				179	}
				180	}
				181
				182	static void handle_message_id(struct mailinfo mi, const struct strbuf line)
				183	{
				184	if (mi->add_message_id)
				185	mi->message_id = strdup(line->buf);
				186	}
				187
				188	static void handle_content_transfer_encoding(struct mailinfo *mi,
				189	const struct strbuf *line)
				190	{
				191	if (strcasestr(line->buf, "base64"))
				192	mi->transfer_encoding = TE_BASE64;
				193	else if (strcasestr(line->buf, "quoted-printable"))
				194	mi->transfer_encoding = TE_QP;
				195	else
				196	mi->transfer_encoding = TE_DONTCARE;
				197	}
				198
				199	static int is_multipart_boundary(struct mailinfo mi, const struct strbuf line)
				200	{
				201	struct strbuf content_top = (mi->content_top);
				202
				203	return ((content_top->len <= line->len) &&
				204	!memcmp(line->buf, content_top->buf, content_top->len));
				205	}
				206
				207	static void cleanup_subject(struct mailinfo mi, struct strbuf subject)
				208	{
				209	size_t at = 0;
				210
				211	while (at < subject->len) {
				212	char *pos;
				213	size_t remove;
				214
				215	switch (subject->buf[at]) {
				216	case 'r': case 'R':
				217	if (subject->len <= at + 3)
				218	break;
				219	if ((subject->buf[at + 1] == 'e' \|\|
				220	subject->buf[at + 1] == 'E') &&
				221	subject->buf[at + 2] == ':') {
				222	strbuf_remove(subject, at, 3);
				223	continue;
				224	}
				225	at++;
				226	break;
				227	case ' ': case '\t': case ':':
				228	strbuf_remove(subject, at, 1);
				229	continue;
				230	case '[':
				231	pos = strchr(subject->buf + at, ']');
				232	if (!pos)
				233	break;
				234	remove = pos - subject->buf + at + 1;
				235	if (!mi->keep_non_patch_brackets_in_subject \|\|
				236	(7 <= remove &&
				237	memmem(subject->buf + at, remove, "PATCH", 5)))
				238	strbuf_remove(subject, at, remove);
				239	else {
				240	at += remove;
				241	/*
				242	* If the input had a space after the ], keep
				243	* it. We don't bother with finding the end of
				244	* the space, since we later normalize it
				245	* anyway.
				246	*/
				247	if (isspace(subject->buf[at]))
				248	at += 1;
				249	}
				250	continue;
				251	}
				252	break;
				253	}
				254	strbuf_trim(subject);
				255	}
				256
				257	#define MAX_HDR_PARSED 10
				258	static const char *header[MAX_HDR_PARSED] = {
				259	"From","Subject","Date",
				260	};
				261
				262	static inline int cmp_header(const struct strbuf line, const char hdr)
				263	{
				264	int len = strlen(hdr);
				265	return !strncasecmp(line->buf, hdr, len) && line->len > len &&
				266	line->buf[len] == ':' && isspace(line->buf[len + 1]);
				267	}
				268
				269	static int is_format_patch_separator(const char *line, int len)
				270	{
				271	static const char SAMPLE[] =
				272	"From e6807f3efca28b30decfecb1732a56c7db1137ee Mon Sep 17 00:00:00 2001\n";
				273	const char *cp;
				274
				275	if (len != strlen(SAMPLE))
				276	return 0;
				277	if (!skip_prefix(line, "From ", &cp))
				278	return 0;
				279	if (strspn(cp, "0123456789abcdef") != 40)
				280	return 0;
				281	cp += 40;
				282	return !memcmp(SAMPLE + (cp - line), cp, strlen(SAMPLE) - (cp - line));
				283	}
				284
				285	static struct strbuf decode_q_segment(const struct strbuf q_seg, int rfc2047)
				286	{
				287	const char *in = q_seg->buf;
				288	int c;
				289	struct strbuf *out = xmalloc(sizeof(struct strbuf));
				290	strbuf_init(out, q_seg->len);
				291
				292	while ((c = *in++) != 0) {
				293	if (c == '=') {
				294	int d = *in++;
				295	if (d == '\n' \|\| !d)
				296	break; /* drop trailing newline */
				297	strbuf_addch(out, (hexval(d) << 4) \| hexval(*in++));
				298	continue;
				299	}
				300	if (rfc2047 && c == '_') /* rfc2047 4.2 (2) */
				301	c = 0x20;
				302	strbuf_addch(out, c);
				303	}
				304	return out;
				305	}
				306
				307	static struct strbuf decode_b_segment(const struct strbuf b_seg)
				308	{
				309	/* Decode in..ep, possibly in-place to ot */
				310	int c, pos = 0, acc = 0;
				311	const char *in = b_seg->buf;
				312	struct strbuf *out = xmalloc(sizeof(struct strbuf));
				313	strbuf_init(out, b_seg->len);
				314
				315	while ((c = *in++) != 0) {
				316	if (c == '+')
				317	c = 62;
				318	else if (c == '/')
				319	c = 63;
				320	else if ('A' <= c && c <= 'Z')
				321	c -= 'A';
				322	else if ('a' <= c && c <= 'z')
				323	c -= 'a' - 26;
				324	else if ('0' <= c && c <= '9')
				325	c -= '0' - 52;
				326	else
				327	continue; /* garbage */
				328	switch (pos++) {
				329	case 0:
				330	acc = (c << 2);
				331	break;
				332	case 1:
				333	strbuf_addch(out, (acc \| (c >> 4)));
				334	acc = (c & 15) << 4;
				335	break;
				336	case 2:
				337	strbuf_addch(out, (acc \| (c >> 2)));
				338	acc = (c & 3) << 6;
				339	break;
				340	case 3:
				341	strbuf_addch(out, (acc \| c));
				342	acc = pos = 0;
				343	break;
				344	}
				345	}
				346	return out;
				347	}
				348
Junio C Hamano	669b963	2015-10-14 17:45:16 -0700	[diff] [blame]	349	static int convert_to_utf8(struct mailinfo *mi,
				350	struct strbuf line, const char charset)
Junio C Hamano	c6905e4	2015-10-14 17:44:55 -0700	[diff] [blame]	351	{
				352	char *out;
				353
				354	if (!mi->metainfo_charset \|\| !charset \|\| !*charset)
Junio C Hamano	669b963	2015-10-14 17:45:16 -0700	[diff] [blame]	355	return 0;
Junio C Hamano	c6905e4	2015-10-14 17:44:55 -0700	[diff] [blame]	356
				357	if (same_encoding(mi->metainfo_charset, charset))
Junio C Hamano	669b963	2015-10-14 17:45:16 -0700	[diff] [blame]	358	return 0;
Junio C Hamano	c6905e4	2015-10-14 17:44:55 -0700	[diff] [blame]	359	out = reencode_string(line->buf, mi->metainfo_charset, charset);
Junio C Hamano	6ac617a	2015-10-14 17:45:29 -0700	[diff] [blame]	360	if (!out) {
				361	mi->input_error = -1;
Junio C Hamano	669b963	2015-10-14 17:45:16 -0700	[diff] [blame]	362	return error("cannot convert from %s to %s",
				363	charset, mi->metainfo_charset);
Junio C Hamano	6ac617a	2015-10-14 17:45:29 -0700	[diff] [blame]	364	}
Junio C Hamano	c6905e4	2015-10-14 17:44:55 -0700	[diff] [blame]	365	strbuf_attach(line, out, strlen(out), strlen(out));
Junio C Hamano	669b963	2015-10-14 17:45:16 -0700	[diff] [blame]	366	return 0;
Junio C Hamano	c6905e4	2015-10-14 17:44:55 -0700	[diff] [blame]	367	}
				368
				369	static void decode_header(struct mailinfo mi, struct strbuf it)
				370	{
				371	char in, ep, *cp;
				372	struct strbuf outbuf = STRBUF_INIT, *dec;
				373	struct strbuf charset_q = STRBUF_INIT, piecebuf = STRBUF_INIT;
Junio C Hamano	6ac617a	2015-10-14 17:45:29 -0700	[diff] [blame]	374	int found_error = 1; /* pessimism */
Junio C Hamano	c6905e4	2015-10-14 17:44:55 -0700	[diff] [blame]	375
				376	in = it->buf;
				377	while (in - it->buf <= it->len && (ep = strstr(in, "=?")) != NULL) {
				378	int encoding;
				379	strbuf_reset(&charset_q);
				380	strbuf_reset(&piecebuf);
				381
				382	if (in != ep) {
				383	/*
				384	* We are about to process an encoded-word
				385	* that begins at ep, but there is something
				386	* before the encoded word.
				387	*/
				388	char *scan;
				389	for (scan = in; scan < ep; scan++)
				390	if (!isspace(*scan))
				391	break;
				392
				393	if (scan != ep \|\| in == it->buf) {
				394	/*
				395	* We should not lose that "something",
				396	* unless we have just processed an
				397	* encoded-word, and there is only LWS
				398	* before the one we are about to process.
				399	*/
				400	strbuf_add(&outbuf, in, ep - in);
				401	}
				402	}
				403	/* E.g.
				404	* ep : "=?iso-2022-jp?B?GyR...?= foo"
				405	* ep : "=?ISO-8859-1?Q?Foo=FCbar?= baz"
				406	*/
				407	ep += 2;
				408
				409	if (ep - it->buf >= it->len \|\| !(cp = strchr(ep, '?')))
				410	goto release_return;
				411
				412	if (cp + 3 - it->buf > it->len)
				413	goto release_return;
				414	strbuf_add(&charset_q, ep, cp - ep);
				415
				416	encoding = cp[1];
				417	if (!encoding \|\| cp[2] != '?')
				418	goto release_return;
				419	ep = strstr(cp + 3, "?=");
				420	if (!ep)
				421	goto release_return;
				422	strbuf_add(&piecebuf, cp + 3, ep - cp - 3);
				423	switch (tolower(encoding)) {
				424	default:
				425	goto release_return;
				426	case 'b':
				427	dec = decode_b_segment(&piecebuf);
				428	break;
				429	case 'q':
				430	dec = decode_q_segment(&piecebuf, 1);
				431	break;
				432	}
Junio C Hamano	669b963	2015-10-14 17:45:16 -0700	[diff] [blame]	433	if (convert_to_utf8(mi, dec, charset_q.buf))
				434	goto release_return;
Junio C Hamano	c6905e4	2015-10-14 17:44:55 -0700	[diff] [blame]	435
				436	strbuf_addbuf(&outbuf, dec);
				437	strbuf_release(dec);
				438	free(dec);
				439	in = ep + 2;
				440	}
				441	strbuf_addstr(&outbuf, in);
				442	strbuf_reset(it);
				443	strbuf_addbuf(it, &outbuf);
Junio C Hamano	6ac617a	2015-10-14 17:45:29 -0700	[diff] [blame]	444	found_error = 0;
Junio C Hamano	c6905e4	2015-10-14 17:44:55 -0700	[diff] [blame]	445	release_return:
				446	strbuf_release(&outbuf);
				447	strbuf_release(&charset_q);
				448	strbuf_release(&piecebuf);
Junio C Hamano	6ac617a	2015-10-14 17:45:29 -0700	[diff] [blame]	449
				450	if (found_error)
				451	mi->input_error = -1;
Junio C Hamano	c6905e4	2015-10-14 17:44:55 -0700	[diff] [blame]	452	}
				453
				454	static int check_header(struct mailinfo *mi,
				455	const struct strbuf *line,
				456	struct strbuf *hdr_data[], int overwrite)
				457	{
				458	int i, ret = 0, len;
				459	struct strbuf sb = STRBUF_INIT;
				460
				461	/* search for the interesting parts */
				462	for (i = 0; header[i]; i++) {
				463	int len = strlen(header[i]);
				464	if ((!hdr_data[i] \|\| overwrite) && cmp_header(line, header[i])) {
				465	/* Unwrap inline B and Q encoding, and optionally
				466	* normalize the meta information to utf8.
				467	*/
				468	strbuf_add(&sb, line->buf + len + 2, line->len - len - 2);
				469	decode_header(mi, &sb);
				470	handle_header(&hdr_data[i], &sb);
				471	ret = 1;
				472	goto check_header_out;
				473	}
				474	}
				475
				476	/* Content stuff */
				477	if (cmp_header(line, "Content-Type")) {
				478	len = strlen("Content-Type: ");
				479	strbuf_add(&sb, line->buf + len, line->len - len);
				480	decode_header(mi, &sb);
				481	strbuf_insert(&sb, 0, "Content-Type: ", len);
				482	handle_content_type(mi, &sb);
				483	ret = 1;
				484	goto check_header_out;
				485	}
				486	if (cmp_header(line, "Content-Transfer-Encoding")) {
				487	len = strlen("Content-Transfer-Encoding: ");
				488	strbuf_add(&sb, line->buf + len, line->len - len);
				489	decode_header(mi, &sb);
				490	handle_content_transfer_encoding(mi, &sb);
				491	ret = 1;
				492	goto check_header_out;
				493	}
				494	if (cmp_header(line, "Message-Id")) {
				495	len = strlen("Message-Id: ");
				496	strbuf_add(&sb, line->buf + len, line->len - len);
				497	decode_header(mi, &sb);
				498	handle_message_id(mi, &sb);
				499	ret = 1;
				500	goto check_header_out;
				501	}
				502
				503	/* for inbody stuff */
				504	if (starts_with(line->buf, ">From") && isspace(line->buf[5])) {
				505	ret = is_format_patch_separator(line->buf + 1, line->len - 1);
				506	goto check_header_out;
				507	}
				508	if (starts_with(line->buf, "[PATCH]") && isspace(line->buf[7])) {
				509	for (i = 0; header[i]; i++) {
				510	if (!strcmp("Subject", header[i])) {
				511	handle_header(&hdr_data[i], line);
				512	ret = 1;
				513	goto check_header_out;
				514	}
				515	}
				516	}
				517
				518	check_header_out:
				519	strbuf_release(&sb);
				520	return ret;
				521	}
				522
				523	static void decode_transfer_encoding(struct mailinfo mi, struct strbuf line)
				524	{
				525	struct strbuf *ret;
				526
				527	switch (mi->transfer_encoding) {
				528	case TE_QP:
				529	ret = decode_q_segment(line, 0);
				530	break;
				531	case TE_BASE64:
				532	ret = decode_b_segment(line);
				533	break;
				534	case TE_DONTCARE:
				535	default:
				536	return;
				537	}
				538	strbuf_reset(line);
				539	strbuf_addbuf(line, ret);
				540	strbuf_release(ret);
				541	free(ret);
				542	}
				543
				544	static inline int patchbreak(const struct strbuf *line)
				545	{
				546	size_t i;
				547
				548	/* Beginning of a "diff -" header? */
				549	if (starts_with(line->buf, "diff -"))
				550	return 1;
				551
				552	/* CVS "Index: " line? */
				553	if (starts_with(line->buf, "Index: "))
				554	return 1;
				555
				556	/*
				557	* "--- <filename>" starts patches without headers
				558	* "---<sp>*" is a manual separator
				559	*/
				560	if (line->len < 4)
				561	return 0;
				562
				563	if (starts_with(line->buf, "---")) {
				564	/* space followed by a filename? */
				565	if (line->buf[3] == ' ' && !isspace(line->buf[4]))
				566	return 1;
				567	/* Just whitespace? */
				568	for (i = 3; i < line->len; i++) {
				569	unsigned char c = line->buf[i];
				570	if (c == '\n')
				571	return 1;
				572	if (!isspace(c))
				573	break;
				574	}
				575	return 0;
				576	}
				577	return 0;
				578	}
				579
				580	static int is_scissors_line(const struct strbuf *line)
				581	{
				582	size_t i, len = line->len;
				583	int scissors = 0, gap = 0;
				584	int first_nonblank = -1;
				585	int last_nonblank = 0, visible, perforation = 0, in_perforation = 0;
				586	const char *buf = line->buf;
				587
				588	for (i = 0; i < len; i++) {
				589	if (isspace(buf[i])) {
				590	if (in_perforation) {
				591	perforation++;
				592	gap++;
				593	}
				594	continue;
				595	}
				596	last_nonblank = i;
				597	if (first_nonblank < 0)
				598	first_nonblank = i;
				599	if (buf[i] == '-') {
				600	in_perforation = 1;
				601	perforation++;
				602	continue;
				603	}
				604	if (i + 1 < len &&
				605	(!memcmp(buf + i, ">8", 2) \|\| !memcmp(buf + i, "8<", 2) \|\|
				606	!memcmp(buf + i, ">%", 2) \|\| !memcmp(buf + i, "%<", 2))) {
				607	in_perforation = 1;
				608	perforation += 2;
				609	scissors += 2;
				610	i++;
				611	continue;
				612	}
				613	in_perforation = 0;
				614	}
				615
				616	/*
				617	* The mark must be at least 8 bytes long (e.g. "-- >8 --").
				618	* Even though there can be arbitrary cruft on the same line
				619	* (e.g. "cut here"), in order to avoid misidentification, the
				620	* perforation must occupy more than a third of the visible
				621	* width of the line, and dashes and scissors must occupy more
				622	* than half of the perforation.
				623	*/
				624
				625	visible = last_nonblank - first_nonblank + 1;
				626	return (scissors && 8 <= visible &&
				627	visible < perforation * 3 &&
				628	gap * 2 < perforation);
				629	}
				630
				631	static int handle_commit_msg(struct mailinfo mi, struct strbuf line)
				632	{
				633	assert(!mi->filter_stage);
				634
				635	if (mi->header_stage) {
				636	if (!line->len \|\| (line->len == 1 && line->buf[0] == '\n'))
				637	return 0;
				638	}
				639
				640	if (mi->use_inbody_headers && mi->header_stage) {
				641	mi->header_stage = check_header(mi, line, mi->s_hdr_data, 0);
				642	if (mi->header_stage)
				643	return 0;
				644	} else
				645	/* Only trim the first (blank) line of the commit message
				646	* when ignoring in-body headers.
				647	*/
				648	mi->header_stage = 0;
				649
				650	/* normalize the log message to UTF-8. */
Junio C Hamano	669b963	2015-10-14 17:45:16 -0700	[diff] [blame]	651	if (convert_to_utf8(mi, line, mi->charset.buf))
Junio C Hamano	6ac617a	2015-10-14 17:45:29 -0700	[diff] [blame]	652	return 0; /* mi->input_error already set */
Junio C Hamano	c6905e4	2015-10-14 17:44:55 -0700	[diff] [blame]	653
				654	if (mi->use_scissors && is_scissors_line(line)) {
				655	int i;
				656
				657	strbuf_setlen(&mi->log_message, 0);
				658	mi->header_stage = 1;
				659
				660	/*
				661	* We may have already read "secondary headers"; purge
				662	* them to give ourselves a clean restart.
				663	*/
				664	for (i = 0; header[i]; i++) {
				665	if (mi->s_hdr_data[i])
				666	strbuf_release(mi->s_hdr_data[i]);
				667	mi->s_hdr_data[i] = NULL;
				668	}
				669	return 0;
				670	}
				671
				672	if (patchbreak(line)) {
				673	if (mi->message_id)
				674	strbuf_addf(&mi->log_message,
				675	"Message-Id: %s\n", mi->message_id);
				676	return 1;
				677	}
				678
				679	strbuf_addbuf(&mi->log_message, line);
				680	return 0;
				681	}
				682
				683	static void handle_patch(struct mailinfo mi, const struct strbuf line)
				684	{
				685	fwrite(line->buf, 1, line->len, mi->patchfile);
				686	mi->patch_lines++;
				687	}
				688
				689	static void handle_filter(struct mailinfo mi, struct strbuf line)
				690	{
				691	switch (mi->filter_stage) {
				692	case 0:
				693	if (!handle_commit_msg(mi, line))
				694	break;
				695	mi->filter_stage++;
				696	case 1:
				697	handle_patch(mi, line);
				698	break;
				699	}
				700	}
				701
				702	static int is_rfc2822_header(const struct strbuf *line)
				703	{
				704	/*
				705	* The section that defines the loosest possible
				706	* field name is "3.6.8 Optional fields".
				707	*
				708	* optional-field = field-name ":" unstructured CRLF
				709	* field-name = 1*ftext
				710	* ftext = %d33-57 / %59-126
				711	*/
				712	int ch;
				713	char *cp = line->buf;
				714
				715	/* Count mbox From headers as headers */
				716	if (starts_with(cp, "From ") \|\| starts_with(cp, ">From "))
				717	return 1;
				718
				719	while ((ch = *cp++)) {
				720	if (ch == ':')
				721	return 1;
				722	if ((33 <= ch && ch <= 57) \|\|
				723	(59 <= ch && ch <= 126))
				724	continue;
				725	break;
				726	}
				727	return 0;
				728	}
				729
				730	static int read_one_header_line(struct strbuf line, FILE in)
				731	{
				732	struct strbuf continuation = STRBUF_INIT;
				733
				734	/* Get the first part of the line. */
				735	if (strbuf_getline(line, in, '\n'))
				736	return 0;
				737
				738	/*
				739	* Is it an empty line or not a valid rfc2822 header?
				740	* If so, stop here, and return false ("not a header")
				741	*/
				742	strbuf_rtrim(line);
				743	if (!line->len \|\| !is_rfc2822_header(line)) {
				744	/* Re-add the newline */
				745	strbuf_addch(line, '\n');
				746	return 0;
				747	}
				748
				749	/*
				750	* Now we need to eat all the continuation lines..
				751	* Yuck, 2822 header "folding"
				752	*/
				753	for (;;) {
				754	int peek;
				755
				756	peek = fgetc(in); ungetc(peek, in);
				757	if (peek != ' ' && peek != '\t')
				758	break;
				759	if (strbuf_getline(&continuation, in, '\n'))
				760	break;
				761	continuation.buf[0] = ' ';
				762	strbuf_rtrim(&continuation);
				763	strbuf_addbuf(line, &continuation);
				764	}
				765	strbuf_release(&continuation);
				766
				767	return 1;
				768	}
				769
				770	static int find_boundary(struct mailinfo mi, struct strbuf line)
				771	{
				772	while (!strbuf_getline(line, mi->input, '\n')) {
				773	if (*(mi->content_top) && is_multipart_boundary(mi, line))
				774	return 1;
				775	}
				776	return 0;
				777	}
				778
				779	static int handle_boundary(struct mailinfo mi, struct strbuf line)
				780	{
				781	struct strbuf newline = STRBUF_INIT;
				782
				783	strbuf_addch(&newline, '\n');
				784	again:
				785	if (line->len >= (*(mi->content_top))->len + 2 &&
				786	!memcmp(line->buf + (*(mi->content_top))->len, "--", 2)) {
				787	/* we hit an end boundary */
				788	/* pop the current boundary off the stack */
				789	strbuf_release(*(mi->content_top));
				790	free(*(mi->content_top));
				791	*(mi->content_top) = NULL;
				792
				793	/* technically won't happen as is_multipart_boundary()
				794	will fail first. But just in case..
				795	*/
				796	if (--mi->content_top < mi->content) {
Junio C Hamano	6ac617a	2015-10-14 17:45:29 -0700	[diff] [blame]	797	error("Detected mismatched boundaries, can't recover");
				798	mi->input_error = -1;
				799	mi->content_top = mi->content;
				800	return 0;
Junio C Hamano	c6905e4	2015-10-14 17:44:55 -0700	[diff] [blame]	801	}
				802	handle_filter(mi, &newline);
				803	strbuf_release(&newline);
Junio C Hamano	6ac617a	2015-10-14 17:45:29 -0700	[diff] [blame]	804	if (mi->input_error)
				805	return 0;
Junio C Hamano	c6905e4	2015-10-14 17:44:55 -0700	[diff] [blame]	806
				807	/* skip to the next boundary */
				808	if (!find_boundary(mi, line))
				809	return 0;
				810	goto again;
				811	}
				812
				813	/* set some defaults */
				814	mi->transfer_encoding = TE_DONTCARE;
				815	strbuf_reset(&mi->charset);
				816
				817	/* slurp in this section's info */
				818	while (read_one_header_line(line, mi->input))
				819	check_header(mi, line, mi->p_hdr_data, 0);
				820
				821	strbuf_release(&newline);
				822	/* replenish line */
				823	if (strbuf_getline(line, mi->input, '\n'))
				824	return 0;
				825	strbuf_addch(line, '\n');
				826	return 1;
				827	}
				828
				829	static void handle_body(struct mailinfo mi, struct strbuf line)
				830	{
				831	struct strbuf prev = STRBUF_INIT;
				832
				833	/* Skip up to the first boundary */
				834	if (*(mi->content_top)) {
				835	if (!find_boundary(mi, line))
				836	goto handle_body_out;
				837	}
				838
				839	do {
				840	/* process any boundary lines */
				841	if (*(mi->content_top) && is_multipart_boundary(mi, line)) {
				842	/* flush any leftover */
				843	if (prev.len) {
				844	handle_filter(mi, &prev);
				845	strbuf_reset(&prev);
				846	}
				847	if (!handle_boundary(mi, line))
				848	goto handle_body_out;
				849	}
				850
				851	/* Unwrap transfer encoding */
				852	decode_transfer_encoding(mi, line);
				853
				854	switch (mi->transfer_encoding) {
				855	case TE_BASE64:
				856	case TE_QP:
				857	{
				858	struct strbuf lines, it, *sb;
				859
				860	/* Prepend any previous partial lines */
				861	strbuf_insert(line, 0, prev.buf, prev.len);
				862	strbuf_reset(&prev);
				863
				864	/*
				865	* This is a decoded line that may contain
				866	* multiple new lines. Pass only one chunk
				867	* at a time to handle_filter()
				868	*/
				869	lines = strbuf_split(line, '\n');
				870	for (it = lines; (sb = *it); it++) {
				871	if ((it + 1) == NULL) / The last line */
				872	if (sb->buf[sb->len - 1] != '\n') {
				873	/* Partial line, save it for later. */
				874	strbuf_addbuf(&prev, sb);
				875	break;
				876	}
				877	handle_filter(mi, sb);
				878	}
				879	/*
				880	* The partial chunk is saved in "prev" and will be
				881	* appended by the next iteration of read_line_with_nul().
				882	*/
				883	strbuf_list_free(lines);
				884	break;
				885	}
				886	default:
				887	handle_filter(mi, line);
				888	}
				889
Junio C Hamano	6ac617a	2015-10-14 17:45:29 -0700	[diff] [blame]	890	if (mi->input_error)
				891	break;
Junio C Hamano	c6905e4	2015-10-14 17:44:55 -0700	[diff] [blame]	892	} while (!strbuf_getwholeline(line, mi->input, '\n'));
				893
				894	handle_body_out:
				895	strbuf_release(&prev);
				896	}
				897
				898	static void output_header_lines(FILE fout, const char hdr, const struct strbuf *data)
				899	{
				900	const char *sp = data->buf;
				901	while (1) {
				902	char *ep = strchr(sp, '\n');
				903	int len;
				904	if (!ep)
				905	len = strlen(sp);
				906	else
				907	len = ep - sp;
				908	fprintf(fout, "%s: %.*s\n", hdr, len, sp);
				909	if (!ep)
				910	break;
				911	sp = ep + 1;
				912	}
				913	}
				914
				915	static void handle_info(struct mailinfo *mi)
				916	{
				917	struct strbuf *hdr;
				918	int i;
				919
				920	for (i = 0; header[i]; i++) {
				921	/* only print inbody headers if we output a patch file */
				922	if (mi->patch_lines && mi->s_hdr_data[i])
				923	hdr = mi->s_hdr_data[i];
				924	else if (mi->p_hdr_data[i])
				925	hdr = mi->p_hdr_data[i];
				926	else
				927	continue;
				928
				929	if (!strcmp(header[i], "Subject")) {
				930	if (!mi->keep_subject) {
				931	cleanup_subject(mi, hdr);
				932	cleanup_space(hdr);
				933	}
				934	output_header_lines(mi->output, "Subject", hdr);
				935	} else if (!strcmp(header[i], "From")) {
				936	cleanup_space(hdr);
				937	handle_from(mi, hdr);
				938	fprintf(mi->output, "Author: %s\n", mi->name.buf);
				939	fprintf(mi->output, "Email: %s\n", mi->email.buf);
				940	} else {
				941	cleanup_space(hdr);
				942	fprintf(mi->output, "%s: %s\n", header[i], hdr->buf);
				943	}
				944	}
				945	fprintf(mi->output, "\n");
				946	}
				947
				948	int mailinfo(struct mailinfo mi, const char msg, const char *patch)
				949	{
				950	FILE *cmitmsg;
				951	int peek;
				952	struct strbuf line = STRBUF_INIT;
				953
				954	cmitmsg = fopen(msg, "w");
				955	if (!cmitmsg) {
				956	perror(msg);
				957	return -1;
				958	}
				959	mi->patchfile = fopen(patch, "w");
				960	if (!mi->patchfile) {
				961	perror(patch);
				962	fclose(cmitmsg);
				963	return -1;
				964	}
				965
				966	mi->p_hdr_data = xcalloc(MAX_HDR_PARSED, sizeof(*(mi->p_hdr_data)));
				967	mi->s_hdr_data = xcalloc(MAX_HDR_PARSED, sizeof(*(mi->s_hdr_data)));
				968
				969	do {
				970	peek = fgetc(mi->input);
				971	} while (isspace(peek));
				972	ungetc(peek, mi->input);
				973
				974	/* process the email header */
				975	while (read_one_header_line(&line, mi->input))
				976	check_header(mi, &line, mi->p_hdr_data, 1);
				977
				978	handle_body(mi, &line);
				979	fwrite(mi->log_message.buf, 1, mi->log_message.len, cmitmsg);
				980	fclose(cmitmsg);
				981	fclose(mi->patchfile);
				982
				983	handle_info(mi);
				984	strbuf_release(&line);
Junio C Hamano	6ac617a	2015-10-14 17:45:29 -0700	[diff] [blame]	985	return mi->input_error;
Junio C Hamano	c6905e4	2015-10-14 17:44:55 -0700	[diff] [blame]	986	}
				987
				988	static int git_mailinfo_config(const char var, const char value, void *mi_)
				989	{
				990	struct mailinfo *mi = mi_;
				991
				992	if (!starts_with(var, "mailinfo."))
				993	return git_default_config(var, value, NULL);
				994	if (!strcmp(var, "mailinfo.scissors")) {
				995	mi->use_scissors = git_config_bool(var, value);
				996	return 0;
				997	}
				998	/* perhaps others here */
				999	return 0;
				1000	}
				1001
				1002	void setup_mailinfo(struct mailinfo *mi)
				1003	{
				1004	memset(mi, 0, sizeof(*mi));
				1005	strbuf_init(&mi->name, 0);
				1006	strbuf_init(&mi->email, 0);
				1007	strbuf_init(&mi->charset, 0);
				1008	strbuf_init(&mi->log_message, 0);
				1009	mi->header_stage = 1;
				1010	mi->use_inbody_headers = 1;
				1011	mi->content_top = mi->content;
Nguyễn Thái Ngọc Duy	85d9d9d	2015-11-01 15:30:30 +0100	[diff] [blame]	1012	git_config(git_mailinfo_config, mi);
Junio C Hamano	c6905e4	2015-10-14 17:44:55 -0700	[diff] [blame]	1013	}
				1014
				1015	void clear_mailinfo(struct mailinfo *mi)
				1016	{
				1017	int i;
				1018
				1019	strbuf_release(&mi->name);
				1020	strbuf_release(&mi->email);
				1021	strbuf_release(&mi->charset);
				1022	free(mi->message_id);
				1023
				1024	for (i = 0; mi->p_hdr_data[i]; i++)
				1025	strbuf_release(mi->p_hdr_data[i]);
				1026	free(mi->p_hdr_data);
				1027	for (i = 0; mi->s_hdr_data[i]; i++)
				1028	strbuf_release(mi->s_hdr_data[i]);
				1029	free(mi->s_hdr_data);
				1030
				1031	while (mi->content < mi->content_top) {
				1032	free(*(mi->content_top));
				1033	mi->content_top--;
				1034	}
				1035
				1036	strbuf_release(&mi->log_message);
				1037	}