pretty: two phase conversion for non utf-8 commits
Always assume that format_commit_item() takes a UTF-8 string, to keep
string handling simple (we can handle UTF-8 strings, but not strings in
other encodings).
If the commit message is not in UTF-8, or the output encoding is not
UTF-8, then the commit is first converted to UTF-8, processed, and the
output is then converted to the output encoding. This of course only
works with encodings that are compatible with Unicode.
This also fixes the iso8859-1 test in t6006. It's supposed to create
an iso8859-1 commit, but the commit content in t6006 is in UTF-8.
t6006 is now converted back in UTF-8 (the downside is we can't put
utf-8 strings there anymore).
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
diff --git a/pretty.c b/pretty.c
index e0f93ba..5947275 100644
--- a/pretty.c
+++ b/pretty.c
@@ -954,7 +954,8 @@
return format_person_part(sb, part, ident, strlen(ident), dmode);
}
-static size_t format_commit_one(struct strbuf *sb, const char *placeholder,
+static size_t format_commit_one(struct strbuf *sb, /* in UTF-8 */
+ const char *placeholder,
void *context)
{
struct format_commit_context *c = context;
@@ -1193,7 +1194,8 @@
return 0; /* unknown placeholder */
}
-static size_t format_commit_item(struct strbuf *sb, const char *placeholder,
+static size_t format_commit_item(struct strbuf *sb, /* in UTF-8 */
+ const char *placeholder,
void *context)
{
int consumed;
@@ -1273,6 +1275,7 @@
{
struct format_commit_context context;
const char *output_enc = pretty_ctx->output_encoding;
+ const char *utf8 = "UTF-8";
memset(&context, 0, sizeof(context));
context.commit = commit;
@@ -1285,6 +1288,23 @@
strbuf_expand(sb, format, format_commit_item, &context);
rewrap_message_tail(sb, &context, 0, 0, 0);
+ if (output_enc) {
+ if (same_encoding(utf8, output_enc))
+ output_enc = NULL;
+ } else {
+ if (context.commit_encoding &&
+ !same_encoding(context.commit_encoding, utf8))
+ output_enc = context.commit_encoding;
+ }
+
+ if (output_enc) {
+ int outsz;
+ char *out = reencode_string_len(sb->buf, sb->len,
+ output_enc, utf8, &outsz);
+ if (out)
+ strbuf_attach(sb, out, outsz, outsz + 1);
+ }
+
free(context.commit_encoding);
logmsg_free(context.message, commit);
free(context.signature_check.gpg_output);