| #include "test-tool.h" |
| |
/*
 * Written to <stdout> in place of every input byte or byte sequence that
 * cmd__xml_encode() rejects.  Spelled as the numeric character reference
 * for U+FFFD (REPLACEMENT CHARACTER) so that the literal is pure ASCII and
 * does not depend on the encoding of this source file.
 */
static const char *utf8_replace_character = "&#xfffd;";
| |
| /* |
| * Encodes (possibly incorrect) UTF-8 on <stdin> to <stdout>, to be embedded |
| * in an XML file. |
| */ |
| int cmd__xml_encode(int argc UNUSED, const char **argv UNUSED) |
| { |
| unsigned char buf[1024], tmp[4], *tmp2 = NULL; |
| ssize_t cur = 0, len = 1, remaining = 0; |
| unsigned char ch; |
| |
| for (;;) { |
| if (++cur == len) { |
| len = xread(0, buf, sizeof(buf)); |
| if (!len) |
| return 0; |
| if (len < 0) |
| die_errno("Could not read <stdin>"); |
| cur = 0; |
| } |
| ch = buf[cur]; |
| |
| if (tmp2) { |
| if ((ch & 0xc0) != 0x80) { |
| fputs(utf8_replace_character, stdout); |
| tmp2 = NULL; |
| cur--; |
| continue; |
| } |
| *tmp2 = ch; |
| tmp2++; |
| if (--remaining == 0) { |
| fwrite(tmp, tmp2 - tmp, 1, stdout); |
| tmp2 = NULL; |
| } |
| continue; |
| } |
| |
| if (!(ch & 0x80)) { |
| /* 0xxxxxxx */ |
| if (ch == '&') |
| fputs("&", stdout); |
| else if (ch == '\'') |
| fputs("'", stdout); |
| else if (ch == '"') |
| fputs(""", stdout); |
| else if (ch == '<') |
| fputs("<", stdout); |
| else if (ch == '>') |
| fputs(">", stdout); |
| else if (ch >= 0x20) |
| fputc(ch, stdout); |
| else if (ch == 0x09 || ch == 0x0a || ch == 0x0d) |
| fprintf(stdout, "&#x%02x;", ch); |
| else |
| fputs(utf8_replace_character, stdout); |
| } else if ((ch & 0xe0) == 0xc0) { |
| /* 110XXXXx 10xxxxxx */ |
| tmp[0] = ch; |
| remaining = 1; |
| tmp2 = tmp + 1; |
| } else if ((ch & 0xf0) == 0xe0) { |
| /* 1110XXXX 10Xxxxxx 10xxxxxx */ |
| tmp[0] = ch; |
| remaining = 2; |
| tmp2 = tmp + 1; |
| } else if ((ch & 0xf8) == 0xf0) { |
| /* 11110XXX 10XXxxxx 10xxxxxx 10xxxxxx */ |
| tmp[0] = ch; |
| remaining = 3; |
| tmp2 = tmp + 1; |
| } else |
| fputs(utf8_replace_character, stdout); |
| } |
| |
| return 0; |
| } |