Johannes Schindelin | 2223190 | 2019-01-29 06:19:27 -0800 | [diff] [blame] | 1 | #include "test-tool.h" |
| 2 | |
/*
 * Written to the output in place of input that cannot be passed through.
 * Spelled as a numeric character reference for U+FFFD (REPLACEMENT
 * CHARACTER) so that the literal is pure ASCII and the bytes written do not
 * depend on the encoding of this source file.
 */
static const char *utf8_replace_character = "&#xfffd;";
| 4 | |
| 5 | /* |
| 6 | * Encodes (possibly incorrect) UTF-8 on <stdin> to <stdout>, to be embedded |
| 7 | * in an XML file. |
| 8 | */ |
| 9 | int cmd__xml_encode(int argc, const char **argv) |
| 10 | { |
| 11 | unsigned char buf[1024], tmp[4], *tmp2 = NULL; |
| 12 | ssize_t cur = 0, len = 1, remaining = 0; |
| 13 | unsigned char ch; |
| 14 | |
| 15 | for (;;) { |
| 16 | if (++cur == len) { |
| 17 | len = xread(0, buf, sizeof(buf)); |
| 18 | if (!len) |
| 19 | return 0; |
| 20 | if (len < 0) |
| 21 | die_errno("Could not read <stdin>"); |
| 22 | cur = 0; |
| 23 | } |
| 24 | ch = buf[cur]; |
| 25 | |
| 26 | if (tmp2) { |
| 27 | if ((ch & 0xc0) != 0x80) { |
| 28 | fputs(utf8_replace_character, stdout); |
| 29 | tmp2 = NULL; |
| 30 | cur--; |
| 31 | continue; |
| 32 | } |
| 33 | *tmp2 = ch; |
| 34 | tmp2++; |
| 35 | if (--remaining == 0) { |
| 36 | fwrite(tmp, tmp2 - tmp, 1, stdout); |
| 37 | tmp2 = NULL; |
| 38 | } |
| 39 | continue; |
| 40 | } |
| 41 | |
| 42 | if (!(ch & 0x80)) { |
| 43 | /* 0xxxxxxx */ |
| 44 | if (ch == '&') |
| 45 | fputs("&", stdout); |
| 46 | else if (ch == '\'') |
| 47 | fputs("'", stdout); |
| 48 | else if (ch == '"') |
| 49 | fputs(""", stdout); |
| 50 | else if (ch == '<') |
| 51 | fputs("<", stdout); |
| 52 | else if (ch == '>') |
| 53 | fputs(">", stdout); |
| 54 | else if (ch >= 0x20) |
| 55 | fputc(ch, stdout); |
| 56 | else if (ch == 0x09 || ch == 0x0a || ch == 0x0d) |
| 57 | fprintf(stdout, "&#x%02x;", ch); |
| 58 | else |
| 59 | fputs(utf8_replace_character, stdout); |
| 60 | } else if ((ch & 0xe0) == 0xc0) { |
| 61 | /* 110XXXXx 10xxxxxx */ |
| 62 | tmp[0] = ch; |
| 63 | remaining = 1; |
| 64 | tmp2 = tmp + 1; |
| 65 | } else if ((ch & 0xf0) == 0xe0) { |
| 66 | /* 1110XXXX 10Xxxxxx 10xxxxxx */ |
| 67 | tmp[0] = ch; |
| 68 | remaining = 2; |
| 69 | tmp2 = tmp + 1; |
| 70 | } else if ((ch & 0xf8) == 0xf0) { |
| 71 | /* 11110XXX 10XXxxxx 10xxxxxx 10xxxxxx */ |
| 72 | tmp[0] = ch; |
| 73 | remaining = 3; |
| 74 | tmp2 = tmp + 1; |
| 75 | } else |
| 76 | fputs(utf8_replace_character, stdout); |
| 77 | } |
| 78 | |
| 79 | return 0; |
| 80 | } |