Johannes Schindelin | 9e83266 | 2006-12-22 22:06:08 +0100 | [diff] [blame] | 1 | #ifndef GIT_UTF8_H |
| 2 | #define GIT_UTF8_H |
| 3 | |
Elijah Newren | ef3ca95 | 2018-08-15 10:54:05 -0700 | [diff] [blame] | 4 | struct strbuf; |
| 5 | |
Junio C Hamano | 396ccf1 | 2008-01-06 19:02:22 -0800 | [diff] [blame] | 6 | typedef unsigned int ucs_char_t; /* assuming 32bit int */ |
| 7 | |
Nguyễn Thái Ngọc Duy | 1640632 | 2013-04-19 09:08:52 +1000 | [diff] [blame] | 8 | size_t display_mode_esc_sequence_len(const char *s); |
Junio C Hamano | 44b25b8 | 2008-01-02 01:49:58 -0800 | [diff] [blame] | 9 | int utf8_width(const char **start, size_t *remainder_p); |
Patrick Steinhardt | 522cc87 | 2022-12-01 15:46:53 +0100 | [diff] [blame] | 10 | int utf8_strnwidth(const char *string, size_t len, int skip_ansi); |
Geoffrey Thomas | 8a9391e | 2009-01-30 04:41:28 -0500 | [diff] [blame] | 11 | int utf8_strwidth(const char *string); |
Johannes Schindelin | 9e83266 | 2006-12-22 22:06:08 +0100 | [diff] [blame] | 12 | int is_utf8(const char *text); |
Junio C Hamano | 677cfed | 2006-12-30 12:20:43 -0800 | [diff] [blame] | 13 | int is_encoding_utf8(const char *name); |
Junio C Hamano | 0e18bcd | 2012-10-18 22:41:56 -0700 | [diff] [blame] | 14 | int same_encoding(const char *, const char *); |
Jeff King | 4621085 | 2013-07-09 20:18:40 -0400 | [diff] [blame] | 15 | __attribute__((format (printf, 2, 3))) |
Jiang Xin | c082196 | 2013-02-09 14:31:09 +0800 | [diff] [blame] | 16 | int utf8_fprintf(FILE *, const char *, ...); |
Junio C Hamano | 677cfed | 2006-12-30 12:20:43 -0800 | [diff] [blame] | 17 | |
Junio C Hamano | dde843e | 2015-04-16 10:45:29 -0700 | [diff] [blame] | 18 | extern const char utf8_bom[]; |
Denton Liu | 5545442 | 2019-04-29 04:28:14 -0400 | [diff] [blame] | 19 | int skip_utf8_bom(char **, size_t); |
Junio C Hamano | dde843e | 2015-04-16 10:45:29 -0700 | [diff] [blame] | 20 | |
Steffen Prohaska | e0db176 | 2012-12-11 06:59:22 +0100 | [diff] [blame] | 21 | void strbuf_add_wrapped_text(struct strbuf *buf, |
Johannes Schindelin | a94410c | 2008-11-10 18:47:00 +0100 | [diff] [blame] | 22 | const char *text, int indent, int indent2, int width); |
Steffen Prohaska | e0db176 | 2012-12-11 06:59:22 +0100 | [diff] [blame] | 23 | void strbuf_add_wrapped_bytes(struct strbuf *buf, const char *data, int len, |
Jeff King | 98acc83 | 2011-02-23 04:50:19 -0500 | [diff] [blame] | 24 | int indent, int indent2, int width); |
Nguyễn Thái Ngọc Duy | a7f01c6 | 2013-04-19 09:08:51 +1000 | [diff] [blame] | 25 | void strbuf_utf8_replace(struct strbuf *sb, int pos, int width, |
| 26 | const char *subst); |
Johannes Schindelin | 9e83266 | 2006-12-22 22:06:08 +0100 | [diff] [blame] | 27 | |
Junio C Hamano | b45974a | 2006-12-23 23:36:55 -0800 | [diff] [blame] | 28 | #ifndef NO_ICONV |
Nguyễn Thái Ngọc Duy | b782bba | 2013-04-19 09:08:46 +1000 | [diff] [blame] | 29 | char *reencode_string_iconv(const char *in, size_t insz, |
Torsten Bögershausen | aab2a1a | 2019-01-30 16:01:52 +0100 | [diff] [blame] | 30 | iconv_t conv, size_t bom_len, size_t *outsz); |
Jeff King | c7d017d | 2018-07-24 06:50:33 -0400 | [diff] [blame] | 31 | char *reencode_string_len(const char *in, size_t insz, |
Nguyễn Thái Ngọc Duy | b782bba | 2013-04-19 09:08:46 +1000 | [diff] [blame] | 32 | const char *out_encoding, |
| 33 | const char *in_encoding, |
Jeff King | c7d017d | 2018-07-24 06:50:33 -0400 | [diff] [blame] | 34 | size_t *outsz); |
Junio C Hamano | b45974a | 2006-12-23 23:36:55 -0800 | [diff] [blame] | 35 | #else |
/*
 * Stand-in used when the build defines NO_ICONV: performs no conversion.
 * A non-NULL length slot 'e' is reset to 0 and NULL is returned; the
 * remaining arguments are ignored.
 */
static inline char *reencode_string_len(const char *a, size_t b,
					const char *c, const char *d,
					size_t *e)
{
	if (e != NULL)
		*e = 0;
	return NULL;
}
Junio C Hamano | b45974a | 2006-12-23 23:36:55 -0800 | [diff] [blame] | 39 | #endif |
| 40 | |
/*
 * Convenience wrapper around reencode_string_len() for NUL-terminated
 * input: the byte count is obtained with strlen() and no output length
 * is requested (the size-out argument is passed as NULL).
 */
static inline char *reencode_string(const char *in,
				    const char *out_encoding,
				    const char *in_encoding)
{
	size_t insz = strlen(in);

	return reencode_string_len(in, insz, out_encoding, in_encoding, NULL);
}
| 49 | |
Kirill Smelkov | 6cd3c05 | 2013-03-07 14:55:07 +0400 | [diff] [blame] | 50 | int mbs_chrlen(const char **text, size_t *remainder_p, const char *encoding); |
| 51 | |
Jeff King | 6162a1d | 2014-12-15 17:56:59 -0500 | [diff] [blame] | 52 | /* |
Li Peng | 832c0e5 | 2016-05-06 20:36:46 +0800 | [diff] [blame] | 53 | * Returns true if the path would match ".git" after HFS case-folding. |
Jeff King | 6162a1d | 2014-12-15 17:56:59 -0500 | [diff] [blame] | 54 | * The path should be NUL-terminated, but we will match variants of both ".git\0" |
| 55 | * and ".git/..." (but _not_ ".../.git"). This makes it suitable for both fsck |
| 56 | * and verify_path(). |
Jeff King | 0fc333b | 2018-05-02 15:23:45 -0400 | [diff] [blame] | 57 | * |
| 58 | * Likewise, the is_hfs_dotgitfoo() variants look for ".gitfoo". |
Jeff King | 6162a1d | 2014-12-15 17:56:59 -0500 | [diff] [blame] | 59 | */ |
| 60 | int is_hfs_dotgit(const char *path); |
Jeff King | 0fc333b | 2018-05-02 15:23:45 -0400 | [diff] [blame] | 61 | int is_hfs_dotgitmodules(const char *path); |
| 62 | int is_hfs_dotgitignore(const char *path); |
| 63 | int is_hfs_dotgitattributes(const char *path); |
Jeff King | 801ed01 | 2021-05-03 16:43:22 -0400 | [diff] [blame] | 64 | int is_hfs_dotmailmap(const char *path); |
Jeff King | 6162a1d | 2014-12-15 17:56:59 -0500 | [diff] [blame] | 65 | |
/* Alignment positions passed as the 'position' argument of strbuf_utf8_align(). */
typedef enum {
	ALIGN_LEFT = 0,
	ALIGN_MIDDLE = 1,
	ALIGN_RIGHT = 2
} align_type;
| 71 | |
| 72 | /* |
| 73 | * Align the string given and store it into a strbuf as per the |
| 74 | * 'position' and 'width'. If the given string length is larger than |
| 75 | * 'width', then the input string is not truncated and no |
| 76 | * alignment is done. |
| 77 | */ |
| 78 | void strbuf_utf8_align(struct strbuf *buf, align_type position, unsigned int width, |
| 79 | const char *s); |
| 80 | |
Lars Schneider | 10ecb82 | 2018-04-15 20:16:05 +0200 | [diff] [blame] | 81 | /* |
| 82 | * If a data stream is declared as UTF-16BE or UTF-16LE, then a UTF-16 |
| 83 | * BOM must not be used [1]. The same applies for the UTF-32 equivalents. |
| 84 | * The function returns true if this rule is violated. |
| 85 | * |
Josh Soref | d05b08c | 2023-11-24 03:35:13 +0000 | [diff] [blame] | 86 | * [1] https://unicode.org/faq/utf_bom.html#bom10 |
Lars Schneider | 10ecb82 | 2018-04-15 20:16:05 +0200 | [diff] [blame] | 87 | */ |
| 88 | int has_prohibited_utf_bom(const char *enc, const char *data, size_t len); |
| 89 | |
Lars Schneider | c6e4865 | 2018-04-15 20:16:06 +0200 | [diff] [blame] | 90 | /* |
| 91 | * If the endianness is not defined in the encoding name, then we |
| 92 | * require a BOM. The function returns true if a required BOM is missing. |
| 93 | * |
| 94 | * The Unicode standard instructs to assume big-endian if there is no |
| 95 | * BOM for UTF-16/32 [1][2]. However, the W3C/WHATWG encoding standard |
| 96 | * used in HTML5 recommends to assume little-endian to "deal with |
| 97 | * deployed content" [3]. |
| 98 | * |
| 99 | * Therefore, strictly requiring a BOM seems to be the safest option for |
| 100 | * content in Git. |
| 101 | * |
Josh Soref | d05b08c | 2023-11-24 03:35:13 +0000 | [diff] [blame] | 102 | * [1] https://unicode.org/faq/utf_bom.html#gen6 |
| 103 | * [2] https://www.unicode.org/versions/Unicode10.0.0/ch03.pdf |
Lars Schneider | c6e4865 | 2018-04-15 20:16:06 +0200 | [diff] [blame] | 104 | * Section 3.10, D98, page 132 |
| 105 | * [3] https://encoding.spec.whatwg.org/#utf-16le |
| 106 | */ |
| 107 | int is_missing_required_utf_bom(const char *enc, const char *data, size_t len); |
| 108 | |
Johannes Schindelin | 9e83266 | 2006-12-22 22:06:08 +0100 | [diff] [blame] | 109 | #endif |