77 #define WS_UTF8_ENC_1_M 0xffffff80 78 #define WS_UTF8_ENC_2_M 0xfffff800 79 #define WS_UTF8_ENC_3_M 0xffff0000 80 #define WS_UTF8_ENC_4_M 0xffe00000 81 #define WS_UTF8_ENC_5_M 0xfc000000 82 #define WS_UTF8_ENC_6_M 0x80000000 99 #define WS_UTF8_ENC_C_BITS 0x80 102 #define WS_UTF8_CONT_DATA_MASK 0x3f 108 #define WS_UTF8_ENC_TYPE(ch) \ 109 (((ch) & WS_UTF8_ENC_1_M) == 0 \ 111 : (((ch) & WS_UTF8_ENC_2_M) == 0 \ 113 : (((ch) & WS_UTF8_ENC_3_M) == 0 \ 115 : (((ch) & WS_UTF8_ENC_4_M) == 0 \ 117 : (((ch) & WS_UTF8_ENC_5_M) == 0 \ 119 : (((ch) & WS_UTF8_ENC_6_M) == 0 \ 125 #define WS_UTF8_DEC_1_M 0x80 126 #define WS_UTF8_DEC_2_M 0xe0 127 #define WS_UTF8_DEC_3_M 0xf0 128 #define WS_UTF8_DEC_4_M 0xf8 129 #define WS_UTF8_DEC_5_M 0xfc 130 #define WS_UTF8_DEC_6_M 0xfe 132 #define WS_UTF8_DEC_1_V 0x00 133 #define WS_UTF8_DEC_2_V 0xc0 134 #define WS_UTF8_DEC_3_V 0xe0 135 #define WS_UTF8_DEC_4_V 0xf0 136 #define WS_UTF8_DEC_5_V 0xf8 137 #define WS_UTF8_DEC_6_V 0xfc 154 #define WS_UTF8_DEC_C_M 0xc0 155 #define WS_UTF8_DEC_C_V 0x80 161 #define WS_UTF8_DEC_TYPE(b) \ 162 (((b) & WS_UTF8_DEC_1_M) == WS_UTF8_DEC_1_V \ 164 : (((b) & WS_UTF8_DEC_2_M) == WS_UTF8_DEC_2_V \ 166 : (((b) & WS_UTF8_DEC_3_M) == WS_UTF8_DEC_3_V \ 168 : (((b) & WS_UTF8_DEC_4_M) == WS_UTF8_DEC_4_V \ 170 : (((b) & WS_UTF8_DEC_5_M) == WS_UTF8_DEC_5_V \ 172 : (((b) & WS_UTF8_DEC_6_M) == WS_UTF8_DEC_6_V \ 178 #define WS_UTF8_DEC_C_P(b) (((b) & WS_UTF8_DEC_C_M) == WS_UTF8_DEC_C_V) 205 ws_fatal(
"ws_utf8_append_char(): 0x%lx is not a valid UTF-8 character",
215 for (i = num_bytes - 1; i > 0; i--) {
226 string->len += num_bytes;
234 size_t *strlen_return)
236 unsigned int num_bytes, i;
249 for (i = 1; i < num_bytes; i++)
260 *strlen_return = strlen;
279 string->num_chars = 0;
283 if (string->
data == NULL)
287 string->num_chars = num_chars;
297 unsigned int num_bytes, i;
301 if (pos < 0 || pos >= string->
len)
305 data =
string->data + pos;
312 if (pos + num_bytes > string->
len)
320 for (i = 1; i < num_bytes; i++) {
326 *posp = pos + num_bytes;
333 unsigned char unknown_char,
347 for (i = 0; i <
string->num_chars; i++) {
351 ws_fatal(
"ws_utf8_to_latin1_cstr(): internal inconsistency");
354 cstr[i] = unknown_char;
356 cstr[i] = (
unsigned char) ch;
362 *len_return =
string->num_chars;
void ws_fatal(char *fmt,...)
void * ws_calloc(size_t num, size_t size)
static unsigned char utf8_hibits[7]
void * ws_realloc(void *ptr, size_t size)
#define WS_UTF8_DEC_TYPE(b)
void ws_utf8_free(WsUtf8String *string)
static unsigned char utf8_hidata_masks[7]
#define WS_UTF8_ENC_C_BITS
#define WS_UTF8_CONT_DATA_MASK
int ws_utf8_append_char(WsUtf8String *string, unsigned long ch)
WsUtf8String * ws_utf8_alloc()
int ws_utf8_set_data(WsUtf8String *string, const unsigned char *data, size_t len)
void ws_utf8_free_data(unsigned char *data)
void * ws_memdup(const void *ptr, size_t size)
#define WS_UTF8_DEC_C_P(b)
int ws_utf8_get_char(const WsUtf8String *string, unsigned long *ch_return, size_t *posp)
int ws_utf8_verify(const unsigned char *data, size_t len, size_t *strlen_return)
#define WS_UTF8_ENC_TYPE(ch)
void * ws_malloc(size_t size)
unsigned char * ws_utf8_to_latin1(const WsUtf8String *string, unsigned char unknown_char, size_t *len_return)