/* ====================================================================
 * The Kannel Software License, Version 1.0
 *
 * Copyright (c) 2001-2008 Kannel Group
 * Copyright (c) 1998-2001 WapIT Ltd.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. The end-user documentation included with the redistribution,
 *    if any, must include the following acknowledgment:
 *       "This product includes software developed by the
 *        Kannel Group (http://www.kannel.org/)."
 *    Alternately, this acknowledgment may appear in the software itself,
 *    if and wherever such third-party acknowledgments normally appear.
 *
 * 4. The names "Kannel" and "Kannel Group" must not be used to
 *    endorse or promote products derived from this software without
 *    prior written permission. For written permission, please
 *    contact org@kannel.org.
 *
 * 5. Products derived from this software may not be called "Kannel",
 *    nor may "Kannel" appear in their name, without prior written
 *    permission of the Kannel Group.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE KANNEL GROUP OR ITS CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
 * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
 * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Kannel Group.  For more information on
 * the Kannel Group, please see <http://www.kannel.org/>.
 *
 * Portions of this software are based upon software originally written at
 * WapIT Ltd., Helsinki, Finland for the Kannel project.
 */

/*
 *
 * wsutf8.h
 *
 * Author: Markku Rossi <mtr@iki.fi>
 *
 * Copyright (c) 1999-2000 WAPIT OY LTD.
 *		 All rights reserved.
 *
 * Functions to manipulate UTF-8 encoded strings.
 *
 * Specification: RFC-2279
 *
 */

#ifndef WSUTF8_H
#define WSUTF8_H

/* size_t is used throughout the declarations below; include its
   definition here so that this header can be included on its own. */
#include <stddef.h>

/********************* Types and definitions ****************************/

/* UTF-8 string handle. */
struct WsUtf8StringRec
{
    /* The length, in bytes, of the UTF-8 encoded `data'. */
    size_t len;

    /* The UTF-8 encoded data. */
    unsigned char *data;

    /* The number of characters in the string.  This is tracked
       separately from `len' because one character can take more than
       one byte in the UTF-8 encoding. */
    size_t num_chars;
};

typedef struct WsUtf8StringRec WsUtf8String;

/********************* Global functions *********************************/

/* Allocate an empty UTF-8 string.  The function returns NULL if the
   allocation failed (out of memory). */
WsUtf8String *ws_utf8_alloc(void);

/* Free a UTF-8 encoded string. */
void ws_utf8_free(WsUtf8String *string);

/* Append the character `ch' to the string `string'.  The function
   returns 1 if the operation was successful or 0 otherwise (out of
   memory). */
int ws_utf8_append_char(WsUtf8String *string, unsigned long ch);

/* Verify the UTF-8 encoded string `data' containing `len' bytes of
   data.  The function returns 1 if the `data' is correctly encoded
   and 0 otherwise.  If the argument `strlen_return' is not NULL, it
   is set to the number of characters in the string. */
int ws_utf8_verify(const unsigned char *data, size_t len,
                   size_t *strlen_return);

/* Set UTF-8 encoded data `data', `len' to the string `string'.  The
   function returns 1 if the data was UTF-8 encoded and 0 otherwise
   (malformed data or out of memory).  The function frees the possible
   old data from `string'. */
int ws_utf8_set_data(WsUtf8String *string, const unsigned char *data,
                     size_t len);

/* Get a character from the UTF-8 string `string'.  The argument
   `posp' gives the index of the character in the UTF-8 encoded data.
   It is not the sequence number of the character.  It is its starting
   position within the UTF-8 encoded data.  The argument `posp' is
   updated to point to the beginning of the next character within the
   data.  The character is returned in `ch_return'.  The function
   returns 1 if the operation was successful or 0 otherwise (index
   `posp' was invalid or there were no more characters in the
   string). */
int ws_utf8_get_char(const WsUtf8String *string, unsigned long *ch_return,
                     size_t *posp);

/* Convert the UTF-8 encoded string `string' to a null-terminated ISO
   8859/1 (ISO latin1) string.  Those characters of `string' which
   cannot be represented in latin1 are replaced with the character
   `unknown_char'.  If the argument `len_return' is not NULL, it is
   set to contain the length of the returned string (excluding the
   trailing null-character).  The function returns a pointer to the
   string or NULL if the operation failed (out of memory).  The
   returned string must be freed with the ws_utf8_free_data()
   function. */
unsigned char *ws_utf8_to_latin1(const WsUtf8String *string,
                                 unsigned char unknown_char,
                                 size_t *len_return);

/* Free a string, returned by the ws_utf8_to_latin1() function. */
void ws_utf8_free_data(unsigned char *data);

#endif /* not WSUTF8_H */