Main Page | Alphabetical List | Data Structures | Directories | File List | Data Fields | Globals

wsutf8.h

Go to the documentation of this file.
00001 /* ==================================================================== 
00002  * The Kannel Software License, Version 1.0 
00003  * 
00004  * Copyright (c) 2001-2008 Kannel Group  
00005  * Copyright (c) 1998-2001 WapIT Ltd.   
00006  * All rights reserved. 
00007  * 
00008  * Redistribution and use in source and binary forms, with or without 
00009  * modification, are permitted provided that the following conditions 
00010  * are met: 
00011  * 
00012  * 1. Redistributions of source code must retain the above copyright 
00013  *    notice, this list of conditions and the following disclaimer. 
00014  * 
00015  * 2. Redistributions in binary form must reproduce the above copyright 
00016  *    notice, this list of conditions and the following disclaimer in 
00017  *    the documentation and/or other materials provided with the 
00018  *    distribution. 
00019  * 
00020  * 3. The end-user documentation included with the redistribution, 
00021  *    if any, must include the following acknowledgment: 
00022  *       "This product includes software developed by the 
00023  *        Kannel Group (http://www.kannel.org/)." 
00024  *    Alternately, this acknowledgment may appear in the software itself, 
00025  *    if and wherever such third-party acknowledgments normally appear. 
00026  * 
00027  * 4. The names "Kannel" and "Kannel Group" must not be used to 
00028  *    endorse or promote products derived from this software without 
00029  *    prior written permission. For written permission, please  
00030  *    contact org@kannel.org. 
00031  * 
00032  * 5. Products derived from this software may not be called "Kannel", 
00033  *    nor may "Kannel" appear in their name, without prior written 
00034  *    permission of the Kannel Group. 
00035  * 
00036  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED 
00037  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 
00038  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 
00039  * DISCLAIMED.  IN NO EVENT SHALL THE KANNEL GROUP OR ITS CONTRIBUTORS 
00040  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,  
00041  * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT  
00042  * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR  
00043  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,  
00044  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE  
00045  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,  
00046  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
00047  * ==================================================================== 
00048  * 
00049  * This software consists of voluntary contributions made by many 
00050  * individuals on behalf of the Kannel Group.  For more information on  
00051  * the Kannel Group, please see <http://www.kannel.org/>. 
00052  * 
00053  * Portions of this software are based upon software originally written at  
00054  * WapIT Ltd., Helsinki, Finland for the Kannel project.  
00055  */ 
00056 
00057 /*
00058  *
00059  * wsutf8.h
00060  *
00061  * Author: Markku Rossi <mtr@iki.fi>
00062  *
00063  * Copyright (c) 1999-2000 WAPIT OY LTD.
00064  *       All rights reserved.
00065  *
00066  * Functions to manipulate UTF-8 encoded strings.
00067  *
00068  * Specification: RFC-2279
00069  *
00070  */
00071 
00072 #ifndef WSUTF8_H
00073 #define WSUTF8_H
00074 
00075 /********************* Types and definitions ****************************/
00076 
00077 /* UTF-8 string handle: encoded data, its length, and its character count. */
00078 struct WsUtf8StringRec
00079 {
00080     /* Length of the UTF-8 encoded `data'; presumably a byte count -- TODO confirm. */
00081     size_t len;
00082 
00083     /* The UTF-8 encoded data, `len' units long. */
00084     unsigned char *data;
00085 
00086     /* The number of characters in the string, tracked separately from `len'. */
00087     size_t num_chars;
00088 };
00089 
00090 typedef struct WsUtf8StringRec WsUtf8String; /* Name used by the functions declared below. */
00091 
00092 /********************* Global functions *********************************/
00093 
00094 /* Allocate an empty UTF-8 string.  The function returns NULL if the
00095    allocation failed (out of memory). */
00096 WsUtf8String *ws_utf8_alloc(void);
00097 
00098 /* Free a UTF-8 encoded string. */
00099 void ws_utf8_free(WsUtf8String *string);
00100 
00101 /* Append the character `ch' to the string `string'.  The function
00102    returns 1 if the operation was successful or 0 otherwise (out of
00103    memory). */
00104 int ws_utf8_append_char(WsUtf8String *string, unsigned long ch);
00105 
00106 /* Verify the UTF-8 encoded string `data' containing `len' bytes of
00107    data.  The function returns 1 if the `data' is correctly encoded
00108    and 0 otherwise.  If the argument `strlen_return' is not NULL, it
00109    is set to the number of characters in the string. */
00110 int ws_utf8_verify(const unsigned char *data, size_t len,
00111                    size_t *strlen_return);
00112 
00113 /* Set UTF-8 encoded data `data', `len' to the string `string'.  The
00114    function returns 1 if the data was UTF-8 encoded and 0 otherwise
00115    (malformed data or out of memory).  The function frees the possible
00116    old data from `string'. */
00117 int ws_utf8_set_data(WsUtf8String *string, const unsigned char *data,
00118                      size_t len);
00119 
00120 /* Get a character from the UTF-8 string `string'.  The argument
00121    `posp' gives the index of the character in the UTF-8 encoded data.
00122    It is not the sequence number of the character.  It is its starting
00123    position within the UTF-8 encoded data.  The argument `posp' is
00124    updated to point to the beginning of the next character within the
00125    data.  The character is returned in `ch_return'.  The function
00126    returns 1 if the operation was successful or 0 otherwise (index
00127    `posp' was invalid or there were no more characters in the
00128    string). */
00129 int ws_utf8_get_char(const WsUtf8String *string, unsigned long *ch_return,
00130                      size_t *posp);
00131 
00132 /* Convert the UTF-8 encoded string `string' to a null-terminated ISO
00133    8859-1 (ISO Latin-1) string.  Those characters of `string' which
00134    cannot be represented in Latin-1 are replaced with the character
00135    `unknown_char'.  If the argument `len_return' is not NULL, it is
00136    set to contain the length of the returned string (excluding the
00137    trailing null-character).  The function returns a pointer to the
00138    string or NULL if the operation failed (out of memory).  The
00139    returned string must be freed with the ws_utf8_free_data()
00140    function. */
00141 unsigned char *ws_utf8_to_latin1(const WsUtf8String *string,
00142                                  unsigned char unknown_char,
00143                                  size_t *len_return);
00144 
00145 /* Free a string returned by the ws_utf8_to_latin1()
00146    function. */
00147 void ws_utf8_free_data(unsigned char *data);
00148 
00149 #endif /* not WSUTF8_H */
See file LICENSE for details about the license agreement for using, modifying, copying or deriving work from this software.