00001 /* ==================================================================== 00002 * The Kannel Software License, Version 1.0 00003 * 00004 * Copyright (c) 2001-2008 Kannel Group 00005 * Copyright (c) 1998-2001 WapIT Ltd. 00006 * All rights reserved. 00007 * 00008 * Redistribution and use in source and binary forms, with or without 00009 * modification, are permitted provided that the following conditions 00010 * are met: 00011 * 00012 * 1. Redistributions of source code must retain the above copyright 00013 * notice, this list of conditions and the following disclaimer. 00014 * 00015 * 2. Redistributions in binary form must reproduce the above copyright 00016 * notice, this list of conditions and the following disclaimer in 00017 * the documentation and/or other materials provided with the 00018 * distribution. 00019 * 00020 * 3. The end-user documentation included with the redistribution, 00021 * if any, must include the following acknowledgment: 00022 * "This product includes software developed by the 00023 * Kannel Group (http://www.kannel.org/)." 00024 * Alternately, this acknowledgment may appear in the software itself, 00025 * if and wherever such third-party acknowledgments normally appear. 00026 * 00027 * 4. The names "Kannel" and "Kannel Group" must not be used to 00028 * endorse or promote products derived from this software without 00029 * prior written permission. For written permission, please 00030 * contact org@kannel.org. 00031 * 00032 * 5. Products derived from this software may not be called "Kannel", 00033 * nor may "Kannel" appear in their name, without prior written 00034 * permission of the Kannel Group. 00035 * 00036 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED 00037 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 00038 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 00039 * DISCLAIMED. 
IN NO EVENT SHALL THE KANNEL GROUP OR ITS CONTRIBUTORS 00040 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, 00041 * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT 00042 * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 00043 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 00044 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 00045 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 00046 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00047 * ==================================================================== 00048 * 00049 * This software consists of voluntary contributions made by many 00050 * individuals on behalf of the Kannel Group. For more information on 00051 * the Kannel Group, please see <http://www.kannel.org/>. 00052 * 00053 * Portions of this software are based upon software originally written at 00054 * WapIT Ltd., Helsinki, Finland for the Kannel project. 00055 */ 00056 00057 /* 00058 * gwlib/charset.h - character set conversions 00059 * 00060 * This header defines some utility functions for converting between 00061 * character sets. Approximations are made when necessary, so avoid 00062 * needless conversions. 00063 * 00064 * Currently only GSM and Latin-1 are supported with Kannel specific 00065 * functions. This module contains also wrappers for libxml2 character 00066 * set conversion functions that work either from or to UTF-8. More 00067 * about libxml2's character set support on the header file 00068 * <libxml/encoding.h> or the implementation file encoding.c. Short 00069 * version: it has a few basic character set supports built in; for 00070 * the rest iconv is used. 00071 * 00072 * Richard Braakman 00073 * Tuomas Luttinen 00074 */ 00075 00076 #ifndef CHARSET_H 00077 #define CHARSET_H 00078 00079 #include <libxml/encoding.h> 00080 #include <libxml/tree.h> 00081 00082 /* 00083 * Initialize the charset subsystem. 
00084 */ 00085 void charset_init(void); 00086 00087 /* 00088 * Shut down the charset subsystem. 00089 */ 00090 void charset_shutdown(void); 00091 /* NOTE(review): the doc comment for this declaration is missing from this listing; presumably converts ostr in place from the GSM default character set to UTF-8 -- confirm against the implementation. */ 00097 void charset_gsm_to_utf8(Octstr *ostr); 00098 /* NOTE(review): the doc comment for this declaration is missing from this listing; presumably converts ostr in place from UTF-8 to the GSM default character set -- confirm against the implementation. */ 00106 void charset_utf8_to_gsm(Octstr *ostr); 00107 00108 /* 00109 * Convert from GSM default character set to NRC ISO 21 (German) 00110 * and vice versa. 00111 */ 00112 void charset_gsm_to_nrc_iso_21_german(Octstr *ostr); 00113 void charset_nrc_iso_21_german_to_gsm(Octstr *ostr); 00114 00115 /* Truncate a string of GSM characters to a maximum length. 00116 * Make sure the last remaining character is a whole character, 00117 * and not half of an escape sequence. 00118 * Return 1 if any characters were removed, otherwise 0. 00119 */ 00120 int charset_gsm_truncate(Octstr *gsm, long max); 00121 00122 /* Convert a string in the GSM default character set (GSM 03.38) 00123 * to ISO-8859-1. A series of Greek characters (codes 16, 18-26) 00124 * are not representable and are converted to '?' characters. 00125 * GSM default is a 7-bit alphabet. Characters with the 8th bit 00126 * set are left unchanged. */ 00127 void charset_gsm_to_latin1(Octstr *gsm); 00128 00129 /* Convert a string in the ISO-8859-1 character set to the GSM 00130 * default character set (GSM 03.38). A large number of characters 00131 * are not representable. Approximations are made in some cases 00132 * (accented characters to their unaccented versions, for example), 00133 * and the rest are converted to '?' characters. */ 00134 void charset_latin1_to_gsm(Octstr *latin1); 00135 00136 /* Convert a string from the character set specified by charset_from into 00137 * the UTF-8 character set. The result is stored in the octet string *to that 00138 * is allocated by the function. 
The function returns the number of bytes 00139 * written for success, -1 for general error, -2 for a transcoding error 00140 * (the input string wasn't a valid string in the character set it was said 00141 * to be or there was no converter found for the character set). 00142 */ 00143 int charset_to_utf8(Octstr *from, Octstr **to, Octstr *charset_from); 00144 00145 /* Convert a string from the UTF-8 character set into another character set 00146 * specified by charset_to. The result is stored in the octet string *to 00147 * that is allocated by the function. The function returns the number of 00148 * bytes written for success, -1 for general error, -2 for a transcoding 00149 * error (the input string wasn't a valid string in the character set it 00150 * was said to be or there was no converter found for the character set). 00151 */ 00152 int charset_from_utf8(Octstr *utf8, Octstr **to, Octstr *charset_to); 00153 00154 /* Use the iconv library to convert an Octstr in place, from the source character set 00155 * (charset_from) to the destination character set (charset_to). NOTE(review): the return value is not documented here -- confirm against the implementation. 00156 */ 00157 int charset_convert(Octstr* string, char* charset_from, char* charset_to); 00158 00159 #endif