Main Page | Alphabetical List | Data Structures | Directories | File List | Data Fields | Globals

charset.h

Go to the documentation of this file.
00001 /* ==================================================================== 
00002  * The Kannel Software License, Version 1.0 
00003  * 
00004  * Copyright (c) 2001-2008 Kannel Group  
00005  * Copyright (c) 1998-2001 WapIT Ltd.   
00006  * All rights reserved. 
00007  * 
00008  * Redistribution and use in source and binary forms, with or without 
00009  * modification, are permitted provided that the following conditions 
00010  * are met: 
00011  * 
00012  * 1. Redistributions of source code must retain the above copyright 
00013  *    notice, this list of conditions and the following disclaimer. 
00014  * 
00015  * 2. Redistributions in binary form must reproduce the above copyright 
00016  *    notice, this list of conditions and the following disclaimer in 
00017  *    the documentation and/or other materials provided with the 
00018  *    distribution. 
00019  * 
00020  * 3. The end-user documentation included with the redistribution, 
00021  *    if any, must include the following acknowledgment: 
00022  *       "This product includes software developed by the 
00023  *        Kannel Group (http://www.kannel.org/)." 
00024  *    Alternately, this acknowledgment may appear in the software itself, 
00025  *    if and wherever such third-party acknowledgments normally appear. 
00026  * 
00027  * 4. The names "Kannel" and "Kannel Group" must not be used to 
00028  *    endorse or promote products derived from this software without 
00029  *    prior written permission. For written permission, please  
00030  *    contact org@kannel.org. 
00031  * 
00032  * 5. Products derived from this software may not be called "Kannel", 
00033  *    nor may "Kannel" appear in their name, without prior written 
00034  *    permission of the Kannel Group. 
00035  * 
00036  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED 
00037  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 
00038  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 
00039  * DISCLAIMED.  IN NO EVENT SHALL THE KANNEL GROUP OR ITS CONTRIBUTORS 
00040  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,  
00041  * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT  
00042  * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR  
00043  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,  
00044  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE  
00045  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,  
00046  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
00047  * ==================================================================== 
00048  * 
00049  * This software consists of voluntary contributions made by many 
00050  * individuals on behalf of the Kannel Group.  For more information on  
00051  * the Kannel Group, please see <http://www.kannel.org/>. 
00052  * 
00053  * Portions of this software are based upon software originally written at  
00054  * WapIT Ltd., Helsinki, Finland for the Kannel project.  
00055  */ 
00056 
00057 /*
00058  * gwlib/charset.h - character set conversions
00059  *
00060  * This header defines some utility functions for converting between
00061  * character sets.  Approximations are made when necessary, so avoid
00062  * needless conversions.
00063  *
00064  * Currently only GSM and Latin-1 are supported with Kannel-specific
00065  * functions. This module also contains wrappers for libxml2 character
00066  * set conversion functions that work either from or to UTF-8. For more
00067  * about libxml2's character set support, see the header file
00068  * <libxml/encoding.h> or the implementation file encoding.c. Short
00069  * version: it has a few basic character set supports built in; for
00070  * the rest iconv is used.
00071  *
00072  * Richard Braakman
00073  * Tuomas Luttinen
00074  */
00075 
00076 #ifndef CHARSET_H
00077 #define CHARSET_H
00078 
00079 #include <libxml/encoding.h>
00080 #include <libxml/tree.h>
00081 
00082 /*
00083  * Initialize the charset subsystem.
00084  */
00085 void charset_init(void);
00086 
00087 /*
00088  * Shutdown the charset subsystem.
00089  */
00090 void charset_shutdown(void);
00091 
00097 void charset_gsm_to_utf8(Octstr *ostr);
00098 
00106 void charset_utf8_to_gsm(Octstr *ostr);
00107 
00108 /*
00109  * Convert from GSM default character set to NRC ISO 21 (German)
00110  * and vice versa.
00111  */
00112 void charset_gsm_to_nrc_iso_21_german(Octstr *ostr);
00113 void charset_nrc_iso_21_german_to_gsm(Octstr *ostr);
00114 
00115 /* Truncate a string of GSM characters to a maximum length.
00116  * Make sure the last remaining character is a whole character,
00117  * and not half of an escape sequence.
00118  * Return 1 if any characters were removed, otherwise 0.
00119  */
00120 int charset_gsm_truncate(Octstr *gsm, long max);
00121 
00122 /* Convert a string in the GSM default character set (GSM 03.38)
00123  * to ISO-8859-1.  A series of Greek characters (codes 16, 18-26)
00124  * are not representable and are converted to '?' characters.
00125  * GSM default is a 7-bit alphabet.  Characters with the 8th bit
00126  * set are left unchanged. */
00127 void charset_gsm_to_latin1(Octstr *gsm);
00128 
00129 /* Convert a string in the ISO-8859-1 character set to the GSM 
00130  * default character set (GSM 03.38).  A large number of characters
00131  * are not representable.  Approximations are made in some cases
00132  * (accented characters to their unaccented versions, for example),
00133  * and the rest are converted to '?' characters. */
00134 void charset_latin1_to_gsm(Octstr *latin1);
00135 
00136 /* Convert a string from the character set specified by charset_from into
00137  * the UTF-8 character set. The result is stored in the octet string *to that
00138  * is allocated by the function. The function returns the number of bytes
00139  * written for success, -1 for general error, -2 for a transcoding error
00140  * (the input string wasn't a valid string in the character set it was said
00141  * to be or there was no converter found for the character set).
00142  */
00143 int charset_to_utf8(Octstr *from, Octstr **to, Octstr *charset_from);
00144 
00145 /* Convert a string from the UTF-8 character set into another character set
00146  * specified by charset_to. The result is stored in the octet string *to
00147  * that is allocated by the function. The function returns the number of
00148  * bytes written for success, -1 for general error, -2 for a transcoding
00149  * error (the input string wasn't a valid string in the character set it
00150  * was said to be or there was no converter found for the character set).
00151  */
00152 int charset_from_utf8(Octstr *utf8, Octstr **to, Octstr *charset_to);
00153 
00154 /* Use the iconv library to convert an Octstr in place, from the source
00155  * character set (charset_from) to the destination character set (charset_to).
00156  */
00157 int charset_convert(Octstr* string, char* charset_from, char* charset_to);
00158 
00159 #endif
See file LICENSE for details about the license agreement for using, modifying, copying or deriving work from this software.