Kannel: Open Source WAP and SMS gateway  $Revision: 5037 $
charset.h
Go to the documentation of this file.
1 /* ====================================================================
2  * The Kannel Software License, Version 1.0
3  *
4  * Copyright (c) 2001-2016 Kannel Group
5  * Copyright (c) 1998-2001 WapIT Ltd.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  * notice, this list of conditions and the following disclaimer.
14  *
15  * 2. Redistributions in binary form must reproduce the above copyright
16  * notice, this list of conditions and the following disclaimer in
17  * the documentation and/or other materials provided with the
18  * distribution.
19  *
20  * 3. The end-user documentation included with the redistribution,
21  * if any, must include the following acknowledgment:
22  * "This product includes software developed by the
23  * Kannel Group (http://www.kannel.org/)."
24  * Alternately, this acknowledgment may appear in the software itself,
25  * if and wherever such third-party acknowledgments normally appear.
26  *
27  * 4. The names "Kannel" and "Kannel Group" must not be used to
28  * endorse or promote products derived from this software without
29  * prior written permission. For written permission, please
30  * contact org@kannel.org.
31  *
32  * 5. Products derived from this software may not be called "Kannel",
33  * nor may "Kannel" appear in their name, without prior written
34  * permission of the Kannel Group.
35  *
36  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39  * DISCLAIMED. IN NO EVENT SHALL THE KANNEL GROUP OR ITS CONTRIBUTORS
40  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
41  * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
42  * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
43  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
44  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
45  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
46  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
47  * ====================================================================
48  *
49  * This software consists of voluntary contributions made by many
50  * individuals on behalf of the Kannel Group. For more information on
51  * the Kannel Group, please see <http://www.kannel.org/>.
52  *
53  * Portions of this software are based upon software originally written at
54  * WapIT Ltd., Helsinki, Finland for the Kannel project.
55  */
56 
57 /*
58  * gwlib/charset.h - character set conversions
59  *
60  * This header defines some utility functions for converting between
61  * character sets. Approximations are made when necessary, so avoid
62  * needless conversions.
63  *
64  * Currently only GSM and Latin-1 are supported by Kannel-specific
65  * functions. This module also contains wrappers for the libxml2 character
66  * set conversion functions that convert either from or to UTF-8. More
67  * about libxml2's character set support can be found in the header file
68  * <libxml/encoding.h> or the implementation file encoding.c. Short
69  * version: it has built-in support for a few basic character sets; for
70  * the rest iconv is used.
71  *
72  * Richard Braakman
73  * Tuomas Luttinen
74  */
75 
76 #ifndef CHARSET_H
77 #define CHARSET_H
78 
79 #include <libxml/encoding.h>
80 #include <libxml/tree.h>
81 
82 /*
83  * Initialize the charset subsystem.
84  */
85 void charset_init(void);
86 
87 /*
88  * Shutdown the charset subsystem.
89  */
90 void charset_shutdown(void);
91 
97 void charset_gsm_to_utf8(Octstr *ostr);
98 
106 void charset_utf8_to_gsm(Octstr *ostr);
107 
108 /*
109  * Convert from GSM default character set to NRC ISO 21 (German)
110  * and vice versa.
111  */
114 
115 /* Truncate a string of GSM characters to a maximum length.
116  * Make sure the last remaining character is a whole character,
117  * and not half of an escape sequence.
118  * Return 1 if any characters were removed, otherwise 0.
119  */
120 int charset_gsm_truncate(Octstr *gsm, long max);
121 
122 /* Convert a string in the GSM default character set (GSM 03.38)
123  * to ISO-8859-1. A series of Greek characters (codes 16, 18-26)
124  * are not representable and are converted to '?' characters.
125  * GSM default is a 7-bit alphabet. Characters with the 8th bit
126  * set are left unchanged. */
128 
129 /* Convert a string in the ISO-8859-1 character set to the GSM
130  * default character set (GSM 03.38). A large number of characters
131  * are not representable. Approximations are made in some cases
132  * (accented characters to their unaccented versions, for example),
133  * and the rest are converted to '?' characters. */
135 
136 /* Convert a string from character set specified by charset_from into
137  * UTF-8 character set. The result is stored in the octet string *to that
138  * is allocated by the function. The function returns the number of bytes
139  * written on success, -1 for a general error, -2 for a transcoding error
140  * (the input string wasn't a valid string in the character set it was said
141  * to be, or there was no converter found for the character set).
142  */
143 int charset_to_utf8(Octstr *from, Octstr **to, Octstr *charset_from);
144 
145 /* Convert a string from UTF-8 character set into another character set
146  * specified by charset_to. The result is stored in the octet string *to
147  * that is allocated by the function. The function returns the number of
148  * bytes written on success, -1 for a general error, -2 for a transcoding
149  * error (the input string wasn't a valid string in the character set it
150  * was said to be, or there was no converter found for the character set).
151  */
152 int charset_from_utf8(Octstr *utf8, Octstr **to, Octstr *charset_to);
153 
154 /* Use the iconv library to convert an Octstr in place, from the source
155  * character set to the destination character set.
156  */
157 int charset_convert(Octstr* string, char* charset_from, char* charset_to);
158 
159 #endif
void charset_shutdown(void)
Definition: charset.c:210
void charset_latin1_to_gsm(Octstr *latin1)
Definition: charset.c:430
int charset_to_utf8(Octstr *from, Octstr **to, Octstr *charset_from)
Definition: charset.c:526
unsigned char gsm
Definition: smsc_cimd2.c:1025
void charset_utf8_to_gsm(Octstr *ostr)
Definition: charset.c:288
int charset_gsm_truncate(Octstr *gsm, long max)
Definition: charset.c:512
void charset_gsm_to_nrc_iso_21_german(Octstr *ostr)
Definition: charset.c:460
void charset_gsm_to_latin1(Octstr *gsm)
Definition: charset.c:394
void charset_init(void)
Definition: charset.c:200
int charset_convert(Octstr *string, char *charset_from, char *charset_to)
Definition: charset.c:589
static Octstr * from
Definition: mtbatch.c:95
void charset_nrc_iso_21_german_to_gsm(Octstr *ostr)
Definition: charset.c:486
int latin1
Definition: charset.c:85
int charset_from_utf8(Octstr *utf8, Octstr **to, Octstr *charset_to)
Definition: charset.c:558
void charset_gsm_to_utf8(Octstr *ostr)
Definition: charset.c:220
Definition: octstr.c:118
See file LICENSE for details about the license agreement for using, modifying, copying or deriving work from this software.