Kannel: Open Source WAP and SMS gateway  $Revision: 5037 $
xml_shared.c
Go to the documentation of this file.
1 /* ====================================================================
2  * The Kannel Software License, Version 1.0
3  *
4  * Copyright (c) 2001-2016 Kannel Group
5  * Copyright (c) 1998-2001 WapIT Ltd.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  * notice, this list of conditions and the following disclaimer.
14  *
15  * 2. Redistributions in binary form must reproduce the above copyright
16  * notice, this list of conditions and the following disclaimer in
17  * the documentation and/or other materials provided with the
18  * distribution.
19  *
20  * 3. The end-user documentation included with the redistribution,
21  * if any, must include the following acknowledgment:
22  * "This product includes software developed by the
23  * Kannel Group (http://www.kannel.org/)."
24  * Alternately, this acknowledgment may appear in the software itself,
25  * if and wherever such third-party acknowledgments normally appear.
26  *
27  * 4. The names "Kannel" and "Kannel Group" must not be used to
28  * endorse or promote products derived from this software without
29  * prior written permission. For written permission, please
30  * contact org@kannel.org.
31  *
32  * 5. Products derived from this software may not be called "Kannel",
33  * nor may "Kannel" appear in their name, without prior written
34  * permission of the Kannel Group.
35  *
36  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39  * DISCLAIMED. IN NO EVENT SHALL THE KANNEL GROUP OR ITS CONTRIBUTORS
40  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
41  * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
42  * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
43  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
44  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
45  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
46  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
47  * ====================================================================
48  *
49  * This software consists of voluntary contributions made by many
50  * individuals on behalf of the Kannel Group. For more information on
51  * the Kannel Group, please see <http://www.kannel.org/>.
52  *
53  * Portions of this software are based upon software originally written at
54  * WapIT Ltd., Helsinki, Finland for the Kannel project.
55  */
56 
57 /*
58  * xml_shared.c: Common functions of xml compilers (mainly charset handling
59  * and operations with wbxml binary not using a string table)
60  *
61  * By Tuomas Luttinen & Aarno Syvänen (for Wiral Ltd)
62  */
63 
64 #include <ctype.h>
65 
66 #include "xml_shared.h"
67 #include "xml_definitions.h"
68 
69 #include <string.h>
70 
/*
 * One row of the character set table. A charset name such as "ISO-8859-1"
 * is stored split at its first separator into a family part and a number
 * part, together with the numeric identifier used for it.
 */
struct charset_t {
    char *charset;          /* family part of the name, e.g. "ISO" */
    char *nro;              /* remainder of the name, e.g. "8859-1" */
    unsigned int MIBenum;   /* numeric identifier returned by parse_charset() */
};
76 
78  { "ISO", "8859-1", 4 },
79  { "ISO", "8859-2", 5 },
80  { "ISO", "8859-3", 6 },
81  { "ISO", "8859-4", 7 },
82  { "ISO", "8859-5", 8 },
83  { "ISO", "8859-6", 9 },
84  { "ISO", "8859-7", 10 },
85  { "ISO", "8859-8", 11 },
86  { "ISO", "8859-9", 12 },
87  { "WINDOWS", "1250", 2250 },
88  { "WINDOWS", "1251", 2251 },
89  { "WINDOWS", "1252", 2252 },
90  { "WINDOWS", "1253", 2253 },
91  { "WINDOWS", "1254", 2254 },
92  { "WINDOWS", "1255", 2255 },
93  { "WINDOWS", "1256", 2256 },
94  { "WINDOWS", "1257", 2257 },
95  { "WINDOWS", "1258", 2258 },
96  { "UTF", "8", 106 },
97  { NULL }
98 };
99 
100 /****************************************************************************
101  *
102  * Implementation of external functions
103  */
104 
105 
106 /*
107  * set_charset - if xml doesn't have an <?xml..encoding=something>,
108  * converts body from argument charset to UTF-8
109  */
110 
111 void set_charset(Octstr *document, Octstr *charset)
112 {
113  long gt = 0, enc = 0;
114  Octstr *encoding = NULL, *text = NULL, *temp = NULL;
115 
116  if (octstr_len(charset) == 0)
117  return;
118 
119  encoding = octstr_create(" encoding");
120  enc = octstr_search(document, encoding, 0);
121  gt = octstr_search_char(document, '>', 0);
122 
123  if (enc < 0 || enc > gt) {
124  gt++;
125  text = octstr_copy(document, gt, octstr_len(document) - gt);
126  if (charset_to_utf8(text, &temp, charset) >= 0) {
127  octstr_delete(document, gt, octstr_len(document) - gt);
128  octstr_append_data(document, octstr_get_cstr(temp),
129  octstr_len(temp));
130  }
131 
132  octstr_destroy(temp);
133  octstr_destroy(text);
134  }
135 
136  octstr_destroy(encoding);
137 }
138 
139 
140 /*
141  * find_charset_encoding -- parses for a encoding argument within
142  * the xml preabmle, ie. <?xml verion="xxx" encoding="ISO-8859-1"?>
143  */
144 
146 {
147  long gt = 0, enc = 0;
148  Octstr *encoding = NULL, *temp = NULL;
149 
150  enc = octstr_search(document, octstr_imm(" encoding="), 0);
151  gt = octstr_search(document, octstr_imm("?>"), 0);
152 
153  /* in case there is no encoding argument, assume always UTF-8 */
154  if (enc < 0 || enc + 10 > gt)
155  return NULL;
156 
157  temp = octstr_copy(document, enc + 10, gt - (enc + 10));
158  octstr_strip_blanks(temp);
159  encoding = octstr_copy(temp, 1, octstr_len(temp) - 2);
160  octstr_destroy(temp);
161 
162  return encoding;
163 }
164 
165 
/*
 * only_blanks - checks if a text node contains only white space, in which
 * case it can be left out as element content.
 *
 * Returns 1 when every character of `text` satisfies isspace() (this
 * includes the empty string, and a NULL pointer is treated the same way),
 * and 0 as soon as a non-blank character is found.
 */
int only_blanks(const char *text)
{
    const unsigned char *p = (const unsigned char *) text;

    if (p == NULL)
        return 1;

    /*
     * The bytes are read as unsigned char: handing a negative char value
     * (any byte >= 0x80 where char is signed) to isspace() is undefined.
     */
    for (; *p != '\0'; p++) {
        if (!isspace(*p))
            return 0;
    }

    return 1;
}
184 
185 /*
186  * Parses the character set of the document.
187  */
188 
190 {
191  Octstr *charset = NULL;
192  Octstr *number = NULL;
193  int i, j, cut = 0, ret = 0;
194 
195  gw_assert(os != NULL);
196  charset = octstr_duplicate(os);
197 
198  /* The charset might be in lower case, so... */
199  octstr_convert_range(charset, 0, octstr_len(charset), toupper);
200 
201  /*
202  * The character set is handled in two parts to make things easier.
203  * The cutting.
204  */
205  if ((cut = octstr_search_char(charset, '_', 0)) > 0) {
206  number = octstr_copy(charset, cut + 1, (octstr_len(charset) - (cut + 1)));
207  octstr_truncate(charset, cut);
208  }
209  else if ((cut = octstr_search_char(charset, '-', 0)) > 0) {
210  number = octstr_copy(charset, cut + 1, (octstr_len(charset) - (cut + 1)));
211  octstr_truncate(charset, cut);
212  }
213 
214  /* And table search. */
215  for (i = 0; character_sets[i].charset != NULL; i++)
216  if (octstr_str_compare(charset, character_sets[i].charset) == 0) {
217  for (j = i; octstr_str_compare(charset,
218  character_sets[j].charset) == 0; j++)
219  if (octstr_str_compare(number, character_sets[j].nro) == 0) {
220  ret = character_sets[j].MIBenum;
221  break;
222  }
223  break;
224  }
225 
226  /* UTF-8 is the default value */
227  if (character_sets[i].charset == NULL)
228  ret = character_sets[i-1].MIBenum;
229 
230  octstr_destroy(number);
231  octstr_destroy(charset);
232 
233  return ret;
234 }
235 
236 /*
237  * element_check_content - a helper function for parse_element for checking
238  * if an element has content or attributes. Returns status bit for attributes
239  * (0x80) and another for content (0x40) added into one octet.
240  */
241 
242 unsigned char element_check_content(xmlNodePtr node)
243 {
244  unsigned char status_bits = 0x00;
245 
246  if ((node->children != NULL) &&
247  !((node->children->next == NULL) &&
248  (node->children->type == XML_TEXT_NODE) &&
249  (only_blanks((char *)node->children->content))))
250  status_bits = WBXML_CONTENT_BIT;
251 
252  if (node->properties != NULL)
253  status_bits = status_bits | WBXML_ATTR_BIT;
254 
255  return status_bits;
256 }
257 
258 /*
259  * Return the character sets supported by the WML compiler, as a List
260  * of Octstrs, where each string is the MIME identifier for one charset.
261  */
263 {
264  int i;
265  List *result;
266  Octstr *charset;
267 
268  result = gwlist_create();
269  for (i = 0; character_sets[i].charset != NULL; i++) {
270  charset = octstr_create(character_sets[i].charset);
271  octstr_append_char(charset, '-');
272  octstr_append(charset, octstr_imm(character_sets[i].nro));
273  gwlist_append(result, charset);
274  }
275 
276  return result;
277 }
278 
279 /*
280  * Functions working with simple binary data type (no string table). No
281  * variables are present either.
282  */
283 
285 {
287 
288  binary = gw_malloc(sizeof(simple_binary_t));
289 
290  binary->wbxml_version = 0x00;
291  binary->public_id = 0x00;
292  binary->charset = 0x00;
293  binary->binary = octstr_create("");
294 
295  return binary;
296 }
297 
299 {
300  if (binary == NULL)
301  return;
302 
303  octstr_destroy(binary->binary);
304  gw_free(binary);
305 }
306 
307 /*
308  * Output the wbxml content field after field into octet string os. We add
309  * string table length 0 (meaning no string table) before the content.
310  */
312 {
313  gw_assert(octstr_len(os) == 0);
314  octstr_format_append(os, "%c", binary->wbxml_version);
315  octstr_format_append(os, "%c", binary->public_id);
316  octstr_append_uintvar(os, binary->charset);
317  octstr_format_append(os, "%c", 0x00);
318  octstr_format_append(os, "%S", binary->binary);
319 }
320 
322 {
323  output_char(WBXML_END, binary);
324 }
325 
327 {
328  octstr_append_char((**binary).binary, byte);
329 }
330 
332 {
333  output_octet_string(os, binary);
334 }
335 
336 /*
337  * Add global tokens to the start and to the end of an inline string.
338  */
340 {
341  Octstr *startos;
342 
343  octstr_insert(temp, startos = octstr_format("%c", WBXML_STR_I), 0);
344  octstr_destroy(startos);
345  octstr_format_append(temp, "%c", WBXML_STR_END);
346  parse_octet_string(temp, binary);
347 }
348 
350 {
351  octstr_insert((*sibxml)->binary, os, octstr_len((*sibxml)->binary));
352 }
Octstr * binary
Definition: xml_shared.h:88
int number
Definition: smsc_cimd2.c:213
List * wml_charsets(void)
Definition: xml_shared.c:262
void octstr_append_data(Octstr *ostr, const char *data, long len)
Definition: octstr.c:1495
void output_octet_string(Octstr *os, simple_binary_t **sibxml)
Definition: xml_shared.c:349
unsigned char element_check_content(xmlNodePtr node)
Definition: xml_shared.c:242
void octstr_convert_range(Octstr *ostr, long pos, long len, octstr_func_t map)
Definition: octstr.c:834
void gwlist_append(List *list, void *item)
Definition: list.c:179
void octstr_append(Octstr *ostr1, const Octstr *ostr2)
Definition: octstr.c:1502
int charset_to_utf8(Octstr *from, Octstr **to, Octstr *charset_from)
Definition: charset.c:526
void octstr_append_char(Octstr *ostr, int ch)
Definition: octstr.c:1515
long octstr_search(const Octstr *haystack, const Octstr *needle, long pos)
Definition: octstr.c:1068
void parse_octet_string(Octstr *os, simple_binary_t **binary)
Definition: xml_shared.c:331
void octstr_strip_blanks(Octstr *text)
Definition: octstr.c:1344
#define octstr_get_cstr(ostr)
Definition: octstr.h:233
#define WBXML_STR_END
#define octstr_copy(ostr, from, len)
Definition: octstr.h:178
long octstr_search_char(const Octstr *ostr, int ch, long pos)
Definition: octstr.c:1010
Octstr * charset
Definition: test_ota.c:68
void simple_binary_destroy(simple_binary_t *binary)
Definition: xml_shared.c:298
unsigned long charset
Definition: xml_shared.h:87
#define WBXML_STR_I
void parse_inline_string(Octstr *temp, simple_binary_t **binary)
Definition: xml_shared.c:339
Octstr * octstr_imm(const char *cstr)
Definition: octstr.c:281
void octstr_insert(Octstr *ostr1, const Octstr *ostr2, long pos)
Definition: octstr.c:1301
int parse_charset(Octstr *os)
Definition: xml_shared.c:189
void octstr_delete(Octstr *ostr1, long pos, long len)
Definition: octstr.c:1525
char * text
Definition: smsc_cimd2.c:921
#define octstr_duplicate(ostr)
Definition: octstr.h:187
char * nro
Definition: xml_shared.c:73
#define WBXML_END
Octstr * octstr_format(const char *fmt,...)
Definition: octstr.c:2462
void octstr_destroy(Octstr *ostr)
Definition: octstr.c:322
#define octstr_create(cstr)
Definition: octstr.h:125
unsigned char wbxml_version
Definition: xml_shared.h:85
unsigned char public_id
Definition: xml_shared.h:86
gw_assert(wtls_machine->packet_to_send!=NULL)
long octstr_len(const Octstr *ostr)
Definition: octstr.c:340
void simple_binary_output(Octstr *os, simple_binary_t *binary)
Definition: xml_shared.c:311
void octstr_append_uintvar(Octstr *ostr, unsigned long value)
Definition: octstr.c:1929
Definition: octstr.c:118
charset_t character_sets[]
Definition: xml_shared.c:77
char * charset
Definition: xml_shared.c:72
Octstr * find_charset_encoding(Octstr *document)
Definition: xml_shared.c:145
void set_charset(Octstr *document, Octstr *charset)
Definition: xml_shared.c:111
int only_blanks(const char *text)
Definition: xml_shared.c:171
int octstr_str_compare(const Octstr *ostr, const char *str)
Definition: octstr.c:971
void parse_end(simple_binary_t **binary)
Definition: xml_shared.c:321
void octstr_format_append(Octstr *os, const char *fmt,...)
Definition: octstr.c:2505
#define gwlist_create()
Definition: list.h:136
void octstr_truncate(Octstr *ostr, int new_len)
Definition: octstr.c:1325
void output_char(int byte, simple_binary_t **binary)
Definition: xml_shared.c:326
#define WBXML_CONTENT_BIT
#define WBXML_ATTR_BIT
unsigned int MIBenum
Definition: xml_shared.c:74
simple_binary_t * simple_binary_create(void)
Definition: xml_shared.c:284
Definition: list.c:102
See file LICENSE for details about the license agreement for using, modifying, copying or deriving work from this software.