Kannel: Open Source WAP and SMS gateway  $Revision: 5037 $
wap_push_sl_compiler.c
Go to the documentation of this file.
1 /* ====================================================================
2  * The Kannel Software License, Version 1.0
3  *
4  * Copyright (c) 2001-2016 Kannel Group
5  * Copyright (c) 1998-2001 WapIT Ltd.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  * notice, this list of conditions and the following disclaimer.
14  *
15  * 2. Redistributions in binary form must reproduce the above copyright
16  * notice, this list of conditions and the following disclaimer in
17  * the documentation and/or other materials provided with the
18  * distribution.
19  *
20  * 3. The end-user documentation included with the redistribution,
21  * if any, must include the following acknowledgment:
22  * "This product includes software developed by the
23  * Kannel Group (http://www.kannel.org/)."
24  * Alternately, this acknowledgment may appear in the software itself,
25  * if and wherever such third-party acknowledgments normally appear.
26  *
27  * 4. The names "Kannel" and "Kannel Group" must not be used to
28  * endorse or promote products derived from this software without
29  * prior written permission. For written permission, please
30  * contact org@kannel.org.
31  *
32  * 5. Products derived from this software may not be called "Kannel",
33  * nor may "Kannel" appear in their name, without prior written
34  * permission of the Kannel Group.
35  *
36  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39  * DISCLAIMED. IN NO EVENT SHALL THE KANNEL GROUP OR ITS CONTRIBUTORS
40  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
41  * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
42  * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
43  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
44  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
45  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
46  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
47  * ====================================================================
48  *
49  * This software consists of voluntary contributions made by many
50  * individuals on behalf of the Kannel Group. For more information on
51  * the Kannel Group, please see <http://www.kannel.org/>.
52  *
53  * Portions of this software are based upon software originally written at
54  * WapIT Ltd., Helsinki, Finland for the Kannel project.
55  */
56 
57 /*
58  * wap_push_sl_compiler.c: Tokenizes a SL document. SL DTD is defined in
59  * Wapforum specification WAP-168-ServiceLoad-20010731-a (hereafter called sl),
60  * chapter 9.2.
61  *
62  * By Aarno Syvänen for Wiral Ltd
63  */
64 
65 #include <ctype.h>
66 #include <inttypes.h>
67 #include <libxml/xmlmemory.h>
68 #include <libxml/tree.h>
69 #include <libxml/debugXML.h>
70 #include <libxml/encoding.h>
71 
72 #include "xml_shared.h"
73 #include "wap_push_sl_compiler.h"
74 
75 /******************************************************************************
76  *
77  * Following global variables are unique to SL compiler. See sl, chapter 10.3.
78  *
79  * Two token table types, with one and two token fields.
80  */
81 
/* Table entry pairing a literal name with the single token that encodes it. */
typedef struct sl_2table_t {
    char *name;             /* text matched against the SL source */
    unsigned char token;    /* token written to the binary output */
} sl_2table_t;
88 
89 /*
90  * Value part can mean whole or part of the value. It can be NULL, too; then
91  * no part of the value will be tokenized, see sl, chapter 10.3.2.
92  */
typedef struct sl_3table_t {
    char *name;             /* attribute name */
    char *value_part;       /* leading part of the value, or NULL for none */
    unsigned char token;    /* token written for name plus value_part */
} sl_3table_t;
100 
101 /*
102  * Element from tag code page zero. It is defined in sl, chapter 10.3.1.
103  */
104 
106  { "sl", 0x05 }
107 };
108 
109 #define NUMBER_OF_ELEMENTS sizeof(sl_elements)/sizeof(sl_elements[0])
110 
111 /*
112  * Attributes (and sometimes start or whole of their value) from code page
113  * zero. These are defined in sl, chapter 10.3.2.
114  */
115 
117  { "action", "execute-low", 0x05 },
118  { "action", "execute-high", 0x06 },
119  { "action", "cache", 0x07 },
120  { "href", "http://", 0x09 },
121  { "href", "http://www.", 0x0a },
122  { "href", "https://", 0x0b },
123  { "href", "https://www.", 0x0c },
124  { "href", NULL, 0x08 }
125 };
126 
127 #define NUMBER_OF_ATTRIBUTES sizeof(sl_attributes)/sizeof(sl_attributes[0])
128 
129 /*
130  * URL value codes from code page zero. These are defined in sl, chapter
131  * 10.3.3.
132  */
133 
135  { ".com/", 0x85 },
136  { ".edu/", 0x86 },
137  { ".net/", 0x87 },
138  { ".org/", 0x88 },
139 };
140 
141 #define NUMBER_OF_URL_VALUES sizeof(sl_url_values)/sizeof(sl_url_values[0])
142 
143 #include "xml_definitions.h"
144 
145 /****************************************************************************
146  *
147  * Prototypes of internal functions. Note that 'Ptr' means here '*'.
148  */
149 static int parse_document(xmlDocPtr document, Octstr *charset,
150  simple_binary_t **slbxml);
151 static int parse_node(xmlNodePtr node, simple_binary_t **slbxml);
152 static int parse_element(xmlNodePtr node, simple_binary_t **slbxml);
153 static int parse_attribute(xmlAttrPtr attr, simple_binary_t **slbxml);
154 static int url(int hex);
155 static int action(int hex);
156 static void parse_url_value(Octstr *value, simple_binary_t **slbxml);
157 
158 /****************************************************************************
159  *
160  * Implementation of the external function
161  */
162 
163 int sl_compile(Octstr *sl_doc, Octstr *charset, Octstr **sl_binary)
164 {
165  simple_binary_t *slbxml;
166  int ret;
167  xmlDocPtr pDoc;
168  size_t size;
169  char *sl_c_text;
170 
171  *sl_binary = octstr_create("");
172  slbxml = simple_binary_create();
173 
174  octstr_strip_blanks(sl_doc);
175  set_charset(sl_doc, charset);
176  size = octstr_len(sl_doc);
177  sl_c_text = octstr_get_cstr(sl_doc);
178  pDoc = xmlParseMemory(sl_c_text, size);
179 
180  ret = 0;
181  if (pDoc) {
182  ret = parse_document(pDoc, charset, &slbxml);
183  simple_binary_output(*sl_binary, slbxml);
184  xmlFreeDoc(pDoc);
185  } else {
186  xmlFreeDoc(pDoc);
187  octstr_destroy(*sl_binary);
188  simple_binary_destroy(slbxml);
189  error(0, "SL: No document to parse. Probably an error in SL source");
190  return -1;
191  }
192 
193  simple_binary_destroy(slbxml);
194 
195  return ret;
196 }
197 
198 /****************************************************************************
199  *
200  * Implementation of internal functions
201  *
202  * Parse document node. Store sl version number, public identifier and
203  * character set at the start of the document
204  */
205 
206 static int parse_document(xmlDocPtr document, Octstr *charset,
207  simple_binary_t **slbxml)
208 {
209  xmlNodePtr node;
210 
211  (**slbxml).wbxml_version = 0x02; /* WBXML Version number 1.2 */
212  (**slbxml).public_id = 0x06; /* SL 1.0 Public ID */
213 
214  charset = octstr_create("UTF-8");
215  (**slbxml).charset = parse_charset(charset);
216  octstr_destroy(charset);
217 
218  node = xmlDocGetRootElement(document);
219  return parse_node(node, slbxml);
220 }
221 
222 /*
223  * The recursive parsing function for the parsing tree. Function checks the
224  * type of the node, calls for the right parse function for the type, then
225  * calls itself for the first child of the current node if there's one and
226  * after that calls itself for the next child on the list. We parse whole
227  * tree, even though SL DTD defines only one node (see sl, chapter 9.2); this
228  * allows us throw an error message when an unknown element is found.
229  */
230 
231 static int parse_node(xmlNodePtr node, simple_binary_t **slbxml)
232 {
233  int status = 0;
234 
235  /* Call for the parser function of the node type. */
236  switch (node->type) {
237  case XML_ELEMENT_NODE:
238  status = parse_element(node, slbxml);
239  break;
240  case XML_TEXT_NODE:
241  case XML_COMMENT_NODE:
242  case XML_PI_NODE:
243  /* Text nodes, comments and PIs are ignored. */
244  break;
245  /*
246  * XML has also many other node types, these are not needed with
247  * SL. Therefore they are assumed to be an error.
248  */
249  default:
250  error(0, "SL COMPILER: Unknown XML node in the SL source.");
251  return -1;
252  break;
253  }
254 
255  /*
256  * If node is an element with content, it will need an end tag after it's
257  * children. The status for it is returned by parse_element.
258  */
259  switch (status) {
260  case 0:
261 
262  if (node->children != NULL)
263  if (parse_node(node->children, slbxml) == -1)
264  return -1;
265  break;
266  case 1:
267  if (node->children != NULL)
268  if (parse_node(node->children, slbxml) == -1)
269  return -1;
270  parse_end(slbxml);
271  break;
272 
273  case -1: /* Something went wrong in the parsing. */
274  return -1;
275  default:
276  warning(0,"SL compiler: undefined return value in a parse function.");
277  return -1;
278  break;
279  }
280 
281  if (node->next != NULL)
282  if (parse_node(node->next, slbxml) == -1)
283  return -1;
284 
285  return 0;
286 }
287 
288 /*
289  * Parse an element node. Check if there is a token for an element tag; if not
290  * output the element as a string, else ouput the token. After that, call
291  * attribute parsing functions. Note that we take advantage of the fact that
292  * sl documents have only one element (see sl, chapter 6.2).
293  * Returns: 1, add an end tag (element node has no children)
294  * 0, do not add an end tag (it has children)
295  * -1, an error occurred
296  */
297 static int parse_element(xmlNodePtr node, simple_binary_t **slbxml)
298 {
299  Octstr *name,
300  *nameos;
301  unsigned char status_bits,
302  sl_hex;
303  int add_end_tag;
304  xmlAttrPtr attribute;
305 
306  name = octstr_create((char *)node->name);
307  if (octstr_len(name) == 0) {
308  octstr_destroy(name);
309  return -1;
310  }
311 
312  status_bits = 0x00;
313  sl_hex = 0x00;
314  add_end_tag = 0;
315 
316  if (octstr_compare(name, octstr_imm(sl_elements[0].name)) != 0) {
317  warning(0, "unknown tag %s in SL source", octstr_get_cstr(name));
318  sl_hex = WBXML_LITERAL;
319  if ((status_bits = element_check_content(node)) > 0) {
320  sl_hex = sl_hex | status_bits;
321  /* If this node has children, the end tag must be added after
322  them. */
323  if ((status_bits & WBXML_CONTENT_BIT) == WBXML_CONTENT_BIT)
324  add_end_tag = 1;
325  }
326  output_char(sl_hex, slbxml);
327  output_octet_string(nameos = octstr_duplicate(name), slbxml);
328  octstr_destroy(nameos);
329  } else {
330  sl_hex = sl_elements[0].token;
331  if ((status_bits = element_check_content(node)) > 0) {
332  sl_hex = sl_hex | status_bits;
333 
334  if ((status_bits & WBXML_CONTENT_BIT) == WBXML_CONTENT_BIT) {
335  add_end_tag = 1;
336  }
337  output_char(sl_hex, slbxml);
338  }
339  }
340 
341  if (node->properties != NULL) {
342  attribute = node->properties;
343  while (attribute != NULL) {
344  parse_attribute(attribute, slbxml);
345  attribute = attribute->next;
346  }
347  parse_end(slbxml);
348  }
349 
350  octstr_destroy(name);
351  return add_end_tag;
352 }
353 
354 static int parse_attribute(xmlAttrPtr attr, simple_binary_t **slbxml)
355 {
356  Octstr *name,
357  *value,
358  *valueos;
359  unsigned char sl_hex;
360  size_t i,
361  value_len;
362 
363  name = octstr_create((char *)attr->name);
364 
365  if (attr->children != NULL)
366  value = create_octstr_from_node((char *)attr->children);
367  else
368  value = NULL;
369 
370  if (value == NULL)
371  goto error;
372 
373  i = 0;
374  valueos = NULL;
375  while (i < NUMBER_OF_ATTRIBUTES) {
376  if (octstr_compare(name, octstr_imm(sl_attributes[i].name)) == 0) {
377  if (sl_attributes[i].value_part == NULL) {
378  debug("wap.push.sl.compiler", 0, "value part was NULL");
379  break;
380  } else {
381  value_len = octstr_len(valueos =
382  octstr_imm(sl_attributes[i].value_part));
383  if (octstr_ncompare(value, valueos, value_len) == 0) {
384  break;
385  }
386  }
387  }
388  ++i;
389  }
390 
391  if (i == NUMBER_OF_ATTRIBUTES) {
392  warning(0, "unknown attribute in SL source");
393  goto error;
394  }
395 
396  sl_hex = sl_attributes[i].token;
397  if (action(sl_hex)) {
398  output_char(sl_hex, slbxml);
399  } else if (url(sl_hex)) {
400  output_char(sl_hex, slbxml);
401  octstr_delete(value, 0, octstr_len(valueos));
402  parse_url_value(value, slbxml);
403  } else {
404  output_char(sl_hex, slbxml);
405  parse_inline_string(value, slbxml);
406  }
407 
408  octstr_destroy(name);
409  octstr_destroy(value);
410  return 0;
411 
412 error:
413  octstr_destroy(name);
414  octstr_destroy(value);
415  return -1;
416 }
417 
/*
 * Tells whether an attribute token is one of the href tokens, meaning that
 * the attribute value is an URL. Only the low eight bits of hex are looked
 * at. The href tokens are the consecutive codes 0x08 (href), 0x09
 * (http://), 0x0a (http://www.), 0x0b (https://) and 0x0c (https://www.).
 * Returns 1 for an URL and 0 otherwise.
 */

static int url(int hex)
{
    const unsigned char token = (unsigned char) hex;

    if (token < 0x08 || token > 0x0c)
        return 0;

    return 1;
}
433 
/*
 * Tells whether an attribute token is one of the action tokens. Only the low
 * eight bits of hex are looked at. The action tokens are the consecutive
 * codes 0x05 (execute-low), 0x06 (execute-high) and 0x07 (cache).
 * Returns 1 for an action attribute and 0 otherwise.
 */

static int action(int hex)
{
    const unsigned char token = (unsigned char) hex;

    if (token < 0x05 || token > 0x07)
        return 0;

    return 1;
}
449 
450 /*
451  * In the case of SL document, only attribute values to be tokenised are parts
452  * of urls. See sl, chapter 10.3.3. The caller removes the start of the url.
453  * Check whether we can find one of tokenisable values in value. If not, parse
454  * value as a inline string, else parse parts before and after the tokenisable
455  * url value as a inline string.
456  */
457 static void parse_url_value(Octstr *value, simple_binary_t **slbxml)
458 {
459  size_t i;
460  long pos;
461  Octstr *urlos,
462  *first_part,
463  *last_part;
464  size_t first_part_len;
465 
466  i = 0;
467  first_part_len = 0;
468  first_part = NULL;
469  last_part = NULL;
470  while (i < NUMBER_OF_URL_VALUES) {
471  pos = octstr_search(value,
472  urlos = octstr_imm(sl_url_values[i].name), 0);
473  if (pos >= 0) {
474  first_part = octstr_duplicate(value);
475  octstr_delete(first_part, pos, octstr_len(first_part) - pos);
476  first_part_len = octstr_len(first_part);
477  parse_inline_string(first_part, slbxml);
478  output_char(sl_url_values[i].token, slbxml);
479  last_part = octstr_duplicate(value);
480  octstr_delete(last_part, 0, first_part_len + octstr_len(urlos));
481  parse_inline_string(last_part, slbxml);
482  octstr_destroy(first_part);
483  octstr_destroy(last_part);
484  break;
485  }
486  octstr_destroy(urlos);
487  ++i;
488  }
489 
490  if (pos < 0)
491  parse_inline_string(value, slbxml);
492 
493 }
494 
495 
496 
497 
498 
499 
500 
void error(int err, const char *fmt,...)
Definition: log.c:612
static int parse_element(xmlNodePtr node, simple_binary_t **slbxml)
int sl_compile(Octstr *sl_doc, Octstr *charset, Octstr **sl_binary)
int size
Definition: wsasm.c:84
static int action(int hex)
static sl_3table_t sl_attributes[]
void output_octet_string(Octstr *os, simple_binary_t **sibxml)
Definition: xml_shared.c:349
unsigned char element_check_content(xmlNodePtr node)
Definition: xml_shared.c:242
static int parse_document(xmlDocPtr document, Octstr *charset, simple_binary_t **slbxml)
long octstr_search(const Octstr *haystack, const Octstr *needle, long pos)
Definition: octstr.c:1068
void octstr_strip_blanks(Octstr *text)
Definition: octstr.c:1344
#define octstr_get_cstr(ostr)
Definition: octstr.h:233
Octstr * charset
Definition: test_ota.c:68
#define NUMBER_OF_ATTRIBUTES
void simple_binary_destroy(simple_binary_t *binary)
Definition: xml_shared.c:298
void parse_inline_string(Octstr *temp, simple_binary_t **binary)
Definition: xml_shared.c:339
#define NUMBER_OF_URL_VALUES
Octstr * octstr_imm(const char *cstr)
Definition: octstr.c:281
static int parse_node(xmlNodePtr node, simple_binary_t **slbxml)
int parse_charset(Octstr *os)
Definition: xml_shared.c:189
int token
Definition: wslexer.c:157
static sl_2table_t sl_url_values[]
void octstr_delete(Octstr *ostr1, long pos, long len)
Definition: octstr.c:1525
#define WBXML_LITERAL
unsigned char token
static sl_2table_t sl_elements[]
int octstr_ncompare(const Octstr *ostr1, const Octstr *ostr2, long n)
Definition: octstr.c:950
#define octstr_duplicate(ostr)
Definition: octstr.h:187
static int url(int hex)
unsigned char token
char * name
Definition: smsc_cimd2.c:212
void warning(int err, const char *fmt,...)
Definition: log.c:624
void octstr_destroy(Octstr *ostr)
Definition: octstr.c:322
#define octstr_create(cstr)
Definition: octstr.h:125
static void parse_url_value(Octstr *value, simple_binary_t **slbxml)
long octstr_len(const Octstr *ostr)
Definition: octstr.c:340
void simple_binary_output(Octstr *os, simple_binary_t *binary)
Definition: xml_shared.c:311
Definition: octstr.c:118
void debug(const char *place, int err, const char *fmt,...)
Definition: log.c:690
void set_charset(Octstr *document, Octstr *charset)
Definition: xml_shared.c:111
void parse_end(simple_binary_t **binary)
Definition: xml_shared.c:321
static int parse_attribute(xmlAttrPtr attr, simple_binary_t **slbxml)
void output_char(int byte, simple_binary_t **binary)
Definition: xml_shared.c:326
#define WBXML_CONTENT_BIT
#define create_octstr_from_node(node)
simple_binary_t * simple_binary_create(void)
Definition: xml_shared.c:284
int octstr_compare(const Octstr *ostr1, const Octstr *ostr2)
Definition: octstr.c:869
See file LICENSE for details about the license agreement for using, modifying, copying or deriving work from this software.