Main Page | Alphabetical List | Data Structures | Directories | File List | Data Fields | Globals

wap_push_sl_compiler.c

Go to the documentation of this file.
00001 /* ==================================================================== 
00002  * The Kannel Software License, Version 1.0 
00003  * 
00004  * Copyright (c) 2001-2008 Kannel Group  
00005  * Copyright (c) 1998-2001 WapIT Ltd.   
00006  * All rights reserved. 
00007  * 
00008  * Redistribution and use in source and binary forms, with or without 
00009  * modification, are permitted provided that the following conditions 
00010  * are met: 
00011  * 
00012  * 1. Redistributions of source code must retain the above copyright 
00013  *    notice, this list of conditions and the following disclaimer. 
00014  * 
00015  * 2. Redistributions in binary form must reproduce the above copyright 
00016  *    notice, this list of conditions and the following disclaimer in 
00017  *    the documentation and/or other materials provided with the 
00018  *    distribution. 
00019  * 
00020  * 3. The end-user documentation included with the redistribution, 
00021  *    if any, must include the following acknowledgment: 
00022  *       "This product includes software developed by the 
00023  *        Kannel Group (http://www.kannel.org/)." 
00024  *    Alternately, this acknowledgment may appear in the software itself, 
00025  *    if and wherever such third-party acknowledgments normally appear. 
00026  * 
00027  * 4. The names "Kannel" and "Kannel Group" must not be used to 
00028  *    endorse or promote products derived from this software without 
00029  *    prior written permission. For written permission, please  
00030  *    contact org@kannel.org. 
00031  * 
00032  * 5. Products derived from this software may not be called "Kannel", 
00033  *    nor may "Kannel" appear in their name, without prior written 
00034  *    permission of the Kannel Group. 
00035  * 
00036  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED 
00037  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 
00038  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 
00039  * DISCLAIMED.  IN NO EVENT SHALL THE KANNEL GROUP OR ITS CONTRIBUTORS 
00040  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,  
00041  * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT  
00042  * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR  
00043  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,  
00044  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE  
00045  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,  
00046  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
00047  * ==================================================================== 
00048  * 
00049  * This software consists of voluntary contributions made by many 
00050  * individuals on behalf of the Kannel Group.  For more information on  
00051  * the Kannel Group, please see <http://www.kannel.org/>. 
00052  * 
00053  * Portions of this software are based upon software originally written at  
00054  * WapIT Ltd., Helsinki, Finland for the Kannel project.  
00055  */ 
00056 
00057 /*
00058  * wap_push_sl_compiler.c: Tokenizes a SL document. SL DTD is defined in 
00059  * Wapforum specification WAP-168-ServiceLoad-20010731-a (hereafter called sl),
00060  * chapter 9.2.
00061  *
00062  * By Aarno Syvänen for Wiral Ltd
00063  */
00064 
00065 #include <ctype.h>
00066 #include <libxml/xmlmemory.h>
00067 #include <libxml/tree.h>
00068 #include <libxml/debugXML.h>
00069 #include <libxml/encoding.h>
00070 
00071 #include "xml_shared.h"
00072 #include "wap_push_sl_compiler.h"
00073 
00074 /******************************************************************************
00075  *
00076  * Following global variables are unique to SL compiler. See sl, chapter 10.3.
00077  *
00078  * Two token table types: one maps a name to a token, the other maps a name plus an optional value part to a token.
00079  */
00080 
/* One row of a two-column token table: a name and the token that stands for it. */
typedef struct sl_2table_t {
    char *name;
    unsigned char token;
} sl_2table_t;
00087 
00088 /*
00089  * Value part can mean whole or part of the value. It can be NULL, too; then 
00090  * no part of the value will be tokenized, see sl, chapter 10.3.2.
00091  */
typedef struct sl_3table_t {
    char *name;            /* attribute name */
    char *value_part;      /* tokenised whole or start of the value; may be NULL */
    unsigned char token;   /* token emitted for this name / value_part pair */
} sl_3table_t;
00099 
00100 /*
00101  * Element from tag code page zero. It is defined in sl, chapter 10.3.1.
00102  */
00103 
00104 static sl_2table_t sl_elements[] = {
00105     { "sl", 0x05 }
00106 };
00107 
00108 #define NUMBER_OF_ELEMENTS sizeof(sl_elements)/sizeof(sl_elements[0])
00109 
00110 /*
00111  * Attributes (and sometimes start or whole of their value) from code page 
00112  * zero. These are defined in sl, chapter 10.3.2. 
00113  */
00114 
00115 static sl_3table_t sl_attributes[] = {
00116     { "action", "execute-low", 0x05 }, 
00117     { "action", "execute-high", 0x06 }, 
00118     { "action", "cache", 0x07 }, 
00119     { "href", "http://", 0x09 },
00120     { "href", "http://www.", 0x0a }, 
00121     { "href", "https://", 0x0b },    
00122     { "href", "https://www.", 0x0c },
00123     { "href", NULL, 0x08 }
00124 };
00125 
00126 #define NUMBER_OF_ATTRIBUTES sizeof(sl_attributes)/sizeof(sl_attributes[0])
00127 
00128 /*
00129  * URL value codes from code page zero. These are defined in sl, chapter 
00130  * 10.3.3.
00131  */
00132 
00133 static sl_2table_t sl_url_values[] = {
00134     { ".com/", 0x85 },
00135     { ".edu/", 0x86 },
00136     { ".net/", 0x87 },
00137     { ".org/", 0x88 },
00138 };
00139 
00140 #define NUMBER_OF_URL_VALUES sizeof(sl_url_values)/sizeof(sl_url_values[0])
00141 
00142 #include "xml_definitions.h"
00143 
00144 /****************************************************************************
00145  *
00146  * Prototypes of internal functions. Note that 'Ptr' means here '*'.
00147  */
00148 static int parse_document(xmlDocPtr document, Octstr *charset, 
00149                           simple_binary_t **slbxml);
00150 static int parse_node(xmlNodePtr node, simple_binary_t **slbxml);
00151 static int parse_element(xmlNodePtr node, simple_binary_t **slbxml);
00152 static int parse_attribute(xmlAttrPtr attr, simple_binary_t **slbxml);
00153 static int url(int hex);
00154 static int action(int hex);
00155 static void parse_url_value(Octstr *value, simple_binary_t **slbxml);
00156 
00157 /****************************************************************************
00158  *
00159  * Implementation of the external function
00160  */
00161 
00162 int sl_compile(Octstr *sl_doc, Octstr *charset, Octstr **sl_binary)
00163 {
00164     simple_binary_t *slbxml;
00165     int ret;
00166     xmlDocPtr pDoc;
00167     size_t size;
00168     char *sl_c_text;
00169 
00170     *sl_binary = octstr_create(""); 
00171     slbxml = simple_binary_create();
00172 
00173     octstr_strip_blanks(sl_doc);
00174     set_charset(sl_doc, charset);
00175     size = octstr_len(sl_doc);
00176     sl_c_text = octstr_get_cstr(sl_doc);
00177     pDoc = xmlParseMemory(sl_c_text, size);
00178 
00179     ret = 0;
00180     if (pDoc) {
00181         ret = parse_document(pDoc, charset, &slbxml);
00182         simple_binary_output(*sl_binary, slbxml);
00183         xmlFreeDoc(pDoc);
00184     } else {
00185         xmlFreeDoc(pDoc);
00186         octstr_destroy(*sl_binary);
00187         simple_binary_destroy(slbxml);
00188         error(0, "SL: No document to parse. Probably an error in SL source");
00189         return -1;
00190     }
00191 
00192     simple_binary_destroy(slbxml);
00193 
00194     return ret;
00195 }
00196 
00197 /****************************************************************************
00198  *
00199  * Implementation of internal functions
00200  *
00201  * Parse document node. Store sl version number, public identifier and 
00202  * character set at the start of the document
00203  */
00204 
00205 static int parse_document(xmlDocPtr document, Octstr *charset, 
00206                           simple_binary_t **slbxml)
00207 {
00208     xmlNodePtr node;
00209 
00210     (**slbxml).wbxml_version = 0x02; /* WBXML Version number 1.2  */
00211     (**slbxml).public_id = 0x06;  /* SL 1.0 Public ID */
00212     
00213     charset = octstr_create("UTF-8");
00214     (**slbxml).charset = parse_charset(charset);
00215     octstr_destroy(charset);
00216 
00217     node = xmlDocGetRootElement(document);
00218     return parse_node(node, slbxml);
00219 }
00220 
00221 /*
00222  * The recursive parsing function for the parsing tree. Function checks the 
00223  * type of the node, calls for the right parse function for the type, then 
00224  * calls itself for the first child of the current node if there's one and 
00225  * after that calls itself for the next child on the list. We parse whole 
00226  * tree, even though SL DTD defines only one node (see sl, chapter 9.2); this
00227  * allows us throw an error message when an unknown element is found.
00228  */
00229 
00230 static int parse_node(xmlNodePtr node, simple_binary_t **slbxml)
00231 {
00232     int status = 0;
00233     
00234     /* Call for the parser function of the node type. */
00235     switch (node->type) {
00236     case XML_ELEMENT_NODE:
00237     status = parse_element(node, slbxml);
00238     break;
00239     case XML_TEXT_NODE:
00240     case XML_COMMENT_NODE:
00241     case XML_PI_NODE:
00242     /* Text nodes, comments and PIs are ignored. */
00243     break;
00244     /*
00245      * XML has also many other node types, these are not needed with 
00246      * SL. Therefore they are assumed to be an error.
00247      */
00248     default:
00249     error(0, "SL COMPILER: Unknown XML node in the SL source.");
00250     return -1;
00251     break;
00252     }
00253 
00254     /* 
00255      * If node is an element with content, it will need an end tag after it's
00256      * children. The status for it is returned by parse_element.
00257      */
00258     switch (status) {
00259     case 0:
00260 
00261     if (node->children != NULL)
00262         if (parse_node(node->children, slbxml) == -1)
00263         return -1;
00264     break;
00265     case 1:
00266     if (node->children != NULL)
00267         if (parse_node(node->children, slbxml) == -1)
00268         return -1;
00269     parse_end(slbxml);
00270     break;
00271 
00272     case -1: /* Something went wrong in the parsing. */
00273     return -1;
00274     default:
00275     warning(0,"SL compiler: undefined return value in a parse function.");
00276     return -1;
00277     break;
00278     }
00279 
00280     if (node->next != NULL)
00281     if (parse_node(node->next, slbxml) == -1)
00282         return -1;
00283 
00284     return 0;
00285 }
00286 
00287 /*
00288  * Parse an element node. Check if there is a token for an element tag; if not
00289  * output the element as a string, else ouput the token. After that, call 
00290  * attribute parsing functions. Note that we take advantage of the fact that
00291  * sl documents have only one element (see sl, chapter 6.2).
00292  * Returns:      1, add an end tag (element node has no children)
00293  *               0, do not add an end tag (it has children)
00294  *              -1, an error occurred
00295  */
00296 static int parse_element(xmlNodePtr node, simple_binary_t **slbxml)
00297 {
00298     Octstr *name,
00299            *nameos;
00300     unsigned char status_bits,
00301              sl_hex;
00302     int add_end_tag;
00303     xmlAttrPtr attribute;
00304 
00305     name = octstr_create((char *)node->name);
00306     if (octstr_len(name) == 0) {
00307         octstr_destroy(name);
00308         return -1;
00309     }
00310 
00311     status_bits = 0x00;
00312     sl_hex = 0x00;
00313     add_end_tag = 0;
00314 
00315     if (octstr_compare(name, octstr_imm(sl_elements[0].name)) != 0) {
00316         warning(0, "unknown tag %s in SL source", octstr_get_cstr(name));
00317         sl_hex = WBXML_LITERAL;
00318         if ((status_bits = element_check_content(node)) > 0) {
00319         sl_hex = sl_hex | status_bits;
00320         /* If this node has children, the end tag must be added after 
00321            them. */
00322         if ((status_bits & WBXML_CONTENT_BIT) == WBXML_CONTENT_BIT)
00323         add_end_tag = 1;
00324     }
00325     output_char(sl_hex, slbxml);
00326         output_octet_string(nameos = octstr_duplicate(name), slbxml);
00327         octstr_destroy(nameos);
00328     } else {
00329         sl_hex = sl_elements[0].token;
00330         if ((status_bits = element_check_content(node)) > 0) {
00331         sl_hex = sl_hex | status_bits;
00332         
00333         if ((status_bits & WBXML_CONTENT_BIT) == WBXML_CONTENT_BIT) {
00334             add_end_tag = 1;
00335             }
00336             output_char(sl_hex, slbxml);
00337         }
00338     }
00339 
00340     if (node->properties != NULL) {
00341     attribute = node->properties;
00342     while (attribute != NULL) {
00343         parse_attribute(attribute, slbxml);
00344         attribute = attribute->next;
00345     }
00346     parse_end(slbxml);
00347     }
00348 
00349     octstr_destroy(name);
00350     return add_end_tag;
00351 }
00352 
00353 static int parse_attribute(xmlAttrPtr attr, simple_binary_t **slbxml)
00354 {
00355     Octstr *name,
00356            *value,
00357            *valueos;
00358     unsigned char sl_hex;
00359     size_t i,
00360            value_len;
00361 
00362     name = octstr_create((char *)attr->name);
00363 
00364     if (attr->children != NULL)
00365         value = create_octstr_from_node((char *)attr->children);
00366     else
00367         value = NULL;
00368 
00369     if (value == NULL)
00370         goto error;
00371 
00372     i = 0;
00373     valueos = NULL;
00374     while (i < NUMBER_OF_ATTRIBUTES) {
00375         if (octstr_compare(name, octstr_imm(sl_attributes[i].name)) == 0) {
00376         if (sl_attributes[i].value_part == NULL) {
00377             debug("wap.push.sl.compiler", 0, "value part was NULL");
00378             break; 
00379             } else {
00380                 value_len = octstr_len(valueos = 
00381                     octstr_imm(sl_attributes[i].value_part));
00382             if (octstr_ncompare(value, valueos, value_len) == 0) {
00383             break;
00384                 }
00385             }
00386         }
00387        ++i;
00388     }
00389 
00390     if (i == NUMBER_OF_ATTRIBUTES) {
00391         warning(0, "unknown attribute in SL source");
00392         goto error;
00393     }
00394 
00395     sl_hex = sl_attributes[i].token;
00396     if (action(sl_hex)) {
00397         output_char(sl_hex, slbxml);
00398     } else if (url(sl_hex)) {
00399         output_char(sl_hex, slbxml);
00400         octstr_delete(value, 0, octstr_len(valueos));
00401         parse_url_value(value, slbxml);
00402     } else {
00403         output_char(sl_hex, slbxml);
00404         parse_inline_string(value, slbxml);
00405     } 
00406 
00407     octstr_destroy(name);
00408     octstr_destroy(value);
00409     return 0;
00410 
00411 error:
00412     octstr_destroy(name);
00413     octstr_destroy(value);
00414     return -1;    
00415 }
00416 
/*
 * Tells whether an attribute token is one of the href tokens. Only the low
 * eight bits of hex are looked at.
 * Returns 1 for 0x08 (href), 0x09 (href http://), 0x0a (href http://www.),
 * 0x0b (href https://) and 0x0c (href https://www.), and 0 otherwise.
 */
static int url(int hex)
{
    const unsigned char token = (unsigned char) hex;

    if (token >= 0x08 && token <= 0x0c)
        return 1;
    return 0;
}
00432 
/*
 * Tells whether an attribute token is one of the action tokens. Only the low
 * eight bits of hex are looked at.
 * Returns 1 for 0x05 (action execute-low), 0x06 (action execute-high) and
 * 0x07 (action cache), and 0 otherwise.
 */
static int action(int hex)
{
    const unsigned char token = (unsigned char) hex;

    if (token >= 0x05 && token <= 0x07)
        return 1;
    return 0;
}
00448 
00449 /*
00450  * In the case of SL document, only attribute values to be tokenised are parts
00451  * of urls. See sl, chapter 10.3.3. The caller removes the start of the url.
00452  * Check whether we can find one of tokenisable values in value. If not, parse
00453  * value as a inline string, else parse parts before and after the tokenisable
00454  * url value as a inline string.
00455  */
00456 static void parse_url_value(Octstr *value, simple_binary_t **slbxml)
00457 {
00458     size_t i;
00459     long pos;
00460     Octstr *urlos,
00461            *first_part,
00462        *last_part;
00463     size_t first_part_len;
00464 
00465     i = 0;
00466     first_part_len = 0;
00467     first_part = NULL;
00468     last_part = NULL;
00469     while (i < NUMBER_OF_URL_VALUES) {
00470         pos = octstr_search(value, 
00471             urlos = octstr_imm(sl_url_values[i].name), 0);
00472         if (pos >= 0) {
00473         first_part = octstr_duplicate(value);
00474             octstr_delete(first_part, pos, octstr_len(first_part) - pos);
00475             first_part_len = octstr_len(first_part);
00476             parse_inline_string(first_part, slbxml);
00477             output_char(sl_url_values[i].token, slbxml);
00478             last_part = octstr_duplicate(value);
00479             octstr_delete(last_part, 0, first_part_len + octstr_len(urlos));
00480             parse_inline_string(last_part, slbxml);
00481         octstr_destroy(first_part);
00482             octstr_destroy(last_part);
00483             break;
00484         }
00485         octstr_destroy(urlos);
00486         ++i;
00487     }
00488 
00489     if (pos < 0) 
00490     parse_inline_string(value, slbxml);
00491         
00492 }
00493 
00494 
00495 
00496 
00497 
00498 
00499 
See file LICENSE for details about the license agreement for using, modifying, copying or deriving work from this software.