Main Page | Alphabetical List | Data Structures | Directories | File List | Data Fields | Globals

wap_push_si_compiler.c

Go to the documentation of this file.
00001 /* ==================================================================== 
00002  * The Kannel Software License, Version 1.0 
00003  * 
00004  * Copyright (c) 2001-2008 Kannel Group  
00005  * Copyright (c) 1998-2001 WapIT Ltd.   
00006  * All rights reserved. 
00007  * 
00008  * Redistribution and use in source and binary forms, with or without 
00009  * modification, are permitted provided that the following conditions 
00010  * are met: 
00011  * 
00012  * 1. Redistributions of source code must retain the above copyright 
00013  *    notice, this list of conditions and the following disclaimer. 
00014  * 
00015  * 2. Redistributions in binary form must reproduce the above copyright 
00016  *    notice, this list of conditions and the following disclaimer in 
00017  *    the documentation and/or other materials provided with the 
00018  *    distribution. 
00019  * 
00020  * 3. The end-user documentation included with the redistribution, 
00021  *    if any, must include the following acknowledgment: 
00022  *       "This product includes software developed by the 
00023  *        Kannel Group (http://www.kannel.org/)." 
00024  *    Alternately, this acknowledgment may appear in the software itself, 
00025  *    if and wherever such third-party acknowledgments normally appear. 
00026  * 
00027  * 4. The names "Kannel" and "Kannel Group" must not be used to 
00028  *    endorse or promote products derived from this software without 
00029  *    prior written permission. For written permission, please  
00030  *    contact org@kannel.org. 
00031  * 
00032  * 5. Products derived from this software may not be called "Kannel", 
00033  *    nor may "Kannel" appear in their name, without prior written 
00034  *    permission of the Kannel Group. 
00035  * 
00036  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED 
00037  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 
00038  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 
00039  * DISCLAIMED.  IN NO EVENT SHALL THE KANNEL GROUP OR ITS CONTRIBUTORS 
00040  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,  
00041  * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT  
00042  * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR  
00043  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,  
00044  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE  
00045  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,  
00046  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
00047  * ==================================================================== 
00048  * 
00049  * This software consists of voluntary contributions made by many 
00050  * individuals on behalf of the Kannel Group.  For more information on  
00051  * the Kannel Group, please see <http://www.kannel.org/>. 
00052  * 
00053  * Portions of this software are based upon software originally written at  
00054  * WapIT Ltd., Helsinki, Finland for the Kannel project.  
00055  */ 
00056 
00057 /*
00058  * wap_push_si_compiler.c: Tokenizes a SI document. SI DTD is defined in 
00059  * Wapforum specification WAP-167-ServiceInd-20010731-a (hereafter called si),
00060  * chapter 8.2.
00061  *
00062  * By Aarno Syvänen for Wiral Ltd
00063  */
00064 
00065 #include <ctype.h>
00066 #include <libxml/xmlmemory.h>
00067 #include <libxml/tree.h>
00068 #include <libxml/debugXML.h>
00069 #include <libxml/encoding.h>
00070 
00071 #include "shared.h"
00072 #include "xml_shared.h"
00073 #include "wap_push_si_compiler.h"
00074 
00075 /****************************************************************************
00076  *
00077  * Global variables
00078  *
00079  * Two token table types, one and two token fields
00080  */
00081 
/*
 * Entry of a two-field token table: a textual name and the single octet
 * that is output in its place.
 */
typedef struct si_2table_t {
    char *name;
    unsigned char token;
} si_2table_t;
00088 
/*
 * Entry of a three-field token table. The value part is the piece of the
 * attribute value (its start, or all of it) that the token stands for. It
 * may be NULL, meaning that no part of the value is tokenised. See si,
 * chapter 9.3.2.
 */
typedef struct si_3table_t {
    char *name;
    char *value_part;
    unsigned char token;
} si_3table_t;
00100 
00101 /*
00102  * Elements from tag code page zero. These are defined in si, chapter 9.3.1.
00103  */
00104 
00105 static si_2table_t si_elements[] = {
00106     { "si", 0x05 },
00107     { "indication", 0x06 },
00108     { "info", 0x07 },
00109     { "item", 0x08 }
00110 };
00111 
00112 #define NUMBER_OF_ELEMENTS sizeof(si_elements)/sizeof(si_elements[0])
00113 
00114 /*
00115  * Attributes (and start or whole value of ) from attribute code page zero. 
00116  * These are defined in si, chapter 9.3.2.
00117  */
00118 
00119 static si_3table_t si_attributes[] = {
00120     { "action", "signal-none", 0x05 },
00121     { "action", "signal-low", 0x06 },
00122     { "action", "signal-medium", 0x07 },
00123     { "action", "signal-high", 0x08 },
00124     { "action", "delete", 0x09 },
00125     { "created", NULL, 0x0a },
00126     { "href", "https://www.", 0x0f },
00127     { "href", "http://www.", 0x0d },
00128     { "href", "https://", 0x0e },
00129     { "href", "http://", 0x0c },
00130     { "href", NULL, 0x0b },
00131     { "si-expires", NULL, 0x10 },
00132     { "si-id", NULL, 0x11 },
00133     { "class", NULL, 0x12 }
00134 };
00135 
00136 #define NUMBER_OF_ATTRIBUTES sizeof(si_attributes)/sizeof(si_attributes[0])
00137 
00138 /*
00139  * Attribute value tokes (URL value codes), from si, chapter 9.3.3.
00140  */
00141 
00142 static si_2table_t si_URL_values[] = {
00143   { ".com/", 0x85 },
00144   { ".edu/", 0x86 },
00145   { ".net/", 0x87 },
00146   { ".org/", 0x88 }
00147 };
00148 
00149 #define NUMBER_OF_URL_VALUES sizeof(si_URL_values)/sizeof(si_URL_values[0])
00150 
00151 #include "xml_definitions.h"
00152 
00153 /****************************************************************************
00154  *
00155  * Prototypes of internal functions. Note that 'Ptr' means here '*'.
00156  */
00157 
00158 static int parse_document(xmlDocPtr document, Octstr *charset, 
00159               simple_binary_t **si_binary);
00160 static int parse_node(xmlNodePtr node, simple_binary_t **sibxml);    
00161 static int parse_element(xmlNodePtr node, simple_binary_t **sibxml);
00162 static int parse_text(xmlNodePtr node, simple_binary_t **sibxml);   
00163 static int parse_cdata(xmlNodePtr node, simple_binary_t **sibxml);             static int parse_attribute(xmlAttrPtr attr, simple_binary_t **sibxml);       
00164 static int url(int hex);   
00165 static int action(int hex);
00166 static int date(int hex);
00167 static Octstr *tokenize_date(Octstr *date);
00168 static void octstr_drop_trailing_zeros(Octstr **date_token);
00169 static void flag_date_length(Octstr **token);
00170 static void parse_url_value(Octstr *value, simple_binary_t **sibxml);
00171                           
00172 /****************************************************************************
00173  *
00174  * Implementation of the external function
00175  */
00176 
00177 int si_compile(Octstr *si_doc, Octstr *charset, Octstr **si_binary)
00178 {
00179     simple_binary_t *sibxml;
00180     int ret;
00181     xmlDocPtr pDoc;
00182     size_t size;
00183     char *si_c_text;
00184 
00185     *si_binary = octstr_create(""); 
00186     sibxml = simple_binary_create();
00187 
00188     octstr_strip_blanks(si_doc);
00189     set_charset(si_doc, charset);
00190     size = octstr_len(si_doc);
00191     si_c_text = octstr_get_cstr(si_doc);
00192     pDoc = xmlParseMemory(si_c_text, size);
00193 
00194     ret = 0;
00195     if (pDoc) {
00196         ret = parse_document(pDoc, charset, &sibxml);
00197         simple_binary_output(*si_binary, sibxml);
00198         xmlFreeDoc(pDoc);
00199     } else {
00200         xmlFreeDoc(pDoc);
00201         octstr_destroy(*si_binary);
00202         simple_binary_destroy(sibxml);
00203         error(0, "SI: No document to parse. Probably an error in SI source");
00204         return -1;
00205     }
00206 
00207     simple_binary_destroy(sibxml);
00208 
00209     return ret;
00210 }
00211 
00212 /****************************************************************************
00213  *
00214  * Implementation of internal functions
00215  *
00216  * Parse document node. Store si version number, public identifier and char-
00217  * acter set into the start of the document. FIXME: Add parse_prologue!
00218  */
00219 
00220 static int parse_document(xmlDocPtr document, Octstr *charset, 
00221                           simple_binary_t **sibxml)
00222 {
00223     xmlNodePtr node;
00224 
00225     (*sibxml)->wbxml_version = 0x02; /* WBXML Version number 1.2  */
00226     (*sibxml)->public_id = 0x05; /* SI 1.0 Public ID */
00227     
00228     charset = octstr_create("UTF-8");
00229     (*sibxml)->charset = parse_charset(charset);
00230     octstr_destroy(charset);
00231 
00232     node = xmlDocGetRootElement(document);
00233     return parse_node(node, sibxml);
00234 }
00235 
00236 /*
00237  * Parse an element node. Check if there is a token for an element tag; if not
00238  * output the element as a string, else ouput the token. After that, call 
00239  * attribute parsing functions
00240  * Returns:      1, add an end tag (element node has no children)
00241  *               0, do not add an end tag (it has children)
00242  *              -1, an error occurred
00243  */
00244 static int parse_element(xmlNodePtr node, simple_binary_t **sibxml)
00245 {
00246     Octstr *name,
00247            *outos;
00248     size_t i;
00249     unsigned char status_bits,
00250              si_hex;
00251     int add_end_tag;
00252     xmlAttrPtr attribute;
00253 
00254     name = octstr_create((char *)node->name);
00255     outos = NULL;
00256     if (octstr_len(name) == 0) {
00257         octstr_destroy(name);
00258         return -1;
00259     }
00260 
00261     i = 0;
00262     while (i < NUMBER_OF_ELEMENTS) {
00263         if (octstr_compare(name, octstr_imm(si_elements[i].name)) == 0)
00264             break;
00265         ++i;
00266     }
00267 
00268     status_bits = 0x00;
00269     si_hex = 0x00;
00270     add_end_tag = 0;
00271 
00272     if (i != NUMBER_OF_ELEMENTS) {
00273         si_hex = si_elements[i].token;
00274         if ((status_bits = element_check_content(node)) > 0) {
00275         si_hex = si_hex | status_bits;
00276         
00277         if ((status_bits & WBXML_CONTENT_BIT) == WBXML_CONTENT_BIT)
00278             add_end_tag = 1;
00279         }
00280         output_char(si_hex, sibxml);
00281     } else {
00282         warning(0, "unknown tag %s in SI source", octstr_get_cstr(name));
00283         si_hex = WBXML_LITERAL;
00284         if ((status_bits = element_check_content(node)) > 0) {
00285         si_hex = si_hex | status_bits;
00286         /* If this node has children, the end tag must be added after 
00287            them. */
00288         if ((status_bits & WBXML_CONTENT_BIT) == WBXML_CONTENT_BIT)
00289         add_end_tag = 1;
00290     }
00291     output_char(si_hex, sibxml);
00292         output_octet_string(outos = octstr_duplicate(name), sibxml);
00293     }
00294 
00295     if (node->properties != NULL) {
00296     attribute = node->properties;
00297     while (attribute != NULL) {
00298         parse_attribute(attribute, sibxml);
00299         attribute = attribute->next;
00300     }
00301     parse_end(sibxml);
00302     }
00303 
00304     octstr_destroy(outos);
00305     octstr_destroy(name);
00306     return add_end_tag;
00307 }
00308 
00309 /*
00310  * Parse a text node of a si document. Ignore empty text nodes (space addi-
00311  * tions to certain points will produce these). Si codes text nodes as an
00312  * inline string.
00313  */
00314 
00315 static int parse_text(xmlNodePtr node, simple_binary_t **sibxml)
00316 {
00317     Octstr *temp;
00318 
00319     temp = create_octstr_from_node((char *)node);
00320 
00321     octstr_shrink_blanks(temp);
00322     octstr_strip_blanks(temp);
00323 
00324     if (octstr_len(temp) == 0) {
00325         octstr_destroy(temp);
00326         return 0;
00327     }
00328 
00329     parse_inline_string(temp, sibxml);    
00330     octstr_destroy(temp);
00331 
00332     return 0;
00333 }
00334 
00335 /*
00336  * Tokenises an attribute, and in most cases, the start of its value (some-
00337  * times whole of it). Tokenisation is based on tables in si, chapters 9.3.2
00338  * and 9.3.3. 
00339  * Returns 0 when success, -1 when error.
00340  */
00341 static int parse_attribute(xmlAttrPtr attr, simple_binary_t **sibxml)
00342 {
00343     Octstr *name,
00344            *value,
00345            *valueos,
00346            *tokenized_date;
00347     unsigned char si_hex;
00348     size_t i,
00349            value_len;
00350 
00351     name = octstr_create((char *)attr->name);
00352 
00353     if (attr->children != NULL)
00354     value = create_octstr_from_node((char *)attr->children);
00355     else 
00356     value = NULL;
00357 
00358     if (value == NULL)
00359         goto error;
00360 
00361     i = 0;
00362     valueos = NULL;
00363     while (i < NUMBER_OF_ATTRIBUTES) {
00364         if (octstr_compare(name, octstr_imm(si_attributes[i].name)) == 0) {
00365         if (si_attributes[i].value_part == NULL) {
00366             break; 
00367             } else {
00368                 value_len = octstr_len(valueos = 
00369                     octstr_imm(si_attributes[i].value_part));
00370             if (octstr_ncompare(value, valueos, value_len) == 0) {
00371             break;
00372                 }
00373             }
00374         }
00375        ++i;
00376     }
00377 
00378     if (i == NUMBER_OF_ATTRIBUTES)
00379         goto error;
00380 
00381     tokenized_date = NULL;
00382     si_hex = si_attributes[i].token;
00383     if (action(si_hex)) {
00384         output_char(si_hex, sibxml);
00385     } else if (url(si_hex)) {
00386         output_char(si_hex, sibxml);
00387         octstr_delete(value, 0, octstr_len(valueos));
00388         parse_url_value(value, sibxml);
00389     } else if (date(si_hex)) {
00390         if ((tokenized_date = tokenize_date(value)) == NULL)
00391             goto error;
00392         output_char(si_hex, sibxml);
00393         output_octet_string(tokenized_date, sibxml);
00394     } else {
00395         output_char(si_hex, sibxml);
00396         parse_inline_string(value, sibxml);
00397     }  
00398 
00399     octstr_destroy(tokenized_date);
00400     octstr_destroy(name);
00401     octstr_destroy(value);
00402     return 0;
00403 
00404 error:
00405     octstr_destroy(name);
00406     octstr_destroy(value);
00407     return -1;
00408 }
00409 
00410 
/*
 * Tells whether an attribute token (only its low eight bits are looked at)
 * is one of the href tokens, that is, whether the attribute value is an URL.
 * Returns 1 for an URL and 0 otherwise.
 */

static int url(int hex)
{
    unsigned char token = (unsigned char) hex;

    /*
     * 0x0b href, 0x0c href http://, 0x0d href http://www.,
     * 0x0e href https://, 0x0f href https://www.
     */
    return token >= 0x0b && token <= 0x0f;
}
00426 
/*
 * Tells whether an attribute token (only its low eight bits are looked at)
 * is one of the action tokens. 
 * Returns 1 for an action attribute and 0 otherwise.
 */

static int action(int hex)
{
    unsigned char token = (unsigned char) hex;

    /*
     * 0x05 action signal-none, 0x06 action signal-low, 
     * 0x07 action signal-medium, 0x08 action signal-high, 0x09 action delete
     */
    return token >= 0x05 && token <= 0x09;
}
00443 
/*
 * Tells whether an attribute token (only its low eight bits are looked at)
 * belongs to an attribute whose value is an OSI date. 
 * Returns 1 for a date attribute and 0 otherwise.
 */

static int date(int hex)
{
    unsigned char token = (unsigned char) hex;

    /* 0x0a created, 0x10 si-expires */
    return token == 0x0a || token == 0x10;
}
00458 
00459 /*
00460  * Tokenises an OSI date. Procedure is defined in si, chapter 9.2.2. Validate
00461  * OSI date as specified in 9.2.1.1. Returns NULL when error, a tokenised date 
00462  * string otherwise.
00463  */
00464 static Octstr *tokenize_date(Octstr *date)
00465 {
00466     Octstr *date_token;
00467     long j;
00468     size_t i,
00469            date_len;
00470     unsigned char c;
00471 
00472     if (!parse_date(date)) {
00473         return NULL;
00474     }
00475 
00476     date_token = octstr_create("");
00477     octstr_append_char(date_token, WBXML_OPAQUE);
00478 
00479     i = 0;
00480     j = 0;
00481     date_len = octstr_len(date);
00482     while (i < date_len) {
00483         c = octstr_get_char(date, i);
00484         if (c != 'T' && c != 'Z' && c != '-' && c != ':') {
00485             if (isdigit(c)) {
00486                 octstr_set_bits(date_token, 4*j + 8, 4, c & 0x0f);
00487                 ++j;
00488             } else {
00489                 octstr_destroy(date_token);
00490                 return NULL;
00491             }
00492         }  
00493         ++i; 
00494     }
00495 
00496     octstr_drop_trailing_zeros(&date_token);
00497     flag_date_length(&date_token);
00498 
00499     return date_token;
00500 }
00501 
00502 static void octstr_drop_trailing_zeros(Octstr **date_token)
00503 {
00504     while (1) {
00505         if (octstr_get_char(*date_token, octstr_len(*date_token) - 1) == '\0')
00506             octstr_delete(*date_token, octstr_len(*date_token) - 1, 1);
00507         else
00508             return;
00509     }
00510 }
00511 
00512 static void flag_date_length(Octstr **token)
00513 {
00514     Octstr *lenos;
00515 
00516     lenos = octstr_format("%c", octstr_len(*token) - 1);
00517     octstr_insert(*token, lenos, 1);
00518 
00519     octstr_destroy(lenos);
00520 }
00521 
00522 /*
00523  * The recursive parsing function for the parsing tree. Function checks the 
00524  * type of the node, calls for the right parse function for the type, then 
00525  * calls itself for the first child of the current node if there's one and 
00526  * after that calls itself for the next child on the list.
00527  */
00528 
00529 static int parse_node(xmlNodePtr node, simple_binary_t **sibxml)
00530 {
00531     int status = 0;
00532     
00533     /* Call for the parser function of the node type. */
00534     switch (node->type) {
00535     case XML_ELEMENT_NODE:
00536     status = parse_element(node, sibxml);
00537     break;
00538     case XML_TEXT_NODE:
00539     status = parse_text(node, sibxml);
00540     break;
00541     case XML_CDATA_SECTION_NODE:
00542     status = parse_cdata(node, sibxml);
00543     break;
00544     case XML_COMMENT_NODE:
00545     case XML_PI_NODE:
00546     /* Comments and PIs are ignored. */
00547     break;
00548     /*
00549      * XML has also many other node types, these are not needed with 
00550      * SI. Therefore they are assumed to be an error.
00551      */
00552     default:
00553     error(0, "SI compiler: Unknown XML node in the SI source.");
00554     return -1;
00555     break;
00556     }
00557 
00558     /* 
00559      * If node is an element with content, it will need an end tag after it's
00560      * children. The status for it is returned by parse_element.
00561      */
00562     switch (status) {
00563     case 0:
00564 
00565     if (node->children != NULL)
00566         if (parse_node(node->children, sibxml) == -1)
00567         return -1;
00568     break;
00569     case 1:
00570     if (node->children != NULL)
00571         if (parse_node(node->children, sibxml) == -1)
00572         return -1;
00573     parse_end(sibxml);
00574     break;
00575 
00576     case -1: /* Something went wrong in the parsing. */
00577     return -1;
00578     default:
00579     warning(0,"SI compiler: undefined return value in a parse function.");
00580     return -1;
00581     break;
00582     }
00583 
00584     if (node->next != NULL)
00585     if (parse_node(node->next, sibxml) == -1)
00586         return -1;
00587 
00588     return 0;
00589 }
00590 
00591 /*
00592  * Cdata section parsing function. Output this "as it is"
00593  */
00594 
00595 static int parse_cdata(xmlNodePtr node, simple_binary_t **sibxml)
00596 {
00597     int ret = 0;
00598     Octstr *temp;
00599 
00600     temp = create_octstr_from_node((char *)node);
00601     parse_octet_string(temp, sibxml);
00602     octstr_destroy(temp);
00603 
00604     return ret;
00605 }
00606 
00607 /*
00608  * In the case of SI documents, only attribute values to be tokenized are
00609  * parts of urls (see si, chapter 9.3.3). The caller romoves the start of an
00610  * url. Check whether we can find parts in the value. If not, parse value a an
00611  * inline string, otherwise parse parts before and after tokenizable parts as
00612  * inline strings.
00613  */
00614 void parse_url_value(Octstr *value, simple_binary_t **sibxml)
00615 {
00616     size_t i;
00617     long pos;
00618     Octstr *urlos,
00619            *first_part,
00620        *last_part;
00621     size_t first_part_len;
00622 
00623     i = 0;
00624     first_part_len = 0;
00625     first_part = NULL;
00626     last_part = NULL;
00627     while (i < NUMBER_OF_URL_VALUES) {
00628         pos = octstr_search(value, 
00629             urlos = octstr_imm(si_URL_values[i].name), 0);
00630         if (pos >= 0) {
00631         first_part = octstr_duplicate(value);
00632             octstr_delete(first_part, pos, octstr_len(first_part) - pos);
00633             first_part_len = octstr_len(first_part);
00634             parse_inline_string(first_part, sibxml);
00635             output_char(si_URL_values[i].token, sibxml);
00636             last_part = octstr_duplicate(value);
00637             octstr_delete(last_part, 0, first_part_len + octstr_len(urlos));
00638             parse_inline_string(last_part, sibxml);
00639         octstr_destroy(first_part);
00640             octstr_destroy(last_part);
00641             break;
00642         }
00643         octstr_destroy(urlos);
00644         ++i;
00645     }
00646 
00647     if (pos < 0) 
00648     parse_inline_string(value, sibxml);
00649         
00650 }
00651 
See file LICENSE for details about the license agreement for using, modifying, copying or deriving work from this software.