Main Page | Alphabetical List | Data Structures | Directories | File List | Data Fields | Globals

wml_compiler.c

Go to the documentation of this file.
00001 /* ==================================================================== 
00002  * The Kannel Software License, Version 1.0 
00003  * 
00004  * Copyright (c) 2001-2008 Kannel Group  
00005  * Copyright (c) 1998-2001 WapIT Ltd.   
00006  * All rights reserved. 
00007  * 
00008  * Redistribution and use in source and binary forms, with or without 
00009  * modification, are permitted provided that the following conditions 
00010  * are met: 
00011  * 
00012  * 1. Redistributions of source code must retain the above copyright 
00013  *    notice, this list of conditions and the following disclaimer. 
00014  * 
00015  * 2. Redistributions in binary form must reproduce the above copyright 
00016  *    notice, this list of conditions and the following disclaimer in 
00017  *    the documentation and/or other materials provided with the 
00018  *    distribution. 
00019  * 
00020  * 3. The end-user documentation included with the redistribution, 
00021  *    if any, must include the following acknowledgment: 
00022  *       "This product includes software developed by the 
00023  *        Kannel Group (http://www.kannel.org/)." 
00024  *    Alternately, this acknowledgment may appear in the software itself, 
00025  *    if and wherever such third-party acknowledgments normally appear. 
00026  * 
00027  * 4. The names "Kannel" and "Kannel Group" must not be used to 
00028  *    endorse or promote products derived from this software without 
00029  *    prior written permission. For written permission, please  
00030  *    contact org@kannel.org. 
00031  * 
00032  * 5. Products derived from this software may not be called "Kannel", 
00033  *    nor may "Kannel" appear in their name, without prior written 
00034  *    permission of the Kannel Group. 
00035  * 
00036  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED 
00037  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 
00038  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 
00039  * DISCLAIMED.  IN NO EVENT SHALL THE KANNEL GROUP OR ITS CONTRIBUTORS 
00040  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,  
00041  * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT  
00042  * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR  
00043  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,  
00044  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE  
00045  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,  
00046  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
00047  * ==================================================================== 
00048  * 
00049  * This software consists of voluntary contributions made by many 
00050  * individuals on behalf of the Kannel Group.  For more information on  
00051  * the Kannel Group, please see <http://www.kannel.org/>. 
00052  * 
00053  * Portions of this software are based upon software originally written at  
00054  * WapIT Ltd., Helsinki, Finland for the Kannel project.  
00055  */ 
00056 
00057 /* 
00058  * wml_compiler.c - compiling WML to WML binary
00059  *
00060  * This is an implementation of a WML compiler for compiling the WML text 
00061  * format to WML binary format, which is used for transmitting the 
00062  * decks to the mobile terminal to decrease the use of the bandwidth.
00063  *
00064  *
00065  * Tuomas Luttinen for Wapit Ltd.
00066  */
00067 
00068 #include <time.h>
00069 #include <unistd.h>
00070 #include <sys/types.h>
00071 #include <sys/stat.h>
00072 #include <fcntl.h>
00073 #include <string.h>
00074 #include <math.h>
00075 #include <ctype.h>
00076 
00077 #include <libxml/xmlmemory.h>
00078 #include <libxml/tree.h>
00079 #include <libxml/debugXML.h>
00080 #include <libxml/encoding.h>
00081 #include <libxml/parser.h>
00082 #include <libxml/xmlerror.h>
00083 
00084 #include "gwlib/gwlib.h"
00085 #include "wml_compiler.h"
00086 #include "xml_definitions.h"
00087 
00088 /***********************************************************************
00089  * Declarations of data types. 
00090  * 
00091  * Binary code values are defined by OMNA, see 
00092  * http://www.openmobilealliance.org/tech/omna/omna-wbxml-public-docid.htm
00093  */
00094 
00095 struct wml_externalid_t {
00096     char *string;
00097     unsigned long value;
00098 };
00099 
00100 typedef struct wml_externalid_t wml_externalid_t;
00101 
00102 #define NUMBERED(name, strings) \
00103     static const wml_externalid_t name##_strings[] = { strings };
00104 #define ASSIGN(string, number) { string, number },
00105 #include "wbxml_tokens.def"
00106 
00107 #define NUMBER_OF_WML_EXTERNALID ((long) sizeof(public_ids_strings)/sizeof(public_ids_strings[0]))
00108 
/*
 * Maps a WBXML version string to the version byte stored in the
 * wbxml_version field of the binary header.
 */
struct wbxml_version_t {
    char *string;
    char value;
};

typedef struct wbxml_version_t wbxml_version_t;

static wbxml_version_t wbxml_version[] = {
    { "1.1", 0x01 },
    { "1.2", 0x02 },
    { "1.3", 0x03 },
    { "1.4", 0x04 },
    { "1.5", 0x05 }
};

/*
 * Number of entries in wbxml_version. Fully parenthesized so the macro is
 * safe inside larger expressions, and cast to long so comparisons against a
 * signed loop index are not silently converted to unsigned.
 */
#define NUMBER_OF_WBXML_VERSION \
    ((long) (sizeof(wbxml_version) / sizeof(wbxml_version[0])))
00125 
00126 
00127 typedef enum { NOESC, ESC, UNESC, FAILED } var_esc_t;
00128 
00129 
00130 /*
00131  * The wml token table node with two fields.
00132  */
00133 
00134 typedef struct {
00135     char *text;
00136     unsigned char token;
00137 } wml_table_t;
00138 
00139 
00140 /*
00141  * The wml token table node with three fields.
00142  */
00143 
00144 typedef struct {
00145     char *text1;
00146     char *text2;
00147     unsigned char token;
00148 } wml_table3_t;
00149 
00150 
00151 /*
00152  * The binary WML structure, that has been passed around between the 
00153  * internal functions. It contains the header fields for wbxml version, 
00154  * the WML public ID and the character set, the length of the string table, 
00155  * the list structure implementing the string table and the octet string 
00156  * containing the encoded WML binary.
00157  */
00158 
00159 typedef struct {
00160     unsigned char wbxml_version;
00161     unsigned long wml_public_id;
00162     unsigned long character_set;
00163     unsigned long string_table_length;
00164     List *string_table;
00165     Octstr *wbxml_string;
00166 } wml_binary_t;
00167 
00168 
00169 /*
00170  * The string table list node.
00171  */
00172 
00173 typedef struct {
00174     unsigned long offset;
00175     Octstr *string;
00176 } string_table_t;
00177 
00178 
00179 /*
00180  * The string table proposal list node.
00181  */
00182 
00183 typedef struct {
00184     int count;
00185     Octstr *string;
00186 } string_table_proposal_t;
00187 
00188 
00189 /*
00190  * The wml hash table node.
00191  */
00192 
00193 typedef struct {
00194     Octstr *item;
00195     unsigned char binary;
00196 } wml_hash_t;
00197 
00198 
00199 /*
00200  * The hash table node for attribute and values.
00201  */
00202 
00203 typedef struct {
00204     Octstr *attribute;
00205     unsigned char binary;
00206     List *value_list;
00207 } wml_attribute_t;
00208 
00209 #include "xml_shared.h"
00210 #include "wml_definitions.h"
00211 
00212 
00213 /***********************************************************************
00214  * Declarations of global variables. 
00215  */
00216 
00217 Dict *wml_elements_dict;
00218 
00219 Dict *wml_attributes_dict;
00220 
00221 List *wml_attr_values_list;
00222 
00223 List *wml_URL_values_list;
00224 
00225 int wml_xml_parser_opt;
00226 
00227 
00228 /***********************************************************************
00229  * Declarations of internal functions. These are defined at the end of
00230  * the file.
00231  */
00232 
00233 
00234 /*
00235  * Parsing functions. These functions operate on a single node or a 
00236  * smaller datatype. Look for more details on the functions at the 
00237  * definitions.
00238  */
00239 
00240 static int parse_document(xmlDocPtr document, Octstr *charset, 
00241               wml_binary_t **wbxml, Octstr *version);
00242 
00243 static int parse_node(xmlNodePtr node, wml_binary_t **wbxml);
00244 static int parse_element(xmlNodePtr node, wml_binary_t **wbxml);
00245 static int parse_attribute(xmlAttrPtr attr, wml_binary_t **wbxml);
00246 static int parse_attr_value(Octstr *attr_value, List *tokens,
00247                 wml_binary_t **wbxml, int charset, var_esc_t default_esc);
00248 static int parse_text(xmlNodePtr node, wml_binary_t **wbxml);
00249 static int parse_cdata(xmlNodePtr node, wml_binary_t **wbxml);
00250 static int parse_st_octet_string(Octstr *ostr, int cdata, var_esc_t default_esc, wml_binary_t **wbxml);
00251 static void parse_st_end(wml_binary_t **wbxml);
00252 static void parse_entities(Octstr *wml_source);
00253 
00254 /*
00255  * Variable functions. These functions are used to find and parse variables.
00256  */
00257 
00258 static int parse_variable(Octstr *text, int start, var_esc_t default_esc, Octstr **output, 
00259               wml_binary_t **wbxml);
00260 static Octstr *get_variable(Octstr *text, int start);
00261 static var_esc_t check_variable_syntax(Octstr *variable, var_esc_t default_esc);
00262 
00263 
00264 /*
00265  * wml_binary-functions. These are used to create, destroy and modify
00266  * wml_binary_t.
00267  */
00268 
00269 static wml_binary_t *wml_binary_create(void);
00270 static void wml_binary_destroy(wml_binary_t *wbxml);
00271 static void wml_binary_output(Octstr *ostr, wml_binary_t *wbxml);
00272 
00273 /* Output into the wml_binary. */
00274 
00275 static void output_st_char(int byte, wml_binary_t **wbxml);
00276 static void output_st_octet_string(Octstr *ostr, wml_binary_t **wbxml);
00277 static void output_variable(Octstr *variable, Octstr **output, 
00278                 var_esc_t escaped, wml_binary_t **wbxml);
00279 
00280 /*
00281  * Memory allocation and deallocations.
00282  */
00283 
00284 static wml_hash_t *hash_create(char *text, unsigned char token);
00285 static wml_attribute_t *attribute_create(void);
00286 static void attr_dict_construct(wml_table3_t *attributes, Dict *attr_dict);
00287 
00288 static void hash_destroy(void *p);
00289 static void attribute_destroy(void *p);
00290 
00291 /*
00292  * Comparison functions for the hash tables.
00293  */
00294 
00295 static int hash_cmp(void *hash1, void *hash2);
00296 
00297 /*
00298  * Miscellaneous help functions.
00299  */
00300 
00301 static int check_do_elements(xmlNodePtr node);
00302 static var_esc_t check_variable_name(xmlNodePtr node);
00303 static Octstr *get_do_element_name(xmlNodePtr node);
00304 static int check_if_url(int hex);
00305 static int check_if_emphasis(xmlNodePtr node);
00306 
00307 static int wml_table_len(wml_table_t *table);
00308 static int wml_table3_len(wml_table3_t *table);
00309 
00310 /* 
00311  * String table functions, used to add and remove strings into and from the
00312  * string table.
00313  */
00314 
00315 static string_table_t *string_table_create(int offset, Octstr *ostr);
00316 static void string_table_destroy(string_table_t *node);
00317 static string_table_proposal_t *string_table_proposal_create(Octstr *ostr);
00318 static void string_table_proposal_destroy(string_table_proposal_t *node);
00319 static void string_table_build(xmlNodePtr node, wml_binary_t **wbxml);
00320 static void string_table_collect_strings(xmlNodePtr node, List *strings);
00321 static List *string_table_collect_words(List *strings);
00322 static List *string_table_sort_list(List *start);
00323 static List *string_table_add_many(List *sorted, wml_binary_t **wbxml);
00324 static unsigned long string_table_add(Octstr *ostr, wml_binary_t **wbxml);
00325 static void string_table_apply(Octstr *ostr, wml_binary_t **wbxml);
00326 static void string_table_output(Octstr *ostr, wml_binary_t **wbxml);
00327 
00328 
00329 /***********************************************************************
00330  * Generic error message formater for libxml2 related errors
00331  */
00332 
00333 static void xml_error(void)
00334 {
00335     xmlErrorPtr err; 
00336     Octstr *msg;
00337     
00338     /* we should have an error, but be more sensitive */
00339     if ((err = xmlGetLastError()) == NULL)
00340         return;
00341         
00342     /* replace annoying line feeds */    
00343     msg = octstr_format("%s", err->message);
00344     octstr_replace(msg, octstr_imm("\n"), octstr_imm(" "));
00345     error(0,"XML error: code: %d, level: %d, line: %d, %s",
00346           err->code, err->level, err->line, octstr_get_cstr(msg));
00347     octstr_destroy(msg);
00348 }
00349 
00350 
00351 /***********************************************************************
00352  * Implementations of the functions declared in wml_compiler.h.
00353  */
00354 
00355 /*
00356  * The actual compiler function. This operates as interface to the compiler.
00357  * For more information, look wml_compiler.h. 
00358  */
00359 int wml_compile(Octstr *wml_text, Octstr *charset, Octstr **wml_binary,
00360                 Octstr *version)
00361 {
00362     int ret = 0;
00363     size_t size;
00364     xmlDocPtr pDoc = NULL;
00365     char *wml_c_text;
00366     wml_binary_t *wbxml = NULL;
00367 
00368     *wml_binary = octstr_create("");
00369     wbxml = wml_binary_create();
00370 
00371     /* Remove the extra space from start and the end of the WML Document. */
00372     octstr_strip_blanks(wml_text);
00373 
00374     /* Check the WML-code for \0-characters and for WML entities. Fast patch.
00375        -- tuo */
00376     parse_entities(wml_text);
00377 
00378     size = octstr_len(wml_text);
00379     wml_c_text = octstr_get_cstr(wml_text);
00380     
00381     debug("wml_compile",0, "WML: Given charset: %s", octstr_get_cstr(charset));
00382 
00383     if (octstr_search_char(wml_text, '\0', 0) != -1) {    
00384         error(0, "WML compiler: Compiling error: "
00385                  "\\0 character found in the middle of the WML source.");
00386         ret = -1;
00387     } else {
00388         /* 
00389          * An empty octet string for the binary output is created, the wml 
00390          * source is parsed into a parsing tree and the tree is then compiled 
00391          * into binary.
00392          */
00393          
00394         pDoc = xmlReadMemory(wml_c_text, size, NULL, octstr_get_cstr(charset), 
00395                              wml_xml_parser_opt);
00396         
00397         if (pDoc != NULL) {
00398             /* 
00399              * If we have a set internal encoding, then apply this information 
00400              * to the XML parsing tree document for later transcoding ability.
00401              */
00402             if (charset)
00403                 pDoc->charset = xmlParseCharEncoding(octstr_get_cstr(charset));
00404 
00405             ret = parse_document(pDoc, charset, &wbxml, version);
00406             wml_binary_output(*wml_binary, wbxml);
00407         } else {    
00408             error(0, "WML compiler: Compiling error: "
00409                      "libxml2 returned a NULL pointer");
00410             xml_error();
00411             ret = -1;
00412         }
00413     }
00414 
00415     wml_binary_destroy(wbxml);
00416 
00417     if (pDoc) 
00418         xmlFreeDoc(pDoc);
00419 
00420     return ret;
00421 }
00422 
00423 
00424 /*
00425  * Initialization: makes up the hash tables for the compiler.
00426  */
00427 
00428 void wml_init(int wml_xml_strict)
00429 {
00430     int i = 0, len = 0;
00431     wml_hash_t *temp = NULL;
00432     
00433     /* The wml elements into a hash table. */
00434     len = wml_table_len(wml_elements);
00435     wml_elements_dict = dict_create(len, hash_destroy);
00436 
00437     for (i = 0; i < len; i++) {
00438     temp = hash_create(wml_elements[i].text, wml_elements[i].token);
00439     dict_put(wml_elements_dict, temp->item, temp);
00440     }
00441 
00442     /* Attributes. */
00443     len = wml_table3_len(wml_attributes);
00444     wml_attributes_dict = dict_create(len, attribute_destroy);
00445     attr_dict_construct(wml_attributes, wml_attributes_dict);
00446 
00447     /* Attribute values. */
00448     len = wml_table_len(wml_attribute_values);
00449     wml_attr_values_list = gwlist_create();
00450 
00451     for (i = 0; i < len; i++) {
00452     temp = hash_create(wml_attribute_values[i].text, 
00453                wml_attribute_values[i].token);
00454     gwlist_append(wml_attr_values_list, temp);
00455     }
00456 
00457     /* URL values. */
00458     len = wml_table_len(wml_URL_values);
00459     wml_URL_values_list = gwlist_create();
00460 
00461     for (i = 0; i < len; i++) {
00462     temp = hash_create(wml_URL_values[i].text, wml_URL_values[i].token);
00463     gwlist_append(wml_URL_values_list, temp);
00464     }
00465     
00466     /* Strict XML parsing. */
00467     wml_xml_parser_opt = wml_xml_strict ? 
00468             (XML_PARSE_NOERROR | XML_PARSE_NONET) :
00469             (XML_PARSE_RECOVER | XML_PARSE_NOERROR | XML_PARSE_NONET);
00470 }
00471 
00472 
00473 
00474 /*
00475  * Shutdown: Frees the memory allocated by initialization.
00476  */
00477 
00478 void wml_shutdown()
00479 {
00480     dict_destroy(wml_elements_dict);
00481     dict_destroy(wml_attributes_dict);
00482     gwlist_destroy(wml_attr_values_list, hash_destroy);
00483     gwlist_destroy(wml_URL_values_list, hash_destroy);
00484 }
00485 
00486 
00487 
00488 /***********************************************************************
00489  * Internal functions.
00490  */
00491 
00492 
00493 /*
00494  * parse_node - the recursive parsing function for the parsing tree.
00495  * Function checks the type of the node, calls for the right parse 
00496  * function for the type, then calls itself for the first child of
00497  * the current node if there's one and after that calls itself for the 
00498  * next child on the list.
00499  */
00500 
00501 static int parse_node(xmlNodePtr node, wml_binary_t **wbxml)
00502 {
00503     int status = 0;
00504     
00505     /* Call for the parser function of the node type. */
00506     switch (node->type) {
00507     case XML_ELEMENT_NODE:
00508     status = parse_element(node, wbxml);
00509     break;
00510     case XML_TEXT_NODE:
00511     status = parse_text(node, wbxml);
00512     break;
00513     case XML_CDATA_SECTION_NODE:
00514     status = parse_cdata(node, wbxml);
00515     break;
00516     case XML_COMMENT_NODE:
00517     case XML_PI_NODE:
00518     /* Comments and PIs are ignored. */
00519     break;
00520     /*
00521      * XML has also many other node types, these are not needed with 
00522      * WML. Therefore they are assumed to be an error.
00523      */
00524     default:
00525     error(0, "WML compiler: Unknown XML node in the WML source.");
00526     return -1;
00527     break;
00528     }
00529 
00530     /* 
00531      * If node is an element with content, it will need an end tag after it's
00532      * children. The status for it is returned by parse_element.
00533      */
00534     switch (status) {
00535     case 0:
00536 
00537     if (node->children != NULL)
00538         if (parse_node(node->children, wbxml) == -1)
00539         return -1;
00540     break;
00541     case 1:
00542     if (node->children != NULL)
00543         if (parse_node(node->children, wbxml) == -1)
00544         return -1;
00545     parse_st_end(wbxml);
00546     break;
00547 
00548     case -1: /* Something went wrong in the parsing. */
00549     return -1;
00550     default:
00551     error(0,
00552           "WML compiler: undefined return value in a parse function.");
00553     return -1;
00554     break;
00555     }
00556 
00557     if (node->next != NULL)
00558     if (parse_node(node->next, wbxml) == -1)
00559         return -1;
00560 
00561     return 0;
00562 }
00563 
00564 
00565 /*
00566  * parse_document - the parsing function for the document node.
00567  * The function outputs the WBXML version, WML public id and the
00568  * character set values into start of the wbxml.
00569  */
00570 static int parse_document(xmlDocPtr document, Octstr *charset,
00571                           wml_binary_t **wbxml, Octstr *version)
00572 {
00573     xmlNodePtr node;
00574     Octstr *externalID = NULL;
00575     long i;
00576 
00577     if (document == NULL) {
00578         error(0, "WML compiler: XML parsing failed, no parsed document.");
00579         error(0, "Most probably an error in the WML source.");
00580         return -1;
00581     }
00582 
00583     /* Return WBXML version dependent on device given Encoding-Version */
00584     if (version == NULL) {
00585         (*wbxml)->wbxml_version = 0x01; /* WBXML Version number 1.1 */
00586         info(0, "WBXML: No wbxml version given, assuming 1.1");
00587     } else {
00588         for (i = 0; i < NUMBER_OF_WBXML_VERSION; i++) {
00589             if (octstr_compare(version, octstr_imm(wbxml_version[i].string)) == 0) {
00590                 (*wbxml)->wbxml_version = wbxml_version[i].value;
00591                 debug("parse_document",0,"WBXML: Encoding with wbxml version <%s>",
00592                       octstr_get_cstr(version));
00593                 break;
00594             }
00595         }
00596         if (i == NUMBER_OF_WBXML_VERSION) {
00597             (*wbxml)->wbxml_version = 0x01; /* WBXML Version number 1.1 */
00598             warning(0, "WBXML: Unknown wbxml version, assuming 1.1 (<%s> is unknown)",
00599                     octstr_get_cstr(version));
00600         }
00601     }
00602 
00603     /* Return WML Version dependent on xml ExternalID string */
00604     if ((document->intSubset != NULL) && (document->intSubset->ExternalID != NULL))    
00605         externalID = octstr_create((char *)document->intSubset->ExternalID);
00606     if (externalID == NULL) {
00607         (*wbxml)->wml_public_id = 0x04; /* WML 1.1 Public ID */
00608         warning(0, "WBXML: WML without ExternalID, assuming 1.1");
00609     } else {
00610         for (i = 0; i < NUMBER_OF_WML_EXTERNALID; i++) {
00611             if (octstr_compare(externalID, octstr_imm(public_ids_strings[i].string)) == 0) {
00612                 (*wbxml)->wml_public_id = public_ids_strings[i].value;
00613                 debug("parse_document",0,"WBXML: WML with ExternalID <%s>",
00614                       octstr_get_cstr(externalID));
00615                 break;
00616             }
00617         }
00618         if (i == NUMBER_OF_WML_EXTERNALID) {
00619             (*wbxml)->wml_public_id = 0x04; /* WML 1.1 Public ID */
00620             warning(0, "WBXML: WML with unknown ExternalID, assuming 1.1 "
00621                     "(<%s> is unknown)",
00622                     octstr_get_cstr(externalID));
00623         }
00624     }
00625     octstr_destroy(externalID);
00626     
00627     (*wbxml)->string_table_length = 0x00; /* String table length=0 */
00628 
00629     /*
00630      * Make sure we set the charset encoding right. If none is given
00631      * then set UTF-8 as default.
00632      */
00633     (*wbxml)->character_set = charset ? 
00634         parse_charset(charset) : parse_charset(octstr_imm("UTF-8"));
00635 
00636     node = xmlDocGetRootElement(document);
00637     
00638     if (node == NULL) {
00639         error(0, "WML compiler: XML parsing failed, no document root element.");
00640         error(0, "Most probably an error in the WML source.");
00641         xml_error();
00642         return -1;
00643     }
00644     
00645     string_table_build(node, wbxml);
00646 
00647     return parse_node(node, wbxml);
00648 }
00649 
00650 
00651 /*
00652  * parse_element - the parsing function for an element node.
00653  * The element tag is encoded into one octet hexadecimal value, 
00654  * if possible. Otherwise it is encoded as text. If the element 
00655  * needs an end tag, the function returns 1, for no end tag 0
00656  * and -1 for an error.
00657  */
00658 
00659 static int parse_element(xmlNodePtr node, wml_binary_t **wbxml)
00660 {
00661     int add_end_tag = 0;
00662     unsigned char wbxml_hex = 0, status_bits;
00663     xmlAttrPtr attribute;
00664     Octstr *name;
00665     wml_hash_t *element;
00666 
00667     name = octstr_create((char *)node->name);
00668 
00669     /* Check, if the tag can be found from the code page. */
00670     if ((element = dict_get(wml_elements_dict, name)) != NULL) {
00671     wbxml_hex = element->binary;
00672     /* A conformance patch: no do-elements of same name in a card or
00673        template. An extremely ugly patch. --tuo */
00674     if (wbxml_hex == 0x27 || /* Card */
00675         wbxml_hex == 0x3B)   /* Template */
00676         if (check_do_elements(node) == -1) {
00677         add_end_tag = -1;
00678         error(0, "WML compiler: Two or more do elements with same"
00679                  " name in a card or template element.");
00680         }
00681     /* A conformance patch: if variable in setvar has a bad name, it's
00682        ignored. */
00683     if (wbxml_hex == 0x3E) /* Setvar */
00684         if (check_variable_name(node) == FAILED) {
00685         octstr_destroy(name);
00686         return add_end_tag;
00687         }
00688     if ((status_bits = element_check_content(node)) > 0) {
00689         wbxml_hex = wbxml_hex | status_bits;
00690         /* If this node has children, the end tag must be added after 
00691            them. */
00692         if ((status_bits & WBXML_CONTENT_BIT) == WBXML_CONTENT_BIT)
00693         add_end_tag = 1;
00694     }
00695     
00696     output_st_char(wbxml_hex, wbxml);
00697     } else {    
00698     /* The tag was not on the code page, it has to be encoded as a 
00699        string. */
00700     wbxml_hex = WBXML_LITERAL;
00701     if ((status_bits = element_check_content(node)) > 0) {
00702         wbxml_hex = wbxml_hex | status_bits;
00703         /* If this node has children, the end tag must be added after 
00704            them. */
00705         if ((status_bits & WBXML_CONTENT_BIT) == WBXML_CONTENT_BIT)
00706         add_end_tag = 1;
00707     }
00708     output_st_char(wbxml_hex, wbxml);
00709     octstr_append_uintvar((*wbxml)->wbxml_string,string_table_add(octstr_duplicate(name), wbxml));
00710     warning(0, "WML compiler: Unknown tag in WML source: <%s>", 
00711         octstr_get_cstr(name));
00712     }
00713 
00714     /* Encode the attribute list for this node and add end tag after the 
00715        list. */
00716 
00717     if(node->properties != NULL) {
00718     attribute = node->properties;
00719     while (attribute != NULL) {
00720         parse_attribute(attribute, wbxml);
00721         attribute = attribute->next;
00722     }
00723     parse_st_end(wbxml);
00724     }
00725 
00726     octstr_destroy(name);
00727     return add_end_tag;
00728 }
00729 
00730 
00731 /*
00732  * parse_attribute - the parsing function for attributes. The function 
00733  * encodes the attribute (and probably start of the value) as a one 
00734  * hexadecimal octet. The value (or the rest of it) is coded as a string 
00735  * maybe using predefined attribute value tokens to reduce the length
00736  * of the output. Returns 0 for success, -1 for error.
00737  */
00738 
00739 static int parse_attribute(xmlAttrPtr attr, wml_binary_t **wbxml)
00740 {
00741     int status = 0;
00742     int coded_length = 0;
00743     unsigned char wbxml_hex = 0x00;
00744     wml_hash_t *hit = NULL;
00745     wml_attribute_t *attribute = NULL;
00746     Octstr *name = NULL, *pattern = NULL, *p = NULL;
00747 
00748     name = octstr_create((char *)attr->name);
00749 
00750     if (attr->children != NULL)
00751     pattern = create_octstr_from_node((char *)attr->children);
00752     else 
00753     pattern = NULL;
00754 
00755     /* Check if the attribute is found on the code page. */
00756 
00757     if ((attribute = dict_get(wml_attributes_dict, name)) != NULL) {
00758     if (attr->children == NULL || 
00759         (hit = gwlist_search(attribute->value_list, (void *)pattern, 
00760                    hash_cmp)) == NULL) {
00761                 if(attribute->binary == 0x00) {
00762                     warning(0, "WML compiler: can't compile attribute %s%s%s%s", 
00763                                octstr_get_cstr(attribute->attribute), 
00764                    (attr->children != NULL ? "=\"": ""), 
00765                    (attr->children != NULL ? octstr_get_cstr(pattern) : ""), 
00766                    (attr->children != NULL ? "\"": ""));
00767                 wbxml_hex = WBXML_LITERAL;
00768                 output_st_char(wbxml_hex, wbxml);
00769                 output_st_char(string_table_add(octstr_duplicate(name), wbxml), wbxml);
00770         } else {
00771             wbxml_hex = attribute->binary;
00772             output_st_char(wbxml_hex, wbxml);
00773         }
00774     } else if (hit->binary) {
00775         wbxml_hex = hit->binary;
00776         coded_length = octstr_len(hit->item);
00777         output_st_char(wbxml_hex, wbxml);
00778     } else
00779         status = -1;
00780     } else {
00781     /* The attribute was not on the code page, it has to be encoded as a 
00782        string. */
00783     wbxml_hex = WBXML_LITERAL;
00784     output_st_char(wbxml_hex, wbxml);
00785     octstr_append_uintvar((*wbxml)->wbxml_string,string_table_add(octstr_duplicate(name), wbxml));
00786     warning(0, "WML compiler: Unknown attribute in WML source: <%s>", 
00787         octstr_get_cstr(name));
00788     }
00789 
00790     if (status >= 0) {
00791     var_esc_t default_esc;
00792 
00793     default_esc = (octstr_str_compare (name, "href") == 0) ? ESC : NOESC;
00794 
00795     /* The rest of the attribute is coded as a inline string. */
00796     if (pattern != NULL && 
00797         coded_length < (int) octstr_len(pattern)) {
00798         if (coded_length == 0)
00799         p = create_octstr_from_node((char *)attr->children); 
00800         else
00801         p = octstr_copy(pattern, coded_length, 
00802                 octstr_len(pattern) - coded_length); 
00803 
00804         if (check_if_url(wbxml_hex))
00805         status = parse_attr_value(p, wml_URL_values_list,
00806                       wbxml, attr->doc->charset, default_esc);
00807         else
00808         status = parse_attr_value(p, wml_attr_values_list,
00809                       wbxml, attr->doc->charset, default_esc);
00810         if (status != 0)
00811         error(0, 
00812               "WML compiler: could not output attribute "
00813               "value as a string.");
00814         octstr_destroy(p);
00815     }
00816     }
00817 
00818     /* Memory cleanup. */
00819     octstr_destroy(name);
00820 
00821     if (pattern != NULL)
00822     octstr_destroy(pattern);
00823 
00824     return status;
00825 }
00826 
00827 
00828 
00829 /*
00830  * parse_attr_value - parses an attributes value using WML value codes.
00831  */
00832 
00833 static int parse_attr_value(Octstr *attr_value, List *tokens,
00834                 wml_binary_t **wbxml, int charset, var_esc_t default_esc)
00835 {
00836     int i, pos, wbxml_hex;
00837     wml_hash_t *temp = NULL;
00838     Octstr *cut_text = NULL;
00839     char *tmp;
00840 
00841     /*
00842      * Beware that libxml2 does internal encoding in UTF-8 while parsing.
00843      * So if our original WML source had a different encoding set, we have
00844      * to transcode at least here. Only transcode if target encoding differs
00845      * from libxml2's internal encoding (UTF-8).
00846      */
00847     tmp = (char*) xmlGetCharEncodingName(charset);
00848     if (charset != XML_CHAR_ENCODING_UTF8 && 
00849         charset_convert(attr_value, "UTF-8", 
00850                         tmp) != 0) {
00851         error(0, "Failed to convert XML attribute value from charset "
00852                  "<%s> to <%s>, will leave as is.", "UTF-8", 
00853                  tmp ? tmp : "(undef)");
00854     }
00855 
00856 
00857     /*
00858      * The attribute value is search for text strings that can be replaced 
00859      * with one byte codes. Note that the algorith is not foolproof; seaching 
00860      * is done in an order and the text before first hit is not checked for 
00861      * those tokens that are after the hit in the order. Most likely it would 
00862      * be waste of time anyway. String table is not used here, since at least 
00863      * Nokia 7110 doesn't seem to understand string table references here.
00864      */
00865 
00866     /* A fast patch to allow reserved names to be variable names. May produce 
00867        a little longer binary at some points. --tuo */
00868     if (octstr_search_char(attr_value, '$', 0) >= 0) {
00869     if (parse_st_octet_string(attr_value, 0, default_esc, wbxml) != 0)
00870         return -1;
00871     } else {
00872 
00873     for (i = 0; i < gwlist_len(tokens); i++) {
00874         temp = gwlist_get(tokens, i);
00875         pos = octstr_search(attr_value, temp->item, 0);
00876         switch (pos) {
00877         case -1:
00878         break;
00879         case 0:
00880         wbxml_hex = temp->binary;
00881         output_st_char(wbxml_hex, wbxml);   
00882         octstr_delete(attr_value, 0, octstr_len(temp->item));   
00883         break;
00884         default:
00885         /* 
00886          *  There is some text before the first hit, that has to 
00887          *  be handled too. 
00888          */
00889         gw_assert(pos <= octstr_len(attr_value));
00890     
00891         cut_text = octstr_copy(attr_value, 0, pos);
00892         if (parse_st_octet_string(cut_text, 0, default_esc, wbxml) != 0)
00893             return -1;
00894         octstr_destroy(cut_text);
00895         
00896         wbxml_hex = temp->binary;
00897         output_st_char(wbxml_hex, wbxml);   
00898 
00899         octstr_delete(attr_value, 0, pos + octstr_len(temp->item));
00900         break;
00901         }
00902     }
00903 
00904     /* 
00905      * If no hits, then the attr_value is handled as a normal text, 
00906      * otherwise the remaining part is searched for other hits too. 
00907      */
00908 
00909     if ((int) octstr_len(attr_value) > 0) {
00910         if (i < gwlist_len(tokens))
00911         parse_attr_value(attr_value, tokens, wbxml, charset, default_esc);
00912         else
00913         if (parse_st_octet_string(attr_value, 0, default_esc, wbxml) != 0)
00914             return -1;
00915     }
00916     }
00917 
00918     return 0;
00919 }
00920 
00921 
00922 
00923 /*
00924  * parse_st_end - adds end tag to an element.
00925  */
00926 
00927 static void parse_st_end(wml_binary_t **wbxml)
00928 {
00929     output_st_char(WBXML_END, wbxml);
00930 }
00931 
00932 
00933 
00934 /*
00935  * parse_text - a text string parsing function.
00936  * This function parses a text node. 
00937  */
00938 
00939 static int parse_text(xmlNodePtr node, wml_binary_t **wbxml)
00940 {
00941     int ret;
00942     Octstr *temp;
00943     char* tmp;
00944 
00945     temp = create_octstr_from_node((char *)node); /* returns string in UTF-8 */
00946 
00947     /*
00948      * Beware that libxml2 does internal encoding in UTF-8 while parsing.
00949      * So if our original WML source had a different encoding set, we have
00950      * to transcode at least here. Only transcode if target encoding differs
00951      * from libxml2's internal encoding (UTF-8).
00952      */
00953     tmp = (char*) xmlGetCharEncodingName(node->doc->charset);
00954     if (node->doc->charset != XML_CHAR_ENCODING_UTF8 && 
00955         charset_convert(temp, "UTF-8", 
00956                         tmp) != 0) {
00957         error(0, "Failed to convert XML text entity from charset "
00958                  "<%s> to <%s>, will leave as is.", "UTF-8", 
00959                  tmp ? tmp : "(undef)");
00960     }
00961 
00962     octstr_shrink_blanks(temp);
00963     if (!check_if_emphasis(node->prev) && !check_if_emphasis(node->next))
00964     octstr_strip_blanks(temp);
00965 
00966     if (octstr_len(temp) == 0)
00967         ret = 0;
00968     else 
00969         ret = parse_st_octet_string(temp, 0, NOESC, wbxml);
00970 
00971     /* Memory cleanup. */
00972     octstr_destroy(temp);
00973 
00974     return ret;
00975 }
00976 
00977 
00978 
00979 /*
00980  * parse_cdata - a cdata section parsing function.
00981  * This function parses a cdata section that is outputted into the binary 
00982  * "as is". 
00983  */
00984 
00985 static int parse_cdata(xmlNodePtr node, wml_binary_t **wbxml)
00986 {
00987     int ret = 0;
00988     Octstr *temp;
00989 
00990     temp = create_octstr_from_node((char *)node);
00991 
00992     parse_st_octet_string(temp, 1, NOESC, wbxml);
00993     
00994     /* Memory cleanup. */
00995     octstr_destroy(temp);
00996 
00997     return ret;
00998 }
00999 
01000 
01001 
01002 /*
01003  * parse_variable - a variable parsing function. 
01004  * Arguments:
01005  * - text: the octet string containing a variable
01006  * - start: the starting position of the variable not including 
01007  *   trailing &
01008  * Returns: lenth of the variable for success, -1 for failure, 0 for 
01009  * variable syntax error, when it will be ignored. 
01010  * Parsed variable is returned as an octet string in Octstr **output.
01011  */
01012 
01013 static int parse_variable(Octstr *text, int start, var_esc_t default_esc, Octstr **output, 
01014               wml_binary_t **wbxml)
01015 {
01016     var_esc_t esc;
01017     int ret;
01018     Octstr *variable;
01019 
01020     variable = get_variable(text, start + 1);
01021     octstr_truncate(*output, 0);
01022 
01023     if (variable == NULL)
01024     return 0;
01025 
01026     if (octstr_get_char(variable, 0) == '$') {
01027     octstr_append_char(*output, '$');
01028     octstr_destroy(variable);
01029     ret = 2;
01030     } else {
01031     if (octstr_get_char(text, start + 1) == '(')
01032         ret = octstr_len(variable) + 3;
01033     else
01034         ret = octstr_len(variable) + 1;
01035 
01036     if ((esc = check_variable_syntax(variable, default_esc)) != FAILED)
01037         output_variable(variable, output, esc, wbxml);
01038     else
01039         octstr_destroy(variable);
01040     }
01041 
01042     return ret;
01043 }
01044 
01045 
01046 
01047 /*
01048  * get_variable - get the variable name from text.
01049  * Octstr *text contains the text with a variable name starting at point 
01050  * int start.
01051  */
01052 
01053 static Octstr *get_variable(Octstr *text, int start)
01054 {
01055     Octstr *var = NULL;
01056     long end;
01057     int ch;
01058 
01059     gw_assert(text != NULL);
01060     gw_assert(start >= 0 && start <= (int) octstr_len(text));
01061 
01062     ch = octstr_get_char(text, start);
01063 
01064     if (ch == '$') {
01065     var = octstr_create("$");
01066     } else if (ch == '(') {
01067     start ++;
01068     end = octstr_search_char(text, ')', start);
01069     if (end == -1)
01070         error(0, "WML compiler: braces opened, but not closed for a "
01071           "variable.");
01072     else if (end - start == 0)
01073         error(0, "WML compiler: empty braces without variable.");
01074     else
01075         var = octstr_copy(text, start, end - start);
01076     } else {
01077     end = start + 1;
01078     while (isalnum(ch = octstr_get_char(text, end)) || (ch == '_'))
01079         end ++;
01080 
01081     var = octstr_copy(text, start, end - start);
01082     }
01083 
01084     return var;
01085 }
01086 
01087 
01088 
01089 /*
01090  * check_variable_syntax - checks the variable syntax and the possible 
01091  * escape mode it has. Octstr *variable contains the variable string.
01092  */
01093 
01094 static var_esc_t check_variable_syntax(Octstr *variable, var_esc_t default_esc)
01095 {
01096     Octstr *escape;
01097     char ch;
01098     int pos, len, i;
01099     var_esc_t ret;
01100 
01101     if ((pos = octstr_search_char(variable, ':', 0)) > 0) {
01102     len = octstr_len(variable) - pos;
01103     escape = octstr_copy(variable, pos + 1, len - 1);
01104     octstr_truncate(variable, pos);
01105     octstr_truncate(escape, len);
01106     octstr_convert_range(escape, 0, octstr_len(escape), tolower);
01107 
01108     if (octstr_str_compare(escape, "noesc") == 0 ||
01109         octstr_str_compare(escape, "n") == 0 )
01110         ret = NOESC;
01111     else if (octstr_str_compare(escape, "unesc") == 0 ||
01112          octstr_str_compare(escape, "u") == 0 )
01113         ret = UNESC;
01114     else if (octstr_str_compare(escape, "escape") == 0 ||
01115          octstr_str_compare(escape, "e") == 0 )
01116         ret = ESC;
01117     else {
01118         error(0, "WML compiler: syntax error in variable escaping.");
01119         octstr_destroy(escape);
01120         return FAILED;
01121     }
01122     octstr_destroy(escape);
01123     } else
01124     ret = default_esc;
01125 
01126     ch = octstr_get_char(variable, 0);
01127     if (!(isalpha((int)ch)) && ch != '_') {
01128     error(0, "WML compiler: syntax error in variable; name starting "
01129           "with %c.", ch);
01130     return FAILED;
01131     } else
01132     for (i = 1; i < (int) octstr_len(variable); i++)
01133         if (!isalnum((int)(ch = octstr_get_char(variable, 0))) && 
01134         ch != '_') {
01135         warning(0, "WML compiler: syntax error in variable.");
01136         return FAILED;
01137         }
01138 
01139     return ret;
01140 }
01141 
01142 
01143 
01144 /*
01145  * parse_st_octet_string - parse an octet string into wbxml_string, the string 
01146  * is checked for variables. If string is string table applicable, it will 
01147  * be checked for string insrtances that are in the string table, otherwise 
01148  * not. Returns 0 for success, -1 for error.
01149  */
01150 
01151 static int parse_st_octet_string(Octstr *ostr, int cdata, var_esc_t default_esc, wml_binary_t **wbxml)
01152 {
01153     Octstr *output, *var, *temp = NULL;
01154     int var_len;
01155     int start = 0, pos = 0, len;
01156 
01157     /* No variables? Ok, let's take the easy way... (CDATA never contains 
01158        variables.) */
01159 
01160     if ((pos = octstr_search_char(ostr, '$', 0)) < 0 || cdata == 1) {
01161     string_table_apply(ostr, wbxml);
01162     return 0;
01163     }
01164 
01165     len = octstr_len(ostr);
01166     output = octstr_create("");
01167     var = octstr_create("");
01168 
01169     while (pos < len) {
01170     if (octstr_get_char(ostr, pos) == '$') {
01171         if (pos > start) {
01172         temp = octstr_copy(ostr, start, pos - start);
01173         octstr_insert(output, temp, octstr_len(output));
01174         octstr_destroy(temp);
01175         }
01176       
01177         if ((var_len = parse_variable(ostr, pos, default_esc, &var, wbxml)) > 0)    {
01178         if (octstr_len(var) > 0) {
01179             if (octstr_get_char(var, 0) == '$')
01180             /*
01181              * No, it's not actually variable, but $-character 
01182              * escaped as "$$". So everything should be packed 
01183              * into one string. 
01184              */
01185             octstr_insert(output, var, octstr_len(output));
01186             else {
01187             /*
01188              * The string is output as a inline string and the 
01189              * variable as a string table variable reference. 
01190              */
01191             if (octstr_len(output) > 0)
01192                 string_table_apply(output, wbxml);
01193             octstr_truncate(output, 0);
01194             output_st_octet_string(var, wbxml);
01195             }
01196             /* Variable had a syntax error, so it's skipped. */
01197         }
01198 
01199         pos = pos + var_len;
01200         start = pos;
01201         } else
01202         return -1;
01203     } else
01204         pos ++;
01205     }
01206 
01207     /* Was there still something after the last variable? */
01208     if (start < pos) {
01209     if (octstr_len(output) == 0) {
01210         octstr_destroy(output);
01211         output = octstr_copy(ostr, start, pos - start);
01212     } else {
01213         temp = octstr_copy(ostr, start, pos - start);
01214         octstr_insert(output, temp, octstr_len(output));
01215         octstr_destroy(temp);
01216     }
01217     }
01218 
01219     if (octstr_len(output) > 0)
01220     string_table_apply(output, wbxml);
01221   
01222     octstr_destroy(output);
01223     octstr_destroy(var);
01224   
01225     return 0;
01226 }
01227 
01228 
01229 
01230 
01231 /*
01232  * parse_entities - replaces WML entites in the WML source with equivalent
01233  * numerical entities. A fast patch for WAP 1.1 compliance.
01234  */
01235 
01236 static void parse_entities(Octstr *wml_source)
01237 {
01238     static char entity_nbsp[] = "&nbsp;";
01239     static char entity_shy[] = "&shy;";
01240     static char nbsp[] = "&#160;";
01241     static char shy[] = "&#173;";
01242     int pos = 0;
01243     Octstr *temp;
01244 
01245     if ((pos = octstr_search(wml_source, octstr_imm(entity_nbsp),
01246                  pos)) >= 0) {
01247     temp = octstr_create(nbsp);
01248     while (pos >= 0) {
01249         octstr_delete(wml_source, pos, strlen(entity_nbsp));
01250         octstr_insert(wml_source, temp, pos);
01251         pos = octstr_search(wml_source, 
01252                 octstr_imm(entity_nbsp), pos);
01253     }
01254     octstr_destroy(temp);
01255     }
01256 
01257     pos = 0;
01258     if ((pos = octstr_search(wml_source, octstr_imm(entity_shy),
01259                  pos)) >= 0) {
01260     temp = octstr_create(shy);
01261     while (pos >= 0) {
01262         octstr_delete(wml_source, pos, strlen(entity_shy));
01263         octstr_insert(wml_source, temp, pos);
01264         pos = octstr_search(wml_source, 
01265                 octstr_imm(entity_shy), pos);
01266     }
01267     octstr_destroy(temp);
01268     }   
01269 }
01270 
01271 
01272 
01273 /*
01274  * wml_binary_create - reserves memory for the wml_binary_t and sets the 
01275  * fields to zeros and NULLs.
01276  */
01277 
01278 static wml_binary_t *wml_binary_create(void)
01279 {
01280     wml_binary_t *wbxml;
01281 
01282     wbxml = gw_malloc(sizeof(wml_binary_t));
01283     wbxml->wbxml_version = 0x00;
01284     wbxml->wml_public_id = 0x00;
01285     wbxml->character_set = 0x00;
01286     wbxml->string_table_length = 0x00;
01287     wbxml->string_table = gwlist_create();
01288     wbxml->wbxml_string = octstr_create("");
01289 
01290     return wbxml;
01291 }
01292 
01293 
01294 
01295 /*
01296  * wml_binary_destroy - frees the memory allocated for the wml_binary_t.
01297  */
01298 
01299 static void wml_binary_destroy(wml_binary_t *wbxml)
01300 {
01301     if (wbxml != NULL) {
01302     gwlist_destroy(wbxml->string_table, NULL);
01303     octstr_destroy(wbxml->wbxml_string);
01304     gw_free(wbxml);
01305     }
01306 }
01307 
01308 
01309 
01310 /*
01311  * wml_binary_output - outputs all the fiels of wml_binary_t into ostr.
01312  */
01313 
01314 static void wml_binary_output(Octstr *ostr, wml_binary_t *wbxml)
01315 {
01316     octstr_append_char(ostr, wbxml->wbxml_version);
01317     octstr_append_uintvar(ostr, wbxml->wml_public_id);
01318     octstr_append_uintvar(ostr, wbxml->character_set);
01319     octstr_append_uintvar(ostr, wbxml->string_table_length);
01320 
01321     if (wbxml->string_table_length > 0)
01322     string_table_output(ostr, &wbxml);
01323 
01324     octstr_insert(ostr, wbxml->wbxml_string, octstr_len(ostr));
01325 }
01326 
01327 
01328 
01329 /*
01330  * output_st_char - output a character into wbxml_string.
01331  * Returns 0 for success, -1 for error.
01332  */
01333 
01334 static void output_st_char(int byte, wml_binary_t **wbxml)
01335 {
01336     octstr_append_char((*wbxml)->wbxml_string, byte);
01337 }
01338 
01339 
01340 
01341 /*
01342  * output_st_octet_string - output an octet string into wbxml.
01343  * Returns 0 for success, -1 for an error. No conversions.
01344  */
01345 
01346 static void output_st_octet_string(Octstr *ostr, wml_binary_t **wbxml)
01347 {
01348     octstr_insert((*wbxml)->wbxml_string, ostr, 
01349           octstr_len((*wbxml)->wbxml_string));
01350 }
01351 
01352 
01353 
01354 /*
01355  * output_variable - output a variable reference into the string table.
01356  */
01357 
01358 static void output_variable(Octstr *variable, Octstr **output, 
01359                 var_esc_t escaped, wml_binary_t **wbxml)
01360 {
01361   switch (escaped)
01362     {
01363     case ESC:
01364       octstr_append_char(*output, WBXML_EXT_T_0);
01365       break;
01366     case UNESC:
01367       octstr_append_char(*output, WBXML_EXT_T_1);
01368       break;
01369     default:
01370       octstr_append_char(*output, WBXML_EXT_T_2);
01371       break;
01372     }
01373 
01374   octstr_append_uintvar(*output, string_table_add(variable, wbxml));
01375 }
01376 
01377 
01378 
01379 /*
01380  * hash_create - allocates memory for a 2 field hash table node.
01381  */
01382 
01383 static wml_hash_t *hash_create(char *text, unsigned char token)
01384 {
01385     wml_hash_t *table_node;
01386 
01387     table_node = gw_malloc(sizeof(wml_hash_t));
01388     table_node->item = octstr_create(text);
01389     table_node->binary = token;
01390 
01391     return table_node;
01392 }
01393 
01394 
01395 
01396 /*
01397  * attribute_create - allocates memory for the attributes hash table node 
01398  * that contains the attribute, the binary for it and a list of binary values
01399  * tied with the attribute.
01400  */
01401 
01402 static wml_attribute_t *attribute_create(void)
01403 {
01404     wml_attribute_t *attr;
01405 
01406     attr = gw_malloc(sizeof(wml_attribute_t));
01407     attr->attribute = NULL;
01408     attr->binary = 0;
01409     attr->value_list = gwlist_create();
01410 
01411     return attr;
01412 }
01413 
01414 
01415 
01416 /*
01417  * attr_dict_construct - takes a table of attributes and their values and 
01418  * inputs these into a dictionary. 
01419  */
01420 
01421 static void attr_dict_construct(wml_table3_t *attributes, Dict *attr_dict)
01422 {
01423     int i = 0;
01424     wml_attribute_t *node = NULL;
01425     wml_hash_t *temp = NULL;
01426 
01427     node = attribute_create();
01428 
01429     do {
01430     if (node->attribute == NULL)
01431         node->attribute = octstr_create(attributes[i].text1);
01432     else if (strcmp(attributes[i].text1, attributes[i-1].text1) != 0) {
01433         dict_put(attr_dict, node->attribute, node);
01434         node = attribute_create();
01435         node->attribute = octstr_create(attributes[i].text1);
01436     }
01437 
01438     if (attributes[i].text2 == NULL)
01439         node->binary = attributes[i].token;
01440     else {
01441         temp = hash_create(attributes[i].text2, attributes[i].token);
01442         gwlist_append(node->value_list, (void *)temp);
01443     }   
01444     i++;
01445     } while (attributes[i].text1 != NULL);
01446 
01447     dict_put(attr_dict, node->attribute, node);
01448 }
01449 
01450 
01451 
01452 /*
01453  * hash_destroy - deallocates memory of a 2 field hash table node.
01454  */
01455 
01456 static void hash_destroy(void *p)
01457 {
01458     wml_hash_t *node;
01459 
01460     if (p == NULL)
01461         return;
01462 
01463     node = p;
01464 
01465     octstr_destroy(node->item);
01466     gw_free(node);
01467 }
01468 
01469 
01470 
01471 /*
01472  * attribute_destroy - deallocates memory of a attribute hash table node.
01473  */
01474 
01475 static void attribute_destroy(void *p)
01476 {
01477     wml_attribute_t *node;
01478 
01479     if (p == NULL)
01480     return;
01481 
01482     node = p;
01483 
01484     octstr_destroy(node->attribute);
01485     gwlist_destroy(node->value_list, hash_destroy);
01486     gw_free(node);
01487 }
01488 
01489 
01490 
01491 /*
01492  * hash_cmp - compares pattern against item and if the pattern matches the 
01493  * item returns 1, else 0.
01494  */
01495 
01496 static int hash_cmp(void *item, void *pattern)
01497 {
01498     int ret = 0;
01499 
01500     gw_assert(item != NULL && pattern != NULL);
01501     gw_assert(((wml_hash_t *)item)->item != NULL);
01502 
01503     if (octstr_search(pattern, ((wml_hash_t *)item)->item, 0) == 0)
01504     ret = 1;
01505 
01506     return ret;
01507 }
01508 
01509 
01510 /*
01511  * check_do_elements - a helper function for parse_element for checking if a
01512  * card or template element has two or more do elements of the same name. 
01513  * Returns 0 for OK and -1 for an error (== do elements with same name found).
01514  */
01515 
01516 static int check_do_elements(xmlNodePtr node)
01517 {
01518     xmlNodePtr child;
01519     int i, status = 0;
01520     Octstr *name = NULL;
01521     List *name_list = NULL;
01522     
01523     name_list = gwlist_create();
01524 
01525     if ((child = node->children) != NULL) {
01526         while (child != NULL) {
01527             if (child->name && strcmp((char *)child->name, "do") == 0) {
01528                 name = get_do_element_name(child);
01529 
01530                 if (name == NULL) {
01531                     error(0, "WML compiler: no name or type in a do element");
01532                     return -1;
01533                 }
01534 
01535                 for (i = 0; i < gwlist_len(name_list); i ++)
01536                     if (octstr_compare(gwlist_get(name_list, i), name) == 0) {
01537                         octstr_destroy(name);
01538                         status = -1;
01539                         break;
01540                     }
01541                 if (status != -1)
01542                     gwlist_append(name_list, name);
01543                 else
01544                     break;
01545             }
01546             child = child->next;
01547         }
01548     }
01549 
01550     gwlist_destroy(name_list, octstr_destroy_item);
01551 
01552     return status;
01553 }
01554 
01555 
01556 
01557 /*
01558  * check_variable_name - checks the name for variable in a setvar element.
01559  * If the name has syntax error, -1 is returned, else 0.
01560  */
01561 
01562 static var_esc_t check_variable_name(xmlNodePtr node)
01563 {
01564     Octstr *name = NULL;
01565     xmlAttrPtr attr; 
01566     var_esc_t ret = FAILED;
01567 
01568     if ((attr = node->properties) != NULL) {
01569         while (attr != NULL) {
01570             if (attr->name && strcmp((char *)attr->name, "name") == 0) {
01571                 name = create_octstr_from_node((char *)attr->children);
01572                 break;
01573             }
01574             attr = attr->next;
01575         }
01576     }
01577 
01578     if (attr == NULL) {
01579         error(0, "WML compiler: no name in a setvar element");
01580         return FAILED;
01581     }
01582 
01583     ret = check_variable_syntax(name, NOESC);
01584     octstr_destroy(name);
01585     
01586     return ret;
01587 }
01588 
01589 
01590 
01591 /*
01592  * get_do_element_name - returns the name for a do element. Name is either 
01593  * name when the element has the attribute or defaults to the type attribute 
01594  * if there is no name.
01595  */
01596 
01597 static Octstr *get_do_element_name(xmlNodePtr node)
01598 {
01599     Octstr *name = NULL;
01600     xmlAttrPtr attr; 
01601 
01602     if ((attr = node->properties) != NULL) {
01603         while (attr != NULL) {
01604             if (attr->name && strcmp((char *)attr->name, "name") == 0) {
01605                 name = create_octstr_from_node((char *)attr->children);
01606                 break;
01607             }
01608             attr = attr->next;
01609         }
01610 
01611         if (attr == NULL) {
01612             attr = node->properties;
01613             while (attr != NULL) {
01614                 if (attr->name && strcmp((char *)attr->name, "type") == 0) {
01615                     name = create_octstr_from_node((char *)attr->children);
01616                     break;
01617                 }
01618                 attr = attr->next;
01619             }
01620         }
01621     }
01622 
01623     return name;
01624 }
01625 
01626 
01627 
/*
 * check_if_url - checks whether the attribute token stands for an URL
 * attribute. Only the low byte of hex is looked at.
 * Returns 1 for an URL and 0 otherwise.
 */

static int check_if_url(int hex)
{
    static const unsigned char url_tokens[] = {
        0x4A, 0x4B, 0x4C,   /* href, href http://, href https:// */
        0x32, 0x58, 0x59    /* src, src http://, src https:// */
    };
    const unsigned char token = (unsigned char) hex;
    unsigned int k;

    for (k = 0; k < sizeof(url_tokens) / sizeof(url_tokens[0]); k++) {
        if (url_tokens[k] == token)
            return 1;
    }

    return 0;
}
01643 
01644 
01645 
01646 /*
01647  * check_if_emphasis - checks if the node is an emphasis element. 
01648  * Returns 1 for an emphasis and 0 otherwise.
01649  */
01650 
01651 static int check_if_emphasis(xmlNodePtr node)
01652 {
01653     if (node == NULL || node->name == NULL)
01654     return 0;
01655 
01656     if (strcmp((char *)node->name, "b") == 0)
01657     return 1;
01658     if (strcmp((char *)node->name, "big") == 0)
01659     return 1;
01660     if (strcmp((char *)node->name, "em") == 0)
01661     return 1;
01662     if (strcmp((char *)node->name, "i") == 0)
01663     return 1;
01664     if (strcmp((char *)node->name, "small") == 0)
01665     return 1;
01666     if (strcmp((char *)node->name, "strong") == 0)
01667     return 1;
01668     if (strcmp((char *)node->name, "u") == 0)
01669     return 1;
01670 
01671     return 0;
01672 }
01673 
01674 
01675 /*
01676  * wml_table_len - returns the length of a wml_table_t array.
01677  */
01678 
01679 static int wml_table_len(wml_table_t *table)
01680 {
01681     int i = 0;
01682 
01683     while (table[i].text != NULL)
01684     i++;
01685 
01686     return i;
01687 }
01688 
01689 
01690 
01691 /*
01692  * wml_table3_len - returns the length of a wml_table3_t array.
01693  */
01694 
01695 static int wml_table3_len(wml_table3_t *table)
01696 {
01697     int i = 0;
01698 
01699     while (table[i].text1 != NULL)
01700     i++;
01701 
01702     return i;
01703 }
01704 
01705 
01706 
01707 /*
01708  * string_table_create - reserves memory for the string_table_t and sets the 
01709  * fields.
01710  */
01711 
01712 static string_table_t *string_table_create(int offset, Octstr *ostr)
01713 {
01714     string_table_t *node;
01715 
01716     node = gw_malloc(sizeof(string_table_t));
01717     node->offset = offset;
01718     node->string = ostr;
01719 
01720     return node;
01721 }
01722 
01723 
01724 
01725 /*
01726  * string_table_destroy - frees the memory allocated for the string_table_t.
01727  */
01728 
01729 static void string_table_destroy(string_table_t *node)
01730 {
01731     if (node != NULL) {
01732     octstr_destroy(node->string);
01733     gw_free(node);
01734     }
01735 }
01736 
01737 
01738 
01739 /*
01740  * string_table_proposal_create - reserves memory for the 
01741  * string_table_proposal_t and sets the fields.
01742  */
01743 
01744 static string_table_proposal_t *string_table_proposal_create(Octstr *ostr)
01745 {
01746     string_table_proposal_t *node;
01747 
01748     node = gw_malloc(sizeof(string_table_proposal_t));
01749     node->count = 1;
01750     node->string = ostr;
01751 
01752     return node;
01753 }
01754 
01755 
01756 
01757 /*
01758  * string_table_proposal_destroy - frees the memory allocated for the 
01759  * string_table_proposal_t.
01760  */
01761 
01762 static void string_table_proposal_destroy(string_table_proposal_t *node)
01763 {
01764     if (node != NULL) {
01765     octstr_destroy(node->string);
01766     gw_free(node);
01767     }
01768 }
01769 
01770 
01771 
01772 /*
01773  * string_table_build - collects the strings from the WML source into a list, 
01774  * adds those strings that appear more than once into string table. The rest 
01775  * of the strings are sliced into words and the same procedure is executed to 
01776  * the list of these words.
01777  */
01778 
01779 static void string_table_build(xmlNodePtr node, wml_binary_t **wbxml)
01780 {
01781     string_table_proposal_t *item = NULL;
01782     List *list = NULL;
01783 
01784     list = gwlist_create();
01785 
01786     string_table_collect_strings(node, list);
01787 
01788     list = string_table_add_many(string_table_sort_list(list), wbxml);
01789 
01790     list =  string_table_collect_words(list);
01791 
01792     /* Don't add strings if there aren't any. (no NULLs please) */
01793     if (list) {
01794     list = string_table_add_many(string_table_sort_list(list), wbxml);
01795     }
01796 
01797     /* Memory cleanup. */
01798     while (gwlist_len(list)) {
01799     item = gwlist_extract_first(list);
01800     string_table_proposal_destroy(item);
01801     }
01802 
01803     gwlist_destroy(list, NULL);
01804 }
01805 
01806 
01807 
01808 /*
01809  * string_table_collect_strings - collects the strings from the WML 
01810  * ocument into a list that is then further processed to build the 
01811  * string table for the document.
01812  */
01813 
01814 static void string_table_collect_strings(xmlNodePtr node, List *strings)
01815 {
01816     Octstr *string;
01817     xmlAttrPtr attribute;
01818 
01819     switch (node->type) {
01820     case XML_TEXT_NODE:
01821     string = create_octstr_from_node((char *)node);
01822         
01823     octstr_shrink_blanks(string);
01824     octstr_strip_blanks(string);
01825     if (octstr_len(string) > WBXML_STRING_TABLE_MIN)
01826         octstr_strip_nonalphanums(string);
01827 
01828     if (octstr_len(string) > WBXML_STRING_TABLE_MIN)
01829         gwlist_append(strings, string);
01830     else 
01831         octstr_destroy(string);
01832     break;
01833     case XML_ELEMENT_NODE:
01834     if(node->properties != NULL) {
01835         attribute = node->properties;
01836         while (attribute != NULL) {
01837         if (attribute->children != NULL)
01838             string_table_collect_strings(attribute->children, strings);
01839         attribute = attribute->next;
01840         }
01841     }
01842     break;
01843     default:
01844     break;
01845     }
01846 
01847     if (node->children != NULL)
01848     string_table_collect_strings(node->children, strings);
01849 
01850     if (node->next != NULL)
01851     string_table_collect_strings(node->next, strings);
01852 }
01853 
01854 
01855 
01856 /*
01857  * string_table_sort_list - takes a list of octet strings and returns a list
01858  * of string_table_proposal_t:s that contains the same strings with number of 
01859  * instants of every string in the input list.
01860  */
01861 
01862 static List *string_table_sort_list(List *start)
01863 {
01864     int i;
01865     Octstr *string = NULL;
01866     string_table_proposal_t *item = NULL;
01867     List *sorted = NULL;
01868 
01869     sorted = gwlist_create();
01870 
01871     while (gwlist_len(start)) {
01872     string = gwlist_extract_first(start);
01873       
01874     /* Check whether the string is unique. */
01875     for (i = 0; i < gwlist_len(sorted); i++) {
01876         item = gwlist_get(sorted, i);
01877         if (octstr_compare(item->string, string) == 0) {
01878         octstr_destroy(string);
01879         string = NULL;
01880         item->count ++;
01881         break;
01882         }
01883     }
01884     
01885     if (string != NULL) {
01886         item = string_table_proposal_create(string);
01887         gwlist_append(sorted, item);
01888     }
01889     }
01890 
01891     gwlist_destroy(start, NULL);
01892 
01893     return sorted;
01894 }
01895 
01896 
01897 
01898 /*
01899  * string_table_add_many - takes a list of string with number of instants and
01900  * adds those whose number is greater than 1 into the string table. Returns 
01901  * the list ofrejected strings for memory cleanup.
01902  */
01903 
01904 static List *string_table_add_many(List *sorted, wml_binary_t **wbxml)
01905 {
01906     string_table_proposal_t *item = NULL;
01907     List *list = NULL;
01908 
01909     list = gwlist_create();
01910 
01911     while (gwlist_len(sorted)) {
01912     item = gwlist_extract_first(sorted);
01913 
01914     if (item->count > 1 && octstr_len(item->string) > 
01915         WBXML_STRING_TABLE_MIN) {
01916         string_table_add(octstr_duplicate(item->string), wbxml);
01917         string_table_proposal_destroy(item);
01918     } else
01919         gwlist_append(list, item);
01920     }
01921 
01922     gwlist_destroy(sorted, NULL);
01923 
01924     return list;
01925 }
01926 
01927 
01928 
01929 /*
01930  * string_table_collect_words - takes a list of strings and returns a list 
01931  * of words contained by those strings.
01932  */
01933 
01934 static List *string_table_collect_words(List *strings)
01935 {
01936     Octstr *word = NULL;
01937     string_table_proposal_t *item = NULL;
01938     List *list = NULL, *temp_list = NULL;
01939 
01940     while (gwlist_len(strings)) {
01941     item = gwlist_extract_first(strings);
01942 
01943     if (list == NULL) {
01944         list = octstr_split_words(item->string);
01945         string_table_proposal_destroy(item);
01946     } else {
01947         temp_list = octstr_split_words(item->string);
01948 
01949         while ((word = gwlist_extract_first(temp_list)) != NULL)
01950         gwlist_append(list, word);
01951 
01952         gwlist_destroy(temp_list, NULL);
01953         string_table_proposal_destroy(item);
01954     }
01955     }
01956 
01957     gwlist_destroy(strings, NULL);
01958 
01959     return list;
01960 }
01961 
01962 
01963 
01964 /*
01965  * string_table_add - adds a string to the string table. Duplicates are
01966  * discarded. The function returns the offset of the string in the 
01967  * string table; if the string is already in the table then the offset 
01968  * of the first copy.
01969  */
01970 
01971 static unsigned long string_table_add(Octstr *ostr, wml_binary_t **wbxml)
01972 {
01973     string_table_t *item = NULL;
01974     unsigned long i, offset = 0;
01975 
01976     /* Check whether the string is unique. */
01977     for (i = 0; i < (unsigned long)gwlist_len((*wbxml)->string_table); i++) {
01978     item = gwlist_get((*wbxml)->string_table, i);
01979     if (octstr_compare(item->string, ostr) == 0) {
01980         octstr_destroy(ostr);
01981         return item->offset;
01982     }
01983     }
01984 
01985     /* Create a new list item for the string table. */
01986     offset = (*wbxml)->string_table_length;
01987 
01988     item = string_table_create(offset, ostr);
01989 
01990     (*wbxml)->string_table_length = 
01991     (*wbxml)->string_table_length + octstr_len(ostr) + 1;
01992     gwlist_append((*wbxml)->string_table, item);
01993 
01994     return offset;
01995 }
01996 
01997 
01998 
01999 /*
02000  * string_table_apply - takes a octet string of WML bnary and goes it 
02001  * through searching for substrings that are in the string table and 
02002  * replaces them with string table references.
02003  */
02004 
02005 static void string_table_apply(Octstr *ostr, wml_binary_t **wbxml)
02006 {
02007     Octstr *input = NULL;
02008     string_table_t *item = NULL;
02009     long i = 0, word_s = 0, str_e = 0;
02010 
02011     input = octstr_create("");
02012 
02013     for (i = 0; i < gwlist_len((*wbxml)->string_table); i++) {
02014     item = gwlist_get((*wbxml)->string_table, i);
02015 
02016     if (octstr_len(item->string) > WBXML_STRING_TABLE_MIN)
02017         /* No use to replace 1 to 3 character substring, the reference 
02018            will eat the saving up. A variable will be in the string table 
02019            even though it's only 1 character long. */
02020         if ((word_s = octstr_search(ostr, item->string, 0)) >= 0) {
02021         /* Check whether the octet string are equal if they are equal 
02022            in length. */
02023         if (octstr_len(ostr) == octstr_len(item->string)) {
02024             if ((word_s = octstr_compare(ostr, item->string)) == 0)
02025             {
02026             octstr_truncate(ostr, 0);
02027             octstr_append_char(ostr, WBXML_STR_T);
02028             octstr_append_uintvar(ostr, item->offset);
02029             str_e = 1;
02030             }
02031         }
02032         /* Check the possible substrings. */
02033         else if (octstr_len(ostr) > octstr_len(item->string))
02034         {
02035             if (word_s + octstr_len(item->string) == octstr_len(ostr))
02036             str_e = 1;
02037 
02038             octstr_delete(ostr, word_s, octstr_len(item->string));
02039 
02040             octstr_truncate(input, 0);
02041             /* Substring in the start? No STR_END then. */
02042             if (word_s > 0)
02043             octstr_append_char(input, WBXML_STR_END);
02044                   
02045             octstr_append_char(input, WBXML_STR_T);
02046             octstr_append_uintvar(input, item->offset);
02047 
02048             /* Subtring the end? No need to start a new one. */
02049             if ( word_s < octstr_len(ostr))
02050             octstr_append_char(input, WBXML_STR_I);
02051 
02052             octstr_insert(ostr, input, word_s);
02053         }
02054         /* If te string table entry is longer than the string, it can 
02055            be skipped. */
02056         }
02057     }
02058 
02059     octstr_destroy(input);
02060 
02061     if (octstr_get_char(ostr, 0) != WBXML_STR_T)
02062     output_st_char(WBXML_STR_I, wbxml);
02063     if (!str_e)
02064     octstr_append_char(ostr, WBXML_STR_END);    
02065 
02066     output_st_octet_string(ostr, wbxml);
02067 }
02068 
02069 
02070 
02071 /*
02072  * string_table_output - writes the contents of the string table 
02073  * into an octet string that is sent to the phone.
02074  */
02075 
02076 static void string_table_output(Octstr *ostr, wml_binary_t **wbxml)
02077 {
02078     string_table_t *item;
02079 
02080     while ((item = gwlist_extract_first((*wbxml)->string_table)) != NULL) {
02081     octstr_insert(ostr, item->string, octstr_len(ostr));
02082     octstr_append_char(ostr, WBXML_STR_END);
02083     string_table_destroy(item);
02084     }
02085 }
02086 
02087 
02088 
02089 
02090 
02091 
02092 
02093 
02094 
02095 
02096 
02097 
02098 
02099 
See file LICENSE for details about the license agreement for using, modifying, copying or deriving work from this software.