Main Page | Alphabetical List | Data Structures | Directories | File List | Data Fields | Globals

xml_shared.c

Go to the documentation of this file.
00001 /* ==================================================================== 
00002  * The Kannel Software License, Version 1.0 
00003  * 
00004  * Copyright (c) 2001-2008 Kannel Group  
00005  * Copyright (c) 1998-2001 WapIT Ltd.   
00006  * All rights reserved. 
00007  * 
00008  * Redistribution and use in source and binary forms, with or without 
00009  * modification, are permitted provided that the following conditions 
00010  * are met: 
00011  * 
00012  * 1. Redistributions of source code must retain the above copyright 
00013  *    notice, this list of conditions and the following disclaimer. 
00014  * 
00015  * 2. Redistributions in binary form must reproduce the above copyright 
00016  *    notice, this list of conditions and the following disclaimer in 
00017  *    the documentation and/or other materials provided with the 
00018  *    distribution. 
00019  * 
00020  * 3. The end-user documentation included with the redistribution, 
00021  *    if any, must include the following acknowledgment: 
00022  *       "This product includes software developed by the 
00023  *        Kannel Group (http://www.kannel.org/)." 
00024  *    Alternately, this acknowledgment may appear in the software itself, 
00025  *    if and wherever such third-party acknowledgments normally appear. 
00026  * 
00027  * 4. The names "Kannel" and "Kannel Group" must not be used to 
00028  *    endorse or promote products derived from this software without 
00029  *    prior written permission. For written permission, please  
00030  *    contact org@kannel.org. 
00031  * 
00032  * 5. Products derived from this software may not be called "Kannel", 
00033  *    nor may "Kannel" appear in their name, without prior written 
00034  *    permission of the Kannel Group. 
00035  * 
00036  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED 
00037  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 
00038  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 
00039  * DISCLAIMED.  IN NO EVENT SHALL THE KANNEL GROUP OR ITS CONTRIBUTORS 
00040  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,  
00041  * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT  
00042  * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR  
00043  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,  
00044  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE  
00045  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,  
00046  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
00047  * ==================================================================== 
00048  * 
00049  * This software consists of voluntary contributions made by many 
00050  * individuals on behalf of the Kannel Group.  For more information on  
00051  * the Kannel Group, please see <http://www.kannel.org/>. 
00052  * 
00053  * Portions of this software are based upon software originally written at  
00054  * WapIT Ltd., Helsinki, Finland for the Kannel project.  
00055  */ 
00056 
00057 /*
00058  * xml_shared.c: Common functions of xml compilers (mainly charset handling 
00059  * and operations with wbxml binary not using a string table)
00060  *
00061  * By Tuomas Luttinen & Aarno Syvänen (for Wiral Ltd) 
00062  */
00063 
00064 #include <ctype.h>
00065 
00066 #include "xml_shared.h"
00067 #include "xml_definitions.h"
00068 
00069 #include <string.h>
00070 
00071 struct charset_t {
00072     char *charset; 
00073     char *nro;
00074     unsigned int MIBenum;
00075 };
00076 
00077 charset_t character_sets[] = {
00078     { "ISO", "8859-1", 4 },
00079     { "ISO", "8859-2", 5 },
00080     { "ISO", "8859-3", 6 },
00081     { "ISO", "8859-4", 7 },
00082     { "ISO", "8859-5", 8 },
00083     { "ISO", "8859-6", 9 },
00084     { "ISO", "8859-7", 10 },
00085     { "ISO", "8859-8", 11 },
00086     { "ISO", "8859-9", 12 },
00087     { "WINDOWS", "1250", 2250 },
00088     { "WINDOWS", "1251", 2251 },
00089     { "WINDOWS", "1252", 2252 },
00090     { "WINDOWS", "1253", 2253 },
00091     { "WINDOWS", "1254", 2254 },
00092     { "WINDOWS", "1255", 2255 },
00093     { "WINDOWS", "1256", 2256 },
00094     { "WINDOWS", "1257", 2257 },
00095     { "WINDOWS", "1258", 2258 },
00096     { "UTF", "8", 106 },
00097     { NULL }
00098 };
00099 
00100 /**************************************************************************** 
00101  *
00102  * Implementation of external functions
00103  */
00104 
00105 
00106 /*
00107  * set_charset - if xml doesn't have an <?xml..encoding=something>, 
00108  * converts body from argument charset to UTF-8
00109  */
00110 
00111 void set_charset(Octstr *document, Octstr *charset)
00112 {
00113     long gt = 0, enc = 0;
00114     Octstr *encoding = NULL, *text = NULL, *temp = NULL;
00115 
00116     if (octstr_len(charset) == 0)
00117         return;
00118 
00119     encoding = octstr_create(" encoding");
00120     enc = octstr_search(document, encoding, 0);
00121     gt = octstr_search_char(document, '>', 0);
00122 
00123     if (enc < 0 || enc > gt) {
00124         gt++;
00125         text = octstr_copy(document, gt, octstr_len(document) - gt);
00126         if (charset_to_utf8(text, &temp, charset) >= 0) {
00127             octstr_delete(document, gt, octstr_len(document) - gt);
00128             octstr_append_data(document, octstr_get_cstr(temp), 
00129                                octstr_len(temp));
00130         }
00131 
00132         octstr_destroy(temp);
00133         octstr_destroy(text);
00134     }
00135 
00136     octstr_destroy(encoding);
00137 }
00138 
00139 
00140 /*
00141  * find_charset_encoding -- parses for a encoding argument within
00142  * the xml preabmle, ie. <?xml verion="xxx" encoding="ISO-8859-1"?> 
00143  */
00144 
00145 Octstr *find_charset_encoding(Octstr *document)
00146 {
00147     long gt = 0, enc = 0;
00148     Octstr *encoding = NULL, *temp = NULL;
00149 
00150     enc = octstr_search(document, octstr_imm(" encoding="), 0);
00151     gt = octstr_search(document, octstr_imm("?>"), 0);
00152 
00153     /* in case there is no encoding argument, assume always UTF-8 */
00154     if (enc < 0 || enc + 10 > gt)
00155         return NULL;
00156 
00157     temp = octstr_copy(document, enc + 10, gt - (enc + 10));
00158     octstr_strip_blanks(temp);
00159     encoding = octstr_copy(temp, 1, octstr_len(temp) - 2);
00160     octstr_destroy(temp);
00161 
00162     return encoding;
00163 }
00164 
00165 
/*
 * only_blanks - checks if a text node contains only white space, in which
 * case it can be left out as element content.
 *
 * Returns 1 when every character of `text` is white space according to
 * isspace(), and also when `text` is empty or NULL. Returns 0 as soon as
 * a non-space character is seen.
 */

int only_blanks(const char *text)
{
    const unsigned char *p;

    if (text == NULL)
        return 1;

    /*
     * Walk the string as unsigned char: passing a negative char value
     * (any octet >= 0x80 where char is signed) to isspace() is undefined.
     */
    for (p = (const unsigned char *)text; *p != '\0'; p++) {
        if (!isspace(*p))
            return 0;
    }

    return 1;
}
00184 
00185 /*
00186  * Parses the character set of the document. 
00187  */
00188 
00189 int parse_charset(Octstr *os)
00190 {
00191     Octstr *charset = NULL;
00192     Octstr *number = NULL;
00193     int i, j, cut = 0, ret = 0;
00194 
00195     gw_assert(os != NULL);
00196     charset = octstr_duplicate(os);
00197     
00198     /* The charset might be in lower case, so... */
00199     octstr_convert_range(charset, 0, octstr_len(charset), toupper);
00200 
00201     /*
00202      * The character set is handled in two parts to make things easier. 
00203      * The cutting.
00204      */
00205     if ((cut = octstr_search_char(charset, '_', 0)) > 0) {
00206         number = octstr_copy(charset, cut + 1, (octstr_len(charset) - (cut + 1)));
00207         octstr_truncate(charset, cut);
00208     } 
00209     else if ((cut = octstr_search_char(charset, '-', 0)) > 0) {
00210         number = octstr_copy(charset, cut + 1, (octstr_len(charset) - (cut + 1)));
00211         octstr_truncate(charset, cut);
00212     }
00213 
00214     /* And table search. */
00215     for (i = 0; character_sets[i].charset != NULL; i++)
00216         if (octstr_str_compare(charset, character_sets[i].charset) == 0) {
00217             for (j = i; octstr_str_compare(charset, 
00218                                            character_sets[j].charset) == 0; j++)
00219                 if (octstr_str_compare(number, character_sets[j].nro) == 0) {
00220                     ret = character_sets[j].MIBenum;
00221                     break;
00222                 }
00223             break;
00224         }
00225 
00226     /* UTF-8 is the default value */
00227     if (character_sets[i].charset == NULL)
00228         ret = character_sets[i-1].MIBenum;
00229 
00230     octstr_destroy(number);
00231     octstr_destroy(charset);
00232 
00233     return ret;
00234 }
00235 
00236 /*
00237  * element_check_content - a helper function for parse_element for checking 
00238  * if an element has content or attributes. Returns status bit for attributes 
00239  * (0x80) and another for content (0x40) added into one octet.
00240  */
00241 
00242 unsigned char element_check_content(xmlNodePtr node)
00243 {
00244     unsigned char status_bits = 0x00;
00245 
00246     if ((node->children != NULL) && 
00247     !((node->children->next == NULL) && 
00248       (node->children->type == XML_TEXT_NODE) && 
00249       (only_blanks((char *)node->children->content))))
00250     status_bits = WBXML_CONTENT_BIT;
00251 
00252     if (node->properties != NULL)
00253     status_bits = status_bits | WBXML_ATTR_BIT;
00254 
00255     return status_bits;
00256 }
00257 
00258 /*
00259  * Return the character sets supported by the WML compiler, as a List
00260  * of Octstrs, where each string is the MIME identifier for one charset.
00261  */
00262 List *wml_charsets(void)
00263 {
00264     int i;
00265     List *result;
00266     Octstr *charset;
00267 
00268     result = gwlist_create();
00269     for (i = 0; character_sets[i].charset != NULL; i++) {
00270          charset = octstr_create(character_sets[i].charset);
00271          octstr_append_char(charset, '-');
00272          octstr_append(charset, octstr_imm(character_sets[i].nro));
00273          gwlist_append(result, charset);
00274     }
00275 
00276     return result;  
00277 }
00278 
00279 /*
00280  * Functions working with simple binary data type (no string table). No 
00281  * variables are present either. 
00282  */
00283 
00284 simple_binary_t *simple_binary_create(void)
00285 {
00286     simple_binary_t *binary;
00287 
00288     binary = gw_malloc(sizeof(simple_binary_t));
00289     
00290     binary->wbxml_version = 0x00;
00291     binary->public_id = 0x00;
00292     binary->charset = 0x00;
00293     binary->binary = octstr_create("");
00294 
00295     return binary;
00296 }
00297 
00298 void simple_binary_destroy(simple_binary_t *binary)
00299 {
00300     if (binary == NULL)
00301         return;
00302 
00303     octstr_destroy(binary->binary);
00304     gw_free(binary);
00305 }
00306 
00307 /*
00308  * Output the wbxml content field after field into octet string os. We add 
00309  * string table length 0 (meaning no string table) before the content.
00310  */
00311 void simple_binary_output(Octstr *os, simple_binary_t *binary)
00312 {
00313     gw_assert(octstr_len(os) == 0);
00314     octstr_format_append(os, "%c", binary->wbxml_version);
00315     octstr_format_append(os, "%c", binary->public_id);
00316     octstr_append_uintvar(os, binary->charset);
00317     octstr_format_append(os, "%c", 0x00);
00318     octstr_format_append(os, "%S", binary->binary);
00319 }
00320 
00321 void parse_end(simple_binary_t **binary)
00322 {
00323     output_char(WBXML_END, binary);
00324 }
00325 
00326 void output_char(int byte, simple_binary_t **binary)
00327 {
00328     octstr_append_char((**binary).binary, byte);
00329 }
00330 
00331 void parse_octet_string(Octstr *os, simple_binary_t **binary)
00332 {
00333     output_octet_string(os, binary);
00334 }
00335 
00336 /*
00337  * Add global tokens to the start and to the end of an inline string.
00338  */ 
00339 void parse_inline_string(Octstr *temp, simple_binary_t **binary)
00340 {
00341     Octstr *startos;   
00342 
00343     octstr_insert(temp, startos = octstr_format("%c", WBXML_STR_I), 0);
00344     octstr_destroy(startos);
00345     octstr_format_append(temp, "%c", WBXML_STR_END);
00346     parse_octet_string(temp, binary);
00347 }
00348 
00349 void output_octet_string(Octstr *os, simple_binary_t **sibxml)
00350 {
00351     octstr_insert((*sibxml)->binary, os, octstr_len((*sibxml)->binary));
00352 }
See file LICENSE for details about the license agreement for using, modifying, copying or deriving work from this software.