Main Page | Alphabetical List | Data Structures | Directories | File List | Data Fields | Globals

html.c File Reference

#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include "html.h"
#include "gwlib/gwlib.h"

Include dependency graph for html.c:

Include dependency graph

Go to the source code of this file.

Defines

#define SMS_MAX   161

Functions

int html_comment_begins (Octstr *html, long pos)
void skip_html_comment (Octstr *html, long *pos)
void skip_html_tag (Octstr *html, long *pos)
void convert_html_entity (Octstr *sms, Octstr *html, long *pos)
Octstr * html_to_sms (Octstr *html)


Define Documentation

#define SMS_MAX   161
 

Definition at line 71 of file html.c.


Function Documentation

void convert_html_entity (Octstr * sms,
Octstr * html,
long * pos)
[static]
 

Definition at line 129 of file html.c.

References code, latin1, octstr_append_char(), octstr_get_char(), octstr_get_many_chars(), octstr_parse_long(), and sms.

Referenced by html_to_sms().

00130 {   /* Decode the HTML entity whose '&' is at html[*pos]: append the matching Latin-1 byte to sms and advance *pos past the entity. If nothing matches, append a literal '&' and advance *pos by one. */
00131     static const struct {
00132         const char *entity;   /* entity text, including the leading '&' and trailing ';' */
00133         int latin1;           /* byte value appended to sms for this entity */
00134     }
00135     tab[] = {
00136         { "&amp;", '&' },
00137         { "&lt;", '<' },
00138         { "&gt;", '>' },
00139 
00140         /* The following is copied from
00141 
00142             http://www.hut.fi/~jkorpela/HTML3.2/latin1.html
00143 
00144            by Jukka Korpela. Hand and script edited to form this
00145            table. */
00146 
00147         { "&nbsp;", ' ' },
00148         { "&iexcl;", 161 },
00149         { "&cent;", 162 },
00150         { "&pound;", 163 },
00151         { "&curren;", 164 },
00152         { "&yen;", 165 },
00153         { "&brvbar;", 166 },
00154         { "&sect;", 167 },
00155         { "&uml;", 168 },
00156         { "&copy;", 169 },
00157         { "&ordf;", 170 },
00158         { "&laquo;", 171 },
00159         { "&not;", 172 },
00160         { "&shy;", 173 },
00161         { "&reg;", 174 },
00162         { "&macr;", 175 },
00163         { "&deg;", 176 },
00164         { "&plusmn;", 177 },
00165         { "&sup2;", 178 },
00166         { "&sup3;", 179 },
00167         { "&acute;", 180 },
00168         { "&micro;", 181 },
00169         { "&para;", 182 },
00170         { "&middot;", 183 },
00171         { "&cedil;", 184 },
00172         { "&sup1;", 185 },
00173         { "&ordm;", 186 },
00174         { "&raquo;", 187 },
00175         { "&frac14;", 188 },
00176         { "&frac12;", 189 },
00177         { "&frac34;", 190 },
00178         { "&iquest;", 191 },
00179         { "&Agrave;", 192 },
00180         { "&Aacute;", 193 },
00181         { "&Acirc;", 194 },
00182         { "&Atilde;", 195 },
00183         { "&Auml;", 196 },
00184         { "&Aring;", 197 },
00185         { "&AElig;", 198 },
00186         { "&Ccedil;", 199 },
00187         { "&Egrave;", 200 },
00188         { "&Eacute;", 201 },
00189         { "&Ecirc;", 202 },
00190         { "&Euml;", 203 },
00191         { "&Igrave;", 204 },
00192         { "&Iacute;", 205 },
00193         { "&Icirc;", 206 },
00194         { "&Iuml;", 207 },
00195         { "&ETH;", 208 },
00196         { "&Ntilde;", 209 },
00197         { "&Ograve;", 210 },
00198         { "&Oacute;", 211 },
00199         { "&Ocirc;", 212 },
00200         { "&Otilde;", 213 },
00201         { "&Ouml;", 214 },
00202         { "&times;", 215 },
00203         { "&Oslash;", 216 },
00204         { "&Ugrave;", 217 },
00205         { "&Uacute;", 218 },
00206         { "&Ucirc;", 219 },
00207         { "&Uuml;", 220 },
00208         { "&Yacute;", 221 },
00209         { "&THORN;", 222 },
00210         { "&szlig;", 223 },
00211         { "&agrave;", 224 },
00212         { "&aacute;", 225 },
00213         { "&acirc;", 226 },
00214         { "&atilde;", 227 },
00215         { "&auml;", 228 },
00216         { "&aring;", 229 },
00217         { "&aelig;", 230 },
00218         { "&ccedil;", 231 },
00219         { "&egrave;", 232 },
00220         { "&eacute;", 233 },
00221         { "&ecirc;", 234 },
00222         { "&euml;", 235 },
00223         { "&igrave;", 236 },
00224         { "&iacute;", 237 },
00225         { "&icirc;", 238 },
00226         { "&iuml;", 239 },
00227         { "&eth;", 240 },
00228         { "&ntilde;", 241 },
00229         { "&ograve;", 242 },
00230         { "&oacute;", 243 },
00231         { "&ocirc;", 244 },
00232         { "&otilde;", 245 },
00233         { "&ouml;", 246 },
00234         { "&divide;", 247 },
00235         { "&oslash;", 248 },
00236         { "&ugrave;", 249 },
00237         { "&uacute;", 250 },
00238         { "&ucirc;", 251 },
00239         { "&uuml;", 252 },
00240         { "&yacute;", 253 },
00241         { "&thorn;", 254 },
00242         { "&yuml;", 255 },
00243     };
00244     int num_tab = sizeof(tab) / sizeof(tab[0]);
00245     long i, code;
00246     size_t len;
00247     char buf[1024] = "";   /* zero-filled: if octstr_get_many_chars() copies fewer than len bytes (presumably near the end of html - confirm), no indeterminate byte sits in front of the terminator */
00248 
00249     if (octstr_get_char(html, *pos + 1) == '#') {   /* numeric reference: "&#NNN;" or "&#xHH;" */
00250         if (octstr_get_char(html, *pos + 2) == 'x' || octstr_get_char(html, *pos + 2) == 'X')
00251             i = octstr_parse_long(&code, html, *pos + 3, 16); /* hex */
00252         else
00253             i = octstr_parse_long(&code, html, *pos + 2, 10); /* decimal */
00254         if (i > 0) {
00255             if (code >= 0 && code < 256)   /* emit only values that fit in one byte; anything else is silently dropped */
00256                 octstr_append_char(sms, code);
00257             *pos = i;   /* NOTE(review): assumes octstr_parse_long() returns the index just past the number - confirm against gwlib */
00258             if (octstr_get_char(html, *pos) == ';')   /* the terminating ';' is optional */
00259                 ++(*pos);
00260         } else {
00261             ++(*pos);   /* not a parsable number: keep the '&' as literal text */
00262             octstr_append_char(sms, '&');
00263         }
00264     } else {
00265         for (i = 0; i < num_tab; ++i) {   /* named entity: linear search of the table */
00266             len = strlen(tab[i].entity);
00267             octstr_get_many_chars(buf, html, *pos, len);
00268             buf[len] = '\0';
00269             if (strcmp(buf, tab[i].entity) == 0) {
00270                 *pos += len;
00271                 octstr_append_char(sms, tab[i].latin1);
00272                 break;
00273             }
00274         }
00275         if (i == num_tab) {   /* loop ran to the end: no table entry matched */
00276             ++(*pos);
00277             octstr_append_char(sms, '&');
00278         }
00279     }
00280 }

Here is the call graph for this function:

int html_comment_begins (Octstr * html,
long pos)
[static]
 

Definition at line 75 of file html.c.

References octstr_get_many_chars().

Referenced by html_to_sms().

00076 {   /* Return non-zero when the four characters starting at html[pos] are "<!--", i.e. an HTML comment opener; zero otherwise. */
00077     char buf[10] = "";   /* zero-filled: if fewer than 4 characters get copied (presumably near the end of html - confirm), the remainder compares as an empty tail */
00078 
00079     octstr_get_many_chars(buf, html, pos, 4);
00080     buf[4] = '\0';   /* terminate directly after the 4 copied characters so strcmp() never reads an unwritten byte */
00081     return strcmp(buf, "<!--") == 0;
00082 }

Here is the call graph for this function:

Octstr* html_to_sms (Octstr * html)
 

Definition at line 283 of file html.c.

References convert_html_entity(), html_comment_begins(), octstr_append_char(), octstr_create, octstr_get_char(), octstr_len(), octstr_shrink_blanks(), octstr_strip_blanks(), skip_html_comment(), skip_html_tag(), and sms.

Referenced by smsbox_sendsms_post(), and url_result_thread().

00284 {   /* Build and return a new Octstr holding the text of html with tags and comments skipped and entities converted. */
00285     long i, len;
00286     int c;
00287     Octstr *sms;
00288 
00289     sms = octstr_create("");   /* output starts empty and is appended to below */
00290     len = octstr_len(html);
00291     i = 0;
00292     while (i < len) {   /* every branch advances i, either directly or through the helper it calls */
00293         c = octstr_get_char(html, i);
00294         switch (c) {
00295         case '<':   /* markup: a comment or an ordinary tag, neither of which produces output */
00296             if (html_comment_begins(html, i))
00297                 skip_html_comment(html, &i);
00298             else
00299                 skip_html_tag(html, &i);
00300             break;
00301         case '&':   /* entity: the helper appends the decoded character (or a literal '&') to sms */
00302             convert_html_entity(sms, html, &i);
00303             break;
00304         default:   /* ordinary character: copied through unchanged */
00305             octstr_append_char(sms, c);
00306             ++i;
00307             break;
00308         }
00309     }
00310     octstr_shrink_blanks(sms);   /* NOTE(review): presumably collapses runs of whitespace - confirm in gwlib */
00311     octstr_strip_blanks(sms);    /* NOTE(review): presumably trims leading/trailing whitespace - confirm in gwlib */
00312     return sms;
00313 }

Here is the call graph for this function:

void skip_html_comment (Octstr * html,
long * pos)
[static]
 

Definition at line 86 of file html.c.

References octstr_imm(), octstr_len(), and octstr_search().

Referenced by html_to_sms().

00087 {   /* Advance *pos from the "<!--" that opens a comment to just past its closing "-->", or to the end of html when the comment is never closed. */
00088     long i;
00089 
00090     *pos += 4;  /* Skip "<!--" at beginning of comment. */
00091     i = octstr_search(html, octstr_imm("-->"), *pos);
00092     if (i == -1)
00093         *pos = octstr_len(html);   /* unterminated comment: swallow the rest of the document */
00094     else
00095         *pos = i + 3;   /* step over the 3-character "-->" as well, so html_to_sms() does not copy it into the output */
00096 }

Here is the call graph for this function:

void skip_html_tag (Octstr * html,
long * pos)
[static]
 

Definition at line 100 of file html.c.

References octstr_get_char(), octstr_len(), and octstr_search_char().

Referenced by html_to_sms().

00101 {   /* Advance *pos from the '<' that opens a tag to just past the matching '>', or to the end of html if the tag is never closed. */
00102     long i, len;
00103     int c;
00104 
00105     /* Skip leading '<'. */
00106     ++(*pos);
00107 
00108     /* Skip name of tag and attributes with values. */
00109     len = octstr_len(html);
00110     while (*pos < len && (c = octstr_get_char(html, *pos)) != '>') {
00111         if (c == '"' || c == '\'') {   /* quoted attribute value: a '>' inside the quotes must not end the tag */
00112             i = octstr_search_char(html, c, *pos + 1);   /* look for the matching closing quote of the same kind */
00113             if (i == -1)
00114                 *pos = len;   /* unterminated quote: treat the rest of the input as part of the tag */
00115             else
00116                 *pos = i + 1;   /* continue just after the closing quote */
00117         } else
00118             ++(*pos);
00119     }
00120 
00121     /* Skip trailing '>' if it is there. */
00122     if (octstr_get_char(html, *pos) == '>')
00123         ++(*pos);
00124 }

Here is the call graph for this function:

See file LICENSE for details about the license agreement for using, modifying, copying or deriving work from this software.