#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include "html.h"
#include "gwlib/gwlib.h"

Include dependency graph for html.c:

Go to the source code of this file.
Defines | |
| #define | SMS_MAX 161 |
Functions | |
| int | html_comment_begins (Octstr *html, long pos) |
| void | skip_html_comment (Octstr *html, long *pos) |
| void | skip_html_tag (Octstr *html, long *pos) |
| void | convert_html_entity (Octstr *sms, Octstr *html, long *pos) |
| Octstr * | html_to_sms (Octstr *html) |
|
|
|
|
||||||||||||||||
|
Definition at line 129 of file html.c. References code, latin1, octstr_append_char(), octstr_get_char(), octstr_get_many_chars(), octstr_parse_long(), and sms. Referenced by html_to_sms(). 00130 {
00131 static struct {
00132 char *entity;
00133 int latin1;
00134 }
00135 tab[] = {
00136 { "&", '&' },
00137 { "<", '<' },
00138 { ">", '>' },
00139
00140 /* The following is copied from
00141
00142 http://www.hut.fi/~jkorpela/HTML3.2/latin1.html
00143
00144 by Jukka Korpela. Hand and script edited to form this
00145 table. */
00146
00147 { " ", ' ' },
00148 { "¡", 161 },
00149 { "¢", 162 },
00150 { "£", 163 },
00151 { "¤", 164 },
00152 { "¥", 165 },
00153 { "¦", 166 },
00154 { "§", 167 },
00155 { "¨", 168 },
00156 { "©", 169 },
00157 { "ª", 170 },
00158 { "«", 171 },
00159 { "¬", 172 },
00160 { "­", 173 },
00161 { "®", 174 },
00162 { "¯", 175 },
00163 { "°", 176 },
00164 { "±", 177 },
00165 { "²", 178 },
00166 { "³", 179 },
00167 { "´", 180 },
00168 { "µ", 181 },
00169 { "¶", 182 },
00170 { "·", 183 },
00171 { "¸", 184 },
00172 { "¹", 185 },
00173 { "º", 186 },
00174 { "»", 187 },
00175 { "¼", 188 },
00176 { "½", 189 },
00177 { "¾", 190 },
00178 { "¿", 191 },
00179 { "À", 192 },
00180 { "Á", 193 },
00181 { "Â", 194 },
00182 { "Ã", 195 },
00183 { "Ä", 196 },
00184 { "Å", 197 },
00185 { "Æ", 198 },
00186 { "Ç", 199 },
00187 { "È", 200 },
00188 { "É", 201 },
00189 { "Ê", 202 },
00190 { "Ë", 203 },
00191 { "Ì", 204 },
00192 { "Í", 205 },
00193 { "Î", 206 },
00194 { "Ï", 207 },
00195 { "Ð", 208 },
00196 { "Ñ", 209 },
00197 { "Ò", 210 },
00198 { "Ó", 211 },
00199 { "Ô", 212 },
00200 { "Õ", 213 },
00201 { "Ö", 214 },
00202 { "×", 215 },
00203 { "Ø", 216 },
00204 { "Ù", 217 },
00205 { "Ú", 218 },
00206 { "Û", 219 },
00207 { "Ü", 220 },
00208 { "Ý", 221 },
00209 { "Þ", 222 },
00210 { "ß", 223 },
00211 { "à", 224 },
00212 { "á", 225 },
00213 { "â", 226 },
00214 { "ã", 227 },
00215 { "ä", 228 },
00216 { "å", 229 },
00217 { "æ", 230 },
00218 { "ç", 231 },
00219 { "è", 232 },
00220 { "é", 233 },
00221 { "ê", 234 },
00222 { "ë", 235 },
00223 { "ì", 236 },
00224 { "í", 237 },
00225 { "î", 238 },
00226 { "ï", 239 },
00227 { "ð", 240 },
00228 { "ñ", 241 },
00229 { "ò", 242 },
00230 { "ó", 243 },
00231 { "ô", 244 },
00232 { "õ", 245 },
00233 { "ö", 246 },
00234 { "÷", 247 },
00235 { "ø", 248 },
00236 { "ù", 249 },
00237 { "ú", 250 },
00238 { "û", 251 },
00239 { "ü", 252 },
00240 { "ý", 253 },
00241 { "þ", 254 },
00242 { "ÿ", 255 },
00243 };
00244 int num_tab = sizeof(tab) / sizeof(tab[0]);
00245 long i, code;
00246 size_t len;
00247 char buf[1024];
00248
00249 if (octstr_get_char(html, *pos + 1) == '#') {
00250 if (octstr_get_char(html, *pos + 2) == 'x' || octstr_get_char(html, *pos + 2) == 'X')
00251 i = octstr_parse_long(&code, html, *pos + 3, 16); /* hex */
00252 else
00253 i = octstr_parse_long(&code, html, *pos + 2, 10); /* decimal */
00254 if (i > 0) {
00255 if (code < 256)
00256 octstr_append_char(sms, code);
00257 *pos = i + 1;
00258 if (octstr_get_char(html, *pos) == ';')
00259 ++(*pos);
00260 } else {
00261 ++(*pos);
00262 octstr_append_char(sms, '&');
00263 }
00264 } else {
00265 for (i = 0; i < num_tab; ++i) {
00266 len = strlen(tab[i].entity);
00267 octstr_get_many_chars(buf, html, *pos, len);
00268 buf[len] = '\0';
00269 if (strcmp(buf, tab[i].entity) == 0) {
00270 *pos += len;
00271 octstr_append_char(sms, tab[i].latin1);
00272 break;
00273 }
00274 }
00275 if (i == num_tab) {
00276 ++(*pos);
00277 octstr_append_char(sms, '&');
00278 }
00279 }
00280 }
|
Here is the call graph for this function:

|
||||||||||||
|
Definition at line 75 of file html.c. References octstr_get_many_chars(). Referenced by html_to_sms(). 00076 {
00077 char buf[10];
00078
00079 octstr_get_many_chars(buf, html, pos, 4);
00080 buf[5] = '\0';
00081 return strcmp(buf, "<!--") == 0;
00082 }
|
Here is the call graph for this function:

|
|
Definition at line 283 of file html.c. References convert_html_entity(), html_comment_begins(), octstr_append_char(), octstr_create, octstr_get_char(), octstr_len(), octstr_shrink_blanks(), octstr_strip_blanks(), skip_html_comment(), skip_html_tag(), and sms. Referenced by smsbox_sendsms_post(), and url_result_thread(). 00284 {
00285 long i, len;
00286 int c;
00287 Octstr *sms;
00288
00289 sms = octstr_create("");
00290 len = octstr_len(html);
00291 i = 0;
00292 while (i < len) {
00293 c = octstr_get_char(html, i);
00294 switch (c) {
00295 case '<':
00296 if (html_comment_begins(html, i))
00297 skip_html_comment(html, &i);
00298 else
00299 skip_html_tag(html, &i);
00300 break;
00301 case '&':
00302 convert_html_entity(sms, html, &i);
00303 break;
00304 default:
00305 octstr_append_char(sms, c);
00306 ++i;
00307 break;
00308 }
00309 }
00310 octstr_shrink_blanks(sms);
00311 octstr_strip_blanks(sms);
00312 return sms;
00313 }
|
Here is the call graph for this function:

|
||||||||||||
|
Definition at line 86 of file html.c. References octstr_imm(), octstr_len(), and octstr_search(). Referenced by html_to_sms(). 00087 {
00088 long i;
00089
00090 *pos += 4; /* Skip "<!--" at beginning of comment. */
00091 i = octstr_search(html, octstr_imm("-->"), *pos);
00092 if (i == -1)
00093 *pos = octstr_len(html);
00094 else
00095 *pos = i;
00096 }
|
Here is the call graph for this function:

|
||||||||||||
|
Definition at line 100 of file html.c. References octstr_get_char(), octstr_len(), and octstr_search_char(). Referenced by html_to_sms(). 00101 {
00102 long i, len;
00103 int c;
00104
00105 /* Skip leading '<'. */
00106 ++(*pos);
00107
00108 /* Skip name of tag and attributes with values. */
00109 len = octstr_len(html);
00110 while (*pos < len && (c = octstr_get_char(html, *pos)) != '>') {
00111 if (c == '"' || c == '\'') {
00112 i = octstr_search_char(html, c, *pos + 1);
00113 if (i == -1)
00114 *pos = len;
00115 else
00116 *pos = i + 1;
00117 } else
00118 ++(*pos);
00119 }
00120
00121 /* Skip trailing '>' if it is there. */
00122 if (octstr_get_char(html, *pos) == '>')
00123 ++(*pos);
00124 }
|
Here is the call graph for this function:
