Main Page | Alphabetical List | Data Structures | Directories | File List | Data Fields | Globals

regex.c

Go to the documentation of this file.
00001 /* ==================================================================== 
00002  * The Kannel Software License, Version 1.0 
00003  * 
00004  * Copyright (c) 2001-2008 Kannel Group  
00005  * Copyright (c) 1998-2001 WapIT Ltd.   
00006  * All rights reserved. 
00007  * 
00008  * Redistribution and use in source and binary forms, with or without 
00009  * modification, are permitted provided that the following conditions 
00010  * are met: 
00011  * 
00012  * 1. Redistributions of source code must retain the above copyright 
00013  *    notice, this list of conditions and the following disclaimer. 
00014  * 
00015  * 2. Redistributions in binary form must reproduce the above copyright 
00016  *    notice, this list of conditions and the following disclaimer in 
00017  *    the documentation and/or other materials provided with the 
00018  *    distribution. 
00019  * 
00020  * 3. The end-user documentation included with the redistribution, 
00021  *    if any, must include the following acknowledgment: 
00022  *       "This product includes software developed by the 
00023  *        Kannel Group (http://www.kannel.org/)." 
00024  *    Alternately, this acknowledgment may appear in the software itself, 
00025  *    if and wherever such third-party acknowledgments normally appear. 
00026  * 
00027  * 4. The names "Kannel" and "Kannel Group" must not be used to 
00028  *    endorse or promote products derived from this software without 
00029  *    prior written permission. For written permission, please  
00030  *    contact org@kannel.org. 
00031  * 
00032  * 5. Products derived from this software may not be called "Kannel", 
00033  *    nor may "Kannel" appear in their name, without prior written 
00034  *    permission of the Kannel Group. 
00035  * 
00036  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED 
00037  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 
00038  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 
00039  * DISCLAIMED.  IN NO EVENT SHALL THE KANNEL GROUP OR ITS CONTRIBUTORS 
00040  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,  
00041  * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT  
00042  * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR  
00043  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,  
00044  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE  
00045  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,  
00046  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
00047  * ==================================================================== 
00048  * 
00049  * This software consists of voluntary contributions made by many 
00050  * individuals on behalf of the Kannel Group.  For more information on  
00051  * the Kannel Group, please see <http://www.kannel.org/>. 
00052  * 
00053  * Portions of this software are based upon software originally written at  
00054  * WapIT Ltd., Helsinki, Finland for the Kannel project.  
00055  */ 
00056 
00057 /*
00058  * regex.c - POSIX regular expressions (REs) 
00059  *
00060  * This module implements wrapper functions around regcomp(3), regexec(3),
00061  * and the related functions from the POSIX standard. Additionally
00062  * it provides subexpression substitution routines in order to easily
00063  * substitute strings around regular expressions.
00064  *
00065  * See regex(3) man page for more details on POSIX regular expressions.
00066  *
00067  * Stipe Tolj <stolj@wapme.de>
00068  */
00069 
00070 #include <ctype.h>
00071 
00072 #include "gwlib/gwlib.h"
00073 #include "regex.h"
00074 
00075 /* 
00076  * We allow substituting the POSIX-compliant regex routines with the ones
00077  * provided by PCRE if the system has no regex implementation of its own.
00078  */
00079 #if defined(HAVE_REGEX) || defined(HAVE_PCRE)
00080 
00081 
00082 /********************************************************************
00083  * Generic regular expression functions.
00084  */
00085 
/*
 * Release a compiled regular expression: the pattern's internal storage
 * is released with regfree() and the regex_t itself with gw_free().
 * A NULL argument is accepted and ignored.
 */
void gw_regex_destroy(regex_t *preg)
{
    if (preg != NULL) {
        regfree(preg);
        gw_free(preg);
    }
}
00094 
00095 
00096 regex_t *gw_regex_comp_real(const Octstr *pattern, int cflags, const char *file, 
00097                             long line, const char *func)
00098 {
00099     int rc;
00100     regex_t *preg;
00101     
00102     preg = gw_malloc(sizeof(regex_t));
00103 
00104     if ((rc = regcomp(preg, pattern ? octstr_get_cstr(pattern) : NULL, cflags)) != 0) {
00105         char buffer[512];
00106         regerror(rc, preg, buffer, sizeof(buffer)); 
00107         error(0, "%s:%ld: %s: regex compilation `%s' failed: %s (Called from %s:%ld:%s.)",
00108               __FILE__, (long) __LINE__, __func__, octstr_get_cstr(pattern), buffer, 
00109               (file), (long) (line), (func));
00110         return NULL;
00111     }
00112 
00113     return preg;
00114 }
00115 
00116 
00117 int gw_regex_exec_real(const regex_t *preg, const Octstr *string, size_t nmatch, 
00118                        regmatch_t pmatch[], int eflags, const char *file, long line, 
00119                        const char *func)
00120 {
00121     int rc;
00122 
00123     gw_assert(preg != NULL);
00124 
00125     rc = regexec(preg, string ? octstr_get_cstr(string) : NULL,  nmatch, pmatch, eflags);
00126     if (rc != REG_NOMATCH && rc != 0) {
00127         char buffer[512];
00128         regerror(rc, preg, buffer, sizeof(buffer)); 
00129         error(0, "%s:%ld: %s: regex execution on `%s' failed: %s (Called from %s:%ld:%s.)",
00130               __FILE__, (long) __LINE__, __func__, octstr_get_cstr(string), buffer,
00131               (file), (long) (line), (func));
00132     }
00133 
00134     return rc;
00135 }
00136 
00137 
00138 Octstr *gw_regex_error(int errcode, const regex_t *preg)
00139 {
00140     char errbuf[512];
00141     Octstr *os;
00142 
00143     regerror(errcode, preg, errbuf, sizeof(errbuf));
00144     os = octstr_create(errbuf);
00145 
00146     return os;
00147 }
00148 
00149 
/*
 * Return a copy of the NUL-terminated string `s' in memory obtained from
 * gw_malloc(), or NULL if `s' is NULL.
 */
static char *pstrdup(const char *s)
{
    size_t size;
    char *copy;

    if (!s)
        return NULL;

    size = strlen(s) + 1;       /* include the terminating NUL */
    copy = gw_malloc(size);

    return memcpy(copy, s, size);
}
00163 
00164 
/* This function substitutes for $0-$9, filling in regular expression
 * submatches; `&' is accepted as an alias for $0. Pass it the same nmatch
 * and pmatch arguments that were used for the regex execution. nmatch
 * should not be greater than the maximum number of subexpressions - i.e.
 * one more than the re_nsub member of regex_t.
 *
 * input should be the string with the $-expressions, source should be the
 * string that was matched against.
 *
 * A backslash in front of `$' or `&' yields that character literally.
 * References to a submatch with an index >= nmatch, or to a submatch that
 * is unset or empty, expand to nothing.
 *
 * It returns the substituted string, or NULL on error (input or source
 * is NULL).
 * BEWARE: Caller must free allocated memory of the result.
 *
 * Parts of this code are based on Henry Spencer's regsub(), from his
 * AT&T V8 regexp package. Function borrowed from apache-1.3/src/main/util.c
 */
char *gw_regex_sub(const char *input, const char *source,
                   size_t nmatch, regmatch_t pmatch[])
{
    const char *src = input;
    char *dest, *dst;
    char c;
    size_t no;
    size_t len;

    /* Without both strings there is nothing to substitute. */
    if (!source || !input)
        return NULL;
    if (!nmatch)
        return pstrdup(src);

    /* First pass, find the size */
    len = 0;
    while ((c = *src++) != '\0') {
        if (c == '&')
            no = 0;
        /* <ctype.h> functions need a value representable as unsigned char */
        else if (c == '$' && isdigit((unsigned char) *src))
            no = (size_t) (*src++ - '0');
        else
            no = 10;

        if (no > 9) {           /* Ordinary character. */
            if (c == '\\' && (*src == '$' || *src == '&'))
                c = *src++;
            len++;
        }
        else if (no < nmatch && pmatch[no].rm_so < pmatch[no].rm_eo) {
            /* rm_so < rm_eo was just checked, so the difference is positive */
            len += (size_t) (pmatch[no].rm_eo - pmatch[no].rm_so);
        }
    }

    dest = dst = gw_malloc(len + 1);

    /* Now actually fill in the string */
    src = input;
    while ((c = *src++) != '\0') {
        if (c == '&')
            no = 0;
        else if (c == '$' && isdigit((unsigned char) *src))
            no = (size_t) (*src++ - '0');
        else
            no = 10;

        if (no > 9) {           /* Ordinary character. */
            if (c == '\\' && (*src == '$' || *src == '&'))
                c = *src++;
            *dst++ = c;
        }
        else if (no < nmatch && pmatch[no].rm_so < pmatch[no].rm_eo) {
            size_t sublen = (size_t) (pmatch[no].rm_eo - pmatch[no].rm_so);

            memcpy(dst, source + pmatch[no].rm_so, sublen);
            dst += sublen;
        }
    }
    *dst = '\0';

    return dest;
}
00240 
00241 
00242 /********************************************************************
00243  * Matching and substitution wrapper functions.
00244  *
00245  * Beware that the regex compilation takes the most significant CPU time,
00246  * so always try to have pre-compiled regular expressions that keep being
00247  * reused and re-matched on variable string patterns.
00248  */
00249 
00250 int gw_regex_match_real(const Octstr *re, const Octstr *os, const char *file, 
00251                         long line, const char *func)
00252 {
00253     regex_t *regexp;
00254     int rc;
00255 
00256     /* compile */
00257     regexp = gw_regex_comp_real(re, REG_EXTENDED|REG_ICASE, file, line, func);
00258     if (regexp == NULL)
00259         return 0;
00260 
00261     /* execute and match */
00262     rc = gw_regex_exec_real(regexp, os, 0, NULL, 0, file, line, func);
00263 
00264     gw_regex_destroy(regexp);
00265 
00266     return (rc == 0) ? 1 : 0;
00267 }
00268 
00269 
00270 int gw_regex_match_pre_real(const regex_t *preg, const Octstr *os, const char *file, 
00271                             long line, const char *func)
00272 {
00273     int rc;
00274 
00275     gw_assert(preg != NULL);
00276 
00277     /* execute and match */
00278     rc = gw_regex_exec_real(preg, os, 0, NULL, 0, file, line, func);
00279 
00280     return (rc == 0) ? 1 : 0;
00281 }
00282 
00283 
00284 Octstr *gw_regex_subst_real(const Octstr *re, const Octstr *os, const Octstr *rule, 
00285                             const char *file, long line, const char *func)
00286 {
00287     Octstr *result;
00288     regex_t *regexp;
00289     regmatch_t pmatch[REGEX_MAX_SUB_MATCH];
00290     int rc;
00291     char *rsub;
00292 
00293     /* compile */
00294     regexp = gw_regex_comp_real(re, REG_EXTENDED|REG_ICASE, file, line, func);
00295     if (regexp == NULL)
00296         return 0;
00297 
00298     /* execute and match */
00299     rc = gw_regex_exec_real(regexp, os, REGEX_MAX_SUB_MATCH, &pmatch[0], 0, 
00300                             file, line, func);
00301     gw_regex_destroy(regexp);
00302 
00303     /* substitute via rule if matched */
00304     if (rc != 0)
00305         return NULL;
00306 
00307     rsub = gw_regex_sub(octstr_get_cstr(rule), octstr_get_cstr(os),
00308                         REGEX_MAX_SUB_MATCH, &pmatch[0]);
00309     if (rsub == NULL)
00310         return NULL;
00311 
00312     result = octstr_create(rsub);
00313     gw_free(rsub);
00314     
00315     return result;
00316 }
00317 
00318 
00319 Octstr *gw_regex_subst_pre_real(const regex_t *preg, const Octstr *os, const Octstr *rule, 
00320                                 const char *file, long line, const char *func)
00321 {
00322     Octstr *result;
00323     regmatch_t pmatch[REGEX_MAX_SUB_MATCH];
00324     int rc;
00325     char *rsub;
00326 
00327     gw_assert(preg != NULL);
00328 
00329     /* execute and match */
00330     rc = gw_regex_exec_real(preg, os, REGEX_MAX_SUB_MATCH, &pmatch[0], 0, 
00331                             file, line, func);
00332 
00333     /* substitute via rule if matched */
00334     if (rc != 0)
00335         return NULL;
00336 
00337     rsub = gw_regex_sub(octstr_get_cstr(rule), octstr_get_cstr(os),
00338                         REGEX_MAX_SUB_MATCH, &pmatch[0]);
00339     if (rsub == NULL)
00340         return NULL;
00341 
00342     result = octstr_create(rsub);
00343     gw_free(rsub);
00344     
00345     return result;
00346 }
00347 
00348 #endif  /* HAVE_REGEX || HAVE_PCRE */
00349 
See file LICENSE for details about the license agreement for using, modifying, copying or deriving work from this software.