Main Page | Alphabetical List | Data Structures | Directories | File List | Data Fields | Globals

regex.h

Go to the documentation of this file.
00001 /* ==================================================================== 
00002  * The Kannel Software License, Version 1.0 
00003  * 
00004  * Copyright (c) 2001-2008 Kannel Group  
00005  * Copyright (c) 1998-2001 WapIT Ltd.   
00006  * All rights reserved. 
00007  * 
00008  * Redistribution and use in source and binary forms, with or without 
00009  * modification, are permitted provided that the following conditions 
00010  * are met: 
00011  * 
00012  * 1. Redistributions of source code must retain the above copyright 
00013  *    notice, this list of conditions and the following disclaimer. 
00014  * 
00015  * 2. Redistributions in binary form must reproduce the above copyright 
00016  *    notice, this list of conditions and the following disclaimer in 
00017  *    the documentation and/or other materials provided with the 
00018  *    distribution. 
00019  * 
00020  * 3. The end-user documentation included with the redistribution, 
00021  *    if any, must include the following acknowledgment: 
00022  *       "This product includes software developed by the 
00023  *        Kannel Group (http://www.kannel.org/)." 
00024  *    Alternately, this acknowledgment may appear in the software itself, 
00025  *    if and wherever such third-party acknowledgments normally appear. 
00026  * 
00027  * 4. The names "Kannel" and "Kannel Group" must not be used to 
00028  *    endorse or promote products derived from this software without 
00029  *    prior written permission. For written permission, please  
00030  *    contact org@kannel.org. 
00031  * 
00032  * 5. Products derived from this software may not be called "Kannel", 
00033  *    nor may "Kannel" appear in their name, without prior written 
00034  *    permission of the Kannel Group. 
00035  * 
00036  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED 
00037  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 
00038  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 
00039  * DISCLAIMED.  IN NO EVENT SHALL THE KANNEL GROUP OR ITS CONTRIBUTORS 
00040  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,  
00041  * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT  
00042  * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR  
00043  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,  
00044  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE  
00045  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,  
00046  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
00047  * ==================================================================== 
00048  * 
00049  * This software consists of voluntary contributions made by many 
00050  * individuals on behalf of the Kannel Group.  For more information on  
00051  * the Kannel Group, please see <http://www.kannel.org/>. 
00052  * 
00053  * Portions of this software are based upon software originally written at  
00054  * WapIT Ltd., Helsinki, Finland for the Kannel project.  
00055  */ 
00056 
00057 /*
00058  * regex.h - POSIX regular expressions (REs) 
00059  *
00060  * This module implements wrapper functions for regcomp(3), regexec(3),
00061  * and related functions from the POSIX standard. Additionally
00062  * it provides subexpression substitution routines in order to easily
00063  * substitute strings around regular expressions.
00064  *
00065  * See regex(3) man page for more details on POSIX regular expressions.
00066  * 
00067  * PCRE provides wrapper functions for POSIX regex via its own API. So we
00068  * prefer PCRE when available, before falling back to POSIX regex.
00069  *
00070  * Stipe Tolj <stolj@kannel.org>
00071  */
00072 
00073 #ifndef REGEX_H
00074 #define REGEX_H
00075 
00076 #ifdef HAVE_PCRE                /* prefer PCRE's POSIX-compatible wrapper API */
00077 # include <pcreposix.h>
00078 #elif defined(HAVE_REGEX)       /* otherwise use the system POSIX regex header */
00079 # include <regex.h>
00080 #endif
00081 
00082 #if defined(HAVE_REGEX) || defined(HAVE_PCRE)
00083 
00084 
00085 /*
00086  * Maximum number of subexpression matches we handle (10); accordingly
00087  * gw_regex_sub() supports the substitution escape codes $0 to $9.
00088  */
00089 #define REGEX_MAX_SUB_MATCH 10
00090 
00091 
00092 /*
00093  * Destroy the previously compiled regular expression 'preg'.
00094  */
00095 void gw_regex_destroy(regex_t *preg);
00096 
00097 
00098 /*
00099  * Compile the regular expression given by 'pattern' with flags 'cflags'
00100  * and return the compiled regex_t, or NULL if the compilation fails.
00101  * The gw_regex_comp() macro supplies the caller's file, line and function.
00102  */
00103 regex_t *gw_regex_comp_real(const Octstr *pattern, int cflags, const char *file, 
00104                             long line, const char *func);
00105 #define gw_regex_comp(pattern, cflags) \
00106     gw_regex_comp_real(pattern, cflags, __FILE__, __LINE__, __func__)
00107 
00108 
00109 /*
00110  * Execute a previously compiled regular expression on a given
00111  * string and provide the matches via nmatch and pmatch[].
00112  */
00113 int gw_regex_exec_real(const regex_t *preg, const Octstr *string, size_t nmatch, 
00114                        regmatch_t pmatch[], int eflags, const char *file, long line, 
00115                        const char *func);
00116 #define gw_regex_exec(preg, string, nmatch, pmatch, eflags) \
00117     gw_regex_exec_real(preg, string, nmatch, pmatch, eflags, \
00118                        __FILE__, __LINE__, __func__)
00119 
00120 
00121 /*
00122  * Provide the error description string of a regex operation as
00123  * an Octstr instead of a char[].
00124  */
00125 Octstr *gw_regex_error(int errcode, const regex_t *preg);
00126 
00127 
00128 /* This function substitutes for $0-$9, filling in regular expression
00129  * submatches. Pass it the same nmatch and pmatch arguments that you
00130  * passed gw_regex_exec(). nmatch should not be greater than the maximum number
00131  * of subexpressions - i.e. one more than the re_nsub member of regex_t.
00132  *
00133  * input should be the string with the $-expressions, source should be the
00134  * string that was matched against.
00135  *
00136  * It returns the substituted string, or NULL on error.
00137  *
00138  * Parts of this code are based on Henry Spencer's regsub(), from his
00139  * AT&T V8 regexp package. Function borrowed from apache-1.3/src/main/util.c
00140  */
00141 char *gw_regex_sub(const char *input, const char *source,
00142                    size_t nmatch, regmatch_t pmatch[]);
00143 
00144 
00145 /*
00146  * Match directly a given regular expression and a source string. This assumes
00147  * that the RE has not been pre-compiled and hence performs both the compile
00148  * and the exec step in this matching step.
00149  * Return 1 if the regular expression matches successfully, 0 otherwise.
00150  */
00151 int gw_regex_match_real(const Octstr *re, const Octstr *os, const char *file, 
00152                         long line, const char *func);
00153 #define gw_regex_match(re, os) \
00154     gw_regex_match_real(re, os, __FILE__, __LINE__, __func__)
00155 
00156 
00157 /*
00158  * Match directly a given source string against a previously compiled
00159  * regular expression.
00160  * Return 1 if the regular expression matches successfully, 0 otherwise.
00161  */
00162 int gw_regex_match_pre_real(const regex_t *preg, const Octstr *os, const char *file, 
00163                             long line, const char *func);
00164 #define gw_regex_match_pre(preg, os) \
00165     gw_regex_match_pre_real(preg, os, __FILE__, __LINE__, __func__)
00166 
00167 
00168 /*
00169  * Match directly a given regular expression and a source string. RE has not
00170  * been precompiled. Apply substitution rule according to Octstr 'rule' and
00171  * return the substituted Octstr as result. Return NULL if failed.
00172  * Use \$0 up to \$9 as escape codes for subexpression matches in the rule.
00173  * E.g. os="+4914287756", re="^(00|\+)([0-9]{6,20})$", rule="\$2" would
00174  * return "4914287756" because the rule yields only the second
00175  * subexpression, i.e. the part matched by ([0-9]{6,20}).
00176  */
00177 Octstr *gw_regex_subst_real(const Octstr *re, const Octstr *os, const Octstr *rule, 
00178                             const char *file, long line, const char *func);
00179 #define gw_regex_subst(re, os, rule) \
00180     gw_regex_subst_real(re, os, rule, __FILE__, __LINE__, __func__)
00181 
00182 /*
00183  * Match directly a given source string against a previously compiled
00184  * regular expression. Apply substitution rule according to Octstr 'rule' and
00185  * return the substituted Octstr as result. Same as gw_regex_subst() but a 
00186  * pre-compiled RE is passed as first argument.
00187  */
00188 Octstr *gw_regex_subst_pre_real(const regex_t *preg, const Octstr *os, const Octstr *rule, 
00189                                 const char *file, long line, const char *func);
00190 #define gw_regex_subst_pre(preg, os, rule) \
00191     gw_regex_subst_pre_real(preg, os, rule, __FILE__, __LINE__, __func__)
00192 
00193 
00194 #endif
00195 #endif  /* REGEX_H */
00196 
00197 
See file LICENSE for details about the license agreement for using, modifying, copying or deriving work from this software.