Main Page | Alphabetical List | Data Structures | Directories | File List | Data Fields | Globals

numhash.c

Go to the documentation of this file.
00001 /* ==================================================================== 
00002  * The Kannel Software License, Version 1.0 
00003  * 
00004  * Copyright (c) 2001-2008 Kannel Group  
00005  * Copyright (c) 1998-2001 WapIT Ltd.   
00006  * All rights reserved. 
00007  * 
00008  * Redistribution and use in source and binary forms, with or without 
00009  * modification, are permitted provided that the following conditions 
00010  * are met: 
00011  * 
00012  * 1. Redistributions of source code must retain the above copyright 
00013  *    notice, this list of conditions and the following disclaimer. 
00014  * 
00015  * 2. Redistributions in binary form must reproduce the above copyright 
00016  *    notice, this list of conditions and the following disclaimer in 
00017  *    the documentation and/or other materials provided with the 
00018  *    distribution. 
00019  * 
00020  * 3. The end-user documentation included with the redistribution, 
00021  *    if any, must include the following acknowledgment: 
00022  *       "This product includes software developed by the 
00023  *        Kannel Group (http://www.kannel.org/)." 
00024  *    Alternately, this acknowledgment may appear in the software itself, 
00025  *    if and wherever such third-party acknowledgments normally appear. 
00026  * 
00027  * 4. The names "Kannel" and "Kannel Group" must not be used to 
00028  *    endorse or promote products derived from this software without 
00029  *    prior written permission. For written permission, please  
00030  *    contact org@kannel.org. 
00031  * 
00032  * 5. Products derived from this software may not be called "Kannel", 
00033  *    nor may "Kannel" appear in their name, without prior written 
00034  *    permission of the Kannel Group. 
00035  * 
00036  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED 
00037  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 
00038  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 
00039  * DISCLAIMED.  IN NO EVENT SHALL THE KANNEL GROUP OR ITS CONTRIBUTORS 
00040  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,  
00041  * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT  
00042  * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR  
00043  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,  
00044  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE  
00045  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,  
00046  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
00047  * ==================================================================== 
00048  * 
00049  * This software consists of voluntary contributions made by many 
00050  * individuals on behalf of the Kannel Group.  For more information on  
00051  * the Kannel Group, please see <http://www.kannel.org/>. 
00052  * 
00053  * Portions of this software are based upon software originally written at  
00054  * WapIT Ltd., Helsinki, Finland for the Kannel project.  
00055  */ 
00056 
00057 /*
00058  * numhash.c
00059  *
00060  * NUMBER HASH functions
00061  *
00062  * functions to add a number to database/hash
00063  * and functions to retrieve them
00064  *
00065  * notes: read header file
00066  *
00067  * Kalle Marjola for project Kannel 1999-2000
00068  */
00069 
00070 #include <stdlib.h>
00071 #include <string.h>
00072 #include <ctype.h>
00073 #include <errno.h>
00074 
00075 #include "gwlib/gwlib.h"
00076 #include "numhash.h"
00077 
00078 
00079 #define NUMHASH_AUTO_HASH -1
00080 
00081 /* set of pre-calculated prime numbers for hash generation... */
00082 
00083 static int primes[] = {
00084   101, 503, 1009, 2003, 3001, 5003, 7001,
00085   10007, 20011, 30011, 40009, 50021, 60013, 70001, 80021, 90001,
00086   100003, 150001, 200003, 300007, 400009, 500009, 600011, 700001,
00087   800011, 900001, 1000003, 1100009, 1200007, 1300021, 1400017, 1500007,
00088   1600033, 1700021, 1800017, 1900009, 2000003, 3000017, 4000037, 5000111,
00089   6000101, 7000127, 8000051, -1
00090 };
00091 
00092 struct numhash_table {
00093   struct numhash_number *numbers;
00094   long          number_total;
00095   long          table_size;
00096   struct numhash_number **hash;
00097   long          hash_size;
00098 };  /* Numhash */
00099 
00100 struct numhash_number {
00101   long          key;        /* (hopefully) unique key */
00102   struct numhash_number *next;      /* next in hash table, if any */
00103 };
00104 
00105 
00106 struct nh_entry {
00107     Numhash     *hash;
00108     struct nh_entry *next;
00109 };
00110     
00111 struct numhashes {
00112     struct nh_entry *first;
00113     struct nh_entry     *last;
00114 };  /* Multitable */
00115 
00116 
00117 static int  precision = 9;      /* the precision (last numbers) used */
00118 
00119 /*
00120  * add new item (number) to hash table
00121  */
00122 static int add_item(Numhash *table, struct numhash_number *nro)
00123 {
00124     if (table->hash[nro->key % table->hash_size]) {    /* conflict */
00125     struct numhash_number *ptr = table->hash[nro->key % table->hash_size];
00126 
00127     if (ptr->key == nro->key)
00128         goto duplicate;
00129 
00130     while(ptr->next) {
00131         ptr = ptr->next;
00132         if (ptr->key == nro->key)
00133         goto duplicate;
00134     }
00135     ptr->next = nro;                /* put as last of the linkage */
00136     } else
00137     table->hash[nro->key % table->hash_size] = nro;
00138 
00139     return 0;
00140 
00141 duplicate:
00142     warning(0, "Duplicate number %ld!", nro->key); 
00143     return -1;  
00144 }
00145 
00146 /* Add a new number to number list and hash
00147  * Return 0 if all went ok, -1 if out of room
00148  */
00149 static int numhash_add_number(Numhash *table, char *nro)
00150 {
00151     struct numhash_number *newnro;
00152 
00153     if (table->number_total == table->table_size) {
00154     error(0, "Table limit %ld reached, cannot add %s!",
00155           table->table_size, nro);
00156     return -1;
00157     }
00158     newnro =  &table->numbers[table->number_total++]; /* take the next free */
00159   
00160     newnro->key = numhash_get_char_key(nro);
00161     newnro->next = NULL;
00162   
00163     add_item(table, newnro);
00164   
00165     return 0;
00166 }
00167 
00168 
00169 /* Init the number table and hash table with given sizes
00170  */
00171 static Numhash *numhash_init(int max_numbers, int hash_size)
00172 {
00173     Numhash *ntable = NULL;
00174 
00175     ntable = gw_malloc(sizeof(Numhash));
00176     
00177     if (hash_size > 0)
00178     ntable->hash_size = hash_size;
00179     else if (hash_size == NUMHASH_AUTO_HASH) {
00180     int i;
00181     for(i=0 ; primes[i] > 0; i++) {
00182         ntable->hash_size = primes[i];
00183         if (ntable->hash_size > max_numbers)
00184         break;
00185     }
00186     } else {
00187     gw_free(ntable);
00188     return NULL;
00189     }
00190     ntable->hash = gw_malloc(ntable->hash_size * sizeof(struct numhash_number *));
00191     memset(ntable->hash, 0, sizeof(struct numhash_number *) * ntable->hash_size);
00192 
00193     ntable->table_size = max_numbers;
00194     ntable->numbers = gw_malloc(ntable->table_size * sizeof(struct numhash_number));
00195 
00196     ntable->number_total = 0;
00197   
00198     /* set our accuracy according to the size of long int
00199      * Ok, we call this many times if we use multiple tables, but
00200      * that is not a problem...
00201      */
00202     if (sizeof(long)>=8) precision = 18;
00203     else if (sizeof(long)>=4) precision = 9;
00204 
00205     return ntable;
00206 }
00207 
00208 
00209 
00210 /*------------------------------------------------------
00211  * PUBLIC FUNCTIONS
00212  */
00213 
00214 
00215 
00216 int numhash_find_number(Numhash *table, Octstr *nro)
00217 {
00218     long key = numhash_get_key(nro);
00219     if (key<0) return key;
00220 
00221     return numhash_find_key(table, key);
00222 }
00223 
00224 
00225 int numhash_find_key(Numhash *table, long key)
00226 {
00227     struct numhash_number *ptr;
00228 
00229     ptr = table->hash[key % table->hash_size];
00230     while (ptr) {
00231     if (ptr->key == key) return 1;
00232     ptr = ptr->next;
00233     }
00234     return 0;   /* not found */
00235 }
00236 
00237 
00238 
00239 long numhash_get_key(Octstr *nro)
00240 {
00241     long key;
00242     if (!nro) return -1;
00243   
00244     if (octstr_len(nro) > precision)
00245     key = atoi(octstr_get_cstr(nro) + octstr_len(nro) -precision);
00246     else
00247     key = atoi(octstr_get_cstr(nro));
00248 
00249     return key;
00250 }
00251 
00252 
00253 long numhash_get_char_key(char *nro)
00254 {
00255     int len;
00256     long key;
00257     if (!nro) return -1;
00258     len = strlen(nro);
00259     
00260     if (len > precision)
00261     key = atoi(nro + len -precision);
00262     else
00263     key = atoi(nro);
00264 
00265     return key;
00266 }
00267 
00268 
00269 void numhash_destroy(Numhash *table)
00270 {
00271     if (table == NULL)
00272     return;
00273     gw_free(table->numbers);
00274     gw_free(table->hash);
00275     gw_free(table);
00276 }
00277 
00278 
00279 double numhash_hash_fill(Numhash *table, int *longest)
00280 {
00281     int i, l, max = 0, tot = 0;
00282     struct numhash_number *ptr;
00283 
00284     for (i=0; i < table->hash_size; i++)
00285     if (table->hash[i]) {
00286         tot++;
00287         ptr = table->hash[i]; 
00288         for (l=0; ptr->next; ptr = ptr->next)
00289         l++;
00290         if (l > max)
00291         max = l;
00292     }
00293 
00294     if (longest != NULL)
00295     *longest = max;
00296 
00297     return (double)(tot*100.0/(table->hash_size));
00298 }
00299 
00300 
00301 int numhash_size(Numhash *table)
00302 {
00303     return table->number_total;
00304 }
00305 
00306 
00307 Numhash *numhash_create(char *seek_url)
00308 {
00309     int     loc, lines = 0;
00310     List    *request_headers, *reply_headers;
00311     Octstr  *url, *final_url, *reply_body;
00312     Octstr  *type, *charset;
00313     
00314     char    *data, *ptr, numbuf[100];
00315     int     status;
00316     Numhash *table;
00317 
00318     url = octstr_create(seek_url);
00319     request_headers = gwlist_create();
00320     status = http_get_real(HTTP_METHOD_GET, url, request_headers, &final_url,
00321                 &reply_headers, &reply_body);
00322     octstr_destroy(url);
00323     octstr_destroy(final_url);
00324     gwlist_destroy(request_headers, NULL);
00325 
00326     if (status != HTTP_OK) {
00327     http_destroy_headers(reply_headers);
00328     octstr_destroy(reply_body);
00329     error(0, "Cannot load numhash!");
00330     return NULL;
00331     }
00332     http_header_get_content_type(reply_headers, &type, &charset);
00333     octstr_destroy(charset);
00334     http_destroy_headers(reply_headers);
00335     
00336     if (octstr_str_compare(type, "text/plain") != 0) {
00337         octstr_destroy(reply_body);
00338         error(0, "Strange content type <%s> for numhash - expecting 'text/plain'"
00339                  ", operatiom fails", octstr_get_cstr(type));
00340         octstr_destroy(type);
00341         return NULL;
00342     }
00343     octstr_destroy(type);
00344     
00345     ptr = data = octstr_get_cstr(reply_body);
00346     while(*ptr) {
00347     if (*ptr == '\n') lines++;
00348     ptr++;
00349     }
00350     debug("numhash", 0, "Total %d lines in %s", lines, seek_url);
00351 
00352     table = numhash_init(lines+10, NUMHASH_AUTO_HASH);      /* automatic hash */
00353 
00354     /* now, parse the number information */
00355 
00356     lines = 0;
00357   
00358     while((ptr = strchr(data, '\n'))) { /* each line is ended with linefeed */
00359     *ptr = '\0';
00360     while(*data != '\0' && isspace(*data))
00361         data++;
00362     if (*data != '#') {
00363         loc = 0;
00364         while (*data != '\0') {
00365         if (isdigit(*data))
00366             numbuf[loc++] = *data;
00367         else if (*data == ' ' || *data == '+' || *data == '-')
00368             ;
00369         else break;
00370         data++;
00371         }
00372         if (loc) {
00373         numbuf[loc] = '\0';
00374         numhash_add_number(table, numbuf);
00375         lines++;
00376         }
00377         else
00378         warning(0, "Corrupted line '%s'", data);
00379     }
00380     data = ptr+1;   /* next row... */
00381     }
00382     octstr_destroy(reply_body); 
00383 
00384     info(0, "Read from <%s> total of %ld numbers", seek_url, table->number_total);
00385     return table;
00386 }
00387 
See file LICENSE for details about the license agreement for using, modifying, copying or deriving work from this software.