00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070 #include <stdlib.h>
00071 #include <string.h>
00072 #include <ctype.h>
00073 #include <errno.h>
00074
00075 #include "gwlib/gwlib.h"
00076 #include "numhash.h"
00077
00078
00079 #define NUMHASH_AUTO_HASH -1
00080
00081
00082
00083 static int primes[] = {
00084 101, 503, 1009, 2003, 3001, 5003, 7001,
00085 10007, 20011, 30011, 40009, 50021, 60013, 70001, 80021, 90001,
00086 100003, 150001, 200003, 300007, 400009, 500009, 600011, 700001,
00087 800011, 900001, 1000003, 1100009, 1200007, 1300021, 1400017, 1500007,
00088 1600033, 1700021, 1800017, 1900009, 2000003, 3000017, 4000037, 5000111,
00089 6000101, 7000127, 8000051, -1
00090 };
00091
00092 struct numhash_table {
00093 struct numhash_number *numbers;
00094 long number_total;
00095 long table_size;
00096 struct numhash_number **hash;
00097 long hash_size;
00098 };
00099
00100 struct numhash_number {
00101 long key;
00102 struct numhash_number *next;
00103 };
00104
00105
00106 struct nh_entry {
00107 Numhash *hash;
00108 struct nh_entry *next;
00109 };
00110
00111 struct numhashes {
00112 struct nh_entry *first;
00113 struct nh_entry *last;
00114 };
00115
00116
00117 static int precision = 9;
00118
00119
00120
00121
00122 static int add_item(Numhash *table, struct numhash_number *nro)
00123 {
00124 if (table->hash[nro->key % table->hash_size]) {
00125 struct numhash_number *ptr = table->hash[nro->key % table->hash_size];
00126
00127 if (ptr->key == nro->key)
00128 goto duplicate;
00129
00130 while(ptr->next) {
00131 ptr = ptr->next;
00132 if (ptr->key == nro->key)
00133 goto duplicate;
00134 }
00135 ptr->next = nro;
00136 } else
00137 table->hash[nro->key % table->hash_size] = nro;
00138
00139 return 0;
00140
00141 duplicate:
00142 warning(0, "Duplicate number %ld!", nro->key);
00143 return -1;
00144 }
00145
00146
00147
00148
00149 static int numhash_add_number(Numhash *table, char *nro)
00150 {
00151 struct numhash_number *newnro;
00152
00153 if (table->number_total == table->table_size) {
00154 error(0, "Table limit %ld reached, cannot add %s!",
00155 table->table_size, nro);
00156 return -1;
00157 }
00158 newnro = &table->numbers[table->number_total++];
00159
00160 newnro->key = numhash_get_char_key(nro);
00161 newnro->next = NULL;
00162
00163 add_item(table, newnro);
00164
00165 return 0;
00166 }
00167
00168
00169
00170
00171 static Numhash *numhash_init(int max_numbers, int hash_size)
00172 {
00173 Numhash *ntable = NULL;
00174
00175 ntable = gw_malloc(sizeof(Numhash));
00176
00177 if (hash_size > 0)
00178 ntable->hash_size = hash_size;
00179 else if (hash_size == NUMHASH_AUTO_HASH) {
00180 int i;
00181 for(i=0 ; primes[i] > 0; i++) {
00182 ntable->hash_size = primes[i];
00183 if (ntable->hash_size > max_numbers)
00184 break;
00185 }
00186 } else {
00187 gw_free(ntable);
00188 return NULL;
00189 }
00190 ntable->hash = gw_malloc(ntable->hash_size * sizeof(struct numhash_number *));
00191 memset(ntable->hash, 0, sizeof(struct numhash_number *) * ntable->hash_size);
00192
00193 ntable->table_size = max_numbers;
00194 ntable->numbers = gw_malloc(ntable->table_size * sizeof(struct numhash_number));
00195
00196 ntable->number_total = 0;
00197
00198
00199
00200
00201
00202 if (sizeof(long)>=8) precision = 18;
00203 else if (sizeof(long)>=4) precision = 9;
00204
00205 return ntable;
00206 }
00207
00208
00209
00210
00211
00212
00213
00214
00215
00216 int numhash_find_number(Numhash *table, Octstr *nro)
00217 {
00218 long key = numhash_get_key(nro);
00219 if (key<0) return key;
00220
00221 return numhash_find_key(table, key);
00222 }
00223
00224
00225 int numhash_find_key(Numhash *table, long key)
00226 {
00227 struct numhash_number *ptr;
00228
00229 ptr = table->hash[key % table->hash_size];
00230 while (ptr) {
00231 if (ptr->key == key) return 1;
00232 ptr = ptr->next;
00233 }
00234 return 0;
00235 }
00236
00237
00238
00239 long numhash_get_key(Octstr *nro)
00240 {
00241 long key;
00242 if (!nro) return -1;
00243
00244 if (octstr_len(nro) > precision)
00245 key = atoi(octstr_get_cstr(nro) + octstr_len(nro) -precision);
00246 else
00247 key = atoi(octstr_get_cstr(nro));
00248
00249 return key;
00250 }
00251
00252
00253 long numhash_get_char_key(char *nro)
00254 {
00255 int len;
00256 long key;
00257 if (!nro) return -1;
00258 len = strlen(nro);
00259
00260 if (len > precision)
00261 key = atoi(nro + len -precision);
00262 else
00263 key = atoi(nro);
00264
00265 return key;
00266 }
00267
00268
00269 void numhash_destroy(Numhash *table)
00270 {
00271 if (table == NULL)
00272 return;
00273 gw_free(table->numbers);
00274 gw_free(table->hash);
00275 gw_free(table);
00276 }
00277
00278
00279 double numhash_hash_fill(Numhash *table, int *longest)
00280 {
00281 int i, l, max = 0, tot = 0;
00282 struct numhash_number *ptr;
00283
00284 for (i=0; i < table->hash_size; i++)
00285 if (table->hash[i]) {
00286 tot++;
00287 ptr = table->hash[i];
00288 for (l=0; ptr->next; ptr = ptr->next)
00289 l++;
00290 if (l > max)
00291 max = l;
00292 }
00293
00294 if (longest != NULL)
00295 *longest = max;
00296
00297 return (double)(tot*100.0/(table->hash_size));
00298 }
00299
00300
00301 int numhash_size(Numhash *table)
00302 {
00303 return table->number_total;
00304 }
00305
00306
00307 Numhash *numhash_create(char *seek_url)
00308 {
00309 int loc, lines = 0;
00310 List *request_headers, *reply_headers;
00311 Octstr *url, *final_url, *reply_body;
00312 Octstr *type, *charset;
00313
00314 char *data, *ptr, numbuf[100];
00315 int status;
00316 Numhash *table;
00317
00318 url = octstr_create(seek_url);
00319 request_headers = gwlist_create();
00320 status = http_get_real(HTTP_METHOD_GET, url, request_headers, &final_url,
00321 &reply_headers, &reply_body);
00322 octstr_destroy(url);
00323 octstr_destroy(final_url);
00324 gwlist_destroy(request_headers, NULL);
00325
00326 if (status != HTTP_OK) {
00327 http_destroy_headers(reply_headers);
00328 octstr_destroy(reply_body);
00329 error(0, "Cannot load numhash!");
00330 return NULL;
00331 }
00332 http_header_get_content_type(reply_headers, &type, &charset);
00333 octstr_destroy(charset);
00334 http_destroy_headers(reply_headers);
00335
00336 if (octstr_str_compare(type, "text/plain") != 0) {
00337 octstr_destroy(reply_body);
00338 error(0, "Strange content type <%s> for numhash - expecting 'text/plain'"
00339 ", operatiom fails", octstr_get_cstr(type));
00340 octstr_destroy(type);
00341 return NULL;
00342 }
00343 octstr_destroy(type);
00344
00345 ptr = data = octstr_get_cstr(reply_body);
00346 while(*ptr) {
00347 if (*ptr == '\n') lines++;
00348 ptr++;
00349 }
00350 debug("numhash", 0, "Total %d lines in %s", lines, seek_url);
00351
00352 table = numhash_init(lines+10, NUMHASH_AUTO_HASH);
00353
00354
00355
00356 lines = 0;
00357
00358 while((ptr = strchr(data, '\n'))) {
00359 *ptr = '\0';
00360 while(*data != '\0' && isspace(*data))
00361 data++;
00362 if (*data != '#') {
00363 loc = 0;
00364 while (*data != '\0') {
00365 if (isdigit(*data))
00366 numbuf[loc++] = *data;
00367 else if (*data == ' ' || *data == '+' || *data == '-')
00368 ;
00369 else break;
00370 data++;
00371 }
00372 if (loc) {
00373 numbuf[loc] = '\0';
00374 numhash_add_number(table, numbuf);
00375 lines++;
00376 }
00377 else
00378 warning(0, "Corrupted line '%s'", data);
00379 }
00380 data = ptr+1;
00381 }
00382 octstr_destroy(reply_body);
00383
00384 info(0, "Read from <%s> total of %ld numbers", seek_url, table->number_total);
00385 return table;
00386 }
00387
See file LICENSE for details about the license agreement for using,
modifying, copying or deriving work from this software.