Commit f4ef873e authored by Chen Wei

tweak dict.c for performance and safety

1. Choose the shift-add-XOR hash because it does not use multiplication and
   was faster than the Bernstein hash in profiling
2. better boundary check for normalize_domain_name
3. Replace the hash table modulo (%) with a bitwise AND (&), which is equivalent because the table size is a power of 2
parent 2692136d
...@@ -57,13 +57,14 @@ static char buf[MAXDNAME]; ...@@ -57,13 +57,14 @@ static char buf[MAXDNAME];
static struct dict_node *lookup_dictnode (struct dict_node *node, char *label); static struct dict_node *lookup_dictnode (struct dict_node *node, char *label);
static void add_dicttree (struct dict_node *node, struct dict_node *sub); static void add_dicttree (struct dict_node *node, struct dict_node *sub);
static void upsize_dicttree (struct dict_node *np); static void upsize_dicttree (struct dict_node *np);
static inline void normalize_domain_name (char *dst, char *src, int len);
/* hash function 1 for double hashing /* hash function 1 for double hashing
* 32 bit Fowler/Noll/Vo hash */ * 32 bit Fowler/Noll/Vo hash */
static inline uint32_t fnv_32_hash (char *str) static inline uint32_t dblhash_1 (char *key)
{ {
uint32_t hval = FNV1_32A_INIT; uint32_t hval = FNV1_32A_INIT;
unsigned char *s = (unsigned char *) str; unsigned char *s = (unsigned char *) key;
while (*s) while (*s)
{ {
...@@ -76,41 +77,49 @@ static inline uint32_t fnv_32_hash (char *str) ...@@ -76,41 +77,49 @@ static inline uint32_t fnv_32_hash (char *str)
} }
/* hash function 2 for double hashing /* hash function 2 for double hashing
* the modified Bernstein hash, return an odd number */ * modified Shift-Add-XOR hash, return an odd number */
static inline unsigned int bernstein_odd (char *key) static inline uint32_t dblhash_2 (char *key)
{ {
uint32_t h = 0;
unsigned char *s = (unsigned char *) key; unsigned char *s = (unsigned char *) key;
unsigned int h = 0;
while (*s) while (*s)
h = 33 * h ^ *s++; h ^= (h << 5) + (h >> 2) + *s++;
return h % 2 ? h : h + 1; return h % 2 ? h : h + 1;
} }
/* convert domain to lower cases, remove leading blank, leading and trailing /* convert domain to lower cases, remove leading blank, leading and trailing
* dot, string end with \0 */ * dot, string end with \0 */
static inline void memcpy_lower (void *dst, void *src, int len) static inline void normalize_domain_name (char *d, char *s, int len)
{ {
char *d = (char *) dst;
char *s = (char *) src;
int i; int i;
/* skip leading dot and blank */ /* skip leading dot and blank */
for ( ; *s != '\0' && (*s == '.' || *s == '\t' || *s == ' '); s++ ); for ( ; *s != '\0' && (*s == '.' || *s == '\t' || *s == ' '); s++)
;
for (i = 0; i < len; i++, d++, s++) for (i = 0; i < len && *s != '\0'; i++, s++)
{ {
if (*s >= 'A' && *s <= 'Z') if (*s >= 'A' && *s <= 'Z')
*d = *s + 'a' - 'A'; d[i] = *s + 'a' - 'A';
else else
*d = *s; d[i] = *s;
} }
if (*--d == '.') /* should not happen since the source string limited to MAXDNAME */
*d = '\0'; if (i == len)
i--;
for ( ; d[i] == '.'; i--)
;
if (i < (len - 1))
d[++i] = '\0';
else else
*++d = '\0'; /* something wrong with the source string(domain name), it exceeds
* MAXDNAME, terminate the dst string with '\0' anyway */
d[i] = '\0';
} }
struct dict_node * init_sub_dictnode (struct dict_node *node) struct dict_node * init_sub_dictnode (struct dict_node *node)
...@@ -147,8 +156,8 @@ struct dict_node * new_dictnode (char *label, int label_len) ...@@ -147,8 +156,8 @@ struct dict_node * new_dictnode (char *label, int label_len)
else else
{ {
node->label = strdup (label); node->label = strdup (label);
node->h1 = fnv_32_hash (label); node->h1 = dblhash_1 (label);
node->h2 = bernstein_odd (label); node->h2 = dblhash_2 (label);
} }
node->sub_count = 0; node->sub_count = 0;
...@@ -203,7 +212,8 @@ static void add_dicttree (struct dict_node *node, struct dict_node *sub) ...@@ -203,7 +212,8 @@ static void add_dicttree (struct dict_node *node, struct dict_node *sub)
dh = sub->h1; dh = sub->h1;
while (1) while (1)
{ {
idx = dh % node->sub_slots; /* eq to dh % node->sub_slots, since sub_slots is power of 2*/
idx = dh & (node->sub_slots - 1);
if (node->sub[idx] == NULL) if (node->sub[idx] == NULL)
{ {
node->sub[idx] = sub; node->sub[idx] = sub;
...@@ -261,9 +271,9 @@ static struct dict_node *lookup_dictnode (struct dict_node *node, char *label) ...@@ -261,9 +271,9 @@ static struct dict_node *lookup_dictnode (struct dict_node *node, char *label)
return NULL; return NULL;
} }
dh = h1 = fnv_32_hash (label); dh = h1 = dblhash_1 (label);
h2 = bernstein_odd (label); h2 = dblhash_2 (label);
idx = dh % node->sub_slots; idx = dh & (node->sub_slots - 1);
while ((np = node->sub[idx]) != NULL) while ((np = node->sub[idx]) != NULL)
{ {
if (np->h1 == h1 && np->h2 == h2) if (np->h1 == h1 && np->h2 == h2)
...@@ -273,7 +283,7 @@ static struct dict_node *lookup_dictnode (struct dict_node *node, char *label) ...@@ -273,7 +283,7 @@ static struct dict_node *lookup_dictnode (struct dict_node *node, char *label)
} }
dh += h2; dh += h2;
idx = dh % node->sub_slots; idx = dh & (node->sub_slots - 1);
} }
return NULL; return NULL;
...@@ -286,21 +296,14 @@ struct dict_node * match_domain(struct dict_node *root, char *domain) ...@@ -286,21 +296,14 @@ struct dict_node * match_domain(struct dict_node *root, char *domain)
{ {
char *labels[MAXLABELS]; char *labels[MAXLABELS];
int i, label_num; int i, label_num;
int len = strlen (domain); int len = (int) sizeof(buf);
struct dict_node *node, *res; struct dict_node *node, *res;
if (root == NULL) if (root == NULL)
return NULL; return NULL;
memset(buf, 0, sizeof(buf)); memset(buf, 0, sizeof(buf));
memcpy_lower (buf, domain, len); normalize_domain_name (buf, domain, len);
/*
remove the trailing dot, make the last label top domain
if (buf[len - 1] == '.')
buf[len - 1] = '\0';
else
buf[len] = '\0';
*/
for (i = 0; i < MAXLABELS; i++) for (i = 0; i < MAXLABELS; i++)
labels[i] = NULL; labels[i] = NULL;
...@@ -309,7 +312,7 @@ struct dict_node * match_domain(struct dict_node *root, char *domain) ...@@ -309,7 +312,7 @@ struct dict_node * match_domain(struct dict_node *root, char *domain)
labels[label_num++] = &buf[0]; labels[label_num++] = &buf[0];
/* split domain name into labels */ /* split domain name into labels */
for (i = 0; buf[i] != '\0'; i++) for (i = 0; i < len && buf[i] != '\0'; i++)
{ {
if (buf[i] == '.') if (buf[i] == '.')
{ {
...@@ -346,11 +349,11 @@ struct dict_node * lookup_domain (struct dict_node *root, char *domain) ...@@ -346,11 +349,11 @@ struct dict_node * lookup_domain (struct dict_node *root, char *domain)
{ {
char *labels[MAXLABELS]; char *labels[MAXLABELS];
int i, label_num; int i, label_num;
int len = strlen (domain); int len = (int) sizeof(buf);
struct dict_node *node; struct dict_node *node;
memset(buf, 0, sizeof(buf)); memset(buf, 0, sizeof(buf));
memcpy_lower (buf, domain, len); normalize_domain_name (buf, domain, len);
for (i = 0; i < MAXLABELS; i++) for (i = 0; i < MAXLABELS; i++)
labels[i] = NULL; labels[i] = NULL;
...@@ -359,7 +362,7 @@ struct dict_node * lookup_domain (struct dict_node *root, char *domain) ...@@ -359,7 +362,7 @@ struct dict_node * lookup_domain (struct dict_node *root, char *domain)
labels[label_num++] = &buf[0]; labels[label_num++] = &buf[0];
for (i = 0; buf[i] != '\0'; i++) for (i = 0; i < len && buf[i] != '\0'; i++)
{ {
if (buf[i] == '.') if (buf[i] == '.')
{ {
...@@ -383,11 +386,11 @@ struct dict_node *add_or_lookup_domain (struct dict_node *root, char *domain) ...@@ -383,11 +386,11 @@ struct dict_node *add_or_lookup_domain (struct dict_node *root, char *domain)
{ {
char *labels[MAXLABELS]; char *labels[MAXLABELS];
int i, label_num; int i, label_num;
int len = strlen (domain); int len = (int) sizeof(buf);
struct dict_node *node; struct dict_node *node;
memset(buf, 0, sizeof(buf)); memset(buf, 0, sizeof(buf));
memcpy_lower (buf, domain, len); normalize_domain_name (buf, domain, len);
for (i = 0; i < MAXLABELS; i++) for (i = 0; i < MAXLABELS; i++)
labels[i] = NULL; labels[i] = NULL;
...@@ -395,7 +398,7 @@ struct dict_node *add_or_lookup_domain (struct dict_node *root, char *domain) ...@@ -395,7 +398,7 @@ struct dict_node *add_or_lookup_domain (struct dict_node *root, char *domain)
label_num = 0; label_num = 0;
labels[label_num++] = &buf[0]; labels[label_num++] = &buf[0];
for (i = 0; buf[i] != '\0'; i++) for (i = 0; i < len && buf[i] != '\0'; i++)
{ {
if (buf[i] == '.') if (buf[i] == '.')
{ {
......
...@@ -525,8 +525,8 @@ struct ipsets { ...@@ -525,8 +525,8 @@ struct ipsets {
struct dict_node { struct dict_node {
char *label; /* key */ char *label; /* key */
void *obj; /* the value, can point to anything */ void *obj; /* the value, can point to anything */
uint32_t h1; /* from hash function 1, fnv_32_hash */ uint32_t h1; /* from hash function 1 */
uint32_t h2; /* from hash function 2, bernstein_odd */ uint32_t h2; /* from hash function 2 */
unsigned sub_slots; /* size of hash table sub */ unsigned sub_slots; /* size of hash table sub */
int sub_count; /* items stored in sub */ int sub_count; /* items stored in sub */
int sub_loadmax; /* max items stored before upsize sub */ int sub_loadmax; /* max items stored before upsize sub */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment