Ferramentas de Utilizador

Ferramentas de Site


dev_geral:c:snippet:hash_table_c

Implementação de Hash Table em C

Aqui está uma implementação de hash tables. Usa árvores para resolver colisões. No caso de colisão leva, nas piores das hipóteses O(lg(N)), onde N é o número de elementos "colididos". A tabela tem auto-expansão (que pode ser forçada), baseada na carga.

O código seguinte depende de xalloc e list, ambos nesta wiki.

Ficheiro: hashtable.h

/*
 *  GPL
 *
 *  Written by Diogo Sousa aka orium
 *  Copyright (C) 2008 Diogo Sousa
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
 
#ifndef HASHTABLE_H_
#define HASHTABLE_H_
 
#define HASH_TABLE_DEBUG 0
 
#include <stdint.h>
#include <sys/types.h>
#include <stdbool.h>
#include "list.h"
 
#define DEFAULT_RESIZE_LOAD 0.75
#define DEFAULT_AFTER_RESIZE_LOAD 0.40
 
struct hash_element
{
	void *data;
	uint32_t hash;
 
	struct hash_table *hash_table;
};
 
struct hash_entry
{
	void *tree; /* Tree of struct hash_element */
 
	int count; /* Elements in the tree */
};
 
struct hash_table
{
	struct hash_entry **table;
 
	size_t size;
	size_t used;
 
	float resize_load; /* Resize table when load reach this */
	float after_resize_load; /* Load after resize */
 
	uint32_t (*hash_fun)(void *, size_t);
	int (*cmp_fun)(const void *, const void *);
 
	void *foo;
	list del_list;
};
 
extern void hash_table_init(struct hash_table *, size_t,
			    uint32_t (*)(void *, size_t),
			    int (*)(const void *, const void *));
extern void hash_table_set_resize_load(struct hash_table *, float);
extern void hash_table_set_load_after_resize(struct hash_table *, float);
extern int hash_table_get_size(struct hash_table *);
extern int hash_table_get_used(struct hash_table *);
extern int hash_table_get_collisions(struct hash_table *);
extern float hash_table_get_load(struct hash_table *);
extern float hash_table_get_collision_rate(struct hash_table *);
extern void hash_table_insert(struct hash_table *, void *, size_t);
extern void *hash_table_search(struct hash_table *, void *, size_t);
extern int hash_table_delete(struct hash_table *, void *, size_t);
extern void hash_table_walk(struct hash_table *, void (*)(void *));
extern void hash_table_free(struct hash_table *, void (*)(void *));
extern void hash_table_print_info(struct hash_table *);
 
#if HASH_TABLE_DEBUG
extern void hash_table_assert_integrity(struct hash_table *);
#endif
 
#endif

Ficheiro: hashtable.c

/*
 *  GPL
 *
 *  Written by Diogo Sousa aka orium
 *  Copyright (C) 2008 Diogo Sousa
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
 
#define _GNU_SOURCE
 
#include <stdio.h>
#include <stdlib.h>
#include <search.h>
#include <math.h>
#include "assert.h"
#include "xalloc.h"
#include "hashtable.h"
 
static void realloc_hash_table(struct hash_table *, size_t);
static void set_hash_table_empty(struct hash_table *, int, int);
static void set_hash_element(struct hash_table *, struct hash_element *);
static int hash_data_cmp(const void *, const void *);
static void hash_tree_walk(const void *, const VISIT, const int);
static void hash_tree_free_node(void *);
static struct hash_element *get_hash_element(struct hash_table *, void *, 
					     size_t);
static void hash_tree_walk_table_resize(const void *, const VISIT, const int);
#if HASH_TABLE_DEBUG
static void hash_tree_walk_assert_integrity(const void *, const VISIT,
					    const int);
#endif
static void delete_elements_from_tree(struct hash_table *, void **);
 
static void 
realloc_hash_table(struct hash_table *hash, size_t size)
{
	hash->table=xrealloc(hash->table,sizeof(*hash->table)*size);
	hash->size=size;
 
#if HASH_TABLE_DEBUG
	printf("Resized hash to %dn",size);
#endif
}
 
static void 
set_hash_table_empty(struct hash_table *hash, int b, int e)
{
	for (; b <= e; b++)
		hash->table[b]=NULL;
}
 
void 
hash_table_init(struct hash_table *hash, size_t size,
		uint32_t (*hash_fun)(void *, size_t),
		int (*cmp_fun)(const void *, const void *))
{
	hash->table=NULL;
	realloc_hash_table(hash,size);
	set_hash_table_empty(hash,0,hash->size-1);
	hash->used=0;
	hash->resize_load=DEFAULT_RESIZE_LOAD;
	hash->after_resize_load=DEFAULT_AFTER_RESIZE_LOAD;
 
	hash->hash_fun=hash_fun;
	hash->cmp_fun=cmp_fun;
 
	hash->foo=NULL;
}
 
void 
hash_table_set_resize_load(struct hash_table *hash, float load)
{
	hash->resize_load=load;
}
 
void 
hash_table_set_load_after_resize(struct hash_table *hash, float load)
{
	hash->after_resize_load=load;
}
 
int 
hash_table_get_size(struct hash_table *hash)
{
	return (int)hash->size;
}
 
int 
hash_table_get_collisions(struct hash_table *hash)
{
	int c=0;
	int i;
 
	for (i=0; i < hash->size; i++)
		if (hash->table[i] != NULL)
			c+=(hash->table[i]->count > 1) ? 
				hash->table[i]->count : 0;
 
	return c;
}
 
float 
hash_table_get_load(struct hash_table *hash)
{
	if (!hash->size)
		return 1.0;
 
	return (float)hash->used/(float)hash->size;
}
 
float 
hash_table_get_collision_rate(struct hash_table *hash)
{
	if (!hash->used)
		return 0.0;
 
	return (float)hash_table_get_collisions(hash)/(float)hash->used;
}
 
int 
hash_table_get_used(struct hash_table *hash)
{
	return (int)hash->used;
}
 
static int
hash_data_cmp(const void *p1, const void *p2)
{
	const struct hash_element *e1=p1;
	const struct hash_element *e2=p2;
 
	return e1->hash_table->cmp_fun(e1->data,e2->data);
}
 
static void
set_hash_element(struct hash_table *hash, struct hash_element *element)
{
	int p;
 
	p=element->hash%hash->size;
 
	if (hash->table[p] == NULL)
	{
		hash->table[p]=xmalloc(sizeof(*hash->table[p]));
		hash->table[p]->tree=NULL;
		hash->table[p]->count=0;
	}
 
	element->hash_table=hash;
 
	if (tsearch(element,&hash->table[p]->tree,hash_data_cmp) == NULL)
		xmemerror();
 
	hash->table[p]->count++;
 
#if HASH_TABLE_DEBUG
	printf("0x%x: hash %lu: Now in position %dn",
	       (unsigned int)element->data,(unsigned long)element->hash,p);
#endif
}
 
static void 
hash_tree_walk_table_resize(const void *nodep, const VISIT which,
			    const int depth)
{
	if (which == postorder
	    || which == leaf)
	{
		struct hash_element *element;
		struct hash_table *hash;
		int p;
 
		element=*(struct hash_element **)nodep;
		hash=element->hash_table;
		p=*(int *)hash->foo;
 
		if (element->hash%hash->size != p)
		{
			list_add(&hash->del_list,element);
 
			set_hash_element(hash,element);
		}
	}
}
 
static void 
delete_elements_from_tree(struct hash_table *hash, void **tree)
{
	list_node *i;
 
	for (i=list_head(&hash->del_list); i != NULL; i=list_node_next(i))
		tdelete(list_node_data_ptr(i,struct hash_element),tree,
			hash_data_cmp);
}
 
/* Takes O(N)
 */
int 
hash_table_resize(struct hash_table *hash, float new_load)
{
	int i;
	int size;
 
	if (new_load < hash->resize_load)
		return -1;
 
	if (!hash_table_get_used(hash))
		return -1;
 
	while (hash->foo != NULL)
		;
 
	size=hash->size;
 
	realloc_hash_table(hash,(size_t)ceilf((float)hash->used/new_load));
 
	set_hash_table_empty(hash,size,hash->size-1);
 
	for (i=0; i < size; i++)
		if (hash->table[i] != NULL)
		{
			hash->foo=&i;
 
			list_init(&hash->del_list);
 
			twalk(hash->table[i]->tree,hash_tree_walk_table_resize);
 
			delete_elements_from_tree(hash,&hash->table[i]->tree);
 
			hash->table[i]->count-=list_size(&hash->del_list);
 
			list_free(&hash->del_list,NULL);
 
			if (hash->table[i]->tree == NULL)
			{
				free(hash->table[i]);
				hash->table[i]=NULL;
			}
		}
 
	hash->foo=NULL;
 
#if HASH_TABLE_DEBUG
	hash_table_assert_integrity(hash);
#endif
 
	return 0;
}
 
void 
hash_table_insert(struct hash_table *hash, void *data, size_t size)
{
	struct hash_element *element;
 
	element=xmalloc(sizeof(*element));
 
	element->data=data;
 
	element->hash=hash->hash_fun(data,size);
 
	set_hash_element(hash,element);
 
	hash->used++;
 
	if (hash->resize_load > 0.0
	    && hash_table_get_load(hash) >= hash->resize_load)
		hash_table_resize(hash,hash->resize_load);
}
 
static struct hash_element *
get_hash_element(struct hash_table *hash, void *needle, size_t size)
{
	int p;
	struct hash_element tree_needle;
	struct hash_element **found;
 
	p=hash->hash_fun(needle,size)%hash->size;
 
	if (hash->table[p] == NULL)
		return NULL;
 
	tree_needle.data=needle;
	tree_needle.hash_table=hash;
 
	found=(struct hash_element **)tfind(&tree_needle,&hash->table[p]->tree,
					    hash_data_cmp);
 
	return (found == NULL) ? NULL : *found;
}
 
void *
hash_table_search(struct hash_table *hash, void *needle, size_t size)
{
	struct hash_element *found;
 
	found=get_hash_element(hash,needle,size);
 
	return (found == NULL) ? NULL : found->data;
}
 
int
hash_table_delete(struct hash_table *hash, void *needle, size_t size)
{
	int p;
	struct hash_element tree_needle;
	struct hash_element *found;
 
	p=hash->hash_fun(needle,size)%hash->size;
 
	if (hash->table[p] == NULL)
		return -1;
 
	tree_needle.data=needle;
	tree_needle.hash_table=hash;
 
	found=get_hash_element(hash,needle,size);
 
	if (found == NULL)
		return -1;
 
	if (tdelete(&tree_needle,&hash->table[p]->tree,hash_data_cmp) == NULL)
		return -1;
 
	hash->table[p]->count--;
	hash->used--;
 
	free(found);
 
	if (hash->table[p]->tree == NULL)
	{
		assert(!hash->table[p]->count);
		free(hash->table[p]);
		hash->table[p]=NULL;
	}
 
#if HASH_TABLE_DEBUG
	hash_table_assert_integrity(hash);
#endif
 
	return 0;
}
 
static void 
hash_tree_walk(const void *nodep, const VISIT which, const int depth)
{
	if (which == postorder || which == leaf)
	{
		const struct hash_element **node;
		void (*action)(void *);
 
		node=(const struct hash_element **)nodep;
		action=(*node)->hash_table->foo;
 
		action((*node)->data);
	}
}
 
void 
hash_table_walk(struct hash_table *hash, void (*action)(void *))
{
	int i;
 
	/* For thread safety */
	while (hash->foo != NULL)
		;
 
	hash->foo=action;
 
	for (i=0; i < hash->size; i++)
		if (hash->table[i] != NULL)
			twalk(hash->table[i]->tree,hash_tree_walk);
 
	hash->foo=NULL;
}
 
static void
hash_tree_free_node(void *node)
{
	struct hash_element *element=node;
	void (*action)(void *)=element->hash_table->foo;
 
	if (action != NULL)
		action(element->data);
 
	free(element);
}
 
void 
hash_table_free(struct hash_table *hash, void (*action)(void *))
{
	int i;
 
	/* For thread safety */
	while (hash->foo != NULL)
		;
 
	hash->foo=action;
 
	for (i=0; i < hash->size; i++)
		if (hash->table[i] != NULL)
		{
			tdestroy(hash->table[i]->tree,hash_tree_free_node);
			free(hash->table[i]);
		}
 
	free(hash->table);
}
 
void 
hash_table_print_info(struct hash_table *hash)
{
	int collisions;
 
	collisions=hash_table_get_collisions(hash);
 
	printf("Size:           %dn",hash_table_get_size(hash));
	printf("Used:           %dn",hash_table_get_used(hash));
	printf("Load:           %.3fn",hash_table_get_load(hash));
	printf("Collisions:     %dn",collisions);
	printf("Collision Rate: %.3fn",(float)collisions/(float)hash->used);
}
 
 
#if HASH_TABLE_DEBUG
 
static void 
hash_tree_walk_assert_integrity(const void *nodep, const VISIT which,
				const int depth)
{
	if (which == postorder || which == leaf)
	{
		const struct hash_element *element;
 
		element=*(const struct hash_element **)nodep;
 
		if (element->hash%element->hash_table->size !=
		    *(int *)element->hash_table->foo)
		{
			fprintf(stderr,"HASH INTEGRITY TEST FAILDn");		
			abort();
		}
	}
}
 
void
hash_table_assert_integrity(struct hash_table *hash)
{
	int i;
	int used=0;
 
	/* For thread safety */
	while (hash->foo != NULL)
		;
 
	for (i=0; i < hash->size; i++)
		if (hash->table[i] != NULL)
		{
			hash->foo=&i;
			twalk(hash->table[i]->tree,
			      hash_tree_walk_assert_integrity);
			used+=hash->table[i]->count;
		}
 
	if (used != hash->used)
	{
		fprintf(stderr,"HASH INTEGRITY TEST FAILDn");
		abort();
	}
 
	fprintf(stderr,"HASH OKn");
 
	hash->foo=NULL;
}
 
#endif

Exemplo de uso

FIXME Comentar o código do exemplo para o utilizador perceber como usar a hash table.

/*
 *  GPL
 *
 *  Written by Diogo Sousa aka orium
 *  Copyright (C) 2008 Diogo Sousa
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
 
#include <stdio.h>
#include "hashtable.h"
 
struct integer
{
        int n;
        char *english;
};
 
uint32_t jenkins_one_at_a_time_hash(void *data, size_t key_len)
{
	struct integer *myint=data;
	void *key=&myint->n;
	uint32_t hash=0;
	size_t i;
 
	/* We don't use the key_len arg */
	key_len=sizeof(myint->n);
 
	for (i=0; i < key_len; i++) 
	{
		hash+=*((unsigned char *)key+i);
		hash+=hash<<10;
		hash^=hash>>6;
	}
 
	hash+=hash<<3;
	hash^=hash>>11;
	hash+=hash<<15;
 
	return hash;
}
 
bool cmp(void *p1, void *p2)
{
	struct integer *i1=p1;
	struct integer *i2=p2;
 
	return i1->n == i2->n;
}
 
void my_walk_action(void *data)
{
	struct integer *i=data;
 
	printf("%d: %sn",i->n,i->english);
}
 
int main(void)
{
	struct hash_table table;
	struct integer i1={1,"one"};
	struct integer i2={2,"two"};
	struct integer i3={3,"three"};  
	struct integer i4={4,"four"};
	struct integer i5={5,"five"};
	struct integer *lost_i5;
 
	init_hash_table(&table,32,jenkins_one_at_a_time_hash,cmp);
 
	hash_table_insert(&table,&i1,0);
	hash_table_insert(&table,&i2,0);
	hash_table_insert(&table,&i3,0);
	hash_table_insert(&table,&i4,0);
	hash_table_insert(&table,&i5,0);
 
	lost_i5=hash_table_search(&table,&i5,0);
 
	printf("%d: %sn",lost_i5->n,lost_i5->english);
 
	hash_table_delete(&table,&i5,0);	
 
	lost_i5=hash_table_search(&table,&i5,0);
 
	hash_table_walk(&table,my_walk_action);
 
	hash_table_free(&table,NULL);
 
	return 0;
}
dev_geral/c/snippet/hash_table_c.txt · Esta página foi modificada pela última vez em: 2014/09/02 12:39 (Edição externa)