feisty meow concerns codebase  2.140
hash_table.h
Go to the documentation of this file.
1 #ifndef HASH_TABLE_CLASS
2 #define HASH_TABLE_CLASS
3 
4 /*****************************************************************************\
5 * *
6 * Name : hash_table *
7 * Author : Chris Koeritz *
8 * *
9 *******************************************************************************
10 * Copyright (c) 2001-$now By Author. This program is free software; you can *
11 * redistribute it and/or modify it under the terms of the GNU General Public *
12 * License as published by the Free Software Foundation; either version 2 of *
13 * the License or (at your option) any later version. This is online at: *
14 * http://www.fsf.org/copyleft/gpl.html *
15 * Please send any updates to: fred@gruntose.com *
16 \*****************************************************************************/
17 
18 #include "amorph.h"
19 
20 #include <basis/array.h>
21 #include <basis/byte_array.h>
22 #include <basis/contracts.h>
23 #include <basis/enhance_cpp.h>
24 #include <basis/functions.h>
25 
26 #include <math.h>
27 
28 namespace structures {
29 
30 const int HASH_FUDGE_FACTOR = 10;
31  // we fudge the number of elements requested by adding a little chunk, just in case.
32  // since we're going to take the power of 2 that comes closest, we can afford to be
33  // a little generous, rather than being too miserly.
34 
35 // forward.
36 template <class key, class contents> class internal_hash_array;
37 
39 
45 class hashing_algorithm : public virtual basis::clonable
46 {
47 public:
48  virtual basis::un_int hash(const void *key_data, int key_length) const = 0;
50 
56  virtual hashing_algorithm *clone() const = 0;
58 };
59 
61 
63 
70 template <class key_type, class contents>
71  // the key_type must support valid copy constructor, assignment operator and
72  // equality operators. the contents need not support any special operations;
73  // it is always considered as just a pointer.
74 class hash_table : public virtual basis::nameable
75 {
76 public:
79 
85  virtual ~hash_table();
87 
88  DEFINE_CLASS_NAME("hash_table");
89 
92 
94  enum outcomes {
95  IS_NEW = basis::common::IS_NEW,
96  EXISTING = basis::common::EXISTING
97  };
98 
101 
102  int elements() const;
104 
106  int estimated_elements() const { return c_estim_elements; }
108 
109  basis::outcome add(const key_type &key, contents *to_store);
111 
119  basis::outcome fast_dangerous_add(const key_type &key, contents *to_store);
121 
124  bool find(const key_type &key, contents * &item_found) const;
126 
130  contents *find(const key_type &key) const
131  { contents *c = NULL_POINTER; return find(key, c)? c : NULL_POINTER; }
133 
134  contents *acquire(const key_type &key);
136 
140  bool zap(const key_type &key);
142 
144  void reset();
146 
147  typedef bool apply_function(const key_type &key, contents &current,
148  void *data_link);
150 
155  void apply(apply_function *to_apply, void *data_link);
157 
163  basis::outcome add(key_type *key, contents *to_store, bool check_dupes = true);
165 
172  bool verify() const;
174 
176 private:
177  int c_estim_elements;
178  hashing_algorithm *_hasher;
180  int _last_iter;
182 
183  hash_table(const hash_table &to_copy);
185  hash_table &operator =(const hash_table &to_copy);
187 
188 public:
191 };
192 
194 
196 
202 template <class key_type, class contents>
204  const hash_table<key_type, contents> &source);
205 
207 
208 // implementations for longer methods below....
209 
210 // this is a very special micro-managed class. it holds two pointers which
211 // it neither creates nor destroys. thus all interaction with it must be
212 // careful about removing these objects at the appropriate time. we don't
213 // want automatic memory management since we want a cheap copy on the items
214 // in the buckets.
215 
216 template <class key_type, class contents>
217 class hash_wrapper : public virtual basis::root_object
218 {
219 public:
220  key_type *_id;
221  contents *_data;
222 
223  hash_wrapper(key_type *id = NULL_POINTER, contents *data = NULL_POINTER)
224  : _id(id), _data(data) {}
225 };
226 
228 
229 template <class key_type, class contents>
230 class bucket
231  : public basis::array<hash_wrapper<key_type, contents> >,
232  public virtual basis::root_object
233 {
234 public:
235  bucket() : basis::array<hash_wrapper<key_type, contents> >(0, NULL_POINTER,
236  basis::byte_array::SIMPLE_COPY | basis::byte_array::EXPONE
237  | basis::byte_array::FLUSH_INVISIBLE) {}
238 
239  int find(const key_type &to_find) {
240  for (int i = 0; i < this->length(); i++) {
241  key_type *curr_key = this->get(i)._id;
242 //hmmm: if curr key is not set, is that a logic error? it seems like we
243 // are seeing the potential for this.
244  if (curr_key && (to_find == *curr_key))
245  return i;
246  }
247  return basis::common::NOT_FOUND;
248  }
249 };
250 
252 
253 template <class key_type, class contents>
254 class internal_hash_array : public amorph<bucket<key_type, contents> >
255 {
256 public:
257  internal_hash_array(int estimated_elements)
258  : amorph<bucket<key_type, contents> >
259  (hash_table<key_type, contents>::calculate_num_slots(estimated_elements)) {}
260 };
261 
263 
264 template <class key_type, class contents>
265 hash_table<key_type, contents>::hash_table(const hashing_algorithm &hasher, int estimated_elements)
266 : c_estim_elements(estimated_elements),
267  _hasher(hasher.clone()),
268  _table(new internal_hash_array<key_type, contents>(estimated_elements)),
269  _last_iter(0)
270 {}
271 
272 template <class key_type, class contents>
274 {
275 #ifdef EXTREME_CHECKING
276  FUNCDEF("destructor");
277  if (!verify()) deadly_error(class_name(), func, "state did not verify.");
278 #endif
279  reset();
280  basis::WHACK(_table);
281  basis::WHACK(_hasher);
282 }
283 
284 template <class key_type, class contents>
286 {
287 //printf("[ elems wanted = %d, fudge adjusted = %d\n", estimated_elements, estimated_elements + HASH_FUDGE_FACTOR);
288  int log_2_truncated = int(log(float(estimated_elements + HASH_FUDGE_FACTOR)) / log(2.0));
289 //printf(" log 2 of elems, truncated = %d\n", log_2_truncated);
290  int slots_needed_for_elems = (int)pow(2.0, double(log_2_truncated + 1));
291 //printf(" slots needed = %d]\n", slots_needed_for_elems );
292  return slots_needed_for_elems;
293 }
294 
295 // the specialized copy operation.
296 template <class key_type, class contents>
298  const hash_table<key_type, contents> &source)
299 {
300 #ifdef EXTREME_CHECKING
301  FUNCDEF("copy_hash_table");
302  if (!source.verify())
303  deadly_error(class_name(), func, "source state did not verify.");
304 #endif
305  target.reset();
306  for (int i = 0; i < source.table_access().elements(); i++) {
307  bucket<key_type, contents> *buck = source.table_access().borrow(i);
308  if (!buck) continue;
309  for (int j = 0; j < buck->length(); j++) {
310  target.add(*((*buck)[j]._id), new contents(*((*buck)[j]._data)));
311  }
312  }
313 #ifdef EXTREME_CHECKING
314  if (!target.verify())
315  deadly_error(class_name(), func, "target state did not verify.");
316 #endif
317  #undef class_name
318 }
319 
320 template <class key_type, class contents>
322 {
323 #ifdef EXTREME_CHECKING
324  FUNCDEF("reset");
325  if (!verify()) deadly_error(class_name(), func, "state did not verify.");
326 #endif
327  // iterate over the list whacking the content items in the buckets.
328  for (int i = 0; i < _table->elements(); i++) {
329  bucket<key_type, contents> *buck = _table->acquire(i);
330  if (!buck) continue;
331  for (int j = 0; j < buck->length(); j++) {
332  basis::WHACK((*buck)[j]._data); // eliminate the stored data.
333  basis::WHACK((*buck)[j]._id); // eliminate the stored key.
334  buck->zap(j, j); // remove the element.
335  j--; // skip back before whack happened.
336  }
337  basis::WHACK(buck);
338  }
339 #ifdef EXTREME_CHECKING
340  if (!verify())
341  deadly_error(class_name(), func, "state did not verify afterwards.");
342 #endif
343 }
344 
345 template <class key_type, class contents>
347 {
348  for (int i = 0; i < _table->elements(); i++) {
349  const bucket<key_type, contents> *buck = _table->borrow(i);
350  if (!buck) continue; // that's acceptable.
351  for (int j = 0; j < buck->length(); j++) {
352  const hash_wrapper<key_type, contents> &wrap = (*buck)[j];
353  if (!wrap._data) {
354 // program_wide_logger::get().log(a_sprintf("hash table: no data segment at position %d.", j));
355  return false;
356  }
357  if (!wrap._id) {
358 // program_wide_logger::get().log(a_sprintf("hash table: no identifier at position %d.", j));
359  return false;
360  }
361  }
362  }
363  return true;
364 }
365 
366 template <class key_type, class contents>
369 { return *_table; }
370 
371 template <class key_type, class contents>
372 void hash_table<key_type, contents>::rehash(int estimated_elements)
373 {
374 #ifdef EXTREME_CHECKING
375  FUNCDEF("rehash");
376  if (!verify()) deadly_error(class_name(), func, "state did not verify.");
377 #endif
378  typedef bucket<key_type, contents> buckie;
379  hash_table<key_type, contents> new_hash(*_hasher, estimated_elements);
380  // this is the new table that will be used.
381 
382  // scoot through the existing table so we can move items into the new one.
383  for (int i = 0; i < _table->elements(); i++) {
384  buckie *b = _table->borrow(i); // look at the current element.
385  if (!b) continue; // nothing here, keep going.
386  for (int j = b->length() - 1; j >= 0; j--) {
387  key_type *k = b->use(j)._id;
388  contents *c = b->use(j)._data;
389  new_hash.add(k, c);
390  }
391  b->reset();
392  // clean out any items in the buckets here now that they've moved.
393  }
394 
395  // now flip the contents of the new guy into "this".
396  _table->reset(); // toss previous stuff.
397  _table->adjust(new_hash._table->elements());
398  for (int q = 0; q < new_hash._table->elements(); q++)
399  _table->put(q, new_hash._table->acquire(q));
400  // reset other data members.
401  c_estim_elements = new_hash.c_estim_elements;
402  _last_iter = 0;
403 #ifdef EXTREME_CHECKING
404  if (!verify())
405  deadly_error(class_name(), func, "state did not verify afterwards.");
406 #endif
407 }
408 
409 template <class key_type, class contents>
411  contents *to_store)
412 { return add(new key_type(key), to_store); }
413 
414 template <class key_type, class contents>
416  contents *to_store, bool check_dupes)
417 {
418 #ifdef EXTREME_CHECKING
419  FUNCDEF("add");
420  if (!verify()) deadly_error(class_name(), func, "state did not verify.");
421 #endif
422  // get a hash value.
423  basis::un_int hashed = _hasher->hash((const void *)key, sizeof(key_type));
424  // make the value appropriate for our table.
425  hashed = hashed % _table->elements();
426  // see if the key already exists there.
427  bucket<key_type, contents> *buck = _table->borrow(hashed);
428  if (!buck) {
429  // this slot doesn't exist yet.
430  buck = new bucket<key_type, contents>;
431  _table->put(hashed, buck);
432  }
433  if (!check_dupes) {
434  // we aren't even going to check for its existence.
435  *buck += hash_wrapper<key_type, contents>(key, to_store);
436  return IS_NEW;
437  }
438  int indy = buck->find(*key);
439  if (basis::negative(indy)) {
440  // that value was not seen yet, so we're adding a new entry.
441  *buck += hash_wrapper<key_type, contents>(key, to_store);
442 #ifdef EXTREME_CHECKING
443  if (!verify())
444  deadly_error(class_name(), func, "state did not verify afterwards.");
445 #endif
446  return IS_NEW;
447  }
448  // that key already existed, so we'll re-use its slot with the new data.
449  basis::WHACK((*buck)[indy]._data);
450  basis::WHACK(key); // toss since we're not using.
451  (*buck)[indy]._data = to_store;
452 #ifdef EXTREME_CHECKING
453  if (!verify())
454  deadly_error(class_name(), func, "state did not verify afterwards.");
455 #endif
456  return EXISTING;
457 }
458 
459 template <class key_type, class contents>
461  (const key_type &key, contents *to_store)
462 { return add(new key_type(key), to_store, false); }
463 
464 template <class key_type, class contents>
465 bool hash_table<key_type, contents>::find(const key_type &key,
466  contents * &item_found) const
467 {
468 #ifdef EXTREME_CHECKING
469  FUNCDEF("find");
470  if (!verify()) deadly_error(class_name(), func, "state did not verify.");
471 #endif
472  item_found = NULL_POINTER;
473  // get a hash value.
474  basis::un_int hashed = _hasher->hash((const void *)&key, sizeof(key));
475  // make the value appropriate for our table.
476  hashed = hashed % _table->elements();
477  // see if the key exists in the bucket.
478  bucket<key_type, contents> *buck = _table->borrow(hashed);
479  if (!buck) return false;
480  int indy = buck->find(key);
481  if (basis::negative(indy)) return false; // not there.
482  // was there, so retrieve the data.
483  item_found = (*buck)[indy]._data;
484  return true;
485 }
486 
487 template <class key_type, class contents>
488 contents *hash_table<key_type, contents>::acquire(const key_type &key)
489 {
490 #ifdef EXTREME_CHECKING
491  FUNCDEF("acquire");
492  if (!verify()) deadly_error(class_name(), func, "state did not verify.");
493 #endif
494  // get a hash value.
495  basis::un_int hashed = _hasher->hash((const void *)&key, sizeof(key));
496  // make the value appropriate for our table.
497  hashed = hashed % _table->elements();
498  // see if the key exists in the bucket.
499  bucket<key_type, contents> *buck = _table->borrow(hashed);
500  if (!buck) return NULL_POINTER;
501  int indy = buck->find(key);
502  if (basis::negative(indy)) return NULL_POINTER; // nope, not there.
503  contents *to_return = (*buck)[indy]._data;
504  basis::WHACK((*buck)[indy]._id); // clean the id.
505  buck->zap(indy, indy); // toss the storage blob for the item.
506 #ifdef EXTREME_CHECKING
507  if (!verify())
508  deadly_error(class_name(), func, "state did not verify afterwards.");
509 #endif
510  return to_return;
511 }
512 
513 template <class key_type, class contents>
514 bool hash_table<key_type, contents>::zap(const key_type &key)
515 {
516 #ifdef EXTREME_CHECKING
517  FUNCDEF("zap");
518  if (!verify()) deadly_error(class_name(), func, "state did not verify.");
519 #endif
520  // get a hash value.
521  basis::un_int hashed = _hasher->hash((const void *)&key, sizeof(key));
522  // make the value appropriate for our table.
523  hashed = hashed % _table->elements();
524  // see if the key exists in the bucket.
525  bucket<key_type, contents> *buck = _table->borrow(hashed);
526  if (!buck) return false;
527  int indy = buck->find(key);
528  if (basis::negative(indy)) {
529  // nope, not there.
530  return false;
531  }
532  basis::WHACK((*buck)[indy]._data); // delete the data held.
533  basis::WHACK((*buck)[indy]._id); // delete the data held.
534  buck->zap(indy, indy); // toss the storage blob for the item.
535  if (!buck->length()) {
536  // clean up this bucket now.
537  buck = _table->acquire(hashed);
538  basis::WHACK(buck);
539  }
540 #ifdef EXTREME_CHECKING
541  if (!verify())
542  deadly_error(class_name(), func, "state did not verify afterwards.");
543 #endif
544  return true;
545 }
546 
547 template <class key_type, class contents>
548 void hash_table<key_type, contents>::apply(apply_function *to_apply,
549  void *data_link)
550 {
551 #ifdef EXTREME_CHECKING
552  FUNCDEF("apply");
553  if (!verify()) deadly_error(class_name(), func, "state did not verify.");
554 #endif
555  typedef bucket<key_type, contents> buckie;
556  int bucks_seen = 0;
557  int posn = _last_iter; // start at the same place we left.
558  while (bucks_seen++ < _table->elements()) {
559  if ( (posn < 0) || (posn >= _table->elements()) )
560  posn = 0;
561  buckie *b = _table->borrow(posn);
562  _last_iter = posn++;
563  // record where the iteration last touched and increment next position.
564  // we must do this before we check if the bucket exists or we'll rotate
565  // on this same place forever.
566  if (!b) continue; // nothing here, keep going.
567  for (int j = 0; j < b->length(); j++) {
568  contents *c = b->use(j)._data;
569  if (!c) {
570 #ifdef EXTREME_CHECKING
571  deadly_error("hash_table", "apply", "logic error--missing pointer");
572 #endif
573  continue;
574  }
575  if (!to_apply(*b->use(j)._id, *c, data_link)) return;
576  }
577  }
578 }
579 
580 template <class key_type, class contents>
582 {
583 #ifdef EXTREME_CHECKING
584  FUNCDEF("elements");
585  if (!verify()) deadly_error(class_name(), func, "state did not verify.");
586 #endif
587  typedef bucket<key_type, contents> buckie;
588  int to_return = 0;
589  for (int i = 0; i < _table->elements(); i++) {
590  bucket<key_type, contents> *buck = _table->borrow(i);
591  if (!buck) continue; // nothing to count.
592  to_return += buck->length();
593  }
594  return to_return;
595 }
596 
597 #undef static_class_name
598 
599 } //namespace.
600 
601 #endif // outer guard.
602 
Represents a sequential, ordered, contiguous collection of objects.
Definition: array.h:54
@ EXPONE
synonym for EXPONENTIAL_GROWTH.
Definition: array.h:61
@ SIMPLE_COPY
the contents can be memcpy'd and are not deep.
Definition: array.h:59
@ FLUSH_INVISIBLE
blanks out allocated but inaccessible elements.
Definition: array.h:62
outcome put(int index, const contents &to_put)
Stores an object at the index "index" in the array.
Definition: array.h:834
const hash_wrapper< key_type, contents > & get(int index) const
Accesses individual objects stored in "this" at the "index" position.
Definition: array.h:372
int length() const
Returns the current reported length of the allocated C array.
Definition: array.h:115
outcome zap(int start, int end)
Deletes from "this" the objects inclusively between "start" and "end".
Definition: array.h:769
contents & use(int index)
A non-constant version of get(); the returned object can be modified.
Definition: array.h:365
array(int number=0, const hash_wrapper< key_type, contents > *init=NULL_POINTER, int flags=EXPONENTIAL_GROWTH|FLUSH_INVISIBLE)
Constructs an array with room for "number" objects.
Definition: array.h:315
A clonable object knows how to make copy of itself.
Definition: contracts.h:109
Root object for any class that knows its own name.
Definition: contracts.h:123
Outcomes describe the state of completion for an operation.
Definition: outcome.h:31
int find(const key_type &to_find)
Definition: hash_table.h:239
Implements hashing into buckets for quick object access.
Definition: hash_table.h:75
bool apply_function(const key_type &key, contents &current, void *data_link)
the "apply_function" is what a user of the "apply" method must supply.
Definition: hash_table.h:147
int elements() const
the number of valid items we found by traversing the hash table.
Definition: hash_table.h:581
basis::outcome add(key_type *key, contents *to_store, bool check_dupes=true)
specialized add for a pre-existing pointer "key".
Definition: hash_table.h:415
basis::outcome add(const key_type &key, contents *to_store)
Stores "to_store" into the table given its "key" for hashing.
Definition: hash_table.h:410
internal_hash_array< key_type, contents > & table_access() const
special accessor for the copy_hash_table method only.
Definition: hash_table.h:368
bool verify() const
returns true if the hash table is internally consistent.
Definition: hash_table.h:346
DEFINE_CLASS_NAME("hash_table")
static int calculate_num_slots(int estimated_elements)
a helper method that lets us know what n is for how many 2^n slots we should have.
Definition: hash_table.h:285
void apply(apply_function *to_apply, void *data_link)
Invokes the function "to_apply" on every entry in the table.
Definition: hash_table.h:548
void rehash(int estimated_elements)
resizes the hashing structures to optimise for a new size of "estimated_elements".
Definition: hash_table.h:372
contents * acquire(const key_type &key)
retrieves the contents held for "key" out of the table.
Definition: hash_table.h:488
void reset()
removes all entries in the table and returns it to a pristine state.
Definition: hash_table.h:321
bool find(const key_type &key, contents *&item_found) const
locates the item specified by the "key", if possible.
Definition: hash_table.h:465
basis::outcome fast_dangerous_add(const key_type &key, contents *to_store)
Like the add method above, but doesn't check for duplicates.
Definition: hash_table.h:461
hash_table(const hashing_algorithm &hasher, int estimated_elements)
Creates a table using the "hasher" that is ready to store "estimated_elements".
Definition: hash_table.h:265
virtual ~hash_table()
destroys any objects left in the hash_table.
Definition: hash_table.h:273
int estimated_elements() const
returns the size of table we're optimized for.
Definition: hash_table.h:106
contents * find(const key_type &key) const
simplified form of above find() method.
Definition: hash_table.h:130
bool zap(const key_type &key)
removes the entry with the "key" specified.
Definition: hash_table.h:514
hash_wrapper(key_type *id=NULL_POINTER, contents *data=NULL_POINTER)
Definition: hash_table.h:223
A hashing algorithm takes a key and derives a related integer from it.
Definition: hash_table.h:46
virtual hashing_algorithm * clone() const =0
supports virtual construction of new algorithm objects.
virtual basis::un_int hash(const void *key_data, int key_length) const =0
returns a hash value based on the "key_data" and "key_length".
internal_hash_array(int estimated_elements)
Definition: hash_table.h:257
#define deadly_error(c, f, i)
#define NULL_POINTER
The value representing a pointer to nothing.
Definition: definitions.h:32
#define FUNCDEF(func_in)
FUNCDEF sets the name of a function (and plugs it into the callstack).
Definition: enhance_cpp.h:54
The guards collection helps in testing preconditions and reporting errors.
Definition: array.h:30
void WHACK(contents *&ptr)
deletion with clearing of the pointer.
Definition: functions.h:121
unsigned int un_int
Abbreviated name for unsigned integers.
Definition: definitions.h:62
bool negative(const type &a)
negative returns true if "a" is less than zero.
Definition: functions.h:43
A dynamic container class that holds any kind of object via pointers.
Definition: amorph.h:55
void copy_hash_table(hash_table< key_type, contents > &target, const hash_table< key_type, contents > &source)
Copies the entire hash table, given a contents type that supports copying.
Definition: hash_table.h:297
const int HASH_FUDGE_FACTOR
Definition: hash_table.h:30