top

COMP 202: Principles of Object-Oriented Programming II

← Dictionaries & Hashing →

Home — Fall 2008

Dictionary

A major theme in computing is the theme of storage/retrieval/removal: store data somewhere so that it can later be retrieved and discarded if no longer needed, all of this in the most efficient manner. The abstraction of these computing activities is embodied in the notion of what is called a dictionary, expressed in Java as an interface as follows.

package dict;

import java.lang.*;

/**
 * Represents a (key, value) pair stored in a dictionary,
 * where key is a Comparable.
 */
public class DictionaryPair implements Comparable {

private Comparable _key;
    private Object _value;

/**
     * Initializes this DictionaryPair to a given (key, value) pair.
     */
    public DictionaryPair(Comparable key, Object value) {
        _key   = key;
        _value = value;
    }

/**
     * Compares the key of this DictionaryPair against the key of the
     * other DictionayPair.
     * @param other a DictionaryPair
     */
    public int compareTo(Object other) {
        return _key.compareTo(((DictionaryPair)other)._key);
    }
    
    /**
     * Returns the key of this DictionaryPair.
     */
    public Comparable getKey() {
        return _key;
    }
    
    /**
     * Returns the value of this DictionaryPair.
     */
    public Object getValue() {
        return _value;
    }
    
    /**
     * Shows "(", followed by the String representation of the key, followed by
     * a ",", followed by the String representation of the associcated value,
     * followed by a ")".
     */
    public String toString() {
        return "(" + _key + "," + _value + ")";
    }
}

package dict;

import java.lang.*;
import listFW.*;

/**
 * Defines an interface for a simple dictionary: a container of
 * (key, value) pairs that supports insertion, lookup and removal by key.
 */
public interface IDictionary {
    /**
     * Clears the contents of the dictionary leaving it empty.
     */
    public void clear();

    /**
     * Returns true if the dictionary is empty and false otherwise.
     * Non OO!
     * How can we eliminate this check?
     */
    public boolean isEmpty();

    /**
     * Returns true if the dictionary is full and false otherwise.
     */
    public boolean isFull();

    /**
     * Returns an IList of DictionaryPairs corresponding to the entire
     * contents of the dictionary.
     * @param lf a factory to manufacture IList objects.
     */
    public IList elements(IListFactory lf);

    /**
     * Returns the DictionaryPair with the given key.  If there is not
     * a DictionaryPair with the given key, returns null.
     *
     * Returns a DictionaryPair rather than the value alone so that
     * the user can distinguish between not finding the key and
     * finding the pair (key, null).
     * @param key the key to look up
     */
    public DictionaryPair lookup(Comparable key);

    /**
     * Inserts the given key and value.  If the given key is already
     * in the dictionary, the given value replaces the key's old
     * value.
     * @param key   the key to insert
     * @param value the value to associate with the key
     */
    public void insert(Comparable key, Object value);

    /**
     * Removes the DictionaryPair with the given key and returns it.
     * If there is not a DictionaryPair with the given key, returns
     * null.
     * @param key the key to remove
     */
    public DictionaryPair remove(Comparable key);
}

IDictionary is an example of what we call an unrestricted access container (as opposed to a restricted access container). A simple way to implement IDictionary is to use an LRStruct.

package dict;

import java.lang.*;
import lrs.*;
import listFW.*;
import listFW.factory.*;

/**
 * An implementation of IDictionary using an LRStruct to hold the
 * DictionaryPairs.  insert() keeps the pairs in ascending key order,
 * which lets lookup() and remove() stop as soon as they pass the
 * position where the key would have to be.
 */
public class DictLRS implements IDictionary {

    /**
     * Visitor to check for emptiness: answers Boolean.TRUE for an empty
     * LRStruct and Boolean.FALSE for a non-empty one.
     * It holds no state, so one instance serves all DictLRS objects.
     * Non OO!
     */
    private static final IAlgo IsEmpty = new IAlgo() {
        public Object emptyCase(LRStruct host, Object input) {
            return Boolean.TRUE;
        }

        public Object nonEmptyCase(LRStruct host, Object input) {
            return Boolean.FALSE;
        }
    };

    /*
     * A list of DictionaryPairs ordered by key
     */
    private LRStruct _lrs = new LRStruct();

    /**
     * Clears the contents of the dictionary leaving it empty.
     *
     * Implemented by replacing the existing LRStruct with a new,
     * empty one.
     */
    public void clear() {
        _lrs = new LRStruct();
    }

    /**
     * Returns true if the dictionary is empty and false otherwise.
     *
     * Implemented as a visitor to LRStruct.
     */
    public boolean isEmpty() {
        return ((Boolean) _lrs.execute(IsEmpty, null)).booleanValue();
    }

    /**
     * Returns false always.
     */
    public boolean isFull() {
        return false;
    }

    /**
     * Returns an IList of DictionaryPairs corresponding to the entire
     * contents of the dictionary, in the order in which they are stored.
     *
     * Implemented as a visitor to LRStruct.
     *
     * @param lf a factory to manufacture IList objects.
     */
    public IList elements(final IListFactory lf) {
        return (IList) _lrs.execute(new IAlgo() {
            public Object emptyCase(LRStruct host, Object input) {
                return lf.makeEmptyList();
            }

            public Object nonEmptyCase(LRStruct host, Object input) {
                // Copy the rest first, then put this node's pair in front.
                IList rest = (IList) host.getRest().execute(this, input);
                return lf.makeNEList(host.getFirst(), rest);
            }
        }, null);
    }

    /**
     * Returns the DictionaryPair with the given key.  If there is not
     * a DictionaryPair with the given key, returns null.
     *
     * Returns a DictionaryPair rather than the value alone so that
     * the user can distinguish between not finding the key and
     * finding the pair (key, null).
     *
     * Implemented as a visitor to LRStruct.
     *
     * @param key the key to look up
     */
    public DictionaryPair lookup(Comparable key) {
        return (DictionaryPair) _lrs.execute(new IAlgo() {
            public Object emptyCase(LRStruct host, Object input) {
                return null;
            }

            public Object nonEmptyCase(LRStruct host, Object input) {
                DictionaryPair first = (DictionaryPair) host.getFirst();
                int result = first.getKey().compareTo(input);

                if (result > 0) {
                    // host > input: the list is ordered, so the key is absent.
                    return null;
                } else if (result == 0) {
                    // host == input
                    return first;
                } else {
                    // host < input: keep searching.
                    return host.getRest().execute(this, input);
                }
            }
        }, key);
    }

    /**
     * Inserts the given key and value.  If the given key is already
     * in the dictionary, the given value replaces the key's old
     * value.
     *
     * Implemented as a visitor to LRStruct that inserts the key and
     * value in order.
     *
     * @param key   the key to insert
     * @param value the value to associate with the key
     */
    public void insert(Comparable key, Object value) {
        _lrs.execute(new IAlgo() {
            public Object emptyCase(LRStruct host, Object input) {
                // Reached the end: the new pair has the largest key so far.
                return host.insertFront(input);
            }

            public Object nonEmptyCase(LRStruct host, Object input) {
                DictionaryPair first = (DictionaryPair) host.getFirst();
                int result = first.compareTo(input);

                if (result > 0) {
                    // host > input: the new pair belongs just before host's first.
                    return host.insertFront(input);
                } else if (result == 0) {
                    // Same key: overwrite the stored pair with the new one.
                    return host.setFirst(input);
                } else {
                    // host < input: keep looking for the insertion point.
                    return host.getRest().execute(this, input);
                }
            }
        }, new DictionaryPair(key, value));
    }

    /**
     * Removes the DictionaryPair with the given key and returns it.
     * If there is not a DictionaryPair with the given key, returns
     * null.
     *
     * Implemented as a visitor to LRStruct.
     *
     * @param key the key to remove
     */
    public DictionaryPair remove(Comparable key) {
        return (DictionaryPair) _lrs.execute(new IAlgo() {
            public Object emptyCase(LRStruct host, Object input) {
                return null;
            }

            public Object nonEmptyCase(LRStruct host, Object input) {
                DictionaryPair first = (DictionaryPair) host.getFirst();
                int result = first.getKey().compareTo(input);

                if (result > 0) {
                    // host > input: the list is ordered, so the key is absent.
                    return null;
                } else if (result == 0) {
                    // host == input: take the matching pair out of the list.
                    host.removeFront();
                    return first;
                } else {
                    // host < input: keep searching.
                    return host.getRest().execute(this, input);
                }
            }
        }, key);
    }

    /**
     * Delegates the conversion to the LRStruct toString().
     */
    public String toString() {
        return _lrs.toString();
    }
}

The problem with such an implementation is that each of the operations insert, lookup and remove takes O(N) time, where N is the total number of elements in the dictionary. Using a self-balancing tree would guarantee O(log N) time.

Can we do better than that? The answer is yes and no. With a data structure called a "hash table", coupled with an appropriate "hash function", we can achieve an amortized performance of O(1), that is, constant time!

Hash Tables and Hash Functions

Hash Tables

A hash table is a generalization of an ordinary array.
When the number of keys actually stored is small relative to the total number of possible keys, hash tables become an effective alternative to directly addressing an array, since a hash table typically uses an array of size proportional to the number of keys actually stored.
Instead of using the key as an array index directly, the array index is computed from the key.
With hashing, an element with key k is stored in slot h(k); i.e., a hash function h is used to compute the slot from the key k.
h maps the set U of keys into the slots of a hash table T[0..m-1]: $h : U \rightarrow \{0, 1, \ldots, m-1\}$

The Problem: Collisions

Two keys may hash to the same slot. This is called a collision. Because |U|>m, collisions are unavoidable.
To minimize collisions, h should appear ``random'', i.e., adjacent keys should not hash to adjacent slots.
To cope with collisions, the simplest method is chaining.

Chaining

In chaining, we put all the elements that hash to the same slot in a linked list, i.e., slot j contains a reference to the head of the list of all stored elements that hash to j; if there are no such elements, slot j contains an empty list.
To insert an element, we simply put it at the front of the list. So, the worst case running time is O(1).
To lookup an element, we search the list belonging to the slot for the corresponding key. So, the worst case running time is proportional to the length of the list.
Removal is identical to lookup.

Performance

Given a hash table with m slots that stores n elements, we define the load factor alpha as n/m, i.e., the average number of elements in a chain.
The worst case behavior of hashing with chaining is O(n): All n keys hash to the same slot, creating a list of length n.
The expected case behavior depends on how well the hash function distributes the set of keys to be stored among the m slots, on average. We will assume that

any given element is equally likely to hash into any of the m slots and
the hash value can be computed in O(1) time. Then the expected case search time is O(1+alpha).

If the number of hash table slots is at least proportional to the number of elements in the table, we have n=O(m) and consequently, alpha = n/m = O(m)/m = O(1). Thus, searching takes constant time on average.

Hash Table Implementations

The Java Collections Framework provided by the Java SDK includes an "industrial strength" implementation of the hash table data structure called Hashtable (java.util.Hashtable).
Below is our own implementation of hash tables. One of the main differences between Sun's version and ours is that our elements() method returns an IList, while Sun's hash table returns an Enumeration. By returning an IList, we can process our hash table using visitors, while with an Enumeration, one will have to write loops to do the processing. The choice is yours!

package dict;

import java.lang.*;
import lrs.*;
import listFW.*;

/**
 * An IDictionary implemented using a hash table.  Collisions are handled
 * using chaining.  The chains are implemented using DictLRS.
 *
 * Uses the method hashCode() defined by class Object as the hash
 * function.  Any class may override this method with a new
 * implementation.
 *
 * @author Alan L. Cox
 * @since 03/28/03
 */
public class DictHash implements IDictionary {
    /*
     * Initialize _table to reference a single-element array containing
     * a reference to an empty DictLRS.  Every slot always holds a chain,
     * so no slot is ever null.
     */
    private DictLRS[] _table = { new DictLRS() };

    /*
     * The number of distinct keys currently stored in the table.
     */
    private int _tableOccupancy = 0;

    /*
     * An IList factory used to linearize each internal DictLRS when
     * the _table array is resized and when toString() is called.
     */
    private final IListFactory _lf;

    /*
     * An upper bound on the load factor.
     */
    private final double _loadFactor;

    /**
     * Creates an empty hash table.
     *
     * @param lf         the IList factory used internally to enumerate chains
     * @param loadFactor the upper bound on (number of keys) / (number of slots)
     *                   that triggers a doubling of the table; must be positive
     * @throws IllegalArgumentException if loadFactor is not a positive number
     *         (otherwise the table would double on every single insert)
     */
    public DictHash(IListFactory lf, double loadFactor) {
        // Written as !(x > 0) so that NaN is rejected as well.
        if (!(loadFactor > 0.0)) {
            throw new IllegalArgumentException(
                "loadFactor must be positive: " + loadFactor);
        }
        _lf = lf;
        _loadFactor = loadFactor;
    }

    /**
     * Clears the contents of the dictionary leaving it empty.
     *
     * Implemented by replacing the existing table with a new
     * single-slot table holding one empty chain.
     */
    public void clear() {
        _table = new DictLRS[1];
        _table[0] = new DictLRS();
        _tableOccupancy = 0;
    }

    /**
     * Returns true if the dictionary is empty and false otherwise.
     */
    public boolean isEmpty() {
        return _tableOccupancy == 0;
    }

    /**
     * Returns false always: the table doubles when the load factor
     * bound is reached, so insert() never refuses a new key.
     */
    public boolean isFull() {
        return false;
    }

    /**
     * Returns an IList of DictionaryPairs corresponding to the entire
     * contents of the dictionary.
     *
     * Note that the elements are not in order.
     *
     * @param lf a factory to manufacture IList objects.
     */
    public IList elements(final IListFactory lf) {
        // Visitor that copies the host list in front of the list passed
        // as input.  It keeps no state, so one instance serves every slot.
        IListAlgo prependTo = new IListAlgo() {
            public Object emptyCase(IEmptyList host, Object input) {
                return input;
            }

            public Object nonEmptyCase(INEList host, Object input) {
                return lf.makeNEList(host.getFirst(),
                                     (IList) host.getRest().execute(this, input));
            }
        };

        IList all = lf.makeEmptyList();
        for (int i = 0; i < _table.length; i++) {
            all = (IList) _table[i].elements(lf).execute(prependTo, all);
        }
        return all;
    }

    /**
     * Returns the DictionaryPair with the given key.  If there is not
     * a DictionaryPair with the given key, returns null.
     *
     * Returns a DictionaryPair rather than the value alone so that
     * the user can distinguish between not finding the key and
     * finding the pair (key, null).
     *
     * This method is O(1) in the expected case and O(n) in the worst
     * case.
     *
     * @param key the key to lookup
     * @return the DictionaryPair found
     */
    public DictionaryPair lookup(Comparable key) {
        return _table[slotOf(key, _table.length)].lookup(key);
    }

    /**
     * Inserts the given key and value.  If the given key is already
     * in the dictionary, the given value replaces the key's old
     * value and the number of stored keys does not change.
     *
     * This method is O(1) in the expected case if we amortize the cost
     * of doubling the hash table over subsequent insert()'s.  In the
     * worst case, when the keys pile up in one chain, it is O(n),
     * because the chain has to be searched.
     *
     * @param key the key to insert
     * @param value the value to insert
     */
    public void insert(Comparable key, Object value) {
        int index = slotOf(key, _table.length);

        // Only a key that is not stored yet adds to the occupancy;
        // replacing a value must leave the count (and the table) alone.
        if (_table[index].lookup(key) == null) {
            if (_tableOccupancy >= (_loadFactor * _table.length)) {
                doubleTable();
                index = slotOf(key, _table.length);
            }
            _tableOccupancy++;
        }
        _table[index].insert(key, value);
    }

    /**
     * Removes the DictionaryPair with the given key and returns it.
     * If there is not a DictionaryPair with the given key, returns
     * null.
     *
     * This method is O(1) in the expected case and O(n) in the worst
     * case.
     *
     * @param key the key to remove
     * @return the DictionaryPair removed
     */
    public DictionaryPair remove(Comparable key) {
        DictionaryPair pair = _table[slotOf(key, _table.length)].remove(key);

        if (pair != null) {
            _tableOccupancy--;
        }
        return pair;
    }

    /**
     * Returns a string representing the contents of the dictionary.
     */
    public String toString() {
        return elements(_lf).toString();
    }

    /**
     * Maps a key to a slot index in the range [0, length).
     * hashCode() may be negative, and in Java the % operator keeps the
     * sign of its left operand, so the sign bit is cleared first.
     */
    private static int slotOf(Comparable key, int length) {
        return (key.hashCode() & Integer.MAX_VALUE) % length;
    }

    /**
     * Replaces _table by a table with twice as many slots and re-inserts
     * every stored pair into the slot it hashes to in the new table.
     */
    private void doubleTable() {
        final DictLRS[] newTable = new DictLRS[2 * _table.length];
        for (int i = 0; i < newTable.length; i++) {
            newTable[i] = new DictLRS();
        }

        // Visitor that walks one chain and re-inserts each pair.
        // NOTE(review): the case signatures follow the IEmptyList/INEList
        // form that elements() uses -- confirm against listFW.IListAlgo.
        IListAlgo rehash = new IListAlgo() {
            public Object emptyCase(IEmptyList host, Object input) {
                return null;
            }

            public Object nonEmptyCase(INEList host, Object input) {
                DictionaryPair pair = (DictionaryPair) host.getFirst();
                int index = slotOf(pair.getKey(), newTable.length);

                newTable[index].insert(pair.getKey(), pair.getValue());
                return host.getRest().execute(this, input);
            }
        };

        for (int i = 0; i < _table.length; i++) {
            _table[i].elements(_lf).execute(rehash, null);
        }
        _table = newTable;
    }
}

← Dictionaries & Hashing →