散列表
优点: 插入和查找操作的时间复杂度都是O(1)
缺点: 性能的保证来自散列函数的质量,是无序符号表,不支持有序性相关操作,是以空间换取时间的数据结构.
1. 基于拉链法的散列表
概述: 通过链表方式处理碰撞冲突,将碰撞冲突的键放到链表中
//基于拉链法的散列表
public class SeparateChainingHashST<K,V> {
private static final int INIT_CAPACITY = 1 << 4; //初始容量,第一次put时分配
private int n; //键值对数量
private Node<K,V>[] table; //散列表
private class Node<K,V>{
final int hash;
K key;
V value;
Node next;
public Node(int hash,K key, V value, Node next) {
this.hash = hash;
this.key = key;
this.value = value;
this.next = next;
}
public K getKey() { return key; }
public V getValue() { return value; }
public void setValue(V value) { this.value = value; }
}
public V get(K key){
if(key == null) return null;
Node<K,V> p;
int i = hash(key) & (table.length-1); //计算索引
if((p = table[i]) == null) return null;
do{
if(key.equals(p.key)) return p.value;
} while((p = p.next) != null);
return null;
}
public void put(K key,V value){
if(key == null) return;
if(table == null || size() > table.length*0.75) resize(); //自动扩充容量
Node<K,V> p,pre = null;
int hash = hash(key), i = hash & (table.length-1);
if((p = table[i]) == null) { //索引位置为空则直接添加
table[i] = new Node<>(hash, key, value, null);
++n;
}
else{ //不为空则添加到链表末尾
do{
if(key.equals(p.key)) {
p.value = value;
return;
}
pre = p;
} while((p = p.next) != null);
pre.next = new Node<>(hash,key,value,null);
++n;
}
}
public void delete(K key){
if(key == null) return;
int i = hash(key) & (table.length-1); //计算索引
Node<K,V> p, pre = null;
if((p = table[i]) == null) return;
do{
if(key.equals(p.key)){
if(pre != null) pre.next = p.next; //非链表头部删除
else table[i] = p.next; //链表头删除
--n;
return;
}
pre = p;
} while((p = p.next) != null);
}
public Iterable<K> keys(){
Queue<K> queue = new ArrayDeque<>();
for(int i = 0; i< table.length; ++i)
for(Node<K,V> x = table[i]; x != null; x = x.next)
queue.offer(x.key);
return queue;
}
public int size(){ return n; }
public boolean isEmpty(){ return size() == 0; }
public boolean contains(K key){ return get(key) != null; }
public int capacity(){ return table == null ? INIT_CAPACITY : table.length; }
/**
* 常规除留余数法计算索引: (key.hashCode() & 0x7fffffff) % M,
* 其中M为哈希表长度,一般为质数,减少碰撞冲突
* 优化方式:
* 1. 当 M = 2^n(n为自然数)时,a % M = a & (M - 1)
* 因此可使用与运算(&)代替取余提高效率,key.hashCode() & (2^n - 1)(相当于只取出二进制的前n位二进制数)
* 2. 但是M如果为非质数,会有较严重碰撞冲突,解决办法就是对key.hashCode()的高16位与低16位
* 进行异或减少这种影响,考虑到通过异或方式的开销,综合来说异或一次即可
*/
public int hash(K key){
int h;
return key == null ? 0 : (h = key.hashCode()) ^ (h >>> 16);
}
/**
* 自动扩容: 原来的2倍
* 原索引: 取出前n位二进制数的值, 新索引取出前n+1位二进制的值(n为原来table长度)
* 即 新索引 = 原索引 + 第n+1二进制位的实际值,
* 因为二进制位只有0,1,因此只有2种情况(0时等于原索引,1时需要加上1代表的实际值)
*/
private void resize(){
int oldCap = table == null? 0 : table.length;
int newCap = oldCap == 0? INIT_CAPACITY : oldCap << 1;
Node<K,V>[] newTab = (Node<K, V>[]) new Node[newCap]; //建立新散列表
if(table != null) {
for (int i = 0; i < oldCap; i++) { //遍历然后放入新的散列表中
Node<K,V> e = table[i];
if(e != null){
if(e.next == null) {
newTab[e.hash & (newCap - 1)] = e; //单个元素之间放入
}
else{ //如果是多个元素的链表,则分成2个链表(新索引只有2种可能)
Node loHead = null, loTail = null;
Node hiHead = null, hiTail = null;
do{
if((e.hash & oldCap) == 0){ //第n+1位二进制数为0时(n为原table长度)
if(loTail == null) loHead = e;
else loTail.next = e;
loTail = e;
}
else{ //第n+1位二进制数为1时(n为原table长度)
if(hiTail == null) hiHead = e;
else hiTail.next = e;
hiTail = e;
}
}while((e = e.next) != null);
if(loTail != null){
loTail.next = null;
newTab[i] = loHead; //新索引和原索引相同(第n+1二进制位为0,n为原table长度)
}
if(hiTail != null) {
hiTail.next = null;
newTab[i + oldCap] = hiHead; //新索引和原索引不同(第n+1二进制位为1,n为原table长度)
}
}
}
}
}
table = newTab;
}
}
1. 基于线性探测法的散列表
*优点: *
更容易进行序列化操作
键值对总数可预知时,可以创建完美哈希函数,处理效率高
缺点:
键值对数不能超过容量,当接近容量时会严重影响性能,达到一定程度必须扩容
public class LinearProbingHashST<K,V> {
private static final int INIT_CAPACITY = 1 << 4; //初始化容量
private static final int MAXIMUM_CAPACITY = 1 << 30; //最大容量
private int m; //容量
private int n; //键值对数量
private K[] keys; //使用2个数组分别保存键值对
private V[] vals;
public LinearProbingHashST(){ this(INIT_CAPACITY); }
public LinearProbingHashST(int capacity) {
int cap = this.tableSizeFor(capacity); //获取大于该值的最小2的幂
m = cap;
n = 0;
keys = (K[])new Object[m];
vals = (V[])new Object[m];
}
//获取大于该值的最小2的幂
public int tableSizeFor(int capacity) {
int n = capacity - 1;
n |= n >>> 1;
n |= n >>> 2;
n |= n >>> 4;
n |= n >>> 8;
n |= n >>> 16;
return (n < 0) ? 1 : (n >= MAXIMUM_CAPACITY) ? MAXIMUM_CAPACITY : n + 1;
}
//插入
public void put(K key,V value){
if(key == null) return;
if(n >= m >>> 2) resize(m << 1); //扩大容量
int i;
for(i=hash(key);keys[i] != null;i = (i+1)%m) //遍历键簇
if(key.equals(keys[i])) {
vals[i] = value; //已存在更新值
return;
}
//新键值对插入
keys[i] = key;
vals[i] = value;
++n;
}
//获取
public V get(K key){
if(key == null) return null;
for(int i=hash(key);keys[i] != null;i = (i+1) % m) //遍历键簇
if(key.equals(keys[i])) return vals[i]; //找到返回其值
return null; //没找到
}
//删除
public void delete(K key){
if(key == null || !contains(key)) return;
int i = hash(key);
while(!key.equals(keys[i])) //遍历键簇,找到后设置为null
i = (i+1)%m;
keys[i] = null;
vals[i] = null;
//修复null位置后面的键簇(重新插入即可)
i = (i+1)%m;
while(keys[i] != null){
K tempK = keys[i];
V tempV = vals[i];
keys[i] = null;
vals[i] = null;
--n;
put(tempK,tempV);
i = (i+1)%m;
}
--n;
if(n > 0 && n < m >>> 3) resize(m >>> 2); //调整大小
}
//遍历
public Iterable<K> keys(){
Queue<K> queue = new ArrayDeque<>();
for(int i=0;i < m;++i)
if(keys[i] != null) queue.offer(keys[i]);
return queue;
}
public int size(){ return n; }
public boolean isEmpty(){ return size() == 0; }
public boolean contains(K key){ return get(key) != null; }
private int hash(K key){
int h;
return (key == null? 0 : (h = key.hashCode()) ^ (h >>> 16)) & (m-1);
}
//自动调整容量
private void resize(int capacity){
LinearProbingHashST<K,V> temp = new LinearProbingHashST<>(capacity);
for(int i=0;i<m;++i)
temp.put(keys[i],vals[i]);
keys = temp.keys;
vals = temp.vals;
m = temp.m;
}
}