Hash Tables
Hash tables provide O(1) average-case insertion, deletion, and lookup. They are the most used data structure after arrays — Python dicts, JavaScript objects, and PHP arrays are all hash tables under the hood.
How Hash Tables Work
class SimpleHashTable:
def __init__(self, size=10):
self.size = size
self.buckets = [[] for _ in range(size)] # list of buckets
def _hash(self, key: str) -> int:
# Simple hash function: sum of ASCII values mod table size
return sum(ord(c) for c in key) % self.size
def set(self, key: str, value) -> None:
idx = self._hash(key)
bucket = self.buckets[idx]
for i, (k, v) in enumerate(bucket):
if k == key:
bucket[i] = (key, value) # update existing
return
bucket.append((key, value)) # insert new
def get(self, key: str):
idx = self._hash(key)
for k, v in self.buckets[idx]:
if k == key:
return v
raise KeyError(key)
def delete(self, key: str) -> None:
idx = self._hash(key)
self.buckets[idx] = [(k,v) for k,v in self.buckets[idx] if k != key]
ht = SimpleHashTable()
ht.set('name', 'Rahul')
ht.set('age', 25)
print(ht.get('name')) # Rahul
Collision Resolution — Chaining vs Open Addressing
# Chaining (used above): store list at each bucket index
# Pro: simple, handles many collisions
# Con: cache-unfriendly (linked list traversal)
# Open Addressing — Linear Probing
class LinearProbing:
def __init__(self, size=10):
self.size = size
self.keys = [None] * size
self.values = [None] * size
self.deleted = object() # sentinel
def _hash(self, key):
return hash(key) % self.size
def set(self, key, value):
idx = self._hash(key)
while self.keys[idx] is not None and self.keys[idx] != key:
idx = (idx + 1) % self.size # probe next slot
self.keys[idx] = key
self.values[idx] = value
def get(self, key):
idx = self._hash(key)
while self.keys[idx] is not None:
if self.keys[idx] == key:
return self.values[idx]
idx = (idx + 1) % self.size
raise KeyError(key)
Common Hash Table Interview Problems
# 1. Two Sum — O(n) using dict
def two_sum(nums, target):
seen = {}
for i, n in enumerate(nums):
complement = target - n
if complement in seen:
return [seen[complement], i]
seen[n] = i
return []
print(two_sum([2,7,11,15], 9)) # [0, 1]
# 2. Group Anagrams
def group_anagrams(strs):
groups = {}
for s in strs:
key = tuple(sorted(s)) # sort letters as key
groups.setdefault(key, []).append(s)
return list(groups.values())
print(group_anagrams(['eat','tea','tan','ate','nat','bat']))
# [['eat','tea','ate'], ['tan','nat'], ['bat']]
# 3. First Non-Repeating Character
def first_unique(s):
count = {}
for c in s: count[c] = count.get(c, 0) + 1
for i, c in enumerate(s):
if count[c] == 1: return i
return -1
print(first_unique('aabbcde')) # 4 (index of 'c')
Q: What is a hash collision and how is it handled?
A collision occurs when two different keys produce the same hash index. Chaining stores all colliding pairs in a linked list at that index. Open addressing finds the next empty slot by probing (linear, quadratic, or double hashing). Python uses open addressing with a form of pseudo-random probing.
Comments (0)
No comments yet. Be the first!
Leave a Comment