Bloom Filter Script#
Complete Python script demonstrating the Creating/Evaluating bloom filter (view as a GitHub Gist):
#!/usr/bin/env python
from math import log
from bitarray import bitarray # from pypi.python.org/pypi/bitarray
import pyhash # from https://github.com/flier/pyfasthash
# Based on BIP-37
# https://github.com/QuantumExplorer/bips/blob/master/bip-0037.mediawiki
# Defined in bloom.h
# https://github.com/dashpay/dash/blob/master/src/bloom.h#L19-#L20
MAX_BLOOM_FILTER_SIZE = 36000
MAX_HASH_FUNCS = 50
# Set of flags that control how matched items are added to the filter (per BIP-37)
# https://github.com/dashpay/dash/blob/master/src/bloom.h#L26
nFlags = 0
nElements = 1 # Number of elements
nFPRate = 0.0001 # False positive rate
nFilterBytes = int(min((-1 / log(2)**2 * nElements * log(nFPRate)) / 8, MAX_BLOOM_FILTER_SIZE))
# Calculate the number of hash functions to use in the filter
# Limit the maximum number to 50 per BIP-37
nHashFuncs = int(min(nFilterBytes * 8 / nElements * log(2), MAX_HASH_FUNCS))
murmur3 = pyhash.murmur3_32()
TEST_TXID = "019f5b01d4195ecbc9398fbf3c3b1fa9bb3183301d7a1fb3bd174fcfa40a2b65"
def bloom_hash(nHashNum, data):
seed = (nHashNum * 0xfba4c795 + nTweak) & 0xffffffff
return( murmur3(data, seed=seed) % (nFilterBytes * 8) )
# Bloom Filter creation
def create_filter(nTweak):
print('Creating bloom filter')
vData = nFilterBytes * 8 * bitarray('0', endian="little")
data_to_hash = TEST_TXID
data_to_hash = data_to_hash.decode("hex")
print('Filter bytes: {}; Hash functions: {}'.format(nFilterBytes, nHashFuncs))
print(" Filter (As Bits)")
print("nHashNum nIndex Filter 0123456789abcdef")
print("~~~~~~~~ ~~~~~~ ~~~~~~ ~~~~~~~~~~~~~~~~")
for nHashNum in range(nHashFuncs):
nIndex = bloom_hash(nHashNum, data_to_hash)
## Set the bit at nIndex to 1
vData[nIndex] = True
## Debug: print current state
print(' {0:2} {1:2} {2} {3}'.format(
nHashNum,
hex(int(nIndex)),
vData.tobytes().encode("hex"),
vData.to01()
))
print('Bloom filter: {}\n'.format(vData.tobytes().encode("hex")))
# Bloom Filter evaluation
def evaluate_filter():
print('Evaluating bloom filter')
vData = bitarray(endian='little')
vData.frombytes("b50f".decode("hex"))
nHashFuncs = 11
nTweak = 0
nFlags = 0
def contains(nHashFuncs, data_to_hash):
for nHashNum in range(nHashFuncs):
## bloom_hash as defined in previous section
nIndex = bloom_hash(nHashNum, data_to_hash)
if vData[nIndex] != True:
print("MATCH FAILURE: Index {0} not set in {1}\n".format(
hex(int(nIndex)),
vData.to01()
))
return False
print("MATCH SUCCESS\n")
## Test 1: Same TXID as previously added to filter
data_to_hash = TEST_TXID
print('\nChecking: {}'.format(data_to_hash))
data_to_hash = data_to_hash.decode("hex")
contains(nHashFuncs, data_to_hash)
## Test 2: Arbitrary string
data_to_hash = "1/10,000 chance this ASCII string will match"
print('Checking: {}'.format(data_to_hash))
contains(nHashFuncs, data_to_hash)
# Tweak is a random value added to the seed value in the hash function
# used by the bloom filter
nTweak = 0
create_filter(nTweak)
evaluate_filter()