基于Redis的BloomFilter实现

前言最近在研究布隆过滤器（如果不了解什么是布隆过滤器的，推荐看如何判断一个元素在亿级数据中是否存在？），发现Guava提供了封装好的类，但是只能单机使用，一般现在的应用都是部署在分布式系统的，所以想找个可以在分布式系统下使用的布隆过滤器，找了半天只找到一个基于redis开发的模块项目ReBloom，但是这个是需要额外安装的，而且文档里只说了怎么在docker下运行，没研究过docker所以放弃了。后来找到一篇博客讲怎么利用布隆过滤器统计消息未读数的（博客地址不记得了，是一位淘宝同学写的），博客最后放了一份整合redis和bloomFilter的代码demo，详见BloomFilter.java，看了下实现比较简单，但是使用方式不是我想要的，所以参考着自己整理了一份。BloomFilterHelperpackage com.doodl6.springmvc.service.cache.redis;import com.google.common.base.Preconditions;import com.google.common.hash.Funnel;import com.google.common.hash.Hashing;public class BloomFilterHelper<T> { private int numHashFunctions; private int bitSize; private Funnel<T> funnel; public BloomFilterHelper(Funnel<T> funnel, int expectedInsertions, double fpp) { Preconditions.checkArgument(funnel != null, “funnel不能为空”); this.funnel = funnel; bitSize = optimalNumOfBits(expectedInsertions, fpp); numHashFunctions = optimalNumOfHashFunctions(expectedInsertions, bitSize); } int[] murmurHashOffset(T value) { int[] offset = new int[numHashFunctions]; long hash64 = Hashing.murmur3_128().hashObject(value, funnel).asLong(); int hash1 = (int) hash64; int hash2 = (int) (hash64 >>> 32); for (int i = 1; i <= numHashFunctions; i++) { int nextHash = hash1 + i * hash2; if (nextHash < 0) { nextHash = ~nextHash; } offset[i - 1] = nextHash % bitSize; } return offset; } /** * 计算bit数组长度 / private int optimalNumOfBits(long n, double p) { if (p == 0) { p = Double.MIN_VALUE; } return (int) (-n * Math.log(p) / (Math.log(2) * Math.log(2))); } /* * 计算hash方法执行次数 */ private int optimalNumOfHashFunctions(long n, long m) { return Math.max(1, (int) Math.round((double) m / n * Math.log(2))); }}BloomFilterHelper做的事情很简单，其实大部分代码都是来源于Guava库里面的