algorithm Murmur3_32 is // Note: In this version, all arithmetic is performed with unsigned 32-bit integers. // In the case of overflow, the result is reduced modulo 232. input: key, len, seed
c1 ← 0xcc9e2d51 c2 ← 0x1b873593 r1 ← 15 r2 ← 13 m ← 5 n ← 0xe6546b64
hash ← seed
// 按照 chunk(4byte)读取数据 for each fourByteChunk of key do k ← fourByteChunk
k ← k × c1 k ← k ROL r1 // 这里的 ROL 指循环左移(汇编指令),将输入的无符号数循环左移N位 k ← k × c2
hash ← hash XOR k hash ← hash ROL r2 hash ← (hash × m) + n
withany remainingBytesInKey do remainingBytes ← SwapToLittleEndian(remainingBytesInKey) // Note: Endian swapping is only necessary on big-endian machines. // The purpose is to place the meaningful digits towards the low end of the value, // so that these digits have the greatest potential to affect the low range digits // in the subsequent multiplication. Consider that locating the meaningful digits // in the high range would produce a greater effect upon the high digits of the // multiplication, and notably, that such high digits are likely to be discarded // by the modulo arithmetic under overflow. We don't want that. remainingBytes ← remainingBytes × c1 remainingBytes ← remainingBytes ROL r1 remainingBytes ← remainingBytes × c2 hash ← hash XOR remainingBytes hash ← hash XOR len hash ← hash XOR (hash >> 16) hash ← hash × 0x85ebca6b hash ← hash XOR (hash >> 13) hash ← hash × 0xc2b2ae35 hash ← hash XOR (hash >> 16)
/** * K mix * @param k * @return */ privatestaticintmixK(int k) { k *= C1; k = Integer.rotateLeft(k, 15); k *= C2; return k; }
/** * h mix * @param h * @param k * @return */ privatestaticintmixHash(int h, int k) { h ^= k; h = Integer.rotateLeft(h, 13); h = h * 5 + 0xe6546b64; return h; }
/** * byte 转 Int * @param value * @return */ privatestaticinttoInt(byte value){ return value & 0xff; }
// Finalization mix - force all bits of a hash block to avalanche privatestaticintfmix(int hash, int length) { hash ^= length; hash ^= hash >>> 16; hash *= 0x85ebca6b; hash ^= hash >>> 13; hash *= 0xc2b2ae35; hash ^= hash >>> 16; return hash; } // --------------------------- 入口函数 --------------------------- /** * Murmur3_32的Hash求值函数 * @param seed seed 种子(无符号数) * @param key 待求hash的key值(二进制数组) * @param offset start offset of the array to hash * @param len length length of the array to hash * @return */ publicstaticintmurmurhash3_x86_32(byte[] key,int offset, int len,int seed){ // 使用种子作为初始值 int hash= seed; int i; // 每次读4个byte for (i = 0; i + CHUNK_SIZE <= len; i += CHUNK_SIZE) { int k= mixK(getIntLittleEndian(key,offset+i)); hash = mixHash(hash, k); } // 当没有读净buffer时,处理尾部 intk1=0; for (intshift=0; i < len; i++, shift += 8) { k1 ^= toInt(key[offset + i]) << shift; } hash ^= mixK(k1); return fmix(hash, len); } // --------------------------- 包装方法 --------------------------- /** * 字符串基于Murmur3_32的Hash求值函数 * @param seed 种子(无符号数) * @param key 待求hash的key值 * @param charset key的字符集 * @return 32位Hash值 */ publicstaticintmurmurhash3_x86_32_Str(int seed, String key, Charset charset){ byte[] data = key.getBytes(charset); return murmurhash3_x86_32(data,0,data.length,seed); } }