哈希碰撞

假设我们的哈希函数实现的足够好，每个元素的哈希值都是独立均匀随机的在值域 \(d\) 内撒一个点，那么 \(n\) 个元素哈希后碰撞的概率可以写为：

\[ 1-\frac{\binom{d}{n}}{d^n} \]

其在 \(d=10^9,\ n=10^6\) 时非常接近于 \(1\)，因此不建议使用模数 \(10^9\) 级别的单模哈希。

其在 \(d=10^{18},\ n=10^6\) 时非常接近于 \(0\)，因此可以使用模数 \(10^{18}\) 级别（\(2^{61}-1\)）的单模哈希。

这里提供一个计算随机撒点碰撞概率的程序。

#include<iostream>
#include<random>
#define int long long
#define ld long double
using namespace std;

// 就是下降幂除以普通幂
ld calc1(int n, int d) {
    ld res = 1;
    int j = 0;
    for(int i = d - n + 1; i <= d; i++) {
        res = res * i;
        while(j < n && res >= 1) { res /= d; ++j; }
    }
    while(j < n) { res /= d; ++j; }
    return res;
}

// example
// 在值域 1e18 内撒 1e6 个点
int n = 1e6;
int d = 1e18;

signed main() {

    // cin >> n >> d;
    // 不碰撞的概率
    cout << (1.0l - calc1(n, d)) << endl;

    return 0;
}