在PHP中，对海量数据做近似统计时，常用的概率数据结构有布隆过滤器（Bloom Filter，用于判断元素是否存在）、HyperLogLog（用于基数估计）以及Count-Min Sketch（用于估计每个元素出现的次数）。以下是使用Count-Min Sketch算法估计元素出现次数的一个简单示例：
/**
 * Count-Min Sketch: a fixed-size counter table that approximates how many
 * times each item has been counted.
 *
 * The table has one row per hash function. An update adds to exactly one cell
 * in every row, and a query returns the smallest of the item's cells. Because
 * updates are non-negative, collisions can only inflate a cell, so an estimate
 * is never lower than the true count.
 */
class CountMinSketch
{
    /** @var int Number of rows, i.e. the number of hash functions used. */
    private $rows;

    /** @var int Number of counters in each row. */
    private $columns;

    /** @var int[][] Counter table indexed as [row][column]. */
    private $values;

    /**
     * @param int $rows    Number of hash functions (table depth); must be >= 1.
     * @param int $columns Number of counters per row (table width); must be >= 1.
     *
     * @throws InvalidArgumentException When either dimension is smaller than 1.
     */
    public function __construct($rows, $columns)
    {
        $rows = (int) $rows;
        $columns = (int) $columns;

        // A zero dimension would later cause a modulo-by-zero or an empty table.
        if ($rows < 1 || $columns < 1) {
            throw new InvalidArgumentException('rows and columns must be positive integers');
        }

        $this->rows = $rows;
        $this->columns = $columns;
        $this->values = array_fill(0, $rows, array_fill(0, $columns, 0));
    }

    /**
     * Adds $count occurrences of $item to the sketch.
     *
     * @param mixed $item  Value to count; it is cast to string before hashing.
     * @param int   $count Number of occurrences to add; must be >= 0.
     *
     * @return void
     *
     * @throws InvalidArgumentException When $count is negative.
     */
    public function increment($item, $count = 1)
    {
        $count = (int) $count;

        // The minimum-based estimate is only an upper bound while every
        // update is non-negative, so negative updates are rejected.
        if ($count < 0) {
            throw new InvalidArgumentException('count must not be negative');
        }

        foreach ($this->generateHashes($item) as $row => $hash) {
            $this->values[$row][$hash % $this->columns] += $count;
        }
    }

    /**
     * Returns the estimated number of occurrences recorded for $item.
     *
     * @param mixed $item Value to look up; it is cast to string before hashing.
     *
     * @return int The smallest counter among the item's cells (0 if never counted
     *             and no colliding item was counted either).
     */
    public function estimate($item)
    {
        $min = PHP_INT_MAX;

        foreach ($this->generateHashes($item) as $row => $hash) {
            $min = min($min, $this->values[$row][$hash % $this->columns]);
        }

        return $min;
    }

    /**
     * Computes one non-negative integer hash per row for $item.
     *
     * Each row gets its own hash function by prefixing the row index to the
     * item before applying FNV-1a (32-bit).
     *
     * @param mixed $item Value to hash; it is cast to string.
     *
     * @return int[] Hash values indexed by row number (0 .. rows - 1).
     */
    private function generateHashes($item)
    {
        $key = (string) $item;
        $hashes = array();

        for ($row = 0; $row < $this->rows; $row++) {
            $digest = hash('fnv1a32', $row . ':' . $key);

            // hash() returns hex text, so convert it to a number. Only 7 hex
            // digits (28 bits) are used so the value is a plain int even on
            // 32-bit builds of PHP.
            $hashes[] = hexdec(substr($digest, 0, 7));
        }

        return $hashes;
    }
}

// Usage example
// Build a sketch; tune the row and column counts to suit your data.
$sketch = new CountMinSketch(1024, 2048);

// Record one observation batch per item.
foreach (array("item1" => 1, "item2" => 2) as $item => $count) {
    $sketch->increment($item, $count);
}

// Print the value the sketch reports for each item.
echo "Estimated count for item1: ", $sketch->estimate("item1"), "\n";
echo "Estimated count for item2: ", $sketch->estimate("item2"), "\n";