Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Generated protobuf code: collapse in diffs/reviews and exclude from language stats.
internal/zetasketch/*.pb.go linguist-generated=true
18 changes: 0 additions & 18 deletions .github/workflows/lint.yml

This file was deleted.

21 changes: 1 addition & 20 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,23 +8,4 @@ on:
- main
jobs:
go:
runs-on: ubuntu-latest
strategy:
matrix:
go-version: [1.26.x]
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Cache dependencies
uses: actions/cache@v4
with:
path: ~/go/pkg/mod
key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }}
restore-keys: |
${{ runner.os }}-go-
- name: Setup Go
uses: actions/setup-go@v5
with:
go-version: ${{ matrix.go-version }}
- name: Run tests
run: make test
uses: bsm/misc/.github/workflows/test-go.yml@main
6 changes: 1 addition & 5 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,4 @@ module github.com/bsm/zetasketch

go 1.26.0

require (
github.com/bsm/ginkgo v1.16.5
github.com/bsm/gomega v1.27.10
google.golang.org/protobuf v1.36.11
)
require google.golang.org/protobuf v1.36.11
4 changes: 0 additions & 4 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,7 +1,3 @@
github.com/bsm/ginkgo v1.16.5 h1:uTeeWv0Yx1PnDeCk76PFyGrOMVw3D+r9bTNKNcIjDdQ=
github.com/bsm/ginkgo v1.16.5/go.mod h1:RabIZLzOCPghgHJKUqHZpqrQETA5AnF4aCSIYy5C1bk=
github.com/bsm/gomega v1.27.10 h1:yeMWxP2pV2fG3FgAODIY8EiRE3dy0aeFYt4l7wh6yKA=
github.com/bsm/gomega v1.27.10/go.mod h1:JyEr/xRbxbtgWNi8tIEVPUYZ5Dzef52k01W3YH0H+O0=
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE=
Expand Down
5 changes: 2 additions & 3 deletions hll.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import (
"fmt"

"github.com/bsm/zetasketch/hllplus"
"github.com/bsm/zetasketch/internal/zetasketch"
pb "github.com/bsm/zetasketch/internal/zetasketch"
"google.golang.org/protobuf/proto"
)
Expand Down Expand Up @@ -85,7 +84,7 @@ func (h *HLL) proto() *pb.AggregatorStateProto {
EncodingVersion: &encodingVersion,
NumValues: &numValues,
}
proto.SetExtension(msg, zetasketch.E_HyperloglogplusUniqueState, h.h.Proto())
proto.SetExtension(msg, pb.E_HyperloglogplusUniqueState, h.h.Proto())
return msg
}

Expand All @@ -100,7 +99,7 @@ func (h *HLL) fromProto(msg *pb.AggregatorStateProto) error {
return fmt.Errorf("incompatible binary message: no num values")
}

ext := proto.GetExtension(msg, zetasketch.E_HyperloglogplusUniqueState)
ext := proto.GetExtension(msg, pb.E_HyperloglogplusUniqueState)
hState, ok := ext.(*pb.HyperLogLogPlusUniqueStateProto)
if !ok {
return fmt.Errorf("incompatible binary message: invalid HyperLogLog++ state")
Expand Down
126 changes: 75 additions & 51 deletions hll_test.go
Original file line number Diff line number Diff line change
@@ -1,57 +1,81 @@
package zetasketch_test

import (
"github.com/bsm/zetasketch"
"testing"

. "github.com/bsm/ginkgo"
. "github.com/bsm/gomega"
"github.com/bsm/zetasketch"
)

// Spec group for zetasketch.HLL. Every spec starts from a fresh subject that
// has received 1,500 Add calls covering 1,000 distinct values.
var _ = Describe("HLL", func() {
var subject *zetasketch.HLL
// Compile-time check that *zetasketch.HLL is assignable to Aggregator.
var _ zetasketch.Aggregator = subject

// Rebuild the subject before each spec: values 0..999 once, then 500..999 a
// second time as duplicates.
BeforeEach(func() {
subject = zetasketch.NewHLL(nil)

for i := 0; i < 1_000; i++ {
subject.Add(zetasketch.Uint64Value(uint64(i)))
}
for i := 500; i < 1_000; i++ {
subject.Add(zetasketch.Uint64Value(uint64(i)))
}
})

// NumValues is expected to equal the number of Add calls (duplicates included).
It("should count values", func() {
Expect(subject.NumValues()).To(BeNumerically("==", 1_500))
})

// Result is expected to equal the number of distinct values added.
It("should estimate uniques", func() {
Expect(subject.Result()).To(BeNumerically("==", 1_000))
})

// Merges a second sketch holding 800..1199 (400 values, 200 of them already
// present in the subject) and pins the resulting count and estimate.
It("should merge", func() {
other := zetasketch.NewHLL(nil)
for i := 800; i < 1_200; i++ {
other.Add(zetasketch.Uint64Value(uint64(i)))
}

Expect(subject.Merge(other)).To(Succeed())
Expect(subject.NumValues()).To(BeNumerically("==", 1_900))
// The combined input has 1,200 distinct values; 1,207 is the estimate this
// spec pins.
Expect(subject.Result()).To(BeNumerically("==", 1_207))

// `other` is not modified:
Expect(other.NumValues()).To(BeNumerically("==", 400))
Expect(other.Result()).To(BeNumerically("==", 400))
})

// Round-trips the subject through MarshalBinary/UnmarshalBinary into a
// zero-value HLL and expects the same count and estimate as before.
It("should marshal/unmarshal binary", func() {
data, err := subject.MarshalBinary()
Expect(err).NotTo(HaveOccurred())

subject = new(zetasketch.HLL)
Expect(subject.UnmarshalBinary(data)).To(Succeed())
Expect(subject.NumValues()).To(BeNumerically("==", 1_500))
Expect(subject.Result()).To(BeNumerically("==", 1_000))
})
})
var _ zetasketch.Aggregator = (*zetasketch.HLL)(nil)

// newTestHLL builds the fixture shared by the HLL tests: a sketch created
// with NewHLL(nil) that has received 1,500 Add calls covering 1,000 distinct
// values (0..999 once, then 500..999 a second time).
func newTestHLL() *zetasketch.HLL {
	h := zetasketch.NewHLL(nil)

	// First pass: every value in 0..999.
	for v := uint64(0); v < 1_000; v++ {
		h.Add(zetasketch.Uint64Value(v))
	}
	// Second pass: re-add the upper half as duplicates.
	for v := uint64(500); v < 1_000; v++ {
		h.Add(zetasketch.Uint64Value(v))
	}

	return h
}

// TestHLL_NumValues checks that the fixture reports one counted value per Add
// call, duplicates included.
func TestHLL_NumValues(t *testing.T) {
	const want int64 = 1_500

	got := newTestHLL().NumValues()
	if got != want {
		t.Errorf("got %d, want %d", got, want)
	}
}

// TestHLL_Result checks that the fixture's estimate equals the number of
// distinct values that were added to it.
func TestHLL_Result(t *testing.T) {
	const want int64 = 1_000

	got := newTestHLL().Result()
	if got != want {
		t.Errorf("got %d, want %d", got, want)
	}
}

// TestHLL_Merge merges a second sketch into the fixture and pins the
// resulting count and estimate, then confirms the merged-in sketch still
// reports its own original figures.
func TestHLL_Merge(t *testing.T) {
	merged := newTestHLL()

	// 400 values: 800..999 overlap the fixture, 1000..1199 are new to it.
	other := zetasketch.NewHLL(nil)
	for v := uint64(800); v < 1_200; v++ {
		other.Add(zetasketch.Uint64Value(v))
	}

	err := merged.Merge(other)
	if err != nil {
		t.Fatal(err)
	}

	const (
		wantMergedNum    int64 = 1_900
		wantMergedResult int64 = 1_207 // pinned estimate; the input holds 1,200 distinct values
		wantOther        int64 = 400
	)

	if got := merged.NumValues(); got != wantMergedNum {
		t.Errorf("NumValues: got %d, want %d", got, wantMergedNum)
	}
	if got := merged.Result(); got != wantMergedResult {
		t.Errorf("Result: got %d, want %d", got, wantMergedResult)
	}

	// The argument to Merge must come out untouched.
	if got := other.NumValues(); got != wantOther {
		t.Errorf("other.NumValues: got %d, want %d", got, wantOther)
	}
	if got := other.Result(); got != wantOther {
		t.Errorf("other.Result: got %d, want %d", got, wantOther)
	}
}

// TestHLL_MarshalBinary round-trips the fixture through MarshalBinary and
// UnmarshalBinary into a zero-value HLL and expects the decoded sketch to
// report the same count and estimate as the fixture.
func TestHLL_MarshalBinary(t *testing.T) {
	encoded, err := newTestHLL().MarshalBinary()
	if err != nil {
		t.Fatal(err)
	}

	// Decode into a sketch that was never passed through NewHLL.
	decoded := new(zetasketch.HLL)
	if err = decoded.UnmarshalBinary(encoded); err != nil {
		t.Fatal(err)
	}

	const (
		wantNum    int64 = 1_500
		wantResult int64 = 1_000
	)
	if got := decoded.NumValues(); got != wantNum {
		t.Errorf("NumValues: got %d, want %d", got, wantNum)
	}
	if got := decoded.Result(); got != wantResult {
		t.Errorf("Result: got %d, want %d", got, wantResult)
	}
}
4 changes: 2 additions & 2 deletions hllplus/data.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
package hllplus

import (
"math"
"math/bits"
"sort"
)
Expand Down Expand Up @@ -748,9 +747,10 @@ func closestBiases(estimate float64, precision uint8) weightedBiases {

res := make(weightedBiases, 0, max-min)
for i := min; i < max; i++ {
d := means[i] - estimate
res = append(res, weightedBias{
Bias: biases[i],
Distance: math.Pow(means[i]-estimate, 2),
Distance: d * d,
})
}

Expand Down
45 changes: 28 additions & 17 deletions hllplus/data_test.go
Original file line number Diff line number Diff line change
@@ -1,24 +1,35 @@
package hllplus_test

import (
"github.com/bsm/zetasketch/hllplus"
"math"
"testing"

. "github.com/bsm/ginkgo"
. "github.com/bsm/gomega"
"github.com/bsm/zetasketch/hllplus"
)

var _ = Describe("Data", func() {
It("should estimate bias", func() {
Expect(hllplus.EstimateBias(0, 15)).To(BeNumerically("==", 0.0))
Expect(hllplus.EstimateBias(1, 15)).To(BeNumerically("==", 0.0))
Expect(hllplus.EstimateBias(10_000, 15)).To(BeNumerically("==", 0.0))
Expect(hllplus.EstimateBias(100_000, 15)).To(BeNumerically("~", 888.1, 0.1))
Expect(hllplus.EstimateBias(200_000, 15)).To(BeNumerically("==", 0.0))
func TestEstimateBias(t *testing.T) {
cases := []struct {
e float64
p uint8
exp float64
delta float64
}{
{0, 15, 0.0, 0},
{1, 15, 0.0, 0},
{10_000, 15, 0.0, 0},
{100_000, 15, 888.1, 0.1},
{200_000, 15, 0.0, 0},

Expect(hllplus.EstimateBias(50_000, 13)).To(BeNumerically("==", 0.0))
Expect(hllplus.EstimateBias(50_000, 14)).To(BeNumerically("~", 449.7, 0.1))
Expect(hllplus.EstimateBias(50_000, 15)).To(BeNumerically("~", 7820.2, 0.1))
Expect(hllplus.EstimateBias(50_000, 16)).To(BeNumerically("~", 44513.2, 0.1))
Expect(hllplus.EstimateBias(50_000, 17)).To(BeNumerically("==", 0.0))
})
})
{50_000, 13, 0.0, 0},
{50_000, 14, 449.7, 0.1},
{50_000, 15, 7820.2, 0.1},
{50_000, 16, 44513.2, 0.1},
{50_000, 17, 0.0, 0},
}
for _, tc := range cases {
got := hllplus.EstimateBias(tc.e, tc.p)
if math.Abs(got-tc.exp) > tc.delta {
t.Errorf("EstimateBias(%v, %d) = %v, want %v (±%v)", tc.e, tc.p, got, tc.exp, tc.delta)
}
}
}
5 changes: 1 addition & 4 deletions hllplus/ext_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,7 @@ func EstimateBias(e float64, p uint8) float64 {
}

func NewNormal(precision uint8) (*HLL, error) {
pp := precision + 5
if pp > MaxSparsePrecision {
pp = MaxSparsePrecision
}
pp := min(precision+5, MaxSparsePrecision)

s, err := New(precision, pp)
if err != nil {
Expand Down
Loading