diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7782cd2..0ea8641 100755 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - go: [ '1.22.11' ] + go: [ '1.23.11' ] steps: - uses: actions/checkout@v3 diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml deleted file mode 100755 index ddee4f0..0000000 --- a/.github/workflows/codeql.yml +++ /dev/null @@ -1,38 +0,0 @@ - -name: "CodeQL" - -on: - push: - branches: [ "master" ] - pull_request: - branches: [ "master" ] - schedule: - - cron: '16 8 * * 1' - -jobs: - analyze: - name: Analyze - runs-on: ubuntu-latest - permissions: - actions: read - contents: read - security-events: write - - strategy: - fail-fast: false - matrix: - language: [ 'go' ] - - steps: - - name: Checkout repository - uses: actions/checkout@v3 - - - name: Initialize CodeQL - uses: github/codeql-action/init@v2 - with: - languages: ${{ matrix.language }} - - - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v2 - with: - category: "/language:${{matrix.language}}" diff --git a/.golangci.yml b/.golangci.yml index e3f91ed..09bb23f 100755 --- a/.golangci.yml +++ b/.golangci.yml @@ -1,202 +1,61 @@ +version: "2" run: - go: "1.22" - concurrency: 4 + go: "1.23" timeout: 5m tests: false issues-exit-code: 1 modules-download-mode: readonly + allow-parallel-runners: true issues: - exclude-use-default: false - max-issues-per-linter: 100 - max-same-issues: 4 + max-issues-per-linter: 0 + max-same-issues: 0 new: false - exclude-files: - - ".+_test.go" - exclude-dirs: - - "vendor$" + fix: false output: formats: - - format: line-number - sort-results: true + text: + print-linter-name: true + print-issued-lines: true -linters-settings: - govet: - check-shadowing: true - enable: - - asmdecl - - assign - - atomic - - atomicalign - - bools - - buildtag - - cgocall - - composites - - copylocks - - deepequalerrors - - errorsas - - findcall - - framepointer - - httpresponse - - ifaceassert - - loopclosure - - lostcancel - - nilfunc - - nilness - - printf - - reflectvaluecompare - - shadow - - shift - - sigchanyzer - - sortslice - - stdmethods - - stringintconv - - structtag - - testinggoroutine - - tests - - unmarshal - - unreachable - - unsafeptr - - unusedresult - - unusedwrite - disable: - - fieldalignment - gofmt: - simplify: true - errcheck: - check-type-assertions: true - check-blank: true - gocyclo: - min-complexity: 30 - misspell: - locale: US - prealloc: - simple: true - range-loops: true - for-loops: true - unparam: - check-exported: false - gci: - skip-generated: true - custom-order: false - gosec: - includes: - - G101 # Look for hard coded credentials - - G102 # Bind to all interfaces - - G103 # Audit the use of unsafe block - - G104 # Audit errors not checked - - G106 # Audit the use of ssh.InsecureIgnoreHostKey - - G107 # Url provided to HTTP request as taint input - - G108 # Profiling endpoint automatically exposed on /debug/pprof - - G109 # Potential Integer overflow made by strconv.Atoi result conversion to int16/32 - - G110 # Potential DoS vulnerability via decompression bomb - - G111 # Potential directory traversal - - G112 # Potential slowloris attack - - G113 # Usage of Rat.SetString in math/big with an overflow (CVE-2022-23772) - - G114 # Use of net/http serve function that has no support for setting timeouts - - G201 # SQL query construction using format string - - G202 # SQL query construction using string concatenation - - G203 # Use of unescaped data in HTML templates - - G204 # Audit use of command execution - - G301 # Poor file permissions used when creating a directory - - G302 # Poor file permissions used with chmod - - G303 # Creating tempfile using a predictable path - - G304 # File path provided as taint input - - G305 # File traversal when extracting zip/tar archive - - G306 # Poor file permissions used when writing to a new file - - G307 # Deferring a method which returns an error - - G401 # Detect the usage of DES, RC4, MD5 or SHA1 - - G402 # Look for bad TLS connection settings - - G403 # Ensure minimum RSA key length of 2048 bits - - G404 # Insecure random number source (rand) - - G501 # Import blocklist: crypto/md5 - - G502 # Import blocklist: crypto/des - - G503 # Import blocklist: crypto/rc4 - - G504 # Import blocklist: net/http/cgi - - G505 # Import blocklist: crypto/sha1 - - G601 # Implicit memory aliasing of items from a range statement - excludes: - - G101 # Look for hard coded credentials - - G102 # Bind to all interfaces - - G103 # Audit the use of unsafe block - - G104 # Audit errors not checked - - G106 # Audit the use of ssh.InsecureIgnoreHostKey - - G107 # Url provided to HTTP request as taint input - - G108 # Profiling endpoint automatically exposed on /debug/pprof - - G109 # Potential Integer overflow made by strconv.Atoi result conversion to int16/32 - - G110 # Potential DoS vulnerability via decompression bomb - - G111 # Potential directory traversal - - G112 # Potential slowloris attack - - G113 # Usage of Rat.SetString in math/big with an overflow (CVE-2022-23772) - - G114 # Use of net/http serve function that has no support for setting timeouts - - G201 # SQL query construction using format string - - G202 # SQL query construction using string concatenation - - G203 # Use of unescaped data in HTML templates - - G204 # Audit use of command execution - - G301 # Poor file permissions used when creating a directory - - G302 # Poor file permissions used with chmod - - G303 # Creating tempfile using a predictable path - - G304 # File path provided as taint input - - G305 # File traversal when extracting zip/tar archive - - G306 # Poor file permissions used when writing to a new file - - G307 # Deferring a method which returns an error - - G401 # Detect the usage of DES, RC4, MD5 or SHA1 - - G402 # Look for bad TLS connection settings - - G403 # Ensure minimum RSA key length of 2048 bits - - G404 # Insecure random number source (rand) - - G501 # Import blocklist: crypto/md5 - - G502 # Import blocklist: crypto/des - - G503 # Import blocklist: crypto/rc4 - - G504 # Import blocklist: net/http/cgi - - G505 # Import blocklist: crypto/sha1 - - G601 # Implicit memory aliasing of items from a range statement - exclude-generated: true - severity: medium - confidence: medium - concurrency: 12 - config: - global: - nosec: true - "#nosec": "#my-custom-nosec" - show-ignored: true - audit: true - G101: - pattern: "(?i)passwd|pass|password|pwd|secret|token|pw|apiKey|bearer|cred" - ignore_entropy: false - entropy_threshold: "80.0" - per_char_threshold: "3.0" - truncate: "32" - G104: - fmt: - - Fscanf - G111: - pattern: "http\\.Dir\\(\"\\/\"\\)|http\\.Dir\\('\\/'\\)" - G301: "0750" - G302: "0600" - G306: "0600" - - lll: - line-length: 130 - tab-width: 1 - staticcheck: - go: "1.15" - # SAxxxx checks in https://staticcheck.io/docs/configuration/options/#checks - # Default: ["*"] - checks: [ "*", "-SA1019" ] +formatters: + exclusions: + paths: + - vendors/ + enable: + - gofmt + - goimports linters: - disable-all: true + settings: + staticcheck: + checks: + - all + - -S1023 + - -ST1000 + - -ST1003 + - -ST1020 + gosec: + excludes: + - G104 + - G115 + - G301 + - G304 + - G306 + - G501 + - G505 + exclusions: + paths: + - vendors/ + default: none enable: - govet - - gofmt - errcheck - misspell - gocyclo - ineffassign - - goimports - - nakedret - unparam - unused - prealloc @@ -206,7 +65,4 @@ linters: - nilerr - errorlint - bodyclose - - exportloopref - gosec - - lll - fast: false diff --git a/encoding/base62/base62.go b/encoding/base62/base62.go index 33ef862..08051ac 100644 --- a/encoding/base62/base62.go +++ b/encoding/base62/base62.go @@ -5,7 +5,11 @@ package base62 -import "go.osspkg.com/algorithms/sorts" +import ( + "bytes" + + "go.osspkg.com/algorithms/sorts" +) const size = 62 @@ -40,8 +44,8 @@ func (v *Base62) Encode(id uint64) string { func (v *Base62) Decode(data string) uint64 { var id uint64 - for _, b := range []byte(data) { - id = id*size + v.dec[b] + for _, r := range data { + id = id*size + uint64(bytes.IndexRune(v.enc, r)) } return id } diff --git a/encoding/base62/base62_test.go b/encoding/base62/base62_test.go index 0f0d6f5..0e4dfaa 100644 --- a/encoding/base62/base62_test.go +++ b/encoding/base62/base62_test.go @@ -6,7 +6,6 @@ package base62 import ( - "fmt" "math" "testing" @@ -19,26 +18,27 @@ func TestEncode_EncodeDecode(t *testing.T) { id uint64 want string }{ - {name: "Case1", id: 1, want: "p"}, - {name: "Case1", id: 2, want: "L"}, - {name: "Case1", id: 3, want: "K"}, - {name: "Case1", id: 4, want: "G"}, - {name: "Case1", id: 5, want: "R"}, - {name: "Case1", id: 6, want: "S"}, - {name: "Case1", id: 7, want: "u"}, - {name: "Case1", id: 8, want: "D"}, - {name: "Case1", id: 9, want: "v"}, - {name: "Case2", id: 10, want: "o"}, - {name: "Case3", id: 100, want: "pH"}, - {name: "Case4", id: 1000, want: "PD"}, - {name: "Case5", id: 10000, want: "LIn"}, - {name: "Case6", id: 100000, want: "c0k"}, - {name: "Case7", id: 1000000000, want: "pRmUWP"}, - {name: "Case8", id: 999999, want: "Glvp"}, - {name: "Case20", id: math.MaxUint64, want: "XNWjtpSoji4"}, + {name: "Case1", id: 0, want: ""}, + {name: "Case1", id: 1, want: "1"}, + {name: "Case1", id: 2, want: "2"}, + {name: "Case1", id: 3, want: "3"}, + {name: "Case1", id: 4, want: "4"}, + {name: "Case1", id: 5, want: "5"}, + {name: "Case1", id: 6, want: "6"}, + {name: "Case1", id: 7, want: "7"}, + {name: "Case1", id: 8, want: "8"}, + {name: "Case1", id: 9, want: "9"}, + {name: "Case2", id: 10, want: "Q"}, + {name: "Case3", id: 100, want: "1z"}, + {name: "Case4", id: 1000, want: "E8"}, + {name: "Case5", id: 10000, want: "2aC"}, + {name: "Case6", id: 100000, want: "H0m"}, + {name: "Case7", id: 1000000000, want: "15xjeE"}, + {name: "Case8", id: 999999, want: "4Z91"}, + {name: "Case20", id: math.MaxUint64, want: "VleDq16QDLX"}, } - v := New("0pLKGRSuDvorlO14Pjnd7XgQw9c8YhaIJ5iqtIHy3mWxM6C2TeAbFVBUkZfsNz") + v := New("0123456789QAZWSXEDCRFVTGBYHNUJMIKOLPqazwsxedcrfvtgbyhnujmikolp") for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -50,30 +50,41 @@ func TestEncode_EncodeDecode(t *testing.T) { } } +func TestCheckAll(t *testing.T) { + v := New("0123456789QAZWSXEDCRFVTGBYHNUJMIKOLPqazwsxedcrfvtgbyhnujmikolp") + + for i := 0; i <= 1000; i++ { + h := v.Encode(uint64(i)) + id := v.Decode(h) + if uint64(i) != id { + t.Errorf("case %d: want %d got %d", i, i, id) + } + } +} + func TestEncode_Encode(t *testing.T) { tests := []struct { name string str string want uint64 }{ - {name: "Case1", str: "a", want: 30}, - {name: "Case2", str: "aa", want: 1890}, - {name: "Case3", str: "aaaaaaaa", want: 107380379795850}, + {name: "Case1", str: "a", want: 37}, + {name: "Case2", str: "aa", want: 2331}, + {name: "Case3", str: "aaaaaaaa", want: 132435801748215}, } - v := New("0pLKGRSuDvorlO14Pjnd7XgQw9c8YhaIJ5iqtIHy3mWxM6C2TeAbFVBUkZfsNz") + v := New("0123456789QAZWSXEDCRFVTGBYHNUJMIKOLPqazwsxedcrfvtgbyhnujmikolp") for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { h := v.Decode(tt.str) - fmt.Println(h) casecheck.Equal(t, tt.want, h) }) } } func Benchmark_base62(b *testing.B) { - v := New("0pLKGRSuDvorlO14Pjnd7XgQw9c8YhaIJ5iqtIHy3mWxM6C2TeAbFVBUkZfsNz") + v := New("0123456789QAZWSXEDCRFVTGBYHNUJMIKOLPqazwsxedcrfvtgbyhnujmikolp") b.ReportAllocs() b.ResetTimer() @@ -84,3 +95,29 @@ func Benchmark_base62(b *testing.B) { } }) } + +func Benchmark_base62_encode(b *testing.B) { + v := New("0123456789QAZWSXEDCRFVTGBYHNUJMIKOLPqazwsxedcrfvtgbyhnujmikolp") + + b.ReportAllocs() + b.ResetTimer() + + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + v.Encode(math.MaxUint64) + } + }) +} + +func Benchmark_base62_decode(b *testing.B) { + v := New("0123456789QAZWSXEDCRFVTGBYHNUJMIKOLPqazwsxedcrfvtgbyhnujmikolp") + + b.ReportAllocs() + b.ResetTimer() + + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + v.Decode("XNWjtpSoji4") + } + }) +} diff --git a/go.mod b/go.mod index ea6b81e..5e7ac0e 100644 --- a/go.mod +++ b/go.mod @@ -1,7 +1,8 @@ module go.osspkg.com/algorithms -go 1.22.11 +go 1.23.11 -require go.osspkg.com/casecheck v0.3.0 - -require github.com/cespare/xxhash/v2 v2.3.0 +require ( + github.com/cespare/xxhash/v2 v2.3.0 + go.osspkg.com/casecheck v0.3.0 +) diff --git a/structs/bitmap/bitmap.go b/structs/bitmap/bitmap.go new file mode 100644 index 0000000..640bc04 --- /dev/null +++ b/structs/bitmap/bitmap.go @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2019-2025 Mikhail Knyazhev . All rights reserved. + * Use of this source code is governed by a BSD 3-Clause license that can be found in the LICENSE file. + */ + +package bitmap + +import ( + "sync" +) + +const ( + blockSize = 8 +) + +type Bitmap struct { + data []byte + size uint64 + max uint64 + mux sync.RWMutex + lockoff bool +} + +type Option func(*Bitmap) + +func DisableLock() Option { + return func(o *Bitmap) { + o.lockoff = true + } +} + +func New(maxIndex uint64, opts ...Option) *Bitmap { + size := maxIndex/blockSize + maxIndex%blockSize + + bm := &Bitmap{ + max: size * blockSize, + size: size, + data: make([]byte, size), + } + + for _, opt := range opts { + opt(bm) + } + + return bm +} + +func (b *Bitmap) Set(index uint64) { + if index > b.max { + return + } + + if !b.lockoff { + b.mux.Lock() + defer b.mux.Unlock() + } + + b.data[index%b.size] |= 1 << (index % blockSize) +} + +func (b *Bitmap) Del(index uint64) { + if index > b.max { + return + } + + if !b.lockoff { + b.mux.Lock() + defer b.mux.Unlock() + } + + b.data[index%b.size] &^= 1 << (index % blockSize) +} + +func (b *Bitmap) Has(index uint64) bool { + if index > b.max { + return false + } + + if !b.lockoff { + b.mux.RLock() + defer b.mux.RUnlock() + } + + return (b.data[index%b.size] & (1 << (index % blockSize))) > 0 +} + +func (b *Bitmap) Dump() []byte { + if !b.lockoff { + b.mux.RLock() + defer b.mux.RUnlock() + } + + out := make([]byte, b.size) + copy(out, b.data) + return out +} + +func (b *Bitmap) Restore(in []byte) { + if !b.lockoff { + b.mux.Lock() + defer b.mux.Unlock() + } + + b.data = make([]byte, len(in)) + copy(b.data, in) + + b.size = uint64(len(in)) + b.max = b.size * blockSize +} diff --git a/structs/bitmap/bitmap_test.go b/structs/bitmap/bitmap_test.go new file mode 100644 index 0000000..b369cf0 --- /dev/null +++ b/structs/bitmap/bitmap_test.go @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2019-2025 Mikhail Knyazhev . All rights reserved. + * Use of this source code is governed by a BSD 3-Clause license that can be found in the LICENSE file. + */ + +package bitmap + +import ( + "math" + "testing" + + "go.osspkg.com/casecheck" +) + +func TestUnit_Bitmap_calcBlockIndex(t *testing.T) { + bm := New(65) + + for i := 0; i <= 65; i++ { + bm.Set(uint64(i)) + + casecheck.True(t, bm.Has(uint64(i)), "(1) for index: %d", i) + casecheck.False(t, bm.Has(uint64(i+1)), "(2) for index: %d", i+1) + } + + backup := bm.Dump() + + bm.Restore(make([]byte, len(backup))) + + for i := 0; i <= 65; i++ { + casecheck.False(t, bm.Has(uint64(i)), "(3) for index: %d", i) + casecheck.False(t, bm.Has(uint64(i+1)), "(4) for index: %d", i+1) + } + + bm.Restore(backup) + + for i := 65; i >= 0; i-- { + casecheck.True(t, bm.Has(uint64(i)), "(1) for index: %d", i) + casecheck.False(t, bm.Has(uint64(i+1)), "(2) for index: %d", i+1) + + bm.Del(uint64(i)) + casecheck.False(t, bm.Has(uint64(i)), "(3) for index: %d", i+1) + } + +} + +/* +goos: linux +goarch: amd64 +pkg: go.osspkg.com/algorithms/structs/bitmap +cpu: 12th Gen Intel(R) Core(TM) i9-12900KF +Benchmark_Bitmap +Benchmark_Bitmap-4 9942369 162.7 ns/op 0 B/op 0 allocs/op +*/ +func Benchmark_Bitmap(b *testing.B) { + index := uint64(math.MaxInt16) + bm := New(index) + + b.ReportAllocs() + b.ResetTimer() + + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + bm.Set(index) + bm.Has(index) + bm.Del(index) + } + }) +} diff --git a/structs/bloom/bloom.go b/structs/bloom/bloom.go index 1db408e..9764f2d 100644 --- a/structs/bloom/bloom.go +++ b/structs/bloom/bloom.go @@ -16,22 +16,31 @@ package bloom import ( + "bufio" + "bytes" "crypto/rand" + "encoding" "encoding/binary" + "encoding/gob" + "encoding/json" "fmt" "hash" + "io" "math" + "strconv" "sync" "github.com/cespare/xxhash/v2" + + "go.osspkg.com/algorithms/structs/bitmap" ) -const blockSize = 64 +const saltSize = 8 type Bloom struct { - bits []uint64 + bits *bitmap.Bitmap size uint64 - salts [][64]byte + salts [][saltSize]byte optSize uint64 optRate float64 @@ -58,7 +67,7 @@ func Quantity(size uint64, rate float64) Option { func New(opts ...Option) (*Bloom, error) { b := &Bloom{ optSize: 10_000_000, - optRate: 0.001, + optRate: 0.1, pool: &sync.Pool{New: func() any { return xxhash.New() }}, } @@ -69,95 +78,220 @@ func New(opts ...Option) (*Bloom, error) { if b.optSize == 0 { return nil, fmt.Errorf("bitset size cannot be 0") } - if b.optRate <= 0 || b.optRate >= 1.0 { - return nil, fmt.Errorf("false positive rate must be between 0 and 1") + if b.optRate <= 0.0 || b.optRate >= 1.0 { + return nil, fmt.Errorf("false positive rate must be between 0.0 and 1.0") } - m, k := b.calcOptimalParams(b.optSize, b.optRate) + m, k := calcOptimalParams(b.optSize, b.optRate) b.size = m - b.bits = make([]uint64, m/blockSize+1) - b.salts = make([][64]byte, k) + b.bits = bitmap.New(m, bitmap.DisableLock()) + b.salts = make([][saltSize]byte, k) for i := 0; i < int(k); i++ { if _, err := rand.Read(b.salts[i][:]); err != nil { return nil, fmt.Errorf("generate hash salt: %w", err) } + + b.salts[i] = [saltSize]byte(bytes.ReplaceAll(b.salts[i][:], []byte("\n"), []byte("~"))) } return b, nil } -func (b *Bloom) MaxElements() uint64 { - b.mux.RLock() - defer b.mux.RUnlock() +func (b *Bloom) Dump(w io.Writer) error { + if _, err := w.Write([]byte("OSSPkg:bloom\n")); err != nil { + return fmt.Errorf("write header: %w", err) + } - return b.size -} + if _, err := fmt.Fprintf(w, "%d\n", len(b.salts)); err != nil { + return fmt.Errorf("write salt count: %w", err) + } + + for _, salt := range b.salts { + if _, err := w.Write(salt[:]); err != nil { + return fmt.Errorf("write salt: %w", err) + } + + if _, err := w.Write([]byte("\n")); err != nil { + return fmt.Errorf("write salt: %w", err) + } + } -func (b *Bloom) SizeBytes() int { b.mux.RLock() defer b.mux.RUnlock() - return len(b.bits) * 8 + if _, err := w.Write(b.bits.Dump()); err != nil { + return fmt.Errorf("write bitmap: %w", err) + } + + return nil } -func (b *Bloom) Add(v []byte) { - b.mux.Lock() - defer b.mux.Unlock() +func (b *Bloom) Restore(r io.Reader) error { + reader := bufio.NewReader(r) - for i := 0; i < len(b.salts)-1; i++ { - p := b.createHash(i, v) - index, num := b.getIndex(p) - b.bits[index] |= num + head, err := reader.ReadBytes('\n') + if err != nil { + return fmt.Errorf("read header: %w", err) + } + if !bytes.Equal(head[:len(head)-1], []byte("OSSPkg:bloom")) { + return fmt.Errorf("invalid header") } -} -func (b *Bloom) Contain(v []byte) bool { - b.mux.RLock() - defer b.mux.RUnlock() + countSalt, err := reader.ReadBytes('\n') + if err != nil { + return fmt.Errorf("read countSalt: %w", err) + } + + count, err := strconv.Atoi(string(countSalt[:len(countSalt)-1])) + if err != nil { + return fmt.Errorf("invalid countSalt: %w", err) + } + + if count <= 0 { + return fmt.Errorf("invalid countSalt: got negative value") + } - for i := 0; i < len(b.salts)-1; i++ { - p := b.createHash(i, v) - index, num := b.getIndex(p) - if b.bits[index]&num > 0 { - continue + b.salts = make([][saltSize]byte, count) + + for i := 0; i < count; i++ { + salt, err0 := reader.ReadBytes('\n') + if err0 != nil { + return fmt.Errorf("read salt[%d]: %w", i, err0) + } + + salt = salt[:len(salt)-1] + if len(salt) != saltSize { + return fmt.Errorf("invalid salt[%d], want 64 got %d", i, len(salt)) } - return false + + b.salts[i] = [saltSize]byte(salt) } - return true + + bm, err := io.ReadAll(reader) + if err != nil { + return fmt.Errorf("read bitmap: %w", err) + } + + b.mux.Lock() + defer b.mux.Unlock() + + b.bits.Restore(bm) + + return nil } -func (b *Bloom) createHash(saltIndex int, key []byte) uint64 { +func (b *Bloom) Add(arg any) { h, ok := b.pool.Get().(hash.Hash) if !ok { panic("failed get hash function from pool") } defer func() { - h.Reset() b.pool.Put(h) }() - h.Write(key) - h.Write(b.salts[saltIndex][:]) + val := anyToBytes(arg) - return binary.BigEndian.Uint64(h.Sum(nil)) % b.size + b.mux.Lock() + defer b.mux.Unlock() + + for i := 0; i < len(b.salts); i++ { + h.Reset() + h.Write(val) + h.Write(b.salts[i][:]) + key := binary.BigEndian.Uint64(h.Sum(nil)) % b.size + + b.bits.Set(key) + } } -func (*Bloom) getIndex(p uint64) (uint64, uint64) { - index := uint64(math.Ceil(float64(p+1)/blockSize)) - 1 - num := uint64(1) << (p - index*blockSize) - return index, num +func (b *Bloom) Contain(arg any) bool { + h, ok := b.pool.Get().(hash.Hash) + if !ok { + panic("failed get hash function from pool") + } + defer func() { + b.pool.Put(h) + }() + + val := anyToBytes(arg) + + b.mux.RLock() + defer b.mux.RUnlock() + + for i := 0; i < len(b.salts); i++ { + h.Reset() + h.Write(val) + h.Write(b.salts[i][:]) + key := binary.BigEndian.Uint64(h.Sum(nil)) % b.size + + if !b.bits.Has(key) { + return false + } + } + return true } -func (*Bloom) calcOptimalParams(n uint64, p float64) (m, k uint64) { - m = uint64(math.Ceil(-float64(n) * math.Log(p) / math.Pow(math.Log(2.0), 2.0))) - if m == 0 { - m = 1 +func calcOptimalParams(n uint64, p float64) (uint64, uint64) { + m := -(float64(n) * math.Log(p)) / math.Pow(math.Log(2.0), 2.0) + if m < 1 { + m = 1.0 + } + k := (m / float64(n)) * math.Log(2.0) + if k < 1 { + k = 1.0 } - k = uint64(math.Ceil(float64(m) * math.Log(2.0) / float64(n))) - if k == 0 { - k = 1 + return uint64(math.Ceil(m)), uint64(math.Ceil(k)) +} + +type byter interface { + Bytes() []byte +} + +func anyToBytes(arg any) []byte { + switch value := arg.(type) { + case []byte: + return value + case byter: + return value.Bytes() + case string: + return []byte(value) + case fmt.Stringer: + return []byte(value.String()) + case int64: + return binary.AppendVarint(nil, value) + case int32: + return binary.AppendVarint(nil, int64(value)) + case int16: + return binary.AppendVarint(nil, int64(value)) + case int8: + return binary.AppendVarint(nil, int64(value)) + case int: + return binary.AppendVarint(nil, int64(value)) + case uint64: + return binary.AppendUvarint(nil, value) + case uint32: + return binary.AppendUvarint(nil, uint64(value)) + case uint16: + return binary.AppendUvarint(nil, uint64(value)) + case uint8: + return binary.AppendUvarint(nil, uint64(value)) + case uint: + return binary.AppendUvarint(nil, uint64(value)) + case json.Marshaler: + bb, _ := value.MarshalJSON() + return bb + case encoding.BinaryMarshaler: + bb, _ := value.MarshalBinary() + return bb + case encoding.TextMarshaler: + bb, _ := value.MarshalText() + return bb + case gob.GobEncoder: + bb, _ := value.GobEncode() + return bb + default: + return []byte(fmt.Sprintf("%+v", arg)) } - return } diff --git a/structs/bloom/bloom_test.go b/structs/bloom/bloom_test.go index 1cb40b5..dc208b0 100644 --- a/structs/bloom/bloom_test.go +++ b/structs/bloom/bloom_test.go @@ -3,77 +3,92 @@ * Use of this source code is governed by a BSD 3-Clause license that can be found in the LICENSE file. */ -package bloom_test +package bloom import ( + "bytes" "crypto/md5" "crypto/sha1" "crypto/sha256" - "fmt" "hash" "hash/fnv" + "reflect" "testing" "github.com/cespare/xxhash/v2" "go.osspkg.com/casecheck" - - "go.osspkg.com/algorithms/structs/bloom" ) func TestUnit_Bloom(t *testing.T) { - bf, err := bloom.New(bloom.Quantity(1000, 0.00001)) + bf, err := New(Quantity(1000, 0.00001)) casecheck.NoError(t, err) - bf.Add([]byte("hello")) - bf.Add([]byte("user")) - bf.Add([]byte("home")) + bf.Add("hello") + bf.Add("user") + bf.Add("home") + + casecheck.False(t, bf.Contain("users")) + casecheck.True(t, bf.Contain("user")) + casecheck.True(t, bf.Contain("hello")) + casecheck.True(t, bf.Contain("home")) - casecheck.False(t, bf.Contain([]byte("users"))) - casecheck.True(t, bf.Contain([]byte("user"))) + buf := bytes.NewBuffer(nil) + casecheck.NoError(t, bf.Dump(buf)) + //fmt.Println(string(buf.Bytes())) + casecheck.NoError(t, bf.Restore(buf)) + + casecheck.False(t, bf.Contain("users")) + casecheck.True(t, bf.Contain("user")) + casecheck.True(t, bf.Contain("hello")) + casecheck.True(t, bf.Contain("home")) } func TestUnit_Bloom2(t *testing.T) { - _, err := bloom.New(bloom.Quantity(0, 0.00001)) + _, err := New(Quantity(0, 0.00001)) casecheck.Error(t, err) - _, err = bloom.New(bloom.Quantity(1, 1)) + _, err = New(Quantity(1, 1)) casecheck.Error(t, err) - _, err = bloom.New(bloom.Quantity(1, 0.0001)) + _, err = New(Quantity(1, 0.0001)) casecheck.NoError(t, err) } func runBloom(b *testing.B, size uint64, rate float64, h func() hash.Hash) { - bf, err := bloom.New(bloom.Quantity(size, rate), bloom.HashFunc(h)) + bf, err := New(Quantity(size, rate), HashFunc(h)) if err != nil { b.FailNow() } b.ResetTimer() b.ReportAllocs() - b.ReportMetric(float64(bf.SizeBytes()/(1024*1024)), "Mb") - b.ReportMetric(float64(bf.MaxElements()), "elm") - - var i int - for i = 0; i < b.N; i++ { - bf.Add([]byte(fmt.Sprintf("u%d", i))) - if !bf.Contain([]byte(fmt.Sprintf("u%d", i))) { - b.Log(i) - b.FailNow() - } - } - if bf.Contain([]byte(fmt.Sprintf("u%d", i+1))) { - b.Log(i + 1) - b.FailNow() + for i := 0; i < b.N; i++ { + bf.Add(i) + if !bf.Contain(i) { + b.Fatal(i) + } } } const ( vSize = 10_000_000 - vRate = 0.0001 + vRate = 0.1 ) +/* +goos: linux +goarch: amd64 +pkg: go.osspkg.com/algorithms/structs/bloom +cpu: 12th Gen Intel(R) Core(TM) i9-12900KF +Benchmark_Bloom_fnv128-4 2435511 499.1 ns/op 224 B/op 19 allocs/op +Benchmark_Bloom_fnv128a-4 2512622 486.0 ns/op 224 B/op 19 allocs/op +Benchmark_Bloom_md5-4 1253158 942.2 ns/op 160 B/op 11 allocs/op +Benchmark_Bloom_sha1-4 1000000 1077 ns/op 224 B/op 11 allocs/op +Benchmark_Bloom_sha256-4 1533015 798.8 ns/op 288 B/op 11 allocs/op +Benchmark_Bloom_xxhash-4 3036788 378.8 ns/op 96 B/op 11 allocs/op +*/ + func Benchmark_Bloom_fnv128(b *testing.B) { runBloom(b, vSize, vRate, fnv.New128) } @@ -99,3 +114,44 @@ func Benchmark_Bloom_xxhash(b *testing.B) { return xxhash.New() }) } + +func TestUnit_anyToBytes(t *testing.T) { + tests := []struct { + name string + arg any + want []byte + }{ + { + name: "case Bytes", + arg: []byte("hello"), + want: []byte("hello"), + }, + { + name: "case String", + arg: "hello", + want: []byte("hello"), + }, + { + name: "case Int", + arg: 12345, + want: []byte{242, 192, 1}, + }, + { + name: "case Struct", + arg: struct{ A int }{A: 1}, + want: []byte("{A:1}"), + }, + { + name: "case Ptr", + arg: &struct{ A int }{A: 1}, + want: []byte("&{A:1}"), + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := anyToBytes(tt.arg); !reflect.DeepEqual(got, tt.want) { + t.Errorf("anyToBytes() = %v, want %v", got, string(tt.want)) + } + }) + } +}