Skip to content

Commit cc9bc8f

Browse files
authored
Introduced some options for compactor concurrency (#66)
* Add a CLI tool for running compactions.
* Use concurrency when populating symbols for multiple blocks.
* Use concurrency when writing to multiple output blocks.

Signed-off-by: Peter Štibraný <pstibrany@gmail.com>
1 parent 415354a commit cc9bc8f

5 files changed

Lines changed: 476 additions & 52 deletions

File tree

cmd/compact/main.go

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
package main
2+
3+
import (
4+
"context"
5+
"flag"
6+
"log"
7+
"os"
8+
"os/signal"
9+
"runtime/pprof"
10+
"syscall"
11+
12+
golog "github.com/go-kit/log"
13+
14+
"github.com/prometheus/prometheus/tsdb"
15+
)
16+
17+
func main() {
18+
var (
19+
outputDir string
20+
shardCount int
21+
cpuProf string
22+
segmentSizeMB int64
23+
maxClosingBlocks int
24+
symbolFlushers int
25+
)
26+
27+
flag.StringVar(&outputDir, "output-dir", ".", "Output directory for new block(s)")
28+
flag.StringVar(&cpuProf, "cpuprofile", "", "Where to store CPU profile (it not empty)")
29+
flag.IntVar(&shardCount, "shard-count", 1, "Number of shards for splitting")
30+
flag.Int64Var(&segmentSizeMB, "segment-file-size", 512, "Size of segment file")
31+
flag.IntVar(&maxClosingBlocks, "max-closing-blocks", 2, "Number of blocks that can close at once during split compaction")
32+
flag.IntVar(&symbolFlushers, "symbol-flushers", 4, "Number of symbol flushers used during split compaction")
33+
34+
flag.Parse()
35+
36+
logger := golog.NewLogfmtLogger(os.Stderr)
37+
38+
var blockDirs []string
39+
var blocks []*tsdb.Block
40+
for _, d := range flag.Args() {
41+
s, err := os.Stat(d)
42+
if err != nil {
43+
panic(err)
44+
}
45+
if !s.IsDir() {
46+
log.Fatalln("not a directory: ", d)
47+
}
48+
49+
blockDirs = append(blockDirs, d)
50+
51+
b, err := tsdb.OpenBlock(logger, d, nil)
52+
if err != nil {
53+
log.Fatalln("failed to open block:", d, err)
54+
}
55+
56+
blocks = append(blocks, b)
57+
defer b.Close()
58+
}
59+
60+
if len(blockDirs) == 0 {
61+
log.Fatalln("no blocks to compact")
62+
}
63+
64+
if cpuProf != "" {
65+
f, err := os.Create(cpuProf)
66+
if err != nil {
67+
log.Fatalln(err)
68+
}
69+
70+
log.Println("writing to", cpuProf)
71+
err = pprof.StartCPUProfile(f)
72+
if err != nil {
73+
log.Fatalln(err)
74+
}
75+
76+
defer pprof.StopCPUProfile()
77+
}
78+
79+
ctx, cancel := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
80+
defer cancel()
81+
82+
c, err := tsdb.NewLeveledCompactorWithChunkSize(ctx, nil, logger, []int64{0}, nil, segmentSizeMB*1024*1024, nil)
83+
if err != nil {
84+
log.Fatalln("creating compator", err)
85+
}
86+
87+
opts := tsdb.DefaultConcurrencyOptions()
88+
opts.MaxClosingBlocks = maxClosingBlocks
89+
opts.SymbolsFlushersCount = symbolFlushers
90+
c.SetConcurrencyOptions(opts)
91+
92+
_, err = c.CompactWithSplitting(outputDir, blockDirs, blocks, uint64(shardCount))
93+
if err != nil {
94+
log.Fatalln("compacting", err)
95+
}
96+
}

tsdb/async_block_writer.go

Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
package tsdb
2+
3+
import (
4+
"context"
5+
"fmt"
6+
7+
"github.com/pkg/errors"
8+
"go.uber.org/atomic"
9+
"golang.org/x/sync/semaphore"
10+
11+
"github.com/prometheus/prometheus/model/labels"
12+
"github.com/prometheus/prometheus/storage"
13+
"github.com/prometheus/prometheus/tsdb/chunkenc"
14+
"github.com/prometheus/prometheus/tsdb/chunks"
15+
)
16+
17+
// asyncBlockWriter runs a background goroutine that writes series and chunks to the block asynchronously.
type asyncBlockWriter struct {
	chunkPool chunkenc.Pool // Where to return chunks after writing.

	chunkw ChunkWriter // Destination for chunk data.
	indexw IndexWriter // Destination for series/index entries.

	// Bounds how many asyncBlockWriters may close their underlying
	// writers concurrently (closing can be expensive).
	closeSemaphore *semaphore.Weighted

	seriesChan chan seriesToWrite          // Incoming series to write; closed via closeAsync.
	finishedCh chan asyncBlockWriterResult // Carries the single final result of the loop goroutine.

	closed bool                   // Set once closeAsync has closed seriesChan (guards double close).
	result asyncBlockWriterResult // Cached final result, populated after finishedCh is received from.
}

// asyncBlockWriterResult is the terminal outcome of the writer goroutine:
// block statistics on success, or the first error encountered.
type asyncBlockWriterResult struct {
	stats BlockStats
	err   error
}

// seriesToWrite is one series (its labels plus chunk metas) queued for writing.
type seriesToWrite struct {
	lbls labels.Labels
	chks []chunks.Meta
}
func newAsyncBlockWriter(chunkPool chunkenc.Pool, chunkw ChunkWriter, indexw IndexWriter, closeSema *semaphore.Weighted) *asyncBlockWriter {
44+
bw := &asyncBlockWriter{
45+
chunkPool: chunkPool,
46+
chunkw: chunkw,
47+
indexw: indexw,
48+
seriesChan: make(chan seriesToWrite, 64),
49+
finishedCh: make(chan asyncBlockWriterResult, 1),
50+
closeSemaphore: closeSema,
51+
}
52+
53+
go bw.loop()
54+
return bw
55+
}
56+
57+
// loop doing the writes. Return value is only used by defer statement, and is sent to the channel,
// before closing it.
func (bw *asyncBlockWriter) loop() (res asyncBlockWriterResult) {
	// Deliver the final result (success or error) exactly once on finishedCh,
	// then close it so any later receives observe ok == false.
	defer func() {
		bw.finishedCh <- res
		close(bw.finishedCh)
	}()

	stats := BlockStats{}
	ref := storage.SeriesRef(0)
	// Consume queued series until closeAsync closes seriesChan.
	for sw := range bw.seriesChan {
		// Chunks are written first so the index entry can reference them.
		if err := bw.chunkw.WriteChunks(sw.chks...); err != nil {
			return asyncBlockWriterResult{err: errors.Wrap(err, "write chunks")}
		}
		if err := bw.indexw.AddSeries(ref, sw.lbls, sw.chks...); err != nil {
			return asyncBlockWriterResult{err: errors.Wrap(err, "add series")}
		}

		stats.NumChunks += uint64(len(sw.chks))
		stats.NumSeries++
		for _, chk := range sw.chks {
			stats.NumSamples += uint64(chk.Chunk.NumSamples())
		}

		// Return written chunks to the pool for reuse.
		for _, chk := range sw.chks {
			if err := bw.chunkPool.Put(chk.Chunk); err != nil {
				return asyncBlockWriterResult{err: errors.Wrap(err, "put chunk")}
			}
		}
		ref++
	}

	// Closing writers is resource-intensive; the semaphore bounds how many
	// block writers may be in this section concurrently.
	err := bw.closeSemaphore.Acquire(context.Background(), 1)
	if err != nil {
		return asyncBlockWriterResult{err: errors.Wrap(err, "failed to acquire semaphore before closing writers")}
	}
	defer bw.closeSemaphore.Release(1)

	// If everything went fine with writing so far, close writers.
	if err := bw.chunkw.Close(); err != nil {
		return asyncBlockWriterResult{err: errors.Wrap(err, "closing chunk writer")}
	}
	if err := bw.indexw.Close(); err != nil {
		return asyncBlockWriterResult{err: errors.Wrap(err, "closing index writer")}
	}

	return asyncBlockWriterResult{stats: stats}
}
105+
106+
// addSeries queues one series for asynchronous writing. It returns an error
// if the background loop has already stopped (e.g. after a write error); the
// underlying cause is cached in bw.result and reported by waitFinished.
func (bw *asyncBlockWriter) addSeries(lbls labels.Labels, chks []chunks.Meta) error {
	select {
	case bw.seriesChan <- seriesToWrite{lbls: lbls, chks: chks}:
		return nil
	case result, ok := <-bw.finishedCh:
		// The loop exited early. Remember its result (only available on the
		// first receive before the channel is closed) for waitFinished.
		if ok {
			bw.result = result
		}
		return fmt.Errorf("asyncBlockWriter doesn't run anymore")
	}
}
117+
118+
func (bw *asyncBlockWriter) closeAsync() {
119+
if !bw.closed {
120+
bw.closed = true
121+
122+
close(bw.seriesChan)
123+
}
124+
}
125+
126+
func (bw *asyncBlockWriter) waitFinished() (BlockStats, error) {
127+
// Wait for flusher to finish.
128+
result, ok := <-bw.finishedCh
129+
if ok {
130+
bw.result = result
131+
}
132+
133+
return bw.result.stats, bw.result.err
134+
}
135+
136+
type preventDoubleCloseIndexWriter struct {
137+
IndexWriter
138+
closed atomic.Bool
139+
}
140+
141+
func newPreventDoubleCloseIndexWriter(iw IndexWriter) *preventDoubleCloseIndexWriter {
142+
return &preventDoubleCloseIndexWriter{IndexWriter: iw}
143+
}
144+
145+
func (p *preventDoubleCloseIndexWriter) Close() error {
146+
if p.closed.CAS(false, true) {
147+
return p.IndexWriter.Close()
148+
}
149+
return nil
150+
}
151+
152+
type preventDoubleCloseChunkWriter struct {
153+
ChunkWriter
154+
closed atomic.Bool
155+
}
156+
157+
func newPreventDoubleCloseChunkWriter(cw ChunkWriter) *preventDoubleCloseChunkWriter {
158+
return &preventDoubleCloseChunkWriter{ChunkWriter: cw}
159+
}
160+
161+
func (p *preventDoubleCloseChunkWriter) Close() error {
162+
if p.closed.CAS(false, true) {
163+
return p.ChunkWriter.Close()
164+
}
165+
return nil
166+
}

0 commit comments

Comments
 (0)