Skip to content

perf: optimize light calculations (4400x less allocs)#1192

Open
HashimTheArab wants to merge 3 commits into df-mc:master from
HashimTheArab:optimize-light-calculations
Open

perf: optimize light calculations (4400x less allocs)#1192
HashimTheArab wants to merge 3 commits into df-mc:master from
HashimTheArab:optimize-light-calculations

Conversation

@HashimTheArab
Copy link
Contributor

  • Replaced linked-list BFS queue with a ring-buffer queue for light propagation (Fill/Spread) to remove per-node heap churn.
  • Removed iterator-based neighbor traversal in propagation; now uses direct 6-neighbor checks.
  • Removed small slice allocations in world light orchestration (chunk() and spreadLight()).

Benchmark results

image image

Benchmark Code

package chunk

import (
	"testing"

	"github.com/df-mc/dragonfly/server/block/cube"
)

// BenchmarkLightAreaFill measures repeated Fill calls on a light area built
// from a single benchmark chunk via LightArea(..., 0, 0). Allocation counts
// are reported alongside timings.
func BenchmarkLightAreaFill(b *testing.B) {
	prepareLightBenchTables()

	// Chunk and area construction happens before ResetTimer, so it is excluded
	// from the measured region.
	area := LightArea([]*Chunk{benchmarkChunk()}, 0, 0)

	b.ReportAllocs()
	b.ResetTimer()

	for remaining := b.N; remaining > 0; remaining-- {
		area.Fill()
	}
}

// BenchmarkLightAreaSpread measures one full lighting pass over a 3x3 grid of
// benchmark chunks: every chunk is filled through its own single-chunk light
// area, after which Spread is called on an area covering all nine chunks.
// Allocation counts are reported alongside timings.
func BenchmarkLightAreaSpread(b *testing.B) {
	prepareLightBenchTables()

	const side = 3 // The grid spans x and z in [-1, 1].

	var (
		chunks    = make([]*Chunk, 0, side*side)
		fillAreas = make([]*lightArea, 0, side*side)
	)
	// Walk the grid row by row (z outer, x inner) so that chunks are appended
	// in the same order as a nested z/x loop would produce.
	for i := 0; i < side*side; i++ {
		x, z := i%side-1, i/side-1

		c := benchmarkChunk()
		chunks = append(chunks, c)
		fillAreas = append(fillAreas, LightArea([]*Chunk{c}, x, z))
	}
	spread := LightArea(chunks, -1, -1)

	b.ReportAllocs()
	b.ResetTimer()

	for remaining := b.N; remaining > 0; remaining-- {
		for j := range fillAreas {
			fillAreas[j].Fill()
		}
		spread.Spread()
	}
}

// prepareLightBenchTables makes sure the package-level LightBlocks and
// FilteringBlocks tables hold at least four entries and then overwrites the
// entries for runtime IDs 0-3 with the values the benchmark chunks rely on.
// A table shorter than four entries is replaced by a fresh four-entry slice.
func prepareLightBenchTables() {
	const ids = 4

	if len(LightBlocks) < ids {
		LightBlocks = make([]uint8, ids)
	}
	if len(FilteringBlocks) < ids {
		FilteringBlocks = make([]uint8, ids)
	}

	// One row per runtime ID used in benchmarkChunk().
	table := [ids]struct{ light, filter uint8 }{
		{light: 0, filter: 0},  // 0: air
		{light: 0, filter: 15}, // 1: full blocker
		{light: 14, filter: 0}, // 2: emitter
		{light: 0, filter: 2},  // 3: partial diffuser
	}
	for id, entry := range table {
		LightBlocks[id] = entry.light
		FilteringBlocks[id] = entry.filter
	}
}

// benchmarkChunk builds the chunk layout shared by the light benchmarks: a
// floor of runtime ID 1 at y=0, emitters (runtime ID 2) at y=2 on an interior
// grid and in the four corners, and a strip of runtime ID 3 at y=3, z=7. The
// meaning of each runtime ID is set up by prepareLightBenchTables.
func benchmarkChunk() *Chunk {
	const (
		blocker  = 1
		emitter  = 2
		diffuser = 3
	)

	c := New(0, cube.Range{0, 63})

	// Opaque floor covering every column of the chunk.
	for i := 0; i < 16*16; i++ {
		c.SetBlock(uint8(i/16), 0, uint8(i%16), 0, blocker)
	}

	// A few emitters spread through the chunk and on the edges to exercise both
	// fill and neighbour spreading.
	interior := [...]uint8{1, 5, 9, 13}
	for _, x := range interior {
		for _, z := range interior {
			c.SetBlock(x, 2, z, 0, emitter)
		}
	}
	c.SetBlock(0, 2, 0, 0, emitter)
	c.SetBlock(0, 2, 15, 0, emitter)
	c.SetBlock(15, 2, 0, 0, emitter)
	c.SetBlock(15, 2, 15, 0, emitter)

	// A partially diffusing strip on every even x to force filtered
	// propagation paths.
	for i := uint8(0); i < 8; i++ {
		c.SetBlock(2*i, 3, 7, 0, diffuser)
	}
	return c
}

- Replaced linked-list BFS queue with a ring-buffer queue for light propagation (Fill/Spread) to remove per-node heap churn.
- Removed iterator-based neighbor traversal in propagation; now uses direct 6-neighbor checks.
- Removed small slice allocations in world light orchestration (chunk() and spreadLight()).
@HashimTheArab HashimTheArab changed the title perf: optimize light calculations perf: optimize light calculations (4400x less allocs) Feb 25, 2026
Copy link
Member

@Sandertv Sandertv left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good changes, a ring buffer will definitely help bring down allocations. I've got a couple of minor comments.

func (a *lightArea) Fill() {
a.initialiseLightSlices()
queue := list.New()
queue := newLightQueue(512)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are these pre-allocated sizes based on anything? Could we do some tests with a real world to see what a suitable cap might be if that wasn't done yet?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We will need @AkmalFairuz to do this.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants