-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdiffx.go
More file actions
194 lines (170 loc) · 4.69 KB
/
diffx.go
File metadata and controls
194 lines (170 loc) · 4.69 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
// Package diffx implements the Myers O(ND) diff algorithm with heuristics
// for better output quality on large files with many small, scattered changes.
//
// Unlike simple Myers implementations, diffx includes:
// - Preprocessing: Filters out high-frequency elements that cause spurious matches
// - Heuristics: Early termination for expensive comparisons
// - Postprocessing: Shifts diff boundaries for more readable output
package diffx
// OpType identifies the type of edit operation.
type OpType int
const (
// Equal means the elements are unchanged.
Equal OpType = iota
// Insert means elements were added to B that are not in A.
Insert
// Delete means elements were removed from A that are not in B.
Delete
)
// String returns a string representation of the OpType.
func (t OpType) String() string {
switch t {
case Equal:
return "Equal"
case Insert:
return "Insert"
case Delete:
return "Delete"
default:
return "Unknown"
}
}
// DiffOp represents a single edit operation with index ranges.
type DiffOp struct {
Type OpType
AStart int // start index in sequence A (inclusive)
AEnd int // end index in sequence A (exclusive)
BStart int // start index in sequence B (inclusive)
BEnd int // end index in sequence B (exclusive)
}
// options holds configuration for the diff algorithm.
type options struct {
useHeuristic bool
forceMinimal bool
costLimit int
preprocessing bool
postprocessing bool
anchorElimination bool
}
// defaultOptions returns options with sensible defaults.
func defaultOptions() *options {
return &options{
useHeuristic: true,
forceMinimal: false,
costLimit: 0, // auto-calculated
preprocessing: true,
postprocessing: true,
anchorElimination: true,
}
}
// Option configures diff behavior.
type Option func(*options)
// WithHeuristic enables or disables speed heuristics.
// Default: true.
func WithHeuristic(enabled bool) Option {
return func(o *options) {
o.useHeuristic = enabled
}
}
// WithMinimal forces minimal edit script even if slow.
// Default: false.
func WithMinimal(minimal bool) Option {
return func(o *options) {
o.forceMinimal = minimal
if minimal {
o.useHeuristic = false
}
}
}
// WithCostLimit sets custom early termination threshold.
// 0 means auto-calculate based on input size.
// Default: 0.
func WithCostLimit(n int) Option {
return func(o *options) {
o.costLimit = n
}
}
// WithPreprocessing enables or disables confusing element filtering.
// Default: true.
func WithPreprocessing(enabled bool) Option {
return func(o *options) {
o.preprocessing = enabled
}
}
// WithPostprocessing enables or disables boundary shifting.
// Default: true.
func WithPostprocessing(enabled bool) Option {
return func(o *options) {
o.postprocessing = enabled
}
}
// Diff compares two string slices using the Myers algorithm.
// For histogram-style diff, use DiffHistogram instead.
func Diff(a, b []string, opts ...Option) []DiffOp {
return DiffElements(toElements(a), toElements(b), opts...)
}
// DiffElements compares arbitrary Element slices using the Myers algorithm.
// For histogram-style diff, use DiffElementsHistogram instead.
func DiffElements(a, b []Element, opts ...Option) []DiffOp {
// Apply options
o := defaultOptions()
for _, opt := range opts {
opt(o)
}
// Handle trivial cases
if len(a) == 0 && len(b) == 0 {
return nil
}
if len(a) == 0 {
return []DiffOp{{
Type: Insert,
AStart: 0,
AEnd: 0,
BStart: 0,
BEnd: len(b),
}}
}
if len(b) == 0 {
return []DiffOp{{
Type: Delete,
AStart: 0,
AEnd: len(a),
BStart: 0,
BEnd: 0,
}}
}
// Keep original sequences for postprocessing
origA, origB := a, b
// Create context and run algorithm
ctx := newDiffContext(a, b, o)
// Preprocessing: filter confusing elements
var mapping *indexMapping
if o.preprocessing {
a, b, mapping = filterConfusingElements(a, b)
if len(a) > 0 || len(b) > 0 {
// Re-create context with filtered sequences
ctx = newDiffContext(a, b, o)
}
}
// Run the core algorithm
if len(a) > 0 || len(b) > 0 {
ctx.compareSeq(0, len(a), 0, len(b), o.forceMinimal)
}
// Build operations from change marks
ops := ctx.buildOps()
// Map indices back to original sequences
if mapping != nil {
ops = mapping.mapOps(ops)
}
// Anchor elimination: remove weak anchors (short, high-frequency Equal regions)
// This must happen before boundary shifting so the shifted boundaries are clean
if o.anchorElimination {
ops = eliminateWeakAnchors(ops, origA, origB)
}
// Postprocessing: shift boundaries for readability
// Use original sequences since ops now have original indices
if o.postprocessing {
ops = shiftBoundaries(ops, origA, origB)
}
return ops
}