Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 37 additions & 10 deletions parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ func Parse(input string, opts ...any) ([]Word, error) {
subs, stats := StringToSubParts(input)

p := &ParserConfig{
SmartAcronyms: true,
NumberSplitting: false,
SmartAcronyms: true,
NumberMode: NumberModeNone,
}

for _, opt := range opts {
Expand Down Expand Up @@ -55,10 +55,25 @@ type ParserConfig struct {
// should be treated as AcronymWord instead of UpperCaseWord.
// Defaults to true.
SmartAcronyms bool
// NumberSplitting controls whether to split on letter-digit boundaries.
NumberSplitting bool
// NumberMode controls how numbers are handled during word splitting.
NumberMode NumberMode
}

// NumberMode defines the strategy for handling numbers during parsing.
// It is stored in ParserConfig.NumberMode and selected with WithNumberMode
// (or the boolean shorthand WithNumberSplitting). The zero value is
// NumberModeNone.
type NumberMode int

const (
// NumberModeNone does not perform any special number splitting.
// Digits never start or end a word on their own; the package tests expect
// "User123ID" to stay a single word in this mode.
NumberModeNone NumberMode = iota
// NumberModeSplitAlways splits on any transition between a letter and a digit.
// The package tests expect "User123ID" -> "User", "123", "ID".
NumberModeSplitAlways
// NumberModeMergeRecursive treats digits as compatible with both preceding and succeeding lowercase letters,
// preventing splits like 123test -> 123-test.
// A digit followed by an uppercase letter still starts a new word, the same
// way a lowercase -> uppercase transition does; the package tests expect
// "User123ID" -> "User123", "ID".
NumberModeMergeRecursive
// NumberModeTreatAsLowercase treats digits exactly as if they were lowercase letters for boundary detection.
// This applies to both camel-case rules: lower -> Upper and
// Upper -> Upper -> lower. As a consequence an uppercase run followed by
// digits is split before its last letter; the package tests expect
// "UPPER123" -> "UPPE", "R123".
NumberModeTreatAsLowercase
)

// ParserOption configures the parser.
type ParserOption interface {
Apply(*ParserConfig)
Expand Down Expand Up @@ -91,9 +106,21 @@ func WithSmartAcronyms(enabled bool) ParserOption {
}

// WithNumberSplitting enables or disables splitting on letter-digit boundaries.
// It is equivalent to WithNumberMode(NumberModeSplitAlways) when true, and WithNumberMode(NumberModeNone) when false.
//
// Both branches assign p.NumberMode, so applying this option replaces
// whatever mode was already set on the config.
func WithNumberSplitting(enabled bool) ParserOption {
return funcParserOption(func(p *ParserConfig) {
// NOTE(review): this assignment targets the NumberSplitting field, which
// appears alongside its NumberMode replacement in this view — it looks
// like the pre-change line of the diff; confirm it is not meant to stay.
p.NumberSplitting = enabled
// Translate the boolean into the corresponding NumberMode value.
if enabled {
p.NumberMode = NumberModeSplitAlways
} else {
p.NumberMode = NumberModeNone
}
})
}

// WithNumberMode returns a ParserOption that stores mode in
// ParserConfig.NumberMode, overriding any mode already present on the
// config when the option is applied.
func WithNumberMode(mode NumberMode) ParserOption {
	setMode := func(cfg *ParserConfig) {
		cfg.NumberMode = mode
	}
	return funcParserOption(setMode)
}

Expand Down Expand Up @@ -123,15 +150,15 @@ func DetectPartitioner(stats Stats, config ...*ParserConfig) Partitioner {
}
}

splitNumber := false
numberMode := NumberModeNone
if len(config) > 0 && config[0] != nil {
splitNumber = config[0].NumberSplitting
numberMode = config[0].NumberMode
}

return NewPartitioner(PartitionerConfig{
Delimiters: delimiters,
SplitCamel: true,
SplitNumber: splitNumber,
Delimiters: delimiters,
SplitCamel: true,
NumberMode: numberMode,
})
}

Expand Down
31 changes: 26 additions & 5 deletions parts.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ func CamelCasePartitioner(subs []SubPart) []Part {
// PartitionerConfig holds the settings passed to NewPartitioner.
type PartitionerConfig struct {
// Delimiters is keyed by rune; presumably the runes that separate words —
// confirm against NewPartitioner.
Delimiters map[rune]bool
// SplitCamel enables the case-transition checks: lower -> Upper, and
// Upper -> Upper -> lower (e.g. "PDFLoader" splits before the "L").
SplitCamel bool
// SplitNumber enables splitting on letter/digit transitions.
// NOTE(review): shown next to NumberMode in this view; it looks like the
// pre-change field that NumberMode replaces — confirm which one is live.
SplitNumber bool
// NumberMode selects how digits take part in boundary detection.
NumberMode NumberMode
PreserveSep bool // If true, delimiters are returned as SeparatorPart instead of discarded
}

Expand All @@ -93,27 +93,48 @@ func NewPartitioner(cfg PartitionerConfig) Partitioner {

// Transition check
isSplit := false
if (cfg.SplitCamel || cfg.SplitNumber) && i > 0 && len(current) > 0 {
if (cfg.SplitCamel || cfg.NumberMode != NumberModeNone) && i > 0 && len(current) > 0 {
prev := subs[i-1]
// Note: if prev was delimiter, current is empty or started anew.
// We rely on current being non-empty to check transitions within a word chunk.

if cfg.SplitCamel {
isPrevLower := prev.IsLower()
isPrevUpper := prev.IsUpper()
isCurrUpper := s.IsUpper()

if cfg.NumberMode == NumberModeTreatAsLowercase {
if prev.IsDigit() {
isPrevLower = true
}
}

// lower -> Upper
if prev.IsLower() && s.IsUpper() {
if isPrevLower && isCurrUpper {
isSplit = true
}

// Upper -> Upper -> lower (PDFLoader split at L)
if i+1 < len(subs) {
next := subs[i+1]
if prev.IsUpper() && s.IsUpper() && next.IsLower() {
isNextLower := next.IsLower()
if cfg.NumberMode == NumberModeTreatAsLowercase && next.IsDigit() {
isNextLower = true
}
if isPrevUpper && isCurrUpper && isNextLower {
isSplit = true
}
}

// MergeRecursive specific rule: digit -> Upper triggers a split, similar to lower -> Upper
if cfg.NumberMode == NumberModeMergeRecursive {
if prev.IsDigit() && isCurrUpper {
isSplit = true
}
}
}

if cfg.SplitNumber {
if cfg.NumberMode == NumberModeSplitAlways {
// Letter -> Digit -> Split.
// Digit -> Letter -> Split.
if prev.IsLetter() && s.IsDigit() {
Expand Down
51 changes: 51 additions & 0 deletions parts_num_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
package strings2

import (
"reflect"
"testing"
)

// TestNumberMode verifies how Parse splits inputs that mix letters and
// digits under each NumberMode. Every case parses one input with a single
// WithNumberMode option and compares the resulting word strings, in order,
// with the expected slice.
func TestNumberMode(t *testing.T) {
	type numberModeCase struct {
		name     string
		input    string
		mode     NumberMode
		expected []string
	}

	cases := []numberModeCase{
		// None
		{name: "None_User123ID", input: "User123ID", mode: NumberModeNone, expected: []string{"User123ID"}},
		{name: "None_UPPER123", input: "UPPER123", mode: NumberModeNone, expected: []string{"UPPER123"}},
		{name: "None_123test", input: "123test", mode: NumberModeNone, expected: []string{"123test"}},

		// SplitAlways
		{name: "SplitAlways_User123ID", input: "User123ID", mode: NumberModeSplitAlways, expected: []string{"User", "123", "ID"}},
		{name: "SplitAlways_UPPER123", input: "UPPER123", mode: NumberModeSplitAlways, expected: []string{"UPPER", "123"}},
		{name: "SplitAlways_123test", input: "123test", mode: NumberModeSplitAlways, expected: []string{"123", "test"}},

		// MergeRecursive
		{name: "MergeRecursive_User123ID", input: "User123ID", mode: NumberModeMergeRecursive, expected: []string{"User123", "ID"}},
		{name: "MergeRecursive_UPPER123", input: "UPPER123", mode: NumberModeMergeRecursive, expected: []string{"UPPER123"}},
		{name: "MergeRecursive_123test", input: "123test", mode: NumberModeMergeRecursive, expected: []string{"123test"}},

		// TreatAsLowercase
		{name: "TreatAsLowercase_User123ID", input: "User123ID", mode: NumberModeTreatAsLowercase, expected: []string{"User123", "ID"}},
		{name: "TreatAsLowercase_UPPER123", input: "UPPER123", mode: NumberModeTreatAsLowercase, expected: []string{"UPPE", "R123"}},
		{name: "TreatAsLowercase_123test", input: "123test", mode: NumberModeTreatAsLowercase, expected: []string{"123test"}},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			parsed, err := Parse(tc.input, WithNumberMode(tc.mode))
			if err != nil {
				t.Fatalf("Parse failed: %v", err)
			}

			// Flatten the parsed words to their string form for comparison.
			var got []string
			for _, word := range parsed {
				got = append(got, word.String())
			}

			if reflect.DeepEqual(got, tc.expected) {
				return
			}
			t.Errorf("Parse(%q) with mode %v = %v; want %v", tc.input, tc.mode, got, tc.expected)
		})
	}
}
4 changes: 2 additions & 2 deletions types.go
Original file line number Diff line number Diff line change
Expand Up @@ -413,8 +413,8 @@ func WordsToFormattedCase(words []Word, opts ...any) (string, error) {
func PartsToFormattedCase(parts []Part, opts ...any) (string, error) {
// Extract ParserConfig from opts to use for classification
p := &ParserConfig{
SmartAcronyms: true,
NumberSplitting: false,
SmartAcronyms: true,
NumberMode: NumberModeNone,
}
for _, opt := range opts {
if o, ok := opt.(ParserOption); ok {
Expand Down
Loading