Skip to content

Commit 4811c14

Browse files
Fix bug in Boyer-Moore bad character shift logic
1 parent e3b01ec commit 4811c14

1 file changed

Lines changed: 19 additions & 6 deletions

File tree

strings/boyer_moore_search.py

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,12 @@
1313
1414
If there is no mismatch then the pattern matches with text block.
1515
16-
Time Complexity : O(n/m)
16+
Time Complexity : O(n/m) average case with bad character heuristic
1717
n=length of main string
1818
m=length of pattern string
19+
20+
Note: The bad character shift requires a while loop so positions are
21+
actually skipped. Reassigning the loop variable inside a for loop has no effect on the next iteration.
1922
"""
2023

2124

@@ -78,23 +81,33 @@ def mismatch_in_text(self, current_pos: int) -> int:
7881

7982
def bad_character_heuristic(self) -> list[int]:
8083
"""
81-
Finds the positions of the pattern location.
84+
Finds the positions of the pattern in text using the bad character
85+
heuristic. A while loop is used so the shift actually skips
86+
positions, achieving O(n/m) average performance instead of the
87+
O(nm) brute-force that a for loop would produce.
8288
8389
>>> bms = BoyerMooreSearch(text="ABAABA", pattern="AB")
8490
>>> bms.bad_character_heuristic()
8591
[0, 3]
92+
>>> bms2 = BoyerMooreSearch(text="AAAAAA", pattern="AA")
93+
>>> bms2.bad_character_heuristic()
94+
[0, 1, 2, 3, 4]
95+
>>> bms3 = BoyerMooreSearch(text="ABCDEF", pattern="XY")
96+
>>> bms3.bad_character_heuristic()
97+
[]
8698
"""
8799

88100
positions = []
89-
for i in range(self.textLen - self.patLen + 1):
101+
i = 0
102+
while i <= self.textLen - self.patLen:
90103
mismatch_index = self.mismatch_in_text(i)
91104
if mismatch_index == -1:
92105
positions.append(i)
106+
i += 1
93107
else:
94108
match_index = self.match_in_pattern(self.text[mismatch_index])
95-
i = (
96-
mismatch_index - match_index
97-
) # shifting index lgtm [py/multiple-definition]
109+
# Use max so the window always advances by at least one position (a zero shift would loop forever; a negative one would move backwards)
110+
i = max(i + 1, mismatch_index - match_index)
98111
return positions
99112

100113

0 commit comments

Comments
 (0)