|
13 | 13 |
|
14 | 14 | If there is no mismatch, then the pattern matches the text block. |
15 | 15 |
|
16 | | -Time Complexity : O(n/m) |
| 16 | +Time Complexity : O(n/m) best case, O(nm) worst case with bad character heuristic |
17 | 17 | n=length of main string |
18 | 18 | m=length of pattern string |
| 19 | +
|
| 20 | +Note: The bad character shift requires a while loop so positions are |
| 21 | + actually skipped. A for loop ignores loop-variable reassignment. |
19 | 22 | """ |
20 | 23 |
|
21 | 24 |
|
@@ -78,23 +81,33 @@ def mismatch_in_text(self, current_pos: int) -> int: |
78 | 81 |
|
79 | 82 | def bad_character_heuristic(self) -> list[int]: |
80 | 83 | """ |
81 | | - Finds the positions of the pattern location. |
| 84 | + Finds the positions of the pattern in the text using the bad character |
| 85 | + heuristic. A while loop is used so the shift actually skips |
| 86 | + positions, allowing O(n/m) best-case performance instead of the |
| 87 | + O(nm) brute-force scan that a for loop would produce. |
82 | 88 |
|
83 | 89 | >>> bms = BoyerMooreSearch(text="ABAABA", pattern="AB") |
84 | 90 | >>> bms.bad_character_heuristic() |
85 | 91 | [0, 3] |
| 92 | + >>> bms2 = BoyerMooreSearch(text="AAAAAA", pattern="AA") |
| 93 | + >>> bms2.bad_character_heuristic() |
| 94 | + [0, 1, 2, 3, 4] |
| 95 | + >>> bms3 = BoyerMooreSearch(text="ABCDEF", pattern="XY") |
| 96 | + >>> bms3.bad_character_heuristic() |
| 97 | + [] |
86 | 98 | """ |
87 | 99 |
|
88 | 100 | positions = [] |
89 | | - for i in range(self.textLen - self.patLen + 1): |
| 101 | + i = 0 |
| 102 | + while i <= self.textLen - self.patLen: |
90 | 103 | mismatch_index = self.mismatch_in_text(i) |
91 | 104 | if mismatch_index == -1: |
92 | 105 | positions.append(i) |
| 106 | + i += 1 |
93 | 107 | else: |
94 | 108 | match_index = self.match_in_pattern(self.text[mismatch_index]) |
95 | | - i = ( |
96 | | - mismatch_index - match_index |
97 | | - ) # shifting index lgtm [py/multiple-definition] |
| 109 | + # Use max to prevent shifting backwards |
| 110 | + i = max(i + 1, mismatch_index - match_index) |
98 | 111 | return positions |
99 | 112 |
|
100 | 113 |
|
|
0 commit comments