|
8 | 8 | Uses a bad-character shift of the rightmost character of the window to |
9 | 9 | compute shifts. |
10 | 10 |
|
| 11 | + The trick to this algorithm is `bmbc`, a lookup table with a default |
| 12 | + value equal to the length of the pattern to be searched, so that |
| 13 | + the algorithm can skip `len(pattern)` indices through the string |
| 14 | + for efficiency's sake. For example, if we're searching through the |
| 15 | + string "cotton milled paper" for the pattern "grumble", we look at |
| 16 | + the last letter "r" (BMH goes backwards through a string) and notices |
| 17 | + that it is not equal to "e". Thus, we can afford to jump our search |
| 18 | + index back a whole seven characters. |
| 19 | +
|
| 20 | + However, not all the entries in `bmbc` are equal to `len(pattern)`. |
| 21 | + If we searched the string "adventure time" for "grumble", we'd find |
| 22 | + the "e" to match but mismatch the "m" and "l" in the string and |
| 23 | + pattern, respectively. In this case, we can only jump back six |
| 24 | + characters safely, which is why `bmbc` contains values that are not |
| 25 | + simply `len(pattern)`. |
| 26 | +
|
11 | 27 | Pre: a string > substring. |
12 | 28 |
|
13 | | - Post: returns a list of indexes where the substring was found. |
| 29 | + Post: returns a list of indices where the substring was found. |
14 | 30 |
|
15 | | - Time: Complexity: O( m + n), where m is the substring to be found. |
| 31 | + Time: Complexity: O(m + n), where m is the substring to be found. |
16 | 32 |
|
17 | 33 | Space: Complexity: O(m), where m is the substring to be found. |
18 | 34 |
|
|
25 | 41 |
|
26 | 42 |
|
27 | 43 | def search(text, pattern): |
28 | | - m, n = len(pattern), len(text) |
| 44 | + pattern_length = len(pattern) |
| 45 | + text_length = len(text) |
29 | 46 | offsets = [] |
30 | | - if m > n: |
| 47 | + if pattern_length > text_length: |
31 | 48 | return offsets |
32 | | - bmbc = [m] * 256 |
33 | | - for k, p in enumerate(pattern[:-1]): |
34 | | - bmbc[ord(p)] = m - k - 1 |
| 49 | + bmbc = [pattern_length] * 256 |
| 50 | + for index, char in enumerate(pattern[:-1]): |
| 51 | + bmbc[ord(char)] = pattern_length - index - 1 |
35 | 52 | bmbc = tuple(bmbc) |
36 | | - k = m - 1 |
37 | | - while k < n: |
38 | | - j = m - 1 |
39 | | - i = k |
40 | | - while j >= 0 and text[i] == pattern[j]: |
41 | | - j -= 1 |
42 | | - i -= 1 |
43 | | - if j == -1: |
44 | | - offsets.append(i + 1) |
45 | | - k += bmbc[ord(text[k])] |
| 53 | + search_index = pattern_length - 1 |
| 54 | + while search_index < text_length: |
| 55 | + pattern_index = pattern_length - 1 |
| 56 | + text_index = search_index |
| 57 | + while text_index >= 0 and \ |
| 58 | + text[text_index] == pattern[pattern_index]: |
| 59 | + pattern_index -= 1 |
| 60 | + text_index -= 1 |
| 61 | + if pattern_index == -1: |
| 62 | + offsets.append(text_index + 1) |
| 63 | + search_index += bmbc[ord(text[search_index])] |
46 | 64 |
|
47 | 65 | return offsets |
0 commit comments