forked from keon/algorithms
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrabin_karp.py
More file actions
85 lines (65 loc) · 2.45 KB
/
rabin_karp.py
File metadata and controls
85 lines (65 loc) · 2.45 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
"""
Rabin-Karp String Search
A string searching algorithm that uses hashing to find a pattern in text.
Uses a rolling hash to efficiently compare the pattern hash with substrings.
Reference: https://en.wikipedia.org/wiki/Rabin%E2%80%93Karp_algorithm
Complexity:
Time: O(n + m) average, O(n * m) worst case
Space: O(1)
"""
from __future__ import annotations
class RollingHash:
"""A rolling hash implementation for the Rabin-Karp algorithm.
Args:
text: The text to compute the hash over.
window_size: The size of the hash window.
"""
def __init__(self, text: str, window_size: int) -> None:
self.text = text
self.hash = 0
self.window_size = window_size
for index in range(0, window_size):
self.hash += (ord(self.text[index]) - ord("a") + 1) * (
26 ** (window_size - index - 1)
)
self.window_start = 0
self.window_end = window_size
def move_window(self) -> None:
"""Slide the hash window one position to the right."""
if self.window_end <= len(self.text) - 1:
self.hash -= (ord(self.text[self.window_start]) - ord("a") + 1) * 26 ** (
self.window_size - 1
)
self.hash *= 26
self.hash += ord(self.text[self.window_end]) - ord("a") + 1
self.window_start += 1
self.window_end += 1
def window_text(self) -> str:
"""Return the current text within the hash window.
Returns:
The substring currently covered by the rolling hash window.
"""
return self.text[self.window_start : self.window_end]
def rabin_karp(word: str, text: str) -> int | None:
"""Find the first occurrence of word in text using the Rabin-Karp algorithm.
Args:
word: The pattern to search for.
text: The text to search in.
Returns:
The index of the first occurrence, or None if not found.
Examples:
>>> rabin_karp("abc", "zsnabckfkd")
3
"""
if word == "" or text == "":
return None
if len(word) > len(text):
return None
rolling_hash = RollingHash(text, len(word))
word_hash = RollingHash(word, len(word))
for _ in range(len(text) - len(word) + 1):
if (rolling_hash.hash == word_hash.hash
and rolling_hash.window_text() == word):
return rolling_hash.window_start
rolling_hash.move_window()
return None