summaryrefslogtreecommitdiff
path: root/sources
diff options
context:
space:
mode:
authorHaidong Ji2019-03-17 11:53:40 -0500
committerHaidong Ji2019-03-17 11:53:40 -0500
commit151a820c72161aacde623b444ab1f137689a36f5 (patch)
treec728f6f809033ccfbb8e88fc4ceee39ddb79b0f6 /sources
parent20bda975f02bdf906e924f176c27fd95df8c8424 (diff)
Rabin-Karp string search done!
Not too bad to convert Java to Python. Converting the for loop and playing indexing is interesting, good exercise.
Diffstat (limited to 'sources')
-rw-r--r--sources/hash_substring.py56
1 files changed, 56 insertions, 0 deletions
diff --git a/sources/hash_substring.py b/sources/hash_substring.py
new file mode 100644
index 0000000..260084a
--- /dev/null
+++ b/sources/hash_substring.py
@@ -0,0 +1,56 @@
+# python3
+import random
+
+
+def read_input():
+ return (input().rstrip(), input().rstrip())
+
+
+def print_occurrences(output):
+ print(' '.join(map(str, output)))
+
+
+def get_occurrences(pattern, text):
+ return [
+ i
+ for i in range(len(text) - len(pattern) + 1)
+ if text[i:i + len(pattern)] == pattern
+ ]
+
+
+def poly_hash(S, p, x):
+ ans = 0
+ for c in reversed(S):
+ ans = (ans * x + ord(c)) % p
+
+ return ans
+
+
+def pre_compute_hashes(T, pattenLength, p, x):
+ H = [0] * (len(T) - pattenLength + 1)
+ S = T[len(T) - pattenLength:]
+ H[len(T) - pattenLength] = poly_hash(S, p, x)
+ y = 1
+ for _ in range(pattenLength):
+ y = (y * x) % p
+ for i in reversed(range(len(T) - pattenLength)):
+ H[i] = (x * H[i + 1] + ord(T[i]) - y * ord(T[i + pattenLength])) % p
+ return H
+
+
+def rabin_karp(P, T):
+ p = 1000000007
+ x = random.randint(1, 1000000006)
+ positions = []
+ pHash = poly_hash(P, p, x)
+ H = pre_compute_hashes(T, len(P), p, x)
+ for i in range(len(T) - len(P) + 1):
+ if pHash != H[i]:
+ continue
+ if T[i:i + len(P)] == P:
+ positions.append(i)
+ return positions
+
+
+if __name__ == '__main__':
+ print_occurrences(rabin_karp(*read_input()))