summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHaidong Ji2019-03-17 11:53:40 -0500
committerHaidong Ji2019-03-17 11:53:40 -0500
commit151a820c72161aacde623b444ab1f137689a36f5 (patch)
treec728f6f809033ccfbb8e88fc4ceee39ddb79b0f6
parent20bda975f02bdf906e924f176c27fd95df8c8424 (diff)
Rabin-Karp string search done!
Not too bad to convert Java to Python. Converting the for loop and playing indexing is interesting, good exercise.
-rw-r--r--sources/hash_substring.py56
-rw-r--r--tests/hash_substringTest.py31
2 files changed, 87 insertions, 0 deletions
diff --git a/sources/hash_substring.py b/sources/hash_substring.py
new file mode 100644
index 0000000..260084a
--- /dev/null
+++ b/sources/hash_substring.py
@@ -0,0 +1,56 @@
+# python3
+import random
+
+
+def read_input():
+ return (input().rstrip(), input().rstrip())
+
+
+def print_occurrences(output):
+ print(' '.join(map(str, output)))
+
+
+def get_occurrences(pattern, text):
+ return [
+ i
+ for i in range(len(text) - len(pattern) + 1)
+ if text[i:i + len(pattern)] == pattern
+ ]
+
+
+def poly_hash(S, p, x):
+ ans = 0
+ for c in reversed(S):
+ ans = (ans * x + ord(c)) % p
+
+ return ans
+
+
+def pre_compute_hashes(T, pattenLength, p, x):
+ H = [0] * (len(T) - pattenLength + 1)
+ S = T[len(T) - pattenLength:]
+ H[len(T) - pattenLength] = poly_hash(S, p, x)
+ y = 1
+ for _ in range(pattenLength):
+ y = (y * x) % p
+ for i in reversed(range(len(T) - pattenLength)):
+ H[i] = (x * H[i + 1] + ord(T[i]) - y * ord(T[i + pattenLength])) % p
+ return H
+
+
+def rabin_karp(P, T):
+ p = 1000000007
+ x = random.randint(1, 1000000006)
+ positions = []
+ pHash = poly_hash(P, p, x)
+ H = pre_compute_hashes(T, len(P), p, x)
+ for i in range(len(T) - len(P) + 1):
+ if pHash != H[i]:
+ continue
+ if T[i:i + len(P)] == P:
+ positions.append(i)
+ return positions
+
+
+if __name__ == '__main__':
+ print_occurrences(rabin_karp(*read_input()))
diff --git a/tests/hash_substringTest.py b/tests/hash_substringTest.py
new file mode 100644
index 0000000..4c8fd5c
--- /dev/null
+++ b/tests/hash_substringTest.py
@@ -0,0 +1,31 @@
+import unittest
+
+from sources.hash_substring import get_occurrences, rabin_karp
+
+
+class MyTestCase(unittest.TestCase):
+
+ def test1(self):
+ result = get_occurrences('aba', 'abacaba')
+ result = rabin_karp('aba', 'abacaba')
+ self.assertEqual(len(result), 2)
+ self.assertEqual(result[0], 0)
+ self.assertEqual(result[1], 4)
+
+ def test2(self):
+ result = get_occurrences('Test', 'testTesttesT')
+ result = rabin_karp('Test', 'testTesttesT')
+ self.assertEqual(len(result), 1)
+ self.assertEqual(result[0], 4)
+
+ def test3(self):
+ result = get_occurrences('aaaaa', 'baaaaaaa')
+ result = rabin_karp('aaaaa', 'baaaaaaa')
+ self.assertEqual(len(result), 3)
+ self.assertEqual(result[0], 1)
+ self.assertEqual(result[1], 2)
+ self.assertEqual(result[2], 3)
+
+
+if __name__ == '__main__':
+ unittest.main()