diff options
-rw-r--r-- | sources/hash_substring.py | 56 | ||||
-rw-r--r-- | tests/hash_substringTest.py | 31 |
2 files changed, 87 insertions, 0 deletions
diff --git a/sources/hash_substring.py b/sources/hash_substring.py new file mode 100644 index 0000000..260084a --- /dev/null +++ b/sources/hash_substring.py @@ -0,0 +1,56 @@ +# python3 +import random + + +def read_input(): + return (input().rstrip(), input().rstrip()) + + +def print_occurrences(output): + print(' '.join(map(str, output))) + + +def get_occurrences(pattern, text): + return [ + i + for i in range(len(text) - len(pattern) + 1) + if text[i:i + len(pattern)] == pattern + ] + + +def poly_hash(S, p, x): + ans = 0 + for c in reversed(S): + ans = (ans * x + ord(c)) % p + + return ans + + +def pre_compute_hashes(T, pattenLength, p, x): + H = [0] * (len(T) - pattenLength + 1) + S = T[len(T) - pattenLength:] + H[len(T) - pattenLength] = poly_hash(S, p, x) + y = 1 + for _ in range(pattenLength): + y = (y * x) % p + for i in reversed(range(len(T) - pattenLength)): + H[i] = (x * H[i + 1] + ord(T[i]) - y * ord(T[i + pattenLength])) % p + return H + + +def rabin_karp(P, T): + p = 1000000007 + x = random.randint(1, 1000000006) + positions = [] + pHash = poly_hash(P, p, x) + H = pre_compute_hashes(T, len(P), p, x) + for i in range(len(T) - len(P) + 1): + if pHash != H[i]: + continue + if T[i:i + len(P)] == P: + positions.append(i) + return positions + + +if __name__ == '__main__': + print_occurrences(rabin_karp(*read_input())) diff --git a/tests/hash_substringTest.py b/tests/hash_substringTest.py new file mode 100644 index 0000000..4c8fd5c --- /dev/null +++ b/tests/hash_substringTest.py @@ -0,0 +1,31 @@ +import unittest + +from sources.hash_substring import get_occurrences, rabin_karp + + +class MyTestCase(unittest.TestCase): + + def test1(self): + result = get_occurrences('aba', 'abacaba') + result = rabin_karp('aba', 'abacaba') + self.assertEqual(len(result), 2) + self.assertEqual(result[0], 0) + self.assertEqual(result[1], 4) + + def test2(self): + result = get_occurrences('Test', 'testTesttesT') + result = rabin_karp('Test', 'testTesttesT') + self.assertEqual(len(result), 1) + self.assertEqual(result[0], 4) + + def test3(self): + result = get_occurrences('aaaaa', 'baaaaaaa') + result = rabin_karp('aaaaa', 'baaaaaaa') + self.assertEqual(len(result), 3) + self.assertEqual(result[0], 1) + self.assertEqual(result[1], 2) + self.assertEqual(result[2], 3) + + +if __name__ == '__main__': + unittest.main() |