From 151a820c72161aacde623b444ab1f137689a36f5 Mon Sep 17 00:00:00 2001 From: Haidong Ji Date: Sun, 17 Mar 2019 11:53:40 -0500 Subject: Rabin-Karp string search done! Not too bad to convert Java to Python. Converting the for loop and playing indexing is interesting, good exercise. --- sources/hash_substring.py | 56 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 sources/hash_substring.py (limited to 'sources') diff --git a/sources/hash_substring.py b/sources/hash_substring.py new file mode 100644 index 0000000..260084a --- /dev/null +++ b/sources/hash_substring.py @@ -0,0 +1,56 @@ +# python3 +import random + + +def read_input(): + return (input().rstrip(), input().rstrip()) + + +def print_occurrences(output): + print(' '.join(map(str, output))) + + +def get_occurrences(pattern, text): + return [ + i + for i in range(len(text) - len(pattern) + 1) + if text[i:i + len(pattern)] == pattern + ] + + +def poly_hash(S, p, x): + ans = 0 + for c in reversed(S): + ans = (ans * x + ord(c)) % p + + return ans + + +def pre_compute_hashes(T, pattenLength, p, x): + H = [0] * (len(T) - pattenLength + 1) + S = T[len(T) - pattenLength:] + H[len(T) - pattenLength] = poly_hash(S, p, x) + y = 1 + for _ in range(pattenLength): + y = (y * x) % p + for i in reversed(range(len(T) - pattenLength)): + H[i] = (x * H[i + 1] + ord(T[i]) - y * ord(T[i + pattenLength])) % p + return H + + +def rabin_karp(P, T): + p = 1000000007 + x = random.randint(1, 1000000006) + positions = [] + pHash = poly_hash(P, p, x) + H = pre_compute_hashes(T, len(P), p, x) + for i in range(len(T) - len(P) + 1): + if pHash != H[i]: + continue + if T[i:i + len(P)] == P: + positions.append(i) + return positions + + +if __name__ == '__main__': + print_occurrences(rabin_karp(*read_input())) -- cgit v1.2.3