diff options
author | Haidong Ji | 2019-03-17 11:53:40 -0500 |
---|---|---|
committer | Haidong Ji | 2019-03-17 11:53:40 -0500 |
commit | 151a820c72161aacde623b444ab1f137689a36f5 (patch) | |
tree | c728f6f809033ccfbb8e88fc4ceee39ddb79b0f6 /sources | |
parent | 20bda975f02bdf906e924f176c27fd95df8c8424 (diff) |
Rabin-Karp string search done!
Not too bad to convert Java to Python. Converting the for loop
and playing indexing is interesting, good exercise.
Diffstat (limited to 'sources')
-rw-r--r-- | sources/hash_substring.py | 56 |
1 files changed, 56 insertions, 0 deletions
diff --git a/sources/hash_substring.py b/sources/hash_substring.py new file mode 100644 index 0000000..260084a --- /dev/null +++ b/sources/hash_substring.py @@ -0,0 +1,56 @@ +# python3 +import random + + +def read_input(): + return (input().rstrip(), input().rstrip()) + + +def print_occurrences(output): + print(' '.join(map(str, output))) + + +def get_occurrences(pattern, text): + return [ + i + for i in range(len(text) - len(pattern) + 1) + if text[i:i + len(pattern)] == pattern + ] + + +def poly_hash(S, p, x): + ans = 0 + for c in reversed(S): + ans = (ans * x + ord(c)) % p + + return ans + + +def pre_compute_hashes(T, pattenLength, p, x): + H = [0] * (len(T) - pattenLength + 1) + S = T[len(T) - pattenLength:] + H[len(T) - pattenLength] = poly_hash(S, p, x) + y = 1 + for _ in range(pattenLength): + y = (y * x) % p + for i in reversed(range(len(T) - pattenLength)): + H[i] = (x * H[i + 1] + ord(T[i]) - y * ord(T[i + pattenLength])) % p + return H + + +def rabin_karp(P, T): + p = 1000000007 + x = random.randint(1, 1000000006) + positions = [] + pHash = poly_hash(P, p, x) + H = pre_compute_hashes(T, len(P), p, x) + for i in range(len(T) - len(P) + 1): + if pHash != H[i]: + continue + if T[i:i + len(P)] == P: + positions.append(i) + return positions + + +if __name__ == '__main__': + print_occurrences(rabin_karp(*read_input())) |