summaryrefslogtreecommitdiff
path: root/sources/hash_substring.py
blob: 260084a41ead1926ac9427374272765a6dcd5f03 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# python3
import random


def read_input():
    return (input().rstrip(), input().rstrip())


def print_occurrences(output):
    print(' '.join(map(str, output)))


def get_occurrences(pattern, text):
    return [
        i
        for i in range(len(text) - len(pattern) + 1)
        if text[i:i + len(pattern)] == pattern
    ]


def poly_hash(S, p, x):
    ans = 0
    for c in reversed(S):
        ans = (ans * x + ord(c)) % p

    return ans


def pre_compute_hashes(T, pattenLength, p, x):
    H = [0] * (len(T) - pattenLength + 1)
    S = T[len(T) - pattenLength:]
    H[len(T) - pattenLength] = poly_hash(S, p, x)
    y = 1
    for _ in range(pattenLength):
        y = (y * x) % p
    for i in reversed(range(len(T) - pattenLength)):
        H[i] = (x * H[i + 1] + ord(T[i]) - y * ord(T[i + pattenLength])) % p
    return H


def rabin_karp(P, T):
    p = 1000000007
    x = random.randint(1, 1000000006)
    positions = []
    pHash = poly_hash(P, p, x)
    H = pre_compute_hashes(T, len(P), p, x)
    for i in range(len(T) - len(P) + 1):
        if pHash != H[i]:
            continue
        if T[i:i + len(P)] == P:
            positions.append(i)
    return positions


if __name__ == '__main__':
    print_occurrences(rabin_karp(*read_input()))