summaryrefslogtreecommitdiff
path: root/Sources/DataStructure.cpp
blob: 5cfb53c4ec6996283be32a75251aac5fe9ba6eae (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
#include <iostream>
#include <string>
#include <vector>
//#include <gtest/gtest.h>

using std::string;
typedef unsigned long long ull;

struct Data {
	string pattern, text;
};

Data read_input() {
	Data data;
	std::cin >> data.pattern >> data.text;
	return data;
}

void print_occurrences(const std::vector<int>& output) {
	for (size_t i = 0; i < output.size(); ++i)
		std::cout << output[i] << " ";
	std::cout << "\n";
}

std::vector<int> get_occurrences(const Data& input) {
	const string& s = input.pattern, t = input.text;
	std::vector<int> ans;
	for (size_t i = 0; i + s.size() <= t.size(); ++i)
		if (t.substr(i, s.size()) == s)
			ans.push_back(i);
	return ans;
}

ull poly_hash(string& S, int p, int x) {
	ull hash = 0;

	for (int i = S.size() - 1; i >= 0; --i)
		hash = (hash * x + S[i]) % p;
	return hash;
}

std::vector<ull> pre_compute_hashes(string& T, int pattenLength, int p, int x) {
	std::vector<ull> H(T.size() - pattenLength + 1);
	string S = T.substr(T.size() - pattenLength);
	H[T.size() - pattenLength] = poly_hash(S, p, x);
	ull y = 1;
	for (int i = 0; i < pattenLength; i++) {
		y = (y * x) % p;
	}
	for (int i = T.size() - pattenLength - 1; i >= 0; --i) {
		H[i] = (x * H[i + 1] + T[i] - y * T[i + pattenLength]) % p;
	}
	return H;
}

std::vector<int> rabin_karp(const Data& input) {
	string T = input.text;
	string P = input.pattern;
	int p = 1000000007;
	int x = 1 + (rand() % static_cast<int>(1000000006));
	std::vector<int> positions;
	ull pHash = poly_hash(P, p, x);
	std::vector<ull> H = pre_compute_hashes(T, P.size(), p, x);
	for (size_t i = 0; i < T.size() - P.size() + 1; i++) {
		if (pHash != H[i]) {
			continue;
		}
		if (T.compare(i, P.size(), P) == 0)
			positions.push_back(i);
	}
	return positions;
}
//TEST(RabinKarp, t) {
//	Data data;
//	data.pattern = "a";
//	data.text = "ab";
//	std::vector<int> results = rabin_karp(data);
//
//	ASSERT_EQ(1, results.size());
//	ASSERT_EQ(0, results[0]);
//
//	data.pattern = "aba";
//	data.text = "abacaba";
//	results = rabin_karp(data);
//
//	ASSERT_EQ(2, results.size());
//	ASSERT_EQ(0, results[0]);
//	ASSERT_EQ(4, results[1]);
//}
//
//TEST(RabinKarp, t1) {
//	Data data;
//	data.pattern = "Test";
//	data.text = "testTesttesT";
//	std::vector<int> results = rabin_karp(data);
//
//	ASSERT_EQ(1, results.size());
//	ASSERT_EQ(4, results[0]);
//}
//
//TEST(RabinKarp, t2) {
//	Data data;
//	data.pattern = "aaaaa";
//	data.text = "baaaaaaa";
//	std::vector<int> results = rabin_karp(data);
//
//	ASSERT_EQ(3, results.size());
//	ASSERT_EQ(1, results[0]);
//	ASSERT_EQ(2, results[1]);
//	ASSERT_EQ(3, results[2]);
//}

int main() {
    std::ios_base::sync_with_stdio(false);
    print_occurrences(rabin_karp(read_input()));
    return 0;
}