From a7f23160da19ea62e4da660b151cb8f15af8fe8f Mon Sep 17 00:00:00 2001 From: Haidong Ji Date: Sun, 17 Mar 2019 19:42:59 -0500 Subject: Rabin-Karp string search done! Good practice of passing by reference again. First time using string compare method in C++. Also I guess I'm luck when I started the Java code using the poly hash function written earlier, therefore I didn't come across the mod negative issue that I read about. Fun stuff :)--- Sources/DataStructure.cpp | 167 +++++++++++++++++++++++++++------------------- 1 file changed, 99 insertions(+), 68 deletions(-) diff --git a/Sources/DataStructure.cpp b/Sources/DataStructure.cpp index 064ca45..5cfb53c 100644 --- a/Sources/DataStructure.cpp +++ b/Sources/DataStructure.cpp @@ -1,86 +1,117 @@ #include #include #include -#include +//#include using std::string; -using std::vector; -using std::cin; +typedef unsigned long long ull; -struct Query { - string type, s; - size_t ind; +struct Data { + string pattern, text; }; -class QueryProcessor { - int bucket_count; - // store all strings in one vector - vector> elems; - size_t hash_func(const string& s) const { - static const size_t multiplier = 263; - static const size_t prime = 1000000007; - unsigned long long hash = 0; - for (int i = static_cast(s.size()) - 1; i >= 0; --i) - hash = (hash * multiplier + s[i]) % prime; - return hash % bucket_count; - } +Data read_input() { + Data data; + std::cin >> data.pattern >> data.text; + return data; +} -public: - explicit QueryProcessor(int bucket_count) : - bucket_count(bucket_count) { - } +void print_occurrences(const std::vector& output) { + for (size_t i = 0; i < output.size(); ++i) + std::cout << output[i] << " "; + std::cout << "\n"; +} - Query readQuery() const { - Query query; - cin >> query.type; - if (query.type != "check") - cin >> query.s; - else - cin >> query.ind; - return query; - } +std::vector get_occurrences(const Data& input) { + const string& s = input.pattern, t = input.text; + std::vector ans; + for (size_t i = 0; i + s.size() <= t.size(); ++i) + if (t.substr(i, s.size()) == s) + ans.push_back(i); + return ans; +} - void writeSearchResult(bool was_found) const { - std::cout << (was_found ? "yes\n" : "no\n"); - } +ull poly_hash(string& S, int p, int x) { + ull hash = 0; - void processQuery(const Query& query) { - if (query.type == "check") { - // use reverse order, because we append strings to the end - for (int i = static_cast(elems[query.ind].size()) - 1; i >= 0; - --i) - std::cout << elems[query.ind][i] << " "; - std::cout << "\n"; - } else { - int hashIndex = hash_func(query.s); - vector::iterator it = std::find(elems[hashIndex].begin(), elems[hashIndex].end(), - query.s); - if (query.type == "find") - writeSearchResult(it != elems[hashIndex].end()); - else if (query.type == "add") { - if (it == elems[hashIndex].end()) - elems[hashIndex].push_back(query.s); - } else if (query.type == "del") { - if (it != elems[hashIndex].end()) - elems[hashIndex].erase(it); - } - } + for (int i = S.size() - 1; i >= 0; --i) + hash = (hash * x + S[i]) % p; + return hash; +} + +std::vector pre_compute_hashes(string& T, int pattenLength, int p, int x) { + std::vector H(T.size() - pattenLength + 1); + string S = T.substr(T.size() - pattenLength); + H[T.size() - pattenLength] = poly_hash(S, p, x); + ull y = 1; + for (int i = 0; i < pattenLength; i++) { + y = (y * x) % p; } + for (int i = T.size() - pattenLength - 1; i >= 0; --i) { + H[i] = (x * H[i + 1] + T[i] - y * T[i + pattenLength]) % p; + } + return H; +} - void processQueries() { - int n; - cin >> n; - elems.resize(n); - for (int i = 0; i < n; ++i) - processQuery(readQuery()); +std::vector rabin_karp(const Data& input) { + string T = input.text; + string P = input.pattern; + int p = 1000000007; + int x = 1 + (rand() % static_cast(1000000006)); + std::vector positions; + ull pHash = poly_hash(P, p, x); + std::vector H = pre_compute_hashes(T, P.size(), p, x); + for (size_t i = 0; i < T.size() - P.size() + 1; i++) { + if (pHash != H[i]) { + continue; + } + if (T.compare(i, P.size(), P) == 0) + positions.push_back(i); } -}; + return positions; +} +//TEST(RabinKarp, t) { +// Data data; +// data.pattern = "a"; +// data.text = "ab"; +// std::vector results = rabin_karp(data); +// +// ASSERT_EQ(1, results.size()); +// ASSERT_EQ(0, results[0]); +// +// data.pattern = "aba"; +// data.text = "abacaba"; +// results = rabin_karp(data); +// +// ASSERT_EQ(2, results.size()); +// ASSERT_EQ(0, results[0]); +// ASSERT_EQ(4, results[1]); +//} +// +//TEST(RabinKarp, t1) { +// Data data; +// data.pattern = "Test"; +// data.text = "testTesttesT"; +// std::vector results = rabin_karp(data); +// +// ASSERT_EQ(1, results.size()); +// ASSERT_EQ(4, results[0]); +//} +// +//TEST(RabinKarp, t2) { +// Data data; +// data.pattern = "aaaaa"; +// data.text = "baaaaaaa"; +// std::vector results = rabin_karp(data); +// +// ASSERT_EQ(3, results.size()); +// ASSERT_EQ(1, results[0]); +// ASSERT_EQ(2, results[1]); +// ASSERT_EQ(3, results[2]); +//} int main() { - std::ios_base::sync_with_stdio(false); - int bucket_count; - cin >> bucket_count; - QueryProcessor proc(bucket_count); - proc.processQueries(); - return 0; + std::ios_base::sync_with_stdio(false); + print_occurrences(rabin_karp(read_input())); + return 0; } -- cgit v1.2.3