summaryrefslogtreecommitdiff
path: root/Sources
diff options
context:
space:
mode:
authorHaidong Ji2019-03-17 19:42:59 -0500
committerHaidong Ji2019-03-17 19:42:59 -0500
commita7f23160da19ea62e4da660b151cb8f15af8fe8f (patch)
tree9dbd73a6ff22e883b872ed17b747e5d37d322fc3 /Sources
parent9735d05bae6c6cdb86789bc74def31e402c469a3 (diff)
Rabin-Karp string search done!
Good practice of passing by reference again. First time using string compare method in C++. Also I guess I'm luck when I started the Java code using the poly hash function written earlier, therefore I didn't come across the mod negative issue that I read about. Fun stuff :)
Diffstat (limited to 'Sources')
-rw-r--r--Sources/DataStructure.cpp167
1 files changed, 99 insertions, 68 deletions
diff --git a/Sources/DataStructure.cpp b/Sources/DataStructure.cpp
index 064ca45..5cfb53c 100644
--- a/Sources/DataStructure.cpp
+++ b/Sources/DataStructure.cpp
@@ -1,86 +1,117 @@
#include <iostream>
#include <string>
#include <vector>
-#include <algorithm>
+//#include <gtest/gtest.h>
using std::string;
-using std::vector;
-using std::cin;
+typedef unsigned long long ull;
-struct Query {
- string type, s;
- size_t ind;
+struct Data {
+ string pattern, text;
};
-class QueryProcessor {
- int bucket_count;
- // store all strings in one vector
- vector<vector<string>> elems;
- size_t hash_func(const string& s) const {
- static const size_t multiplier = 263;
- static const size_t prime = 1000000007;
- unsigned long long hash = 0;
- for (int i = static_cast<int>(s.size()) - 1; i >= 0; --i)
- hash = (hash * multiplier + s[i]) % prime;
- return hash % bucket_count;
- }
+Data read_input() {
+ Data data;
+ std::cin >> data.pattern >> data.text;
+ return data;
+}
-public:
- explicit QueryProcessor(int bucket_count) :
- bucket_count(bucket_count) {
- }
+void print_occurrences(const std::vector<int>& output) {
+ for (size_t i = 0; i < output.size(); ++i)
+ std::cout << output[i] << " ";
+ std::cout << "\n";
+}
- Query readQuery() const {
- Query query;
- cin >> query.type;
- if (query.type != "check")
- cin >> query.s;
- else
- cin >> query.ind;
- return query;
- }
+std::vector<int> get_occurrences(const Data& input) {
+ const string& s = input.pattern, t = input.text;
+ std::vector<int> ans;
+ for (size_t i = 0; i + s.size() <= t.size(); ++i)
+ if (t.substr(i, s.size()) == s)
+ ans.push_back(i);
+ return ans;
+}
- void writeSearchResult(bool was_found) const {
- std::cout << (was_found ? "yes\n" : "no\n");
- }
+ull poly_hash(string& S, int p, int x) {
+ ull hash = 0;
- void processQuery(const Query& query) {
- if (query.type == "check") {
- // use reverse order, because we append strings to the end
- for (int i = static_cast<int>(elems[query.ind].size()) - 1; i >= 0;
- --i)
- std::cout << elems[query.ind][i] << " ";
- std::cout << "\n";
- } else {
- int hashIndex = hash_func(query.s);
- vector<string>::iterator it = std::find(elems[hashIndex].begin(), elems[hashIndex].end(),
- query.s);
- if (query.type == "find")
- writeSearchResult(it != elems[hashIndex].end());
- else if (query.type == "add") {
- if (it == elems[hashIndex].end())
- elems[hashIndex].push_back(query.s);
- } else if (query.type == "del") {
- if (it != elems[hashIndex].end())
- elems[hashIndex].erase(it);
- }
- }
+ for (int i = S.size() - 1; i >= 0; --i)
+ hash = (hash * x + S[i]) % p;
+ return hash;
+}
+
+std::vector<ull> pre_compute_hashes(string& T, int pattenLength, int p, int x) {
+ std::vector<ull> H(T.size() - pattenLength + 1);
+ string S = T.substr(T.size() - pattenLength);
+ H[T.size() - pattenLength] = poly_hash(S, p, x);
+ ull y = 1;
+ for (int i = 0; i < pattenLength; i++) {
+ y = (y * x) % p;
}
+ for (int i = T.size() - pattenLength - 1; i >= 0; --i) {
+ H[i] = (x * H[i + 1] + T[i] - y * T[i + pattenLength]) % p;
+ }
+ return H;
+}
- void processQueries() {
- int n;
- cin >> n;
- elems.resize(n);
- for (int i = 0; i < n; ++i)
- processQuery(readQuery());
+std::vector<int> rabin_karp(const Data& input) {
+ string T = input.text;
+ string P = input.pattern;
+ int p = 1000000007;
+ int x = 1 + (rand() % static_cast<int>(1000000006));
+ std::vector<int> positions;
+ ull pHash = poly_hash(P, p, x);
+ std::vector<ull> H = pre_compute_hashes(T, P.size(), p, x);
+ for (size_t i = 0; i < T.size() - P.size() + 1; i++) {
+ if (pHash != H[i]) {
+ continue;
+ }
+ if (T.compare(i, P.size(), P) == 0)
+ positions.push_back(i);
}
-};
+ return positions;
+}
+//TEST(RabinKarp, t) {
+// Data data;
+// data.pattern = "a";
+// data.text = "ab";
+// std::vector<int> results = rabin_karp(data);
+//
+// ASSERT_EQ(1, results.size());
+// ASSERT_EQ(0, results[0]);
+//
+// data.pattern = "aba";
+// data.text = "abacaba";
+// results = rabin_karp(data);
+//
+// ASSERT_EQ(2, results.size());
+// ASSERT_EQ(0, results[0]);
+// ASSERT_EQ(4, results[1]);
+//}
+//
+//TEST(RabinKarp, t1) {
+// Data data;
+// data.pattern = "Test";
+// data.text = "testTesttesT";
+// std::vector<int> results = rabin_karp(data);
+//
+// ASSERT_EQ(1, results.size());
+// ASSERT_EQ(4, results[0]);
+//}
+//
+//TEST(RabinKarp, t2) {
+// Data data;
+// data.pattern = "aaaaa";
+// data.text = "baaaaaaa";
+// std::vector<int> results = rabin_karp(data);
+//
+// ASSERT_EQ(3, results.size());
+// ASSERT_EQ(1, results[0]);
+// ASSERT_EQ(2, results[1]);
+// ASSERT_EQ(3, results[2]);
+//}
int main() {
- std::ios_base::sync_with_stdio(false);
- int bucket_count;
- cin >> bucket_count;
- QueryProcessor proc(bucket_count);
- proc.processQueries();
- return 0;
+ std::ios_base::sync_with_stdio(false);
+ print_occurrences(rabin_karp(read_input()));
+ return 0;
}