diff options
| author | Haidong Ji | 2019-03-17 19:42:59 -0500 | 
|---|---|---|
| committer | Haidong Ji | 2019-03-17 19:42:59 -0500 | 
| commit | a7f23160da19ea62e4da660b151cb8f15af8fe8f (patch) | |
| tree | 9dbd73a6ff22e883b872ed17b747e5d37d322fc3 | |
| parent | 9735d05bae6c6cdb86789bc74def31e402c469a3 (diff) | |
Rabin-Karp string search done!
Good practice of passing by reference again. First time using string
compare method in C++. Also I guess I'm luck when I started the Java
code using the poly hash function written earlier, therefore I didn't
come across the mod negative issue that I read about. Fun stuff :)
| -rw-r--r-- | Sources/DataStructure.cpp | 167 | 
1 files changed, 99 insertions, 68 deletions
| diff --git a/Sources/DataStructure.cpp b/Sources/DataStructure.cpp index 064ca45..5cfb53c 100644 --- a/Sources/DataStructure.cpp +++ b/Sources/DataStructure.cpp @@ -1,86 +1,117 @@  #include <iostream>  #include <string>  #include <vector> -#include <algorithm> +//#include <gtest/gtest.h>  using std::string; -using std::vector; -using std::cin; +typedef unsigned long long ull; -struct Query { -	string type, s; -	size_t ind; +struct Data { +	string pattern, text;  }; -class QueryProcessor { -	int bucket_count; -	// store all strings in one vector -	vector<vector<string>> elems; -	size_t hash_func(const string& s) const { -		static const size_t multiplier = 263; -		static const size_t prime = 1000000007; -		unsigned long long hash = 0; -		for (int i = static_cast<int>(s.size()) - 1; i >= 0; --i) -			hash = (hash * multiplier + s[i]) % prime; -		return hash % bucket_count; -	} +Data read_input() { +	Data data; +	std::cin >> data.pattern >> data.text; +	return data; +} -public: -	explicit QueryProcessor(int bucket_count) : -			bucket_count(bucket_count) { -	} +void print_occurrences(const std::vector<int>& output) { +	for (size_t i = 0; i < output.size(); ++i) +		std::cout << output[i] << " "; +	std::cout << "\n"; +} -	Query readQuery() const { -		Query query; -		cin >> query.type; -		if (query.type != "check") -			cin >> query.s; -		else -			cin >> query.ind; -		return query; -	} +std::vector<int> get_occurrences(const Data& input) { +	const string& s = input.pattern, t = input.text; +	std::vector<int> ans; +	for (size_t i = 0; i + s.size() <= t.size(); ++i) +		if (t.substr(i, s.size()) == s) +			ans.push_back(i); +	return ans; +} -	void writeSearchResult(bool was_found) const { -		std::cout << (was_found ? "yes\n" : "no\n"); -	} +ull poly_hash(string& S, int p, int x) { +	ull hash = 0; -	void processQuery(const Query& query) { -		if (query.type == "check") { -			// use reverse order, because we append strings to the end -			for (int i = static_cast<int>(elems[query.ind].size()) - 1; i >= 0; -					--i) -				std::cout << elems[query.ind][i] << " "; -			std::cout << "\n"; -		} else { -			int hashIndex = hash_func(query.s); -			vector<string>::iterator it = std::find(elems[hashIndex].begin(), elems[hashIndex].end(), -					query.s); -			if (query.type == "find") -				writeSearchResult(it != elems[hashIndex].end()); -			else if (query.type == "add") { -				if (it == elems[hashIndex].end()) -					elems[hashIndex].push_back(query.s); -			} else if (query.type == "del") { -				if (it != elems[hashIndex].end()) -					elems[hashIndex].erase(it); -			} -		} +	for (int i = S.size() - 1; i >= 0; --i) +		hash = (hash * x + S[i]) % p; +	return hash; +} + +std::vector<ull> pre_compute_hashes(string& T, int pattenLength, int p, int x) { +	std::vector<ull> H(T.size() - pattenLength + 1); +	string S = T.substr(T.size() - pattenLength); +	H[T.size() - pattenLength] = poly_hash(S, p, x); +	ull y = 1; +	for (int i = 0; i < pattenLength; i++) { +		y = (y * x) % p;  	} +	for (int i = T.size() - pattenLength - 1; i >= 0; --i) { +		H[i] = (x * H[i + 1] + T[i] - y * T[i + pattenLength]) % p; +	} +	return H; +} -	void processQueries() { -		int n; -		cin >> n; -		elems.resize(n); -		for (int i = 0; i < n; ++i) -			processQuery(readQuery()); +std::vector<int> rabin_karp(const Data& input) { +	string T = input.text; +	string P = input.pattern; +	int p = 1000000007; +	int x = 1 + (rand() % static_cast<int>(1000000006)); +	std::vector<int> positions; +	ull pHash = poly_hash(P, p, x); +	std::vector<ull> H = pre_compute_hashes(T, P.size(), p, x); +	for (size_t i = 0; i < T.size() - P.size() + 1; i++) { +		if (pHash != H[i]) { +			continue; +		} +		if (T.compare(i, P.size(), P) == 0) +			positions.push_back(i);  	} -}; +	return positions; +} +//TEST(RabinKarp, t) { +//	Data data; +//	data.pattern = "a"; +//	data.text = "ab"; +//	std::vector<int> results = rabin_karp(data); +// +//	ASSERT_EQ(1, results.size()); +//	ASSERT_EQ(0, results[0]); +// +//	data.pattern = "aba"; +//	data.text = "abacaba"; +//	results = rabin_karp(data); +// +//	ASSERT_EQ(2, results.size()); +//	ASSERT_EQ(0, results[0]); +//	ASSERT_EQ(4, results[1]); +//} +// +//TEST(RabinKarp, t1) { +//	Data data; +//	data.pattern = "Test"; +//	data.text = "testTesttesT"; +//	std::vector<int> results = rabin_karp(data); +// +//	ASSERT_EQ(1, results.size()); +//	ASSERT_EQ(4, results[0]); +//} +// +//TEST(RabinKarp, t2) { +//	Data data; +//	data.pattern = "aaaaa"; +//	data.text = "baaaaaaa"; +//	std::vector<int> results = rabin_karp(data); +// +//	ASSERT_EQ(3, results.size()); +//	ASSERT_EQ(1, results[0]); +//	ASSERT_EQ(2, results[1]); +//	ASSERT_EQ(3, results[2]); +//}  int main() { -	std::ios_base::sync_with_stdio(false); -	int bucket_count; -	cin >> bucket_count; -	QueryProcessor proc(bucket_count); -	proc.processQueries(); -	return 0; +    std::ios_base::sync_with_stdio(false); +    print_occurrences(rabin_karp(read_input())); +    return 0;  } | 
