summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHaidong Ji2019-03-16 21:52:16 -0500
committerHaidong Ji2019-03-16 21:52:16 -0500
commitb1aea90f00a4bf65253946b9f1017b6c9804535a (patch)
treeebde562f654c314620da40d1df587e0cfe04c498
parent8fdeec331c424f2a7cfc98619e98a0d2420ddf7c (diff)
Rabin-Karp string search done!
Fun exercise. Some takeaways: 1. Smallest test case is easy to walk through and verify the logic, indexing, and such is correct; 2. Persist, don't give up, take it step by step, and you'll reach the goal! Hooray!
-rw-r--r--src/main/HashSubstring.java135
-rw-r--r--src/test/HashChainsTest.java1
-rw-r--r--src/test/HashSubstringTest.java52
3 files changed, 188 insertions, 0 deletions
diff --git a/src/main/HashSubstring.java b/src/main/HashSubstring.java
new file mode 100644
index 0000000..baef360
--- /dev/null
+++ b/src/main/HashSubstring.java
@@ -0,0 +1,135 @@
+import java.io.*;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.StringTokenizer;
+import java.util.concurrent.ThreadLocalRandom;
+
+public class HashSubstring {
+
+ private static FastScanner in;
+ private static PrintWriter out;
+
+ public static void main(String[] args) throws IOException {
+ in = new FastScanner();
+ out = new PrintWriter(new BufferedOutputStream(System.out));
+ printOccurrences(rabinKarp(readInput()));
+ out.close();
+ }
+
+ private static Data readInput() throws IOException {
+ String pattern = in.next();
+ String text = in.next();
+ return new Data(pattern, text);
+ }
+
+ private static void printOccurrences(List<Integer> ans) {
+ for (Integer cur : ans) {
+ out.print(cur);
+ out.print(" ");
+ }
+ }
+
+ static List<Integer> getOccurrences(Data input) {
+ String s = input.pattern, t = input.text;
+ int m = s.length(), n = t.length();
+ List<Integer> occurrences = new ArrayList<>();
+ for (int i = 0; i + m <= n; ++i) {
+ boolean equal = true;
+ for (int j = 0; j < m; ++j) {
+ if (s.charAt(j) != t.charAt(i + j)) {
+ equal = false;
+ break;
+ }
+ }
+ if (equal)
+ occurrences.add(i);
+ }
+ return occurrences;
+ }
+
+ static class Data {
+ String pattern;
+ String text;
+
+ Data(String pattern, String text) {
+ this.pattern = pattern;
+ this.text = text;
+ }
+ }
+
+ static long polyHash(String S, int p, int x) {
+ long hash = 0;
+
+ for (int i = S.length() - 1; i >= 0; --i)
+ hash = (hash * x + S.charAt(i)) % p;
+
+ return hash;
+ }
+
+ static long[] preComputeHashes(String T, int pattenLength, int p, int x) {
+ long[] H = new long[T.length() - pattenLength + 1];
+ String S = T.substring(T.length() - pattenLength);
+ H[T.length() - pattenLength] = polyHash(S, p, x);
+ long y = 1;
+ for (int i = 0; i < pattenLength; i++) {
+ y = (y * x) % p;
+ }
+ for (int i = T.length() - pattenLength - 1; i >= 0; --i) {
+ H[i] = (x * H[i + 1] + T.charAt(i) - y * T.charAt(i + pattenLength)) % p;
+ }
+ return H;
+ }
+
+ static List<Integer> rabinKarp(Data input) {
+ String T = input.text;
+ String P = input.pattern;
+ int p = 1000000007;
+ int x = ThreadLocalRandom.current().nextInt(1, p);
+ List<Integer> positions = new ArrayList<>();
+ long pHash = polyHash(P, p, x);
+ long[] H = preComputeHashes(T, P.length(), p, x);
+ for (int i = 0; i < T.length() - P.length() + 1; i++) {
+ if (pHash != H[i]) {
+ continue;
+ }
+ if (T.substring(i, i + P.length()).equals(P))
+ positions.add(i);
+ }
+ return positions;
+ }
+
+ static List<Integer> rabinKarp(String P, String T) {
+ int p = 1000000007;
+ int x = ThreadLocalRandom.current().nextInt(1, p);
+ List<Integer> positions = new ArrayList<>();
+ long pHash = polyHash(P, p, x);
+ long[] H = preComputeHashes(T, P.length(), p, x);
+ for (int i = 0; i < T.length() - P.length() + 1; i++) {
+ if (pHash != H[i]) {
+ continue;
+ }
+ if (T.substring(i, i + P.length()).equals(P))
+ positions.add(i);
+ }
+ return positions;
+ }
+
+ static class FastScanner {
+ private BufferedReader reader;
+ private StringTokenizer tokenizer;
+
+ FastScanner() {
+ reader = new BufferedReader(new InputStreamReader(System.in));
+ tokenizer = null;
+ }
+
+ String next() throws IOException {
+ while (tokenizer == null || !tokenizer.hasMoreTokens()) {
+ tokenizer = new StringTokenizer(reader.readLine());
+ }
+ return tokenizer.nextToken();
+ }
+ }
+}
+
+
diff --git a/src/test/HashChainsTest.java b/src/test/HashChainsTest.java
index 9a7f192..57255fc 100644
--- a/src/test/HashChainsTest.java
+++ b/src/test/HashChainsTest.java
@@ -15,5 +15,6 @@ class HashChainsTest {
hc.bucketCount = 3;
assertEquals(1, hc.hashFunc("add"));
assertEquals(1, hc.hashFunc("help"));
+ assertEquals(2, hc.hashFunc("del"));
}
} \ No newline at end of file
diff --git a/src/test/HashSubstringTest.java b/src/test/HashSubstringTest.java
new file mode 100644
index 0000000..854b329
--- /dev/null
+++ b/src/test/HashSubstringTest.java
@@ -0,0 +1,52 @@
+import org.junit.jupiter.api.Test;
+
+import java.util.List;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+class HashSubstringTest {
+ @Test
+ void test() {
+ HashSubstring.Data data = new HashSubstring.Data("aba", "abacaba");
+ assertEquals(2, HashSubstring.getOccurrences(data).size());
+ assertEquals(0, HashSubstring.getOccurrences(data).get(0));
+ assertEquals(4, HashSubstring.getOccurrences(data).get(1));
+
+ List<Integer> postions = HashSubstring.rabinKarp("a", "ab");
+ assertEquals(1, postions.size());
+ assertEquals(0, postions.get(0));
+
+ postions = HashSubstring.rabinKarp("a", "ba");
+ assertEquals(1, postions.size());
+ assertEquals(1, postions.get(0));
+
+ postions = HashSubstring.rabinKarp("aba", "abacaba");
+ assertEquals(2, postions.size());
+ assertEquals(0, postions.get(0));
+ assertEquals(4, postions.get(1));
+ }
+
+ @Test
+ void test1() {
+ HashSubstring.Data data = new HashSubstring.Data("Test", "testTesttesT");
+ assertEquals(1, HashSubstring.getOccurrences(data).size());
+ assertEquals(4, HashSubstring.getOccurrences(data).get(0));
+ List<Integer> postions = HashSubstring.rabinKarp("Test", "testTesttesT");
+ assertEquals(1, postions.size());
+ assertEquals(4, postions.get(0));
+ }
+
+ @Test
+ void test2() {
+ HashSubstring.Data data = new HashSubstring.Data("aaaaa", "baaaaaaa");
+ assertEquals(3, HashSubstring.getOccurrences(data).size());
+ assertEquals(1, HashSubstring.getOccurrences(data).get(0));
+ assertEquals(2, HashSubstring.getOccurrences(data).get(1));
+ assertEquals(3, HashSubstring.getOccurrences(data).get(2));
+ List<Integer> positions = HashSubstring.rabinKarp("aaaaa", "baaaaaaa");
+ assertEquals(3, positions.size());
+ assertEquals(1, positions.get(0));
+ assertEquals(2, positions.get(1));
+ assertEquals(3, positions.get(2));
+ }
+} \ No newline at end of file