From b1aea90f00a4bf65253946b9f1017b6c9804535a Mon Sep 17 00:00:00 2001 From: Haidong Ji Date: Sat, 16 Mar 2019 21:52:16 -0500 Subject: Rabin-Karp string search done! Fun exercise. Some takeaways: 1. Smallest test case is easy to walk through and verify the logic, indexing, and such is correct; 2. Persist, don't give up, take it step by step, and you'll reach the goal! Hooray! --- src/main/HashSubstring.java | 135 ++++++++++++++++++++++++++++++++++++++++ src/test/HashChainsTest.java | 1 + src/test/HashSubstringTest.java | 52 ++++++++++++++++ 3 files changed, 188 insertions(+) create mode 100644 src/main/HashSubstring.java create mode 100644 src/test/HashSubstringTest.java diff --git a/src/main/HashSubstring.java b/src/main/HashSubstring.java new file mode 100644 index 0000000..baef360 --- /dev/null +++ b/src/main/HashSubstring.java @@ -0,0 +1,135 @@ +import java.io.*; +import java.util.ArrayList; +import java.util.List; +import java.util.StringTokenizer; +import java.util.concurrent.ThreadLocalRandom; + +public class HashSubstring { + + private static FastScanner in; + private static PrintWriter out; + + public static void main(String[] args) throws IOException { + in = new FastScanner(); + out = new PrintWriter(new BufferedOutputStream(System.out)); + printOccurrences(rabinKarp(readInput())); + out.close(); + } + + private static Data readInput() throws IOException { + String pattern = in.next(); + String text = in.next(); + return new Data(pattern, text); + } + + private static void printOccurrences(List ans) { + for (Integer cur : ans) { + out.print(cur); + out.print(" "); + } + } + + static List getOccurrences(Data input) { + String s = input.pattern, t = input.text; + int m = s.length(), n = t.length(); + List occurrences = new ArrayList<>(); + for (int i = 0; i + m <= n; ++i) { + boolean equal = true; + for (int j = 0; j < m; ++j) { + if (s.charAt(j) != t.charAt(i + j)) { + equal = false; + break; + } + } + if (equal) + occurrences.add(i); + } + return occurrences; + } + + static class Data { + String pattern; + String text; + + Data(String pattern, String text) { + this.pattern = pattern; + this.text = text; + } + } + + static long polyHash(String S, int p, int x) { + long hash = 0; + + for (int i = S.length() - 1; i >= 0; --i) + hash = (hash * x + S.charAt(i)) % p; + + return hash; + } + + static long[] preComputeHashes(String T, int pattenLength, int p, int x) { + long[] H = new long[T.length() - pattenLength + 1]; + String S = T.substring(T.length() - pattenLength); + H[T.length() - pattenLength] = polyHash(S, p, x); + long y = 1; + for (int i = 0; i < pattenLength; i++) { + y = (y * x) % p; + } + for (int i = T.length() - pattenLength - 1; i >= 0; --i) { + H[i] = (x * H[i + 1] + T.charAt(i) - y * T.charAt(i + pattenLength)) % p; + } + return H; + } + + static List rabinKarp(Data input) { + String T = input.text; + String P = input.pattern; + int p = 1000000007; + int x = ThreadLocalRandom.current().nextInt(1, p); + List positions = new ArrayList<>(); + long pHash = polyHash(P, p, x); + long[] H = preComputeHashes(T, P.length(), p, x); + for (int i = 0; i < T.length() - P.length() + 1; i++) { + if (pHash != H[i]) { + continue; + } + if (T.substring(i, i + P.length()).equals(P)) + positions.add(i); + } + return positions; + } + + static List rabinKarp(String P, String T) { + int p = 1000000007; + int x = ThreadLocalRandom.current().nextInt(1, p); + List positions = new ArrayList<>(); + long pHash = polyHash(P, p, x); + long[] H = preComputeHashes(T, P.length(), p, x); + for (int i = 0; i < T.length() - P.length() + 1; i++) { + if (pHash != H[i]) { + continue; + } + if (T.substring(i, i + P.length()).equals(P)) + positions.add(i); + } + return positions; + } + + static class FastScanner { + private BufferedReader reader; + private StringTokenizer tokenizer; + + FastScanner() { + reader = new BufferedReader(new InputStreamReader(System.in)); + tokenizer = null; + } + + String next() throws IOException { + while (tokenizer == null || !tokenizer.hasMoreTokens()) { + tokenizer = new StringTokenizer(reader.readLine()); + } + return tokenizer.nextToken(); + } + } +} + + diff --git a/src/test/HashChainsTest.java b/src/test/HashChainsTest.java index 9a7f192..57255fc 100644 --- a/src/test/HashChainsTest.java +++ b/src/test/HashChainsTest.java @@ -15,5 +15,6 @@ class HashChainsTest { hc.bucketCount = 3; assertEquals(1, hc.hashFunc("add")); assertEquals(1, hc.hashFunc("help")); + assertEquals(2, hc.hashFunc("del")); } } \ No newline at end of file diff --git a/src/test/HashSubstringTest.java b/src/test/HashSubstringTest.java new file mode 100644 index 0000000..854b329 --- /dev/null +++ b/src/test/HashSubstringTest.java @@ -0,0 +1,52 @@ +import org.junit.jupiter.api.Test; + +import java.util.List; + +import static org.junit.jupiter.api.Assertions.*; + +class HashSubstringTest { + @Test + void test() { + HashSubstring.Data data = new HashSubstring.Data("aba", "abacaba"); + assertEquals(2, HashSubstring.getOccurrences(data).size()); + assertEquals(0, HashSubstring.getOccurrences(data).get(0)); + assertEquals(4, HashSubstring.getOccurrences(data).get(1)); + + List postions = HashSubstring.rabinKarp("a", "ab"); + assertEquals(1, postions.size()); + assertEquals(0, postions.get(0)); + + postions = HashSubstring.rabinKarp("a", "ba"); + assertEquals(1, postions.size()); + assertEquals(1, postions.get(0)); + + postions = HashSubstring.rabinKarp("aba", "abacaba"); + assertEquals(2, postions.size()); + assertEquals(0, postions.get(0)); + assertEquals(4, postions.get(1)); + } + + @Test + void test1() { + HashSubstring.Data data = new HashSubstring.Data("Test", "testTesttesT"); + assertEquals(1, HashSubstring.getOccurrences(data).size()); + assertEquals(4, HashSubstring.getOccurrences(data).get(0)); + List postions = HashSubstring.rabinKarp("Test", "testTesttesT"); + assertEquals(1, postions.size()); + assertEquals(4, postions.get(0)); + } + + @Test + void test2() { + HashSubstring.Data data = new HashSubstring.Data("aaaaa", "baaaaaaa"); + assertEquals(3, HashSubstring.getOccurrences(data).size()); + assertEquals(1, HashSubstring.getOccurrences(data).get(0)); + assertEquals(2, HashSubstring.getOccurrences(data).get(1)); + assertEquals(3, HashSubstring.getOccurrences(data).get(2)); + List positions = HashSubstring.rabinKarp("aaaaa", "baaaaaaa"); + assertEquals(3, positions.size()); + assertEquals(1, positions.get(0)); + assertEquals(2, positions.get(1)); + assertEquals(3, positions.get(2)); + } +} \ No newline at end of file -- cgit v1.2.3