diff options
author | Haidong Ji | 2019-03-16 21:52:16 -0500 |
---|---|---|
committer | Haidong Ji | 2019-03-16 21:52:16 -0500 |
commit | b1aea90f00a4bf65253946b9f1017b6c9804535a (patch) | |
tree | ebde562f654c314620da40d1df587e0cfe04c498 /src/main | |
parent | 8fdeec331c424f2a7cfc98619e98a0d2420ddf7c (diff) |
Rabin-Karp string search done!
Fun exercise. Some takeaways:
1. Smallest test case is easy to walk through and verify the logic,
indexing, and such is correct;
2. Persist, don't give up, take it step by step, and you'll reach the goal!
Hooray!
Diffstat (limited to 'src/main')
-rw-r--r-- | src/main/HashSubstring.java | 135 |
1 files changed, 135 insertions, 0 deletions
diff --git a/src/main/HashSubstring.java b/src/main/HashSubstring.java new file mode 100644 index 0000000..baef360 --- /dev/null +++ b/src/main/HashSubstring.java @@ -0,0 +1,135 @@ +import java.io.*; +import java.util.ArrayList; +import java.util.List; +import java.util.StringTokenizer; +import java.util.concurrent.ThreadLocalRandom; + +public class HashSubstring { + + private static FastScanner in; + private static PrintWriter out; + + public static void main(String[] args) throws IOException { + in = new FastScanner(); + out = new PrintWriter(new BufferedOutputStream(System.out)); + printOccurrences(rabinKarp(readInput())); + out.close(); + } + + private static Data readInput() throws IOException { + String pattern = in.next(); + String text = in.next(); + return new Data(pattern, text); + } + + private static void printOccurrences(List<Integer> ans) { + for (Integer cur : ans) { + out.print(cur); + out.print(" "); + } + } + + static List<Integer> getOccurrences(Data input) { + String s = input.pattern, t = input.text; + int m = s.length(), n = t.length(); + List<Integer> occurrences = new ArrayList<>(); + for (int i = 0; i + m <= n; ++i) { + boolean equal = true; + for (int j = 0; j < m; ++j) { + if (s.charAt(j) != t.charAt(i + j)) { + equal = false; + break; + } + } + if (equal) + occurrences.add(i); + } + return occurrences; + } + + static class Data { + String pattern; + String text; + + Data(String pattern, String text) { + this.pattern = pattern; + this.text = text; + } + } + + static long polyHash(String S, int p, int x) { + long hash = 0; + + for (int i = S.length() - 1; i >= 0; --i) + hash = (hash * x + S.charAt(i)) % p; + + return hash; + } + + static long[] preComputeHashes(String T, int pattenLength, int p, int x) { + long[] H = new long[T.length() - pattenLength + 1]; + String S = T.substring(T.length() - pattenLength); + H[T.length() - pattenLength] = polyHash(S, p, x); + long y = 1; + for (int i = 0; i < pattenLength; i++) { + y = (y * x) % p; + } + for (int i = T.length() - pattenLength - 1; i >= 0; --i) { + H[i] = (x * H[i + 1] + T.charAt(i) - y * T.charAt(i + pattenLength)) % p; + } + return H; + } + + static List<Integer> rabinKarp(Data input) { + String T = input.text; + String P = input.pattern; + int p = 1000000007; + int x = ThreadLocalRandom.current().nextInt(1, p); + List<Integer> positions = new ArrayList<>(); + long pHash = polyHash(P, p, x); + long[] H = preComputeHashes(T, P.length(), p, x); + for (int i = 0; i < T.length() - P.length() + 1; i++) { + if (pHash != H[i]) { + continue; + } + if (T.substring(i, i + P.length()).equals(P)) + positions.add(i); + } + return positions; + } + + static List<Integer> rabinKarp(String P, String T) { + int p = 1000000007; + int x = ThreadLocalRandom.current().nextInt(1, p); + List<Integer> positions = new ArrayList<>(); + long pHash = polyHash(P, p, x); + long[] H = preComputeHashes(T, P.length(), p, x); + for (int i = 0; i < T.length() - P.length() + 1; i++) { + if (pHash != H[i]) { + continue; + } + if (T.substring(i, i + P.length()).equals(P)) + positions.add(i); + } + return positions; + } + + static class FastScanner { + private BufferedReader reader; + private StringTokenizer tokenizer; + + FastScanner() { + reader = new BufferedReader(new InputStreamReader(System.in)); + tokenizer = null; + } + + String next() throws IOException { + while (tokenizer == null || !tokenizer.hasMoreTokens()) { + tokenizer = new StringTokenizer(reader.readLine()); + } + return tokenizer.nextToken(); + } + } +} + + |