Source File
search.go
Belonging Package
strings
// Copyright 2012 The Go Authors. All rights reserved.// Use of this source code is governed by a BSD-style// license that can be found in the LICENSE file.package strings// stringFinder efficiently finds strings in a source text. It's implemented// using the Boyer-Moore string search algorithm:// https://en.wikipedia.org/wiki/Boyer-Moore_string_search_algorithm// https://www.cs.utexas.edu/~moore/publications/fstrpos.pdf (note: this aged// document uses 1-based indexing)type stringFinder struct {// pattern is the string that we are searching for in the text.pattern string// badCharSkip[b] contains the distance between the last byte of pattern// and the rightmost occurrence of b in pattern. If b is not in pattern,// badCharSkip[b] is len(pattern).//// Whenever a mismatch is found with byte b in the text, we can safely// shift the matching frame at least badCharSkip[b] until the next time// the matching char could be in alignment.badCharSkip [256]int// goodSuffixSkip[i] defines how far we can shift the matching frame given// that the suffix pattern[i+1:] matches, but the byte pattern[i] does// not. There are two cases to consider://// 1. The matched suffix occurs elsewhere in pattern (with a different// byte preceding it that we might possibly match). In this case, we can// shift the matching frame to align with the next suffix chunk. For// example, the pattern "mississi" has the suffix "issi" next occurring// (in right-to-left order) at index 1, so goodSuffixSkip[3] ==// shift+len(suffix) == 3+4 == 7.//// 2. If the matched suffix does not occur elsewhere in pattern, then the// matching frame may share part of its prefix with the end of the// matching suffix. In this case, goodSuffixSkip[i] will contain how far// to shift the frame to align this portion of the prefix to the// suffix. For example, in the pattern "abcxxxabc", when the first// mismatch from the back is found to be in position 3, the matching// suffix "xxabc" is not found elsewhere in the pattern. However, its// rightmost "abc" (at position 6) is a prefix of the whole pattern, so// goodSuffixSkip[3] == shift+len(suffix) == 6+5 == 11.goodSuffixSkip []int}func ( string) *stringFinder {:= &stringFinder{pattern: ,goodSuffixSkip: make([]int, len()),}// last is the index of the last character in the pattern.:= len() - 1// Build bad character table.// Bytes not in the pattern can skip one pattern's length.for := range .badCharSkip {.badCharSkip[] = len()}// The loop condition is < instead of <= so that the last byte does not// have a zero distance to itself. Finding this byte out of place implies// that it is not in the last position.for := 0; < ; ++ {.badCharSkip[[]] = -}// Build good suffix table.// First pass: set each value to the next index which starts a prefix of// pattern.:=for := ; >= 0; -- {if HasPrefix(, [+1:]) {= + 1}// lastPrefix is the shift, and (last-i) is len(suffix)..goodSuffixSkip[] = + -}// Second pass: find repeats of pattern's suffix starting from the front.for := 0; < ; ++ {:= longestCommonSuffix(, [1:+1])if [-] != [-] {// (last-i) is the shift, and lenSuffix is len(suffix)..goodSuffixSkip[-] = + -}}return}func (, string) ( int) {for ; < len() && < len(); ++ {if [len()-1-] != [len()-1-] {break}}return}// next returns the index in text of the first occurrence of the pattern. If// the pattern is not found, it returns -1.func ( *stringFinder) ( string) int {:= len(.pattern) - 1for < len() {// Compare backwards from the end until the first unmatching character.:= len(.pattern) - 1for >= 0 && [] == .pattern[] {----}if < 0 {return + 1 // match}+= max(.badCharSkip[[]], .goodSuffixSkip[])}return -1}func (, int) int {if > {return}return}