buildr/internal/ignore/pattern.go
Peter 1261932bdc
All checks were successful
continuous-integration/drone/push Build is passing
refactor: apply golangci-lint findings
2023-06-22 19:16:00 +02:00

196 lines
5.2 KiB
Go

package ignore
import (
	"bytes"
	"errors"
	"regexp"
	"strings"
)
const doubleStar = "**"
// ignorePattern is one compiled ignore rule: a regular expression plus
// a flag telling whether a match excludes or re-includes the path.
type ignorePattern struct {
	// Pattern is the compiled expression a path is tested against.
	Pattern *regexp.Regexp
	// Include is true when a matching path must NOT be ignored.
	Include bool
}

// ignore tests path against the rule.
//
// match reports whether the rule's expression matched path. ignore is
// only meaningful when match is true; it is true when the rule excludes
// the path and false when the rule re-includes it. For a non-matching
// path both results are false.
func (p ignorePattern) ignore(path string) (match, ignore bool) {
	if !p.Pattern.MatchString(path) {
		return false, false
	}
	return true, !p.Include
}
// parsePattern compiles one gitignore-style pattern into an
// ignorePattern.
//
// A leading "!" marks the pattern as an include (negating) rule. The
// remainder is split on "/" and translated segment by segment into a
// regular expression anchored at both ends.
//
// An error is returned when nothing is left of the pattern after the
// optional "!" prefix, or when the generated expression does not
// compile.
func parsePattern(pattern string) (*ignorePattern, error) {
	p := &ignorePattern{}

	// An optional prefix "!" which negates the pattern; any matching file
	// excluded by a previous pattern will become included again.
	if strings.HasPrefix(pattern, "!") {
		pattern = pattern[1:]
		p.Include = true
	}

	// An empty pattern would leave no segments at all, and looking at the
	// last segment further down would index out of range.
	if pattern == "" {
		return nil, errors.New("empty ignore pattern")
	}

	// A leading back-slash (as in "\#" or "\!") is deliberately kept in
	// place: translateGlob resolves back-slash escapes itself, so the
	// escaped hash or exclamation mark still ends up as a literal, and an
	// escaped glob character such as "\*" is no longer turned back into a
	// wildcard by stripping its back-slash here.

	// Split pattern into segments.
	patternSegs := strings.Split(pattern, "/")

	// A pattern beginning with a slash ('/') will only match paths
	// directly on the root directory instead of any descendant paths.
	// So remove the empty first segment to make the pattern absolute to
	// the root.
	// A pattern without a beginning slash ('/') will match any
	// descendant path. This is equivalent to "**/{pattern}". So
	// prepend double-asterisks to make the pattern relative to the
	// root.
	if patternSegs[0] == "" {
		patternSegs = patternSegs[1:]
	} else if patternSegs[0] != doubleStar {
		patternSegs = append([]string{doubleStar}, patternSegs...)
	}

	// A pattern ending with a slash ('/') will match all descendant
	// paths if it is a directory, but not if it is a regular file.
	// This is equivalent to "{pattern}/**". So, set the last segment to
	// double-asterisks to include all descendants.
	last := len(patternSegs) - 1
	if patternSegs[last] == "" {
		patternSegs[last] = doubleStar
	}

	// Build regular expression from pattern. needSlash records whether
	// the next literal segment has to be preceded by a separator.
	var expr bytes.Buffer
	expr.WriteString("^")
	needSlash := false
	for i, seg := range patternSegs {
		switch seg {
		case doubleStar:
			//nolint:gocritic // should be alright
			switch {
			case i == 0 && i == last:
				// A pattern consisting solely of double-asterisks ('**')
				// will match every path.
				expr.WriteString(".+")
			case i == 0:
				// A normalized pattern beginning with double-asterisks
				// ('**') will match any leading path segments.
				expr.WriteString("(?:.+/)?")
				needSlash = false
			case i == last:
				// A normalized pattern ending with double-asterisks ('**')
				// will match any trailing path segments.
				expr.WriteString("/.+")
			default:
				// A pattern with inner double-asterisks ('**') will match
				// multiple (or zero) inner path segments.
				expr.WriteString("(?:/.+)?")
				needSlash = true
			}
		case "*":
			// Match single path segment.
			if needSlash {
				expr.WriteString("/")
			}
			expr.WriteString("[^/]+")
			needSlash = true
		default:
			// Match segment glob pattern.
			if needSlash {
				expr.WriteString("/")
			}
			expr.WriteString(translateGlob(seg))
			needSlash = true
		}
	}
	expr.WriteString("$")

	compiled, err := regexp.Compile(expr.String())
	if err != nil {
		return nil, err
	}
	p.Pattern = compiled
	return p, nil
}
// translateGlob converts the glob of a single path segment into an
// unanchored regular-expression fragment.
//
// Supported syntax:
//   - `\x`  the character x taken literally,
//   - `*`   any run of characters except '/', possibly empty,
//   - `?`   exactly one character except '/',
//   - `[`   start of a bracket expression, handled by
//     translateBracketExpression.
//
// Every other character is quoted so it matches itself. A back-slash
// that is the very last byte of glob has nothing to escape and is
// dropped.
//
// NOTE: This is derived from `fnmatch.translate()` and is similar to
// the POSIX function `fnmatch()` with the `FNM_PATHNAME` flag set.
func translateGlob(glob string) string {
	// Bytes at or above this value belong to a multi-byte UTF-8 sequence.
	const runeSelf = 0x80

	var regex bytes.Buffer
	escape := false
	for i := 0; i < len(glob); i++ {
		char := glob[i]
		switch {
		case char >= runeSelf:
			// Part of a multi-byte UTF-8 sequence: copy it through
			// unchanged. Converting the single byte with string(char)
			// would reinterpret it as a code point and re-encode it,
			// corrupting every non-ASCII character of the pattern. Such
			// bytes are never regex metacharacters, so no quoting is
			// needed, escaped or not.
			escape = false
			regex.WriteByte(char)
		case escape:
			// Previous byte was a back-slash: take this one literally.
			escape = false
			regex.WriteString(regexp.QuoteMeta(string(char)))
		case char == '\\':
			// Escape character, escape next character.
			escape = true
		case char == '*':
			// Multi-character wildcard. Match any string (except slashes),
			// including an empty string.
			regex.WriteString("[^/]*")
		case char == '?':
			// Single-character wildcard. Match any single character (except
			// a slash).
			regex.WriteString("[^/]")
		case char == '[':
			// The helper moves i forward over the expression it consumed.
			regex.WriteString(translateBracketExpression(&i, glob))
		default:
			// Regular character, escape it for regex.
			regex.WriteString(regexp.QuoteMeta(string(char)))
		}
	}
	return regex.String()
}
// translateBracketExpression converts the glob bracket expression whose
// opening '[' sits at glob[*i] into a regular-expression fragment.
//
// Except for the beginning exclamation mark, which becomes the regex
// negation '^', the content of the expression is quoted and reused as a
// regex character class; the work is in finding where it ends:
//   - "[][!]" matches ']', '[' and '!'.
//   - "[]-]" matches ']' and '-'.
//   - "[!]a-]" matches any character except ']', 'a' and '-'.
//
// When the closing bracket is found, *i is moved onto it so that the
// caller's own increment continues right after the expression. When
// there is no closing bracket, the '[' is returned as an escaped
// literal and *i is left untouched, so the caller goes on with the
// character following the '['.
func translateBracketExpression(i *int, glob string) string {
	start := *i + 1
	end := start

	// Pass bracket expression negation.
	if end < len(glob) && glob[end] == '!' {
		end++
	}
	// Pass first closing bracket if it is at the beginning of the
	// expression; there it is a member of the set, not its end.
	if end < len(glob) && glob[end] == ']' {
		end++
	}
	// Find closing bracket. Stop once we reach the end or find it.
	for end < len(glob) && glob[end] != ']' {
		end++
	}

	if end >= len(glob) {
		// Failed to find closing bracket, treat opening bracket as a
		// bracket literal instead of as an expression.
		return `\[`
	}

	negate := ""
	if glob[start] == '!' {
		negate = "^"
		start++
	}
	*i = end
	return "[" + negate + regexp.QuoteMeta(glob[start:end]) + "]"
}