buildr/internal/ignore/pattern.go

package ignore

import (
	"bytes"
	"regexp"
	"strings"
)

// doubleStar is the glob segment ("**") that parsePattern treats as
// matching across multiple path segments.
const doubleStar = "**"

// ignorePattern is a single ignore rule compiled to a regular expression.
type ignorePattern struct {
	// Pattern is the compiled expression that paths are matched against.
	Pattern *regexp.Regexp
	// Include is true for negated ("!"-prefixed) patterns: a path matching
	// such a pattern is reported as not ignored.
	Include bool
}

// ignore tests path against the pattern.
//
// match reports whether the pattern's expression matched path. ignore is
// only ever true when match is true, and then only for patterns that are
// not negated (Include is false).
func (p ignorePattern) ignore(path string) (match, ignore bool) {
	if !p.Pattern.MatchString(path) {
		// No match: this pattern has no opinion about the path.
		return false, false
	}

	return true, !p.Include
}

// emptyPatternError is returned by parsePattern when nothing is left of the
// pattern after stripping the negation prefix and leading escape.
type emptyPatternError struct{}

// Error implements the error interface.
func (emptyPatternError) Error() string { return "empty ignore pattern" }

// parsePattern compiles a single glob-style ignore pattern into an
// ignorePattern.
//
// Supported syntax:
//   - a leading "!" negates the pattern (Include is set to true);
//   - a leading "\#" or "\!" stands for a literal '#' or '!';
//   - a leading "/" anchors the pattern to the root, otherwise the pattern
//     may match at any depth (as if prefixed with "**/");
//   - a trailing "/" matches everything below the named directory (as if
//     suffixed with "/**");
//   - "**" matches across path segments, "*" matches exactly one segment,
//     and every other segment is translated by translateGlob.
//
// An error is returned when the pattern is empty or when the generated
// regular expression does not compile; the returned pattern is nil then.
func parsePattern(pattern string) (*ignorePattern, error) {
	p := &ignorePattern{}

	// An optional prefix "!" which negates the pattern; any matching file
	// excluded by a previous pattern will become included again.
	if strings.HasPrefix(pattern, "!") {
		pattern = pattern[1:]
		p.Include = true
	}

	// Remove the leading back-slash only when it escapes a hash ('#') or an
	// exclamation mark ('!'). Any other leading escape (e.g. `\*`) is left
	// in place so translateGlob can turn it into a literal character
	// instead of it silently becoming a wildcard.
	if strings.HasPrefix(pattern, `\#`) || strings.HasPrefix(pattern, `\!`) {
		pattern = pattern[1:]
	}

	// Without this guard an empty pattern would leave no segments behind
	// and the trailing-slash check below would index out of range.
	if pattern == "" {
		return nil, emptyPatternError{}
	}

	// Split pattern into segments.
	patternSegs := strings.Split(pattern, "/")

	// A pattern beginning with a slash ('/') will only match paths
	// directly on the root directory instead of any descendant paths, so
	// the empty first segment is removed to make the pattern absolute to
	// the root. A pattern without a beginning slash will match any
	// descendant path. This is equivalent to "**/{pattern}", so
	// double-asterisks are prepended to make the pattern relative to the
	// root.
	if patternSegs[0] == "" {
		patternSegs = patternSegs[1:]
	} else if patternSegs[0] != doubleStar {
		patternSegs = append([]string{doubleStar}, patternSegs...)
	}

	// A pattern ending with a slash ('/') will match all descendant paths
	// if it is a directory, but not if it is a regular file. This is
	// equivalent to "{pattern}/**", so the empty last segment is replaced
	// with double-asterisks to include all descendants.
	last := len(patternSegs) - 1
	if patternSegs[last] == "" {
		patternSegs[last] = doubleStar
	}

	// Build regular expression from pattern.
	var expr bytes.Buffer
	expr.WriteString("^")
	// needSlash records whether the next segment must be preceded by a
	// path separator in the generated expression.
	needSlash := false

	for i, seg := range patternSegs {
		switch seg {
		case doubleStar:
			//nolint:gocritic // should be alright
			switch {
			case i == 0 && i == last:
				// A pattern consisting solely of double-asterisks ('**')
				// will match every path.
				expr.WriteString(".+")
			case i == 0:
				// A normalized pattern beginning with double-asterisks
				// ('**') will match any leading path segments.
				expr.WriteString("(?:.+/)?")
				needSlash = false
			case i == last:
				// A normalized pattern ending with double-asterisks ('**')
				// will match any trailing path segments.
				expr.WriteString("/.+")
			default:
				// A pattern with inner double-asterisks ('**') will match
				// multiple (or zero) inner path segments.
				expr.WriteString("(?:/.+)?")
				needSlash = true
			}
		case "*":
			// Match single path segment.
			if needSlash {
				expr.WriteString("/")
			}
			expr.WriteString("[^/]+")
			needSlash = true
		default:
			// Match segment glob pattern.
			if needSlash {
				expr.WriteString("/")
			}
			expr.WriteString(translateGlob(seg))
			needSlash = true
		}
	}

	expr.WriteString("$")

	compiled, err := regexp.Compile(expr.String())
	if err != nil {
		return nil, err
	}
	p.Pattern = compiled

	return p, nil
}

// translateGlob converts a single path-segment glob into the equivalent
// regular expression fragment (without anchors).
//
//   - `\x` yields the literal character x;
//   - `*` matches any run of characters except '/', including none;
//   - `?` matches exactly one character except '/';
//   - `[...]` is handed to translateBracketExpression;
//   - everything else is matched literally.
//
// A dangling back-slash at the very end of glob has nothing to escape and
// is dropped.
//
// NOTE: This is derived from `fnmatch.translate()` and is similar to
// the POSIX function `fnmatch()` with the `FNM_PATHNAME` flag set.
func translateGlob(glob string) string {
	var regex bytes.Buffer
	escape := false

	for i := 0; i < len(glob); i++ {
		char := glob[i]
		switch {
		case escape:
			// Previous character was a back-slash: take this one literally.
			// The one-byte substring is quoted (rather than string(char))
			// because converting a byte >= 0x80 to a string would re-encode
			// it as a separate code point and corrupt multi-byte UTF-8
			// sequences in the pattern.
			escape = false
			regex.WriteString(regexp.QuoteMeta(glob[i : i+1]))
		case char == '\\':
			// Escape character, escape next character.
			escape = true
		case char == '*':
			// Multi-character wildcard. Match any string (except slashes),
			// including an empty string.
			regex.WriteString("[^/]*")
		case char == '?':
			// Single-character wildcard. Match any single character (except
			// a slash).
			regex.WriteString("[^/]")
		case char == '[':
			// The helper advances i past the bracket expression it consumed.
			regex.WriteString(translateBracketExpression(&i, glob))
		default:
			// Regular character, escape it for regex. All regex
			// metacharacters are ASCII, so bytes of a multi-byte UTF-8
			// sequence pass through unchanged and stay contiguous.
			regex.WriteString(regexp.QuoteMeta(glob[i : i+1]))
		}
	}
	return regex.String()
}

// translateBracketExpression translates the glob bracket expression whose
// opening '[' is at glob[*i] into a regular expression character class.
//
// A leading '!' negates the class. A ']' directly after the opening bracket
// (or after the negation mark) is a member of the class rather than its
// end. The members are regex-quoted, so they are matched literally while
// ranges such as "a-z" keep working.
//   - "[][!]" matches ']', '[' and '!'.
//   - "[]-]" matches ']' and '-'.
//   - "[!]a-]" matches any character except ']', 'a' and '-'.
//
// On success *i is moved to the index of the closing ']'. If there is no
// closing bracket, the '[' is treated as a literal character: an escaped
// '[' is returned and *i is left untouched, so the caller continues with
// the character following the '['.
func translateBracketExpression(i *int, glob string) string {
	open := *i
	j := open + 1

	// Pass bracket expression negation.
	if j < len(glob) && glob[j] == '!' {
		j++
	}
	// Pass first closing bracket if it is at the beginning of the
	// expression.
	if j < len(glob) && glob[j] == ']' {
		j++
	}
	// Find closing bracket. Stop once we reach the end or find it.
	for j < len(glob) && glob[j] != ']' {
		j++
	}

	if j >= len(glob) {
		// Failed to find closing bracket, treat opening bracket as a
		// bracket literal instead of as an expression.
		return `\[`
	}

	prefix := "["
	members := open + 1
	if glob[members] == '!' {
		// Glob negation ('!') is spelled '^' in a regex character class.
		prefix = "[^"
		members++
	}

	*i = j
	return prefix + regexp.QuoteMeta(glob[members:j]) + "]"
}