pkg/systemd/parser/split.go

package parser

import (
	"fmt"
	"strings"
	"unicode"
)

/* Functions to split/join, unescape/escape strings similar to Exec=... lines in unit files */

// SplitFlags is a bit set of the Split* options below. The flags are
// combined with bitwise OR and passed to the splitting functions in this
// file to select how quoting, escaping and separators are handled.
type SplitFlags = uint64

const (
	SplitRelax                  SplitFlags = 1 << iota // Allow unbalanced quote and eat up trailing backslash.
	SplitCUnescape                                     // Unescape known escape sequences.
	SplitUnescapeRelax                                 // Allow and keep unknown escape sequences, allow and keep trailing backslash.
	SplitUnescapeSeparators                            // Unescape separators (those specified, or whitespace by default).
	SplitKeepQuote                                     // Ignore separators in quoting with "" and ''.
	SplitUnquote                                       // Ignore separators in quoting with "" and '', and remove the quotes.
	SplitDontCoalesceSeparators                        // Don't treat multiple adjacent separators as one.
	SplitRetainEscape                                  // Treat escape character '\' as any other character without special meaning.
	SplitRetainSeparators                              // Leave the separator(s) at the start of the returned remainder instead of skipping past them.
)

// WhitespaceSeparators is a separator set consisting of space, tab, newline
// and carriage return.
const WhitespaceSeparators = " \t\n\r"

// unoctchar returns the numeric value of the octal digit v, or -1 when v is
// not one of '0' through '7'.
func unoctchar(v byte) int {
	if digit := int(v) - '0'; digit >= 0 && digit <= 7 {
		return digit
	}
	return -1
}

// unhexchar returns the numeric value (0-15) of the hexadecimal digit v,
// accepting both lower and upper case letters, or -1 when v is not a
// hexadecimal digit.
func unhexchar(v byte) int {
	switch {
	case '0' <= v && v <= '9':
		return int(v - '0')
	case 'a' <= v && v <= 'f':
		return int(v-'a') + 10
	case 'A' <= v && v <= 'F':
		return int(v-'A') + 10
	}
	return -1
}

// isValidUnicode reports whether c does not exceed unicode.MaxRune, the
// largest Unicode code point.
func isValidUnicode(c uint32) bool {
	const limit uint32 = unicode.MaxRune + 1 // first value past the Unicode range
	return c < limit
}

/* This is based on code from systemd (src/basic/escape.c), marked LGPL-2.1-or-later and is copyrighted by the systemd developers */

// cUnescapeOne decodes a single C-style escape sequence. p must start at the
// character that follows the backslash.
//
// It returns the number of bytes of p that make up the sequence, the decoded
// character, and eightBit. eightBit is true for the \xNN and \NNN (octal)
// forms, whose value is a literal byte that should be copied directly rather
// than encoded as UTF-8. When p is empty, truncated, malformed or starts an
// unsupported sequence, the returned count is -1 and the other results are
// zero. Sequences that decode to NUL are rejected unless acceptNul is true.
func cUnescapeOne(p string, acceptNul bool) (int, rune, bool) {
	if len(p) < 1 {
		return -1, 0, false
	}

	var (
		count    = 1
		eightBit = false
		ret      rune
	)

	switch p[0] {
	case 'a':
		ret = '\a'
	case 'b':
		ret = '\b'
	case 'f':
		ret = '\f'
	case 'n':
		ret = '\n'
	case 'r':
		ret = '\r'
	case 't':
		ret = '\t'
	case 'v':
		ret = '\v'
	case '\\':
		ret = '\\'
	case '"':
		ret = '"'
	case '\'':
		ret = '\''
	case 's':
		/* This is an extension of the XDG syntax files */
		ret = ' '
	case 'x':
		/* hexadecimal encoding: exactly two hex digits */
		if len(p) < 3 {
			return -1, 0, false
		}

		a := unhexchar(p[1])
		b := unhexchar(p[2])
		if a < 0 || b < 0 {
			return -1, 0, false
		}

		/* Don't allow NUL bytes */
		if a == 0 && b == 0 && !acceptNul {
			return -1, 0, false
		}

		ret = rune((a << 4) | b)
		eightBit = true
		count = 3
	case 'u':
		/* C++11 style 16bit unicode: exactly four hex digits */
		if len(p) < 5 {
			return -1, 0, false
		}

		var c uint32
		for i := 1; i <= 4; i++ {
			d := unhexchar(p[i])
			if d < 0 {
				return -1, 0, false
			}
			c = c<<4 | uint32(d)
		}

		/* Don't allow 0 chars */
		if c == 0 && !acceptNul {
			return -1, 0, false
		}

		ret = rune(c)
		count = 5
	case 'U':
		/* C++11 style 32bit unicode: exactly eight hex digits */
		if len(p) < 9 {
			return -1, 0, false
		}

		var c uint32
		for i := 1; i <= 8; i++ {
			d := unhexchar(p[i])
			if d < 0 {
				return -1, 0, false
			}
			c = c<<4 | uint32(d)
		}

		/* Don't allow 0 chars */
		if c == 0 && !acceptNul {
			return -1, 0, false
		}

		/* Don't allow invalid code points */
		if !isValidUnicode(c) {
			return -1, 0, false
		}

		ret = rune(c)
		count = 9
	// Go cases do not fall through implicitly, so all octal lead digits
	// must be listed in a single case.
	case '0', '1', '2', '3', '4', '5', '6', '7':
		/* octal encoding: exactly three octal digits */
		if len(p) < 3 {
			return -1, 0, false
		}

		a := unoctchar(p[0])
		b := unoctchar(p[1])
		c := unoctchar(p[2])
		if a < 0 || b < 0 || c < 0 {
			return -1, 0, false
		}

		/* don't allow NUL bytes */
		if a == 0 && b == 0 && c == 0 && !acceptNul {
			return -1, 0, false
		}

		/* Don't allow bytes above 255 */
		m := (uint32(a) << 6) | (uint32(b) << 3) | uint32(c)
		if m > 255 {
			return -1, 0, false
		}

		ret = rune(m)
		eightBit = true
		count = 3
	default:
		return -1, 0, false
	}

	return count, ret, eightBit
}

/* This is based on code from systemd (src/basic/extract-word.c), marked LGPL-2.1-or-later and is copyrighted by the systemd developers */

// extractFirstWord parses the first word of in. Words are delimited by any
// of the characters in separators; quoting and backslash escapes are handled
// as selected by flags.
//
// Returns: word, remaining, more-words, error
//
// more-words is true when a word was extracted, in which case remaining is
// the part of in still to be parsed. It is false when in is empty or holds
// only separators. A word that was started but turned out empty (for example
// "" with SplitUnquote) still counts as a word. An error is returned for a
// trailing backslash, unbalanced quotes or an unsupported escape sequence,
// unless the corresponding relax flag allows it.
func extractFirstWord(in string, separators string, flags SplitFlags) (string, string, bool, error) {
	var s strings.Builder
	var quote byte     // 0 or ' or "
	backslash := false // whether we've just seen a backslash
	started := false   // whether a non-separator character has been seen

	// The string handling in this function is a bit weird, using
	// 0 bytes to mark end-of-string. This is because it is a direct
	// conversion of the C in systemd, and we want to ensure
	// exactly the same behaviour of some complex code

	p := 0
	end := len(in)
	var c byte

	// nextChar advances p and returns the byte there, or 0 at end of input.
	nextChar := func() byte {
		p++
		if p >= end {
			return 0
		}
		return in[p]
	}

	/* Bail early if called after last value or with no input */
	if len(in) == 0 {
		goto finish
	}

	// First skip any leading separators, stopping at the first character
	// that belongs to the word.
loop1:
	for c = in[0]; ; c = nextChar() {
		switch {
		case c == 0:
			goto finishForceTerminate
		case strings.ContainsRune(separators, rune(c)):
			if flags&SplitDontCoalesceSeparators != 0 {
				if !(flags&SplitRetainSeparators != 0) {
					p++
				}
				goto finishForceNext
			}
		default:
			// We found a non-blank character, so we will always
			// want to return a string (even if it is empty),
			// remember that here.
			started = true
			break loop1
		}
	}

	for ; ; c = nextChar() {
		switch {
		case backslash:
			if c == 0 {
				if flags&SplitUnescapeRelax != 0 &&
					(quote == 0 || flags&SplitRelax != 0) {
					// If we find an unquoted trailing backslash and we're in
					// SplitUnescapeRelax mode, keep it verbatim in the
					// output.
					//
					// Unbalanced quotes will only be allowed in SplitRelax
					// mode, SplitUnescapeRelax mode does not allow them.
					s.WriteString("\\")
					goto finishForceTerminate
				}
				if flags&SplitRelax != 0 {
					goto finishForceTerminate
				}
				return "", "", false, fmt.Errorf("unbalanced escape")
			}

			if flags&(SplitCUnescape|SplitUnescapeSeparators) != 0 {
				var r = -1
				var u rune
				var eightBit bool

				if flags&SplitCUnescape != 0 {
					r, u, eightBit = cUnescapeOne(in[p:], false)
				}

				switch {
				case r > 0:
					// cUnescapeOne consumed r bytes starting at p; leave p
					// on the last of them so nextChar moves past the escape.
					p += r - 1
					if eightBit {
						// \xNN and octal escapes denote a literal byte.
						// Writing it as a rune would turn values >= 0x80
						// into a two-byte UTF-8 sequence.
						s.WriteByte(byte(u))
					} else {
						s.WriteRune(u)
					}
				case (flags&SplitUnescapeSeparators != 0) &&
					(strings.ContainsRune(separators, rune(c)) || c == '\\'):
					/* An escaped separator char or the escape char itself */
					s.WriteByte(c)
				case flags&SplitUnescapeRelax != 0:
					s.WriteByte('\\')
					s.WriteByte(c)
				default:
					return "", "", false, fmt.Errorf("unsupported escape char")
				}
			} else {
				s.WriteByte(c)
			}

			backslash = false
		case quote != 0:
			/* inside either single or double quotes */
		quoteloop:
			for ; ; c = nextChar() {
				switch {
				case c == 0:
					if flags&SplitRelax != 0 {
						goto finishForceTerminate
					}
					return "", "", false, fmt.Errorf("unbalanced quotes")
				case c == quote:
					/* found the end quote */
					quote = 0
					if flags&SplitUnquote != 0 {
						break quoteloop
					}
				case c == '\\' && !(flags&SplitRetainEscape != 0):
					backslash = true
					break quoteloop
				}

				s.WriteByte(c)

				if quote == 0 {
					break quoteloop
				}
			}
		default:
		nonquoteloop:
			for ; ; c = nextChar() {
				switch {
				case c == 0:
					goto finishForceTerminate
				case (c == '\'' || c == '"') && (flags&(SplitKeepQuote|SplitUnquote) != 0):
					quote = c
					if flags&SplitUnquote != 0 {
						break nonquoteloop
					}
				case c == '\\' && !(flags&SplitRetainEscape != 0):
					backslash = true
					break nonquoteloop
				case strings.ContainsRune(separators, rune(c)):
					if flags&SplitDontCoalesceSeparators != 0 {
						if !(flags&SplitRetainSeparators != 0) {
							p++
						}
						goto finishForceNext
					}

					if !(flags&SplitRetainSeparators != 0) {
						/* Skip additional coalesced separators. */
						for ; ; c = nextChar() {
							if c == 0 {
								goto finishForceTerminate
							}
							if !strings.ContainsRune(separators, rune(c)) {
								break
							}
						}
					}
					goto finish
				}

				s.WriteByte(c)

				if quote != 0 {
					break nonquoteloop
				}
			}
		}
	}

finishForceTerminate:
	p = end

finish:
	// Only report "no word" when nothing but separators was seen. Testing
	// the builder length instead would also drop started-but-empty words
	// such as "" and make callers stop before the rest of the input.
	if !started {
		return "", "", false, nil
	}

finishForceNext:
	return s.String(), in[p:], true, nil
}

// splitStringAppend splits s into words by calling extractFirstWord
// repeatedly with the given separators and flags, appending each word to
// appendTo. It returns the extended slice, or the slice as it was passed in
// together with the error when extractFirstWord fails.
func splitStringAppend(appendTo []string, s string, separators string, flags SplitFlags) ([]string, error) {
	result := appendTo
	for rest := s; ; {
		word, remaining, found, err := extractFirstWord(rest, separators, flags)
		switch {
		case err != nil:
			// Hand back the caller's slice, not the partially extended one.
			return appendTo, err
		case !found:
			return result, nil
		}
		result = append(result, word)
		rest = remaining
	}
}

// splitString splits s into words via splitStringAppend, starting from an
// empty, non-nil slice.
func splitString(s string, separators string, flags SplitFlags) ([]string, error) {
	words := []string{}
	return splitStringAppend(words, s, separators, flags)
}

// charNeedEscape reports whether c has to be escaped when written out.
//
// Non-ASCII runes never need escaping. Among ASCII characters, whitespace,
// control characters, double and single quotes and the backslash do. When
// isPath is true, '-' and '/' need escaping as well.
func charNeedEscape(c rune, isPath bool) bool {
	// Everything above U+007F is passed through as-is. Comparing against
	// 128 instead would single out U+0080 as the only non-ASCII rune that
	// gets escaped.
	if c > unicode.MaxASCII {
		return false /* unicode is ok */
	}

	if isPath && (c == '-' || c == '/') {
		return true
	}

	switch c {
	case '"', '\'', '\\':
		return true
	}

	return unicode.IsSpace(c) || unicode.IsControl(c)
}

// wordNeedEscape reports whether word contains at least one character that
// charNeedEscape flags in non-path mode.
func wordNeedEscape(word string) bool {
	needsEscape := func(c rune) bool {
		return charNeedEscape(c, false)
	}
	return strings.IndexFunc(word, needsEscape) >= 0
}

// appendEscapeWord writes word to escaped in its escaped (non-path) form,
// wrapped in double quotes.
func appendEscapeWord(escaped *strings.Builder, word string) {
	const quote = '"'

	escaped.WriteByte(quote)
	escapeString(escaped, word, false)
	escaped.WriteByte(quote)
}

// escapeString appends word to escaped, replacing every character for which
// charNeedEscape(c, isPath) is true:
//
//   - control characters with a C-style short form, backslash and double
//     quote are written as backslash escapes;
//   - space and single quote are written unchanged;
//   - '/' is written as '-', except at the very start of word where it is
//     dropped (it is only flagged when isPath is true);
//   - anything else is written as \xNN.
//
// All other characters are copied verbatim.
func escapeString(escaped *strings.Builder, word string, isPath bool) {
	for pos, r := range word {
		if !charNeedEscape(r, isPath) {
			escaped.WriteRune(r)
			continue
		}

		switch r {
		case '\a':
			escaped.WriteString(`\a`)
		case '\b':
			escaped.WriteString(`\b`)
		case '\n':
			escaped.WriteString(`\n`)
		case '\r':
			escaped.WriteString(`\r`)
		case '\t':
			escaped.WriteString(`\t`)
		case '\v':
			escaped.WriteString(`\v`)
		case '\f':
			escaped.WriteString(`\f`)
		case '\\':
			escaped.WriteString(`\\`)
		case '"':
			escaped.WriteString(`\"`)
		case ' ', '\'':
			// Flagged as needing escape, but emitted unchanged.
			escaped.WriteRune(r)
		case '/':
			if isPath && pos != 0 {
				escaped.WriteString("-")
			}
		default:
			fmt.Fprintf(escaped, "\\x%.2x", r)
		}
	}
}

// escapeWords joins words into one string separated by single spaces. Words
// for which wordNeedEscape is true are written through appendEscapeWord; the
// others are written as they are.
func escapeWords(words []string) string {
	var out strings.Builder

	for idx := range words {
		if idx > 0 {
			out.WriteByte(' ')
		}

		w := words[idx]
		if !wordNeedEscape(w) {
			out.WriteString(w)
			continue
		}
		appendEscapeWord(&out, w)
	}

	return out.String()
}