// Copyright 2009 The Go Authors. All rights reserved.// Use of this source code is governed by a BSD-style// license that can be found in the LICENSE file.//go:generate go run makeisprint.go -output isprint.gopackage strconvimport ()const (lowerhex = "0123456789abcdef"upperhex = "0123456789ABCDEF")func ( string, byte, , bool) string {returnstring(appendQuotedWith(make([]byte, 0, 3*len()/2), , , , ))}func ( rune, byte, , bool) string {returnstring(appendQuotedRuneWith(nil, , , , ))}func ( []byte, string, byte, , bool) []byte {// Often called with big strings, so preallocate. If there's quoting, // this is conservative but still helps a lot.ifcap()-len() < len() { := make([]byte, len(), len()+1+len()+1)copy(, ) = } = append(, )for := 0; len() > 0; = [:] { := rune([0]) = 1if >= utf8.RuneSelf { , = utf8.DecodeRuneInString() }if == 1 && == utf8.RuneError { = append(, `\x`...) = append(, lowerhex[[0]>>4]) = append(, lowerhex[[0]&0xF])continue } = appendEscapedRune(, , , , ) } = append(, )return}func ( []byte, rune, byte, , bool) []byte { = append(, )if !utf8.ValidRune() { = utf8.RuneError } = appendEscapedRune(, , , , ) = append(, )return}func ( []byte, rune, byte, , bool) []byte {var [utf8.UTFMax]byteif == rune() || == '\\' { // always backslashed = append(, '\\') = append(, byte())return }if {if < utf8.RuneSelf && IsPrint() { = append(, byte())return } } elseifIsPrint() || && isInGraphicList() { := utf8.EncodeRune([:], ) = append(, [:]...)return }switch {case'\a': = append(, `\a`...)case'\b': = append(, `\b`...)case'\f': = append(, `\f`...)case'\n': = append(, `\n`...)case'\r': = append(, `\r`...)case'\t': = append(, `\t`...)case'\v': = append(, `\v`...)default:switch {case < ' ': = append(, `\x`...) = append(, lowerhex[byte()>>4]) = append(, lowerhex[byte()&0xF])case > utf8.MaxRune: = 0xFFFDfallthroughcase < 0x10000: = append(, `\u`...)for := 12; >= 0; -= 4 { = append(, lowerhex[>>uint()&0xF]) }default: = append(, `\U`...)for := 28; >= 0; -= 4 { = append(, lowerhex[>>uint()&0xF]) } } }return}// Quote returns a double-quoted Go string literal representing s. The// returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for// control characters and non-printable characters as defined by// IsPrint.func ( string) string {returnquoteWith(, '"', false, false)}// AppendQuote appends a double-quoted Go string literal representing s,// as generated by Quote, to dst and returns the extended buffer.func ( []byte, string) []byte {returnappendQuotedWith(, , '"', false, false)}// QuoteToASCII returns a double-quoted Go string literal representing s.// The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for// non-ASCII characters and non-printable characters as defined by IsPrint.func ( string) string {returnquoteWith(, '"', true, false)}// AppendQuoteToASCII appends a double-quoted Go string literal representing s,// as generated by QuoteToASCII, to dst and returns the extended buffer.func ( []byte, string) []byte {returnappendQuotedWith(, , '"', true, false)}// QuoteToGraphic returns a double-quoted Go string literal representing s.// The returned string leaves Unicode graphic characters, as defined by// IsGraphic, unchanged and uses Go escape sequences (\t, \n, \xFF, \u0100)// for non-graphic characters.func ( string) string {returnquoteWith(, '"', false, true)}// AppendQuoteToGraphic appends a double-quoted Go string literal representing s,// as generated by QuoteToGraphic, to dst and returns the extended buffer.func ( []byte, string) []byte {returnappendQuotedWith(, , '"', false, true)}// QuoteRune returns a single-quoted Go character literal representing the// rune. The returned string uses Go escape sequences (\t, \n, \xFF, \u0100)// for control characters and non-printable characters as defined by IsPrint.func ( rune) string {returnquoteRuneWith(, '\'', false, false)}// AppendQuoteRune appends a single-quoted Go character literal representing the rune,// as generated by QuoteRune, to dst and returns the extended buffer.func ( []byte, rune) []byte {returnappendQuotedRuneWith(, , '\'', false, false)}// QuoteRuneToASCII returns a single-quoted Go character literal representing// the rune. The returned string uses Go escape sequences (\t, \n, \xFF,// \u0100) for non-ASCII characters and non-printable characters as defined// by IsPrint.func ( rune) string {returnquoteRuneWith(, '\'', true, false)}// AppendQuoteRuneToASCII appends a single-quoted Go character literal representing the rune,// as generated by QuoteRuneToASCII, to dst and returns the extended buffer.func ( []byte, rune) []byte {returnappendQuotedRuneWith(, , '\'', true, false)}// QuoteRuneToGraphic returns a single-quoted Go character literal representing// the rune. If the rune is not a Unicode graphic character,// as defined by IsGraphic, the returned string will use a Go escape sequence// (\t, \n, \xFF, \u0100).func ( rune) string {returnquoteRuneWith(, '\'', false, true)}// AppendQuoteRuneToGraphic appends a single-quoted Go character literal representing the rune,// as generated by QuoteRuneToGraphic, to dst and returns the extended buffer.func ( []byte, rune) []byte {returnappendQuotedRuneWith(, , '\'', false, true)}// CanBackquote reports whether the string s can be represented// unchanged as a single-line backquoted string without control// characters other than tab.func ( string) bool {forlen() > 0 { , := utf8.DecodeRuneInString() = [:]if > 1 {if == '\ufeff' {returnfalse// BOMs are invisible and should not be quoted. }continue// All other multibyte runes are correctly encoded and assumed printable. }if == utf8.RuneError {returnfalse }if ( < ' ' && != '\t') || == '`' || == '\u007F' {returnfalse } }returntrue}func ( byte) ( rune, bool) { := rune()switch {case'0' <= && <= '9':return - '0', truecase'a' <= && <= 'f':return - 'a' + 10, truecase'A' <= && <= 'F':return - 'A' + 10, true }return}// UnquoteChar decodes the first character or byte in the escaped string// or character literal represented by the string s.// It returns four values://// 1) value, the decoded Unicode code point or byte value;// 2) multibyte, a boolean indicating whether the decoded character requires a multibyte UTF-8 representation;// 3) tail, the remainder of the string after the character; and// 4) an error that will be nil if the character is syntactically valid.//// The second argument, quote, specifies the type of literal being parsed// and therefore which escaped quote character is permitted.// If set to a single quote, it permits the sequence \' and disallows unescaped '.// If set to a double quote, it permits \" and disallows unescaped ".// If set to zero, it does not permit either escape and allows both quote characters to appear unescaped.func ( string, byte) ( rune, bool, string, error) {// easy casesiflen() == 0 { = ErrSyntaxreturn }switch := [0]; {case == && ( == '\'' || == '"'): = ErrSyntaxreturncase >= utf8.RuneSelf: , := utf8.DecodeRuneInString()return , true, [:], nilcase != '\\':returnrune([0]), false, [1:], nil }// hard case: c is backslashiflen() <= 1 { = ErrSyntaxreturn } := [1] = [2:]switch {case'a': = '\a'case'b': = '\b'case'f': = '\f'case'n': = '\n'case'r': = '\r'case't': = '\t'case'v': = '\v'case'x', 'u', 'U': := 0switch {case'x': = 2case'u': = 4case'U': = 8 }varruneiflen() < { = ErrSyntaxreturn }for := 0; < ; ++ { , := unhex([])if ! { = ErrSyntaxreturn } = <<4 | } = [:]if == 'x' {// single-byte string, possibly not UTF-8 = break }if > utf8.MaxRune { = ErrSyntaxreturn } = = truecase'0', '1', '2', '3', '4', '5', '6', '7': := rune() - '0'iflen() < 2 { = ErrSyntaxreturn }for := 0; < 2; ++ { // one digit already; two more := rune([]) - '0'if < 0 || > 7 { = ErrSyntaxreturn } = ( << 3) | } = [2:]if > 255 { = ErrSyntaxreturn } = case'\\': = '\\'case'\'', '"':if != { = ErrSyntaxreturn } = rune()default: = ErrSyntaxreturn } = return}// Unquote interprets s as a single-quoted, double-quoted,// or backquoted Go string literal, returning the string value// that s quotes. (If s is single-quoted, it would be a Go// character literal; Unquote returns the corresponding// one-character string.)func ( string) (string, error) { := len()if < 2 {return"", ErrSyntax } := [0]if != [-1] {return"", ErrSyntax } = [1 : -1]if == '`' {ifcontains(, '`') {return"", ErrSyntax }ifcontains(, '\r') {// -1 because we know there is at least one \r to remove. := make([]byte, 0, len()-1)for := 0; < len(); ++ {if [] != '\r' { = append(, []) } }returnstring(), nil }return , nil }if != '"' && != '\'' {return"", ErrSyntax }ifcontains(, '\n') {return"", ErrSyntax }// Is it trivial? Avoid allocation.if !contains(, '\\') && !contains(, ) {switch {case'"':ifutf8.ValidString() {return , nil }case'\'': , := utf8.DecodeRuneInString()if == len() && ( != utf8.RuneError || != 1) {return , nil } } }var [utf8.UTFMax]byte := make([]byte, 0, 3*len()/2) // Try to avoid more allocations.forlen() > 0 { , , , := UnquoteChar(, )if != nil {return"", } = if < utf8.RuneSelf || ! { = append(, byte()) } else { := utf8.EncodeRune([:], ) = append(, [:]...) }if == '\'' && len() != 0 {// single-quoted must be single characterreturn"", ErrSyntax } }returnstring(), nil}// contains reports whether the string contains the byte c.func ( string, byte) bool {returnbytealg.IndexByteString(, ) != -1}// bsearch16 returns the smallest i such that a[i] >= x.// If there is no such i, bsearch16 returns len(a).func ( []uint16, uint16) int { , := 0, len()for < { := + (-)/2if [] < { = + 1 } else { = } }return}// bsearch32 returns the smallest i such that a[i] >= x.// If there is no such i, bsearch32 returns len(a).func ( []uint32, uint32) int { , := 0, len()for < { := + (-)/2if [] < { = + 1 } else { = } }return}// TODO: IsPrint is a local implementation of unicode.IsPrint, verified by the tests// to give the same answer. It allows this package not to depend on unicode,// and therefore not pull in all the Unicode tables. If the linker were better// at tossing unused tables, we could get rid of this implementation.// That would be nice.// IsPrint reports whether the rune is defined as printable by Go, with// the same definition as unicode.IsPrint: letters, numbers, punctuation,// symbols and ASCII space.func ( rune) bool {// Fast check for Latin-1if <= 0xFF {if0x20 <= && <= 0x7E {// All the ASCII is printable from space through DEL-1.returntrue }if0xA1 <= && <= 0xFF {// Similarly for ¡ through ÿ...return != 0xAD// ...except for the bizarre soft hyphen. }returnfalse }// Same algorithm, either on uint16 or uint32 value. // First, find first i such that isPrint[i] >= x. // This is the index of either the start or end of a pair that might span x. // The start is even (isPrint[i&^1]) and the end is odd (isPrint[i|1]). // If we find x in a range, make sure x is not in isNotPrint list.if0 <= && < 1<<16 { , , := uint16(), isPrint16, isNotPrint16 := bsearch16(, )if >= len() || < [&^1] || [|1] < {returnfalse } := bsearch16(, )return >= len() || [] != } , , := uint32(), isPrint32, isNotPrint32 := bsearch32(, )if >= len() || < [&^1] || [|1] < {returnfalse }if >= 0x20000 {returntrue } -= 0x10000 := bsearch16(, uint16())return >= len() || [] != uint16()}// IsGraphic reports whether the rune is defined as a Graphic by Unicode. Such// characters include letters, marks, numbers, punctuation, symbols, and// spaces, from categories L, M, N, P, S, and Zs.func ( rune) bool {ifIsPrint() {returntrue }returnisInGraphicList()}// isInGraphicList reports whether the rune is in the isGraphic list. This separation// from IsGraphic allows quoteWith to avoid two calls to IsPrint.// Should be called only if IsPrint fails.func ( rune) bool {// We know r must fit in 16 bits - see makeisprint.go.if > 0xFFFF {returnfalse } := uint16() := bsearch16(isGraphic, )return < len(isGraphic) && == isGraphic[]}