package syntax
import (
	"strconv"
	"strings"
	"unicode"
)
// Regexp is one node of a regular expression syntax tree. Op selects the kind
// of node; which of the remaining fields are meaningful depends on Op.
type Regexp struct {
// Op is the operator identifying what kind of node this is.
Op Op
// Flags holds flag bits for the node. The code in this file consults
// FoldCase (literals), NonGreedy (repetitions) and WasDollar (end of text).
Flags Flags
// Sub holds the subexpressions: the single operand (Sub[0]) of capture and
// repetition nodes, or all operands of concatenation and alternation nodes.
Sub []*Regexp
// NOTE(review): Sub0 is not used in this part of the file; presumably it is
// inline backing storage for a one-element Sub — confirm.
Sub0 [1]*Regexp
// Rune holds the runes of an OpLiteral, or consecutive lo, hi range pairs
// for an OpCharClass (an odd length is reported as an invalid class).
Rune []rune
// NOTE(review): Rune0 is not used in this part of the file; presumably it is
// inline backing storage for a short Rune — confirm.
Rune0 [2]rune
// Min and Max are the repetition bounds of an OpRepeat. A negative Max is
// printed as an open-ended bound, i.e. {Min,}.
Min, Max int
// Cap is the capture index of an OpCapture.
Cap int
// Name is the capture name of an OpCapture; empty for an unnamed group.
Name string
}
// An Op is a single regular expression operator; it is the type of Regexp.Op.
type Op uint8
// Operators are numbered from 1 in declaration order. The order is
// significant: the printing code compares an operand's Op against OpCapture
// (Op > OpCapture) to decide whether a repetition operand needs grouping.
const (
// OpNoMatch is printed as `[^\x00-\x{10FFFF}]`, a class excluding every code point.
OpNoMatch Op = 1 + iota
// OpEmptyMatch is printed as `(?:)`.
OpEmptyMatch
// OpLiteral is the sequence of runes in Rune (wrapped in `(?i:...)` when FoldCase is set).
OpLiteral
// OpCharClass is a character class whose ranges are the lo, hi pairs in Rune.
OpCharClass
// OpAnyCharNotNL is printed as `(?-s:.)`.
OpAnyCharNotNL
// OpAnyChar is printed as `(?s:.)`.
OpAnyChar
// OpBeginLine is printed as `(?m:^)`.
OpBeginLine
// OpEndLine is printed as `(?m:$)`.
OpEndLine
// OpBeginText is printed as `\A`.
OpBeginText
// OpEndText is printed as `\z`, or as `(?-m:$)` when the WasDollar flag is set.
OpEndText
// OpWordBoundary is printed as `\b`.
OpWordBoundary
// OpNoWordBoundary is printed as `\B`.
OpNoWordBoundary
// OpCapture is a capturing group around Sub[0], with index Cap and optional Name.
OpCapture
// OpStar is Sub[0] followed by `*`.
OpStar
// OpPlus is Sub[0] followed by `+`.
OpPlus
// OpQuest is Sub[0] followed by `?`.
OpQuest
// OpRepeat is Sub[0] followed by a `{Min}`, `{Min,}` or `{Min,Max}` count.
OpRepeat
// OpConcat is the elements of Sub printed one after another.
OpConcat
// OpAlternate is the elements of Sub separated by `|`.
OpAlternate
)
// NOTE(review): opPseudo is not referenced in this part of the file;
// presumably it marks where a range of internal pseudo-operators begins — confirm.
const opPseudo Op = 128
func ( *Regexp) ( *Regexp) bool {
if == nil || == nil {
return ==
}
if .Op != .Op {
return false
}
switch .Op {
case OpEndText:
if .Flags&WasDollar != .Flags&WasDollar {
return false
}
case OpLiteral, OpCharClass:
if len(.Rune) != len(.Rune) {
return false
}
for , := range .Rune {
if != .Rune[] {
return false
}
}
case OpAlternate, OpConcat:
if len(.Sub) != len(.Sub) {
return false
}
for , := range .Sub {
if !.(.Sub[]) {
return false
}
}
case OpStar, OpPlus, OpQuest:
if .Flags&NonGreedy != .Flags&NonGreedy || !.Sub[0].(.Sub[0]) {
return false
}
case OpRepeat:
if .Flags&NonGreedy != .Flags&NonGreedy || .Min != .Min || .Max != .Max || !.Sub[0].(.Sub[0]) {
return false
}
case OpCapture:
if .Cap != .Cap || .Name != .Name || !.Sub[0].(.Sub[0]) {
return false
}
}
return true
}
func ( *strings.Builder, *Regexp) {
switch .Op {
default:
.WriteString("<invalid op" + strconv.Itoa(int(.Op)) + ">")
case OpNoMatch:
.WriteString(`[^\x00-\x{10FFFF}]`)
case OpEmptyMatch:
.WriteString(`(?:)`)
case OpLiteral:
if .Flags&FoldCase != 0 {
.WriteString(`(?i:`)
}
for , := range .Rune {
escape(, , false)
}
if .Flags&FoldCase != 0 {
.WriteString(`)`)
}
case OpCharClass:
if len(.Rune)%2 != 0 {
.WriteString(`[invalid char class]`)
break
}
.WriteRune('[')
if len(.Rune) == 0 {
.WriteString(`^\x00-\x{10FFFF}`)
} else if .Rune[0] == 0 && .Rune[len(.Rune)-1] == unicode.MaxRune && len(.Rune) > 2 {
.WriteRune('^')
for := 1; < len(.Rune)-1; += 2 {
, := .Rune[]+1, .Rune[+1]-1
escape(, , == '-')
if != {
.WriteRune('-')
escape(, , == '-')
}
}
} else {
for := 0; < len(.Rune); += 2 {
, := .Rune[], .Rune[+1]
escape(, , == '-')
if != {
.WriteRune('-')
escape(, , == '-')
}
}
}
.WriteRune(']')
case OpAnyCharNotNL:
.WriteString(`(?-s:.)`)
case OpAnyChar:
.WriteString(`(?s:.)`)
case OpBeginLine:
.WriteString(`(?m:^)`)
case OpEndLine:
.WriteString(`(?m:$)`)
case OpBeginText:
.WriteString(`\A`)
case OpEndText:
if .Flags&WasDollar != 0 {
.WriteString(`(?-m:$)`)
} else {
.WriteString(`\z`)
}
case OpWordBoundary:
.WriteString(`\b`)
case OpNoWordBoundary:
.WriteString(`\B`)
case OpCapture:
if .Name != "" {
.WriteString(`(?P<`)
.WriteString(.Name)
.WriteRune('>')
} else {
.WriteRune('(')
}
if .Sub[0].Op != OpEmptyMatch {
(, .Sub[0])
}
.WriteRune(')')
case OpStar, OpPlus, OpQuest, OpRepeat:
if := .Sub[0]; .Op > OpCapture || .Op == OpLiteral && len(.Rune) > 1 {
.WriteString(`(?:`)
(, )
.WriteString(`)`)
} else {
(, )
}
switch .Op {
case OpStar:
.WriteRune('*')
case OpPlus:
.WriteRune('+')
case OpQuest:
.WriteRune('?')
case OpRepeat:
.WriteRune('{')
.WriteString(strconv.Itoa(.Min))
if .Max != .Min {
.WriteRune(',')
if .Max >= 0 {
.WriteString(strconv.Itoa(.Max))
}
}
.WriteRune('}')
}
if .Flags&NonGreedy != 0 {
.WriteRune('?')
}
case OpConcat:
for , := range .Sub {
if .Op == OpAlternate {
.WriteString(`(?:`)
(, )
.WriteString(`)`)
} else {
(, )
}
}
case OpAlternate:
for , := range .Sub {
if > 0 {
.WriteRune('|')
}
(, )
}
}
}
func ( *Regexp) () string {
var strings.Builder
writeRegexp(&, )
return .String()
}
// meta lists the characters that escape always prefixes with a backslash
// when writing them literally.
const meta = `\.+*?()|[]{}^$`

// escape appends the rune r to b in a form that reads back as a literal.
//
// Printable runes are written as themselves, preceded by a backslash when
// they appear in meta or when force is true. Non-printable runes use the
// short escapes \a \f \n \r \t \v where one exists; otherwise they are written
// as two-digit \xNN for values below 0x100 and as \x{N...} for larger values,
// using lowercase hexadecimal.
func escape(b *strings.Builder, r rune, force bool) {
	if unicode.IsPrint(r) {
		if strings.ContainsRune(meta, r) || force {
			b.WriteRune('\\')
		}
		b.WriteRune(r)
		return
	}

	switch r {
	case '\a':
		b.WriteString(`\a`)
	case '\f':
		b.WriteString(`\f`)
	case '\n':
		b.WriteString(`\n`)
	case '\r':
		b.WriteString(`\r`)
	case '\t':
		b.WriteString(`\t`)
	case '\v':
		b.WriteString(`\v`)
	default:
		if r < 0x100 {
			b.WriteString(`\x`)
			s := strconv.FormatInt(int64(r), 16)
			// Pad to two hex digits so the escape has a fixed width.
			if len(s) == 1 {
				b.WriteRune('0')
			}
			b.WriteString(s)
			break
		}
		b.WriteString(`\x{`)
		b.WriteString(strconv.FormatInt(int64(r), 16))
		b.WriteString(`}`)
	}
}
func ( *Regexp) () int {
:= 0
if .Op == OpCapture {
= .Cap
}
for , := range .Sub {
if := .(); < {
=
}
}
return
}
func ( *Regexp) () []string {
:= make([]string, .MaxCap()+1)
.capNames()
return
}
func ( *Regexp) ( []string) {
if .Op == OpCapture {
[.Cap] = .Name
}
for , := range .Sub {
.()
}
}