goProject/trunk/goutil/xmlUtil/gxpath/internal/parse/parse.go
皮蛋13361098506 1b77f62820 初始化项目
2025-01-06 16:01:02 +08:00

448 lines
10 KiB
Go

// https://www.w3.org/TR/xpath/
package parse
import "fmt"
type parser struct {
r *scanner
d int
}
// newOperatorNode returns new operator node OperatorNode.
func newOperatorNode(op string, left, right Node) Node {
return &OperatorNode{NodeType: NodeOperator, Op: op, Left: left, Right: right}
}
// newOperand returns new constant operand node OperandNode.
func newOperandNode(v interface{}) Node {
return &OperandNode{NodeType: NodeConstantOperand, Val: v}
}
// newAxisNode returns new axis node AxisNode.
func newAxisNode(axeTyp, localName, prefix, prop string, n Node) Node {
return &AxisNode{
NodeType: NodeAxis,
LocalName: localName,
Prefix: prefix,
AxeType: axeTyp,
Prop: prop,
Input: n,
}
}
// newVariableNode returns new variable node VariableNode.
func newVariableNode(prefix, name string) Node {
return &VariableNode{NodeType: NodeVariable, Name: name, Prefix: prefix}
}
// newFilterNode returns a new filter node FilterNode.
func newFilterNode(n, m Node) Node {
return &FilterNode{NodeType: NodeFilter, Input: n, Condition: m}
}
// newRootNode returns a root node.
func newRootNode(s string) Node {
return &RootNode{NodeType: NodeRoot, slash: s}
}
// newFunctionNode returns function call node.
func newFunctionNode(name, prefix string, args []Node) Node {
return &FunctionNode{NodeType: NodeFunction, Prefix: prefix, FuncName: name, Args: args}
}
// testOp reports whether current item name is an operand op.
func testOp(r *scanner, op string) bool {
return r.typ == itemName && r.prefix == "" && r.name == op
}
func isPrimaryExpr(r *scanner) bool {
switch r.typ {
case itemString, itemNumber, itemDollar, itemLParens:
return true
case itemName:
return r.canBeFunc && !isNodeType(r)
}
return false
}
func isNodeType(r *scanner) bool {
switch r.name {
case "node", "text", "processing-instruction", "comment":
return r.prefix == ""
}
return false
}
func isStep(item itemType) bool {
switch item {
case itemDot, itemDotDot, itemAt, itemAxe, itemStar, itemName:
return true
}
return false
}
func checkItem(r *scanner, typ itemType) {
if r.typ != typ {
panic(fmt.Sprintf("%s has an invalid token", r.text))
}
}
// parseExpression parsing the expression with input Node n.
func (p *parser) parseExpression(n Node) Node {
if p.d = p.d + 1; p.d > 200 {
panic("the xpath query is too complex(depth > 200)")
}
n = p.parseOrExpr(n)
p.d--
return n
}
// next scanning next item on forward.
func (p *parser) next() bool {
return p.r.nextItem()
}
func (p *parser) skipItem(typ itemType) {
checkItem(p.r, typ)
p.next()
}
// OrExpr ::= AndExpr | OrExpr 'or' AndExpr
func (p *parser) parseOrExpr(n Node) Node {
opnd := p.parseAndExpr(n)
for {
if !testOp(p.r, "or") {
break
}
p.next()
opnd = newOperatorNode("or", opnd, p.parseAndExpr(n))
}
return opnd
}
// AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr
func (p *parser) parseAndExpr(n Node) Node {
opnd := p.parseEqualityExpr(n)
for {
if !testOp(p.r, "and") {
break
}
p.next()
opnd = newOperatorNode("and", opnd, p.parseEqualityExpr(n))
}
return opnd
}
// EqualityExpr ::= RelationalExpr | EqualityExpr '=' RelationalExpr | EqualityExpr '!=' RelationalExpr
func (p *parser) parseEqualityExpr(n Node) Node {
opnd := p.parseRelationalExpr(n)
Loop:
for {
var op string
switch p.r.typ {
case itemEq:
op = "="
case itemNe:
op = "!="
default:
break Loop
}
p.next()
opnd = newOperatorNode(op, opnd, p.parseRelationalExpr(n))
}
return opnd
}
// RelationalExpr ::= AdditiveExpr | RelationalExpr '<' AdditiveExpr | RelationalExpr '>' AdditiveExpr
// | RelationalExpr '<=' AdditiveExpr
// | RelationalExpr '>=' AdditiveExpr
func (p *parser) parseRelationalExpr(n Node) Node {
opnd := p.parseAdditiveExpr(n)
Loop:
for {
var op string
switch p.r.typ {
case itemLt:
op = "<"
case itemGt:
op = ">"
case itemLe:
op = "<="
case itemGe:
op = ">="
default:
break Loop
}
p.next()
opnd = newOperatorNode(op, opnd, p.parseAdditiveExpr(n))
}
return opnd
}
// AdditiveExpr ::= MultiplicativeExpr | AdditiveExpr '+' MultiplicativeExpr | AdditiveExpr '-' MultiplicativeExpr
func (p *parser) parseAdditiveExpr(n Node) Node {
opnd := p.parseMultiplicativeExpr(n)
Loop:
for {
var op string
switch p.r.typ {
case itemPlus:
op = "+"
case itemMinus:
op = "-"
default:
break Loop
}
p.next()
opnd = newOperatorNode(op, opnd, p.parseMultiplicativeExpr(n))
}
return opnd
}
// MultiplicativeExpr ::= UnaryExpr | MultiplicativeExpr MultiplyOperator(*) UnaryExpr
// | MultiplicativeExpr 'div' UnaryExpr | MultiplicativeExpr 'mod' UnaryExpr
func (p *parser) parseMultiplicativeExpr(n Node) Node {
opnd := p.parseUnaryExpr(n)
Loop:
for {
var op string
if p.r.typ == itemStar {
op = "*"
} else if testOp(p.r, "div") || testOp(p.r, "mod") {
op = p.r.name
} else {
break Loop
}
p.next()
opnd = newOperatorNode(op, opnd, p.parseUnaryExpr(n))
}
return opnd
}
// UnaryExpr ::= UnionExpr | '-' UnaryExpr
func (p *parser) parseUnaryExpr(n Node) Node {
minus := false
// ignore '-' sequence
for p.r.typ == itemMinus {
p.next()
minus = !minus
}
opnd := p.parseUnionExpr(n)
if minus {
opnd = newOperatorNode("*", opnd, newOperandNode(float64(-1)))
}
return opnd
}
// UnionExpr ::= PathExpr | UnionExpr '|' PathExpr
func (p *parser) parseUnionExpr(n Node) Node {
opnd := p.parsePathExpr(n)
Loop:
for {
if p.r.typ != itemUnion {
break Loop
}
p.next()
opnd2 := p.parsePathExpr(n)
// Checking the node type that must be is node set type?
opnd = newOperatorNode("|", opnd, opnd2)
}
return opnd
}
// PathExpr ::= LocationPath | FilterExpr | FilterExpr '/' RelativeLocationPath | FilterExpr '//' RelativeLocationPath
func (p *parser) parsePathExpr(n Node) Node {
var opnd Node
if isPrimaryExpr(p.r) {
opnd = p.parseFilterExpr(n)
switch p.r.typ {
case itemSlash:
p.next()
opnd = p.parseRelativeLocationPath(opnd)
case itemSlashSlash:
p.next()
opnd = p.parseRelativeLocationPath(newAxisNode("descendant-or-self", "", "", "", opnd))
}
} else {
opnd = p.parseLocationPath(nil)
}
return opnd
}
// FilterExpr ::= PrimaryExpr | FilterExpr Predicate
func (p *parser) parseFilterExpr(n Node) Node {
opnd := p.parsePrimaryExpr(n)
if p.r.typ == itemLBracket {
opnd = newFilterNode(opnd, p.parsePredicate(opnd))
}
return opnd
}
// Predicate ::= '[' PredicateExpr ']'
func (p *parser) parsePredicate(n Node) Node {
p.skipItem(itemLBracket)
opnd := p.parseExpression(n)
p.skipItem(itemRBracket)
return opnd
}
// LocationPath ::= RelativeLocationPath | AbsoluteLocationPath
func (p *parser) parseLocationPath(n Node) (opnd Node) {
switch p.r.typ {
case itemSlash:
p.next()
opnd = newRootNode("/")
if isStep(p.r.typ) {
opnd = p.parseRelativeLocationPath(opnd) // ?? child:: or self ??
}
case itemSlashSlash:
p.next()
opnd = newRootNode("//")
opnd = p.parseRelativeLocationPath(newAxisNode("descendant-or-self", "", "", "", opnd))
default:
opnd = p.parseRelativeLocationPath(n)
}
return opnd
}
// RelativeLocationPath ::= Step | RelativeLocationPath '/' Step | AbbreviatedRelativeLocationPath
func (p *parser) parseRelativeLocationPath(n Node) Node {
opnd := n
Loop:
for {
opnd = p.parseStep(opnd)
switch p.r.typ {
case itemSlashSlash:
p.next()
opnd = newAxisNode("descendant-or-self", "", "", "", opnd)
case itemSlash:
p.next()
default:
break Loop
}
}
return opnd
}
// Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep
func (p *parser) parseStep(n Node) Node {
axeTyp := "child" // default axes value.
if p.r.typ == itemDot || p.r.typ == itemDotDot {
if p.r.typ == itemDot {
axeTyp = "self"
} else {
axeTyp = "parent"
}
p.next()
return newAxisNode(axeTyp, "", "", "", n)
}
switch p.r.typ {
case itemAt:
p.next()
axeTyp = "attribute"
case itemAxe:
axeTyp = p.r.name
p.next()
}
opnd := p.parseNodeTest(n, axeTyp)
if p.r.typ == itemLBracket {
opnd = newFilterNode(opnd, p.parsePredicate(opnd))
}
return opnd
}
// NodeTest ::= NameTest | NodeType '(' ')' | 'processing-instruction' '(' Literal ')'
func (p *parser) parseNodeTest(n Node, axeTyp string) (opnd Node) {
switch p.r.typ {
case itemName:
if p.r.canBeFunc && isNodeType(p.r) {
var prop string
switch p.r.name {
case "comment", "text", "processing-instruction", "node":
prop = p.r.name
}
var name string
p.next()
p.skipItem(itemLParens)
if prop == "processing-instruction" && p.r.typ != itemRParens {
checkItem(p.r, itemString)
name = p.r.strval
p.next()
}
p.skipItem(itemRParens)
opnd = newAxisNode(axeTyp, name, "", prop, n)
} else {
prefix := p.r.prefix
name := p.r.name
p.next()
if p.r.name == "*" {
name = ""
}
opnd = newAxisNode(axeTyp, name, prefix, "", n)
}
case itemStar:
opnd = newAxisNode(axeTyp, "", "", "", n)
p.next()
default:
panic("expression must evaluate to a node-set")
}
return opnd
}
// PrimaryExpr ::= VariableReference | '(' Expr ')' | Literal | Number | FunctionCall
func (p *parser) parsePrimaryExpr(n Node) (opnd Node) {
switch p.r.typ {
case itemString:
opnd = newOperandNode(p.r.strval)
p.next()
case itemNumber:
opnd = newOperandNode(p.r.numval)
p.next()
case itemDollar:
p.next()
checkItem(p.r, itemName)
opnd = newVariableNode(p.r.prefix, p.r.name)
p.next()
case itemLParens:
p.next()
opnd = p.parseExpression(n)
p.skipItem(itemRParens)
case itemName:
if p.r.canBeFunc && !isNodeType(p.r) {
opnd = p.parseMethod(nil)
}
}
return opnd
}
// FunctionCall ::= FunctionName '(' ( Argument ( ',' Argument )* )? ')'
func (p *parser) parseMethod(n Node) Node {
var args []Node
name := p.r.name
prefix := p.r.prefix
p.skipItem(itemName)
p.skipItem(itemLParens)
if p.r.typ != itemRParens {
for {
args = append(args, p.parseExpression(n))
if p.r.typ == itemRParens {
break
}
p.skipItem(itemComma)
}
}
p.skipItem(itemRParens)
return newFunctionNode(name, prefix, args)
}
// Parse parsing the XPath express string expr and returns a tree Node.
func Parse(expr string) Node {
r := &scanner{text: expr}
r.nextChar()
r.nextItem()
p := &parser{r: r}
return p.parseExpression(nil)
}