448 lines
10 KiB
Go
448 lines
10 KiB
Go
// https://www.w3.org/TR/xpath/
|
|
|
|
package parse
|
|
|
|
import "fmt"
|
|
|
|
type parser struct {
|
|
r *scanner
|
|
d int
|
|
}
|
|
|
|
// newOperatorNode returns new operator node OperatorNode.
|
|
func newOperatorNode(op string, left, right Node) Node {
|
|
return &OperatorNode{NodeType: NodeOperator, Op: op, Left: left, Right: right}
|
|
}
|
|
|
|
// newOperand returns new constant operand node OperandNode.
|
|
func newOperandNode(v interface{}) Node {
|
|
return &OperandNode{NodeType: NodeConstantOperand, Val: v}
|
|
}
|
|
|
|
// newAxisNode returns new axis node AxisNode.
|
|
func newAxisNode(axeTyp, localName, prefix, prop string, n Node) Node {
|
|
return &AxisNode{
|
|
NodeType: NodeAxis,
|
|
LocalName: localName,
|
|
Prefix: prefix,
|
|
AxeType: axeTyp,
|
|
Prop: prop,
|
|
Input: n,
|
|
}
|
|
}
|
|
|
|
// newVariableNode returns new variable node VariableNode.
|
|
func newVariableNode(prefix, name string) Node {
|
|
return &VariableNode{NodeType: NodeVariable, Name: name, Prefix: prefix}
|
|
}
|
|
|
|
// newFilterNode returns a new filter node FilterNode.
|
|
func newFilterNode(n, m Node) Node {
|
|
return &FilterNode{NodeType: NodeFilter, Input: n, Condition: m}
|
|
}
|
|
|
|
// newRootNode returns a root node.
|
|
func newRootNode(s string) Node {
|
|
return &RootNode{NodeType: NodeRoot, slash: s}
|
|
}
|
|
|
|
// newFunctionNode returns function call node.
|
|
func newFunctionNode(name, prefix string, args []Node) Node {
|
|
return &FunctionNode{NodeType: NodeFunction, Prefix: prefix, FuncName: name, Args: args}
|
|
}
|
|
|
|
// testOp reports whether current item name is an operand op.
|
|
func testOp(r *scanner, op string) bool {
|
|
return r.typ == itemName && r.prefix == "" && r.name == op
|
|
}
|
|
|
|
func isPrimaryExpr(r *scanner) bool {
|
|
switch r.typ {
|
|
case itemString, itemNumber, itemDollar, itemLParens:
|
|
return true
|
|
case itemName:
|
|
return r.canBeFunc && !isNodeType(r)
|
|
}
|
|
return false
|
|
}
|
|
|
|
func isNodeType(r *scanner) bool {
|
|
switch r.name {
|
|
case "node", "text", "processing-instruction", "comment":
|
|
return r.prefix == ""
|
|
}
|
|
return false
|
|
}
|
|
|
|
func isStep(item itemType) bool {
|
|
switch item {
|
|
case itemDot, itemDotDot, itemAt, itemAxe, itemStar, itemName:
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
|
|
func checkItem(r *scanner, typ itemType) {
|
|
if r.typ != typ {
|
|
panic(fmt.Sprintf("%s has an invalid token", r.text))
|
|
}
|
|
}
|
|
|
|
// parseExpression parsing the expression with input Node n.
|
|
func (p *parser) parseExpression(n Node) Node {
|
|
if p.d = p.d + 1; p.d > 200 {
|
|
panic("the xpath query is too complex(depth > 200)")
|
|
}
|
|
n = p.parseOrExpr(n)
|
|
p.d--
|
|
return n
|
|
}
|
|
|
|
// next scanning next item on forward.
|
|
func (p *parser) next() bool {
|
|
return p.r.nextItem()
|
|
}
|
|
|
|
func (p *parser) skipItem(typ itemType) {
|
|
checkItem(p.r, typ)
|
|
p.next()
|
|
}
|
|
|
|
// OrExpr ::= AndExpr | OrExpr 'or' AndExpr
|
|
func (p *parser) parseOrExpr(n Node) Node {
|
|
opnd := p.parseAndExpr(n)
|
|
for {
|
|
if !testOp(p.r, "or") {
|
|
break
|
|
}
|
|
p.next()
|
|
opnd = newOperatorNode("or", opnd, p.parseAndExpr(n))
|
|
}
|
|
return opnd
|
|
}
|
|
|
|
// AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr
|
|
func (p *parser) parseAndExpr(n Node) Node {
|
|
opnd := p.parseEqualityExpr(n)
|
|
for {
|
|
if !testOp(p.r, "and") {
|
|
break
|
|
}
|
|
p.next()
|
|
opnd = newOperatorNode("and", opnd, p.parseEqualityExpr(n))
|
|
}
|
|
return opnd
|
|
}
|
|
|
|
// EqualityExpr ::= RelationalExpr | EqualityExpr '=' RelationalExpr | EqualityExpr '!=' RelationalExpr
|
|
func (p *parser) parseEqualityExpr(n Node) Node {
|
|
opnd := p.parseRelationalExpr(n)
|
|
Loop:
|
|
for {
|
|
var op string
|
|
switch p.r.typ {
|
|
case itemEq:
|
|
op = "="
|
|
case itemNe:
|
|
op = "!="
|
|
default:
|
|
break Loop
|
|
}
|
|
p.next()
|
|
opnd = newOperatorNode(op, opnd, p.parseRelationalExpr(n))
|
|
}
|
|
return opnd
|
|
}
|
|
|
|
// RelationalExpr ::= AdditiveExpr | RelationalExpr '<' AdditiveExpr | RelationalExpr '>' AdditiveExpr
|
|
// | RelationalExpr '<=' AdditiveExpr
|
|
// | RelationalExpr '>=' AdditiveExpr
|
|
func (p *parser) parseRelationalExpr(n Node) Node {
|
|
opnd := p.parseAdditiveExpr(n)
|
|
Loop:
|
|
for {
|
|
var op string
|
|
switch p.r.typ {
|
|
case itemLt:
|
|
op = "<"
|
|
case itemGt:
|
|
op = ">"
|
|
case itemLe:
|
|
op = "<="
|
|
case itemGe:
|
|
op = ">="
|
|
default:
|
|
break Loop
|
|
}
|
|
p.next()
|
|
opnd = newOperatorNode(op, opnd, p.parseAdditiveExpr(n))
|
|
}
|
|
return opnd
|
|
}
|
|
|
|
// AdditiveExpr ::= MultiplicativeExpr | AdditiveExpr '+' MultiplicativeExpr | AdditiveExpr '-' MultiplicativeExpr
|
|
func (p *parser) parseAdditiveExpr(n Node) Node {
|
|
opnd := p.parseMultiplicativeExpr(n)
|
|
Loop:
|
|
for {
|
|
var op string
|
|
switch p.r.typ {
|
|
case itemPlus:
|
|
op = "+"
|
|
case itemMinus:
|
|
op = "-"
|
|
default:
|
|
break Loop
|
|
}
|
|
p.next()
|
|
opnd = newOperatorNode(op, opnd, p.parseMultiplicativeExpr(n))
|
|
}
|
|
return opnd
|
|
}
|
|
|
|
// MultiplicativeExpr ::= UnaryExpr | MultiplicativeExpr MultiplyOperator(*) UnaryExpr
|
|
// | MultiplicativeExpr 'div' UnaryExpr | MultiplicativeExpr 'mod' UnaryExpr
|
|
func (p *parser) parseMultiplicativeExpr(n Node) Node {
|
|
opnd := p.parseUnaryExpr(n)
|
|
Loop:
|
|
for {
|
|
var op string
|
|
if p.r.typ == itemStar {
|
|
op = "*"
|
|
} else if testOp(p.r, "div") || testOp(p.r, "mod") {
|
|
op = p.r.name
|
|
} else {
|
|
break Loop
|
|
}
|
|
p.next()
|
|
opnd = newOperatorNode(op, opnd, p.parseUnaryExpr(n))
|
|
}
|
|
return opnd
|
|
}
|
|
|
|
// UnaryExpr ::= UnionExpr | '-' UnaryExpr
|
|
func (p *parser) parseUnaryExpr(n Node) Node {
|
|
minus := false
|
|
// ignore '-' sequence
|
|
for p.r.typ == itemMinus {
|
|
p.next()
|
|
minus = !minus
|
|
}
|
|
opnd := p.parseUnionExpr(n)
|
|
if minus {
|
|
opnd = newOperatorNode("*", opnd, newOperandNode(float64(-1)))
|
|
}
|
|
return opnd
|
|
}
|
|
|
|
// UnionExpr ::= PathExpr | UnionExpr '|' PathExpr
|
|
func (p *parser) parseUnionExpr(n Node) Node {
|
|
opnd := p.parsePathExpr(n)
|
|
Loop:
|
|
for {
|
|
if p.r.typ != itemUnion {
|
|
break Loop
|
|
}
|
|
p.next()
|
|
opnd2 := p.parsePathExpr(n)
|
|
// Checking the node type that must be is node set type?
|
|
opnd = newOperatorNode("|", opnd, opnd2)
|
|
}
|
|
return opnd
|
|
}
|
|
|
|
// PathExpr ::= LocationPath | FilterExpr | FilterExpr '/' RelativeLocationPath | FilterExpr '//' RelativeLocationPath
|
|
func (p *parser) parsePathExpr(n Node) Node {
|
|
var opnd Node
|
|
if isPrimaryExpr(p.r) {
|
|
opnd = p.parseFilterExpr(n)
|
|
switch p.r.typ {
|
|
case itemSlash:
|
|
p.next()
|
|
opnd = p.parseRelativeLocationPath(opnd)
|
|
case itemSlashSlash:
|
|
p.next()
|
|
opnd = p.parseRelativeLocationPath(newAxisNode("descendant-or-self", "", "", "", opnd))
|
|
}
|
|
} else {
|
|
opnd = p.parseLocationPath(nil)
|
|
}
|
|
return opnd
|
|
}
|
|
|
|
// FilterExpr ::= PrimaryExpr | FilterExpr Predicate
|
|
func (p *parser) parseFilterExpr(n Node) Node {
|
|
opnd := p.parsePrimaryExpr(n)
|
|
if p.r.typ == itemLBracket {
|
|
opnd = newFilterNode(opnd, p.parsePredicate(opnd))
|
|
}
|
|
return opnd
|
|
}
|
|
|
|
// Predicate ::= '[' PredicateExpr ']'
|
|
func (p *parser) parsePredicate(n Node) Node {
|
|
p.skipItem(itemLBracket)
|
|
opnd := p.parseExpression(n)
|
|
p.skipItem(itemRBracket)
|
|
return opnd
|
|
}
|
|
|
|
// LocationPath ::= RelativeLocationPath | AbsoluteLocationPath
|
|
func (p *parser) parseLocationPath(n Node) (opnd Node) {
|
|
switch p.r.typ {
|
|
case itemSlash:
|
|
p.next()
|
|
opnd = newRootNode("/")
|
|
if isStep(p.r.typ) {
|
|
opnd = p.parseRelativeLocationPath(opnd) // ?? child:: or self ??
|
|
}
|
|
case itemSlashSlash:
|
|
p.next()
|
|
opnd = newRootNode("//")
|
|
opnd = p.parseRelativeLocationPath(newAxisNode("descendant-or-self", "", "", "", opnd))
|
|
default:
|
|
opnd = p.parseRelativeLocationPath(n)
|
|
}
|
|
return opnd
|
|
}
|
|
|
|
// RelativeLocationPath ::= Step | RelativeLocationPath '/' Step | AbbreviatedRelativeLocationPath
|
|
func (p *parser) parseRelativeLocationPath(n Node) Node {
|
|
opnd := n
|
|
Loop:
|
|
for {
|
|
opnd = p.parseStep(opnd)
|
|
switch p.r.typ {
|
|
case itemSlashSlash:
|
|
p.next()
|
|
opnd = newAxisNode("descendant-or-self", "", "", "", opnd)
|
|
case itemSlash:
|
|
p.next()
|
|
default:
|
|
break Loop
|
|
}
|
|
}
|
|
return opnd
|
|
}
|
|
|
|
// Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep
|
|
func (p *parser) parseStep(n Node) Node {
|
|
axeTyp := "child" // default axes value.
|
|
if p.r.typ == itemDot || p.r.typ == itemDotDot {
|
|
if p.r.typ == itemDot {
|
|
axeTyp = "self"
|
|
} else {
|
|
axeTyp = "parent"
|
|
}
|
|
p.next()
|
|
return newAxisNode(axeTyp, "", "", "", n)
|
|
}
|
|
switch p.r.typ {
|
|
case itemAt:
|
|
p.next()
|
|
axeTyp = "attribute"
|
|
case itemAxe:
|
|
axeTyp = p.r.name
|
|
p.next()
|
|
}
|
|
opnd := p.parseNodeTest(n, axeTyp)
|
|
if p.r.typ == itemLBracket {
|
|
opnd = newFilterNode(opnd, p.parsePredicate(opnd))
|
|
}
|
|
return opnd
|
|
}
|
|
|
|
// NodeTest ::= NameTest | NodeType '(' ')' | 'processing-instruction' '(' Literal ')'
|
|
func (p *parser) parseNodeTest(n Node, axeTyp string) (opnd Node) {
|
|
switch p.r.typ {
|
|
case itemName:
|
|
if p.r.canBeFunc && isNodeType(p.r) {
|
|
var prop string
|
|
switch p.r.name {
|
|
case "comment", "text", "processing-instruction", "node":
|
|
prop = p.r.name
|
|
}
|
|
var name string
|
|
p.next()
|
|
p.skipItem(itemLParens)
|
|
if prop == "processing-instruction" && p.r.typ != itemRParens {
|
|
checkItem(p.r, itemString)
|
|
name = p.r.strval
|
|
p.next()
|
|
}
|
|
p.skipItem(itemRParens)
|
|
opnd = newAxisNode(axeTyp, name, "", prop, n)
|
|
} else {
|
|
prefix := p.r.prefix
|
|
name := p.r.name
|
|
p.next()
|
|
if p.r.name == "*" {
|
|
name = ""
|
|
}
|
|
opnd = newAxisNode(axeTyp, name, prefix, "", n)
|
|
}
|
|
case itemStar:
|
|
opnd = newAxisNode(axeTyp, "", "", "", n)
|
|
p.next()
|
|
default:
|
|
panic("expression must evaluate to a node-set")
|
|
}
|
|
return opnd
|
|
}
|
|
|
|
// PrimaryExpr ::= VariableReference | '(' Expr ')' | Literal | Number | FunctionCall
|
|
func (p *parser) parsePrimaryExpr(n Node) (opnd Node) {
|
|
switch p.r.typ {
|
|
case itemString:
|
|
opnd = newOperandNode(p.r.strval)
|
|
p.next()
|
|
case itemNumber:
|
|
opnd = newOperandNode(p.r.numval)
|
|
p.next()
|
|
case itemDollar:
|
|
p.next()
|
|
checkItem(p.r, itemName)
|
|
opnd = newVariableNode(p.r.prefix, p.r.name)
|
|
p.next()
|
|
case itemLParens:
|
|
p.next()
|
|
opnd = p.parseExpression(n)
|
|
p.skipItem(itemRParens)
|
|
case itemName:
|
|
if p.r.canBeFunc && !isNodeType(p.r) {
|
|
opnd = p.parseMethod(nil)
|
|
}
|
|
}
|
|
return opnd
|
|
}
|
|
|
|
// FunctionCall ::= FunctionName '(' ( Argument ( ',' Argument )* )? ')'
|
|
func (p *parser) parseMethod(n Node) Node {
|
|
var args []Node
|
|
name := p.r.name
|
|
prefix := p.r.prefix
|
|
|
|
p.skipItem(itemName)
|
|
p.skipItem(itemLParens)
|
|
if p.r.typ != itemRParens {
|
|
for {
|
|
args = append(args, p.parseExpression(n))
|
|
if p.r.typ == itemRParens {
|
|
break
|
|
}
|
|
p.skipItem(itemComma)
|
|
}
|
|
}
|
|
p.skipItem(itemRParens)
|
|
return newFunctionNode(name, prefix, args)
|
|
}
|
|
|
|
// Parse parsing the XPath express string expr and returns a tree Node.
|
|
func Parse(expr string) Node {
|
|
r := &scanner{text: expr}
|
|
r.nextChar()
|
|
r.nextItem()
|
|
p := &parser{r: r}
|
|
return p.parseExpression(nil)
|
|
}
|