ast_parser.go 3.9 KB
package astexpr

import (
	"errors"
	"fmt"
	"strings"
)

// precedence maps an operator's text to its numeric precedence level.
// NOTE(review): presumably a larger value binds tighter; the code that
// consumes this table is not visible here — confirm against the caller.
var precedence = map[string]int{
	// Comparison operators.
	"=":  10,
	">":  10,
	"<":  10,
	"<=": 10,
	">=": 10,

	// Additive operators.
	"+": 20,
	"-": 20,

	// Multiplicative operators and "&".
	"*": 40,
	"/": 40,
	"%": 40,
	"&": 40,

	// Exponentiation.
	"^": 60,
}

// Token kinds stored in Token.Type. The values are assigned by iota in
// declaration order, starting at 0.
const (
	// Identifier marks an identifier token, e.g. a function name or a table field.
	Identifier = iota
	// Literal marks a literal value, e.g. 50.
	Literal
	// Operator marks an arithmetic operator, e.g. + - * /.
	// The lexer in this file also uses it for '(' and ')'.
	Operator
	// COMMA marks the ',' separator token.
	COMMA
	// CompareOperator marks a comparison operator, e.g. < = >.
	CompareOperator
	// StringArgs marks a string argument (a token that starts with '"').
	StringArgs
)

// Token is one lexical unit produced by the lexer.
type Token struct {
	// Tok is the raw text of the token.
	Tok string

	// Type is the token kind, one of the constants such as Literal or Operator.
	Type int

	// Flag is an extra integer marker.
	// NOTE(review): nothing in this file sets or reads it — confirm its meaning with the users of Token.
	Flag int

	// Offset is the index of the token's first rune in the source rune slice.
	Offset int
}

// Parser holds the scanning state used to split a source string into tokens.
type Parser struct {
	// Source is the original input text.
	Source      string
	// SourceRunes is Source converted to a rune slice; offset indexes into it.
	SourceRunes []rune
	// ch is the rune currently being examined.
	ch          rune
	// offset is the index of ch within SourceRunes.
	offset      int

	// err holds the error recorded while scanning; once it is set, nextTok
	// stops producing tokens.
	err error
}

// ParseToken splits s into tokens.
//
// It returns the tokens in source order, or a nil slice and the recorded
// error when scanning fails. An empty s yields an empty, non-nil token slice
// and a nil error.
func ParseToken(s string) ([]*Token, error) {
	runes := []rune(s)
	// Guard the empty input: there is no first rune to prime the parser
	// with, and indexing runes[0] would panic.
	if len(runes) == 0 {
		return []*Token{}, nil
	}

	p := &Parser{
		Source:      s,
		SourceRunes: runes,
		ch:          runes[0],
	}
	toks := p.parse()
	if p.err != nil {
		return nil, p.err
	}
	return toks, nil
}

// parse collects tokens from nextTok until it returns nil and hands back the
// accumulated slice, which is empty but non-nil when no token was produced.
func (p *Parser) parse() []*Token {
	collected := []*Token{}
	for next := p.nextTok(); next != nil; next = p.nextTok() {
		collected = append(collected, next)
	}
	return collected
}

// nextTok scans one token starting at the parser's current position.
//
// It returns nil when the input is exhausted, when an error is already
// recorded in p.err, or when it records a new error because the current rune
// cannot start any token. Leading whitespace is skipped first. Token.Offset is
// the index into p.SourceRunes of the token's first rune.
//
// Token kinds produced:
//   - Operator:        a single rune out of ( ) + - * / ^ % &
//   - CompareOperator: a run of the runes < > =
//   - Literal:         a number starting with a digit; '_' separators are removed
//   - StringArgs:      text starting with '"'
//   - COMMA:           a single ','
//   - Identifier:      a run of word runes starting with a rune accepted by isChar
func (p *Parser) nextTok() *Token {
	if p.offset >= len(p.SourceRunes) || p.err != nil {
		return nil
	}

	// Skip leading whitespace. Reaching the end of input here means only
	// trailing whitespace was left, which is a normal end of scan and must
	// not be reported as an unknown symbol.
	for p.isWhitespace(p.ch) {
		if p.nextCh() != nil {
			return nil
		}
	}

	start := p.offset
	var tok *Token
	switch p.ch {
	case '(', ')', '+', '-', '*', '/', '^', '%', '&':
		tok = &Token{
			Tok:    string(p.ch),
			Type:   Operator,
			Offset: start,
		}
		// The end-of-input result is deliberately ignored: the offset check
		// at the top of the next call ends the scan.
		_ = p.nextCh()

	case '>', '<', '=':
		// Consume the whole run of comparison runes, e.g. "<=" or ">=".
		for p.isCompareWordChar(p.ch) && p.nextCh() == nil {
		}
		tok = &Token{
			Tok:    string(p.SourceRunes[start:p.offset]),
			Type:   CompareOperator,
			Offset: start,
		}

	case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
		for p.isDigitNum(p.ch) && p.nextCh() == nil {
			// A '+' or '-' belongs to the number only directly after 'e'
			// (exponent sign); anywhere else it is the next operator and
			// ends the literal.
			if (p.ch == '-' || p.ch == '+') && p.SourceRunes[p.offset-1] != 'e' {
				break
			}
		}
		tok = &Token{
			Tok:    strings.ReplaceAll(string(p.SourceRunes[start:p.offset]), "_", ""),
			Type:   Literal,
			Offset: start,
		}

	case '"':
		// Advance until the closing quote, the end of input, or a rune
		// outside the accepted set.
		for (p.isDigitNum(p.ch) || p.isChar(p.ch) || p.isCompareWordChar(p.ch) || p.ch == '*') && p.nextCh() == nil {
			if p.ch == '"' {
				break
			}
		}
		// Step past the rune the loop stopped on, but never beyond the end
		// of input: an unterminated string would otherwise push the offset
		// to len+1 and make the slice expression below read out of range.
		// NOTE(review): when the loop stopped on a rune outside the accepted
		// set (for example a space) this step folds that rune into the
		// token. That is the pre-existing behavior and looks unintended —
		// confirm with callers before changing it.
		if p.offset < len(p.SourceRunes) {
			_ = p.nextCh()
		}
		tok = &Token{
			Tok:    string(p.SourceRunes[start:p.offset]),
			Type:   StringArgs,
			Offset: start,
		}

	case ',':
		tok = &Token{
			Tok:    string(p.ch),
			Type:   COMMA,
			Offset: start,
		}
		// End-of-input is handled by the next call, as for operators.
		_ = p.nextCh()

	default:
		// Whitespace was skipped above, so anything that is not an
		// identifier start at this point is an unknown symbol.
		if !p.isChar(p.ch) {
			p.err = fmt.Errorf("symbol error: unknown '%v', pos [%v:]\n%s",
				string(p.ch),
				start,
				ErrPos(p.Source, start))
			return nil
		}
		for p.isWordChar(p.ch) && p.nextCh() == nil {
		}
		tok = &Token{
			Tok:    string(p.SourceRunes[start:p.offset]),
			Type:   Identifier,
			Offset: start,
		}
	}
	return tok
}

// nextCh moves the parser one rune forward. When the new offset is still
// inside SourceRunes it loads that rune into p.ch and returns nil; otherwise
// p.ch is left unchanged and an "EOF" error is returned.
func (p *Parser) nextCh() error {
	p.offset++
	if p.offset >= len(p.SourceRunes) {
		return errors.New("EOF")
	}
	p.ch = p.SourceRunes[p.offset]
	return nil
}

// isWhitespace reports whether c is a space, tab, newline, vertical tab,
// form feed or carriage return.
func (p *Parser) isWhitespace(c rune) bool {
	switch c {
	case ' ', '\t', '\n', '\v', '\f', '\r':
		return true
	default:
		return false
	}
}

// isDigitNum reports whether c may appear inside a numeric literal: an ASCII
// digit, or one of '.', '_', 'e', '-', '+'.
func (p *Parser) isDigitNum(c rune) bool {
	switch c {
	case '.', '_', 'e', '-', '+':
		return true
	}
	return c >= '0' && c <= '9'
}

// isChar reports whether c may start an identifier: an ASCII letter, one of
// '.', '"', '_', or a rune accepted by isChineseCharacter. Digits are
// intentionally not included here.
func (p *Parser) isChar(c rune) bool {
	switch {
	case 'a' <= c && c <= 'z', 'A' <= c && c <= 'Z':
		return true
	case c == '.', c == '"', c == '_':
		return true
	}
	// Fall back to the check for Chinese characters.
	return isChineseCharacter(c)
}

// isWordChar reports whether c may continue an identifier: anything isChar
// accepts, plus the ASCII digits.
func (p *Parser) isWordChar(c rune) bool {
	if p.isChar(c) {
		return true
	}
	return c >= '0' && c <= '9'
}

// isCompareWordChar reports whether c is one of the runes that make up a
// comparison operator: '=', '<' or '>'.
func (p *Parser) isCompareWordChar(c rune) bool {
	switch c {
	case '=', '<', '>':
		return true
	default:
		return false
	}
}

// isChineseCharacter reports whether converting c to a string yields more
// than two bytes. Despite the name, this accepts every rune with a three- or
// four-byte UTF-8 form, not only Chinese characters.
func isChineseCharacter(c rune) bool {
	const maxShortEncoding = 2
	encoded := string(c)
	return len(encoded) > maxShortEncoding
}