lexmachine/machines/dfa_machine.go
zhouzhihong 879a86b6b7 新特性 (#1)
1. 增加utf8文本的行数和列数计算字段

Co-authored-by: zhouzhihong <zhouzhihong@ijunhai.com>
Reviewed-on: https://gitea.xintech.co/zhouzhihong/lexmachine/pulls/1
2022-08-25 14:41:32 +08:00

155 lines
3.8 KiB
Go

package machines
// DFATrans represents a Deterministic Finite Automatons state transition table
type DFATrans [][256]int
// DFAAccepting represents maps from accepting DFA states to match identifiers.
// These both identify which states are accepting states and which matches they
// belong to from the AST.
type DFAAccepting map[int]int
type lineCol struct {
line, col int
}
// Compute the line and column of a particular index inside of a byte slice.
func mapLineCols(text []byte) []lineCol {
// text := []rune(string(t))
m := make([]lineCol, len(text))
line := 1
col := 0
for i := 0; i < len(text); i++ {
if text[i] == '\n' {
col = 0
line++
} else {
col++
}
m[i] = lineCol{line: line, col: col}
}
return m
}
// DFALexerEngine does the actual tokenization of the byte slice text using the
// DFA state machine. If the lexing process fails the Scanner will return
// an UnconsumedInput error.
func DFALexerEngine(startState, errorState int, trans DFATrans, accepting DFAAccepting, text []byte) Scanner {
lineCols := mapLineCols(text)
done := false
matchID := -1
matchTC := -1
var scan Scanner
scan = func(tc int) (int, *Match, error, Scanner) {
if done && tc == len(text) {
return tc, nil, nil, nil
}
startTC := tc
if tc < matchTC {
// we back-tracked so reset the last matchTC
matchTC = -1
} else if tc == matchTC {
// the caller did not reset the tc, we are where we left
} else if matchTC != -1 && tc > matchTC {
// we skipped text
matchTC = tc
}
state := startState
for ; tc < len(text) && state != errorState; tc++ {
if match, has := accepting[state]; has {
matchID = match
matchTC = tc
}
state = trans[state][text[tc]]
if state == errorState && matchID > -1 {
startLC := lineCols[startTC]
endLC := lineCols[matchTC-1]
match := &Match{
PC: matchID,
TC: startTC,
StartLine: startLC.line,
StartColumn: startLC.col,
EndLine: endLC.line,
EndColumn: endLC.col,
Bytes: text[startTC:matchTC],
}
if matchTC == startTC {
err := &EmptyMatchError{
MatchID: matchID,
TC: tc,
Line: startLC.line,
Column: startLC.col,
}
return startTC, nil, err, scan
}
matchID = -1
return matchTC, match, nil, scan
}
}
if match, has := accepting[state]; has {
matchID = match
matchTC = tc
}
if startTC <= len(text) && matchID > -1 && matchTC == startTC {
var startLC lineCol
if startTC < len(text) {
startLC = lineCols[startTC]
}
err := &EmptyMatchError{
MatchID: matchID,
TC: tc,
Line: startLC.line,
Column: startLC.col,
}
matchID = -1
return startTC, nil, err, scan
}
if startTC < len(text) && matchTC <= len(text) && matchID > -1 {
startLC := lineCols[startTC]
endLC := lineCols[matchTC-1]
match := &Match{
PC: matchID,
TC: startTC,
StartLine: startLC.line,
StartColumn: startLC.col,
EndLine: endLC.line,
EndColumn: endLC.col,
Bytes: text[startTC:matchTC],
}
matchID = -1
return matchTC, match, nil, scan
}
if matchTC != len(text) && startTC >= len(text) {
// the user has moved us farther than the text. Assume that was
// the intent and return EOF.
return tc, nil, nil, nil
} else if matchTC != len(text) {
done = true
if matchTC == -1 {
matchTC = 0
}
startLC := lineCols[startTC]
etc := tc
var endLC lineCol
if etc >= len(lineCols) {
endLC = lineCols[len(lineCols)-1]
} else {
endLC = lineCols[etc]
}
err := &UnconsumedInput{
StartTC: startTC,
FailTC: etc,
StartLine: startLC.line,
StartColumn: startLC.col,
FailLine: endLC.line,
FailColumn: endLC.col,
Text: text,
}
return tc, nil, err, scan
} else {
return tc, nil, nil, nil
}
}
return scan
}