diff --git a/frontend/frontend_test.go b/frontend/frontend_test.go
index 22f9c07..d131d4e 100644
--- a/frontend/frontend_test.go
+++ b/frontend/frontend_test.go
@@ -23,10 +23,7 @@ func TestParse(x *testing.T) {
 }
 
 func tMatch(program inst.Slice, text string, t *test.T) {
-    expected := []machines.Match{{PC: len(program) - 1, TC: 0, StartLine: 1, StartColumn: 1, EndLine: 1, EndColumn: len(text), Bytes: []byte(text), TSLine: 1, TSColumn: 0, TELine: 1, TEColumn: 1}}
-    if expected[0].EndColumn == 0 {
-        expected[0].EndColumn = 1
-    }
+    expected := []machines.Match{{PC: len(program) - 1, TC: 0, Bytes: []byte(text)}}
     i := 0
     scan := machines.LexerEngine(program, []byte(text))
     for tc, m, err, scan := scan(0); scan != nil; tc, m, err, scan = scan(tc) {
diff --git a/lexer.go b/lexer.go
index 939b23b..3e88a78 100644
--- a/lexer.go
+++ b/lexer.go
@@ -31,8 +31,6 @@ type Token struct {
     StartColumn int
     EndLine     int
     EndColumn   int
-
-    TSLine, TSColumn, TELine, TEColumn int
 }
 
 // Equals checks the equality of two tokens ignoring the Value field.
@@ -111,12 +109,7 @@ type Scanner struct {
     Text []byte
     TC   int
     pTC  int
-    sLine   int
-    sColumn int
-    eLine   int
-    eColumn int
-
-    lpp map[int]lastPostion
+    lpp  map[int]lastPostion
 }
 
 type lastPostion struct {
@@ -148,7 +141,7 @@ type lastPostion struct {
 func (s *Scanner) Next() (tok interface{}, err error, eos bool) {
     var token interface{}
     for token == nil {
-        tc, match, err, scan := s.scan(s.TC)
+        ntc, match, err, scan := s.scan(s.TC)
         if scan == nil {
             return nil, nil, true
         } else if err != nil {
@@ -158,68 +151,51 @@ func (s *Scanner) Next() (tok interface{}, err error, eos bool) {
         }
         s.scan = scan
         s.pTC = s.TC
-        s.TC = tc
-        s.sLine = match.StartLine
-        s.sColumn = match.StartColumn
-        s.eLine = match.EndLine
-        s.eColumn = match.EndColumn
-
-        p := s.pTC
-        l, c := s.lpp[p].l, s.lpp[p].c
-        stc := s.TC - len(match.Bytes)
-
-        for {
-            if s.Text[p] == '\n' {
-                l++
-                c = 0
-            } else {
-                c++
-            }
-
-            if p == stc {
-                match.TSLine = l
-                match.TSColumn = c
-            }
-
-            match.TELine = l
-            match.TEColumn = c
-
-            _, sz := utf8.DecodeRune(s.Text[p:])
-            p += sz
-            if p >= s.TC {
-                break
-            }
-        }
-
-        s.lpp[s.TC] = lastPostion{
-            l: l,
-            c: c,
-        }
+        s.TC = ntc
         pattern := s.lexer.patterns[s.matches[match.PC]]
         token, err = pattern.action(s, match)
+
+        lpp := s.lpp[s.pTC]
+        line, col := lpp.l, lpp.c
+        for i := s.pTC; i < s.TC; {
+            ch, sz := utf8.DecodeRune(s.Text[i:])
+            if ch == rune('\n') {
+                line++
+                col = 0
+            } else {
+                col++
+            }
+
+            if (i == match.TC) && token != nil {
+                token.(*Token).StartLine = line
+                token.(*Token).StartColumn = col
+            }
+            i += sz
+        }
+
+        if token != nil {
+            token.(*Token).EndLine = line
+            token.(*Token).EndColumn = col
+        }
+
+        s.lpp[s.TC] = lastPostion{l: line, c: col}
+
         if err != nil {
             return nil, err, false
         }
     }
+
     return token, nil, false
 }
 
 // Token is a helper function for constructing a Token type inside of a Action.
 func (s *Scanner) Token(typ int, value interface{}, m *machines.Match) *Token {
     return &Token{
-        Type:        typ,
-        Value:       value,
-        Lexeme:      m.Bytes,
-        TC:          m.TC,
-        StartLine:   m.StartLine,
-        StartColumn: m.StartColumn,
-        EndLine:     m.EndLine,
-        EndColumn:   m.EndColumn,
-        TSLine:      m.TSLine,
-        TSColumn:    m.TSColumn,
-        TELine:      m.TELine,
-        TEColumn:    m.TEColumn,
+        Type:   typ,
+        Value:  value,
+        Lexeme: m.Bytes,
+        TC:     m.TC,
     }
 }
diff --git a/lexer_test.go b/lexer_test.go
index 4c8089f..26c09c2 100644
--- a/lexer_test.go
+++ b/lexer_test.go
@@ -77,8 +77,8 @@ func TestSimple(x *testing.T) {
                 }
             }
             return nil,
-                fmt.Errorf("unclosed comment starting at %d, (%d, %d)",
-                    match.TC, match.StartLine, match.StartColumn)
+                fmt.Errorf("unclosed comment starting at %d",
+                    match.TC)
         },
     )
 
@@ -95,23 +95,22 @@ func TestSimple(x *testing.T) {
     `)
 
     expected := []*Token{
-        {NAME, "name", []byte("name"), 3, 2, 3, 2, 6, 2, 3, 2, 6},
-        {EQUALS, nil, []byte("="), 8, 2, 8, 2, 8, 2, 8, 2, 8},
-        {NUMBER, 10, []byte("10"), 10, 2, 10, 2, 11, 2, 10, 2, 11},
-        {PRINT, nil, []byte("print"), 15, 3, 3, 3, 7, 3, 3, 3, 7},
-        {NAME, "name", []byte("name"), 21, 3, 9, 3, 12, 3, 9, 3, 12},
-        {PRINT, nil, []byte("print"), 28, 4, 3, 4, 7, 4, 3, 4, 7},
-        {NAME, "fred", []byte("fred"), 34, 4, 9, 4, 12, 4, 9, 4, 12},
-        {NAME, "name", []byte("name"), 41, 5, 3, 5, 6, 5, 3, 5, 6},
-        {EQUALS, nil, []byte("="), 46, 5, 8, 5, 8, 5, 8, 5, 8},
-        {NUMBER, 12, []byte("12"), 47, 5, 9, 5, 10, 5, 9, 5, 10},
-        {NAME, "printname", []byte("printname"), 112, 9, 11, 9, 19, 9, 11, 9, 19},
-        {EQUALS, nil, []byte("="), 122, 9, 21, 9, 21, 9, 21, 9, 21},
-        {NUMBER, 13, []byte("13"), 124, 9, 23, 9, 24, 9, 23, 9, 24},
-        {PRINT, nil, []byte("print"), 129, 10, 3, 10, 7, 10, 3, 10, 7},
-        {NAME, "printname", []byte("printname"), 135, 10, 9, 10, 17, 10, 9, 10, 17},
+        {NAME, "name", []byte("name"), 3, 2, 3, 2, 6},
+        {EQUALS, nil, []byte("="), 8, 2, 8, 2, 8},
+        {NUMBER, 10, []byte("10"), 10, 2, 10, 2, 11},
+        {PRINT, nil, []byte("print"), 15, 3, 3, 3, 7},
+        {NAME, "name", []byte("name"), 21, 3, 9, 3, 12},
+        {PRINT, nil, []byte("print"), 28, 4, 3, 4, 7},
+        {NAME, "fred", []byte("fred"), 34, 4, 9, 4, 12},
+        {NAME, "name", []byte("name"), 41, 5, 3, 5, 6},
+        {EQUALS, nil, []byte("="), 46, 5, 8, 5, 8},
+        {NUMBER, 12, []byte("12"), 47, 5, 9, 5, 10},
+        {NAME, "printname", []byte("printname"), 112, 9, 11, 9, 19},
+        {EQUALS, nil, []byte("="), 122, 9, 21, 9, 21},
+        {NUMBER, 13, []byte("13"), 124, 9, 23, 9, 24},
+        {PRINT, nil, []byte("print"), 129, 10, 3, 10, 7},
+        {NAME, "printname", []byte("printname"), 135, 10, 9, 10, 17},
     }
-
     scan := func(lexer *Lexer) {
         scanner, err := lexer.Scanner(text)
         if err != nil {
diff --git a/machines/dfa_machine.go b/machines/dfa_machine.go
index e065f4c..e04b3b8 100644
--- a/machines/dfa_machine.go
+++ b/machines/dfa_machine.go
@@ -34,7 +34,6 @@ func mapLineCols(text []byte) []lineCol {
 // DFA state machine. If the lexing process fails the Scanner will return
 // an UnconsumedInput error.
 func DFALexerEngine(startState, errorState int, trans DFATrans, accepting DFAAccepting, text []byte) Scanner {
-    lineCols := mapLineCols(text)
     done := false
     matchID := -1
     matchTC := -1
@@ -62,23 +61,15 @@ func DFALexerEngine(startState, errorState int, trans DFATrans, accepting DFAAcc
             }
             state = trans[state][text[tc]]
             if state == errorState && matchID > -1 {
-                startLC := lineCols[startTC]
-                endLC := lineCols[matchTC-1]
                 match := &Match{
-                    PC:          matchID,
-                    TC:          startTC,
-                    StartLine:   startLC.line,
-                    StartColumn: startLC.col,
-                    EndLine:     endLC.line,
-                    EndColumn:   endLC.col,
-                    Bytes:       text[startTC:matchTC],
+                    PC:    matchID,
+                    TC:    startTC,
+                    Bytes: text[startTC:matchTC],
                 }
                 if matchTC == startTC {
                     err := &EmptyMatchError{
                         MatchID: matchID,
-                        TC:      tc,
-                        Line:    startLC.line,
-                        Column:  startLC.col,
+                        TC:      startTC,
                     }
                     return startTC, nil, err, scan
                 }
@@ -91,30 +82,19 @@ func DFALexerEngine(startState, errorState int, trans DFATrans, accepting DFAAcc
             matchTC = tc
         }
         if startTC <= len(text) && matchID > -1 && matchTC == startTC {
-            var startLC lineCol
-            if startTC < len(text) {
-                startLC = lineCols[startTC]
-            }
+
             err := &EmptyMatchError{
                 MatchID: matchID,
-                TC:      tc,
-                Line:    startLC.line,
-                Column:  startLC.col,
+                TC:      startTC,
             }
             matchID = -1
             return startTC, nil, err, scan
         }
         if startTC < len(text) && matchTC <= len(text) && matchID > -1 {
-            startLC := lineCols[startTC]
-            endLC := lineCols[matchTC-1]
             match := &Match{
-                PC:          matchID,
-                TC:          startTC,
-                StartLine:   startLC.line,
-                StartColumn: startLC.col,
-                EndLine:     endLC.line,
-                EndColumn:   endLC.col,
-                Bytes:       text[startTC:matchTC],
+                PC:    matchID,
+                TC:    startTC,
+                Bytes: text[startTC:matchTC],
             }
             matchID = -1
             return matchTC, match, nil, scan
@@ -128,22 +108,12 @@ func DFALexerEngine(startState, errorState int, trans DFATrans, accepting DFAAcc
         if matchTC == -1 {
             matchTC = 0
         }
-        startLC := lineCols[startTC]
         etc := tc
-        var endLC lineCol
-        if etc >= len(lineCols) {
-            endLC = lineCols[len(lineCols)-1]
-        } else {
-            endLC = lineCols[etc]
-        }
+
         err := &UnconsumedInput{
-            StartTC:     startTC,
-            FailTC:      etc,
-            StartLine:   startLC.line,
-            StartColumn: startLC.col,
-            FailLine:    endLC.line,
-            FailColumn:  endLC.col,
-            Text:        text,
+            StartTC: startTC,
+            FailTC:  etc,
+            Text:    text,
         }
         return tc, nil, err, scan
     } else {
diff --git a/machines/machine.go b/machines/machine.go
index e4c44e0..27b474b 100644
--- a/machines/machine.go
+++ b/machines/machine.go
@@ -13,26 +13,20 @@ import (
 // string
 type EmptyMatchError struct {
     TC      int
-    Line    int
-    Column  int
     MatchID int
 }
 
 func (e *EmptyMatchError) Error() string {
-    return fmt.Sprintf("Lexer error: matched the empty string at %d:%d (tc=%d) for match id %d.",
-        e.Line, e.Column, e.TC, e.MatchID,
+    return fmt.Sprintf("Lexer error: matched the empty string at %d for match id %d.",
+        e.TC, e.MatchID,
     )
 }
 
 // UnconsumedInput error type
 type UnconsumedInput struct {
-    StartTC     int
-    FailTC      int
-    StartLine   int
-    StartColumn int
-    FailLine    int
-    FailColumn  int
-    Text        []byte
+    StartTC int
+    FailTC  int
+    Text    []byte
 }
 
 // Error implements the error interface
@@ -51,60 +45,18 @@ func (u *UnconsumedInput) Error() string {
     }
     stc := min(u.StartTC, len(u.Text)-1)
     etc := min(max(u.StartTC+1, u.FailTC), len(u.Text))
-    return fmt.Sprintf("Lexer error: could not match text starting at %v:%v failing at %v:%v.\n\tunmatched text: %q",
-        u.StartLine, u.StartColumn,
-        u.FailLine, u.FailColumn,
+    return fmt.Sprintf("Lexer error: could not match text starting at %v failing at %v.\n\tunmatched text: %q",
+        u.StartTC,
+        u.FailTC,
         string(u.Text[stc:etc]),
     )
 }
 
 // A Match represents the positional and textual information from a match.
 type Match struct {
-    PC          int
-    TC          int
-    StartLine   int
-    StartColumn int
-    EndLine     int
-    EndColumn   int
-    Bytes       []byte // the actual bytes matched during scanning.
-
-    TSLine, TSColumn, TELine, TEColumn int
-}
-
-func computeLineCol(text []byte, prevTC, tc, line, col int) (int, int) {
-    if tc < 0 {
-        return line, col
-    }
-    if tc < prevTC {
-        for i := prevTC; i > tc && i > 0; i-- {
-            if text[i] == '\n' {
-                line--
-            }
-        }
-        col = 0
-        for i := tc; i >= 0; i-- {
-            if text[i] == '\n' {
-                break
-            }
-            col++
-        }
-        return line, col
-    }
-    for i := prevTC + 1; i <= tc && i < len(text); i++ {
-        if text[i] == '\n' {
-            col = 0
-            line++
-        } else {
-            col++
-        }
-    }
-    if prevTC == tc && tc == 0 && tc < len(text) {
-        if text[tc] == '\n' {
-            line++
-            col--
-        }
-    }
-    return line, col
+    PC    int    // pattern cursor
+    TC    int    // start position of text cursor
+    Bytes []byte // the actual bytes matched during scanning.
 }
 
 // Equals checks two matches for equality
@@ -117,16 +69,13 @@ func (m *Match) Equals(other *Match) bool {
         return false
     }
     return m.PC == other.PC &&
-        m.StartLine == other.StartLine &&
-        m.StartColumn == other.StartColumn &&
-        m.EndLine == other.EndLine &&
-        m.EndColumn == other.EndColumn &&
+        m.TC == other.TC &&
         bytes.Equal(m.Bytes, other.Bytes)
 }
 
 // String formats the match for humans
 func (m Match) String() string {
-    return fmt.Sprintf("<Match %d %d (%d, %d)-(%d, %d) '%v'>", m.PC, m.TC, m.StartLine, m.StartColumn, m.EndLine, m.EndColumn, string(m.Bytes))
+    return fmt.Sprintf("<Match %d %d '%v'>", m.PC, m.TC, string(m.Bytes))
 }
 
 // Scanner is a functional iterator returned by the LexerEngine. See
@@ -141,10 +90,6 @@ func LexerEngine(program inst.Slice, text []byte) Scanner {
 
     matchPC := -1
     matchTC := -1
-    prevTC := 0
-    line := 1
-    col := 1
-
     var scan Scanner
     var cqueue, nqueue *queue.Queue = queue.New(len(program)), queue.New(len(program))
     scan = func(tc int) (int, *Match, error, Scanner) {
@@ -197,27 +142,19 @@ func LexerEngine(program inst.Slice, text []byte) Scanner {
             }
             cqueue, nqueue = nqueue, cqueue
             if cqueue.Empty() && matchPC > -1 {
-                line, col = computeLineCol(text, prevTC, startTC, line, col)
-                eLine, eCol := computeLineCol(text, startTC, matchTC-1, line, col)
+
                 match := &Match{
-                    PC:          matchPC,
-                    TC:          startTC,
-                    StartLine:   line,
-                    StartColumn: col,
-                    EndLine:     eLine,
-                    EndColumn:   eCol,
-                    Bytes:       text[startTC:matchTC],
+                    PC:    matchPC,
+                    TC:    startTC,
+                    Bytes: text[startTC:matchTC],
                 }
                 if matchTC == startTC {
                     err := &EmptyMatchError{
                         MatchID: matchPC,
                         TC:      tc,
-                        Line:    line,
-                        Column:  col,
                     }
                     return startTC, nil, err, scan
                 }
-                prevTC = startTC
                 matchPC = -1
                 return matchTC, match, nil, scan
             }
@@ -231,16 +168,10 @@ func LexerEngine(program inst.Slice, text []byte) Scanner {
         if matchTC == -1 {
             matchTC = 0
         }
-        sline, scol := computeLineCol(text, 0, startTC, 1, 1)
-        fline, fcol := computeLineCol(text, 0, tc, 1, 1)
         err := &UnconsumedInput{
-            StartTC:     startTC,
-            FailTC:      tc,
-            StartLine:   sline,
-            StartColumn: scol,
-            FailLine:    fline,
-            FailColumn:  fcol,
-            Text:        text,
+            StartTC: startTC,
+            FailTC:  tc,
+            Text:    text,
         }
         return tc, nil, err, scan
     } else {
diff --git a/machines/machine_test.go b/machines/machine_test.go
index b0ad37e..4b43ae9 100644
--- a/machines/machine_test.go
+++ b/machines/machine_test.go
@@ -34,7 +34,7 @@ func TestLexerMatch(t *testing.T) {
     t.Log(program)
     mtext := []byte("ababcbcbb")
     expected := []Match{
-        {16, 0, 1, 1, 1, len(mtext), mtext, 1, 0, 1, 1},
+        {16, 0, mtext},
     }
     i := 0
     for tc, m, err, scan := LexerEngine(program, text)(0); scan != nil; tc, m, err, scan = scan(tc) {
@@ -114,9 +114,9 @@ func TestLexerThreeStrings(t *testing.T) {
     t.Log(len(text))
     t.Log(program)
     expected := []Match{
-        {8, 0, 1, 1, 1, 6, []byte("struct"), 1, 0, 1, 1},
-        {13, 6, 1, 7, 1, 8, []byte("  "), 1, 0, 1, 1},
-        {15, 8, 1, 9, 1, 9, []byte("*"), 1, 0, 1, 1},
+        {8, 0, []byte("struct")},
+        {13, 6, []byte("  ")},
+        {15, 8, []byte("*")},
     }
 
     i := 0
@@ -165,9 +165,9 @@ func TestLexerRestart(t *testing.T) {
     t.Log(len(text))
     t.Log(program)
     expected := []Match{
-        {8, 0, 1, 1, 1, 6, []byte("struct"), 1, 0, 1, 1},
-        {19, 6, 2, 0, 2, 2, []byte("\n  "), 1, 0, 1, 1},
-        {21, 9, 2, 3, 2, 3, []byte("*"), 1, 0, 1, 1},
+        {8, 0, []byte("struct")},
+        {19, 6, []byte("\n  ")},
+        {21, 9, []byte("*")},
     }
 
     check := func(m *Match, i int, err error) {
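
Note: the position bookkeeping that this patch removes from the machines and re-adds inside Scanner.Next can be exercised on its own. The sketch below is illustrative only: the helper name lineColSpan, the package main wrapper, the starting position, and the sample text are assumptions, not part of the library. It mirrors the utf8.DecodeRune walk added to Scanner.Next: advance a running (line, col) pair across a byte range, record the position at the match's first byte, and report the position of the last rune as the end.

package main

import (
	"fmt"
	"unicode/utf8"
)

// lineColSpan advances a running (line, col) position across text[from:to),
// rune by rune, and reports the position of the byte at startTC (the first
// byte of a match) and of the last rune before to. Hypothetical helper that
// mirrors the loop this patch adds to Scanner.Next; not a library API.
func lineColSpan(text []byte, from, to, startTC, line, col int) (sLine, sCol, eLine, eCol int) {
	sLine, sCol = line, col
	for i := from; i < to; {
		ch, sz := utf8.DecodeRune(text[i:])
		if ch == '\n' {
			line++
			col = 0
		} else {
			col++
		}
		if i == startTC {
			sLine, sCol = line, col
		}
		i += sz
	}
	return sLine, sCol, line, col
}

func main() {
	text := []byte("name = 10\nprint name\n")
	// Assume the scan starts at offset 0 on line 1, column 0, and the
	// machine just matched "print", which spans text[10:15].
	sLine, sCol, eLine, eCol := lineColSpan(text, 0, 15, 10, 1, 0)
	fmt.Println(sLine, sCol, eLine, eCol) // 2 1 2 5
}

Computing positions lazily like this keeps the NFA/DFA engines free of line/column work; the trade-off, visible in the patch, is that the Scanner must remember the last position it reached for each text cursor (the lpp map) so consecutive tokens do not rescan the whole input.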