From 3d56a5d5f1c9a802938f5f8b564ea75e577a6f1e Mon Sep 17 00:00:00 2001 From: zhouzhihong Date: Thu, 25 Aug 2022 14:33:22 +0800 Subject: [PATCH] Add text line and text column calculator. --- frontend/frontend_test.go | 2 +- lexer.go | 48 +++++++++++++++++++++++++++++++++++++++ lexer_test.go | 21 +---------------- machines/machine.go | 2 ++ machines/machine_test.go | 14 ++++++------ 5 files changed, 59 insertions(+), 28 deletions(-) diff --git a/frontend/frontend_test.go b/frontend/frontend_test.go index de345c9..22f9c07 100644 --- a/frontend/frontend_test.go +++ b/frontend/frontend_test.go @@ -23,7 +23,7 @@ func TestParse(x *testing.T) { } func tMatch(program inst.Slice, text string, t *test.T) { - expected := []machines.Match{{len(program) - 1, 0, 1, 1, 1, len(text), []byte(text)}} + expected := []machines.Match{{PC: len(program) - 1, TC: 0, StartLine: 1, StartColumn: 1, EndLine: 1, EndColumn: len(text), Bytes: []byte(text), TSLine: 1, TSColumn: 0, TELine: 1, TEColumn: 1}} if expected[0].EndColumn == 0 { expected[0].EndColumn = 1 } diff --git a/lexer.go b/lexer.go index 73241d7..ea6d37e 100644 --- a/lexer.go +++ b/lexer.go @@ -3,6 +3,7 @@ package lexmachine import ( "bytes" "fmt" + "unicode/utf8" dfapkg "gitea.xintech.co/zhouzhihong/lexmachine/dfa" "gitea.xintech.co/zhouzhihong/lexmachine/frontend" @@ -112,6 +113,12 @@ type Scanner struct { sColumn int eLine int eColumn int + + lpp map[int]lastPostion +} + +type lastPostion struct { + l, c int } // Next iterates through the string being scanned returning one token at a time @@ -155,6 +162,38 @@ func (s *Scanner) Next() (tok interface{}, err error, eos bool) { s.eLine = match.EndLine s.eColumn = match.EndColumn + p := s.pTC + l, c := s.lpp[p].l, s.lpp[p].c + stc := s.TC - len(match.Bytes) + + for { + if s.Text[p] == '\n' { + l++ + c = 0 + } else { + c++ + } + + if p == stc { + match.TSLine = l + match.TSColumn = c + } + + match.TELine = l + match.TEColumn = c + + _, sz := utf8.DecodeRune(s.Text[p:]) + p += sz + if p >= s.TC { + break + } + } + + s.lpp[s.TC] = lastPostion{ + l: l, + c: c, + } + pattern := s.lexer.patterns[s.matches[match.PC]] token, err = pattern.action(s, match) if err != nil { @@ -204,6 +243,7 @@ func (l *Lexer) Scanner(text []byte) (*Scanner, error) { scan: machines.DFALexerEngine(l.dfa.Start, l.dfa.Error, l.dfa.Trans, l.dfa.Accepting, textCopy), Text: textCopy, TC: 0, + lpp: make(map[int]lastPostion), } } else { s = &Scanner{ @@ -212,8 +252,16 @@ func (l *Lexer) Scanner(text []byte) (*Scanner, error) { scan: machines.LexerEngine(l.program, textCopy), Text: textCopy, TC: 0, + lpp: make(map[int]lastPostion), } } + + //init + s.lpp[0] = lastPostion{ + l: 1, + c: 0, + } + return s, nil } diff --git a/lexer_test.go b/lexer_test.go index a80815e..79e9797 100644 --- a/lexer_test.go +++ b/lexer_test.go @@ -316,26 +316,7 @@ func TestRegression(t *testing.T) { func TestRegression2(t *testing.T) { text := `# dhcpd.conf -# -# Sample configuration file for ISC dhcpd -# - -# option definitions common to all supported networks... -option domain-name "你好"; -option domain-name-servers ns1.example.org, ns2.example.org; - -default-lease-time 600; -max-lease-time 7200; - -# The ddns-updates-style parameter controls whether or not the server will -# attempt to do a DNS update when a lease is confirmed. We default to the -# behavior of the version 2 packages ('none', since DHCP v2 didn't -# have support for DDNS.) -ddns-update-style none; - -# If this DHCP server is the official DHCP server for the local -# network, the authoritative directive should be uncommented. -#authoritative; +option domain-name "你好" ` literals := []string{ diff --git a/machines/machine.go b/machines/machine.go index b6325b0..e4c44e0 100644 --- a/machines/machine.go +++ b/machines/machine.go @@ -67,6 +67,8 @@ type Match struct { EndLine int EndColumn int Bytes []byte // the actual bytes matched during scanning. + + TSLine, TSColumn, TELine, TEColumn int } func computeLineCol(text []byte, prevTC, tc, line, col int) (int, int) { diff --git a/machines/machine_test.go b/machines/machine_test.go index 1b42d05..b0ad37e 100644 --- a/machines/machine_test.go +++ b/machines/machine_test.go @@ -34,7 +34,7 @@ func TestLexerMatch(t *testing.T) { t.Log(program) mtext := []byte("ababcbcbb") expected := []Match{ - {16, 0, 1, 1, 1, len(mtext), mtext}, + {16, 0, 1, 1, 1, len(mtext), mtext, 1, 0, 1, 1}, } i := 0 for tc, m, err, scan := LexerEngine(program, text)(0); scan != nil; tc, m, err, scan = scan(tc) { @@ -114,9 +114,9 @@ func TestLexerThreeStrings(t *testing.T) { t.Log(len(text)) t.Log(program) expected := []Match{ - {8, 0, 1, 1, 1, 6, []byte("struct")}, - {13, 6, 1, 7, 1, 8, []byte(" ")}, - {15, 8, 1, 9, 1, 9, []byte("*")}, + {8, 0, 1, 1, 1, 6, []byte("struct"), 1, 0, 1, 1}, + {13, 6, 1, 7, 1, 8, []byte(" "), 1, 0, 1, 1}, + {15, 8, 1, 9, 1, 9, []byte("*"), 1, 0, 1, 1}, } i := 0 @@ -165,9 +165,9 @@ func TestLexerRestart(t *testing.T) { t.Log(len(text)) t.Log(program) expected := []Match{ - {8, 0, 1, 1, 1, 6, []byte("struct")}, - {19, 6, 2, 0, 2, 2, []byte("\n ")}, - {21, 9, 2, 3, 2, 3, []byte("*")}, + {8, 0, 1, 1, 1, 6, []byte("struct"), 1, 0, 1, 1}, + {19, 6, 2, 0, 2, 2, []byte("\n "), 1, 0, 1, 1}, + {21, 9, 2, 3, 2, 3, []byte("*"), 1, 0, 1, 1}, } check := func(m *Match, i int, err error) {