新特性 #1

Merged
zhouzhihong merged 3 commits from dev into main 2022-08-25 14:41:32 +08:00
5 changed files with 59 additions and 28 deletions
Showing only changes of commit 3d56a5d5f1 - Show all commits

View File

@ -23,7 +23,7 @@ func TestParse(x *testing.T) {
}
func tMatch(program inst.Slice, text string, t *test.T) {
expected := []machines.Match{{len(program) - 1, 0, 1, 1, 1, len(text), []byte(text)}}
expected := []machines.Match{{PC: len(program) - 1, TC: 0, StartLine: 1, StartColumn: 1, EndLine: 1, EndColumn: len(text), Bytes: []byte(text), TSLine: 1, TSColumn: 0, TELine: 1, TEColumn: 1}}
if expected[0].EndColumn == 0 {
expected[0].EndColumn = 1
}

View File

@ -3,6 +3,7 @@ package lexmachine
import (
"bytes"
"fmt"
"unicode/utf8"
dfapkg "gitea.xintech.co/zhouzhihong/lexmachine/dfa"
"gitea.xintech.co/zhouzhihong/lexmachine/frontend"
@ -112,6 +113,12 @@ type Scanner struct {
sColumn int
eLine int
eColumn int
lpp map[int]lastPostion
}
// lastPostion holds the line and column counters reached at a given offset
// in the scanned text. Scanner.lpp stores one entry per offset so that Next
// can resume counting from where the previous match stopped.
type lastPostion struct {
// l is the line counter and c is the column counter. Next increments l and
// resets c to 0 on '\n'; otherwise it increments c once per decoded rune.
l, c int
}
// Next iterates through the string being scanned returning one token at a time
@ -155,6 +162,38 @@ func (s *Scanner) Next() (tok interface{}, err error, eos bool) {
s.eLine = match.EndLine
s.eColumn = match.EndColumn
p := s.pTC
l, c := s.lpp[p].l, s.lpp[p].c
stc := s.TC - len(match.Bytes)
for {
if s.Text[p] == '\n' {
l++
c = 0
} else {
c++
}
if p == stc {
match.TSLine = l
match.TSColumn = c
}
match.TELine = l
match.TEColumn = c
_, sz := utf8.DecodeRune(s.Text[p:])
p += sz
if p >= s.TC {
break
}
}
s.lpp[s.TC] = lastPostion{
l: l,
c: c,
}
pattern := s.lexer.patterns[s.matches[match.PC]]
token, err = pattern.action(s, match)
if err != nil {
@ -204,6 +243,7 @@ func (l *Lexer) Scanner(text []byte) (*Scanner, error) {
scan: machines.DFALexerEngine(l.dfa.Start, l.dfa.Error, l.dfa.Trans, l.dfa.Accepting, textCopy),
Text: textCopy,
TC: 0,
lpp: make(map[int]lastPostion),
}
} else {
s = &Scanner{
@ -212,8 +252,16 @@ func (l *Lexer) Scanner(text []byte) (*Scanner, error) {
scan: machines.LexerEngine(l.program, textCopy),
Text: textCopy,
TC: 0,
lpp: make(map[int]lastPostion),
}
}
//init
s.lpp[0] = lastPostion{
l: 1,
c: 0,
}
return s, nil
}

View File

@ -316,26 +316,7 @@ func TestRegression(t *testing.T) {
func TestRegression2(t *testing.T) {
text := `# dhcpd.conf
#
# Sample configuration file for ISC dhcpd
#
# option definitions common to all supported networks...
option domain-name "你好";
option domain-name-servers ns1.example.org, ns2.example.org;
default-lease-time 600;
max-lease-time 7200;
# The ddns-updates-style parameter controls whether or not the server will
# attempt to do a DNS update when a lease is confirmed. We default to the
# behavior of the version 2 packages ('none', since DHCP v2 didn't
# have support for DDNS.)
ddns-update-style none;
# If this DHCP server is the official DHCP server for the local
# network, the authoritative directive should be uncommented.
#authoritative;
option domain-name "你好"
`
literals := []string{

View File

@ -67,6 +67,8 @@ type Match struct {
EndLine int
EndColumn int
Bytes []byte // the actual bytes matched during scanning.
TSLine, TSColumn, TELine, TEColumn int
}
func computeLineCol(text []byte, prevTC, tc, line, col int) (int, int) {

View File

@ -34,7 +34,7 @@ func TestLexerMatch(t *testing.T) {
t.Log(program)
mtext := []byte("ababcbcbb")
expected := []Match{
{16, 0, 1, 1, 1, len(mtext), mtext},
{16, 0, 1, 1, 1, len(mtext), mtext, 1, 0, 1, 1},
}
i := 0
for tc, m, err, scan := LexerEngine(program, text)(0); scan != nil; tc, m, err, scan = scan(tc) {
@ -114,9 +114,9 @@ func TestLexerThreeStrings(t *testing.T) {
t.Log(len(text))
t.Log(program)
expected := []Match{
{8, 0, 1, 1, 1, 6, []byte("struct")},
{13, 6, 1, 7, 1, 8, []byte(" ")},
{15, 8, 1, 9, 1, 9, []byte("*")},
{8, 0, 1, 1, 1, 6, []byte("struct"), 1, 0, 1, 1},
{13, 6, 1, 7, 1, 8, []byte(" "), 1, 0, 1, 1},
{15, 8, 1, 9, 1, 9, []byte("*"), 1, 0, 1, 1},
}
i := 0
@ -165,9 +165,9 @@ func TestLexerRestart(t *testing.T) {
t.Log(len(text))
t.Log(program)
expected := []Match{
{8, 0, 1, 1, 1, 6, []byte("struct")},
{19, 6, 2, 0, 2, 2, []byte("\n ")},
{21, 9, 2, 3, 2, 3, []byte("*")},
{8, 0, 1, 1, 1, 6, []byte("struct"), 1, 0, 1, 1},
{19, 6, 2, 0, 2, 2, []byte("\n "), 1, 0, 1, 1},
{21, 9, 2, 3, 2, 3, []byte("*"), 1, 0, 1, 1},
}
check := func(m *Match, i int, err error) {