From fd42cc208a6d0e4ffc62a5e44d4a1caaf48c25af Mon Sep 17 00:00:00 2001
From: zhouzhihong
Date: Wed, 24 Aug 2022 23:25:24 +0800
Subject: [PATCH 1/3] Try to fix position of token for unicode.

---
 dfa/dfa_helpers_test.go                  |  42 +-
 dfa/gen_test.go                          |   4 +-
 doc.go                                   | 116 ++++
 examples/sensors-parser/.gitignore       |   1 +
 examples/sensors-parser/README.md        | 119 ++++
 examples/sensors-parser/ast.go           |  53 ++
 examples/sensors-parser/main.go          |  67 +++
 examples/sensors-parser/sensors.conf     |   4 +
 examples/sensors-parser/sensors.y        | 101 ++++
 examples/sensors-parser/sensors_golex.go | 115 ++++
 examples/sensors-parser/y.go             | 665 +++++++++++++++++++++++
 examples/sensors/main.go                 | 130 +++++
 frontend/ast.go                          |  12 +-
 frontend/desugar.go                      |   2 +-
 frontend/desugar_test.go                 |  11 +-
 frontend/frontend_test.go                |  52 +-
 frontend/parser.go                       |  26 +-
 grammar                                  |  36 ++
 lexc/main.go                             |   2 +-
 lexer.go                                 |  17 +-
 lexer_test.go                            |  98 ++--
 machines/dfa_machine.go                  |   5 +-
 machines/machine.go                      |  28 +-
 machines/machine_test.go                 |  22 +-
 24 files changed, 1568 insertions(+), 160 deletions(-)
 create mode 100644 doc.go
 create mode 100644 examples/sensors-parser/.gitignore
 create mode 100644 examples/sensors-parser/README.md
 create mode 100644 examples/sensors-parser/ast.go
 create mode 100644 examples/sensors-parser/main.go
 create mode 100644 examples/sensors-parser/sensors.conf
 create mode 100644 examples/sensors-parser/sensors.y
 create mode 100644 examples/sensors-parser/sensors_golex.go
 create mode 100644 examples/sensors-parser/y.go
 create mode 100644 examples/sensors/main.go
 create mode 100644 grammar

diff --git a/dfa/dfa_helpers_test.go b/dfa/dfa_helpers_test.go
index ac6a8e7..ea17d75 100644
--- a/dfa/dfa_helpers_test.go
+++ b/dfa/dfa_helpers_test.go
@@ -34,7 +34,7 @@ func TestLabeledAst(x *testing.T) {
 	for _, regex := range []string{
 		"a", "b", "asdf", "s|a", "sdf*", "(sdf)+(asdf)*", "w|(s|e)*(s)+(s?fe)**", "(a|we|f*|s*?)|W(LSD)Adf[23-s]",
 	} {
-		ast, err := frontend.Parse([]rune(regex))
+		ast, err := frontend.Parse([]byte(regex))
 		t.AssertNil(err)
 		verify(frontend.DesugarRanges(ast))
 	}
@@ -59,7 +59,7 @@ func followEquals(follow []map[int]bool, expected [][]int) bool {
 
 func testFollow(x *testing.T, regex string, expectedPos []frontend.AST, expectedFollows [][]int) {
 	t := (*test.T)(x)
-	ast, err := frontend.Parse([]rune(regex))
+	ast, err := frontend.Parse([]byte(regex))
 	t.AssertNil(err)
 	lAst := Label(ast)
 	positions := lAst.Positions
@@ -334,7 +334,7 @@ func TestFollowNested(x *testing.T) {
 func TestMatchesEmptyString_char(x *testing.T) {
 	t := (*test.T)(x)
-	ast, err := frontend.Parse([]rune("a"))
+	ast, err := frontend.Parse([]byte("a"))
 	t.AssertNil(err)
 	nullable := Label(ast).MatchesEmptyString()
 	t.Assert(!nullable[len(nullable)-1], "character should not match the empty string, %v", ast)
 }
@@ -342,7 +342,7 @@ func TestMatchesEmptyString_char(x *testing.T) {
 func TestMatchesEmptyString_range(x *testing.T) {
 	t := (*test.T)(x)
-	ast, err := frontend.Parse([]rune("[a-z]"))
+	ast, err := frontend.Parse([]byte("[a-z]"))
 	t.AssertNil(err)
 	nullable := Label(ast).MatchesEmptyString()
 	t.Assert(!nullable[len(nullable)-1], "range should not match the empty string, %v", ast)
 }
@@ -350,7 +350,7 @@ func TestMatchesEmptyString_range(x *testing.T) {
 
 func TestMatchesEmptyString_maybe(x *testing.T) {
 	t := (*test.T)(x)
-	ast, err := frontend.Parse([]rune("a?"))
+	ast, err := frontend.Parse([]byte("a?"))
 	t.AssertNil(err)
 	nullable := Label(ast).MatchesEmptyString()
 	t.Assert(nullable[len(nullable)-1], "maybe should match the empty string, %v", ast)
@@ -358,7 +358,7 @@ func TestMatchesEmptyString_maybe(x *testing.T) {
 
 func TestMatchesEmptyString_star(x *testing.T) {
 	t := (*test.T)(x)
-	ast, err := frontend.Parse([]rune("a*"))
+	ast, err := frontend.Parse([]byte("a*"))
 	t.AssertNil(err)
 	nullable := Label(ast).MatchesEmptyString()
 	t.Assert(nullable[len(nullable)-1], "star should match the empty string, %v", ast)
@@ -366,11 +366,11 @@ func TestMatchesEmptyString_star(x *testing.T) {
 
 func TestMatchesEmptyString_plus(x *testing.T) {
 	t := (*test.T)(x)
-	ast, err := frontend.Parse([]rune("a+"))
+	ast, err := frontend.Parse([]byte("a+"))
 	t.AssertNil(err)
 	nullable := Label(ast).MatchesEmptyString()
 	t.Assert(!nullable[len(nullable)-1], "a+ should not match the empty string, %v", ast)
-	ast, err = frontend.Parse([]rune("a?+"))
+	ast, err = frontend.Parse([]byte("a?+"))
 	t.AssertNil(err)
 	nullable = Label(ast).MatchesEmptyString()
 	t.Assert(nullable[len(nullable)-1], "a?+ should match the empty string, %v", ast)
@@ -393,7 +393,7 @@ func TestMatchesEmptyString_concat(x *testing.T) {
 }
 
 func testMatchesEmptyString(t *test.T, regex string, matches bool, message string, args ...interface{}) {
-	ast, err := frontend.Parse([]rune(regex))
+	ast, err := frontend.Parse([]byte(regex))
 	t.AssertNil(err)
 	nullable := Label(ast).MatchesEmptyString()
 	t.Assert(nullable[len(nullable)-1] == matches, message, args...)
@@ -429,7 +429,7 @@ func astList(lAst *LabeledAST, l []int) []frontend.AST {
 
 func TestFirst_char(x *testing.T) {
 	t := (*test.T)(x)
-	ast, err := frontend.Parse([]rune("a"))
+	ast, err := frontend.Parse([]byte("a"))
 	t.AssertNil(err)
 	lAst := Label(ast)
 	first := []frontend.AST{
@@ -440,7 +440,7 @@ func TestFirst_char(x *testing.T) {
 
 func TestLast_char(x *testing.T) {
 	t := (*test.T)(x)
-	ast, err := frontend.Parse([]rune("a"))
+	ast, err := frontend.Parse([]byte("a"))
 	t.AssertNil(err)
 	lAst := Label(ast)
 	last := []frontend.AST{
@@ -452,7 +452,7 @@ func TestLast_char(x *testing.T) {
 
 func TestFirst_range(x *testing.T) {
 	t := (*test.T)(x)
-	ast, err := frontend.Parse([]rune("[a-z]"))
+	ast, err := frontend.Parse([]byte("[a-z]"))
 	t.AssertNil(err)
 	lAst := Label(ast)
 	first := []frontend.AST{
@@ -463,7 +463,7 @@ func TestFirst_range(x *testing.T) {
 
 func TestLast_range(x *testing.T) {
 	t := (*test.T)(x)
-	ast, err := frontend.Parse([]rune("[a-z]"))
+	ast, err := frontend.Parse([]byte("[a-z]"))
 	t.AssertNil(err)
 	lAst := Label(ast)
 	last := []frontend.AST{
@@ -475,7 +475,7 @@ func TestLast_range(x *testing.T) {
 
 func TestFirst_ops(x *testing.T) {
 	t := (*test.T)(x)
-	ast, err := frontend.Parse([]rune("a?*+"))
+	ast, err := frontend.Parse([]byte("a?*+"))
 	t.AssertNil(err)
 	lAst := Label(ast)
 	first := []frontend.AST{
@@ -487,7 +487,7 @@ func TestFirst_ops(x *testing.T) {
 
 func TestLast_ops(x *testing.T) {
 	t := (*test.T)(x)
-	ast, err := frontend.Parse([]rune("a*?+"))
+	ast, err := frontend.Parse([]byte("a*?+"))
 	t.AssertNil(err)
 	lAst := Label(ast)
 	last := []frontend.AST{
@@ -499,7 +499,7 @@ func TestLast_ops(x *testing.T) {
 
 func TestFirst_alt(x *testing.T) {
 	t := (*test.T)(x)
-	ast, err := frontend.Parse([]rune("a|b"))
+	ast, err := frontend.Parse([]byte("a|b"))
 	t.AssertNil(err)
 	lAst := Label(ast)
 	first := []frontend.AST{
@@ -511,7 +511,7 @@ func TestFirst_alt(x *testing.T) {
 
 func TestLast_alt(x *testing.T) {
 	t := (*test.T)(x)
-	ast, err := frontend.Parse([]rune("a|b"))
+	ast, err := frontend.Parse([]byte("a|b"))
 	t.AssertNil(err)
 	lAst := Label(ast)
 	last := []frontend.AST{
@@ -524,7 +524,7 @@ func TestLast_alt(x *testing.T) {
 
 func TestFirst_concat(x *testing.T) {
 	t := (*test.T)(x)
frontend.Parse([]rune("a?b?c?de")) + ast, err := frontend.Parse([]byte("a?b?c?de")) t.AssertNil(err) lAst := Label(ast) first := []frontend.AST{ @@ -538,7 +538,7 @@ func TestFirst_concat(x *testing.T) { func TestLast_concat(x *testing.T) { t := (*test.T)(x) - ast, err := frontend.Parse([]rune("abc?d?e?")) + ast, err := frontend.Parse([]byte("abc?d?e?")) t.AssertNil(err) lAst := Label(ast) last := []frontend.AST{ @@ -553,7 +553,7 @@ func TestLast_concat(x *testing.T) { func TestLast_concat2(x *testing.T) { t := (*test.T)(x) - ast, err := frontend.Parse([]rune("abc*d*e*")) + ast, err := frontend.Parse([]byte("abc*d*e*")) t.AssertNil(err) lAst := Label(ast) last := []frontend.AST{ @@ -568,7 +568,7 @@ func TestLast_concat2(x *testing.T) { func TestLast_concat3(x *testing.T) { t := (*test.T)(x) - ast, err := frontend.Parse([]rune("abc+d+e+")) + ast, err := frontend.Parse([]byte("abc+d+e+")) t.AssertNil(err) lAst := Label(ast) last := []frontend.AST{ diff --git a/dfa/gen_test.go b/dfa/gen_test.go index 1a085ea..30b39e2 100644 --- a/dfa/gen_test.go +++ b/dfa/gen_test.go @@ -8,7 +8,7 @@ import ( ) func mustParse(regex string) frontend.AST { - ast, err := frontend.Parse([]rune(regex)) + ast, err := frontend.Parse([]byte(regex)) if err != nil { panic(err) } @@ -16,7 +16,7 @@ func mustParse(regex string) frontend.AST { } func testGen(t *test.T, regex, text string, matchID int) { - ast, err := frontend.Parse([]rune(regex)) + ast, err := frontend.Parse([]byte(regex)) t.AssertNil(err) testGenMatch(t, ast, text, matchID) } diff --git a/doc.go b/doc.go new file mode 100644 index 0000000..efe744d --- /dev/null +++ b/doc.go @@ -0,0 +1,116 @@ +// Package lexmachine is a full lexical analysis framework for the Go +// programming language. It supports a restricted but usable set of regular +// expressions appropriate for writing lexers for complex programming +// languages. The framework also supports sub-lexers and non-regular lexing +// through an "escape hatch" which allows the users to consume any number of +// further bytes after a match. So if you want to support nested C-style +// comments or other paired structures you can do so at the lexical analysis +// stage. +// +// For a tutorial see +// http://hackthology.com/writing-a-lexer-in-go-with-lexmachine.html +// +// Example of defining a lexer +// +// // CreateLexer defines a lexer for the graphviz dot language. +// func CreateLexer() (*lexmachine.Lexer, error) { +// lexer := lexmachine.NewLexer() +// +// for _, lit := range Literals { +// r := "\\" + strings.Join(strings.Split(lit, ""), "\\") +// lexer.Add([]byte(r), token(lit)) +// } +// for _, name := range Keywords { +// lexer.Add([]byte(strings.ToLower(name)), token(name)) +// } +// +// lexer.Add([]byte(`//[^\n]*\n?`), token("COMMENT")) +// lexer.Add([]byte(`/\*([^*]|\r|\n|(\*+([^*/]|\r|\n)))*\*+/`), token("COMMENT")) +// lexer.Add([]byte(`([a-z]|[A-Z])([a-z]|[A-Z]|[0-9]|_)*`), token("ID")) +// lexer.Add([]byte(`"([^\\"]|(\\.))*"`), token("ID")) +// lexer.Add([]byte("( |\t|\n|\r)+"), skip) +// lexer.Add([]byte(`\<`), +// func(scan *lexmachine.Scanner, match *machines.Match) (interface{}, error) { +// str := make([]byte, 0, 10) +// str = append(str, match.Bytes...) 
+//                 brackets := 1
+//                 match.EndLine = match.StartLine
+//                 match.EndColumn = match.StartColumn
+//                 for tc := scan.TC; tc < len(scan.Text); tc++ {
+//                     str = append(str, scan.Text[tc])
+//                     match.EndColumn += 1
+//                     if scan.Text[tc] == '\n' {
+//                         match.EndLine += 1
+//                     }
+//                     if scan.Text[tc] == '<' {
+//                         brackets += 1
+//                     } else if scan.Text[tc] == '>' {
+//                         brackets -= 1
+//                     }
+//                     if brackets == 0 {
+//                         match.TC = scan.TC
+//                         scan.TC = tc + 1
+//                         match.Bytes = str
+//                         return token("ID")(scan, match)
+//                     }
+//                 }
+//                 return nil,
+//                     fmt.Errorf("unclosed HTML literal starting at %d, (%d, %d)",
+//                         match.TC, match.StartLine, match.StartColumn)
+//             },
+//         )
+//
+//         err := lexer.Compile()
+//         if err != nil {
+//             return nil, err
+//         }
+//         return lexer, nil
+//     }
+//
+//     func token(name string) lex.Action {
+//         return func(s *lex.Scanner, m *machines.Match) (interface{}, error) {
+//             return s.Token(TokenIds[name], string(m.Bytes), m), nil
+//         }
+//     }
+//
+// Example of using a lexer
+//
+//     func ExampleLex() error {
+//         lexer, err := CreateLexer()
+//         if err != nil {
+//             return err
+//         }
+//         scanner, err := lexer.Scanner([]byte(`digraph {
+//             rankdir=LR;
+//             a [label="a" shape=box];
+//             c [
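
Context for the []rune to []byte change applied throughout this patch (an aside for review, not part of the patch itself): lexmachine scanners index the input buffer by byte (scan.TC and scan.Text in the doc.go example above), while a rune-based frontend counts code points, so the two coordinate systems disagree as soon as the input contains multibyte UTF-8 and reported token positions drift. Below is a minimal, self-contained Go sketch of that drift; the sample string is arbitrary.

    package main

    import (
        "fmt"
        "unicode/utf8"
    )

    func main() {
        // 'é' and 'ö' each occupy two bytes in UTF-8, so byte offsets and
        // rune indices disagree after the first multibyte character.
        text := "héllo wörld"

        fmt.Println(len(text))                    // 13: length in bytes
        fmt.Println(utf8.RuneCountInString(text)) // 11: length in runes

        // Ranging over a string yields the starting byte offset of each
        // rune -- the coordinate that a []byte-based scanner reports.
        for i, r := range text {
            fmt.Printf("%c starts at byte %d\n", r, i)
        }
    }

Feeding frontend.Parse the same []byte representation that the machines scan appears to be the aim here: positions stay in byte coordinates end to end, instead of passing through a rune count that only matches byte offsets for pure-ASCII input.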