You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
125 lines
3.1 KiB
125 lines
3.1 KiB
package lexer
|
|
|
|
import (
|
|
"bytes"
|
|
"fmt"
|
|
"io"
|
|
"strconv"
|
|
"strings"
|
|
"text/scanner"
|
|
"unicode/utf8"
|
|
)
|
|
|
|
// TextScannerLexer is a lexer that uses the text/scanner module.
|
|
var (
|
|
TextScannerLexer Definition = &defaultDefinition{}
|
|
|
|
// DefaultDefinition defines properties for the default lexer.
|
|
DefaultDefinition = TextScannerLexer
|
|
)
|
|
|
|
type defaultDefinition struct{}
|
|
|
|
func (d *defaultDefinition) Lex(r io.Reader) (Lexer, error) {
|
|
return Lex(r), nil
|
|
}
|
|
|
|
func (d *defaultDefinition) Symbols() map[string]rune {
|
|
return map[string]rune{
|
|
"EOF": scanner.EOF,
|
|
"Char": scanner.Char,
|
|
"Ident": scanner.Ident,
|
|
"Int": scanner.Int,
|
|
"Float": scanner.Float,
|
|
"String": scanner.String,
|
|
"RawString": scanner.RawString,
|
|
"Comment": scanner.Comment,
|
|
}
|
|
}
|
|
|
|
// textScannerLexer is a Lexer based on text/scanner.Scanner
|
|
type textScannerLexer struct {
|
|
scanner *scanner.Scanner
|
|
filename string
|
|
err error
|
|
}
|
|
|
|
// Lex an io.Reader with text/scanner.Scanner.
|
|
//
|
|
// This provides very fast lexing of source code compatible with Go tokens.
|
|
//
|
|
// Note that this differs from text/scanner.Scanner in that string tokens will be unquoted.
|
|
func Lex(r io.Reader) Lexer {
|
|
lexer := lexWithScanner(r, &scanner.Scanner{})
|
|
lexer.scanner.Error = func(s *scanner.Scanner, msg string) {
|
|
// This is to support single quoted strings. Hacky.
|
|
if msg != "illegal char literal" {
|
|
lexer.err = Errorf(Position(lexer.scanner.Pos()), msg)
|
|
}
|
|
}
|
|
return lexer
|
|
}
|
|
|
|
// LexWithScanner creates a Lexer from a user-provided scanner.Scanner.
|
|
//
|
|
// Useful if you need to customise the Scanner.
|
|
func LexWithScanner(r io.Reader, scan *scanner.Scanner) Lexer {
|
|
return lexWithScanner(r, scan)
|
|
}
|
|
|
|
func lexWithScanner(r io.Reader, scan *scanner.Scanner) *textScannerLexer {
|
|
lexer := &textScannerLexer{
|
|
filename: NameOfReader(r),
|
|
scanner: scan,
|
|
}
|
|
lexer.scanner.Init(r)
|
|
return lexer
|
|
}
|
|
|
|
// LexBytes returns a new default lexer over bytes.
|
|
func LexBytes(b []byte) Lexer {
|
|
return Lex(bytes.NewReader(b))
|
|
}
|
|
|
|
// LexString returns a new default lexer over a string.
|
|
func LexString(s string) Lexer {
|
|
return Lex(strings.NewReader(s))
|
|
}
|
|
|
|
func (t *textScannerLexer) Next() (Token, error) {
|
|
typ := t.scanner.Scan()
|
|
text := t.scanner.TokenText()
|
|
pos := Position(t.scanner.Position)
|
|
pos.Filename = t.filename
|
|
if t.err != nil {
|
|
return Token{}, t.err
|
|
}
|
|
return textScannerTransform(Token{
|
|
Type: typ,
|
|
Value: text,
|
|
Pos: pos,
|
|
})
|
|
}
|
|
|
|
func textScannerTransform(token Token) (Token, error) {
|
|
// Unquote strings.
|
|
switch token.Type {
|
|
case scanner.Char:
|
|
// FIXME(alec): This is pretty hacky...we convert a single quoted char into a double
|
|
// quoted string in order to support single quoted strings.
|
|
token.Value = fmt.Sprintf("\"%s\"", token.Value[1:len(token.Value)-1])
|
|
fallthrough
|
|
case scanner.String:
|
|
s, err := strconv.Unquote(token.Value)
|
|
if err != nil {
|
|
return Token{}, Errorf(token.Pos, "%s: %q", err.Error(), token.Value)
|
|
}
|
|
token.Value = s
|
|
if token.Type == scanner.Char && utf8.RuneCountInString(s) > 1 {
|
|
token.Type = scanner.String
|
|
}
|
|
case scanner.RawString:
|
|
token.Value = token.Value[1 : len(token.Value)-1]
|
|
}
|
|
return token, nil
|
|
}
|
|
|