switch to binaries, parse identifiers
parent 5e24400ecb
commit c36f1a348c
2 changed files with 26 additions and 46 deletions
akh_lexer.erl
@@ -5,30 +5,8 @@
     next/1
 ]).

--define(open_delim(T),
-    T =:= $(;
-    T =:= $[;
-    T =:= ${
-).
-
--define(close_delim(T),
-    T =:= $);
-    T =:= $];
-    T =:= $}
-).
-
--define(op1(T),
-    T =:= $,;
-    T =:= $;;
-    T =:= $.;
-    T =:= $+
-).
-
 -type token_kind() ::
-    comment
-    | atom
-    | identifier
-    | integer
+    binary()
     | '{'
     | '}'
     | '['
@@ -42,31 +20,35 @@
     | '->'
     | '+'.

--define(LOC(Line, Column), {{line, Line}, {column, Column}}).
--type location() :: {{line, pos_integer()}, {column, pos_integer()}} | inserted.
+-type token() :: {token_kind(), non_neg_integer() | inserted}.

--type token() :: {token_kind(), location()}.
+-define(is_id(C), (C >= $A andalso C =< $Z); (C >= $a andalso C =< $z); C =:= $_).

 -record(lexer, {
-    source :: string(),
-    line = 1 :: pos_integer(),
-    column = 1 :: pos_integer(),
-    delim_stack = [] :: [token()]
+    source :: binary(),
+    offset = 0 :: non_neg_integer()
 }).

--spec new(string()) -> #lexer{}.
+-spec new(binary()) -> #lexer{}.
 new(Source) -> #lexer{source = Source}.

 -spec next(#lexer{}) -> none | {ok, token(), #lexer{}}.
-next(#lexer{source = [], delim_stack = []}) ->
-    none;
-next(#lexer{source = [$\n | Rest], line = Line} = Lexer) ->
-    next(Lexer#lexer{source = Rest, line = Line + 1, column = 1});
-next(#lexer{source = [T | Rest], line = Line, column = Column} = Lexer) when ?op1(T) ->
-    Token = {list_to_atom([T]), ?LOC(Line, Column)},
-    {ok, Token, Lexer#lexer{source = Rest, column = Column + 1}};
-next(#lexer{source = [$-, $> | Rest], line = Line, column = Column} = Lexer) ->
-    Token = {'->', ?LOC(Line, Column)},
-    {ok, Token, Lexer#lexer{source = Rest, column = Column + 2}};
-next(_) ->
+next(#lexer{source = <<$\n, Rest/bytes>>, offset = Offset} = Lexer) ->
+    next(Lexer#lexer{source = Rest, offset = Offset + 1});
+next(#lexer{source = <<C, _/bytes>>} = Lexer) when ?is_id(C) ->
+    lex_id(Lexer, 1);
+next(#lexer{source = <<>>}) ->
     none.
+
+-spec lex_id(#lexer{}, pos_integer()) -> {ok, token(), #lexer{}}.
+lex_id(#lexer{source = Source, offset = Offset} = Lexer, Len) when Len < byte_size(Source) ->
+    C = binary:at(Source, Len),
+    if
+        ?is_id(C) ->
+            lex_id(Lexer, Len + 1);
+        true ->
+            {Id, Rest} = split_binary(Source, Len),
+            {ok, {Id, Offset}, Lexer#lexer{source = Rest, offset = Offset + Len}}
+    end;
+lex_id(#lexer{source = Source, offset = Offset} = Lexer, Len) ->
+    {ok, {Source, Offset}, Lexer#lexer{source = <<>>, offset = Offset + Len}}.
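The new next/1 walks the source binary by byte offset instead of a character list with line/column bookkeeping: a leading newline just bumps the offset, an identifier start character hands off to lex_id/2, which scans to the first non-identifier byte and emits the identifier sub-binary paired with its starting offset. A small sketch of how the clauses compose (not part of this commit; it only exercises new/1 and next/1 as defined above):

%% Each token is the identifier sub-binary plus its byte offset.
L0 = akh_lexer:new(<<"foo\nbar">>),
{ok, {<<"foo">>, 0}, L1} = akh_lexer:next(L0),
{ok, {<<"bar">>, 4}, L2} = akh_lexer:next(L1),  % the newline at offset 3 is skipped
none = akh_lexer:next(L2).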
akhamoth.erl
@@ -1,11 +1,9 @@
 -module(akhamoth).

--export([compile/1]).
+-export([compile/1, collect/2]).

--spec compile(Path :: file:name_all()) -> any().
 compile(Path) ->
-    {ok, SrcBin} = file:read_file(Path),
-    Src = unicode:characters_to_list(SrcBin),
+    {ok, Src} = file:read_file(Path),
     Lexer = akh_lexer:new(Src),
     collect(Lexer, []).
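collect/2 is now exported alongside compile/1, but its body lies outside the changed lines shown here. A plausible shape, consistent with the call collect(Lexer, []) and the none | {ok, Token, Lexer} contract of akh_lexer:next/1, would be (an assumption, not the code in this commit):

%% Assumed implementation; the actual definition is not part of this diff.
collect(Lexer, Acc) ->
    case akh_lexer:next(Lexer) of
        none -> lists:reverse(Acc);
        {ok, Token, Next} -> collect(Next, [Token | Acc])
    end.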