switch to binaries, parse identifiers

This commit is contained in:
wires 2025-02-26 18:45:15 -05:00
parent 5e24400ecb
commit c36f1a348c
Signed by: wires
SSH key fingerprint: SHA256:9GtP+M3O2IivPDlw1UY872UPUuJH2gI0yG6ExBxaaiM
2 changed files with 26 additions and 46 deletions

View file

@ -5,30 +5,8 @@
next/1 next/1
]). ]).
-define(open_delim(T),
T =:= $(;
T =:= $[;
T =:= ${
).
-define(close_delim(T),
T =:= $);
T =:= $];
T =:= $}
).
-define(op1(T),
T =:= $,;
T =:= $;;
T =:= $.;
T =:= $+
).
-type token_kind() :: -type token_kind() ::
comment binary()
| atom
| identifier
| integer
| '{' | '{'
| '}' | '}'
| '[' | '['
@ -42,31 +20,35 @@
| '->' | '->'
| '+'. | '+'.
-define(LOC(Line, Column), {{line, Line}, {column, Column}}). -type token() :: {token_kind(), non_neg_integer() | inserted}.
-type location() :: {{line, pos_integer()}, {column, pos_integer()}} | inserted.
-type token() :: {token_kind(), location()}. -define(is_id(C), (C >= $A andalso C =< $Z); (C >= $a andalso C =< $z); C =:= $_).
-record(lexer, { -record(lexer, {
source :: string(), source :: binary(),
line = 1 :: pos_integer(), offset = 0 :: non_neg_integer()
column = 1 :: pos_integer(),
delim_stack = [] :: [token()]
}). }).
-spec new(string()) -> #lexer{}. -spec new(binary()) -> #lexer{}.
new(Source) -> #lexer{source = Source}. new(Source) -> #lexer{source = Source}.
-spec next(#lexer{}) -> none | {ok, token(), #lexer{}}. -spec next(#lexer{}) -> none | {ok, token(), #lexer{}}.
next(#lexer{source = [], delim_stack = []}) -> next(#lexer{source = <<$\n, Rest/bytes>>, offset = Offset} = Lexer) ->
none; next(Lexer#lexer{source = Rest, offset = Offset + 1});
next(#lexer{source = [$\n | Rest], line = Line} = Lexer) -> next(#lexer{source = <<C, _/bytes>>} = Lexer) when ?is_id(C) ->
next(Lexer#lexer{source = Rest, line = Line + 1, column = 1}); lex_id(Lexer, 1);
next(#lexer{source = [T | Rest], line = Line, column = Column} = Lexer) when ?op1(T) -> next(#lexer{source = <<>>}) ->
Token = {list_to_atom([T]), ?LOC(Line, Column)},
{ok, Token, Lexer#lexer{source = Rest, column = Column + 1}};
next(#lexer{source = [$-, $> | Rest], line = Line, column = Column} = Lexer) ->
Token = {'->', ?LOC(Line, Column)},
{ok, Token, Lexer#lexer{source = Rest, column = Column + 2}};
next(_) ->
none. none.
%% Scans an identifier that starts at the beginning of the lexer's source.
%%
%% `Len' is the number of leading bytes already accepted as identifier
%% characters (as classified by the ?is_id macro). The byte at index `Len'
%% is inspected next: if it is an identifier character the scan widens by
%% one byte, otherwise the first `Len' bytes are emitted as the token.
%%
%% Returns `{ok, {Id, Offset}, Lexer1}' where `Id' is the identifier's bytes
%% as a binary, `Offset' is the lexer offset at which the identifier began,
%% and `Lexer1' has the identifier removed from `source' and `offset'
%% advanced by `Len'.
-spec lex_id(#lexer{}, pos_integer()) -> {ok, token(), #lexer{}}.
lex_id(#lexer{source = Bin, offset = Start} = Lexer, Len) when Len < byte_size(Bin) ->
    case binary:at(Bin, Len) of
        Next when ?is_id(Next) ->
            %% Still inside the identifier: accept one more byte.
            lex_id(Lexer, Len + 1);
        _NotId ->
            %% First non-identifier byte found: cut the source at Len.
            <<Id:Len/bytes, Tail/bytes>> = Bin,
            Advanced = Lexer#lexer{source = Tail, offset = Start + Len},
            {ok, {Id, Start}, Advanced}
    end;
lex_id(#lexer{source = Bin, offset = Start} = Lexer, Len) ->
    %% No byte left to inspect: the whole remaining source is the identifier.
    Drained = Lexer#lexer{source = <<>>, offset = Start + Len},
    {ok, {Bin, Start}, Drained}.

View file

@ -1,11 +1,9 @@
-module(akhamoth). -module(akhamoth).
-export([compile/1]). -export([compile/1, collect/2]).
-spec compile(Path :: file:name_all()) -> any().
compile(Path) -> compile(Path) ->
{ok, SrcBin} = file:read_file(Path), {ok, Src} = file:read_file(Path),
Src = unicode:characters_to_list(SrcBin),
Lexer = akh_lexer:new(Src), Lexer = akh_lexer:new(Src),
collect(Lexer, []). collect(Lexer, []).