diff --git a/src/akh_lexer.erl b/src/akh_lexer.erl index a5388bc..cdf53c6 100644 --- a/src/akh_lexer.erl +++ b/src/akh_lexer.erl @@ -5,30 +5,8 @@ next/1 ]). --define(open_delim(T), - T =:= $(; - T =:= $[; - T =:= ${ -). - --define(close_delim(T), - T =:= $); - T =:= $]; - T =:= $} -). - --define(op1(T), - T =:= $,; - T =:= $;; - T =:= $.; - T =:= $+ -). - -type token_kind() :: - comment - | atom - | identifier - | integer + binary() | '{' | '}' | '[' @@ -42,31 +20,35 @@ | '->' | '+'. --define(LOC(Line, Column), {{line, Line}, {column, Column}}). --type location() :: {{line, pos_integer()}, {column, pos_integer()}} | inserted. +-type token() :: {token_kind(), non_neg_integer() | inserted}. --type token() :: {token_kind(), location()}. +-define(is_id(C), (C >= $A andalso C =< $Z); (C >= $a andalso C =< $z); C =:= $_). -record(lexer, { - source :: string(), - line = 1 :: pos_integer(), - column = 1 :: pos_integer(), - delim_stack = [] :: [token()] + source :: binary(), + offset = 0 :: non_neg_integer() }). --spec new(string()) -> #lexer{}. +-spec new(binary()) -> #lexer{}. new(Source) -> #lexer{source = Source}. -spec next(#lexer{}) -> none | {ok, token(), #lexer{}}. 
-next(#lexer{source = [], delim_stack = []}) -> - none; -next(#lexer{source = [$\n | Rest], line = Line} = Lexer) -> - next(Lexer#lexer{source = Rest, line = Line + 1, column = 1}); -next(#lexer{source = [T | Rest], line = Line, column = Column} = Lexer) when ?op1(T) -> - Token = {list_to_atom([T]), ?LOC(Line, Column)}, - {ok, Token, Lexer#lexer{source = Rest, column = Column + 1}}; -next(#lexer{source = [$-, $> | Rest], line = Line, column = Column} = Lexer) -> - Token = {'->', ?LOC(Line, Column)}, - {ok, Token, Lexer#lexer{source = Rest, column = Column + 2}}; -next(_) -> +next(#lexer{source = <<$\n, Rest/bytes>>, offset = Offset} = Lexer) -> + next(Lexer#lexer{source = Rest, offset = Offset + 1}); +next(#lexer{source = <<C, _/bytes>>} = Lexer) when ?is_id(C) -> + lex_id(Lexer, 1); +next(#lexer{source = <<>>}) -> none. + +-spec lex_id(#lexer{}, pos_integer()) -> {ok, token(), #lexer{}}. +lex_id(#lexer{source = Source, offset = Offset} = Lexer, Len) when Len < byte_size(Source) -> + C = binary:at(Source, Len), + if + ?is_id(C) -> + lex_id(Lexer, Len + 1); + true -> + {Id, Rest} = split_binary(Source, Len), + {ok, {Id, Offset}, Lexer#lexer{source = Rest, offset = Offset + Len}} + end; +lex_id(#lexer{source = Source, offset = Offset} = Lexer, Len) -> + {ok, {Source, Offset}, Lexer#lexer{source = <<>>, offset = Offset + Len}}. diff --git a/src/akhamoth.erl b/src/akhamoth.erl index 84bb7bd..26dca55 100644 --- a/src/akhamoth.erl +++ b/src/akhamoth.erl @@ -1,11 +1,9 @@ -module(akhamoth). --export([compile/1]). +-export([compile/1, collect/2]). --spec compile(Path :: file:name_all()) -> any(). compile(Path) -> - {ok, SrcBin} = file:read_file(Path), - Src = unicode:characters_to_list(SrcBin), + {ok, Src} = file:read_file(Path), Lexer = akh_lexer:new(Src), collect(Lexer, []).