From 3ab3c94411b1d38c8f0f6643ab997efb8d359b2c Mon Sep 17 00:00:00 2001 From: wires Date: Wed, 26 Feb 2025 11:13:18 -0500 Subject: [PATCH] rewrite to iterator style --- erlang_ls.config | 1 - src/akh_lexer.erl | 76 +++++++++++++++++++++++++++++++---------------- src/akhamoth.erl | 9 +++++- src/akhamoth.hrl | 1 + 4 files changed, 60 insertions(+), 27 deletions(-) create mode 100644 src/akhamoth.hrl diff --git a/erlang_ls.config b/erlang_ls.config index 59c560a..1fb6752 100644 --- a/erlang_ls.config +++ b/erlang_ls.config @@ -1,3 +1,2 @@ formatting: formatter: erlfmt - width: 80 diff --git a/src/akh_lexer.erl b/src/akh_lexer.erl index 1c2c09a..a5388bc 100644 --- a/src/akh_lexer.erl +++ b/src/akh_lexer.erl @@ -1,10 +1,31 @@ -module(akh_lexer). --include("akh_source_map.hrl"). --export([tokenize/1]). +-include("akhamoth.hrl"). +-export([ + new/1, + next/1 +]). + +-define(open_delim(T), + T =:= $(; + T =:= $[; + T =:= ${ +). + +-define(close_delim(T), + T =:= $); + T =:= $]; + T =:= $} +). + +-define(op1(T), + T =:= $,; + T =:= $;; + T =:= $.; + T =:= $+ +). -type token_kind() :: - line_comment - | block_comment + comment | atom | identifier | integer @@ -21,26 +42,31 @@ | '->' | '+'. --type token() :: {token_kind(), span()}. +-define(LOC(Line, Column), {{line, Line}, {column, Column}}). +-type location() :: {{line, pos_integer()}, {column, pos_integer()}} | inserted. --spec tokenize(string()) -> {ok, source_map(), [token()]}. -tokenize(Text) -> tokenize(Text, 0, 1, akh_source_map:empty(), []). +-type token() :: {token_kind(), location()}. --spec tokenize( - string(), - Offset :: non_neg_integer(), - Line :: pos_integer(), - SourceMap :: source_map(), - [token()] -) -> {ok, source_map(), [token()]}. -tokenize([], _, _, SourceMap, Tokens) -> - {ok, SourceMap, Tokens}; -tokenize([$( | T], Offset, Line, SourceMap, Tokens) -> - tokenize(T, Offset + 1, Line, SourceMap, [{'(', {Offset, 1}} | Tokens]); -tokenize([$) | T], Offset, Line, SourceMap, Tokens) -> - tokenize(T, Offset + 1, Line, SourceMap, [{')', {Offset, 1}} | Tokens]); -tokenize([$\n | T], Offset, Line, SourceMap, Tokens) -> - NewMap = akh_source_map:insert(Offset, Line, SourceMap), - tokenize(T, Offset + 1, Line + 1, NewMap, Tokens); -tokenize([_ | T], Offset, Line, SourceMap, Tokens) -> - tokenize(T, Offset + 1, Line, SourceMap, [{atom, {Offset, 0}} | Tokens]). +-record(lexer, { + source :: string(), + line = 1 :: pos_integer(), + column = 1 :: pos_integer(), + delim_stack = [] :: [token()] +}). + +-spec new(string()) -> #lexer{}. +new(Source) -> #lexer{source = Source}. + +-spec next(#lexer{}) -> none | {ok, token(), #lexer{}}. +next(#lexer{source = [], delim_stack = []}) -> + none; +next(#lexer{source = [$\n | Rest], line = Line} = Lexer) -> + next(Lexer#lexer{source = Rest, line = Line + 1, column = 1}); +next(#lexer{source = [T | Rest], line = Line, column = Column} = Lexer) when ?op1(T) -> + Token = {list_to_atom([T]), ?LOC(Line, Column)}, + {ok, Token, Lexer#lexer{source = Rest, column = Column + 1}}; +next(#lexer{source = [$-, $> | Rest], line = Line, column = Column} = Lexer) -> + Token = {'->', ?LOC(Line, Column)}, + {ok, Token, Lexer#lexer{source = Rest, column = Column + 2}}; +next(_) -> + none. diff --git a/src/akhamoth.erl b/src/akhamoth.erl index bfb9730..84bb7bd 100644 --- a/src/akhamoth.erl +++ b/src/akhamoth.erl @@ -6,4 +6,11 @@ compile(Path) -> {ok, SrcBin} = file:read_file(Path), Src = unicode:characters_to_list(SrcBin), - akh_lexer:tokenize(Src). + Lexer = akh_lexer:new(Src), + collect(Lexer, []). + +collect(Lexer, Acc) -> + case akh_lexer:next(Lexer) of + none -> Acc; + {ok, T, L} -> collect(L, [T | Acc]) + end. diff --git a/src/akhamoth.hrl b/src/akhamoth.hrl new file mode 100644 index 0000000..48a2d39 --- /dev/null +++ b/src/akhamoth.hrl @@ -0,0 +1 @@ +-type err() :: any().