more rewrite of lexer, add simple ints

Author: wires
Date: 2025-03-11 23:00:55 -04:00
Parent: 5af4838924
Commit: e2ba4d16f4
Signed by: wires
SSH key fingerprint: SHA256:9GtP+M3O2IivPDlw1UY872UPUuJH2gI0yG6ExBxaaiM


@@ -1,54 +1,64 @@
 -module(akh_lexer).
--include("akhamoth.hrl").
--export([
-    new/1,
-    next/1
-]).
+-moduledoc """
+This module contains functions for tokenizing Akhamoth source code.
+""".

--type token_kind() ::
-    binary()
-    | '{'
-    | '}'
-    | '['
-    | ']'
-    | '('
-    | ')'
-    | ','
-    | ';'
-    | ':'
-    | '.'
-    | '->'
-    | '+'.
+-export([new/1, next/1]).

--type token() :: {token_kind(), non_neg_integer() | inserted}.
+-define(is_digit(C), C >= $0, C =< $9).
+-define(is_id_start(C), C >= $a, C =< $z; C >= $A, C =< $Z; C =:= $_).

--define(is_id(C), (C >= $A andalso C =< $Z); (C >= $a andalso C =< $z); C =:= $_).
+-doc """
+A token in the input stream
+""".
+-type token() :: any().

 -record(lexer, {
     source :: binary(),
     offset = 0 :: non_neg_integer()
 }).

+%%% exports
+
+-doc """
+Initializes a lexer to tokenize the given binary.
+""".
 -spec new(binary()) -> #lexer{}.
 new(Source) -> #lexer{source = Source}.

+-doc """
+Attempts to get the next token in the input.
+""".
 -spec next(#lexer{}) -> none | {ok, token(), #lexer{}}.
-next(#lexer{source = <<$\n, Rest/bytes>>, offset = Offset} = Lexer) ->
-    next(Lexer#lexer{source = Rest, offset = Offset + 1});
-next(#lexer{source = <<C, _/bytes>>} = Lexer) when ?is_id(C) ->
-    lex_id(Lexer, 1);
+next(#lexer{source = <<C, _/bytes>>} = Lx) when ?is_id_start(C) ->
+    lex_id(Lx, 1);
+next(#lexer{source = <<C, _/bytes>>} = Lx) when ?is_digit(C) ->
+    lex_number(Lx, 1, C - $0);
 next(#lexer{source = <<>>}) ->
     none.

--spec lex_id(#lexer{}, pos_integer()) -> {ok, token(), #lexer{}}.
-lex_id(#lexer{source = Source, offset = Offset} = Lexer, Len) when Len < byte_size(Source) ->
+%%% local functions
+
+lex_id(#lexer{source = Source, offset = Offset} = Lx, Len) when Len < byte_size(Source) ->
     C = binary:at(Source, Len),
     if
-        ?is_id(C) ->
-            lex_id(Lexer, Len + 1);
+        ?is_id_start(C); ?is_digit(C) ->
+            lex_id(Lx, Len + 1);
         true ->
             {Id, Rest} = split_binary(Source, Len),
-            {ok, {Id, Offset}, Lexer#lexer{source = Rest, offset = Offset + Len}}
+            {ok, {id, Offset, Id}, Lx#lexer{source = Rest, offset = Offset + Len}}
     end;
-lex_id(#lexer{source = Source, offset = Offset} = Lexer, Len) ->
-    {ok, {Source, Offset}, Lexer#lexer{source = <<>>, offset = Offset + Len}}.
+lex_id(#lexer{source = Source, offset = Offset} = Lx, Len) ->
+    {ok, {id, Offset, Source}, Lx#lexer{source = <<>>, offset = Offset + Len}}.
+
+lex_number(#lexer{source = Source, offset = Offset} = Lx, Len, Acc) when Len < byte_size(Source) ->
+    C = binary:at(Source, Len),
+    if
+        ?is_digit(C) ->
+            lex_number(Lx, Len + 1, Acc * 10 + C - $0);
+        true ->
+            {_, Rest} = split_binary(Source, Len),
+            {ok, {number, Offset, Acc}, Lx#lexer{source = Rest, offset = Offset + Len}}
+    end;
+lex_number(#lexer{offset = Offset} = Lx, Len, Acc) ->
+    {ok, {number, Offset, Acc}, Lx#lexer{source = <<>>, offset = Offset + Len}}.
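
As a quick sanity check of the new token shapes, the sketch below drives the lexer by hand. collect_tokens/1 is a hypothetical helper, not part of this commit; it assumes the input contains only identifier and digit characters, since next/1 as committed has no clause for whitespace or punctuation.

%% Hypothetical driver (not in this commit): threads the lexer state
%% through akh_lexer:next/1 until it returns none, collecting tokens in order.
collect_tokens(Source) ->
    collect_tokens(akh_lexer:new(Source), []).

collect_tokens(Lx, Acc) ->
    case akh_lexer:next(Lx) of
        none -> lists:reverse(Acc);
        {ok, Token, Rest} -> collect_tokens(Rest, [Token | Acc])
    end.

With the code as committed, collect_tokens(<<"foo">>) should return [{id, 0, <<"foo">>}] and collect_tokens(<<"42">>) should return [{number, 0, 42}]; any byte that is neither an identifier-start character nor a digit has no matching next/1 clause now that the old newline-skipping clause is gone.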