-module(akh_lexer).
-moduledoc """
This module contains functions for tokenizing Akhamoth source code.

A lexer is created with `new/1` and advanced one token at a time with
`next/1`, which returns the token together with the updated lexer state.
""".

-export([new/1, next/1]).

%% Guard-safe character class tests. `?is_id_start/1` expands to a guard
%% sequence containing `;` (OR), so it must only be used where a complete
%% guard sequence is allowed (a `when` clause or an `if` branch).
-define(is_digit(C), C >= $0, C =< $9).
-define(is_id_start(C), C >= $a, C =< $z; C >= $A, C =< $Z; C =:= $_).

-doc """
A token in the input stream.

The lexer in this module produces `{id, Offset, Name}` and
`{number, Offset, Value}` tuples, where `Offset` is the byte offset of the
first byte of the token in the original input.
""".
-type token() :: any().

-record(lexer, {
    %% Input that has not been consumed yet.
    source :: binary(),
    %% Byte offset of `source` relative to the start of the original input.
    offset = 0 :: non_neg_integer()
}).

%%% exports

-doc """
Initializes a lexer to tokenize the given binary.
""".
-spec new(binary()) -> #lexer{}.
new(Source) ->
    #lexer{source = Source}.

-doc """
Attempts to get the next token in the input.

Returns `none` when the input is exhausted, otherwise `{ok, Token, Lexer}`
where `Lexer` is positioned just past `Token`.

Only identifiers and decimal numbers are recognised; any other leading byte
matches no clause and raises `function_clause`.
""".
-spec next(#lexer{}) -> none | {ok, token(), #lexer{}}.
next(#lexer{source = <<C, _/binary>>} = Lx) when ?is_id_start(C) ->
    lex_id(Lx, 1);
next(#lexer{source = <<C, _/binary>>} = Lx) when ?is_digit(C) ->
    lex_number(Lx, 1, C - $0);
next(#lexer{source = <<>>}) ->
    none.

%%% local functions

%% Scans an identifier. `Len` is the number of leading bytes of `source`
%% already accepted as part of the identifier. Scanning continues while the
%% next byte is an identifier-start character or a digit.
-spec lex_id(#lexer{}, pos_integer()) -> {ok, token(), #lexer{}}.
lex_id(#lexer{source = Source, offset = Offset} = Lx, Len) when Len < byte_size(Source) ->
    C = binary:at(Source, Len),
    if
        ?is_id_start(C); ?is_digit(C) ->
            lex_id(Lx, Len + 1);
        true ->
            {Id, Rest} = split_binary(Source, Len),
            {ok, {id, Offset, Id}, Lx#lexer{source = Rest, offset = Offset + Len}}
    end;
lex_id(#lexer{source = Source, offset = Offset} = Lx, Len) ->
    %% The identifier runs to the end of the input.
    {ok, {id, Offset, Source}, Lx#lexer{source = <<>>, offset = Offset + Len}}.

%% Scans a decimal number. `Len` is the number of leading bytes of `source`
%% already consumed and `Acc` is the integer value of those digits.
-spec lex_number(#lexer{}, pos_integer(), non_neg_integer()) -> {ok, token(), #lexer{}}.
lex_number(#lexer{source = Source, offset = Offset} = Lx, Len, Acc) when Len < byte_size(Source) ->
    C = binary:at(Source, Len),
    if
        ?is_digit(C) ->
            lex_number(Lx, Len + 1, Acc * 10 + C - $0);
        true ->
            {_, Rest} = split_binary(Source, Len),
            {ok, {number, Offset, Acc}, Lx#lexer{source = Rest, offset = Offset + Len}}
    end;
lex_number(#lexer{offset = Offset} = Lx, Len, Acc) ->
    %% The number runs to the end of the input.
    {ok, {number, Offset, Acc}, Lx#lexer{source = <<>>, offset = Offset + Len}}.