-module(akh_lex).
-moduledoc """
This module contains functions for tokenizing Akhamoth source code.
""".

-export([
    new/1,
    source_map/1,
    next/1
]).

-export_type([lexer/0]).

%% Character-class guard macros.
%%
%% Each macro expands to a bare guard sequence (`,` = AND, `;` = OR), so it is
%% only valid directly after `when`. Because `;` binds looser than `,`, join a
%% macro with further alternatives using `;` only; prefixing it with
%% `Cond, ?macro(C)` would apply `Cond` to the first alternative alone.
-define(is_digit(C), C >= $0, C =< $9).
-define(is_id_start(C), C >= $a, C =< $z; C >= $A, C =< $Z; C =:= $_).
-define(is_space(C), C =:= $\s; C =:= $\t).

-doc """
Tokens for which the category is the same as the content.
""".
-type token_simple() :: '(' | ')' | '[' | ']' | '{' | '}'.

-doc """
Tokens for which there is content beyond the category.
""".
-type token_complex() :: id | number | unknown.

-doc """
A token in the input stream.

Simple tokens carry only their position (or `inserted`); complex tokens carry
their position and their length.
""".
-type token() ::
    {token_simple(), Position :: non_neg_integer() | inserted}
    | {token_complex(), Position :: non_neg_integer(), Length :: pos_integer()}.

%% Lexer state: the input that has not been consumed yet, the offset the lexer
%% has advanced to, and the source map that receives line-start offsets.
-record(lexer, {
    source :: binary(),
    offset = 0 :: non_neg_integer(),
    source_map :: akh_source_map:source_map()
}).

-opaque lexer() :: #lexer{}.

-type return() :: none | {ok, token(), lexer()}.

%%% exports

-doc """
Initializes a lexer to tokenize the given binary.

The lexer starts at offset 0 with a fresh source map.
""".
-spec new(binary()) -> lexer().
new(Source) ->
    #lexer{source = Source, source_map = akh_source_map:new()}.

-doc """
Returns the source map for a lexer.
""".
-spec source_map(lexer()) -> akh_source_map:source_map().
source_map(#lexer{source_map = SourceMap}) ->
    SourceMap.

-doc """
next(Lexer)

Attempts to get the next token in the input.

Returns `none` once the input is exhausted, otherwise `{ok, Token, Lexer}`
where `Lexer` is the state to pass to the following call.
""".
-spec next(lexer()) -> none | {ok, token(), lexer()}.
%% Clause order matters: identifiers, numbers, blanks, line breaks and brackets
%% are tried first; the empty binary ends the stream; any other byte falls
%% through to the final clause and is reported as an `unknown` token.
next(#lexer{source = <<C, Rest/bytes>>} = Lx) when ?is_id_start(C) ->
    lex_id(Lx#lexer{source = Rest}, 1);
next(#lexer{source = <<C, Rest/bytes>>} = Lx) when ?is_digit(C) ->
    lex_number(Lx#lexer{source = Rest}, 1);
next(#lexer{source = <<C, Rest/bytes>>, offset = Offset} = Lx) when ?is_space(C) ->
    %% Blanks produce no token; skip the byte and keep scanning.
    next(Lx#lexer{source = Rest, offset = Offset + 1});
next(#lexer{source = <<$\n, Rest/bytes>>, offset = Offset} = Lx) ->
    new_line(Lx#lexer{source = Rest, offset = Offset + 1});
next(#lexer{source = <<$\r, $\n, Rest/bytes>>, offset = Offset} = Lx) ->
    %% CRLF counts as a single line break that is two bytes wide.
    new_line(Lx#lexer{source = Rest, offset = Offset + 2});
next(#lexer{source = <<C, Rest/bytes>>, offset = Offset} = Lx) when
    C =:= $(; C =:= $); C =:= $[; C =:= $]; C =:= ${; C =:= $}
->
    %% Brackets are one byte wide and carry only their position.
    {ok, {simple_token(C), Offset}, Lx#lexer{source = Rest, offset = Offset + 1}};
next(#lexer{source = <<>>}) ->
    none;
next(#lexer{source = <<_, Rest/bytes>>, offset = Offset} = Lx) ->
    %% Any other byte (including a lone carriage return) becomes a one-byte
    %% `unknown` token instead of crashing the lexer with function_clause.
    {ok, {unknown, Offset, 1}, Lx#lexer{source = Rest, offset = Offset + 1}}.

%%% local functions

%% Maps a bracket byte to the atom used as its token category.
-spec simple_token(byte()) -> token_simple().
simple_token($() -> '(';
simple_token($)) -> ')';
simple_token($[) -> '[';
simple_token($]) -> ']';
simple_token(${) -> '{';
simple_token($}) -> '}'.

%% Consumes the remaining identifier bytes (identifier-start characters or
%% digits). `Len` counts the bytes consumed so far; the lexer offset still
%% points at the first byte of the identifier and is only advanced once the
%% token is emitted.
-spec lex_id(#lexer{}, pos_integer()) -> return().
lex_id(#lexer{source = <<C, Rest/bytes>>} = Lx, Len) when ?is_id_start(C); ?is_digit(C) ->
    lex_id(Lx#lexer{source = Rest}, Len + 1);
lex_id(#lexer{offset = Offset} = Lx, Len) ->
    {ok, {id, Offset, Len}, Lx#lexer{offset = Offset + Len}}.

%% Consumes the remaining bytes of a number: digits, with `_` accepted between
%% them. `Len` and the offset are handled exactly as in lex_id/2.
-spec lex_number(#lexer{}, pos_integer()) -> return().
lex_number(#lexer{source = <<C, Rest/bytes>>} = Lx, Len) when ?is_digit(C); C =:= $_ ->
    lex_number(Lx#lexer{source = Rest}, Len + 1);
lex_number(#lexer{offset = Offset} = Lx, Len) ->
    {ok, {number, Offset, Len}, Lx#lexer{offset = Offset + Len}}.

%% Records the current offset (the first byte after the line break) in the
%% source map, then continues lexing.
-spec new_line(#lexer{}) -> return().
new_line(#lexer{offset = Offset, source_map = SourceMap} = Lx) ->
    next(Lx#lexer{source_map = akh_source_map:insert(Offset, SourceMap)}).