diff --git a/src/akh_lexer.erl b/src/akh_lexer.erl
index cdf53c6..7385b9f 100644
--- a/src/akh_lexer.erl
+++ b/src/akh_lexer.erl
@@ -1,54 +1,64 @@
 -module(akh_lexer).
--include("akhamoth.hrl").
--export([
-    new/1,
-    next/1
-]).
+-moduledoc """
+This module contains functions for tokenizing Akhamoth source code.
+""".
 
--type token_kind() ::
-    binary()
-    | '{'
-    | '}'
-    | '['
-    | ']'
-    | '('
-    | ')'
-    | ','
-    | ';'
-    | ':'
-    | '.'
-    | '->'
-    | '+'.
+-export([new/1, next/1]).
 
--type token() :: {token_kind(), non_neg_integer() | inserted}.
+-define(is_digit(C), C >= $0, C =< $9).
+-define(is_id_start(C), C >= $a, C =< $z; C >= $A, C =< $Z; C =:= $_).
 
--define(is_id(C), (C >= $A andalso C =< $Z); (C >= $a andalso C =< $z); C =:= $_).
+-doc """
+A token in the input stream
+""".
+-type token() :: any().
 
 -record(lexer, {
     source :: binary(),
     offset = 0 :: non_neg_integer()
 }).
 
+%%% exports
+
+-doc """
+Initializes a lexer to tokenize the given binary.
+""".
 -spec new(binary()) -> #lexer{}.
 new(Source) -> #lexer{source = Source}.
 
+-doc """
+Attempts to get the next token in the input.
+""".
 -spec next(#lexer{}) -> none | {ok, token(), #lexer{}}.
-next(#lexer{source = <<$\n, Rest/bytes>>, offset = Offset} = Lexer) ->
-    next(Lexer#lexer{source = Rest, offset = Offset + 1});
-next(#lexer{source = <<C, _/bytes>>} = Lexer) when ?is_id(C) ->
-    lex_id(Lexer, 1);
+next(#lexer{source = <<C, _/bytes>>} = Lx) when ?is_id_start(C) ->
+    lex_id(Lx, 1);
+next(#lexer{source = <<C, _/bytes>>} = Lx) when ?is_digit(C) ->
+    lex_number(Lx, 1, C - $0);
 next(#lexer{source = <<>>}) ->
     none.
 
--spec lex_id(#lexer{}, pos_integer()) -> {ok, token(), #lexer{}}.
-lex_id(#lexer{source = Source, offset = Offset} = Lexer, Len) when Len < byte_size(Source) ->
+%%% local functions
+
+lex_id(#lexer{source = Source, offset = Offset} = Lx, Len) when Len < byte_size(Source) ->
     C = binary:at(Source, Len),
     if
-        ?is_id(C) ->
-            lex_id(Lexer, Len + 1);
+        ?is_id_start(C); ?is_digit(C) ->
+            lex_id(Lx, Len + 1);
         true ->
             {Id, Rest} = split_binary(Source, Len),
-            {ok, {Id, Offset}, Lexer#lexer{source = Rest, offset = Offset + Len}}
+            {ok, {id, Offset, Id}, Lx#lexer{source = Rest, offset = Offset + Len}}
     end;
-lex_id(#lexer{source = Source, offset = Offset} = Lexer, Len) ->
-    {ok, {Source, Offset}, Lexer#lexer{source = <<>>, offset = Offset + Len}}.
+lex_id(#lexer{source = Source, offset = Offset} = Lx, Len) ->
+    {ok, {id, Offset, Source}, Lx#lexer{source = <<>>, offset = Offset + Len}}.
+
+lex_number(#lexer{source = Source, offset = Offset} = Lx, Len, Acc) when Len < byte_size(Source) ->
+    C = binary:at(Source, Len),
+    if
+        ?is_digit(C) ->
+            lex_number(Lx, Len + 1, Acc * 10 + C - $0);
+        true ->
+            {_, Rest} = split_binary(Source, Len),
+            {ok, {number, Offset, Acc}, Lx#lexer{source = Rest, offset = Offset + Len}}
+    end;
+lex_number(#lexer{offset = Offset} = Lx, Len, Acc) ->
+    {ok, {number, Offset, Acc}, Lx#lexer{source = <<>>, offset = Offset + Len}}.
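
Below is a minimal usage sketch, not part of the diff, of how the new/1 and next/1 API from this change might be driven end to end: construct a lexer, then call next/1 until it returns `none`, accumulating the {Kind, Offset, Value} tokens. The wrapper module and function names (akh_lexer_example, tokens/1, collect/2) are hypothetical, and the sketch assumes the input contains only identifier and digit characters, since at this revision next/1 has no clause for whitespace or punctuation.

-module(akh_lexer_example).
-export([tokens/1]).

%% Tokenize an entire binary by draining the lexer into a list.
-spec tokens(binary()) -> [tuple()].
tokens(Source) ->
    collect(akh_lexer:new(Source), []).

%% Pull tokens one at a time until the lexer reports `none`.
collect(Lx, Acc) ->
    case akh_lexer:next(Lx) of
        none -> lists:reverse(Acc);
        {ok, Token, Rest} -> collect(Rest, [Token | Acc])
    end.

%% Expected results given the clauses in this diff:
%% akh_lexer_example:tokens(<<"foo42">>) -> [{id,0,<<"foo42">>}]
%% akh_lexer_example:tokens(<<"123">>)   -> [{number,0,123}]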