more rewrite of lexer, add simple ints
This commit is contained in:
parent
5af4838924
commit
e2ba4d16f4
1 changed file with 42 additions and 32 deletions
@@ -1,54 +1,64 @@
 -module(akh_lexer).
--include("akhamoth.hrl").
--export([
-    new/1,
-    next/1
-]).
+-moduledoc """
+This module contains functions for tokenizing Akhamoth source code.
+""".
 
--type token_kind() ::
-    binary()
-    | '{'
-    | '}'
-    | '['
-    | ']'
-    | '('
-    | ')'
-    | ','
-    | ';'
-    | ':'
-    | '.'
-    | '->'
-    | '+'.
+-export([new/1, next/1]).
 
--type token() :: {token_kind(), non_neg_integer() | inserted}.
+-define(is_digit(C), C >= $0, C =< $9).
+-define(is_id_start(C), C >= $a, C =< $z; C >= $A, C =< $Z; C =:= $_).
 
--define(is_id(C), (C >= $A andalso C =< $Z); (C >= $a andalso C =< $z); C =:= $_).
+-doc """
+A token in the input stream
+""".
+-type token() :: any().
 
 -record(lexer, {
     source :: binary(),
     offset = 0 :: non_neg_integer()
 }).
 
+%%% exports
+
+-doc """
+Initializes a lexer to tokenize the given binary.
+""".
 -spec new(binary()) -> #lexer{}.
 new(Source) -> #lexer{source = Source}.
 
+-doc """
+Attempts to get the next token in the input.
+""".
 -spec next(#lexer{}) -> none | {ok, token(), #lexer{}}.
-next(#lexer{source = <<$\n, Rest/bytes>>, offset = Offset} = Lexer) ->
-    next(Lexer#lexer{source = Rest, offset = Offset + 1});
-next(#lexer{source = <<C, _/bytes>>} = Lexer) when ?is_id(C) ->
-    lex_id(Lexer, 1);
+next(#lexer{source = <<C, _/bytes>>} = Lx) when ?is_id_start(C) ->
+    lex_id(Lx, 1);
+next(#lexer{source = <<C, _/bytes>>} = Lx) when ?is_digit(C) ->
+    lex_number(Lx, 1, C - $0);
 next(#lexer{source = <<>>}) ->
     none.
 
--spec lex_id(#lexer{}, pos_integer()) -> {ok, token(), #lexer{}}.
-lex_id(#lexer{source = Source, offset = Offset} = Lexer, Len) when Len < byte_size(Source) ->
+%%% local functions
+
+lex_id(#lexer{source = Source, offset = Offset} = Lx, Len) when Len < byte_size(Source) ->
     C = binary:at(Source, Len),
     if
-        ?is_id(C) ->
-            lex_id(Lexer, Len + 1);
+        ?is_id_start(C); ?is_digit(C) ->
+            lex_id(Lx, Len + 1);
         true ->
             {Id, Rest} = split_binary(Source, Len),
-            {ok, {Id, Offset}, Lexer#lexer{source = Rest, offset = Offset + Len}}
+            {ok, {id, Offset, Id}, Lx#lexer{source = Rest, offset = Offset + Len}}
     end;
-lex_id(#lexer{source = Source, offset = Offset} = Lexer, Len) ->
-    {ok, {Source, Offset}, Lexer#lexer{source = <<>>, offset = Offset + Len}}.
+lex_id(#lexer{source = Source, offset = Offset} = Lx, Len) ->
+    {ok, {id, Offset, Source}, Lx#lexer{source = <<>>, offset = Offset + Len}}.
+
+lex_number(#lexer{source = Source, offset = Offset} = Lx, Len, Acc) when Len < byte_size(Source) ->
+    C = binary:at(Source, Len),
+    if
+        ?is_digit(C) ->
+            lex_number(Lx, Len + 1, Acc * 10 + C - $0);
+        true ->
+            {_, Rest} = split_binary(Source, Len),
+            {ok, {number, Offset, Acc}, Lx#lexer{source = Rest, offset = Offset + Len}}
+    end;
+lex_number(#lexer{offset = Offset} = Lx, Len, Acc) ->
+    {ok, {number, Offset, Acc}, Lx#lexer{source = <<>>, offset = Offset + Len}}.
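A quick sketch of the resulting API, for review purposes. The drain/2 helper below is hypothetical (not part of this commit); it just pulls tokens until next/1 reports none. Note that this revision drops the old $\n clause, so whitespace is not yet skipped, and the sample inputs therefore contain none.

%% Hypothetical helper, not part of this commit: collect tokens
%% in order until next/1 reports 'none'.
drain(Lx, Acc) ->
    case akh_lexer:next(Lx) of
        none -> lists:reverse(Acc);
        {ok, Tok, Lx1} -> drain(Lx1, [Tok | Acc])
    end.

%% Expected results, per the clauses in the diff above:
%%   drain(akh_lexer:new(<<"abc">>), []) =:= [{id, 0, <<"abc">>}]
%%   drain(akh_lexer:new(<<"42">>), [])  =:= [{number, 0, 42}]

Tokens now carry their payload inside the tuple, {id, Offset, Binary} and {number, Offset, Value}, rather than the old {Value, Offset} shape, so the caller can match on the kind atom directly.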