Compare commits
No commits in common. "d3e0bab29abe9495399c82accaa688bb6abac40e" and "1c5e487e6125766a6699d9019f77fb2f4195823d" have entirely different histories.
d3e0bab29a ... 1c5e487e61
4 changed files with 68 additions and 147 deletions
src/akh_lex.erl (deleted, 100 lines)
@@ -1,100 +0,0 @@
--module(akh_lex).
--moduledoc """
-This module contains functions for tokenizing Akhamoth source code.
-""".
-
--export([
-    new/1,
-    source_map/1,
-    next/1
-]).
-
--export_type([lexer/0]).
-
--define(is_digit(C), C >= $0, C =< $9).
--define(is_id_start(C), C >= $a, C =< $z; C >= $A, C =< $Z; C =:= $_).
--define(is_space(C), C =:= $\s; C =:= $\t).
-
--doc """
-Tokens for which the category is the same as the content.
-""".
--type token_simple() :: '(' | ')' | '[' | ']' | '{' | '}'.
-
--doc """
-Tokens for which there is content beyond the category.
-""".
--type token_complex() :: id | number | unknown.
-
--doc """
-A token in the input stream.
-""".
--type token() ::
-    {token_simple(), Position :: non_neg_integer() | inserted}
-    | {token_complex(), Position :: non_neg_integer(), Length :: pos_integer()}.
-
--record(lexer, {
-    source :: binary(),
-    offset = 0 :: non_neg_integer(),
-    source_map :: akh_source_map:source_map()
-}).
-
--opaque lexer() :: #lexer{}.
-
--type return() :: none | {ok, token(), lexer()}.
-
-%%% exports
-
--doc """
-Initializes a lexer to tokenize the given binary.
-""".
--spec new(binary()) -> lexer().
-new(Source) -> #lexer{source = Source, source_map = akh_source_map:new()}.
-
--doc """
-Returns the source map for a lexer.
-""".
--spec source_map(lexer()) -> akh_source_map:source_map().
-source_map(#lexer{source_map = SourceMap}) -> SourceMap.
-
--doc """
-next(Lexer)
-
-Attempts to get the next token in the input.
-""".
--spec next(lexer()) -> none | {ok, token(), lexer()}.
-next(#lexer{source = <<C, Rest/bytes>>} = Lx) when ?is_id_start(C) ->
-    lex_id(Lx#lexer{source = Rest}, 1);
-next(#lexer{source = <<C, Rest/bytes>>} = Lx) when ?is_digit(C) ->
-    lex_number(Lx#lexer{source = Rest}, 1);
-next(#lexer{source = <<C, Rest/bytes>>, offset = Offset} = Lx) when ?is_space(C) ->
-    next(Lx#lexer{source = Rest, offset = Offset + 1});
-next(#lexer{source = <<$\n, Rest/bytes>>, offset = Offset} = Lx) ->
-    new_line(Lx#lexer{source = Rest, offset = Offset + 1});
-next(#lexer{source = <<$\r, $\n, Rest/bytes>>, offset = Offset} = Lx) ->
-    new_line(Lx#lexer{source = Rest, offset = Offset + 2});
-next(#lexer{source = <<>>}) ->
-    none.
-
-%%% local functions
-
--spec lex_id(#lexer{}, non_neg_integer()) -> return().
-lex_id(
-    #lexer{source = <<C, Rest/bytes>>} = Lx,
-    Len
-) when ?is_id_start(C); ?is_digit(C) ->
-    lex_id(Lx#lexer{source = Rest}, Len + 1);
-lex_id(#lexer{offset = Offset} = Lx, Len) ->
-    {ok, {id, Offset, Len}, Lx#lexer{offset = Offset + Len}}.
-
--spec lex_number(#lexer{}, non_neg_integer()) -> return().
-lex_number(
-    #lexer{source = <<C, Rest/bytes>>} = Lx,
-    Len
-) when ?is_digit(C); C =:= $_ ->
-    lex_number(Lx#lexer{source = Rest}, Len + 1);
-lex_number(#lexer{offset = Offset} = Lx, Len) ->
-    {ok, {number, Offset, Len}, Lx#lexer{offset = Offset + Len}}.
-
--spec new_line(#lexer{}) -> return().
-new_line(#lexer{source_map = SourceMap} = Lx) ->
-    next(Lx#lexer{source_map = akh_source_map:insert(Lx#lexer.offset, SourceMap)}).
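Editor's note on how the removed API fit together: next/1 produced {Category, Offset, Length} tokens while new_line/1 recorded line breaks, and callers resolved offsets through the source map afterwards. A minimal driver sketch; drain/2 and the input binary are illustrative (they mirror the old collect/2 in the compiler module below), not part of the diff:

%% Illustrative driver for the removed akh_lex API (hypothetical helper).
drain(Lx, Acc) ->
    case akh_lex:next(Lx) of
        none -> {lists:reverse(Acc), akh_lex:source_map(Lx)};
        {ok, Token, Lx1} -> drain(Lx1, [Token | Acc])
    end.

%% {Tokens, Map} = drain(akh_lex:new(<<"foo 42\nbar">>), []),
%% [{Cat, akh_source_map:location(Off, Map)} || {Cat, Off, _Len} <- Tokens]
%% would yield [{id, {1, 0}}, {number, {1, 4}}, {id, {2, 1}}].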
src/akh_lexer.erl (new file, 64 lines)
@@ -0,0 +1,64 @@
+-module(akh_lexer).
+-moduledoc """
+This module contains functions for tokenizing Akhamoth source code.
+""".
+
+-export([new/1, next/1]).
+
+-define(is_digit(C), C >= $0, C =< $9).
+-define(is_id_start(C), C >= $a, C =< $z; C >= $A, C =< $Z; C =:= $_).
+
+-doc """
+A token in the input stream
+""".
+-type token() :: any().
+
+-record(lexer, {
+    source :: binary(),
+    offset = 0 :: non_neg_integer()
+}).
+
+%%% exports
+
+-doc """
+Initializes a lexer to tokenize the given binary.
+""".
+-spec new(binary()) -> #lexer{}.
+new(Source) -> #lexer{source = Source}.
+
+-doc """
+Attempts to get the next token in the input.
+""".
+-spec next(#lexer{}) -> none | {ok, token(), #lexer{}}.
+next(#lexer{source = <<C, _/bytes>>} = Lx) when ?is_id_start(C) ->
+    lex_id(Lx, 1);
+next(#lexer{source = <<C, _/bytes>>} = Lx) when ?is_digit(C) ->
+    lex_number(Lx, 1, C - $0);
+next(#lexer{source = <<>>}) ->
+    none.
+
+%%% local functions
+
+lex_id(#lexer{source = Source, offset = Offset} = Lx, Len) when Len < byte_size(Source) ->
+    C = binary:at(Source, Len),
+    if
+        ?is_id_start(C); ?is_digit(C) ->
+            lex_id(Lx, Len + 1);
+        true ->
+            {Id, Rest} = split_binary(Source, Len),
+            {ok, {id, Offset, Id}, Lx#lexer{source = Rest, offset = Offset + Len}}
+    end;
+lex_id(#lexer{source = Source, offset = Offset} = Lx, Len) ->
+    {ok, {id, Offset, Source}, Lx#lexer{source = <<>>, offset = Offset + Len}}.
+
+lex_number(#lexer{source = Source, offset = Offset} = Lx, Len, Acc) when Len < byte_size(Source) ->
+    C = binary:at(Source, Len),
+    if
+        ?is_digit(C) ->
+            lex_number(Lx, Len + 1, Acc * 10 + C - $0);
+        true ->
+            {_, Rest} = split_binary(Source, Len),
+            {ok, {number, Offset, Acc}, Lx#lexer{source = Rest, offset = Offset + Len}}
+    end;
+lex_number(#lexer{offset = Offset} = Lx, Len, Acc) ->
+    {ok, {number, Offset, Acc}, Lx#lexer{source = <<>>, offset = Offset + Len}}.
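Editor's note: the visible difference at the call site is the token shape. The rewrite returns content inline ({id, Offset, Binary} and {number, Offset, Value}) instead of a length to slice out of the source later. A small sketch of the expected behaviour; note that this version only matches identifiers and digits, so whitespace and newlines are not yet skipped, and the illustrative input below avoids them:

%% Sketch of driving the new API (hypothetical input, not from the diff).
demo() ->
    Lx0 = akh_lexer:new(<<"foo42">>),
    {ok, {id, 0, <<"foo42">>}, Lx1} = akh_lexer:next(Lx0),
    none = akh_lexer:next(Lx1),
    ok.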
src/akh_source_map.erl (deleted, 42 lines)
@@ -1,42 +0,0 @@
--module(akh_source_map).
--moduledoc """
-The source map translates the byte offsets returned by the lexer into lines and
-columns for display. Internally, it uses the balanced binary tree from
-`m:gb_trees` to avoid linear lookup times.
-""".
-
--export([
-    new/0,
-    insert/2,
-    location/2
-]).
-
--export_type([source_map/0]).
-
--opaque source_map() :: {gb_trees:tree(non_neg_integer(), pos_integer()), Line :: pos_integer()}.
-
--type location() :: {Line :: pos_integer(), Column :: pos_integer()}.
-
--doc """
-Returns a new source map.
-""".
--spec new() -> source_map().
-new() -> {gb_trees:empty(), 1}.
-
--doc """
-Insert the next line break at byte offset `Offset`.
-""".
--spec insert(Offset :: non_neg_integer(), SourceMap :: source_map()) -> source_map().
-insert(Offset, {Tree, Line}) -> {gb_trees:insert(Offset - 1, Line + 1, Tree), Line + 1}.
-
--doc """
-Get line and column info for byte offset `Offset`.
-""".
--spec location(Offset :: non_neg_integer(), SourceMap :: source_map()) -> location().
-location(Offset, {Tree, _}) ->
-    case gb_trees:smaller(Offset, Tree) of
-        {Start, Line} ->
-            {Line, Offset - Start};
-        none ->
-            {1, Offset}
-    end.
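Editor's note, a worked example of the deleted map's semantics. Assuming the lexer inserted at the offset just past each newline (as akh_lex:new_line/1 did): for <<"ab\ncd">> the newline sits at offset 2, so the lexer would call insert(3, Map), storing key 2 -> line 2. The session below is illustrative, not from the diff:

%% Hypothetical session against the removed module.
demo() ->
    M0 = akh_source_map:new(),                % {empty tree, line 1}
    M1 = akh_source_map:insert(3, M0),        % line 2 begins after offset 2
    {1, 0} = akh_source_map:location(0, M1),  % 'a': before any stored break
    {2, 1} = akh_source_map:location(3, M1),  % 'c': relative to break at 2
    ok.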
@@ -7,12 +7,11 @@ compile_file(Path) ->
     compile_binary(Source).
 
 compile_binary(Source) ->
-    Lx = akh_lex:new(Source),
-    {Tokens, SourceMap} = collect(Lx, []),
-    lists:map(fun({T, O, _}) -> {T, akh_source_map:location(O, SourceMap)} end, Tokens).
+    Lx = akh_lexer:new(Source),
+    collect(Lx, []).
 
 collect(Lx, Acc) ->
-    case akh_lex:next(Lx) of
-        none -> {Acc, akh_lex:source_map(Lx)};
+    case akh_lexer:next(Lx) of
+        none -> Acc;
         {ok, T, L} -> collect(L, [T | Acc])
     end.
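Editor's note: for callers, compile_binary/1 previously resolved every token offset to a {Line, Column} pair and now returns the raw accumulator from collect/2, still in reverse source order since tokens are prepended. A sketch of what the new path reduces to; the enclosing module's name is not shown in this diff, so collect/2 is reimplemented locally here for illustration:

%% Hypothetical standalone equivalent of the new compile path.
tokens(Source) ->
    collect_(akh_lexer:new(Source), []).

collect_(Lx, Acc) ->
    case akh_lexer:next(Lx) of
        none -> Acc;
        {ok, T, L} -> collect_(L, [T | Acc])
    end.
%% tokens(<<"foo42">>) =:= [{id, 0, <<"foo42">>}], versus the old pipeline's
%% [{id, {1, 0}}] with positions resolved through the source map.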