add operator parsing
This commit is contained in:
parent
aa0fae3b38
commit
a0c03e8cfe
2 changed files with 53 additions and 13 deletions
|
@ -15,10 +15,38 @@ This module contains functions for tokenizing Akhamoth source code.
|
||||||
%% Guard macros. Each expands to a guard sequence (`,` is AND, `;` is OR),
%% so they are meant to be used directly after `when`.
%% is_id_start(C): C is an ASCII letter (a-z, A-Z) or an underscore.
-define(is_id_start(C), C >= $a, C =< $z; C >= $A, C =< $Z; C =:= $_).
|
-define(is_id_start(C), C >= $a, C =< $z; C >= $A, C =< $Z; C =:= $_).
|
||||||
%% is_space(C): C is a space or a tab character (newlines are not included).
-define(is_space(C), C =:= $\s; C =:= $\t).
|
-define(is_space(C), C =:= $\s; C =:= $\t).
|
||||||
|
|
||||||
|
%% is_op2(T): T is one of the two-byte operator binaries.
%% Expands to `;`-separated guard alternatives, so use it directly after `when`.
-define(is_op2(T), T =:= <<"|>">>; T =:= <<"=>">>; T =:= <<"->">>; T =:= <<"==">>).
|
||||||
|
%% is_op1(T): T is one of the one-byte operator binaries.
%% Several of these are prefixes of (or appear in) the two-byte operators
%% above, so a caller that wants the longest match must test is_op2 first.
-define(is_op1(T),
|
||||||
|
T =:= <<"+">>;
|
||||||
|
T =:= <<"-">>;
|
||||||
|
T =:= <<"*">>;
|
||||||
|
T =:= <<"/">>;
|
||||||
|
T =:= <<"=">>;
|
||||||
|
T =:= <<".">>
|
||||||
|
).
|
||||||
|
|
||||||
-doc """
|
-doc """
|
||||||
Tokens for which the category is the same as the content.
|
Tokens for which the category is the same as the content.
|
||||||
""".
|
""".
|
||||||
%% The extended form lists the one- and two-character operators, the
%% separators ',' and ':', and the three bracket pairs; the earlier
%% one-line form listed only the bracket pairs.
-type token_simple() :: '(' | ')' | '[' | ']' | '{' | '}'.
|
-type token_simple() ::
|
||||||
|
'.'
|
||||||
|
| '+'
|
||||||
|
| '-'
|
||||||
|
| '*'
|
||||||
|
| '/'
|
||||||
|
| '='
|
||||||
|
| '|>'
|
||||||
|
| '=>'
|
||||||
|
| '->'
|
||||||
|
| '=='
|
||||||
|
| ','
|
||||||
|
| ':'
|
||||||
|
| '('
|
||||||
|
| ')'
|
||||||
|
| '['
|
||||||
|
| ']'
|
||||||
|
| '{'
|
||||||
|
| '}'.
|
||||||
|
|
||||||
-doc """
|
-doc """
|
||||||
Tokens for which there is content beyond the category.
|
Tokens for which there is content beyond the category.
|
||||||
|
@ -62,16 +90,20 @@ next(Lexer)
|
||||||
Attempts to get the next token in the input.
|
Attempts to get the next token in the input.
|
||||||
""".
|
""".
|
||||||
%% Dispatches on the leading byte(s) of the remaining source. Clause order
%% matters: identifier start, digit, space/tab, two-byte operator, one-byte
%% operator, LF, CRLF, and finally empty input (which yields `none`).
-spec next(lexer()) -> none | {ok, token(), lexer()}.
|
-spec next(lexer()) -> none | {ok, token(), lexer()}.
|
||||||
%% Identifier: consume the first byte and let lex_id/2 continue with length 1.
next(#lexer{source = <<C, Rest/bytes>>} = Lx) when ?is_id_start(C) ->
|
next(#lexer{source = <<C, Rest/binary>>} = Lx) when ?is_id_start(C) ->
|
||||||
lex_id(Lx#lexer{source = Rest}, 1);
|
lex_id(Lx#lexer{source = Rest}, 1);
|
||||||
%% Number: consume the first digit and let lex_number/2 continue with length 1.
next(#lexer{source = <<C, Rest/bytes>>} = Lx) when ?is_digit(C) ->
|
next(#lexer{source = <<C, Rest/binary>>} = Lx) when ?is_digit(C) ->
|
||||||
lex_number(Lx#lexer{source = Rest}, 1);
|
lex_number(Lx#lexer{source = Rest}, 1);
|
||||||
%% Space or tab: no token is produced; advance the offset by one and retry.
next(#lexer{source = <<C, Rest/bytes>>, offset = Offset} = Lx) when ?is_space(C) ->
|
next(#lexer{source = <<C, Rest/binary>>} = Lx) when ?is_space(C) ->
|
||||||
next(Lx#lexer{source = Rest, offset = Offset + 1});
|
next(Lx#lexer{source = Rest, offset = Lx#lexer.offset + 1});
|
||||||
next(#lexer{source = <<$\n, Rest/bytes>>, offset = Offset} = Lx) ->
|
%% Two-byte operators are tried before one-byte operators, so `==` and `->`
%% are not split into two single-character tokens.
next(#lexer{source = <<T:2/binary, Rest/binary>>, offset = Offset} = Lx) when ?is_op2(T) ->
|
||||||
new_line(Lx#lexer{source = Rest, offset = Offset + 1});
|
%% The token is {Atom, StartOffset}; binary_to_atom/1 is only reached for the
%% fixed set of binaries accepted by the guard.
{ok, {binary_to_atom(T), Offset}, Lx#lexer{source = Rest, offset = Offset + 2}};
|
||||||
next(#lexer{source = <<$\r, $\n, Rest/bytes>>, offset = Offset} = Lx) ->
|
%% One-byte operator: same token shape, offset advanced by one.
next(#lexer{source = <<T:1/binary, Rest/binary>>, offset = Offset} = Lx) when ?is_op1(T) ->
|
||||||
new_line(Lx#lexer{source = Rest, offset = Offset + 2});
|
{ok, {binary_to_atom(T), Offset}, Lx#lexer{source = Rest, offset = Offset + 1}};
|
||||||
|
%% LF line ending: advance one byte and hand the lexer to new_line/1.
next(#lexer{source = <<$\n, Rest/binary>>} = Lx) ->
|
||||||
|
new_line(Lx#lexer{source = Rest, offset = Lx#lexer.offset + 1});
|
||||||
|
%% CRLF line ending: advance two bytes and hand the lexer to new_line/1.
next(#lexer{source = <<$\r, $\n, Rest/binary>>} = Lx) ->
|
||||||
|
new_line(Lx#lexer{source = Rest, offset = Lx#lexer.offset + 2});
|
||||||
%% End of input.
next(#lexer{source = <<>>}) ->
|
next(#lexer{source = <<>>}) ->
|
||||||
none.
|
none.
|
||||||
|
|
||||||
|
@ -79,7 +111,7 @@ next(#lexer{source = <<>>}) ->
|
||||||
|
|
||||||
-spec lex_id(#lexer{}, non_neg_integer()) -> return().
|
-spec lex_id(#lexer{}, non_neg_integer()) -> return().
|
||||||
lex_id(
|
lex_id(
|
||||||
#lexer{source = <<C, Rest/bytes>>} = Lx,
|
#lexer{source = <<C, Rest/binary>>} = Lx,
|
||||||
Len
|
Len
|
||||||
) when ?is_id_start(C); ?is_digit(C) ->
|
) when ?is_id_start(C); ?is_digit(C) ->
|
||||||
lex_id(Lx#lexer{source = Rest}, Len + 1);
|
lex_id(Lx#lexer{source = Rest}, Len + 1);
|
||||||
|
@ -88,7 +120,7 @@ lex_id(#lexer{offset = Offset} = Lx, Len) ->
|
||||||
|
|
||||||
-spec lex_number(#lexer{}, non_neg_integer()) -> return().
|
-spec lex_number(#lexer{}, non_neg_integer()) -> return().
|
||||||
lex_number(
|
lex_number(
|
||||||
#lexer{source = <<C, Rest/bytes>>} = Lx,
|
#lexer{source = <<C, Rest/binary>>} = Lx,
|
||||||
Len
|
Len
|
||||||
) when ?is_digit(C); C =:= $_ ->
|
) when ?is_digit(C); C =:= $_ ->
|
||||||
lex_number(Lx#lexer{source = Rest}, Len + 1);
|
lex_number(Lx#lexer{source = Rest}, Len + 1);
|
||||||
|
|
|
@ -1,6 +1,9 @@
|
||||||
-module(akhamoth).
|
-module(akhamoth).
|
||||||
|
|
||||||
-export([compile_file/1, compile_binary/1, collect/2]).
|
-export([
|
||||||
|
compile_file/1,
|
||||||
|
compile_binary/1
|
||||||
|
]).
|
||||||
|
|
||||||
compile_file(Path) ->
|
compile_file(Path) ->
|
||||||
{ok, Source} = file:read_file(Path),
|
{ok, Source} = file:read_file(Path),
|
||||||
|
@ -9,10 +12,15 @@ compile_file(Path) ->
|
||||||
%% Lexes Source with akh_lex, gathers all tokens via collect/2, and returns a
%% list of {Category, Location} pairs, where Location is resolved from each
%% token's offset through the lexer's source map. The list keeps the order in
%% which collect/2 returns the tokens.
compile_binary(Source) ->
|
compile_binary(Source) ->
|
||||||
Lx = akh_lex:new(Source),
|
Lx = akh_lex:new(Source),
|
||||||
{Tokens, SourceMap} = collect(Lx, []),
|
{Tokens, SourceMap} = collect(Lx, []),
|
||||||
lists:map(fun({T, O, _}) -> {T, akh_source_map:location(O, SourceMap)} end, Tokens).
|
%% location/2 accepts both three-element and two-element token tuples, so the
%% two-element operator tokens do not fail the pattern match.
lists:map(fun(T) -> location(T, SourceMap) end, Tokens).
|
||||||
|
|
||||||
%% Drains the lexer, gathering every token until akh_lex:next/1 returns `none`.
%%
%% Lx  - current lexer state.
%% Acc - tokens gathered so far, most recent first.
%%
%% Returns {Tokens, SourceMap}: Tokens in source order, SourceMap taken from
%% the final lexer state via akh_lex:source_map/1.
collect(Lx, Acc) ->
    case akh_lex:next(Lx) of
        none ->
            %% Tokens were prepended while lexing, so restore source order.
            {lists:reverse(Acc), akh_lex:source_map(Lx)};
        {ok, Token, Next} ->
            collect(Next, [Token | Acc])
    end.
|
||||||
|
|
||||||
|
%% Pairs a token's category with its resolved source location.
%%
%% Token     - a two- or three-element tuple whose first element is the
%%             category and whose second element is the offset; a third
%%             element, if present, is ignored.
%% SourceMap - passed through to akh_source_map:location/2.
%%
%% Returns {Category, Location}. Any other token shape fails with
%% function_clause.
location(Token, SourceMap) when tuple_size(Token) =:= 2; tuple_size(Token) =:= 3 ->
    Category = element(1, Token),
    Offset = element(2, Token),
    {Category, akh_source_map:location(Offset, SourceMap)}.
|
||||||
|
|
Loading…
Add table
Reference in a new issue