Compare commits

..

2 commits

Author SHA1 Message Date
74ee1e374e
first draft of string handling 2025-03-12 13:09:22 -04:00
294239abb9
recognize all simple tokens
still need to do delimiters but those require some special handling
2025-03-12 13:08:11 -04:00

View file

@ -16,13 +16,16 @@ This module contains functions for tokenizing Akhamoth source code.
%% Guard test that succeeds when C is a space or a tab character.
%% The expansion is a `;`-separated guard sequence, so use it as the whole
%% guard of a clause: combining it with `,` would bind only to one alternative.
-define(is_space(C), C =:= $\s; C =:= $\t).
%% Guard test that succeeds when the binary T is one of the two-byte
%% operators `|>`, `=>`, `->` or `==`.
%% The expansion is a `;`-separated guard sequence, so use it as the whole
%% guard of a clause.
-define(is_op2(T), T =:= <<"|>">>; T =:= <<"=>">>; T =:= <<"->">>; T =:= <<"==">>).
-define(is_op1(T),
-define(is_single(T),
T =:= <<"+">>;
T =:= <<"-">>;
T =:= <<"*">>;
T =:= <<"/">>;
T =:= <<"=">>;
T =:= <<".">>
T =:= <<".">>;
T =:= <<",">>;
T =:= <<";">>;
T =:= <<":">>
).
-doc """
@ -40,6 +43,7 @@ Tokens for which the category is the same as the content.
| '->'
| '=='
| ','
| ';'
| ':'
| '('
| ')'
@ -94,11 +98,13 @@ next(#lexer{source = <<C, Rest/binary>>} = Lx) when ?is_id_start(C) ->
lex_id(Lx#lexer{source = Rest}, 1);
next(#lexer{source = <<C, Rest/binary>>} = Lx) when ?is_digit(C) ->
lex_number(Lx#lexer{source = Rest}, 1);
next(#lexer{source = <<$", Rest/binary>>} = Lx) ->
handle_string(Lx#lexer{source = Rest}, 1);
next(#lexer{source = <<C, Rest/binary>>} = Lx) when ?is_space(C) ->
next(Lx#lexer{source = Rest, offset = Lx#lexer.offset + 1});
next(#lexer{source = <<T:2/binary, Rest/binary>>, offset = Offset} = Lx) when ?is_op2(T) ->
{ok, {binary_to_atom(T), Offset}, Lx#lexer{source = Rest, offset = Offset + 2}};
next(#lexer{source = <<T:1/binary, Rest/binary>>, offset = Offset} = Lx) when ?is_op1(T) ->
next(#lexer{source = <<T:1/binary, Rest/binary>>, offset = Offset} = Lx) when ?is_single(T) ->
{ok, {binary_to_atom(T), Offset}, Lx#lexer{source = Rest, offset = Offset + 1}};
next(#lexer{source = <<$\n, Rest/binary>>} = Lx) ->
new_line(Lx#lexer{source = Rest, offset = Lx#lexer.offset + 1});
@ -127,6 +133,14 @@ lex_number(
lex_number(#lexer{offset = Offset} = Lx, Len) ->
{ok, {number, Offset, Len}, Lx#lexer{offset = Offset + Len}}.
%% Scans the rest of a string literal after its opening quote has been
%% consumed by the caller.
%%
%% `Len` is the number of bytes of the literal consumed so far (the caller
%% starts it at 1 to account for the opening quote). The lexer's `offset`
%% is not touched while scanning; it is only advanced once the closing quote
%% is found.
%%
%% Returns `{ok, {string, Offset, TotalLen}, Lexer}` where `Offset` is the
%% lexer offset at entry, `TotalLen` counts every byte consumed including the
%% closing quote, and `Lexer` has its offset advanced by `TotalLen`.
%%
%% A backslash escapes the byte that follows it, whatever that byte is. This
%% keeps `\"` from terminating the literal and, just as importantly, makes
%% `\\` a complete escape so that a literal ending in an escaped backslash
%% (e.g. "a\\") is closed by the quote that follows it.
%%
%% NOTE(review): if the source runs out before a closing quote, no clause
%% matches and the call fails with function_clause. Decide whether an
%% unterminated string should instead produce an error value of `return()`.
%% NOTE(review): newlines inside the literal are not recorded in the source
%% map here; confirm whether multi-line strings are meant to be supported.
-spec handle_string(#lexer{}, non_neg_integer()) -> return().
handle_string(#lexer{source = <<$\\, _Escaped, Rest/binary>>} = Lx, Len) ->
    %% Skip the backslash together with the byte it escapes.
    handle_string(Lx#lexer{source = Rest}, Len + 2);
handle_string(#lexer{source = <<$", Rest/binary>>, offset = Offset} = Lx, Len) ->
    %% Unescaped quote: the literal is complete.
    TotalLen = Len + 1,
    {ok, {string, Offset, TotalLen}, Lx#lexer{source = Rest, offset = Offset + TotalLen}};
handle_string(#lexer{source = <<_Byte, Rest/binary>>} = Lx, Len) ->
    %% Any other byte is ordinary string content.
    handle_string(Lx#lexer{source = Rest}, Len + 1).
%% Records the lexer's current offset in the source map via
%% akh_source_map:insert/2, then resumes lexing with next/1.
-spec new_line(#lexer{}) -> return().
new_line(#lexer{offset = Offset, source_map = SourceMap} = Lx) ->
    UpdatedMap = akh_source_map:insert(Offset, SourceMap),
    next(Lx#lexer{source_map = UpdatedMap}).