From a0c03e8cfe9a86667ea17a19901fe5534147a094 Mon Sep 17 00:00:00 2001 From: wires Date: Wed, 12 Mar 2025 11:15:08 -0400 Subject: [PATCH] add operator parsing --- src/akh_lex.erl | 54 ++++++++++++++++++++++++++++++++++++++---------- src/akhamoth.erl | 12 +++++++++-- 2 files changed, 53 insertions(+), 13 deletions(-) diff --git a/src/akh_lex.erl b/src/akh_lex.erl index d186cfe..c35dca5 100644 --- a/src/akh_lex.erl +++ b/src/akh_lex.erl @@ -15,10 +15,38 @@ This module contains functions for tokenizing Akhamoth source code. -define(is_id_start(C), C >= $a, C =< $z; C >= $A, C =< $Z; C =:= $_). -define(is_space(C), C =:= $\s; C =:= $\t). +-define(is_op2(T), T =:= <<"|>">>; T =:= <<"=>">>; T =:= <<"->">>; T =:= <<"==">>). +-define(is_op1(T), + T =:= <<"+">>; + T =:= <<"-">>; + T =:= <<"*">>; + T =:= <<"/">>; + T =:= <<"=">>; + T =:= <<".">> +). + -doc """ Tokens for which the category is the same as the content. """. --type token_simple() :: '(' | ')' | '[' | ']' | '{' | '}'. +-type token_simple() :: + '.' + | '+' + | '-' + | '*' + | '/' + | '=' + | '|>' + | '=>' + | '->' + | '==' + | ',' + | ':' + | '(' + | ')' + | '[' + | ']' + | '{' + | '}'. -doc """ Tokens for which there is content beyond the category. @@ -62,16 +90,20 @@ next(Lexer) Attempts to get the next token in the input. """. -spec next(lexer()) -> none | {ok, token(), lexer()}. -next(#lexer{source = <>} = Lx) when ?is_id_start(C) -> +next(#lexer{source = <>} = Lx) when ?is_id_start(C) -> lex_id(Lx#lexer{source = Rest}, 1); -next(#lexer{source = <>} = Lx) when ?is_digit(C) -> +next(#lexer{source = <>} = Lx) when ?is_digit(C) -> lex_number(Lx#lexer{source = Rest}, 1); -next(#lexer{source = <>, offset = Offset} = Lx) when ?is_space(C) -> - next(Lx#lexer{source = Rest, offset = Offset + 1}); -next(#lexer{source = <<$\n, Rest/bytes>>, offset = Offset} = Lx) -> - new_line(Lx#lexer{source = Rest, offset = Offset + 1}); -next(#lexer{source = <<$\r, $\n, Rest/bytes>>, offset = Offset} = Lx) -> - new_line(Lx#lexer{source = Rest, offset = Offset + 2}); +next(#lexer{source = <>} = Lx) when ?is_space(C) -> + next(Lx#lexer{source = Rest, offset = Lx#lexer.offset + 1}); +next(#lexer{source = <>, offset = Offset} = Lx) when ?is_op2(T) -> + {ok, {binary_to_atom(T), Offset}, Lx#lexer{source = Rest, offset = Offset + 2}}; +next(#lexer{source = <>, offset = Offset} = Lx) when ?is_op1(T) -> + {ok, {binary_to_atom(T), Offset}, Lx#lexer{source = Rest, offset = Offset + 1}}; +next(#lexer{source = <<$\n, Rest/binary>>} = Lx) -> + new_line(Lx#lexer{source = Rest, offset = Lx#lexer.offset + 1}); +next(#lexer{source = <<$\r, $\n, Rest/binary>>} = Lx) -> + new_line(Lx#lexer{source = Rest, offset = Lx#lexer.offset + 2}); next(#lexer{source = <<>>}) -> none. @@ -79,7 +111,7 @@ next(#lexer{source = <<>>}) -> -spec lex_id(#lexer{}, non_neg_integer()) -> return(). lex_id( - #lexer{source = <>} = Lx, + #lexer{source = <>} = Lx, Len ) when ?is_id_start(C); ?is_digit(C) -> lex_id(Lx#lexer{source = Rest}, Len + 1); @@ -88,7 +120,7 @@ lex_id(#lexer{offset = Offset} = Lx, Len) -> -spec lex_number(#lexer{}, non_neg_integer()) -> return(). lex_number( - #lexer{source = <>} = Lx, + #lexer{source = <>} = Lx, Len ) when ?is_digit(C); C =:= $_ -> lex_number(Lx#lexer{source = Rest}, Len + 1); diff --git a/src/akhamoth.erl b/src/akhamoth.erl index 57caf83..867aabc 100644 --- a/src/akhamoth.erl +++ b/src/akhamoth.erl @@ -1,6 +1,9 @@ -module(akhamoth). --export([compile_file/1, compile_binary/1, collect/2]). +-export([ + compile_file/1, + compile_binary/1 +]). compile_file(Path) -> {ok, Source} = file:read_file(Path), @@ -9,10 +12,15 @@ compile_file(Path) -> compile_binary(Source) -> Lx = akh_lex:new(Source), {Tokens, SourceMap} = collect(Lx, []), - lists:map(fun({T, O, _}) -> {T, akh_source_map:location(O, SourceMap)} end, Tokens). + lists:map(fun(T) -> location(T, SourceMap) end, Tokens). collect(Lx, Acc) -> case akh_lex:next(Lx) of none -> {Acc, akh_lex:source_map(Lx)}; {ok, T, L} -> collect(L, [T | Acc]) end. + +location({T, O, _}, SourceMap) -> + {T, akh_source_map:location(O, SourceMap)}; +location({T, O}, SourceMap) -> + {T, akh_source_map:location(O, SourceMap)}.