Compare commits

..

4 commits

Author SHA1 Message Date
1c5e487e61
start writing up some docs
mostly to help me remember what the plan is
2025-03-11 23:02:07 -04:00
e2ba4d16f4
more rewrite of lexer, add simple ints 2025-03-11 23:00:55 -04:00
5af4838924
delete unused module
it will likely be back at some point, but not worth worrying about for
now.
2025-03-11 22:59:44 -04:00
e7a7a40251
reorganization for easier testing 2025-03-11 22:59:16 -04:00
7 changed files with 129 additions and 66 deletions

View file

@ -1,3 +1,4 @@
# Akhamoth # Akhamoth
statically typed functional language An attempt at making something very much like Elixir, but with syntax I prefer
and static typing as a mandatory part of the language from the beginning.

66
pages/syntax_reference.md Normal file
View file

@ -0,0 +1,66 @@
# Syntax Reference
In Akhamoth, everything is an expression.
## Literals
### Identifiers
Identifiers are composed of ASCII letters, digits, and underscores and may not
begin with a digit. The preferred naming convention is `UpperCamelCase` for type
names, `snake_case` for variables, functions, and modules, and
`SCREAMING_SNAKE_CASE` for global constants. Identifiers beginning with an
underscore are typically intentionally unused variables and will silence
warnings about this from the compiler.
> #### Unicode Identifiers {: .info}
>
> For version 1.0 of Akhamoth, the goal is only to support ASCII identifiers in
> order to make parsing easier. In the future however, it would be good to look
> at implementing [UAX #31](https://www.unicode.org/reports/tr31/) unicode
> identifiers.
### Atoms
Atoms have the exact same syntax as identifiers, but are prefixed with `:`, e.g.
`:akhamoth`.
### Strings
A string literal consists of `"` followed by any number of other characters and
then another `"`. Currently there is no support for character escapes of any
kind. This will be rectified before version 1.0.
### Integers
Integer literals must begin with a digit, followed by any number of digits and
underscores. Underscores are intended to be used for grouping digits in long
numbers, e.g. `1_000_000_000`.
## Expressions
### Operators
The following is a list of all operators in Akhamoth, ordered from highest
precedence to lowest, along with their associativity:
Operator | Associativity
-------- | -------------
`.` | left
`-` | unary
`*` `/` | left
`+` `-` | left
`\|>` | left
`==` | requires parens
`..` | requires parens
`=>` | right
`->` | right
`=` | right
### Function Calls
## AST
The design of Akhamoth's AST is essentially the same as [Elixir's][1].
[1]: https://hexdocs.pm/elixir/main/syntax-reference.html#the-elixir-ast

View file

@ -1,3 +1,12 @@
{erl_opts, [debug_info]}. {erl_opts, [debug_info]}.
{project_plugins, [rebar3_ex_doc]}. {project_plugins, [rebar3_ex_doc]}.
{deps, []}. {deps, []}.
{ex_doc, [
{extras, [
{"README.md", #{title => <<"Overview">>}},
"pages/syntax_reference.md"
]},
{main, "README.md"},
{source_url, "https://git.wires.systems/wires/akhamoth"},
{api_reference, false}
]}.

View file

@ -1,54 +1,64 @@
-module(akh_lexer). -module(akh_lexer).
-include("akhamoth.hrl"). -moduledoc """
-export([ This module contains functions for tokenizing Akhamoth source code.
new/1, """.
next/1
]).
-type token_kind() :: -export([new/1, next/1]).
binary()
| '{'
| '}'
| '['
| ']'
| '('
| ')'
| ','
| ';'
| ':'
| '.'
| '->'
| '+'.
-type token() :: {token_kind(), non_neg_integer() | inserted}. -define(is_digit(C), C >= $0, C =< $9).
-define(is_id_start(C), C >= $a, C =< $z; C >= $A, C =< $Z; C =:= $_).
-define(is_id(C), (C >= $A andalso C =< $Z); (C >= $a andalso C =< $z); C =:= $_). -doc """
A token in the input stream
""".
-type token() :: any().
-record(lexer, { -record(lexer, {
source :: binary(), source :: binary(),
offset = 0 :: non_neg_integer() offset = 0 :: non_neg_integer()
}). }).
%%% exports
-doc """
Initializes a lexer to tokenize the given binary.
""".
-spec new(binary()) -> #lexer{}. -spec new(binary()) -> #lexer{}.
new(Source) -> #lexer{source = Source}. new(Source) -> #lexer{source = Source}.
-doc """
Attempts to get the next token in the input.
""".
-spec next(#lexer{}) -> none | {ok, token(), #lexer{}}. -spec next(#lexer{}) -> none | {ok, token(), #lexer{}}.
next(#lexer{source = <<$\n, Rest/bytes>>, offset = Offset} = Lexer) -> next(#lexer{source = <<C, _/bytes>>} = Lx) when ?is_id_start(C) ->
next(Lexer#lexer{source = Rest, offset = Offset + 1}); lex_id(Lx, 1);
next(#lexer{source = <<C, _/bytes>>} = Lexer) when ?is_id(C) -> next(#lexer{source = <<C, _/bytes>>} = Lx) when ?is_digit(C) ->
lex_id(Lexer, 1); lex_number(Lx, 1, C - $0);
next(#lexer{source = <<>>}) -> next(#lexer{source = <<>>}) ->
none. none.
-spec lex_id(#lexer{}, pos_integer()) -> {ok, token(), #lexer{}}. %%% local functions
lex_id(#lexer{source = Source, offset = Offset} = Lexer, Len) when Len < byte_size(Source) ->
lex_id(#lexer{source = Source, offset = Offset} = Lx, Len) when Len < byte_size(Source) ->
C = binary:at(Source, Len), C = binary:at(Source, Len),
if if
?is_id(C) -> ?is_id_start(C); ?is_digit(C) ->
lex_id(Lexer, Len + 1); lex_id(Lx, Len + 1);
true -> true ->
{Id, Rest} = split_binary(Source, Len), {Id, Rest} = split_binary(Source, Len),
{ok, {Id, Offset}, Lexer#lexer{source = Rest, offset = Offset + Len}} {ok, {id, Offset, Id}, Lx#lexer{source = Rest, offset = Offset + Len}}
end; end;
lex_id(#lexer{source = Source, offset = Offset} = Lexer, Len) -> lex_id(#lexer{source = Source, offset = Offset} = Lx, Len) ->
{ok, {Source, Offset}, Lexer#lexer{source = <<>>, offset = Offset + Len}}. {ok, {id, Offset, Source}, Lx#lexer{source = <<>>, offset = Offset + Len}}.
lex_number(#lexer{source = Source, offset = Offset} = Lx, Len, Acc) when Len < byte_size(Source) ->
C = binary:at(Source, Len),
if
?is_digit(C) ->
lex_number(Lx, Len + 1, Acc * 10 + C - $0);
true ->
{_, Rest} = split_binary(Source, Len),
{ok, {number, Offset, Acc}, Lx#lexer{source = Rest, offset = Offset + Len}}
end;
lex_number(#lexer{offset = Offset} = Lx, Len, Acc) ->
{ok, {number, Offset, Acc}, Lx#lexer{source = <<>>, offset = Offset + Len}}.

View file

@ -1,21 +0,0 @@
-module(akh_source_map).
-include("akh_source_map.hrl").
-export([
empty/0,
insert/3,
get_loc/2
]).
-spec empty() -> source_map().
empty() -> gb_trees:empty().
-spec insert(Offset :: non_neg_integer(), Line :: pos_integer(), source_map()) ->
source_map().
insert(Offset, Line, SourceMap) -> gb_trees:insert(Offset, Line, SourceMap).
-spec get_loc(Offset :: non_neg_integer(), source_map()) -> #loc{}.
get_loc(Offset, SourceMap) ->
case gb_trees:smaller(Offset, SourceMap) of
{Start, Line} -> #loc{line = Line, col = Offset - Start - 1};
none -> #loc{line = 0, col = Offset}
end.

View file

@ -1,5 +0,0 @@
-type source_map() :: gb_trees:tree(non_neg_integer(), pos_integer()).
-type span() :: {non_neg_integer(), pos_integer()}.
-record(loc, {line :: pos_integer(), col :: non_neg_integer()}).

View file

@ -1,14 +1,17 @@
-module(akhamoth). -module(akhamoth).
-export([compile/1, collect/2]). -export([compile_file/1, compile_binary/1, collect/2]).
compile(Path) -> compile_file(Path) ->
{ok, Src} = file:read_file(Path), {ok, Source} = file:read_file(Path),
Lexer = akh_lexer:new(Src), compile_binary(Source).
collect(Lexer, []).
collect(Lexer, Acc) -> compile_binary(Source) ->
case akh_lexer:next(Lexer) of Lx = akh_lexer:new(Source),
collect(Lx, []).
collect(Lx, Acc) ->
case akh_lexer:next(Lx) of
none -> Acc; none -> Acc;
{ok, T, L} -> collect(L, [T | Acc]) {ok, T, L} -> collect(L, [T | Acc])
end. end.