Compare commits

..

4 commits

Author SHA1 Message Date
1c5e487e61
start writing up some docs
mostly to help me remember what the plan is
2025-03-11 23:02:07 -04:00
e2ba4d16f4
more rewrite of lexer, add simple ints 2025-03-11 23:00:55 -04:00
5af4838924
delete unused module
it will likely be back at some point, but not worth worrying about for
now.
2025-03-11 22:59:44 -04:00
e7a7a40251
reorganization for easier testing 2025-03-11 22:59:16 -04:00
7 changed files with 129 additions and 66 deletions

View file

@ -1,3 +1,4 @@
# Akhamoth # Akhamoth
statically typed functional language An attempt at making something very much like Elixir, but with syntax I prefer
and static typing as a mandatory part of the language from the beginning.

66
pages/syntax_reference.md Normal file
View file

@ -0,0 +1,66 @@
# Syntax Reference
In Akhamoth, everything is an expression.
## Literals
### Identifiers
Identifiers are composed of ASCII letters, digits, and underscores and may not
begin with a digit. The preferred naming convention is `UpperCamelCase` for type
names, `snake_case` for variables, functions, and modules, and
`SCREAMING_SNAKE_CASE` for global constants. Identifiers beginning with an
underscore are typically intentionally unused variables and will silence
warnings about this from the compiler.
> #### Unicode Identifiers {: .info}
>
> For version 1.0 of Akhamoth, the goal is only to support ASCII identifiers in
> order to make parsing easier. In the future however, it would be good to look
> at implementing [UAX #31](https://www.unicode.org/reports/tr31/) unicode
> identifiers.
### Atoms
Atoms have the exact same syntax as identifiers, but are prefixed with `:`, e.g.
`:akhamoth`.
### Strings
A string literal consists of `"` followed by any number of other characters and
then another `"`. Currently there is no support for character escapes of any
kind. This will be rectified before version 1.0.
### Integers
Integer literals must begin with a digit, followed by any number of digits and
underscores. Underscores are intended to be used for grouping digits in long
numbers, e.g. `1_000_000_000`.
## Expressions
### Operators
The following is a list of all operators in Akhamoth, ordered from highest
precedence to lowest, along with their associativity:
Operator | Associativity
-------- | -------------
`.` | left
`-` | unary
`*` `/` | left
`+` `-` | left
`\|>` | left
`==` | requires parens
`..` | requires parens
`=>` | right
`->` | right
`=` | right
### Function Calls
## AST
The design of Akhamoth's AST is essentially the same as [Elixir's][1].
[1]: https://hexdocs.pm/elixir/main/syntax-reference.html#the-elixir-ast

View file

@ -1,3 +1,12 @@
{erl_opts, [debug_info]}. {erl_opts, [debug_info]}.
{project_plugins, [rebar3_ex_doc]}. {project_plugins, [rebar3_ex_doc]}.
{deps, []}. {deps, []}.
{ex_doc, [
{extras, [
{"README.md", #{title => <<"Overview">>}},
"pages/syntax_reference.md"
]},
{main, "README.md"},
{source_url, "https://git.wires.systems/wires/akhamoth"},
{api_reference, false}
]}.

View file

@ -1,54 +1,64 @@
-module(akh_lexer). -module(akh_lexer).
-include("akhamoth.hrl"). -moduledoc """
-export([ This module contains functions for tokenizing Akhamoth source code.
new/1, """.
next/1
]).
-type token_kind() :: -export([new/1, next/1]).
binary()
| '{'
| '}'
| '['
| ']'
| '('
| ')'
| ','
| ';'
| ':'
| '.'
| '->'
| '+'.
-type token() :: {token_kind(), non_neg_integer() | inserted}. -define(is_digit(C), C >= $0, C =< $9).
-define(is_id_start(C), C >= $a, C =< $z; C >= $A, C =< $Z; C =:= $_).
-define(is_id(C), (C >= $A andalso C =< $Z); (C >= $a andalso C =< $z); C =:= $_). -doc """
A token in the input stream
""".
-type token() :: any().
-record(lexer, { -record(lexer, {
source :: binary(), source :: binary(),
offset = 0 :: non_neg_integer() offset = 0 :: non_neg_integer()
}). }).
%%% exports
-doc """
Initializes a lexer to tokenize the given binary.
""".
-spec new(binary()) -> #lexer{}. -spec new(binary()) -> #lexer{}.
new(Source) -> #lexer{source = Source}. new(Source) -> #lexer{source = Source}.
-doc """
Attempts to get the next token in the input.
""".
-spec next(#lexer{}) -> none | {ok, token(), #lexer{}}. -spec next(#lexer{}) -> none | {ok, token(), #lexer{}}.
next(#lexer{source = <<$\n, Rest/bytes>>, offset = Offset} = Lexer) -> next(#lexer{source = <<C, _/bytes>>} = Lx) when ?is_id_start(C) ->
next(Lexer#lexer{source = Rest, offset = Offset + 1}); lex_id(Lx, 1);
next(#lexer{source = <<C, _/bytes>>} = Lexer) when ?is_id(C) -> next(#lexer{source = <<C, _/bytes>>} = Lx) when ?is_digit(C) ->
lex_id(Lexer, 1); lex_number(Lx, 1, C - $0);
next(#lexer{source = <<>>}) -> next(#lexer{source = <<>>}) ->
none. none.
-spec lex_id(#lexer{}, pos_integer()) -> {ok, token(), #lexer{}}. %%% local functions
lex_id(#lexer{source = Source, offset = Offset} = Lexer, Len) when Len < byte_size(Source) ->
lex_id(#lexer{source = Source, offset = Offset} = Lx, Len) when Len < byte_size(Source) ->
C = binary:at(Source, Len), C = binary:at(Source, Len),
if if
?is_id(C) -> ?is_id_start(C); ?is_digit(C) ->
lex_id(Lexer, Len + 1); lex_id(Lx, Len + 1);
true -> true ->
{Id, Rest} = split_binary(Source, Len), {Id, Rest} = split_binary(Source, Len),
{ok, {Id, Offset}, Lexer#lexer{source = Rest, offset = Offset + Len}} {ok, {id, Offset, Id}, Lx#lexer{source = Rest, offset = Offset + Len}}
end; end;
lex_id(#lexer{source = Source, offset = Offset} = Lexer, Len) -> lex_id(#lexer{source = Source, offset = Offset} = Lx, Len) ->
{ok, {Source, Offset}, Lexer#lexer{source = <<>>, offset = Offset + Len}}. {ok, {id, Offset, Source}, Lx#lexer{source = <<>>, offset = Offset + Len}}.
lex_number(#lexer{source = Source, offset = Offset} = Lx, Len, Acc) when Len < byte_size(Source) ->
C = binary:at(Source, Len),
if
?is_digit(C) ->
lex_number(Lx, Len + 1, Acc * 10 + C - $0);
true ->
{_, Rest} = split_binary(Source, Len),
{ok, {number, Offset, Acc}, Lx#lexer{source = Rest, offset = Offset + Len}}
end;
lex_number(#lexer{offset = Offset} = Lx, Len, Acc) ->
{ok, {number, Offset, Acc}, Lx#lexer{source = <<>>, offset = Offset + Len}}.

View file

@ -1,21 +0,0 @@
-module(akh_source_map).
-include("akh_source_map.hrl").
-export([
empty/0,
insert/3,
get_loc/2
]).
-spec empty() -> source_map().
empty() -> gb_trees:empty().
-spec insert(Offset :: non_neg_integer(), Line :: pos_integer(), source_map()) ->
source_map().
insert(Offset, Line, SourceMap) -> gb_trees:insert(Offset, Line, SourceMap).
-spec get_loc(Offset :: non_neg_integer(), source_map()) -> #loc{}.
get_loc(Offset, SourceMap) ->
case gb_trees:smaller(Offset, SourceMap) of
{Start, Line} -> #loc{line = Line, col = Offset - Start - 1};
none -> #loc{line = 0, col = Offset}
end.

View file

@ -1,5 +0,0 @@
-type source_map() :: gb_trees:tree(non_neg_integer(), pos_integer()).
-type span() :: {non_neg_integer(), pos_integer()}.
-record(loc, {line :: pos_integer(), col :: non_neg_integer()}).

View file

@ -1,14 +1,17 @@
-module(akhamoth). -module(akhamoth).
-export([compile/1, collect/2]). -export([compile_file/1, compile_binary/1, collect/2]).
compile(Path) -> compile_file(Path) ->
{ok, Src} = file:read_file(Path), {ok, Source} = file:read_file(Path),
Lexer = akh_lexer:new(Src), compile_binary(Source).
collect(Lexer, []).
collect(Lexer, Acc) -> compile_binary(Source) ->
case akh_lexer:next(Lexer) of Lx = akh_lexer:new(Source),
collect(Lx, []).
collect(Lx, Acc) ->
case akh_lexer:next(Lx) of
none -> Acc; none -> Acc;
{ok, T, L} -> collect(L, [T | Acc]) {ok, T, L} -> collect(L, [T | Acc])
end. end.