parser round 1

This commit is contained in:
wires 2025-07-11 11:21:14 -04:00
parent d35daf8ec1
commit b01b560d31
Signed by: wires
SSH key fingerprint: SHA256:9GtP+M3O2IivPDlw1UY872UPUuJH2gI0yG6ExBxaaiM
6 changed files with 376 additions and 14 deletions

37
Cargo.lock generated
View file

@ -41,6 +41,12 @@ version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c34f04666d835ff5d62e058c3995147c06f42fe86ff053337632bca83e42702d" checksum = "c34f04666d835ff5d62e058c3995147c06f42fe86ff053337632bca83e42702d"
[[package]]
name = "equivalent"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
[[package]] [[package]]
name = "errno" name = "errno"
version = "0.3.13" version = "0.3.13"
@ -68,6 +74,12 @@ dependencies = [
"windows-sys 0.59.0", "windows-sys 0.59.0",
] ]
[[package]]
name = "hashbrown"
version = "0.15.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5"
[[package]] [[package]]
name = "home" name = "home"
version = "0.5.11" version = "0.5.11"
@ -77,6 +89,16 @@ dependencies = [
"windows-sys 0.59.0", "windows-sys 0.59.0",
] ]
[[package]]
name = "indexmap"
version = "2.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fe4cd85333e22411419a0bcae1297d25e58c9443848b11dc6a86fefe8c78a661"
dependencies = [
"equivalent",
"hashbrown",
]
[[package]] [[package]]
name = "libc" name = "libc"
version = "0.2.174" version = "0.2.174"
@ -185,6 +207,12 @@ dependencies = [
"windows-sys 0.59.0", "windows-sys 0.59.0",
] ]
[[package]]
name = "scoped-tls"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294"
[[package]] [[package]]
name = "smallvec" name = "smallvec"
version = "1.15.1" version = "1.15.1"
@ -222,6 +250,12 @@ dependencies = [
"syn", "syn",
] ]
[[package]]
name = "typed-arena"
version = "2.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6af6ae20167a9ece4bcb41af5b80f8a1f1df981f6391189ce00fd257af04126a"
[[package]] [[package]]
name = "unicode-ident" name = "unicode-ident"
version = "1.0.18" version = "1.0.18"
@ -397,6 +431,9 @@ name = "wires_lisp"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"indexmap",
"rustyline", "rustyline",
"scoped-tls",
"thiserror", "thiserror",
"typed-arena",
] ]

View file

@ -6,4 +6,8 @@ license = "MIT"
[dependencies] [dependencies]
anyhow = "1.0.98" anyhow = "1.0.98"
indexmap = "2.10.0"
rustyline = "16.0.0" rustyline = "16.0.0"
scoped-tls = "1.0.1"
thiserror = "2.0.12"
typed-arena = "2.0.2"

View file

@ -1,19 +1,49 @@
use rustyline::error::ReadlineError; use std::cell::RefCell;
fn main() -> anyhow::Result<()> { use rustyline::error::ReadlineError;
let mut rl = rustyline::DefaultEditor::new()?; use scoped_tls::scoped_thread_local;
loop {
match rl.readline("> ") { use crate::parsing::{Interner, parse};
Ok(line) => {
eval(&line); mod parsing;
rl.add_history_entry(line)?;
} #[derive(Default)]
Err(ReadlineError::Eof) | Err(ReadlineError::Interrupted) => break Ok(()), struct Session {
Err(e) => break Err(e.into()), interner: RefCell<Interner>,
} }
scoped_thread_local!(static SESSION: Session);
/// Runs `f` with a shared reference to the `Session` stored in the `SESSION`
/// scoped thread-local and returns whatever `f` returns.
///
/// NOTE(review): `SESSION.with` presumably panics when no session has been
/// installed (scoped-tls behaviour) — confirm callers only run inside
/// `create_session_then`.
fn with_session<R>(f: impl FnOnce(&Session) -> R) -> R {
SESSION.with(f)
}
fn create_session_then<R>(f: impl FnOnce() -> R) -> R {
assert!(!SESSION.is_set());
let session = Default::default();
SESSION.set(&session, f)
}
fn eval(line: &str) {
for expr in parse(line) {
println!("{expr:?}");
} }
} }
fn eval(input: &str) { fn main() -> anyhow::Result<()> {
println!("{input}"); create_session_then(|| {
let mut rl = rustyline::DefaultEditor::new()?;
loop {
match rl.readline("> ") {
Ok(line) => {
rl.add_history_entry(line.clone())?;
eval(&line);
}
Err(ReadlineError::Interrupted) => (),
Err(ReadlineError::Eof) => break Ok(()),
Err(e) => break Err(e.into()),
}
}
})
} }

179
src/parsing.rs Normal file
View file

@ -0,0 +1,179 @@
mod cursor;
mod symbol;
use std::num::ParseIntError;
pub use cursor::Cursor;
pub use symbol::Interner;
use symbol::Symbol;
/// Returns `true` if `c` may begin an atom: an ASCII letter or one of
/// `_ * / = < >`.
fn is_atom_start(c: char) -> bool {
    c.is_ascii_alphabetic() || matches!(c, '_' | '*' | '/' | '=' | '<' | '>')
}
/// Returns `true` if `c` may appear after the first character of an atom:
/// an ASCII letter or digit, or one of `_ * / = < > - +`.
fn is_atom_continue(c: char) -> bool {
    c.is_ascii_alphanumeric() || matches!(c, '_' | '*' | '/' | '=' | '<' | '>' | '-' | '+')
}
/// Lexical category assigned to a token by `Cursor::next_token`.
#[derive(Debug)]
enum TokenKind {
/// A `(` character.
OpenParen,
/// A `)` character.
CloseParen,
/// A symbol-like token made of atom characters (see `is_atom_start` /
/// `is_atom_continue`), including digit runs followed by atom characters.
Atom,
/// A run of ASCII digits, optionally preceded by a single `-` or `+`.
Number,
}
/// Position of a token within the source text, as reported by the cursor.
///
/// The parser slices the source with `start..start + len`, so both fields
/// are used as offsets into the source `&str`.
#[derive(Debug)]
pub struct Span {
// Offset at which the span begins.
start: usize,
// Length of the span.
len: usize,
}
/// A single lexical token: its category plus where it sits in the source.
#[derive(Debug)]
struct Token {
// What kind of token this is.
kind: TokenKind,
// Where the token's text is located in the source.
span: Span,
}
impl<'a> Cursor<'a> {
    /// Scans and returns the next token, or `None` once the input is exhausted.
    ///
    /// Leading whitespace is skipped. The returned span covers only the
    /// token's own characters.
    ///
    /// NOTE(review): any character that is not whitespace, an atom character,
    /// a digit, a sign or a parenthesis reaches `todo!()` and panics.
    fn next_token(&mut self) -> Option<Token> {
        let kind = loop {
            // Start a fresh span at the current position on every attempt.
            self.reset_span();
            let first = self.bump()?;

            if first.is_whitespace() {
                // Swallow the whole whitespace run, then try again.
                self.eat_while(char::is_whitespace);
                continue;
            }

            break match first {
                '(' => TokenKind::OpenParen,
                ')' => TokenKind::CloseParen,
                '0'..='9' => self.number_or_atom(),
                // A sign directly followed by a digit starts a (possible) number.
                '-' | '+' if self.peek().is_ascii_digit() => self.number_or_atom(),
                // A lone sign, or any atom-start character, begins an atom.
                c if c == '-' || c == '+' || is_atom_start(c) => {
                    self.eat_while(is_atom_continue);
                    TokenKind::Atom
                }
                _ => todo!(),
            };
        };

        let span = self.cur_span();
        Some(Token { kind, span })
    }

    /// Consumes a run of ASCII digits and decides whether the token is a number.
    ///
    /// If an atom character immediately follows the digits, the rest of the
    /// atom is consumed too and the whole token is classified as an atom.
    fn number_or_atom(&mut self) -> TokenKind {
        self.eat_while(|c| c.is_ascii_digit());
        if !is_atom_continue(self.peek()) {
            return TokenKind::Number;
        }
        self.eat_while(is_atom_continue);
        TokenKind::Atom
    }
}
/// A parsed expression.
#[derive(Debug)]
pub enum Expr {
/// A symbol, stored as an interned [`Symbol`].
Atom(Symbol),
/// A 32-bit signed integer literal.
Number(i32),
/// A parenthesised sequence of expressions.
List(Vec<Expr>),
}
impl Expr {
    /// Turns `string` into a [`Symbol`] via `Symbol::new` and wraps it in
    /// [`Expr::Atom`].
    fn atom(string: &str) -> Self {
        let symbol = Symbol::new(string);
        Self::Atom(symbol)
    }

    /// Parses `string` as an `i32` and wraps it in [`Expr::Number`].
    ///
    /// # Errors
    ///
    /// Returns the underlying `ParseIntError` if `string` is not a valid `i32`.
    fn parse_int(string: &str) -> Result<Self, ParseIntError> {
        let value: i32 = string.parse()?;
        Ok(Self::Number(value))
    }
}
/// Errors produced while parsing source text into [`Expr`] values.
#[derive(Debug, thiserror::Error)]
pub enum Error {
/// The input ended while a list was still being read.
#[error("incomplete form")]
Incomplete,
/// A top-level token that cannot begin an expression was found.
#[error("Unexpected token")]
Unexpected,
/// A number token could not be parsed as an `i32`.
#[error(transparent)]
ParseInt(#[from] ParseIntError),
}
/// Iterator that parses top-level expressions out of a source string.
pub struct ParseIter<'a> {
// The full source text; token spans are sliced out of it.
src: &'a str,
// Tokenizer state over `src`.
cursor: Cursor<'a>,
}
impl<'a> ParseIter<'a> {
fn new(src: &'a str) -> Self {
Self {
src,
cursor: Cursor::new(src),
}
}
fn parse_list(&mut self) -> Result<Expr, Error> {
let mut res = vec![];
while let Some(term) = self.parse_list_helper() {
res.push(term?);
}
Ok(Expr::List(res))
}
fn parse_list_helper(&mut self) -> Option<Result<Expr, Error>> {
let Self { src, cursor } = self;
let Token {
kind,
span: Span { start, len },
} = match cursor.next_token() {
None => return Some(Err(Error::Incomplete)),
Some(t) => t,
};
let end = start + len;
let src_str = &src[start..end];
match kind {
TokenKind::Atom => Some(Ok(Expr::atom(src_str))),
TokenKind::Number => Some(Expr::parse_int(src_str).map_err(Into::into)),
TokenKind::OpenParen => Some(self.parse_list()),
TokenKind::CloseParen => None,
}
}
}
impl<'a> Iterator for ParseIter<'a> {
type Item = Result<Expr, Error>;
fn next(&mut self) -> Option<Self::Item> {
let Self { src, cursor } = self;
let Token {
kind,
span: Span { start, len },
} = cursor.next_token()?;
let end = start + len;
let src_str = &src[start..end];
Some(match kind {
TokenKind::Atom => Ok(Expr::atom(src_str)),
TokenKind::Number => Expr::parse_int(src_str).map_err(Into::into),
TokenKind::OpenParen => self.parse_list(),
_ => Err(Error::Unexpected),
})
}
}
/// Returns an iterator over the top-level expressions parsed from `input`.
///
/// Nothing is parsed up front; each call to `next` on the iterator parses
/// one expression.
pub fn parse(input: &str) -> ParseIter<'_> {
    ParseIter::new(input)
}

50
src/parsing/cursor.rs Normal file
View file

@ -0,0 +1,50 @@
use std::str::Chars;
use super::Span;
/// Character cursor over a source string that also tracks the span of the
/// text consumed since the last `reset_span`.
pub struct Cursor<'a> {
// Remaining, not yet consumed characters.
chars: Chars<'a>,
// Offset at which the current span begins.
start: usize,
// Length of the current span.
len: usize,
}
impl<'a> Cursor<'a> {
    /// Creates a cursor at the start of `input` with an empty span.
    pub fn new(input: &'a str) -> Self {
        Self {
            chars: input.chars(),
            start: 0,
            len: 0,
        }
    }

    /// Consumes the next character and extends the current span over it.
    ///
    /// Returns `None` at end of input; the span is left unchanged in that case.
    pub fn bump(&mut self) -> Option<char> {
        let c = self.chars.next()?;
        // Spans are used to slice the source `&str`, so they must be measured
        // in bytes, not characters.
        self.len += c.len_utf8();
        Some(c)
    }

    /// Returns the next character without consuming it, or `'\0'` at end of input.
    pub fn peek(&self) -> char {
        self.chars.clone().next().unwrap_or('\0')
    }

    /// Returns `true` when every character has been consumed.
    pub fn is_empty(&self) -> bool {
        self.chars.as_str().is_empty()
    }

    /// Consumes characters for as long as `pred` accepts the next one.
    pub fn eat_while(&mut self, pred: impl Fn(char) -> bool) {
        // Check for end of input first so `pred` never sees the `'\0'` sentinel.
        while !self.is_empty() && pred(self.peek()) {
            self.bump();
        }
    }

    /// Returns the span of everything consumed since the last `reset_span`.
    pub fn cur_span(&self) -> Span {
        Span {
            start: self.start,
            len: self.len,
        }
    }

    /// Starts a new, empty span at the current position.
    pub fn reset_span(&mut self) {
        self.start += self.len;
        self.len = 0;
    }
}

62
src/parsing/symbol.rs Normal file
View file

@ -0,0 +1,62 @@
use std::fmt::{self, Formatter};
use indexmap::IndexSet;
use typed_arena::Arena;
use crate::with_session;
/// String interner that maps each distinct string to a stable index.
///
/// String bytes are stored in `arena`; `strings` holds references into that
/// arena whose lifetime has been unsafely extended to `'static`.
#[derive(Default)]
pub struct Interner {
    // Declared before `arena` so it is dropped first (fields drop in
    // declaration order): the extended-lifetime references never outlive the
    // memory they point into, not even during drop.
    strings: IndexSet<&'static str>,
    // Backing storage for the bytes of every interned string.
    arena: Arena<u8>,
}
impl Interner {
    /// Returns the symbol for `string`, interning it first if it is new.
    ///
    /// Interning the same text again returns the same symbol.
    ///
    /// # Panics
    ///
    /// Panics if the number of interned strings no longer fits in a `u32`.
    fn intern(&mut self, string: &str) -> Symbol {
        if let Some(idx) = self.strings.get_index_of(string) {
            return Self::symbol_at(idx);
        }
        let stored: &str = self.arena.alloc_str(string);
        // SAFETY: `stored` points into `self.arena`, which is owned by this
        // interner and is only ever appended to here. The `'static` lifetime
        // is a private fiction: these references are only handed out through
        // `get`, which shortens them to the borrow of `self`.
        let stored: &'static str = unsafe { &*(stored as *const str) };
        let (idx, newly_inserted) = self.strings.insert_full(stored);
        debug_assert!(newly_inserted);
        Self::symbol_at(idx)
    }

    /// Returns the text of `symbol`, or `None` if its index is not in this interner.
    fn get(&self, symbol: Symbol) -> Option<&str> {
        self.strings.get_index(symbol.0 as usize).copied()
    }

    /// Converts a set index into a `Symbol`, refusing to silently truncate.
    fn symbol_at(index: usize) -> Symbol {
        let id = u32::try_from(index).expect("interner holds more than u32::MAX symbols");
        Symbol(id)
    }
}
/// Handle to an interned string: the string's index in the session's `Interner`.
///
/// Two symbols compare equal when their indices are equal.
#[derive(Clone, Copy, PartialEq, Eq)]
pub struct Symbol(u32);
/// Formats the symbol like the `Debug` form of the string it stands for.
impl fmt::Debug for Symbol {
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        let text = self.as_str();
        <str as fmt::Debug>::fmt(text, f)
    }
}
/// Formats the symbol as the plain text of the string it stands for.
impl fmt::Display for Symbol {
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        let text = self.as_str();
        <str as fmt::Display>::fmt(text, f)
    }
}
impl Symbol {
    /// Interns `string` in the current session's interner and returns its symbol.
    ///
    /// Must be called while a session is active (see `with_session`).
    pub fn new(string: &str) -> Self {
        with_session(|session| session.interner.borrow_mut().intern(string))
    }

    /// Returns the text this symbol stands for in the current session.
    ///
    /// # Panics
    ///
    /// Panics if the symbol's index is not present in the current session's
    /// interner.
    fn as_str(&self) -> &str {
        with_session(|session| {
            let interner = session.interner.borrow();
            let text: &str = interner
                .get(*self)
                .expect("symbol was not interned in the current session");
            // SAFETY: `text` points into the session interner's arena, which
            // keeps interned strings in place for as long as the session
            // lives, so the reference stays valid after the `RefCell` guard
            // is released. NOTE(review): the returned lifetime is tied to
            // `&self`, not to the session — this is only sound while the
            // returned `&str` does not outlive the session.
            unsafe { &*(text as *const str) }
        })
    }
}