forth-riscv

My forth
git clone git://git.electrosoup.com/forth-riscv
Log | Files | Refs

commit df1e0180ae8dce560bd9cf9c1f2498ba7e8dcc29
parent 8a5d7259aa0ecfc12fc023f049ac99202078d6de
Author: Christian Ermann <christianermann@gmail.com>
Date:   Sun, 30 Mar 2025 13:05:13 -0700

Replace prefix handlers with more flexible parsing

Diffstat:
Msrc/forth.s | 361++++++++++++++++++++++++++-----------------------------------------------------
1 file changed, 120 insertions(+), 241 deletions(-)

diff --git a/src/forth.s b/src/forth.s @@ -180,63 +180,6 @@ defcode "bye", bye, 0x71F39F63 next # ----------------------------------------------------------------------------- -# built-in prefixes -# ----------------------------------------------------------------------------- - -defvar "#", prefix_decimal, 0x260C9112 - .int 0x23 - .int 0 - .int string_to_decimal_comma - -defvar "$", prefix_hex, 0x210C8933 - .int 0x24 - .int _prefix_decimal - .int string_to_hex_comma - -defvar "%", prefix_binary, 0x200C87A0 - .int 0x25 - .int _prefix_hex - .int string_to_binary_comma - -defvar "'", prefix_address, 0x220C8AC6 - .int 0x27 - .int _prefix_binary - .int string_to_address_comma - -defvar "^", prefix_postpone, 0xDB0C1B01 - .int 0x5E - .int _prefix_address - .int string_to_postpone - -defvar "*", prefix_execute, 0x2F0C9F3D - .int 0x2A - .int _prefix_postpone - .int string_to_execute - -defvar ":", prefix_define, 0x3F0CB86D - .int 0x3A - .int _prefix_execute - .int string_to_define - -defvar "\"", prefix_string, 0x270C92A5 - .int 0x22 - .int _prefix_define - .int token_to_string_comma - -defvar "|", prefix_comment, 0xF90C4A3B - .int 0x7C - .int _prefix_string - .int token_to_comment - -defvar "(", prefix_stack_comment, 0x2D0C9C17 - .int 0x28 - .int _prefix_comment - .int token_to_stack_comment - -defvar "latest-prefix", latest_prefix, 0x86C823C5 - .int _prefix_stack_comment - -# ----------------------------------------------------------------------------- # constants and variables # ----------------------------------------------------------------------------- @@ -388,9 +331,23 @@ _refill_fail: _refill_dispatch_table: .int accept_tib +defcode "apply-offset", apply_offset, 0x00000000 + # ( addr len offset -- addr len ) + lw x, 0*cell(psp) + lw y, 1*cell(psp) + add y, y, w + sub w, x, w + sw y, 1*cell(psp) + addi psp, psp, 1*cell + next + defword "source@", source_fetch, 0x5F1E7A48 + # ( -- addr len ) jal tick_source jal two_fetch + jal source_offset + jal fetch + jal apply_offset exit defword "source!", source_store, 0xC01F12FB @@ -463,37 +420,28 @@ _skip_until_done: defword "parse", parse, 0x423B42EC # ( delimiter -- addr len ) - # set delimiter - la x, _delimiter - sw w, 0(x) - # load source - la x, _tick_source - lw w, 0*cell(x) - lw x, 1*cell(x) - # apply offset - la y, _source_offset - lw y, 0*cell(y) - add x, x, y - sub w, w, y + jal delimiter + jal store + jal source_fetch # find start of token - push x + jal over + jal to_ret jal skip_while - lw x, 0*cell(psp) - push_ret x # find end of token + jal over + jal to_ret jal skip_until - # compute offset - lw w, 0*cell(psp) - la x, _tick_source - lw x, 1*cell(x) - sub y, w, x - addi y, y, 1 - la x, _source_offset - sw y, 0(x) - # set outputs - pop_ret x - sw x, 0*cell(psp) - sub w, w, x + # compute lengths + jal drop + jal from_ret + jal tuck # addr + jal minus # len + # advance source to start of token + jal over + jal from_ret + jal minus # n skipped + jal source_offset + jal plus_store exit defword "parse-until", parse_until, 0x6266DA65 @@ -533,23 +481,23 @@ defword "parse-word", parse_word, 0xB218226F jal parse exit -defword "token>string", token_to_string, 0x7EB3F3A5 - # ( addr len -- addr len ) - addi psp, psp, 1*cell - # rewind to beginning of string - la x, _source_offset - lw y, 0(x) - sub y, y, w - addi y, y, -1 - sw y, 0(x) - # parse string - li w, 0x22 +defword "parse-word-advance", parse_word_advance, 0x50B4A758 + jal parse_word + jal dup + jal source_offset + jal plus_store + exit + +defword "parse-string", parse_string, 0x1740A6F6 + # ( -- addr len ) + push_imm 0x22 jal parse_until jal translate_escape exit -defword "token>string,", token_to_string_comma, 0xFE4860AB - jal token_to_string +defword "\"", double_quote, 0x270C92A5, flags=flag_immediate + # ( -- ) + jal parse_string # lit-string push w la w, lit_string @@ -570,7 +518,6 @@ defword "token>string,", token_to_string_comma, 0xFE4860AB add t0, t0, x sw t0, 0*cell(y) jal align_code - push_imm -1 exit defcode "translate-escape", translate_escape, 0x913F36AB @@ -609,36 +556,16 @@ _translate_escape_done: sub w, y, x next -defword "token>comment", token_to_comment, 0x49ACBA31 - # ( addr len -- addr len ) - addi psp, psp, 1*cell - # rewind to beginning of comment - la x, _source_offset - lw y, 0(x) - sub y, y, w - addi y, y, -1 - sw y, 0(x) - # skip comment - li w, 0 +defword "|", line_comment, 0xF90C4A3B, flags=flag_immediate + push_imm 0x0A # '\n' jal parse_until - addi psp, psp, 1*cell - li w, -1 + jal two_drop exit -defword "token>stack-comment", token_to_stack_comment, 0x8B17CDB4 - # ( addr len -- addr len ) - addi psp, psp, 1*cell - # rewind to beginning of comment - la x, _source_offset - lw y, 0(x) - sub y, y, w - addi y, y, -1 - sw y, 0(x) - # skip comment - li w, 0x29 +defword "(", stack_comment, 0x2D0C9C17, flags=flag_immediate + push_imm 0x29 # ')' jal parse_until - addi psp, psp, 1*cell - li w, -1 + jal two_drop exit defcode ">number", to_number, 0x2F770E4C @@ -696,85 +623,57 @@ _to_number_done: mv w, x next -defword "string>#", string_to_decimal, 0x75831783 +defcode "#", base_10, 0x260C9112, flags=flag_immediate la x, _base li y, 10 sw y, 0(x) - jal to_number - exit + next -defword "string>$", string_to_hex, 0x7A831F62 +defcode "$", base_16, 0x210C8933, flags=flag_immediate la x, _base li y, 16 sw y, 0(x) - jal to_number - exit + next -defword "string>%", string_to_binary, 0x7B8320F5 +defcode "%", base_2, 0x200C87A0, flags=flag_immediate la x, _base li y, 2 sw y, 0(x) - jal to_number - exit - -defword "number,", number_comma, 0x5E932C64 - bnez w, _number_comma_error - not w, w - jal swap - jal literal - exit -_number_comma_error: - mv w, zero - exit - -defword "string>#,", string_to_decimal_comma, 0xAC5E487D - jal string_to_decimal - jal number_comma - exit - -defword "string>$,", string_to_hex_comma, 0x2A6A47CA - jal string_to_hex - jal number_comma - exit - -defword "string>%,", string_to_binary_comma, 0x486CB59B - jal string_to_binary - jal number_comma - exit + next -defword "string>address", string_to_address, 0xFDC3CE9A +defword "'", tick, 0x220C8AC6, flags=flag_immediate + jal parse_word_advance jal find - addi x, w, code_offset - push x - exit - -defword "string>address,", string_to_address_comma, 0x313E6882 - jal string_to_address - beqz w, _string_to_address_comma_fail - jal swap + beqz w, _tick_done +_tick_found_word: + jal to_cfa + jal fetch jal literal - exit -_string_to_address_comma_fail: +_tick_done: exit -defword "string>postpone", string_to_postpone, 0x9EFD2390 +defword "...", postpone, 0x0AC31C19, flags=flag_immediate + jal parse_word_advance jal find - beqz w, _string_to_postpone_fail - addi x, w, flag_offset - lb x, 0(x) - andi x, x, flag_immediate - addi w, w, code_offset - lw w, 0(w) - beqz x, _string_to_postpone_compile -_string_to_postpone_execute: + jal dup + jal q_branch + .int _postpone_fail +_postpone_found_word: + jal dup + jal macro_q + jal q_branch + .int _postpone_normal +_postpone_macro: # stores `jal-to <xt>` into the current definition + jal to_cfa + jal fetch jal jal_to jal compile_comma - push w - li w, -1 - exit -_string_to_postpone_compile: + j _postpone_done +_postpone_normal: # stores `literal<xt> jal-to compile,` into the current definition + jal to_cfa + jal fetch jal literal push w la w, jal_to @@ -784,10 +683,10 @@ _string_to_postpone_compile: la w, compile_comma jal jal_to jal compile_comma - push w - li w, -1 - exit -_string_to_postpone_fail: + j _postpone_done +_postpone_fail: + jal drop +_postpone_done: exit defword "literal", literal, 0xECB9D8E4, flags=flag_immediate @@ -827,16 +726,6 @@ _literal_small_2: jal compile_comma exit -defword "string>execute", string_to_execute, 0xC1A5C011 - jal string_to_address - beqz w, _string_to_execute_done - pop w - jal execute - push w - li w, 1 -_string_to_execute_done: - exit - defcode "word>hash", word_to_hash, 0x50E0A245 load_cell w, hash_offset(w) next @@ -883,44 +772,20 @@ defcode "execute", execute, 0xA01E3D98 jr x # 'next' should be called by the executed word -defcode "match-prefix", match_prefix, 0x2C1AB495 - lw x, 0(psp) - lb x, 0(x) - la y, _latest_prefix - lw y, 0(y) - push w -_match_prefix_loop: - lb w, 0(y) - beq w, x, _match_prefix_done - addi y, y, cell - lw y, 0(y) - bnez y, _match_prefix_loop -_match_prefix_fail: - mv w, zero - next -_match_prefix_done: - # decrement string length - lw x, 0(psp) - addi x, x, -1 - sw x, 0(psp) - # increment string address - lw x, cell(psp) - addi x, x, 1 - sw x, cell(psp) - # code address - addi w, y, 2*cell - next defword "interpret", interpret, 0x1F98C57A _interpret_start: + # ( -- addr len ) jal parse_word beqz w, _interpret_parse_area_empty - jal match_prefix - bnez w, _interpret_prefix _interpret_find_word: - pop w + jal two_dup jal find - beqz w, _interpret_retry + beqz w, _interpret_number + jal swap + jal source_offset + jal plus_store + jal nip _interpret_word_found: addi w, w, flag_offset lb x, 0(w) @@ -937,18 +802,27 @@ _interpret_macro: pop w jalr x j _interpret_start -_interpret_prefix: - load_cell x, 0(w) - pop w - jalr x - pop x - beqz w, _interpret_retry - mv w, x +_interpret_number: + jal drop + jal two_dup + jal to_number + bnez w, _interpret_number_fail + jal drop + jal swap + jal source_offset + jal plus_store + jal nip + jal literal + jal base_10 j _interpret_start +_interpret_number_fail: + jal two_drop _interpret_retry: - pop w - j _interpret_start + # ( addr len -- addr len ) + jal one_minus + bnez w, _interpret_find_word _interpret_parse_area_empty: + # ( addr len=0 -- ) addi psp, psp, cell pop w jal interpret_execute @@ -1124,7 +998,7 @@ defcode "@2", fetch_second, 0x9ED81397 lw w, 1*cell(w) next -defcode "+!", add_store, 0x08DC01D1 +defcode "+!", plus_store, 0x08DC01D1 pop x load_cell y, 0(w) add y, y, x @@ -1201,7 +1075,7 @@ defcode "lit-string", lit_string, 0xC7BE567C # TODO: ip -> ra, w=tos defword "allot", allot, 0xADB1A69F jal data_pointer - jal add_store + jal plus_store exit defcode ",", comma, 0x290C95CB @@ -1367,6 +1241,10 @@ defcode "-", minus, 0x280C9438 sub w, x, w next +defcode "1-", one_minus, 0x00000000 + addi w, w, -1 + next + defcode "abs", abs, 0x2A48023B bgtz w, _abs_positive not w, w @@ -1608,7 +1486,8 @@ defword "string>header", string_to_header, 0x16FBE3AB jal comma exit -defword "string>define", string_to_define, 0x888D086D +defword ":", define, 0x3F0CB86D, flags=flag_immediate + jal parse_word_advance jal string_to_header jal latest jal fetch @@ -1620,7 +1499,7 @@ defword "string>define", string_to_define, 0x888D086D exit defword "create", create, 0x26BB595D, flags=flag_immediate - jal parse_word + jal parse_word_advance jal string_to_header jal r_bracket jal dovar_comma @@ -1736,7 +1615,7 @@ defword "variable", variable, 0x19385305, flags=flag_immediate exit defword "constant", constant, 0x0691EA25, flags=flag_immediate - jal parse_word + jal parse_word_advance jal string_to_header jal r_bracket jal docon_comma