commit df1e0180ae8dce560bd9cf9c1f2498ba7e8dcc29
parent 8a5d7259aa0ecfc12fc023f049ac99202078d6de
Author: Christian Ermann <christianermann@gmail.com>
Date: Sun, 30 Mar 2025 13:05:13 -0700
Replace prefix handlers with more flexible parsing
Diffstat:
| M | src/forth.s | | | 361 | ++++++++++++++++++++++++++----------------------------------------------------- |
1 file changed, 120 insertions(+), 241 deletions(-)
diff --git a/src/forth.s b/src/forth.s
@@ -180,63 +180,6 @@ defcode "bye", bye, 0x71F39F63
next
# -----------------------------------------------------------------------------
-# built-in prefixes
-# -----------------------------------------------------------------------------
-
-defvar "#", prefix_decimal, 0x260C9112
- .int 0x23
- .int 0
- .int string_to_decimal_comma
-
-defvar "$", prefix_hex, 0x210C8933
- .int 0x24
- .int _prefix_decimal
- .int string_to_hex_comma
-
-defvar "%", prefix_binary, 0x200C87A0
- .int 0x25
- .int _prefix_hex
- .int string_to_binary_comma
-
-defvar "'", prefix_address, 0x220C8AC6
- .int 0x27
- .int _prefix_binary
- .int string_to_address_comma
-
-defvar "^", prefix_postpone, 0xDB0C1B01
- .int 0x5E
- .int _prefix_address
- .int string_to_postpone
-
-defvar "*", prefix_execute, 0x2F0C9F3D
- .int 0x2A
- .int _prefix_postpone
- .int string_to_execute
-
-defvar ":", prefix_define, 0x3F0CB86D
- .int 0x3A
- .int _prefix_execute
- .int string_to_define
-
-defvar "\"", prefix_string, 0x270C92A5
- .int 0x22
- .int _prefix_define
- .int token_to_string_comma
-
-defvar "|", prefix_comment, 0xF90C4A3B
- .int 0x7C
- .int _prefix_string
- .int token_to_comment
-
-defvar "(", prefix_stack_comment, 0x2D0C9C17
- .int 0x28
- .int _prefix_comment
- .int token_to_stack_comment
-
-defvar "latest-prefix", latest_prefix, 0x86C823C5
- .int _prefix_stack_comment
-
-# -----------------------------------------------------------------------------
# constants and variables
# -----------------------------------------------------------------------------
@@ -388,9 +331,23 @@ _refill_fail:
_refill_dispatch_table:
.int accept_tib
+defcode "apply-offset", apply_offset, 0x00000000
+ # ( addr len offset -- addr len )
+ lw x, 0*cell(psp)
+ lw y, 1*cell(psp)
+ add y, y, w
+ sub w, x, w
+ sw y, 1*cell(psp)
+ addi psp, psp, 1*cell
+ next
+
defword "source@", source_fetch, 0x5F1E7A48
+ # ( -- addr len )
jal tick_source
jal two_fetch
+ jal source_offset
+ jal fetch
+ jal apply_offset
exit
defword "source!", source_store, 0xC01F12FB
@@ -463,37 +420,28 @@ _skip_until_done:
defword "parse", parse, 0x423B42EC
# ( delimiter -- addr len )
- # set delimiter
- la x, _delimiter
- sw w, 0(x)
- # load source
- la x, _tick_source
- lw w, 0*cell(x)
- lw x, 1*cell(x)
- # apply offset
- la y, _source_offset
- lw y, 0*cell(y)
- add x, x, y
- sub w, w, y
+ jal delimiter
+ jal store
+ jal source_fetch
# find start of token
- push x
+ jal over
+ jal to_ret
jal skip_while
- lw x, 0*cell(psp)
- push_ret x
# find end of token
+ jal over
+ jal to_ret
jal skip_until
- # compute offset
- lw w, 0*cell(psp)
- la x, _tick_source
- lw x, 1*cell(x)
- sub y, w, x
- addi y, y, 1
- la x, _source_offset
- sw y, 0(x)
- # set outputs
- pop_ret x
- sw x, 0*cell(psp)
- sub w, w, x
+ # compute lengths
+ jal drop
+ jal from_ret
+ jal tuck # addr
+ jal minus # len
+ # advance source to start of token
+ jal over
+ jal from_ret
+ jal minus # n skipped
+ jal source_offset
+ jal plus_store
exit
defword "parse-until", parse_until, 0x6266DA65
@@ -533,23 +481,23 @@ defword "parse-word", parse_word, 0xB218226F
jal parse
exit
-defword "token>string", token_to_string, 0x7EB3F3A5
- # ( addr len -- addr len )
- addi psp, psp, 1*cell
- # rewind to beginning of string
- la x, _source_offset
- lw y, 0(x)
- sub y, y, w
- addi y, y, -1
- sw y, 0(x)
- # parse string
- li w, 0x22
+defword "parse-word-advance", parse_word_advance, 0x50B4A758
+ jal parse_word
+ jal dup
+ jal source_offset
+ jal plus_store
+ exit
+
+defword "parse-string", parse_string, 0x1740A6F6
+ # ( -- addr len )
+ push_imm 0x22
jal parse_until
jal translate_escape
exit
-defword "token>string,", token_to_string_comma, 0xFE4860AB
- jal token_to_string
+defword "\"", double_quote, 0x270C92A5, flags=flag_immediate
+ # ( -- )
+ jal parse_string
# lit-string
push w
la w, lit_string
@@ -570,7 +518,6 @@ defword "token>string,", token_to_string_comma, 0xFE4860AB
add t0, t0, x
sw t0, 0*cell(y)
jal align_code
- push_imm -1
exit
defcode "translate-escape", translate_escape, 0x913F36AB
@@ -609,36 +556,16 @@ _translate_escape_done:
sub w, y, x
next
-defword "token>comment", token_to_comment, 0x49ACBA31
- # ( addr len -- addr len )
- addi psp, psp, 1*cell
- # rewind to beginning of comment
- la x, _source_offset
- lw y, 0(x)
- sub y, y, w
- addi y, y, -1
- sw y, 0(x)
- # skip comment
- li w, 0
+defword "|", line_comment, 0xF90C4A3B, flags=flag_immediate
+ push_imm 0x0A # '\n'
jal parse_until
- addi psp, psp, 1*cell
- li w, -1
+ jal two_drop
exit
-defword "token>stack-comment", token_to_stack_comment, 0x8B17CDB4
- # ( addr len -- addr len )
- addi psp, psp, 1*cell
- # rewind to beginning of comment
- la x, _source_offset
- lw y, 0(x)
- sub y, y, w
- addi y, y, -1
- sw y, 0(x)
- # skip comment
- li w, 0x29
+defword "(", stack_comment, 0x2D0C9C17, flags=flag_immediate
+ push_imm 0x29 # ')'
jal parse_until
- addi psp, psp, 1*cell
- li w, -1
+ jal two_drop
exit
defcode ">number", to_number, 0x2F770E4C
@@ -696,85 +623,57 @@ _to_number_done:
mv w, x
next
-defword "string>#", string_to_decimal, 0x75831783
+defcode "#", base_10, 0x260C9112, flags=flag_immediate
la x, _base
li y, 10
sw y, 0(x)
- jal to_number
- exit
+ next
-defword "string>$", string_to_hex, 0x7A831F62
+defcode "$", base_16, 0x210C8933, flags=flag_immediate
la x, _base
li y, 16
sw y, 0(x)
- jal to_number
- exit
+ next
-defword "string>%", string_to_binary, 0x7B8320F5
+defcode "%", base_2, 0x200C87A0, flags=flag_immediate
la x, _base
li y, 2
sw y, 0(x)
- jal to_number
- exit
-
-defword "number,", number_comma, 0x5E932C64
- bnez w, _number_comma_error
- not w, w
- jal swap
- jal literal
- exit
-_number_comma_error:
- mv w, zero
- exit
-
-defword "string>#,", string_to_decimal_comma, 0xAC5E487D
- jal string_to_decimal
- jal number_comma
- exit
-
-defword "string>$,", string_to_hex_comma, 0x2A6A47CA
- jal string_to_hex
- jal number_comma
- exit
-
-defword "string>%,", string_to_binary_comma, 0x486CB59B
- jal string_to_binary
- jal number_comma
- exit
+ next
-defword "string>address", string_to_address, 0xFDC3CE9A
+defword "'", tick, 0x220C8AC6, flags=flag_immediate
+ jal parse_word_advance
jal find
- addi x, w, code_offset
- push x
- exit
-
-defword "string>address,", string_to_address_comma, 0x313E6882
- jal string_to_address
- beqz w, _string_to_address_comma_fail
- jal swap
+ beqz w, _tick_done
+_tick_found_word:
+ jal to_cfa
+ jal fetch
jal literal
- exit
-_string_to_address_comma_fail:
+_tick_done:
exit
-defword "string>postpone", string_to_postpone, 0x9EFD2390
+defword "...", postpone, 0x0AC31C19, flags=flag_immediate
+ jal parse_word_advance
jal find
- beqz w, _string_to_postpone_fail
- addi x, w, flag_offset
- lb x, 0(x)
- andi x, x, flag_immediate
- addi w, w, code_offset
- lw w, 0(w)
- beqz x, _string_to_postpone_compile
-_string_to_postpone_execute:
+ jal dup
+ jal q_branch
+ .int _postpone_fail
+_postpone_found_word:
+ jal dup
+ jal macro_q
+ jal q_branch
+ .int _postpone_normal
+_postpone_macro:
# stores `jal-to <xt>` into the current definition
+ jal to_cfa
+ jal fetch
jal jal_to
jal compile_comma
- push w
- li w, -1
- exit
-_string_to_postpone_compile:
+ j _postpone_done
+_postpone_normal:
# stores `literal<xt> jal-to compile,` into the current definition
+ jal to_cfa
+ jal fetch
jal literal
push w
la w, jal_to
@@ -784,10 +683,10 @@ _string_to_postpone_compile:
la w, compile_comma
jal jal_to
jal compile_comma
- push w
- li w, -1
- exit
-_string_to_postpone_fail:
+ j _postpone_done
+_postpone_fail:
+ jal drop
+_postpone_done:
exit
defword "literal", literal, 0xECB9D8E4, flags=flag_immediate
@@ -827,16 +726,6 @@ _literal_small_2:
jal compile_comma
exit
-defword "string>execute", string_to_execute, 0xC1A5C011
- jal string_to_address
- beqz w, _string_to_execute_done
- pop w
- jal execute
- push w
- li w, 1
-_string_to_execute_done:
- exit
-
defcode "word>hash", word_to_hash, 0x50E0A245
load_cell w, hash_offset(w)
next
@@ -883,44 +772,20 @@ defcode "execute", execute, 0xA01E3D98
jr x
# 'next' should be called by the executed word
-defcode "match-prefix", match_prefix, 0x2C1AB495
- lw x, 0(psp)
- lb x, 0(x)
- la y, _latest_prefix
- lw y, 0(y)
- push w
-_match_prefix_loop:
- lb w, 0(y)
- beq w, x, _match_prefix_done
- addi y, y, cell
- lw y, 0(y)
- bnez y, _match_prefix_loop
-_match_prefix_fail:
- mv w, zero
- next
-_match_prefix_done:
- # decrement string length
- lw x, 0(psp)
- addi x, x, -1
- sw x, 0(psp)
- # increment string address
- lw x, cell(psp)
- addi x, x, 1
- sw x, cell(psp)
- # code address
- addi w, y, 2*cell
- next
defword "interpret", interpret, 0x1F98C57A
_interpret_start:
+ # ( -- addr len )
jal parse_word
beqz w, _interpret_parse_area_empty
- jal match_prefix
- bnez w, _interpret_prefix
_interpret_find_word:
- pop w
+ jal two_dup
jal find
- beqz w, _interpret_retry
+ beqz w, _interpret_number
+ jal swap
+ jal source_offset
+ jal plus_store
+ jal nip
_interpret_word_found:
addi w, w, flag_offset
lb x, 0(w)
@@ -937,18 +802,27 @@ _interpret_macro:
pop w
jalr x
j _interpret_start
-_interpret_prefix:
- load_cell x, 0(w)
- pop w
- jalr x
- pop x
- beqz w, _interpret_retry
- mv w, x
+_interpret_number:
+ jal drop
+ jal two_dup
+ jal to_number
+ bnez w, _interpret_number_fail
+ jal drop
+ jal swap
+ jal source_offset
+ jal plus_store
+ jal nip
+ jal literal
+ jal base_10
j _interpret_start
+_interpret_number_fail:
+ jal two_drop
_interpret_retry:
- pop w
- j _interpret_start
+ # ( addr len -- addr len )
+ jal one_minus
+ bnez w, _interpret_find_word
_interpret_parse_area_empty:
+ # ( addr len=0 -- )
addi psp, psp, cell
pop w
jal interpret_execute
@@ -1124,7 +998,7 @@ defcode "@2", fetch_second, 0x9ED81397
lw w, 1*cell(w)
next
-defcode "+!", add_store, 0x08DC01D1
+defcode "+!", plus_store, 0x08DC01D1
pop x
load_cell y, 0(w)
add y, y, x
@@ -1201,7 +1075,7 @@ defcode "lit-string", lit_string, 0xC7BE567C # TODO: ip -> ra, w=tos
defword "allot", allot, 0xADB1A69F
jal data_pointer
- jal add_store
+ jal plus_store
exit
defcode ",", comma, 0x290C95CB
@@ -1367,6 +1241,10 @@ defcode "-", minus, 0x280C9438
sub w, x, w
next
+defcode "1-", one_minus, 0x00000000
+ addi w, w, -1
+ next
+
defcode "abs", abs, 0x2A48023B
bgtz w, _abs_positive
not w, w
@@ -1608,7 +1486,8 @@ defword "string>header", string_to_header, 0x16FBE3AB
jal comma
exit
-defword "string>define", string_to_define, 0x888D086D
+defword ":", define, 0x3F0CB86D, flags=flag_immediate
+ jal parse_word_advance
jal string_to_header
jal latest
jal fetch
@@ -1620,7 +1499,7 @@ defword "string>define", string_to_define, 0x888D086D
exit
defword "create", create, 0x26BB595D, flags=flag_immediate
- jal parse_word
+ jal parse_word_advance
jal string_to_header
jal r_bracket
jal dovar_comma
@@ -1736,7 +1615,7 @@ defword "variable", variable, 0x19385305, flags=flag_immediate
exit
defword "constant", constant, 0x0691EA25, flags=flag_immediate
- jal parse_word
+ jal parse_word_advance
jal string_to_header
jal r_bracket
jal docon_comma