commit c0fec2a510ce08c3a9d6888931b97d1bc18f59f7
parent 5cd67bcd8e97d189c0c5b87b43cdf1cde0fcfef2
Author: Christian Ermann <christianermann@gmail.com>
Date: Sun, 27 Oct 2024 11:21:55 -0700
Replace names with hashes in dictionary
Diffstat:
| M | forth.s | | | 88 | +++++++++++++++++++++++++++++++++++++++---------------------------------------- |
| A | hash.c | | | 19 | +++++++++++++++++++ |
2 files changed, 62 insertions(+), 45 deletions(-)
diff --git a/forth.s b/forth.s
@@ -11,9 +11,10 @@
#define cell 4
#define dcell 8
-#define flag_offset 4
-#define name_len_offset 5
-#define name_offset 6
+#define hash_offset 4
+#define meta_offset 8
+#define flag_offset 12
+#define code_offset 16
#define load_cell lw
#define store_cell sw
@@ -55,33 +56,48 @@
addi rsp, rsp, cell
.endm
-.macro defcode name, name_length, flags, label, last
- .section .data
+.macro defcode name, name_length, flags, hash, label, last
+ .section ".rodata"
.balign cell
.globl name_\label
name_\label:
.int name_\last # 1. Link to the previously defined word.
+ .int \hash
+ .int meta_\label
.byte \flags # 3. Set the flags.
- .byte \name_length # 4. Set the name length.
- .ascii "\name" # 5. Set the name.
+
.balign cell # 6. Add any padding we may need.
.globl \label
\label:
.int code_\label # 7. Set the codeword.
+
+ .section ".data.meta"
+ .global meta_\label
+meta_\label:
+ .byte \name_length
+ .ascii "\name"
+
.section ".text"
.globl code_\label
code_\label: # 8. This is where our assembly code will go.
.endm
-.macro defword name, name_length, flags, label, last
+.macro defword name, name_length, flags, hash, label, last
.section ".rodata"
.balign cell
.globl name_\label
name_\label:
.int name_\last
+ .int \hash
+ .int meta_\label
.byte \flags
+
+ .section ".data.meta"
+ .global meta_\label
+meta_\label:
.byte \name_length
.ascii "\name"
+
.balign cell
.globl \label
\label:
@@ -95,27 +111,27 @@ docol:
.equ name_null, 0
-defcode "exit", 4, 0, exit, null
+defcode "exit", 4, 0, 0xCDED1A85, exit, null
pop_ret ip
next
-defcode "type", 4, 0, type, exit
+defcode "type", 4, 0, 0x5127F14D, type, exit
pop a1 # length
pop a0 # address
jal uart_put_string
next
-defcode "emit", 4, 0, emit, type
+defcode "emit", 4, 0, 0x2D88474A, emit, type
pop a0
jal uart_put_char
next
-defcode "key", 3, 0, key, emit
+defcode "key", 3, 0, 0x6815C86C, key, emit
jal uart_get_char
push a0
next
-defcode "word", 4, 0, word, key
+defcode "word", 4, 0, 0x6A98E9FD, word, key
jal word_impl
push w
push x
@@ -146,7 +162,7 @@ _word_end:
addi ra, s6, 0
ret
-defcode "find", 4, 0, find, word
+defcode "find", 4, 0, 0xBDF0855A, find, word
pop x
pop w
jal find_impl # w = word address or 0, if not found
@@ -156,71 +172,53 @@ defcode "find", 4, 0, find, word
find_impl:
# w = word name address
# x = word name length
+ push_ret ra
+ jal hash_impl # w = word hash
la s3, latest
load_cell s3, 0(s3)
_check_word_hidden:
lb t0, flag_offset(s3)
# beq t0, ?, _next_word
-_check_word_length:
- lb t0, name_len_offset(s3)
- bne t0, x, _next_word
-_check_word_match:
- mv t0, w
- mv t1, x
- addi t2, s3, name_offset
-_check_char_match:
- lb t3, 0(t0)
- lb t4, 0(t2)
- bne t3, t4, _next_word
- addi t0, t0, 1
- addi t2, t2, 1
- addi t1, t1, -1
- bgtz t1, _check_char_match
+_check_word_hash:
+ load_cell t0, hash_offset(s3)
+ bne t0, w, _next_word
j _found_or_out_of_words
_next_word:
load_cell s3, 0(s3)
bnez s3, _check_word_hidden
_found_or_out_of_words:
mv w, s3
+ pop_ret ra
ret
-defcode ">cfa", 4, 0, to_cfa, find
+defcode ">cfa", 4, 0, 0x8CAC3233, to_cfa, find
pop w
- jal to_cfa_impl
+ addi w, w, code_offset
push w
next
-to_cfa_impl:
- li t0, name_offset
- lb t1, name_len_offset(w)
- add t0, t0, t1
- add w, w, t0
- addi w, w, 3
- andi w, w, 0xFFFFFFFC
- ret
-
-defcode "execute", 7, 0, execute, to_cfa
+defcode "execute", 7, 0, 0xA01E3D98, execute, to_cfa
pop w
load_cell x, 0(w)
jr x
# 'next' should be called by the executed word
-defcode "interpret", 9, 0, interpret, execute
+defcode "interpret", 9, 0, 0x1F98C57A, interpret, execute
jal word_impl
jal find_impl
beqz w, _not_found
- jal to_cfa_impl
+ addi w, w, code_offset
load_cell x, 0(w)
jr x
_not_found:
next
-defcode "branch", 6, 0, branch, interpret
+defcode "branch", 6, 0, 0xB6873945, branch, interpret
load_cell t0, 0(ip)
add ip, ip, t0
next
-defword "quit", 4, 0, quit, branch
+defword "quit", 4, 0, 0x47878736, quit, branch
.int interpret
.int branch
.int -dcell
diff --git a/hash.c b/hash.c
@@ -0,0 +1,19 @@
+#include <stdio.h>
+#include <string.h>
+
+unsigned int fnv1a(unsigned char* str) {
+ unsigned int hash = 2166136261;
+ unsigned int len = strlen(str);
+ for (int i = 0; i < len; i += 1) {
+ hash = hash ^ (int)str[i];
+ hash = hash * 16777619;
+ }
+ return hash;
+}
+
+int main(int arcg, char** argv) {
+ unsigned char* str = argv[1];
+ unsigned int hash = fnv1a(str);
+ printf("hash: 0x%08X\n", hash);
+ return 0;
+}