forth-riscv

My forth
git clone git://git.electrosoup.com/forth-riscv
Log | Files | Refs

commit c0fec2a510ce08c3a9d6888931b97d1bc18f59f7
parent 5cd67bcd8e97d189c0c5b87b43cdf1cde0fcfef2
Author: Christian Ermann <christianermann@gmail.com>
Date:   Sun, 27 Oct 2024 11:21:55 -0700

Replace names with hashes in dictionary

Diffstat:
Mforth.s | 88+++++++++++++++++++++++++++++++++++++++----------------------------------------
Ahash.c | 19+++++++++++++++++++
2 files changed, 62 insertions(+), 45 deletions(-)

diff --git a/forth.s b/forth.s @@ -11,9 +11,10 @@ #define cell 4 #define dcell 8 -#define flag_offset 4 -#define name_len_offset 5 -#define name_offset 6 +#define hash_offset 4 +#define meta_offset 8 +#define flag_offset 12 +#define code_offset 16 #define load_cell lw #define store_cell sw @@ -55,33 +56,48 @@ addi rsp, rsp, cell .endm -.macro defcode name, name_length, flags, label, last - .section .data +.macro defcode name, name_length, flags, hash, label, last + .section ".rodata" .balign cell .globl name_\label name_\label: .int name_\last # 1. Link to the previously defined word. + .int \hash + .int meta_\label .byte \flags # 3. Set the flags. - .byte \name_length # 4. Set the name length. - .ascii "\name" # 5. Set the name. + .balign cell # 6. Add any padding we may need. .globl \label \label: .int code_\label # 7. Set the codeword. + + .section ".data.meta" + .global meta_\label +meta_\label: + .byte \name_length + .ascii "\name" + .section ".text" .globl code_\label code_\label: # 8. This is where our assembly code will go. .endm -.macro defword name, name_length, flags, label, last +.macro defword name, name_length, flags, hash, label, last .section ".rodata" .balign cell .globl name_\label name_\label: .int name_\last + .int \hash + .int meta_\label .byte \flags + + .section ".data.meta" + .global meta_\label +meta_\label: .byte \name_length .ascii "\name" + .balign cell .globl \label \label: @@ -95,27 +111,27 @@ docol: .equ name_null, 0 -defcode "exit", 4, 0, exit, null +defcode "exit", 4, 0, 0xCDED1A85, exit, null pop_ret ip next -defcode "type", 4, 0, type, exit +defcode "type", 4, 0, 0x5127F14D, type, exit pop a1 # length pop a0 # address jal uart_put_string next -defcode "emit", 4, 0, emit, type +defcode "emit", 4, 0, 0x2D88474A, emit, type pop a0 jal uart_put_char next -defcode "key", 3, 0, key, emit +defcode "key", 3, 0, 0x6815C86C, key, emit jal uart_get_char push a0 next -defcode "word", 4, 0, word, key +defcode "word", 4, 0, 0x6A98E9FD, word, key jal word_impl push w push x @@ -146,7 +162,7 @@ _word_end: addi ra, s6, 0 ret -defcode "find", 4, 0, find, word +defcode "find", 4, 0, 0xBDF0855A, find, word pop x pop w jal find_impl # w = word address or 0, if not found @@ -156,71 +172,53 @@ defcode "find", 4, 0, find, word find_impl: # w = word name address # x = word name length + push_ret ra + jal hash_impl # w = word hash la s3, latest load_cell s3, 0(s3) _check_word_hidden: lb t0, flag_offset(s3) # beq t0, ?, _next_word -_check_word_length: - lb t0, name_len_offset(s3) - bne t0, x, _next_word -_check_word_match: - mv t0, w - mv t1, x - addi t2, s3, name_offset -_check_char_match: - lb t3, 0(t0) - lb t4, 0(t2) - bne t3, t4, _next_word - addi t0, t0, 1 - addi t2, t2, 1 - addi t1, t1, -1 - bgtz t1, _check_char_match +_check_word_hash: + load_cell t0, hash_offset(s3) + bne t0, w, _next_word j _found_or_out_of_words _next_word: load_cell s3, 0(s3) bnez s3, _check_word_hidden _found_or_out_of_words: mv w, s3 + pop_ret ra ret -defcode ">cfa", 4, 0, to_cfa, find +defcode ">cfa", 4, 0, 0x8CAC3233, to_cfa, find pop w - jal to_cfa_impl + addi w, w, code_offset push w next -to_cfa_impl: - li t0, name_offset - lb t1, name_len_offset(w) - add t0, t0, t1 - add w, w, t0 - addi w, w, 3 - andi w, w, 0xFFFFFFFC - ret - -defcode "execute", 7, 0, execute, to_cfa +defcode "execute", 7, 0, 0xA01E3D98, execute, to_cfa pop w load_cell x, 0(w) jr x # 'next' should be called by the executed word -defcode "interpret", 9, 0, interpret, execute +defcode "interpret", 9, 0, 0x1F98C57A, interpret, execute jal word_impl jal find_impl beqz w, _not_found - jal to_cfa_impl + addi w, w, code_offset load_cell x, 0(w) jr x _not_found: next -defcode "branch", 6, 0, branch, interpret +defcode "branch", 6, 0, 0xB6873945, branch, interpret load_cell t0, 0(ip) add ip, ip, t0 next -defword "quit", 4, 0, quit, branch +defword "quit", 4, 0, 0x47878736, quit, branch .int interpret .int branch .int -dcell diff --git a/hash.c b/hash.c @@ -0,0 +1,19 @@ +#include <stdio.h> +#include <string.h> + +unsigned int fnv1a(unsigned char* str) { + unsigned int hash = 2166136261; + unsigned int len = strlen(str); + for (int i = 0; i < len; i += 1) { + hash = hash ^ (int)str[i]; + hash = hash * 16777619; + } + return hash; +} + +int main(int arcg, char** argv) { + unsigned char* str = argv[1]; + unsigned int hash = fnv1a(str); + printf("hash: 0x%08X\n", hash); + return 0; +}