commit 676971dd70dfd07dea8b1a180fdce0925c002b62
parent 75eff4436ebf71ec9dc4dfc7849c56b23e41845b
Author: Christian Ermann <christianermann@gmail.com>
Date: Sat, 14 Mar 2026 10:59:04 -0700
temp
Diffstat:
| M | build.zig | | | 23 | +++++++++++++++++++++++ |
| M | src/Scanner.zig | | | 94 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- |
| M | src/main.zig | | | 54 | ++++++++++++++++++++++++++++++++---------------------- |
| A | src/static.zig | | | 358 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | test/yaml_test_suite.zig | | | 108 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | webgpu.json | | | 18 | ++++++++++++++++++ |
6 files changed, 632 insertions(+), 23 deletions(-)
diff --git a/build.zig b/build.zig
@@ -113,4 +113,27 @@ pub fn build(b: *std.Build) void {
const test_step = b.step("test", "Run unit tests");
test_step.dependOn(&run_lib_unit_tests.step);
test_step.dependOn(&run_exe_unit_tests.step);
+
+ const test_suite_exe = b.addExecutable(.{
+ .name = "yaml-test-suite",
+ .root_module = b.createModule(.{
+ .root_source_file = b.path("test/yaml_test_suite.zig"),
+ .target = target,
+ .optimize = optimize,
+ .imports = &.{
+ .{ .name = "yaml", .module = lib_mod },
+ },
+ }),
+ });
+ b.installArtifact(test_suite_exe);
+ const run_test_suite = b.addRunArtifact(test_suite_exe);
+ run_test_suite.step.dependOn(b.getInstallStep());
+ if (b.args) |args| {
+ run_test_suite.addArgs(args);
+ }
+ const test_suite_step = b.step(
+ "test-suite",
+ "Run official YAML test suite",
+ );
+ test_suite_step.dependOn(&run_test_suite.step);
}
diff --git a/src/Scanner.zig b/src/Scanner.zig
@@ -1,3 +1,4 @@
+const std = @import("std");
input: []const u8 = "",
cursor: usize = 0,
@@ -7,6 +8,7 @@ flow_level: usize = 0,
block_indent: usize = 0,
state: State = .value,
value_start: usize = 0,
+next_token: ?Token = null,
pub const Token = union(enum) {
// Structural tokens
@@ -31,7 +33,10 @@ pub const Token = union(enum) {
false,
null,
- number: []const u8,
+ integer_dec: []const u8,
+ integer_hex: []const u8,
+ float: []const u8,
+ float_scientific: []const u8,
string: []const u8,
partial_string: []const u8,
@@ -46,6 +51,11 @@ pub const Token = union(enum) {
comment: []const u8,
};
+/// Tells `nextAllocMax` / `nextAllocIntoArrayListMax` whether a string may be
+/// handed back as the slice delivered by the scanner or must be copied into
+/// the caller's list.
+pub const AllocWhen = enum {
+ /// A string that arrives as a single `.string` token (nothing accumulated
+ /// in the list yet) is returned as-is; a copy is made only when the string
+ /// had to be assembled from several tokens.
+ alloc_if_needed,
+ /// Every string is appended to the caller's list, even when it arrives as
+ /// a single `.string` token.
+ alloc_always,
+};
+
const State = enum {
value,
post_value,
@@ -79,7 +89,82 @@ pub fn initCompleteInput(complete_input: []const u8) @This() {
};
}
+/// Consumes the next value and returns it as one contiguous token.
+///
+/// Only string values are supported so far; any other upcoming token yields
+/// `error.Unimplemented`.
+///
+/// Returns `.string` when the scanner's slice could be returned without
+/// copying (only possible with `.alloc_if_needed`), otherwise
+/// `.allocated_string`, whose payload was allocated with `allocator` and is
+/// owned by the caller.
+///
+/// Returns `error.ValueTooLong` if the assembled string would exceed
+/// `max_value_len` bytes.
+pub fn nextAllocMax(
+    self: *@This(),
+    allocator: std.mem.Allocator,
+    when: AllocWhen,
+    max_value_len: usize,
+) !Token {
+    // This function is not available in streaming mode.
+    // assert(self.is_end_of_input);
+    switch (try self.peekNextToken()) {
+        // A string may start with partial fragments before its final
+        // `.string` token; `nextAllocIntoArrayListMax` stitches them
+        // together, so every string-start token has to be accepted here.
+        .string, .partial_string, .partial_string_escaped_1 => {
+            var value_list = std.array_list.Managed(u8).init(allocator);
+            errdefer value_list.deinit();
+            const maybe_slice = try self.nextAllocIntoArrayListMax(
+                &value_list,
+                when,
+                max_value_len,
+            );
+            // A non-null slice means nothing was appended to `value_list`,
+            // so there is nothing to release on this path.
+            if (maybe_slice) |slice| {
+                return .{ .string = slice };
+            }
+            return .{ .allocated_string = try value_list.toOwnedSlice() };
+        },
+        else => return error.Unimplemented,
+    }
+}
+
+/// Consumes tokens up to and including the `.string` token that completes the
+/// current string value.
+///
+/// Partial fragments are appended to `value_list`. If the value arrives as a
+/// single `.string` token, nothing has been accumulated yet and `when` is
+/// `.alloc_if_needed`, that token's slice is returned directly and
+/// `value_list` is left untouched. Otherwise the final piece is appended too
+/// and `null` is returned, meaning the complete value is in `value_list`.
+///
+/// Returns `error.ValueTooLong` when the accumulated length would exceed
+/// `max_value_len`, and `error.Unimplemented` for any non-string token.
+pub fn nextAllocIntoArrayListMax(
+    self: *@This(),
+    value_list: *std.array_list.Managed(u8),
+    when: AllocWhen,
+    max_value_len: usize,
+) !?[]const u8 {
+    const may_borrow = when == .alloc_if_needed;
+    while (true) {
+        const token = try self.next();
+        switch (token) {
+            .partial_string => |fragment| try appendSlice(
+                value_list,
+                fragment,
+                max_value_len,
+            ),
+            .partial_string_escaped_1 => |escaped| try appendSlice(
+                value_list,
+                escaped[0..],
+                max_value_len,
+            ),
+            .string => |tail| {
+                // Nothing accumulated so far: the slice can be returned
+                // without a copy.
+                if (may_borrow and value_list.items.len == 0) return tail;
+                try appendSlice(value_list, tail, max_value_len);
+                // The value is complete and lives in `value_list`.
+                return null;
+            },
+            else => return error.Unimplemented,
+        }
+    }
+}
+
+/// Appends `slice` to `list` unless the resulting length would exceed
+/// `max_value_len` (or overflow `usize`), in which case
+/// `error.ValueTooLong` is returned and `list` is left unchanged.
+fn appendSlice(
+    list: *std.array_list.Managed(u8),
+    slice: []const u8,
+    max_value_len: usize,
+) !void {
+    // Element 0 is the wrapped sum, element 1 is the overflow bit.
+    const sum = @addWithOverflow(list.items.len, slice.len);
+    if (sum[1] != 0 or sum[0] > max_value_len) return error.ValueTooLong;
+    try list.appendSlice(slice);
+}
+
+
pub fn next(self: *@This()) !Token {
+ if (self.next_token) |token| {
+ self.next_token = null;
+ return token;
+ }
+
state_loop: switch (self.state) {
.value => {
switch (try self.skipWhitespaceExpectByte()) {
@@ -446,6 +531,13 @@ pub fn next(self: *@This()) !Token {
unreachable;
}
+/// Returns the upcoming token without consuming it.
+/// The token is cached in `next_token`; the following call to `next()` hands
+/// out that cached token and clears the cache.
+pub fn peekNextToken(self: *@This()) !Token {
+    if (self.next_token) |cached| return cached;
+    const fetched = try self.next();
+    self.next_token = fetched;
+    return fetched;
+}
+
fn expectByte(self: *const @This()) !u8 {
if (self.cursor < self.input.len) {
return self.input[self.cursor];
diff --git a/src/main.zig b/src/main.zig
@@ -1,6 +1,14 @@
const std = @import("std");
const Scanner = @import("Scanner.zig");
+const parseFromSlice = @import("static.zig").parseFromSlice;
+
+/// Target type that `main` passes to `parseFromSlice`.
+/// NOTE(review): the field names presumably mirror top-level keys of the
+/// input document being parsed — confirm against the data file.
+const Api = struct {
+ copyright: []const u8,
+ name: []const u8,
+ enum_prefix: i64,
+ doc: []const u8,
+};
pub fn main() !void {
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
@@ -14,26 +22,28 @@ pub fn main() !void {
);
defer allocator.free(data);
- var scanner = Scanner.initCompleteInput(data);
- while (true) {
- const token = try scanner.next();
- switch (token) {
- .string => |s| std.debug.print("string = {s}\n", .{s}),
- .partial_string => |s| std.debug.print("partial string = {s}\n", .{s}),
- .partial_string_escaped_1 => |s| std.debug.print("escaped = {x}\n", .{s[0]}),
- .block_entry => std.debug.print("block entry\n", .{}),
- .flow_sequence_start => std.debug.print("flow sequence start\n", .{}),
- .flow_sequence_end => std.debug.print("flow sequence end\n", .{}),
- .null => std.debug.print("null\n", .{}),
- .value => std.debug.print(":\n", .{}),
- .document_end => {
- std.debug.print("document end\n", .{});
- break;
- },
- else => {
- std.debug.print("unexpected token returned\n", .{});
- break;
- },
- }
- }
+ _ = try parseFromSlice(Api, allocator, data, .{});
+
+ //var scanner = Scanner.initCompleteInput(data);
+ //while (true) {
+ // const token = try scanner.next();
+ // switch (token) {
+ // .string => |s| std.debug.print("string = {s}\n", .{s}),
+ // .partial_string => |s| std.debug.print("partial string = {s}\n", .{s}),
+ // .partial_string_escaped_1 => |s| std.debug.print("escaped = {x}\n", .{s[0]}),
+ // .block_entry => std.debug.print("block entry\n", .{}),
+ // .flow_sequence_start => std.debug.print("flow sequence start\n", .{}),
+ // .flow_sequence_end => std.debug.print("flow sequence end\n", .{}),
+ // .null => std.debug.print("null\n", .{}),
+ // .value => std.debug.print(":\n", .{}),
+ // .document_end => {
+ // std.debug.print("document end\n", .{});
+ // break;
+ // },
+ // else => {
+ // std.debug.print("unexpected token returned\n", .{});
+ // break;
+ // },
+ // }
+ //}
}
diff --git a/src/static.zig b/src/static.zig
@@ -0,0 +1,358 @@
+const std = @import("std");
+
+const Scanner = @import("Scanner.zig");
+const AllocWhen = Scanner.AllocWhen;
+const Token = Scanner.Token;
+const default_max_value_len = Scanner.default_max_value_len;
+
+/// Controls how to deal with various inconsistencies between the YAML
+/// document and the Zig struct type passed in.
+/// For unknown fields, set options in this struct.
+/// For missing fields, give the Zig struct fields default values.
+pub const ParseOptions = struct {
+ /// If false, finding an unknown field returns `error.UnknownField`.
+ ignore_unknown_fields: bool = false,
+
+ /// Passed to `Scanner.nextAllocMax` as the upper bound on the byte length
+ /// of a single value.
+ /// When left `null`, `parseFromTokenSourceLeaky` resolves it: for a
+ /// `*Scanner` source it becomes the length of the input slice, which means
+ /// `error.ValueTooLong` will never be returned; for any other source it
+ /// becomes `Scanner.default_max_value_len`.
+ max_value_len: ?usize = null,
+
+ /// This determines whether strings should always be copied, or if a
+ /// reference to the given buffer should be preferred if possible.
+ /// When left `null`, `parseFromTokenSourceLeaky` resolves it to
+ /// `.alloc_if_needed` for a `*Scanner` source and to `.alloc_always` for
+ /// any other source.
+ allocate: ?AllocWhen = null,
+
+ /// When parsing to a `yaml.Value`, set this option to false to always
+ /// emit YAML numbers as unparsed `yaml.Value.number_string`. Otherwise,
+ /// YAML numbers are parsed as either `yaml.Value.integer`,
+ /// `yaml.Value.float`, or left unparsed as `yaml.Value.number_string`.
+ /// When this option is true, YAML numbers encoded as floats may lose
+ /// precision when being parsed into `yaml.Value.float`.
+ /// NOTE(review): `yaml.Value` is not defined in this file — confirm it
+ /// exists before relying on this description.
+ parse_numbers: bool = true,
+};
+
+/// Bundles a parsed `value` with the heap-allocated arena it was parsed into.
+/// Call `deinit` to release the arena's memory and the arena object itself.
+pub fn Parsed(comptime T: type) type {
+    return struct {
+        arena: *std.heap.ArenaAllocator,
+        value: T,
+
+        /// Frees everything allocated from the arena, then destroys the
+        /// arena object using the arena's own child allocator.
+        pub fn deinit(self: @This()) void {
+            const arena = self.arena;
+            // Read the child allocator first: it is stored inside the arena
+            // that is about to be torn down.
+            const backing = arena.child_allocator;
+            arena.deinit();
+            backing.destroy(arena);
+        }
+    };
+}
+
+/// Parses the complete YAML document in `s` into a `T`.
+/// A `Scanner` is created over `s` and handed to `parseFromTokenSource`; the
+/// returned `Parsed(T)` owns an arena and must be released with `deinit`.
+/// NOTE(review): with the default `.alloc_if_needed`, parsed strings
+/// presumably can point into `s`, so `s` would have to outlive the result —
+/// confirm against the scanner.
+pub fn parseFromSlice(
+    comptime T: type,
+    allocator: std.mem.Allocator,
+    s: []const u8,
+    options: ParseOptions,
+) !Parsed(T) {
+    // No teardown call is made on the scanner here.
+    var token_source = Scanner.initCompleteInput(s);
+    return parseFromTokenSource(T, allocator, &token_source, options);
+}
+
+/// Parses a `T` from `scanner_or_reader`, placing every allocation in a
+/// freshly created arena that is returned alongside the value.
+/// The arena object itself is allocated with `allocator`; on failure both the
+/// arena's contents and the arena object are released before returning.
+pub fn parseFromTokenSource(
+    comptime T: type,
+    allocator: std.mem.Allocator,
+    scanner_or_reader: anytype,
+    options: ParseOptions,
+) !Parsed(T) {
+    const arena = try allocator.create(std.heap.ArenaAllocator);
+    errdefer allocator.destroy(arena);
+    arena.* = std.heap.ArenaAllocator.init(allocator);
+    // Registered second, so on error it runs before the `destroy` above.
+    errdefer arena.deinit();
+
+    const value = try parseFromTokenSourceLeaky(
+        T,
+        arena.allocator(),
+        scanner_or_reader,
+        options,
+    );
+
+    return .{ .arena = arena, .value = value };
+}
+
+/// Parses a `T` from `scanner_or_reader` without tracking individual
+/// allocations: anything allocated along the way is only reclaimed when
+/// `allocator` itself is torn down, so an arena is the expected choice.
+///
+/// `null` fields in `options` are resolved before parsing:
+/// * `max_value_len` becomes the input length for a `*Scanner`, otherwise
+///   `default_max_value_len`;
+/// * `allocate` becomes `.alloc_if_needed` for a `*Scanner`, otherwise
+///   `.alloc_always`.
+///
+/// Returns `error.UnexpectedToken` if the token following the parsed value is
+/// not `.document_end`.
+pub fn parseFromTokenSourceLeaky(
+    comptime T: type,
+    allocator: std.mem.Allocator,
+    scanner_or_reader: anytype,
+    options: ParseOptions,
+) !T {
+    //if (@TypeOf(scanner_or_reader.*) == Scanner) {
+    //    std.debug.assert(scanner_or_reader.is_end_of_input);
+    //}
+    var resolved_options = options;
+    if (resolved_options.max_value_len == null) {
+        if (@TypeOf(scanner_or_reader.*) == Scanner) {
+            resolved_options.max_value_len = scanner_or_reader.input.len;
+        } else {
+            resolved_options.max_value_len = default_max_value_len;
+        }
+    }
+    if (resolved_options.allocate == null) {
+        if (@TypeOf(scanner_or_reader.*) == Scanner) {
+            resolved_options.allocate = .alloc_if_needed;
+        } else {
+            resolved_options.allocate = .alloc_always;
+        }
+    }
+
+    const value = try innerParse(
+        T,
+        allocator,
+        scanner_or_reader,
+        resolved_options,
+    );
+
+    // Trailing content is an input error, not a programmer error: checking
+    // it with `std.debug.assert` would panic in safe builds and be undefined
+    // behavior in ReleaseFast, so report it as an error instead.
+    if (try scanner_or_reader.next() != .document_end) {
+        return error.UnexpectedToken;
+    }
+
+    return value;
+}
+
+/// This is an internal function called recursively during the implementation
+/// of `parseFromTokenSourceLeaky` and similar.
+/// It is exposed primarily to enable custom `yamlParse()` methods to call
+/// back into the `parseFrom*` system such as if you're implementing a custom
+/// container of type `T`; you can call `innerParse(T, ...)` for each of the
+/// container's items. Note that `null` fields are not allowed on the
+/// `options` when calling this function. (The `options` you get in your
+/// `yamlParse` method has no `null` fields.)
+///
+/// Supported targets: `bool`, optionals, enums, tagged unions, non-tuple
+/// structs, `[N]u8` arrays, single-item pointers and `[]u8` / `[]const u8`
+/// slices. Everything else returns `error.Unimplemented`.
+///
+/// `source` must provide `next`, `peekNextToken` and `nextAllocMax`.
+pub fn innerParse(
+    comptime T: type,
+    allocator: std.mem.Allocator,
+    source: anytype,
+    options: ParseOptions,
+) !T {
+    switch (@typeInfo(T)) {
+        .bool => {
+            return switch (try source.next()) {
+                .true => true,
+                .false => false,
+                else => error.UnexpectedToken,
+            };
+        },
+        .optional => |optional_info| {
+            switch (try source.peekNextToken()) {
+                .null => {
+                    // Consume the peeked `null`.
+                    _ = try source.next();
+                    return null;
+                },
+                else => {
+                    return try innerParse(
+                        optional_info.child,
+                        allocator,
+                        source,
+                        options,
+                    );
+                },
+            }
+        },
+        .@"enum" => {
+            if (std.meta.hasFn(T, "yamlParse")) {
+                return T.yamlParse(allocator, source, options);
+            }
+
+            // Default parsing: accept the tag name or its decimal value.
+            const token = try source.nextAllocMax(
+                allocator,
+                .alloc_if_needed,
+                options.max_value_len.?,
+            );
+            defer freeIfAlloc(allocator, token);
+            const slice = switch (token) {
+                inline .integer_dec,
+                .allocated_number,
+                .string,
+                .allocated_string,
+                => |slice| slice,
+                else => return error.UnexpectedToken,
+            };
+            return sliceToEnum(T, slice);
+        },
+        .@"union" => |union_info| {
+            if (std.meta.hasFn(T, "yamlParse")) {
+                return T.yamlParse(allocator, source, options);
+            }
+            if (union_info.tag_type == null) {
+                @compileError(
+                    "Unable to parse into untagged union '" ++ @typeName(T) ++ "'",
+                );
+            }
+
+            var result: ?T = null;
+            const name_token = try source.nextAllocMax(
+                allocator,
+                .alloc_if_needed,
+                options.max_value_len.?,
+            );
+            defer freeIfAlloc(allocator, name_token);
+
+            const field_name = switch (name_token) {
+                inline .string, .allocated_string => |slice| slice,
+                else => return error.UnexpectedToken,
+            };
+
+            inline for (union_info.fields) |field| {
+                if (std.mem.eql(u8, field.name, field_name)) {
+                    if (field.type == void) {
+                        result = @unionInit(T, field.name, {});
+                    } else {
+                        // Recurse into the payload.
+                        const payload = try innerParse(
+                            field.type,
+                            allocator,
+                            source,
+                            options,
+                        );
+                        result = @unionInit(T, field.name, payload);
+                    }
+                    break;
+                }
+            } else {
+                // Didn't match anything
+                return error.UnknownField;
+            }
+
+            return result.?;
+        },
+        .@"struct" => |struct_info| {
+            if (struct_info.is_tuple) {
+                return error.Unimplemented;
+            }
+            if (std.meta.hasFn(T, "yamlParse")) {
+                return T.yamlParse(allocator, source, options);
+            }
+
+            var result: T = undefined;
+            var fields_seen = [_]bool{false} ** struct_info.fields.len;
+
+            while (true) {
+                // Field names are strings; the first token that cannot start
+                // a string ends the mapping and is left for the caller.
+                // NOTE(review): if the scanner emits an explicit end-of-mapping
+                // token, or a separator token between a key and its value,
+                // neither is consumed here — confirm against the scanner.
+                switch (try source.peekNextToken()) {
+                    .string, .partial_string, .partial_string_escaped_1 => {},
+                    else => break,
+                }
+
+                const name_token = try source.nextAllocMax(
+                    allocator,
+                    .alloc_if_needed,
+                    options.max_value_len.?,
+                );
+                defer freeIfAlloc(allocator, name_token);
+                const field_name = switch (name_token) {
+                    inline .string, .allocated_string => |slice| slice,
+                    else => return error.UnexpectedToken,
+                };
+                inline for (struct_info.fields, 0..) |field, i| {
+                    if (field.is_comptime) {
+                        @compileError(
+                            "comptime fields are not supported: " ++ @typeName(T) ++ "." ++ field.name,
+                        );
+                    }
+                    if (std.mem.eql(u8, field.name, field_name)) {
+                        if (fields_seen[i]) return error.DuplicateField;
+                        @field(result, field.name) = try innerParse(
+                            field.type,
+                            allocator,
+                            source,
+                            options,
+                        );
+                        fields_seen[i] = true;
+                        break;
+                    }
+                } else {
+                    // Didn't match anything.
+                    // NOTE(review): `options.ignore_unknown_fields` is not
+                    // honored yet because there is no way to skip a value.
+                    return error.UnknownField;
+                }
+            }
+
+            // Fields absent from the document fall back to their declared
+            // default; a field without a default is an error.
+            inline for (struct_info.fields, 0..) |field, i| {
+                if (!fields_seen[i]) {
+                    if (field.defaultValue()) |default| {
+                        @field(result, field.name) = default;
+                    } else {
+                        return error.MissingField;
+                    }
+                }
+            }
+            return result;
+        },
+        .array => |array_info| {
+            switch (try source.peekNextToken()) {
+                .string, .partial_string, .partial_string_escaped_1 => {
+                    if (array_info.child != u8) return error.UnexpectedToken;
+                    if (array_info.sentinel()) |_| {
+                        return error.Unimplemented;
+                    }
+                    // Fixed-length string: the value must fill the array
+                    // exactly.
+                    const token = try source.nextAllocMax(
+                        allocator,
+                        .alloc_if_needed,
+                        options.max_value_len.?,
+                    );
+                    defer freeIfAlloc(allocator, token);
+                    const slice = switch (token) {
+                        inline .string, .allocated_string => |slice| slice,
+                        else => return error.UnexpectedToken,
+                    };
+                    if (slice.len != array_info.len) return error.LengthMismatch;
+                    var result: T = undefined;
+                    @memcpy(result[0..], slice);
+                    return result;
+                },
+                else => return error.Unimplemented,
+            }
+        },
+        .pointer => |ptr_info| {
+            switch (ptr_info.size) {
+                .one => {
+                    const result = try allocator.create(ptr_info.child);
+                    result.* = try innerParse(
+                        ptr_info.child,
+                        allocator,
+                        source,
+                        options,
+                    );
+                    return result;
+                },
+                .slice => {
+                    switch (try source.peekNextToken()) {
+                        .string,
+                        .partial_string,
+                        .partial_string_escaped_1,
+                        => {
+                            if (ptr_info.child != u8) {
+                                return error.UnexpectedToken;
+                            }
+                            // Dynamic-length string
+                            if (ptr_info.sentinel()) |_| {
+                                // Use our own array list so we can append
+                                // the sentinel.
+                                return error.Unimplemented;
+                            }
+                            // A mutable slice cannot alias the input, so it
+                            // always needs its own copy.
+                            const alloc: AllocWhen = if (ptr_info.is_const)
+                                options.allocate.?
+                            else
+                                .alloc_always;
+                            const result = try source.nextAllocMax(
+                                allocator,
+                                alloc,
+                                options.max_value_len.?,
+                            );
+                            return switch (result) {
+                                inline .string,
+                                .allocated_string,
+                                => |slice| slice,
+                                else => error.UnexpectedToken,
+                            };
+                        },
+                        else => return error.UnexpectedToken,
+                    }
+                },
+                // Many-item and C pointers carry no length to parse into.
+                else => @compileError(
+                    "Unable to parse into type '" ++ @typeName(T) ++ "'",
+                ),
+            }
+        },
+        else => return error.Unimplemented,
+    }
+}
+
+/// Converts `slice` to a value of enum `T`.
+/// The text is first tried as a tag name; failing that it is parsed as a
+/// base-10 integer of the enum's tag type and mapped to the matching tag.
+/// Returns `error.InvalidEnumTag` when neither interpretation yields a tag.
+fn sliceToEnum(comptime T: type, slice: []const u8) !T {
+    const by_name = std.meta.stringToEnum(T, slice);
+    if (by_name) |tag| return tag;
+
+    const TagInt = @typeInfo(T).@"enum".tag_type;
+    const raw = std.fmt.parseInt(TagInt, slice, 10) catch {
+        return error.InvalidEnumTag;
+    };
+    return std.enums.fromInt(T, raw) orelse error.InvalidEnumTag;
+}
+
+/// Releases the payload of `token` with `allocator` when the token is one of
+/// the `allocated_*` variants; every other token is left alone.
+fn freeIfAlloc(allocator: std.mem.Allocator, token: Token) void {
+    switch (token) {
+        inline .allocated_number,
+        .allocated_string,
+        => |owned| allocator.free(owned),
+        else => {},
+    }
+}
+
diff --git a/test/yaml_test_suite.zig b/test/yaml_test_suite.zig
@@ -0,0 +1,108 @@
+const std = @import("std");
+
+const yaml = @import("yaml");
+
+/// Entry point: expects the YAML test suite directory as the first
+/// command-line argument and runs every test found below it.
+/// Prints a usage line and exits with status 1 when the argument is missing.
+pub fn main() !void {
+    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
+    defer _ = gpa.deinit();
+    const allocator = gpa.allocator();
+
+    const args = try std.process.argsAlloc(allocator);
+    defer std.process.argsFree(allocator, args);
+
+    if (args.len >= 2) {
+        return runTestSuite(allocator, args[1]);
+    }
+
+    std.debug.print(
+        "Usage: {s} <yaml-test-suite-directory>\n",
+        .{args[0]},
+    );
+    std.process.exit(1);
+}
+
+/// Walks the top level of `suite_dir` (resolved relative to the current
+/// working directory) and runs every sub-directory as a test set.
+/// Entries that are not directories are ignored.
+fn runTestSuite(allocator: std.mem.Allocator, suite_dir: []const u8) !void {
+    var root = try std.fs.cwd().openDir(suite_dir, .{ .iterate = true });
+    defer root.close();
+
+    // It seems like these directories are primarily symlinks to tests in the
+    // root directory. There seem to be a few tests that only live in these
+    // folders, but we'll ignore them for now.
+    const skipped = [_][]const u8{ "tags", "name" };
+
+    var entries = root.iterate();
+    entry_loop: while (try entries.next()) |entry| {
+        if (entry.kind != .directory) continue;
+        for (skipped) |name| {
+            if (std.mem.eql(u8, entry.name, name)) continue :entry_loop;
+        }
+        try runTestSet(allocator, root, entry.name);
+    }
+}
+
+/// Runs the test set stored in sub-directory `test_set_id` of `suite_dir`.
+///
+/// A set is either a single test case (the directory contains `in.yaml`) or
+/// a collection of test cases, one per sub-directory.
+/// Only a missing `in.yaml` selects the multi-test layout; any other error
+/// from probing the file is returned to the caller, matching how
+/// `runTestCase` probes for its `error` file.
+fn runTestSet(
+    allocator: std.mem.Allocator,
+    suite_dir: std.fs.Dir,
+    test_set_id: []const u8,
+) !void {
+    var dir = try suite_dir.openDir(test_set_id, .{ .iterate = true });
+    defer dir.close();
+
+    dir.access("in.yaml", .{}) catch |err| switch (err) {
+        // a set may contain multiple tests
+        error.FileNotFound => {
+            var it = dir.iterate();
+            while (try it.next()) |entry| {
+                switch (entry.kind) {
+                    .directory => try runTest(allocator, dir, entry.name),
+                    else => continue,
+                }
+            }
+            return;
+        },
+        // e.g. permission problems: do not mistake them for a multi-test set
+        else => return err,
+    };
+    // or a single test case
+    try runTestCase(allocator, dir);
+}
+
+/// Opens sub-directory `test_id` of `test_set_dir` and runs the test case it
+/// contains.
+fn runTest(
+    allocator: std.mem.Allocator,
+    test_set_dir: std.fs.Dir,
+    test_id: []const u8,
+) !void {
+    var case_dir = try test_set_dir.openDir(test_id, .{ .iterate = true });
+    defer case_dir.close();
+
+    return runTestCase(allocator, case_dir);
+}
+
+/// Loads the test case stored in `test_dir`.
+/// Checks whether an `error` file exists next to `in.yaml` and reads
+/// `in.yaml` (at most 1 MiB) into memory. Neither result is used yet: the
+/// flag is discarded and the buffer is freed on return.
+fn runTestCase(
+    allocator: std.mem.Allocator,
+    test_dir: std.fs.Dir,
+) !void {
+    // Only a missing `error` file counts as "no error expected"; any other
+    // failure while probing for it is returned to the caller.
+    const expect_parse_error = if (test_dir.access("error", .{})) |_|
+        true
+    else |err| switch (err) {
+        error.FileNotFound => false,
+        else => return err,
+    };
+    _ = expect_parse_error;
+
+    const max_input_bytes = 1024 * 1024;
+    const yaml_data = try test_dir.readFileAlloc(
+        allocator,
+        "in.yaml",
+        max_input_bytes,
+    );
+    defer allocator.free(yaml_data);
+}
diff --git a/webgpu.json b/webgpu.json
@@ -0,0 +1,17 @@
+{
+ "constants": [
+ {
+ "value": "uint32_max",
+ "doc": " TODO\n",
+ "name": "array_layer_count_undefined"
+ },
+ {
+ "name": "copy_stride_undefined"
+ }
+ ],
+ "value": "uint32_max",
+ "doc": " TODO\n",
+ "copyright": " Copyright 2019-2023 WebGPU-Native developers\n\n SPDX-License-Identifier: BSD-3-Clause\n",
+ "name": "webgpu",
+ "enum_prefix": 0
+}
+\ No newline at end of file