yaml-z

git clone git://git.electrosoup.com/yaml-z
Log | Files | Refs | Submodules

commit 676971dd70dfd07dea8b1a180fdce0925c002b62
parent 75eff4436ebf71ec9dc4dfc7849c56b23e41845b
Author: Christian Ermann <christianermann@gmail.com>
Date:   Sat, 14 Mar 2026 10:59:04 -0700

temp

Diffstat:
Mbuild.zig | 23+++++++++++++++++++++++
Msrc/Scanner.zig | 94++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
Msrc/main.zig | 54++++++++++++++++++++++++++++++++----------------------
Asrc/static.zig | 358+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atest/yaml_test_suite.zig | 108+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Awebgpu.json | 18++++++++++++++++++
6 files changed, 632 insertions(+), 23 deletions(-)

diff --git a/build.zig b/build.zig @@ -113,4 +113,27 @@ pub fn build(b: *std.Build) void { const test_step = b.step("test", "Run unit tests"); test_step.dependOn(&run_lib_unit_tests.step); test_step.dependOn(&run_exe_unit_tests.step); + + const test_suite_exe = b.addExecutable(.{ + .name = "yaml-test-suite", + .root_module = b.createModule(.{ + .root_source_file = b.path("test/yaml_test_suite.zig"), + .target = target, + .optimize = optimize, + .imports = &.{ + .{ .name = "yaml", .module = lib_mod }, + }, + }), + }); + b.installArtifact(test_suite_exe); + const run_test_suite = b.addRunArtifact(test_suite_exe); + run_test_suite.step.dependOn(b.getInstallStep()); + if (b.args) |args| { + run_test_suite.addArgs(args); + } + const test_suite_step = b.step( + "test-suite", + "Run official YAML test suite", + ); + test_suite_step.dependOn(&run_test_suite.step); } diff --git a/src/Scanner.zig b/src/Scanner.zig @@ -1,3 +1,4 @@ +const std = @import("std"); input: []const u8 = "", cursor: usize = 0, @@ -7,6 +8,7 @@ flow_level: usize = 0, block_indent: usize = 0, state: State = .value, value_start: usize = 0, +next_token: ?Token = null, pub const Token = union(enum) { // Structural tokens @@ -31,7 +33,10 @@ pub const Token = union(enum) { false, null, - number: []const u8, + integer_dec: []const u8, + integer_hex: []const u8, + float: []const u8, + float_scientific: []const u8, string: []const u8, partial_string: []const u8, @@ -46,6 +51,11 @@ pub const Token = union(enum) { comment: []const u8, }; +pub const AllocWhen = enum { + alloc_if_needed, + alloc_always, +}; + const State = enum { value, post_value, @@ -79,7 +89,82 @@ pub fn initCompleteInput(complete_input: []const u8) @This() { }; } +pub fn nextAllocMax( + self: *@This(), + allocator: std.mem.Allocator, + when: AllocWhen, + max_value_len: usize, +) !Token { + // This function is not available in streaming mode. + // assert(self.is_end_of_input); + switch (try self.peekNextToken()) { + .string => { + var value_list = std.array_list.Managed(u8).init(allocator); + errdefer value_list.deinit(); + const maybe_slice = try self.nextAllocIntoArrayListMax( + &value_list, + when, + max_value_len, + ); + if (maybe_slice) |slice| { + return .{ .string = slice }; + } else { + return .{ .allocated_string = try value_list.toOwnedSlice() }; + } + }, + else => return error.Unimplemented, + } +} + +pub fn nextAllocIntoArrayListMax( + self: *@This(), + value_list: *std.array_list.Managed(u8), + when: AllocWhen, + max_value_len: usize, +) !?[]const u8 { + while (true) { + switch(try self.next()) { + .partial_string => |slice| { + try appendSlice(value_list, slice, max_value_len); + }, + .partial_string_escaped_1 => |buf| { + try appendSlice(value_list, buf[0..], max_value_len); + }, + .string => |slice| { + if (when == .alloc_if_needed and value_list.items.len == 0) { + // No alloc necessary + return slice; + } + try appendSlice(value_list, slice, max_value_len); + // The token is complete. + return null; + }, + else => return error.Unimplemented, + } + } +} + +fn appendSlice( + list: *std.array_list.Managed(u8), + slice: []const u8, + max_value_len: usize, +) !void { + const new_len = std.math.add( + usize, + list.items.len, + slice.len, + ) catch return error.ValueTooLong; + if (new_len > max_value_len) return error.ValueTooLong; + try list.appendSlice(slice); +} + + pub fn next(self: *@This()) !Token { + if (self.next_token) |token| { + self.next_token = null; + return token; + } + state_loop: switch (self.state) { .value => { switch (try self.skipWhitespaceExpectByte()) { @@ -446,6 +531,13 @@ pub fn next(self: *@This()) !Token { unreachable; } +pub fn peekNextToken(self: *@This()) !Token { + if (self.next_token == null) { + self.next_token = try self.next(); + } + return self.next_token.?; +} + fn expectByte(self: *const @This()) !u8 { if (self.cursor < self.input.len) { return self.input[self.cursor]; diff --git a/src/main.zig b/src/main.zig @@ -1,6 +1,14 @@ const std = @import("std"); const Scanner = @import("Scanner.zig"); +const parseFromSlice = @import("static.zig").parseFromSlice; + +const Api = struct { + copyright: []const u8, + name: []const u8, + enum_prefix: i64, + doc: []const u8, +}; pub fn main() !void { var gpa = std.heap.GeneralPurposeAllocator(.{}){}; @@ -14,26 +22,28 @@ pub fn main() !void { ); defer allocator.free(data); - var scanner = Scanner.initCompleteInput(data); - while (true) { - const token = try scanner.next(); - switch (token) { - .string => |s| std.debug.print("string = {s}\n", .{s}), - .partial_string => |s| std.debug.print("partial string = {s}\n", .{s}), - .partial_string_escaped_1 => |s| std.debug.print("escaped = {x}\n", .{s[0]}), - .block_entry => std.debug.print("block entry\n", .{}), - .flow_sequence_start => std.debug.print("flow sequence start\n", .{}), - .flow_sequence_end => std.debug.print("flow sequence end\n", .{}), - .null => std.debug.print("null\n", .{}), - .value => std.debug.print(":\n", .{}), - .document_end => { - std.debug.print("document end\n", .{}); - break; - }, - else => { - std.debug.print("unexpected token returned\n", .{}); - break; - }, - } - } + _ = try parseFromSlice(Api, allocator, data, .{}); + + //var scanner = Scanner.initCompleteInput(data); + //while (true) { + // const token = try scanner.next(); + // switch (token) { + // .string => |s| std.debug.print("string = {s}\n", .{s}), + // .partial_string => |s| std.debug.print("partial string = {s}\n", .{s}), + // .partial_string_escaped_1 => |s| std.debug.print("escaped = {x}\n", .{s[0]}), + // .block_entry => std.debug.print("block entry\n", .{}), + // .flow_sequence_start => std.debug.print("flow sequence start\n", .{}), + // .flow_sequence_end => std.debug.print("flow sequence end\n", .{}), + // .null => std.debug.print("null\n", .{}), + // .value => std.debug.print(":\n", .{}), + // .document_end => { + // std.debug.print("document end\n", .{}); + // break; + // }, + // else => { + // std.debug.print("unexpected token returned\n", .{}); + // break; + // }, + // } + //} } diff --git a/src/static.zig b/src/static.zig @@ -0,0 +1,358 @@ +const std = @import("std"); + +const Scanner = @import("Scanner.zig"); +const AllocWhen = Scanner.AllocWhen; +const Token = Scanner.Token; +const default_max_value_len = Scanner.default_max_value_len; + +/// Controls how to deal with various inconsistencies between the YAML +/// document and the Zig struct type passed in. +/// For unknown fields, set options in this struct. +/// For missing fields, give the Zig struct fields default values. +pub const ParseOptions = struct { + /// If false, finding an unknown field returns `error.UnknownField` + ignore_unknown_fields: bool = false, + + /// Passed to `yaml.Scanner.nextAllocMax`. + /// The default for `parseFromSlice` or `parseFromTokenSource` with a + /// `*yaml.Scanner` input is the length of the input slice, which means + /// `error.ValueTooLong` will never be returned. + max_value_len: ?usize = null, + + /// This determines whether strings should always be copied, or if a + /// reference to the given buffer should be preferred if possible. + /// The default for `parseFromSlice` or `parseFromTokenSource` with a + /// `*yaml.Scanner` input is `.alloc_if_needed`. + allocate: ?AllocWhen = null, + + /// When parsing to a `yaml.Value`, set this option to false to always + /// emit YAML numbers as unparsed `yaml.Value.number_string`. Otherwise, + /// YAML numbers are parsed as either `yaml.Value.integer`, + /// `yaml.Value.float`, or left unparsed as `yaml.Value.number_string`. + /// When this option is true, YAML numbers encoded as floats may lose + /// precision when being parsed into `yaml.Value.float`. + parse_numbers: bool = true, +}; + +pub fn Parsed(comptime T: type) type { + return struct{ + arena: *std.heap.ArenaAllocator, + value: T, + + pub fn deinit(self: @This()) void { + const allocator = self.arena.child_allocator; + self.arena.deinit(); + allocator.destroy(self.arena); + } + }; +} + +pub fn parseFromSlice( + comptime T: type, + allocator: std.mem.Allocator, + s: []const u8, + options: ParseOptions, +) !Parsed(T) { + var scanner = Scanner.initCompleteInput(s); + //defer scanner.deinit(); + + return parseFromTokenSource(T, allocator, &scanner, options); +} + +pub fn parseFromTokenSource( + comptime T: type, + allocator: std.mem.Allocator, + scanner_or_reader: anytype, + options: ParseOptions, +) !Parsed(T) { + var parsed = Parsed(T){ + .arena = try allocator.create(std.heap.ArenaAllocator), + .value = undefined, + }; + errdefer allocator.destroy(parsed.arena); + parsed.arena.* = std.heap.ArenaAllocator.init(allocator); + errdefer parsed.arena.deinit(); + + parsed.value = try parseFromTokenSourceLeaky( + T, + parsed.arena.allocator(), + scanner_or_reader, + options, + ); + + return parsed; +} + +pub fn parseFromTokenSourceLeaky( + comptime T: type, + allocator: std.mem.Allocator, + scanner_or_reader: anytype, + options: ParseOptions, +) !T { + //if (@TypeOf(scanner_or_reader.*) == Scanner) { + // std.debug.assert(scanner_or_reader.is_end_of_input); + //} + var resolved_options = options; + if (resolved_options.max_value_len == null) { + if (@TypeOf(scanner_or_reader.*) == Scanner) { + resolved_options.max_value_len = scanner_or_reader.input.len; + } else { + resolved_options.max_value_len = default_max_value_len; + } + } + if (resolved_options.allocate == null) { + if (@TypeOf(scanner_or_reader.*) == Scanner) { + resolved_options.allocate = .alloc_if_needed; + } else { + resolved_options.allocate = .alloc_always; + } + } + + const value = try innerParse( + T, + allocator, + scanner_or_reader, + resolved_options, + ); + + std.debug.assert(try scanner_or_reader.next() == .document_end); + + return value; +} + +/// This is an internal function called recursively during the implementation +/// of `parseFromTokenSourceLeaky` and similar. +/// It is exposed primarily to enable custom `yamlParse()` methods to call +/// back into the `parseFrom*` system such as if you're implementing a custom +/// container of type `T`; you can call `innerParse(T, ...)` for each of the +/// container's items. Note that `null` fields are not allowed on the +/// `options` when calling this function. (The `options` you get in your +/// `yamlParse` method has no `null` fields.) +pub fn innerParse( + comptime T: type, + allocator: std.mem.Allocator, + source: anytype, + options: ParseOptions, +) !T { + switch (@typeInfo(T)) { + .bool => { + return switch (try source.next()) { + .true => true, + .false => false, + else => error.UnexpectedToken, + }; + }, + .optional => |optional_info| { + switch (try source.peekNextTokenType()) { + .null => { + _ = try source.next(); + return null; + }, + else => { + return try innerParse( + optional_info.child, + allocator, + source, + options, + ); + }, + } + }, + .@"enum" => { + if (std.meta.hasFn(T, "yamlParse")) { + return T.yamlParse(allocator, source, options); + } + + // Default parsing + const token = try source.nextAllocMax( + allocator, + .alloc_if_needed, + options.max_value_len.?, + ); + defer freeIfAlloc(allocator, token); + // defer free + const slice = switch (token) { + inline + .number, + .allocated_number, + .string, + .allocated_string => |slice| slice, + else => return error.UnexpectedToken, + }; + return sliceToEnum(T, slice); + }, + .@"union" => |union_info| { + if (std.meta.hasFn(T, "yamlParse")) { + return T.yamlParse(allocator, source, options); + } + if (union_info.tag_type == null) { + @compileError( + "Unable to parse into untagged union '" ++ @typeName(T) ++ "'" + ); + } + + var result: ?T = null; + const name_token = try source.nextAllocMax( + allocator, + .alloc_if_needed, + options.max_value_len.?, + ); + defer freeIfAlloc(allocator, name_token); + + const field_name = switch(name_token.?) { + inline .string, .allocated_string => |slice| slice, + else => return error.UnexpectedToken, + }; + + inline for (union_info.fields) |field| { + if (std.mem.eql(u8, field.name, field_name)) { + if (field.type == void) { + result = @unionInit(T, field.name, {}); + } else { + // Recurse + const token = try innerParse( + field.type, + allocator, + source, + options, + ); + result = @unionInit( + T, + field.name, + token, + ); + } + break; + } + } else { + // Didn't match anything + return error.UnknownField; + } + + return result.?; + }, + .@"struct" => |struct_info| { + if (struct_info.is_tuple) { + return error.Unimplemented; + } + if (std.meta.hasFn(T, "yamlParse")) { + return T.yamlParse(allocator, source, options); + } + + var result: T = undefined; + var fields_seen = [_]bool{false} ** struct_info.fields.len; + + while (true) { + const name_token = try source.nextAllocMax( + allocator, + .alloc_if_needed, + options.max_value_len.?, + ); + defer freeIfAlloc(allocator, name_token); + const field_name = switch(name_token) { + inline .string, .allocated_string => |slice| slice, + else => return error.UnexpectedToken, + }; + inline for (struct_info.fields, 0..) |field, i| { + if (field.is_comptime) { + @compileError( + "comptile fields are not supported: " ++ @typeName(T) ++ "." ++ field.name + ); + } + if (std.mem.eql(u8, field.name, field_name)) { + if (fields_seen[i]) return error.DuplicateField; + @field(result, field.name) = try innerParse( + field.type, + allocator, + source, + options, + ); + fields_seen[i] = true; + break; + } + } else { + // Didn't match anything + return error.UnknownField; + } + } + }, + .array => |array_info| { + switch (try source.peek()) { + .string => { + if (array_info.child != u8) return error.UnexpectedToken; + // Fixed-length string + }, + else => return error.Unimplemented, + } + }, + .pointer => |ptr_info| { + switch (ptr_info.size) { + .one => { + const result = try allocator.create(ptr_info.child); + result.* = try innerParse( + ptr_info.child, + allocator, + source, + options, + ); + return result; + }, + .slice => { + switch (try source.peek()) { + .string, + .partial_string, + .partial_string_escaped_1 => { + if (ptr_info.child != u8) { + return error.UnexpectedToken; + } + // Dynamic-length string + if (ptr_info.sentinel()) |_| { + // Use our own array list so we can append + // the sentinel. + return error.Unimplemented; + } + var alloc = options.allocate.?; + if (!ptr_info.is_const) { + // Have to allocate to get a mutable copy + alloc = .alloc_always; + } + const result = try source.nextAllocMax( + allocator, + alloc, + options.max_value_len.?, + ); + return switch (result) { + inline + .string, + .allocated_string => |slice| slice, + else => unreachable + }; + }, + else => return error.UnexpectedToken, + } + }, + } + }, + else => return error.Unimplemented, + } +} + +fn sliceToEnum(comptime T: type, slice: []const u8) !T { + if (std.meta.stringToEnum(T, slice)) |value| return value; + const n = std.fmt.parseInt( + @typeInfo(T).@"enum".tag_type, + slice, + 10 + ) catch return error.InvalidEnumTag; + return std.enums.fromInt(T, n) orelse return error.InvalidEnumTag; +} + +fn freeIfAlloc(allocator: std.mem.Allocator, token: Token) void { + switch (token) { + inline .allocated_number, .allocated_string => |slice| { + allocator.free(slice); + }, + else => {}, + } +} + diff --git a/test/yaml_test_suite.zig b/test/yaml_test_suite.zig @@ -0,0 +1,108 @@ +const std = @import("std"); + +const yaml = @import("yaml"); + +pub fn main() !void { + var gpa = std.heap.GeneralPurposeAllocator(.{}){}; + defer _ = gpa.deinit(); + const allocator = gpa.allocator(); + + const args = try std.process.argsAlloc(allocator); + defer std.process.argsFree(allocator, args); + + if (args.len < 2) { + std.debug.print( + "Usage: {s} <yaml-test-suite-directory>\n", + .{ args[0] }, + ); + std.process.exit(1); + } + const test_suite_dir = args[1]; + + try runTestSuite(allocator, test_suite_dir); +} + +fn runTestSuite(allocator: std.mem.Allocator, suite_dir: []const u8) !void { + var dir = try std.fs.cwd().openDir(suite_dir, .{ .iterate = true }); + defer dir.close(); + + var it = dir.iterate(); + while (try it.next()) |entry| { + switch (entry.kind) { + .directory => { + // it seems like these directories are primarily symlinks to + // tests in the root directory. there seems to be a few tests + // that are just in these folders, but we'll ignore them for + // now + if (std.mem.eql(u8, entry.name, "tags")) { + continue; + } + if (std.mem.eql(u8, entry.name, "name")) { + continue; + } + try runTestSet(allocator, dir, entry.name); + }, + else => continue, + } + } +} + +fn runTestSet( + allocator: std.mem.Allocator, + suite_dir: std.fs.Dir, + test_set_id: []const u8, +) !void { + var dir = try suite_dir.openDir(test_set_id, .{ .iterate = true }); + defer dir.close(); + + // a set may contain multiple tests + dir.access("in.yaml", .{}) catch { + var it = dir.iterate(); + while (try it.next()) |entry| { + switch (entry.kind) { + .directory => try runTest(allocator, dir, entry.name), + else => continue, + } + } + return; + }; + // or a single test case + try runTestCase(allocator, dir); +} + +fn runTest( + allocator: std.mem.Allocator, + test_set_dir: std.fs.Dir, + test_id: []const u8, +) !void { + var dir = try test_set_dir.openDir(test_id, .{ .iterate = true }); + defer dir.close(); + + try runTestCase(allocator, dir); +} + +fn runTestCase( + allocator: std.mem.Allocator, + test_dir: std.fs.Dir, +) !void { + const input_path = "in.yaml"; + const error_path = "error"; + + const expect_parse_error = blk: { + test_dir.access(error_path, .{}) catch |e| { + switch (e) { + error.FileNotFound => break :blk false, + else => return e, + } + }; + break :blk true; + }; + _ = expect_parse_error; + + const yaml_data = try test_dir.readFileAlloc( + allocator, + input_path, + 1024*1024 + ); + defer allocator.free(yaml_data); +} diff --git a/webgpu.json b/webgpu.json @@ -0,0 +1,17 @@ +{ + "constants": [ + { + "value": "uint32_max", + "doc": " TODO\n", + "name": "array_layer_count_undefined" + }, + { + "name": "copy_stride_undefined" + } + ], + "value": "uint32_max", + "doc": " TODO\n", + "copyright": " Copyright 2019-2023 WebGPU-Native developers\n\n SPDX-License-Identifier: BSD-3-Clause\n", + "name": "webgpu", + "enum_prefix": 0 +} +\ No newline at end of file