Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 29 additions & 29 deletions DOCUMENTATION.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ All examples are verified by running `zig build examples-check`
- `lastChild()`
- `nextSibling()`
- `prevSibling()`
- `children()` (borrowed `[]const u32` index view)
- `children()` (iterator of wrapped child nodes; `collect(allocator)` returns an owned `[]Node`)
- Text:
- `innerText(allocator)` (borrowed or allocated depending on shape)
- `innerTextWithOptions(allocator, TextOptions)`
Expand Down Expand Up @@ -193,49 +193,49 @@ Source: `bench/results/latest.json` (`stable` profile).

| Fixture | ours | lol-html | lexbor |
|---|---:|---:|---:|
| `rust-lang.html` | 1447.99 | 1474.65 | 332.72 |
| `wiki-html.html` | 1645.45 | 1215.04 | 271.24 |
| `mdn-html.html` | 2570.09 | 1879.00 | 404.50 |
| `w3-html52.html` | 1064.19 | 764.62 | 199.22 |
| `hn.html` | 1263.60 | 885.26 | 223.15 |
| `python-org.html` | 1549.02 | 1356.21 | 284.19 |
| `kernel-org.html` | 1440.47 | 1300.81 | 276.52 |
| `gnu-org.html` | 1917.36 | 1482.15 | 317.74 |
| `ziglang-org.html` | 1480.49 | 1257.62 | 291.72 |
| `ziglang-doc-master.html` | 1122.44 | 987.16 | 214.23 |
| `wikipedia-unicode-list.html` | 1247.00 | 1024.98 | 215.21 |
| `whatwg-html-spec.html` | 1113.73 | 841.16 | 210.83 |
| `synthetic-forms.html` | 1046.17 | 710.72 | 174.94 |
| `synthetic-table-grid.html` | 768.56 | 622.31 | 152.86 |
| `synthetic-list-nested.html` | 833.77 | 598.02 | 152.45 |
| `synthetic-comments-doctype.html` | 1200.72 | 827.66 | 212.09 |
| `synthetic-template-rich.html` | 628.02 | 444.34 | 134.10 |
| `synthetic-whitespace-noise.html` | 1104.21 | 919.69 | 170.33 |
| `synthetic-news-feed.html` | 835.27 | 577.95 | 144.46 |
| `synthetic-ecommerce.html` | 787.72 | 556.51 | 151.95 |
| `synthetic-forum-thread.html` | 839.48 | 579.84 | 143.06 |
| `rust-lang.html` | 2132.70 | 1501.81 | 324.53 |
| `wiki-html.html` | 1991.85 | 1220.28 | 267.50 |
| `mdn-html.html` | 2939.75 | 1894.70 | 408.76 |
| `w3-html52.html` | 956.25 | 754.98 | 196.30 |
| `hn.html` | 1595.94 | 893.48 | 221.75 |
| `python-org.html` | 2116.13 | 1354.79 | 280.79 |
| `kernel-org.html` | 1979.68 | 1335.72 | 290.65 |
| `gnu-org.html` | 2368.06 | 1490.41 | 313.42 |
| `ziglang-org.html` | 1874.52 | 1299.04 | 289.84 |
| `ziglang-doc-master.html` | 1431.90 | 1045.18 | 225.11 |
| `wikipedia-unicode-list.html` | 1647.71 | 1081.56 | 226.67 |
| `whatwg-html-spec.html` | 1344.31 | 892.66 | 219.37 |
| `synthetic-forms.html` | 1396.29 | 781.68 | 189.28 |
| `synthetic-table-grid.html` | 1086.37 | 724.33 | 169.46 |
| `synthetic-list-nested.html` | 1190.11 | 652.22 | 161.58 |
| `synthetic-comments-doctype.html` | 1862.19 | 943.51 | 223.61 |
| `synthetic-template-rich.html` | 913.72 | 465.79 | 143.20 |
| `synthetic-whitespace-noise.html` | 1592.74 | 1062.32 | 189.70 |
| `synthetic-news-feed.html` | 1179.12 | 646.78 | 157.61 |
| `synthetic-ecommerce.html` | 1134.48 | 637.67 | 163.21 |
| `synthetic-forum-thread.html` | 1145.80 | 643.29 | 161.14 |

#### Query Match Throughput (ours)

| Case | ours ops/s | ours ns/op |
|---|---:|---:|
| `attr-heavy-button` | 1148936.76 | 870.37 |
| `attr-heavy-nav` | 1130790.00 | 884.34 |
| `attr-heavy-button` | 191877.12 | 5211.67 |
| `attr-heavy-nav` | 87885.90 | 11378.39 |

#### Cached Query Throughput (ours)

| Case | ours ops/s | ours ns/op |
|---|---:|---:|
| `attr-heavy-button` | 1305257.78 | 766.13 |
| `attr-heavy-nav` | 1347173.46 | 742.29 |
| `attr-heavy-button` | 144352.80 | 6927.47 |
| `attr-heavy-nav` | 120324.49 | 8310.86 |

#### Query Parse Throughput (ours)

| Selector case | Ops/s | ns/op |
|---|---:|---:|
| `simple` | 17335919.85 | 57.68 |
| `complex` | 5836657.49 | 171.33 |
| `grouped` | 6396371.26 | 156.34 |
| `simple` | 10465915.34 | 95.55 |
| `complex` | 4910511.63 | 203.64 |
| `grouped` | 6290595.08 | 158.97 |

For full per-parser, per-fixture tables and gate output:
- `bench/results/latest.md`
Expand Down
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@ Source: `bench/results/latest.json` (`stable` profile).
### Parse Throughput (Average Across Fixtures)

```text
ours │████████████████████│ 1233.61 MB/s (100.00%)
lol-html │████████████████░░░░│ 966.94 MB/s (78.38%)
lexbor │███░░░░░░░░░░░░░░░░│ 222.74 MB/s (18.06%)
ours │████████████████████│ 1613.31 MB/s (100.00%)
lol-html │█████████████░░░░░░░│ 1015.34 MB/s (62.94%)
lexbor │███░░░░░░░░░░░░░░░░░│ 229.69 MB/s (14.24%)
```

### Conformance Snapshot
Expand Down
48 changes: 48 additions & 0 deletions build.zig
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,15 @@ pub fn build(b: *std.Build) void {
}),
});

const parse_mode_mod = b.createModule(.{
.root_source_file = b.path("tools/parse_mode.zig"),
.target = target,
.optimize = optimize,
.imports = &.{
.{ .name = "htmlparser", .module = mod },
},
});

const bench_exe = b.addExecutable(.{
.name = "htmlparser-bench",
.root_module = b.createModule(.{
Expand All @@ -30,6 +39,7 @@ pub fn build(b: *std.Build) void {
.optimize = optimize,
.imports = &.{
.{ .name = "htmlparser", .module = mod },
.{ .name = "parse_mode", .module = parse_mode_mod },
},
}),
});
Expand Down Expand Up @@ -111,11 +121,13 @@ pub fn build(b: *std.Build) void {

const mod_tests = b.addTest(.{
.root_module = mod,
.test_runner = .{ .path = b.path("tools/test_runner.zig"), .mode = .simple },
});
const run_mod_tests = b.addRunArtifact(mod_tests);

const exe_tests = b.addTest(.{
.root_module = exe.root_module,
.test_runner = .{ .path = b.path("tools/test_runner.zig"), .mode = .simple },
});
const run_exe_tests = b.addRunArtifact(exe_tests);

Expand All @@ -126,8 +138,17 @@ pub fn build(b: *std.Build) void {
.optimize = optimize,
.imports = &.{
.{ .name = "htmlparser", .module = mod },
.{ .name = "examples", .module = b.createModule(.{
.root_source_file = b.path("examples/examples.zig"),
.target = target,
.optimize = optimize,
.imports = &.{
.{ .name = "htmlparser", .module = mod },
},
}) },
},
}),
.test_runner = .{ .path = b.path("tools/test_runner.zig"), .mode = .simple },
});
const run_examples_tests = b.addRunArtifact(examples_tests);

Expand All @@ -140,14 +161,41 @@ pub fn build(b: *std.Build) void {
.{ .name = "htmlparser", .module = mod },
},
}),
.test_runner = .{ .path = b.path("tools/test_runner.zig"), .mode = .simple },
});
const run_behavioral_tests = b.addRunArtifact(behavioral_tests);

const scripts_tests = b.addTest(.{
.root_module = b.createModule(.{
.root_source_file = b.path("tools/scripts.zig"),
.target = target,
.optimize = optimize,
}),
.test_runner = .{ .path = b.path("tools/test_runner.zig"), .mode = .simple },
});
const run_scripts_tests = b.addRunArtifact(scripts_tests);

const bench_tests = b.addTest(.{
.root_module = b.createModule(.{
.root_source_file = b.path("tools/bench/bench.zig"),
.target = target,
.optimize = optimize,
.imports = &.{
.{ .name = "htmlparser", .module = mod },
.{ .name = "parse_mode", .module = parse_mode_mod },
},
}),
.test_runner = .{ .path = b.path("tools/test_runner.zig"), .mode = .simple },
});
const run_bench_tests = b.addRunArtifact(bench_tests);

const test_step = b.step("test", "Run tests");
test_step.dependOn(&run_mod_tests.step);
test_step.dependOn(&run_exe_tests.step);
test_step.dependOn(&run_examples_tests.step);
test_step.dependOn(&run_behavioral_tests.step);
test_step.dependOn(&run_scripts_tests.step);
test_step.dependOn(&run_bench_tests.step);

const ship_check_step = b.step("ship-check", "Run release-readiness checks (test + docs + examples)");
ship_check_step.dependOn(test_step);
Expand Down
2 changes: 1 addition & 1 deletion examples/basic_parse_query.zig
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ const html = @import("htmlparser");
const default_options: html.ParseOptions = .{};
const Document = default_options.GetDocument();

fn run() !void {
pub fn run() !void {
var doc = Document.init(std.testing.allocator);
defer doc.deinit();

Expand Down
4 changes: 2 additions & 2 deletions examples/cached_selector.zig
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ const html = @import("htmlparser");
const default_options: html.ParseOptions = .{};
const Document = default_options.GetDocument();

fn run() !void {
pub fn run() !void {
var doc = Document.init(std.testing.allocator);
defer doc.deinit();

Expand All @@ -20,7 +20,7 @@ fn run() !void {
defer arena.deinit();

const sel = try html.Selector.compileRuntime(arena.allocator(), "a[href^=https][class~=button]");
const first = doc.queryOneCached(&sel) orelse return error.TestUnexpectedResult;
const first = doc.queryOneCached(sel) orelse return error.TestUnexpectedResult;
try std.testing.expectEqualStrings("a1", first.getAttributeValue("id").?);
}

Expand Down
14 changes: 7 additions & 7 deletions examples/debug_query_report.zig
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,19 @@ const html = @import("htmlparser");
const default_options: html.ParseOptions = .{};
const Document = default_options.GetDocument();

fn run() !void {
pub fn run() !void {
var doc = Document.init(std.testing.allocator);
defer doc.deinit();

var input = "<div><a id='one' class='nav'></a><a id='two'></a></div>".*;
try doc.parse(&input, .{});

var report: html.QueryDebugReport = .{};
const node = try doc.queryOneRuntimeDebug("a[href^=https]", &report);
try std.testing.expect(node == null);
try std.testing.expect(report.visited_elements > 0);
try std.testing.expect(report.near_miss_len > 0);
try std.testing.expect(report.near_misses[0].reason.kind != .none);
const result = doc.queryOneRuntimeDebug("a[href^=https]");
try std.testing.expect(result.err == null);
try std.testing.expect(result.node == null);
try std.testing.expect(result.report.visited_elements > 0);
try std.testing.expect(result.report.near_miss_len > 0);
try std.testing.expect(result.report.near_misses[0].reason.kind != .none);
}

test "query debug report for selector mismatch" {
Expand Down
9 changes: 9 additions & 0 deletions examples/examples.zig
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
pub const basic_parse_query = @import("basic_parse_query.zig");
pub const runtime_selector = @import("runtime_selector.zig");
pub const cached_selector = @import("cached_selector.zig");
pub const navigation_and_children = @import("navigation_and_children.zig");
pub const inner_text_options = @import("inner_text_options.zig");
pub const strict_vs_fastest_parse = @import("strict_vs_fastest_parse.zig");
pub const debug_query_report = @import("debug_query_report.zig");
pub const instrumentation_hooks = @import("instrumentation_hooks.zig");
pub const query_time_decode = @import("query_time_decode.zig");
2 changes: 1 addition & 1 deletion examples/inner_text_options.zig
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ const html = @import("htmlparser");
const default_options: html.ParseOptions = .{};
const Document = default_options.GetDocument();

fn run() !void {
pub fn run() !void {
var doc = Document.init(std.testing.allocator);
defer doc.deinit();

Expand Down
6 changes: 3 additions & 3 deletions examples/instrumentation_hooks.zig
Original file line number Diff line number Diff line change
Expand Up @@ -26,17 +26,17 @@ const Hooks = struct {
}
};

fn run() !void {
pub fn run() !void {
var doc = Document.init(std.testing.allocator);
defer doc.deinit();

var hooks: Hooks = .{};
var input = "<div><span id='x'></span></div>".*;
try html.parseWithHooks(&doc, &input, .{}, &hooks);
try html.parseWithHooks(std.testing.io, &doc, &input, .{}, &hooks);
try std.testing.expectEqual(@as(usize, 1), hooks.parse_start_calls);
try std.testing.expectEqual(@as(usize, 1), hooks.parse_end_calls);

_ = try html.queryOneRuntimeWithHooks(&doc, "span#x", &hooks);
_ = try html.queryOneRuntimeWithHooks(std.testing.io, &doc, "span#x", &hooks);
try std.testing.expectEqual(@as(usize, 1), hooks.query_start_calls);
try std.testing.expectEqual(@as(usize, 1), hooks.query_end_calls);
}
Expand Down
13 changes: 5 additions & 8 deletions examples/navigation_and_children.zig
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ const html = @import("htmlparser");
const default_options: html.ParseOptions = .{};
const Document = default_options.GetDocument();

fn run() !void {
pub fn run() !void {
var doc = Document.init(std.testing.allocator);
defer doc.deinit();

Expand All @@ -18,13 +18,10 @@ fn run() !void {
try std.testing.expectEqualStrings("body", last.getAttributeValue("id").?);

var children = main.children();
var child_indexes: std.ArrayListUnmanaged(u32) = .{};
defer child_indexes.deinit(std.testing.allocator);
try children.collect(std.testing.allocator, &child_indexes);
try std.testing.expectEqual(@as(usize, 3), child_indexes.items.len);
const first_idx = child_indexes.items[0];
const first_via_index = main.doc.nodeAt(first_idx) orelse return error.TestUnexpectedResult;
try std.testing.expectEqualStrings("title", first_via_index.getAttributeValue("id").?);
const child_nodes = try children.collect(std.testing.allocator);
defer std.testing.allocator.free(child_nodes);
try std.testing.expectEqual(@as(usize, 3), child_nodes.len);
try std.testing.expectEqualStrings("title", child_nodes[0].getAttributeValue("id").?);
}

test "navigation and children iterator" {
Expand Down
2 changes: 1 addition & 1 deletion examples/query_time_decode.zig
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ const html = @import("htmlparser");
const default_options: html.ParseOptions = .{};
const Document = default_options.GetDocument();

fn run() !void {
pub fn run() !void {
var doc = Document.init(std.testing.allocator);
defer doc.deinit();

Expand Down
2 changes: 1 addition & 1 deletion examples/runtime_selector.zig
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ const html = @import("htmlparser");
const default_options: html.ParseOptions = .{};
const Document = default_options.GetDocument();

fn run() !void {
pub fn run() !void {
var doc = Document.init(std.testing.allocator);
defer doc.deinit();

Expand Down
2 changes: 1 addition & 1 deletion examples/strict_vs_fastest_parse.zig
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ const html = @import("htmlparser");
const default_options: html.ParseOptions = .{};
const Document = default_options.GetDocument();

fn run() !void {
pub fn run() !void {
const fixture =
"<html><body>" ++
"<ul><li class='item'>A</li><li class='item'>B</li></ul>" ++
Expand Down
8 changes: 4 additions & 4 deletions src/common.zig
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ pub const DebugFailureKind = enum(u8) {
scope,

/// Formats this failure kind for human-readable output.
pub fn format(self: @This(), writer: *std.io.Writer) std.io.Writer.Error!void {
pub fn format(self: @This(), writer: *std.Io.Writer) std.Io.Writer.Error!void {
try writer.writeAll(@tagName(self));
}
};
Expand All @@ -42,7 +42,7 @@ pub const Failure = struct {
}

/// Formats this failure record for human-readable output.
pub fn format(self: @This(), writer: *std.io.Writer) std.io.Writer.Error!void {
pub fn format(self: @This(), writer: *std.Io.Writer) std.Io.Writer.Error!void {
try writer.print("Failure{{kind={s}, group_index={}, compound_index={}, predicate_index={}}}", .{
@tagName(self.kind),
self.group_index,
Expand All @@ -58,7 +58,7 @@ pub const NearMiss = struct {
reason: Failure = .{},

/// Formats this near-miss record for human-readable output.
pub fn format(self: @This(), writer: *std.io.Writer) std.io.Writer.Error!void {
pub fn format(self: @This(), writer: *std.Io.Writer) std.Io.Writer.Error!void {
try writer.print("NearMiss{{node_index={}, reason=", .{self.node_index});
try self.reason.format(writer);
try writer.writeAll("}");
Expand Down Expand Up @@ -107,7 +107,7 @@ pub const QueryDebugReport = struct {
}

/// Formats summary debug report data for human-readable output.
pub fn format(self: @This(), writer: *std.io.Writer) std.io.Writer.Error!void {
pub fn format(self: @This(), writer: *std.Io.Writer) std.Io.Writer.Error!void {
try writer.print(
"QueryDebugReport{{selector_source=\"{s}\", scope_root={}, visited_elements={}, matched_index={}, matched_group={}, runtime_parse_error={}, group_count={}, near_miss_len={}}}",
.{
Expand Down
Loading
Loading