From b396d7d3719e316ddb53044e7456d6e3adaab579 Mon Sep 17 00:00:00 2001 From: Phil Scott Date: Sun, 24 May 2026 19:45:42 -0400 Subject: [PATCH 1/2] feat(yaml): replace YamlDotNet with SharpYaml Swap YamlDotNet 16.3.0 for SharpYaml 3.7.0 across front matter, data files, and the llms/redirect sidecar readers. Built-in front-matter records deserialize through a source-generated YamlSerializerContext (PenningtonYamlContext); the DocSite/BlogSite templates ship their own contexts; users opt in via AddPenningtonYamlContext. Any type no context covers falls back to reflection through an explicit ReflectionYamlTypeInfoResolver, so the fallback works regardless of SharpYaml's IsReflectionEnabledByDefault switch. FrontMatterParser enforces the anchor/alias/tag security policy with a single SharpYaml event pass (replacing SafeYamlParser/BufferedYamlParser) and reports unknown keys with line numbers. Strict mode throws from that scan after emitting warnings, so it works uniformly for source-gen and reflection types. --- CLAUDE.md | 2 +- Directory.Packages.props | 2 +- .../how-to/content-services/data-files.md | 4 +- .../BlogSiteServiceExtensions.cs | 3 + .../BlogSiteYamlContext.cs | 15 + .../Pennington.BlogSite.csproj | 2 + .../DocSiteServiceExtensions.cs | 3 + src/Pennington.DocSite/DocSiteYamlContext.cs | 16 ++ .../Pennington.DocSite.csproj | 2 + .../Content/MarkdownContentService.cs | 10 +- .../Content/RedirectContentService.cs | 11 +- src/Pennington/Data/DataFileLoader.cs | 36 ++- .../FrontMatter/BufferedYamlParser.cs | 24 -- .../FrontMatter/FrontMatterParser.cs | 259 +++++++++++------- src/Pennington/FrontMatter/PenningtonYaml.cs | 22 ++ .../FrontMatter/PenningtonYamlContext.cs | 17 ++ .../PenningtonYamlContextProvider.cs | 53 ++++ src/Pennington/FrontMatter/SafeYamlParser.cs | 48 ---- .../Infrastructure/PenningtonExtensions.cs | 20 ++ src/Pennington/Pennington.csproj | 2 +- .../FrontMatterParserDiagnosticsTests.cs | 4 +- .../FrontMatter/FrontMatterParserTests.cs | 8 +- 22 files changed, 347 insertions(+), 216 deletions(-) create mode 100644 src/Pennington.BlogSite/BlogSiteYamlContext.cs create mode 100644 src/Pennington.DocSite/DocSiteYamlContext.cs delete mode 100644 src/Pennington/FrontMatter/BufferedYamlParser.cs create mode 100644 src/Pennington/FrontMatter/PenningtonYaml.cs create mode 100644 src/Pennington/FrontMatter/PenningtonYamlContext.cs create mode 100644 src/Pennington/FrontMatter/PenningtonYamlContextProvider.cs delete mode 100644 src/Pennington/FrontMatter/SafeYamlParser.cs diff --git a/CLAUDE.md b/CLAUDE.md index c3b68c79..619ca3e8 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -9,7 +9,7 @@ Content engine library targeting .NET 11 / C# 15 with union types. - Run docs site: `dotnet run --project docs/Pennington.Docs` ## Project Structure -- `src/Pennington/` — Core library (Markdig, YamlDotNet, AngleSharp, TextMateSharp) +- `src/Pennington/` — Core library (Markdig, SharpYaml, AngleSharp, TextMateSharp) - `src/Pennington.UI/` — Razor component library (TableOfContentsNav, OutlineNav, Badge, Card, CodeBlock, etc.) - `src/Pennington.MonorailCss/` — MonorailCSS integration (utility-first CSS generation) - `src/Pennington.DocSite/` — Documentation site template (layout, pages, content resolver) diff --git a/Directory.Packages.props b/Directory.Packages.props index 18a7ea17..9c07cb77 100644 --- a/Directory.Packages.props +++ b/Directory.Packages.props @@ -23,6 +23,7 @@ + @@ -37,7 +38,6 @@ - diff --git a/docs/Pennington.Docs/Content/how-to/content-services/data-files.md b/docs/Pennington.Docs/Content/how-to/content-services/data-files.md index 48743164..4678f38b 100644 --- a/docs/Pennington.Docs/Content/how-to/content-services/data-files.md +++ b/docs/Pennington.Docs/Content/how-to/content-services/data-files.md @@ -11,7 +11,7 @@ When a piece of site content is structured data — sponsors, navigation, schedu ## Register the file -`AddDataFile(name, path)` deserializes `path` into `T` on first access and tracks the file for changes. Format is chosen from the extension: `.yml` and `.yaml` go through YamlDotNet, `.json` through `System.Text.Json`. Both deserializers use camelCase property naming with case-insensitive matching, mirroring how front matter is parsed. +`AddDataFile(name, path)` deserializes `path` into `T` on first access and tracks the file for changes. Format is chosen from the extension: `.yml` and `.yaml` go through SharpYaml, `.json` through `System.Text.Json`. Both deserializers use camelCase property naming with case-insensitive matching, mirroring how front matter is parsed. ```csharp builder.Services.AddDataFile>("sponsors", "data/sponsors.yml"); @@ -21,7 +21,7 @@ builder.Services.AddDataFile>("nav", "data/nav.json"); The lookup key (`"sponsors"`) is case-insensitive and must be unique across registrations. Paths are resolved against the current working directory if relative. -The target type needs a parameterless constructor — use a record with init-only properties so YamlDotNet can populate it: +The target type needs a parameterless constructor — use a record with init-only properties so SharpYaml can populate it: ```csharp public record Sponsor diff --git a/src/Pennington.BlogSite/BlogSiteServiceExtensions.cs b/src/Pennington.BlogSite/BlogSiteServiceExtensions.cs index 1e8f018d..dc7c7a39 100644 --- a/src/Pennington.BlogSite/BlogSiteServiceExtensions.cs +++ b/src/Pennington.BlogSite/BlogSiteServiceExtensions.cs @@ -49,6 +49,9 @@ public static IServiceCollection AddBlogSite(this IServiceCollection services, : [blogSiteAssembly, .. routingAssemblies]; }); + // Source-generated YAML metadata for BlogSite's front-matter type (reflection fallback otherwise). + services.AddPenningtonYamlContext(BlogSiteYamlContext.Default); + // Make Pennington.UI components available inline in markdown via Mdazor. // is intentionally excluded: markdown authors should use fenced // code blocks, not a component round-trip through Mdazor+Markdig. diff --git a/src/Pennington.BlogSite/BlogSiteYamlContext.cs b/src/Pennington.BlogSite/BlogSiteYamlContext.cs new file mode 100644 index 00000000..92d855d1 --- /dev/null +++ b/src/Pennington.BlogSite/BlogSiteYamlContext.cs @@ -0,0 +1,15 @@ +namespace Pennington.BlogSite; + +using System.Text.Json.Serialization; +using SharpYaml.Serialization; + +/// +/// Source-generated SharpYaml metadata for BlogSite's front-matter record, registered by +/// so it deserializes without reflection +/// (NativeAOT/trim-friendly). +/// +[YamlSourceGenerationOptions(PropertyNamingPolicy = JsonKnownNamingPolicy.CamelCase, PropertyNameCaseInsensitive = true)] +[YamlSerializable(typeof(BlogSiteFrontMatter))] +internal partial class BlogSiteYamlContext : YamlSerializerContext +{ +} diff --git a/src/Pennington.BlogSite/Pennington.BlogSite.csproj b/src/Pennington.BlogSite/Pennington.BlogSite.csproj index 18027761..e93fcc1f 100644 --- a/src/Pennington.BlogSite/Pennington.BlogSite.csproj +++ b/src/Pennington.BlogSite/Pennington.BlogSite.csproj @@ -12,6 +12,8 @@ + + diff --git a/src/Pennington.DocSite/DocSiteServiceExtensions.cs b/src/Pennington.DocSite/DocSiteServiceExtensions.cs index c2ee7f2c..d4a307be 100644 --- a/src/Pennington.DocSite/DocSiteServiceExtensions.cs +++ b/src/Pennington.DocSite/DocSiteServiceExtensions.cs @@ -100,6 +100,9 @@ public static IServiceCollection AddDocSite(this IServiceCollection services, options.ConfigurePennington?.Invoke(penn); }); + // Source-generated YAML metadata for DocSite's front-matter types (reflection fallback otherwise). + services.AddPenningtonYamlContext(DocSiteYamlContext.Default); + // Make Pennington.UI components available inline in markdown via Mdazor. // is intentionally excluded: markdown authors should use fenced // code blocks, not a component round-trip through Mdazor+Markdig. diff --git a/src/Pennington.DocSite/DocSiteYamlContext.cs b/src/Pennington.DocSite/DocSiteYamlContext.cs new file mode 100644 index 00000000..db5f2345 --- /dev/null +++ b/src/Pennington.DocSite/DocSiteYamlContext.cs @@ -0,0 +1,16 @@ +namespace Pennington.DocSite; + +using System.Text.Json.Serialization; +using SharpYaml.Serialization; + +/// +/// Source-generated SharpYaml metadata for DocSite's front-matter records, registered by +/// so they deserialize without reflection +/// (NativeAOT/trim-friendly). +/// +[YamlSourceGenerationOptions(PropertyNamingPolicy = JsonKnownNamingPolicy.CamelCase, PropertyNameCaseInsensitive = true)] +[YamlSerializable(typeof(DocSiteFrontMatter))] +[YamlSerializable(typeof(BlogPostFrontMatter))] +internal partial class DocSiteYamlContext : YamlSerializerContext +{ +} diff --git a/src/Pennington.DocSite/Pennington.DocSite.csproj b/src/Pennington.DocSite/Pennington.DocSite.csproj index e61056b3..7ae26d75 100644 --- a/src/Pennington.DocSite/Pennington.DocSite.csproj +++ b/src/Pennington.DocSite/Pennington.DocSite.csproj @@ -12,6 +12,8 @@ + + diff --git a/src/Pennington/Content/MarkdownContentService.cs b/src/Pennington/Content/MarkdownContentService.cs index be659f2c..0f42f5d3 100644 --- a/src/Pennington/Content/MarkdownContentService.cs +++ b/src/Pennington/Content/MarkdownContentService.cs @@ -7,8 +7,7 @@ namespace Pennington.Content; using LlmsTxt; using Pipeline; using Routing; -using YamlDotNet.Serialization; -using YamlDotNet.Serialization.NamingConventions; +using SharpYaml; /// /// Discovers and provides markdown content from a directory. @@ -573,11 +572,6 @@ private async Task> LoadSubtreesAsync() } var basePrefix = NormalizeBasePageUrl(_options.BasePageUrl.Value); - var deserializer = new DeserializerBuilder() - .WithNamingConvention(CamelCaseNamingConvention.Instance) - .IgnoreUnmatchedProperties() - .Build(); - var builder = ImmutableList.CreateBuilder(); foreach (var file in _fileSystem.Directory.EnumerateFiles( @@ -596,7 +590,7 @@ private async Task> LoadSubtreesAsync() LlmsSubtreeSidecar? sidecar; try { - sidecar = deserializer.Deserialize(content); + sidecar = YamlSerializer.Deserialize(content, PenningtonYaml.ReflectionOptions); } catch { diff --git a/src/Pennington/Content/RedirectContentService.cs b/src/Pennington/Content/RedirectContentService.cs index 92a1d501..7ee59194 100644 --- a/src/Pennington/Content/RedirectContentService.cs +++ b/src/Pennington/Content/RedirectContentService.cs @@ -7,9 +7,7 @@ namespace Pennington.Content; using Microsoft.Extensions.DependencyInjection; using Pipeline; using Routing; -using YamlDotNet.Core; -using YamlDotNet.Serialization; -using YamlDotNet.Serialization.NamingConventions; +using SharpYaml; /// /// Holds the unified redirect map used by @@ -164,12 +162,7 @@ private async Task> LoadYamlAsync() try { - var deserializer = new DeserializerBuilder() - .WithNamingConvention(CamelCaseNamingConvention.Instance) - .IgnoreUnmatchedProperties() - .Build(); - - var config = deserializer.Deserialize(yaml); + var config = YamlSerializer.Deserialize(yaml, PenningtonYaml.ReflectionOptions); if (config?.Redirects is null || config.Redirects.Count == 0) { return ImmutableDictionary.Empty; diff --git a/src/Pennington/Data/DataFileLoader.cs b/src/Pennington/Data/DataFileLoader.cs index 09711f9b..4b43e913 100644 --- a/src/Pennington/Data/DataFileLoader.cs +++ b/src/Pennington/Data/DataFileLoader.cs @@ -2,24 +2,19 @@ namespace Pennington.Data; using System.IO.Abstractions; using System.Text.Json; -using YamlDotNet.RepresentationModel; -using YamlDotNet.Serialization; -using YamlDotNet.Serialization.NamingConventions; +using FrontMatter; +using SharpYaml; +using SharpYaml.Events; /// /// Deserializes a data file's bytes into a typed value. Format is chosen from the -/// extension (.yml / .yaml use YamlDotNet, .json uses System.Text.Json), +/// extension (.yml / .yaml use SharpYaml, .json uses System.Text.Json), /// both configured with camelCase naming and case-insensitive property matching to mirror -/// 's behavior. +/// 's behavior. Arbitrary data types deserialize +/// via reflection (no source-gen context covers them). /// public static class DataFileLoader { - private static readonly IDeserializer YamlDeserializer = new DeserializerBuilder() - .WithNamingConvention(CamelCaseNamingConvention.Instance) - .WithCaseInsensitivePropertyMatching() - .IgnoreUnmatchedProperties() - .Build(); - private static readonly JsonSerializerOptions JsonOptions = new() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase, @@ -93,9 +88,20 @@ private static bool RootIsYamlSequence(string content, string path) { try { - var stream = new YamlStream(); - stream.Load(new StringReader(content)); - return stream.Documents.Count > 0 && stream.Documents[0].RootNode is YamlSequenceNode; + // The first node-start event after the stream/document preamble is the root node. + var parser = Parser.CreateParser(new StringReader(content)); + while (parser.MoveNext()) + { + switch (parser.Current) + { + case SequenceStart: + return true; + case MappingStart or Scalar: + return false; + } + } + + return false; } catch (Exception ex) { @@ -129,7 +135,7 @@ private static T DeserializeYaml(string content, string path) { try { - return YamlDeserializer.Deserialize(content) + return YamlSerializer.Deserialize(content, PenningtonYaml.ReflectionOptions) ?? throw new InvalidDataException($"YAML in {path} deserialized to null"); } catch (Exception ex) when (ex is not InvalidDataException) diff --git a/src/Pennington/FrontMatter/BufferedYamlParser.cs b/src/Pennington/FrontMatter/BufferedYamlParser.cs deleted file mode 100644 index 1bab8285..00000000 --- a/src/Pennington/FrontMatter/BufferedYamlParser.cs +++ /dev/null @@ -1,24 +0,0 @@ -namespace Pennington.FrontMatter; - -using YamlDotNet.Core; -using YamlDotNet.Core.Events; - -/// -/// Replays a buffered list of s as an , -/// so a single character scan of the YAML can drive both the unknown-key diagnostic -/// pass and the deserialization pass without tokenizing the source twice. -/// -internal sealed class BufferedYamlParser(IReadOnlyList events) : IParser -{ - private int _index = -1; - - /// - public ParsingEvent? Current => _index >= 0 && _index < events.Count ? events[_index] : null; - - /// - public bool MoveNext() - { - _index++; - return _index < events.Count; - } -} diff --git a/src/Pennington/FrontMatter/FrontMatterParser.cs b/src/Pennington/FrontMatter/FrontMatterParser.cs index 06d8b301..214b6e88 100644 --- a/src/Pennington/FrontMatter/FrontMatterParser.cs +++ b/src/Pennington/FrontMatter/FrontMatterParser.cs @@ -3,14 +3,12 @@ namespace Pennington.FrontMatter; using System.Collections.Concurrent; using System.Collections.Frozen; using System.Reflection; +using System.Text.Json; using Diagnostics; using Microsoft.AspNetCore.Http; using Microsoft.Extensions.DependencyInjection; -using YamlDotNet.Core; -using YamlDotNet.Core.Events; -using YamlDotNet.RepresentationModel; -using YamlDotNet.Serialization; -using YamlDotNet.Serialization.NamingConventions; +using SharpYaml; +using SharpYaml.Events; /// /// Parses YAML front matter from markdown content. @@ -19,33 +17,54 @@ public sealed class FrontMatterParser { private const string DiagnosticSource = "FrontMatterParser"; + // Standard YAML tags permitted in front matter. Any other (custom/local) tag is rejected, + // along with anchors and aliases, to block billion-laughs expansion and arbitrary type + // instantiation. SharpYaml's serializer does not reject these on its own, so a single + // event pass enforces the policy before deserialization. + private static readonly FrozenSet AllowedTags = new[] + { + "tag:yaml.org,2002:str", + "tag:yaml.org,2002:int", + "tag:yaml.org,2002:float", + "tag:yaml.org,2002:bool", + "tag:yaml.org,2002:null", + "tag:yaml.org,2002:seq", + "tag:yaml.org,2002:map", + "tag:yaml.org,2002:timestamp", + }.ToFrozenSet(StringComparer.Ordinal); + private readonly FrontMatterParserOptions _options; private readonly IHttpContextAccessor _httpContextAccessor; - private readonly IDeserializer _lenientDeserializer; - private readonly IDeserializer _strictDeserializer; + private readonly PenningtonYamlContextProvider _yaml; private readonly ConcurrentDictionary> _knownKeyCache = new(); /// - /// Initializes the parser with a camelCase YAML deserializer that matches keys case-insensitively. - /// In lenient mode (the default outside build) unknown keys are dropped silently after a warning is emitted; - /// in strict mode unknown keys also throw a . + /// Initializes the parser. Built-in front-matter types deserialize through the source-generated + /// ; other types fall back to reflection. Keys are camelCase + /// matched case-insensitively. In lenient mode (the default outside build) unknown keys are + /// dropped after a warning; in strict mode they additionally throw a . /// /// Parser options controlling strict-mode behavior. /// Used to resolve the request-scoped . - public FrontMatterParser(FrontMatterParserOptions options, IHttpContextAccessor httpContextAccessor) + /// Supplies source-generated contexts with reflection fallback for deserialization. + public FrontMatterParser( + FrontMatterParserOptions options, + IHttpContextAccessor httpContextAccessor, + PenningtonYamlContextProvider yaml) { _options = options; _httpContextAccessor = httpContextAccessor; - _lenientDeserializer = BuildDeserializer(strict: false); - _strictDeserializer = BuildDeserializer(strict: true); + _yaml = yaml; } /// /// Convenience constructor for direct instantiation (tests, scripts) that defaults to - /// lenient mode and emits no diagnostics. Production hosts should resolve the parser - /// from DI so the configured apply. + /// lenient mode, emits no diagnostics, and uses only the built-in serializer context. + /// Production hosts should resolve the parser from DI so the configured + /// and any registered contexts apply. /// - public FrontMatterParser() : this(new FrontMatterParserOptions(), NullHttpContextAccessor.Instance) { } + public FrontMatterParser() + : this(new FrontMatterParserOptions(), NullHttpContextAccessor.Instance, PenningtonYamlContextProvider.Default) { } private sealed class NullHttpContextAccessor : IHttpContextAccessor { @@ -53,19 +72,6 @@ private sealed class NullHttpContextAccessor : IHttpContextAccessor public HttpContext? HttpContext { get => null; set { } } } - private static IDeserializer BuildDeserializer(bool strict) - { - var builder = new DeserializerBuilder() - .WithNamingConvention(CamelCaseNamingConvention.Instance) - .WithCaseInsensitivePropertyMatching(); - if (!strict) - { - builder = builder.IgnoreUnmatchedProperties(); - } - - return builder.Build(); - } - /// /// Parse front matter and return the metadata + remaining markdown body. /// Returns null metadata if no front matter block is present. @@ -106,120 +112,171 @@ public T DeserializeYaml(string yaml, string? sourcePath = null, DiagnosticCo return DeserializeWithScan(yaml, lineOffset: 0, sourcePath, diagnostics) ?? new T(); } - private T? SafeDeserialize(string yaml) - { - var parser = new SafeYamlParser(new Parser(new StringReader(yaml))); - var deserializer = _options.StrictUnknownKeys ? _strictDeserializer : _lenientDeserializer; - return deserializer.Deserialize(parser); - } - - // When diagnostics are inactive there is nothing to scan, so deserialize directly - // (one character scan). When they are active, tokenize once into a buffer and replay - // it for both the unknown-key scan and the deserialize, instead of scanning twice. + // A single event pass enforces the security policy (always) and, when diagnostics are active + // or strict mode is on, collects the top-level keys so unknown ones can be reported. The + // deserialize itself reads the text again (SharpYaml has no parse-from-events overload); for + // tiny front-matter blocks the second parse is negligible. private T? DeserializeWithScan(string yaml, int lineOffset, string? sourcePath, DiagnosticContext? diagnostics) where T : IFrontMatter, new() { - if (diagnostics is null || string.IsNullOrWhiteSpace(yaml)) + if (string.IsNullOrWhiteSpace(yaml)) { - return SafeDeserialize(yaml); + return default; } - List events; - try - { - events = BufferEvents(yaml); - } - catch (YamlException) + var collectKeys = diagnostics is not null || _options.StrictUnknownKeys; + var keys = ScanYaml(yaml, lineOffset, collectKeys, out var malformed); + + if (!malformed && keys is not null) { - // Malformed or security-rejected YAML — fall back to a direct deserialize so - // it surfaces the canonical error (matching the prior scan-then-deserialize flow). - return SafeDeserialize(yaml); + ReportUnknownKeys(keys, sourcePath, diagnostics); } - ScanUnknownKeys(events, lineOffset, sourcePath, diagnostics); - - var deserializer = _options.StrictUnknownKeys ? _strictDeserializer : _lenientDeserializer; - return deserializer.Deserialize(new BufferedYamlParser(events)); + return _yaml.Deserialize(yaml); } - // Drains a single SafeYamlParser pass into a replayable event buffer. The security - // checks (anchors/aliases/tags) run here, once, and a YamlException propagates to the caller. - private static List BufferEvents(string yaml) + private void ReportUnknownKeys(List<(string Name, int Line)> keys, string? sourcePath, DiagnosticContext? diagnostics) + where T : IFrontMatter, new() { - var events = new List(); - var source = new SafeYamlParser(new Parser(new StringReader(yaml))); - while (source.MoveNext()) + var known = _knownKeyCache.GetOrAdd(typeof(T), BuildKnownKeySet); + var location = sourcePath ?? ""; + var unknownFound = false; + + foreach (var (name, line) in keys) { - events.Add(source.Current!); + if (known.Contains(name)) + { + continue; + } + + unknownFound = true; + diagnostics?.AddWarning($"Unknown front-matter key '{name}' in {location}:{line}", DiagnosticSource); } - return events; + // Strict mode throws after the warnings are emitted, matching the prior scan-then-throw flow. + if (unknownFound && _options.StrictUnknownKeys) + { + throw new YamlException($"Unknown front-matter key(s) in {location}."); + } } - private DiagnosticContext? ResolveAmbientDiagnostics() - => _httpContextAccessor.HttpContext?.RequestServices.GetService(); - - private void ScanUnknownKeys(IReadOnlyList events, int lineOffset, string? sourcePath, DiagnosticContext diagnostics) + // One linear pass over the YAML events. Enforces the security policy on every event and, + // when requested, records the root mapping's keys with 1-based line numbers (SharpYaml marks + // are 0-based). A parser failure means malformed YAML — bail and let the deserialize step + // surface the canonical error; a security violation throws immediately. + private List<(string Name, int Line)>? ScanYaml(string yaml, int lineOffset, bool collectKeys, out bool malformed) { - YamlStream stream; - try - { - stream = new YamlStream(); - stream.Load(new BufferedYamlParser(events)); - } - catch (YamlException) - { - // Malformed YAML — let the deserialize step surface the canonical error - // rather than emitting a partial unknown-key list here. - return; - } + malformed = false; + var keys = collectKeys ? new List<(string, int)>() : null; + var parser = Parser.CreateParser(new StringReader(yaml)); - if (stream.Documents.Count == 0 || stream.Documents[0].RootNode is not YamlMappingNode mapping) + var depth = 0; + var rootIsMapping = false; + var rootExpectKey = false; + + while (true) { - return; - } + bool moved; + try + { + moved = parser.MoveNext(); + } + catch (YamlException) + { + malformed = true; + return null; + } - var known = _knownKeyCache.GetOrAdd(typeof(T), BuildKnownKeySet); - var location = sourcePath ?? ""; + if (!moved) + { + break; + } - foreach (var entry in mapping.Children) - { - if (entry.Key is not YamlScalarNode keyNode || keyNode.Value is null) + var current = parser.Current!; + EnforceSecurity(current); + + // Root-mapping children alternate key/value. A key is recorded only when we are + // directly inside the root mapping (depth == 1) and expecting a key. + if (collectKeys && rootIsMapping && depth == 1 && current is Scalar or MappingStart or SequenceStart) { - continue; + if (rootExpectKey) + { + if (current is Scalar { Value: { } value }) + { + keys!.Add((value, current.Start.Line + 1 + lineOffset)); + } + + rootExpectKey = false; + } + else + { + rootExpectKey = true; + } } - var keyName = keyNode.Value; - if (known.Contains(keyName)) + switch (current) { - continue; + case MappingStart: + if (depth == 0) + { + rootIsMapping = true; + rootExpectKey = true; + } + + depth++; + break; + case SequenceStart: + depth++; + break; + case MappingEnd: + case SequenceEnd: + depth--; + break; } + } + + return keys; + } - var line = keyNode.Start.Line + lineOffset; - diagnostics.AddWarning( - $"Unknown front-matter key '{keyName}' in {location}:{line}", - DiagnosticSource); + // Rejects YAML anchors, aliases, and non-standard type tags — preventing billion-laughs + // expansion and arbitrary type instantiation in front matter. + private static void EnforceSecurity(ParsingEvent current) + { + switch (current) + { + case AnchorAlias alias: + throw new YamlException(alias.Start, alias.End, + "YAML aliases are not permitted in front matter."); + + case NodeEvent { Anchor: { Length: > 0 } } node: + throw new YamlException(node.Start, node.End, + "YAML anchors are not permitted in front matter."); + + case NodeEvent node when node.Tag is { Length: > 0 } tag && !AllowedTags.Contains(tag): + throw new YamlException(node.Start, node.End, + $"YAML type tags are not permitted in front matter. Tag: {tag}"); } } + private DiagnosticContext? ResolveAmbientDiagnostics() + => _httpContextAccessor.HttpContext?.RequestServices.GetService(); + private static FrozenSet BuildKnownKeySet(Type t) { - // Mirror what WithNamingConvention(CamelCase) + WithCaseInsensitivePropertyMatching - // accept on the deserializer. Include declared and interface-default members from - // both T and any IFrontMatter capability mixins T implements. - var convention = CamelCaseNamingConvention.Instance; + // Mirror PropertyNamingPolicy.CamelCase + case-insensitive matching on the deserializer. + // Include declared and interface-default members from both T and any IFrontMatter mixins. var seen = new HashSet(StringComparer.OrdinalIgnoreCase); foreach (var prop in t.GetProperties(BindingFlags.Public | BindingFlags.Instance)) { - seen.Add(convention.Apply(prop.Name)); + seen.Add(JsonNamingPolicy.CamelCase.ConvertName(prop.Name)); } foreach (var iface in t.GetInterfaces()) { foreach (var prop in iface.GetProperties(BindingFlags.Public | BindingFlags.Instance)) { - seen.Add(convention.Apply(prop.Name)); + seen.Add(JsonNamingPolicy.CamelCase.ConvertName(prop.Name)); } } @@ -268,4 +325,4 @@ private static bool TryExtractYaml(string content, out string yaml, out string b /// /// Deserialized front matter, or null when the content had no front matter block. /// Markdown body with the front matter block stripped. -public record FrontMatterResult(T? Metadata, string Body) where T : IFrontMatter; \ No newline at end of file +public record FrontMatterResult(T? Metadata, string Body) where T : IFrontMatter; diff --git a/src/Pennington/FrontMatter/PenningtonYaml.cs b/src/Pennington/FrontMatter/PenningtonYaml.cs new file mode 100644 index 00000000..fda2b52b --- /dev/null +++ b/src/Pennington/FrontMatter/PenningtonYaml.cs @@ -0,0 +1,22 @@ +namespace Pennington.FrontMatter; + +using System.Text.Json; +using SharpYaml; + +/// +/// Shared SharpYaml configuration. mirrors the front-matter +/// conventions (camelCase keys, case-insensitive matching) and pins an explicit +/// so reflection-based deserialization works +/// regardless of SharpYaml's IsReflectionEnabledByDefault switch — this is the +/// reflection fallback used for any type no source-generated context covers. +/// +internal static class PenningtonYaml +{ + /// Reflection-backed options used for types not registered with a serializer context. + public static YamlSerializerOptions ReflectionOptions { get; } = new() + { + PropertyNamingPolicy = JsonNamingPolicy.CamelCase, + PropertyNameCaseInsensitive = true, + TypeInfoResolver = ReflectionYamlTypeInfoResolver.Default, + }; +} diff --git a/src/Pennington/FrontMatter/PenningtonYamlContext.cs b/src/Pennington/FrontMatter/PenningtonYamlContext.cs new file mode 100644 index 00000000..c1b18cef --- /dev/null +++ b/src/Pennington/FrontMatter/PenningtonYamlContext.cs @@ -0,0 +1,17 @@ +namespace Pennington.FrontMatter; + +using System.Text.Json.Serialization; +using SharpYaml.Serialization; + +/// +/// Source-generated SharpYaml metadata for Pennington's built-in front-matter records, so they +/// deserialize without reflection (NativeAOT/trim-friendly). Registered automatically by +/// . Types not covered by any +/// registered context fall back to reflection — see . +/// +[YamlSourceGenerationOptions(PropertyNamingPolicy = JsonKnownNamingPolicy.CamelCase, PropertyNameCaseInsensitive = true)] +[YamlSerializable(typeof(DocFrontMatter))] +[YamlSerializable(typeof(BlogFrontMatter))] +internal partial class PenningtonYamlContext : YamlSerializerContext +{ +} diff --git a/src/Pennington/FrontMatter/PenningtonYamlContextProvider.cs b/src/Pennington/FrontMatter/PenningtonYamlContextProvider.cs new file mode 100644 index 00000000..c520c1b7 --- /dev/null +++ b/src/Pennington/FrontMatter/PenningtonYamlContextProvider.cs @@ -0,0 +1,53 @@ +namespace Pennington.FrontMatter; + +using System.Collections.Concurrent; +using SharpYaml; +using SharpYaml.Serialization; + +/// +/// Routes a type to the registered that knows it — the +/// built-in , a satellite package context, or one a user +/// added via — and +/// falls back to reflection for everything else. A source-generated context only serves the +/// types it was generated for and rejects foreign options, so each type is dispatched to its +/// own context rather than combined into a single resolver. +/// +public sealed class PenningtonYamlContextProvider +{ + /// A provider seeded with only the built-in context, for non-DI use (tests, scripts). + public static PenningtonYamlContextProvider Default { get; } = new([PenningtonYamlContext.Default]); + + private readonly YamlSerializerContext[] _contexts; + private readonly ConcurrentDictionary _byType = new(); + + /// Initializes the provider with the serializer contexts registered in DI. + /// Registered contexts; the built-in is always present. + public PenningtonYamlContextProvider(IEnumerable contexts) + => _contexts = contexts as YamlSerializerContext[] ?? [.. contexts]; + + /// + /// Deserializes into using the source-generated + /// context that covers , or reflection when none does. + /// + /// Raw YAML text. + public T? Deserialize(string yaml) + { + var context = _byType.GetOrAdd(typeof(T), ResolveContext); + return context is not null + ? YamlSerializer.Deserialize(yaml, context) + : YamlSerializer.Deserialize(yaml, PenningtonYaml.ReflectionOptions); + } + + private YamlSerializerContext? ResolveContext(Type type) + { + foreach (var context in _contexts) + { + if (context.GetTypeInfo(type, context.Options) is not null) + { + return context; + } + } + + return null; + } +} diff --git a/src/Pennington/FrontMatter/SafeYamlParser.cs b/src/Pennington/FrontMatter/SafeYamlParser.cs deleted file mode 100644 index 6a7944da..00000000 --- a/src/Pennington/FrontMatter/SafeYamlParser.cs +++ /dev/null @@ -1,48 +0,0 @@ -namespace Pennington.FrontMatter; - -using YamlDotNet.Core; -using YamlDotNet.Core.Events; - -/// -/// Wraps an to reject YAML anchors, aliases, and non-standard type tags. -/// Prevents billion-laughs expansion attacks and arbitrary type instantiation. -/// -internal sealed class SafeYamlParser(IParser inner) : IParser -{ - private static readonly HashSet AllowedTags = - [ - "tag:yaml.org,2002:str", - "tag:yaml.org,2002:int", - "tag:yaml.org,2002:float", - "tag:yaml.org,2002:bool", - "tag:yaml.org,2002:null", - "tag:yaml.org,2002:seq", - "tag:yaml.org,2002:map", - "tag:yaml.org,2002:timestamp", - ]; - - public ParsingEvent? Current => inner.Current; - - public bool MoveNext() - { - var result = inner.MoveNext(); - - switch (inner.Current) - { - case AnchorAlias alias: - throw new YamlException(alias.Start, alias.End, - "YAML aliases are not permitted in front matter."); - - case NodeEvent { Anchor.IsEmpty: false } node: - throw new YamlException(node.Start, node.End, - "YAML anchors are not permitted in front matter."); - - case NodeEvent { Tag: { IsNonSpecific: false, IsEmpty: false } tag } node - when !AllowedTags.Contains(tag.Value): - throw new YamlException(node.Start, node.End, - $"YAML type tags are not permitted in front matter. Tag: {tag.Value}"); - } - - return result; - } -} \ No newline at end of file diff --git a/src/Pennington/Infrastructure/PenningtonExtensions.cs b/src/Pennington/Infrastructure/PenningtonExtensions.cs index a280407e..04f8d0fb 100644 --- a/src/Pennington/Infrastructure/PenningtonExtensions.cs +++ b/src/Pennington/Infrastructure/PenningtonExtensions.cs @@ -32,6 +32,7 @@ namespace Pennington.Infrastructure; using Pipeline; using Routing; using Search; +using SharpYaml.Serialization; using Testably.Abstractions; /// @@ -112,6 +113,13 @@ public static IServiceCollection AddPennington(this IServiceCollection services, } services.AddSingleton(options.FrontMatter); + + // YAML deserialization: register the built-in source-generated context and the provider + // that dispatches each type to its context (or reflection). Satellite packages and users + // add their own contexts via AddPenningtonYamlContext. + services.AddPenningtonYamlContext(PenningtonYamlContext.Default); + services.AddSingleton(); + services.AddSingleton(); services.AddFileWatched(); @@ -421,6 +429,18 @@ object Resolve(IServiceProvider sp) return services; } + /// + /// Register a source-generated so the types it covers + /// deserialize without reflection (NativeAOT/trim-friendly). Types not covered by any + /// registered context fall back to reflection. Satellite templates call this for their own + /// front-matter records; end users call it for theirs. + /// + public static IServiceCollection AddPenningtonYamlContext(this IServiceCollection services, YamlSerializerContext context) + { + services.AddSingleton(context); + return services; + } + /// /// Builds the assembly set scanned for routable @page components, /// always including the entry assembly (deduped) so a bare host's pages are diff --git a/src/Pennington/Pennington.csproj b/src/Pennington/Pennington.csproj index 58cdaa8a..04d2705a 100644 --- a/src/Pennington/Pennington.csproj +++ b/src/Pennington/Pennington.csproj @@ -18,7 +18,7 @@ - + diff --git a/tests/Pennington.Tests/FrontMatter/FrontMatterParserDiagnosticsTests.cs b/tests/Pennington.Tests/FrontMatter/FrontMatterParserDiagnosticsTests.cs index 5acf6ac5..3d1a0e82 100644 --- a/tests/Pennington.Tests/FrontMatter/FrontMatterParserDiagnosticsTests.cs +++ b/tests/Pennington.Tests/FrontMatter/FrontMatterParserDiagnosticsTests.cs @@ -1,5 +1,5 @@ using Pennington.FrontMatter; -using YamlDotNet.Core; +using SharpYaml; namespace Pennington.Tests.FrontMatter; @@ -135,7 +135,7 @@ public void Parse_WithoutSourcePath_FallsBackToUnknownLabel() } private static FrontMatterParser CreateParser(bool strict) - => new(new FrontMatterParserOptions { StrictUnknownKeys = strict }, new NoopHttpContextAccessor()); + => new(new FrontMatterParserOptions { StrictUnknownKeys = strict }, new NoopHttpContextAccessor(), PenningtonYamlContextProvider.Default); private sealed class NoopHttpContextAccessor : Microsoft.AspNetCore.Http.IHttpContextAccessor { diff --git a/tests/Pennington.Tests/FrontMatter/FrontMatterParserTests.cs b/tests/Pennington.Tests/FrontMatter/FrontMatterParserTests.cs index ee246e62..9141d62d 100644 --- a/tests/Pennington.Tests/FrontMatter/FrontMatterParserTests.cs +++ b/tests/Pennington.Tests/FrontMatter/FrontMatterParserTests.cs @@ -186,7 +186,7 @@ public void Parse_YamlAnchor_ThrowsToPreventBillionLaughs() { var content = "---\ntitle: &bomb payload\n---\nContent."; - Should.Throw( + Should.Throw( () => _parser.Parse(content)); } @@ -195,7 +195,7 @@ public void DeserializeYaml_YamlAnchor_ThrowsToPreventBillionLaughs() { var yaml = "title: &bomb payload"; - Should.Throw( + Should.Throw( () => _parser.DeserializeYaml(yaml)); } @@ -204,7 +204,7 @@ public void Parse_ArbitraryTypeTag_ThrowsToPreventTypeInstantiation() { var content = "---\ntitle: ! malicious\n---\nContent."; - Should.Throw( + Should.Throw( () => _parser.Parse(content)); } @@ -213,7 +213,7 @@ public void Parse_DotNetTypeTag_ThrowsToPreventTypeInstantiation() { var content = "---\ntitle: ! evil\n---\nContent."; - Should.Throw( + Should.Throw( () => _parser.Parse(content)); } From 8db2c4f913ef1bbcc4106badd040172b0da19f8c Mon Sep 17 00:00:00 2001 From: Phil Scott Date: Sun, 24 May 2026 19:45:47 -0400 Subject: [PATCH 2/2] docs(examples): demonstrate a custom front-matter YAML context MultipleSourcesExample registers BlogFrontMatterYamlContext via AddPenningtonYamlContext, exercising the user opt-in source-gen path alongside the built-in context (DocFrontMatter) and reflection fallback in one host. --- .../BlogFrontMatterYamlContext.cs | 15 +++++++++++++++ .../MultipleSourcesExample.csproj | 2 ++ examples/MultipleSourcesExample/Program.cs | 4 ++++ examples/MultipleSourcesExample/README.md | 1 + 4 files changed, 22 insertions(+) create mode 100644 examples/MultipleSourcesExample/BlogFrontMatterYamlContext.cs diff --git a/examples/MultipleSourcesExample/BlogFrontMatterYamlContext.cs b/examples/MultipleSourcesExample/BlogFrontMatterYamlContext.cs new file mode 100644 index 00000000..8c6a09c6 --- /dev/null +++ b/examples/MultipleSourcesExample/BlogFrontMatterYamlContext.cs @@ -0,0 +1,15 @@ +namespace MultipleSourcesExample; + +using System.Text.Json.Serialization; +using SharpYaml.Serialization; + +/// +/// Source-generated SharpYaml metadata for this host's custom , +/// registered via AddPenningtonYamlContext so it deserializes without reflection. Types +/// no registered context covers (here, anything but ) fall back to reflection. +/// +[YamlSourceGenerationOptions(PropertyNamingPolicy = JsonKnownNamingPolicy.CamelCase, PropertyNameCaseInsensitive = true)] +[YamlSerializable(typeof(BlogFrontMatter))] +internal partial class BlogFrontMatterYamlContext : YamlSerializerContext +{ +} diff --git a/examples/MultipleSourcesExample/MultipleSourcesExample.csproj b/examples/MultipleSourcesExample/MultipleSourcesExample.csproj index 3884722b..38b0e68d 100644 --- a/examples/MultipleSourcesExample/MultipleSourcesExample.csproj +++ b/examples/MultipleSourcesExample/MultipleSourcesExample.csproj @@ -7,6 +7,8 @@ MultipleSourcesExample + + diff --git a/examples/MultipleSourcesExample/Program.cs b/examples/MultipleSourcesExample/Program.cs index b3ec3888..e6e1aa14 100644 --- a/examples/MultipleSourcesExample/Program.cs +++ b/examples/MultipleSourcesExample/Program.cs @@ -32,6 +32,10 @@ penn.AddMarkdownContent(ServiceConfiguration.RegisterBlogSource); }); +// Opt the custom BlogFrontMatter into source-generated YAML metadata. DocFrontMatter is +// already covered by Pennington's built-in context; types with no context use reflection. +builder.Services.AddPenningtonYamlContext(BlogFrontMatterYamlContext.Default); + var app = builder.Build(); app.UsePennington(); diff --git a/examples/MultipleSourcesExample/README.md b/examples/MultipleSourcesExample/README.md index d1ee51ba..365f7a58 100644 --- a/examples/MultipleSourcesExample/README.md +++ b/examples/MultipleSourcesExample/README.md @@ -7,6 +7,7 @@ Bare `AddPennington` host with two `AddMarkdownContent` calls pointing at dif - Multiple markdown sources in one host - Per-source `ContentPath` / `BasePageUrl` / `ExcludePaths` - Per-source front-matter types +- Opt a custom front-matter type into source-generated YAML metadata via `AddPenningtonYamlContext` (`BlogFrontMatterYamlContext`); reflection remains the fallback for unregistered types - Overlap demonstration toggled by the `MULTIPLE_SOURCES_OVERLAP=1` env var ## Referenced from