diff --git a/CLAUDE.md b/CLAUDE.md index c3b68c79..619ca3e8 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -9,7 +9,7 @@ Content engine library targeting .NET 11 / C# 15 with union types. - Run docs site: `dotnet run --project docs/Pennington.Docs` ## Project Structure -- `src/Pennington/` — Core library (Markdig, YamlDotNet, AngleSharp, TextMateSharp) +- `src/Pennington/` — Core library (Markdig, SharpYaml, AngleSharp, TextMateSharp) - `src/Pennington.UI/` — Razor component library (TableOfContentsNav, OutlineNav, Badge, Card, CodeBlock, etc.) - `src/Pennington.MonorailCss/` — MonorailCSS integration (utility-first CSS generation) - `src/Pennington.DocSite/` — Documentation site template (layout, pages, content resolver) diff --git a/Directory.Packages.props b/Directory.Packages.props index 18a7ea17..9c07cb77 100644 --- a/Directory.Packages.props +++ b/Directory.Packages.props @@ -23,6 +23,7 @@ + @@ -37,7 +38,6 @@ - diff --git a/docs/Pennington.Docs/Content/how-to/content-services/data-files.md b/docs/Pennington.Docs/Content/how-to/content-services/data-files.md index 48743164..4678f38b 100644 --- a/docs/Pennington.Docs/Content/how-to/content-services/data-files.md +++ b/docs/Pennington.Docs/Content/how-to/content-services/data-files.md @@ -11,7 +11,7 @@ When a piece of site content is structured data — sponsors, navigation, schedu ## Register the file -`AddDataFile(name, path)` deserializes `path` into `T` on first access and tracks the file for changes. Format is chosen from the extension: `.yml` and `.yaml` go through YamlDotNet, `.json` through `System.Text.Json`. Both deserializers use camelCase property naming with case-insensitive matching, mirroring how front matter is parsed. +`AddDataFile(name, path)` deserializes `path` into `T` on first access and tracks the file for changes. Format is chosen from the extension: `.yml` and `.yaml` go through SharpYaml, `.json` through `System.Text.Json`. Both deserializers use camelCase property naming with case-insensitive matching, mirroring how front matter is parsed. ```csharp builder.Services.AddDataFile>("sponsors", "data/sponsors.yml"); @@ -21,7 +21,7 @@ builder.Services.AddDataFile>("nav", "data/nav.json"); The lookup key (`"sponsors"`) is case-insensitive and must be unique across registrations. Paths are resolved against the current working directory if relative. -The target type needs a parameterless constructor — use a record with init-only properties so YamlDotNet can populate it: +The target type needs a parameterless constructor — use a record with init-only properties so SharpYaml can populate it: ```csharp public record Sponsor diff --git a/examples/MultipleSourcesExample/BlogFrontMatterYamlContext.cs b/examples/MultipleSourcesExample/BlogFrontMatterYamlContext.cs new file mode 100644 index 00000000..8c6a09c6 --- /dev/null +++ b/examples/MultipleSourcesExample/BlogFrontMatterYamlContext.cs @@ -0,0 +1,15 @@ +namespace MultipleSourcesExample; + +using System.Text.Json.Serialization; +using SharpYaml.Serialization; + +/// +/// Source-generated SharpYaml metadata for this host's custom , +/// registered via AddPenningtonYamlContext so it deserializes without reflection. Types +/// no registered context covers (here, anything but ) fall back to reflection. +/// +[YamlSourceGenerationOptions(PropertyNamingPolicy = JsonKnownNamingPolicy.CamelCase, PropertyNameCaseInsensitive = true)] +[YamlSerializable(typeof(BlogFrontMatter))] +internal partial class BlogFrontMatterYamlContext : YamlSerializerContext +{ +} diff --git a/examples/MultipleSourcesExample/MultipleSourcesExample.csproj b/examples/MultipleSourcesExample/MultipleSourcesExample.csproj index 3884722b..38b0e68d 100644 --- a/examples/MultipleSourcesExample/MultipleSourcesExample.csproj +++ b/examples/MultipleSourcesExample/MultipleSourcesExample.csproj @@ -7,6 +7,8 @@ MultipleSourcesExample + + diff --git a/examples/MultipleSourcesExample/Program.cs b/examples/MultipleSourcesExample/Program.cs index b3ec3888..e6e1aa14 100644 --- a/examples/MultipleSourcesExample/Program.cs +++ b/examples/MultipleSourcesExample/Program.cs @@ -32,6 +32,10 @@ penn.AddMarkdownContent(ServiceConfiguration.RegisterBlogSource); }); +// Opt the custom BlogFrontMatter into source-generated YAML metadata. DocFrontMatter is +// already covered by Pennington's built-in context; types with no context use reflection. +builder.Services.AddPenningtonYamlContext(BlogFrontMatterYamlContext.Default); + var app = builder.Build(); app.UsePennington(); diff --git a/examples/MultipleSourcesExample/README.md b/examples/MultipleSourcesExample/README.md index d1ee51ba..365f7a58 100644 --- a/examples/MultipleSourcesExample/README.md +++ b/examples/MultipleSourcesExample/README.md @@ -7,6 +7,7 @@ Bare `AddPennington` host with two `AddMarkdownContent` calls pointing at dif - Multiple markdown sources in one host - Per-source `ContentPath` / `BasePageUrl` / `ExcludePaths` - Per-source front-matter types +- Opt a custom front-matter type into source-generated YAML metadata via `AddPenningtonYamlContext` (`BlogFrontMatterYamlContext`); reflection remains the fallback for unregistered types - Overlap demonstration toggled by the `MULTIPLE_SOURCES_OVERLAP=1` env var ## Referenced from diff --git a/src/Pennington.BlogSite/BlogSiteServiceExtensions.cs b/src/Pennington.BlogSite/BlogSiteServiceExtensions.cs index 1e8f018d..dc7c7a39 100644 --- a/src/Pennington.BlogSite/BlogSiteServiceExtensions.cs +++ b/src/Pennington.BlogSite/BlogSiteServiceExtensions.cs @@ -49,6 +49,9 @@ public static IServiceCollection AddBlogSite(this IServiceCollection services, : [blogSiteAssembly, .. routingAssemblies]; }); + // Source-generated YAML metadata for BlogSite's front-matter type (reflection fallback otherwise). + services.AddPenningtonYamlContext(BlogSiteYamlContext.Default); + // Make Pennington.UI components available inline in markdown via Mdazor. // is intentionally excluded: markdown authors should use fenced // code blocks, not a component round-trip through Mdazor+Markdig. diff --git a/src/Pennington.BlogSite/BlogSiteYamlContext.cs b/src/Pennington.BlogSite/BlogSiteYamlContext.cs new file mode 100644 index 00000000..92d855d1 --- /dev/null +++ b/src/Pennington.BlogSite/BlogSiteYamlContext.cs @@ -0,0 +1,15 @@ +namespace Pennington.BlogSite; + +using System.Text.Json.Serialization; +using SharpYaml.Serialization; + +/// +/// Source-generated SharpYaml metadata for BlogSite's front-matter record, registered by +/// so it deserializes without reflection +/// (NativeAOT/trim-friendly). +/// +[YamlSourceGenerationOptions(PropertyNamingPolicy = JsonKnownNamingPolicy.CamelCase, PropertyNameCaseInsensitive = true)] +[YamlSerializable(typeof(BlogSiteFrontMatter))] +internal partial class BlogSiteYamlContext : YamlSerializerContext +{ +} diff --git a/src/Pennington.BlogSite/Pennington.BlogSite.csproj b/src/Pennington.BlogSite/Pennington.BlogSite.csproj index 18027761..e93fcc1f 100644 --- a/src/Pennington.BlogSite/Pennington.BlogSite.csproj +++ b/src/Pennington.BlogSite/Pennington.BlogSite.csproj @@ -12,6 +12,8 @@ + + diff --git a/src/Pennington.DocSite/DocSiteServiceExtensions.cs b/src/Pennington.DocSite/DocSiteServiceExtensions.cs index c2ee7f2c..d4a307be 100644 --- a/src/Pennington.DocSite/DocSiteServiceExtensions.cs +++ b/src/Pennington.DocSite/DocSiteServiceExtensions.cs @@ -100,6 +100,9 @@ public static IServiceCollection AddDocSite(this IServiceCollection services, options.ConfigurePennington?.Invoke(penn); }); + // Source-generated YAML metadata for DocSite's front-matter types (reflection fallback otherwise). + services.AddPenningtonYamlContext(DocSiteYamlContext.Default); + // Make Pennington.UI components available inline in markdown via Mdazor. // is intentionally excluded: markdown authors should use fenced // code blocks, not a component round-trip through Mdazor+Markdig. diff --git a/src/Pennington.DocSite/DocSiteYamlContext.cs b/src/Pennington.DocSite/DocSiteYamlContext.cs new file mode 100644 index 00000000..db5f2345 --- /dev/null +++ b/src/Pennington.DocSite/DocSiteYamlContext.cs @@ -0,0 +1,16 @@ +namespace Pennington.DocSite; + +using System.Text.Json.Serialization; +using SharpYaml.Serialization; + +/// +/// Source-generated SharpYaml metadata for DocSite's front-matter records, registered by +/// so they deserialize without reflection +/// (NativeAOT/trim-friendly). +/// +[YamlSourceGenerationOptions(PropertyNamingPolicy = JsonKnownNamingPolicy.CamelCase, PropertyNameCaseInsensitive = true)] +[YamlSerializable(typeof(DocSiteFrontMatter))] +[YamlSerializable(typeof(BlogPostFrontMatter))] +internal partial class DocSiteYamlContext : YamlSerializerContext +{ +} diff --git a/src/Pennington.DocSite/Pennington.DocSite.csproj b/src/Pennington.DocSite/Pennington.DocSite.csproj index e61056b3..7ae26d75 100644 --- a/src/Pennington.DocSite/Pennington.DocSite.csproj +++ b/src/Pennington.DocSite/Pennington.DocSite.csproj @@ -12,6 +12,8 @@ + + diff --git a/src/Pennington/Content/MarkdownContentService.cs b/src/Pennington/Content/MarkdownContentService.cs index be659f2c..0f42f5d3 100644 --- a/src/Pennington/Content/MarkdownContentService.cs +++ b/src/Pennington/Content/MarkdownContentService.cs @@ -7,8 +7,7 @@ namespace Pennington.Content; using LlmsTxt; using Pipeline; using Routing; -using YamlDotNet.Serialization; -using YamlDotNet.Serialization.NamingConventions; +using SharpYaml; /// /// Discovers and provides markdown content from a directory. @@ -573,11 +572,6 @@ private async Task> LoadSubtreesAsync() } var basePrefix = NormalizeBasePageUrl(_options.BasePageUrl.Value); - var deserializer = new DeserializerBuilder() - .WithNamingConvention(CamelCaseNamingConvention.Instance) - .IgnoreUnmatchedProperties() - .Build(); - var builder = ImmutableList.CreateBuilder(); foreach (var file in _fileSystem.Directory.EnumerateFiles( @@ -596,7 +590,7 @@ private async Task> LoadSubtreesAsync() LlmsSubtreeSidecar? sidecar; try { - sidecar = deserializer.Deserialize(content); + sidecar = YamlSerializer.Deserialize(content, PenningtonYaml.ReflectionOptions); } catch { diff --git a/src/Pennington/Content/RedirectContentService.cs b/src/Pennington/Content/RedirectContentService.cs index 92a1d501..7ee59194 100644 --- a/src/Pennington/Content/RedirectContentService.cs +++ b/src/Pennington/Content/RedirectContentService.cs @@ -7,9 +7,7 @@ namespace Pennington.Content; using Microsoft.Extensions.DependencyInjection; using Pipeline; using Routing; -using YamlDotNet.Core; -using YamlDotNet.Serialization; -using YamlDotNet.Serialization.NamingConventions; +using SharpYaml; /// /// Holds the unified redirect map used by @@ -164,12 +162,7 @@ private async Task> LoadYamlAsync() try { - var deserializer = new DeserializerBuilder() - .WithNamingConvention(CamelCaseNamingConvention.Instance) - .IgnoreUnmatchedProperties() - .Build(); - - var config = deserializer.Deserialize(yaml); + var config = YamlSerializer.Deserialize(yaml, PenningtonYaml.ReflectionOptions); if (config?.Redirects is null || config.Redirects.Count == 0) { return ImmutableDictionary.Empty; diff --git a/src/Pennington/Data/DataFileLoader.cs b/src/Pennington/Data/DataFileLoader.cs index 09711f9b..4b43e913 100644 --- a/src/Pennington/Data/DataFileLoader.cs +++ b/src/Pennington/Data/DataFileLoader.cs @@ -2,24 +2,19 @@ namespace Pennington.Data; using System.IO.Abstractions; using System.Text.Json; -using YamlDotNet.RepresentationModel; -using YamlDotNet.Serialization; -using YamlDotNet.Serialization.NamingConventions; +using FrontMatter; +using SharpYaml; +using SharpYaml.Events; /// /// Deserializes a data file's bytes into a typed value. Format is chosen from the -/// extension (.yml / .yaml use YamlDotNet, .json uses System.Text.Json), +/// extension (.yml / .yaml use SharpYaml, .json uses System.Text.Json), /// both configured with camelCase naming and case-insensitive property matching to mirror -/// 's behavior. +/// 's behavior. Arbitrary data types deserialize +/// via reflection (no source-gen context covers them). /// public static class DataFileLoader { - private static readonly IDeserializer YamlDeserializer = new DeserializerBuilder() - .WithNamingConvention(CamelCaseNamingConvention.Instance) - .WithCaseInsensitivePropertyMatching() - .IgnoreUnmatchedProperties() - .Build(); - private static readonly JsonSerializerOptions JsonOptions = new() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase, @@ -93,9 +88,20 @@ private static bool RootIsYamlSequence(string content, string path) { try { - var stream = new YamlStream(); - stream.Load(new StringReader(content)); - return stream.Documents.Count > 0 && stream.Documents[0].RootNode is YamlSequenceNode; + // The first node-start event after the stream/document preamble is the root node. + var parser = Parser.CreateParser(new StringReader(content)); + while (parser.MoveNext()) + { + switch (parser.Current) + { + case SequenceStart: + return true; + case MappingStart or Scalar: + return false; + } + } + + return false; } catch (Exception ex) { @@ -129,7 +135,7 @@ private static T DeserializeYaml(string content, string path) { try { - return YamlDeserializer.Deserialize(content) + return YamlSerializer.Deserialize(content, PenningtonYaml.ReflectionOptions) ?? throw new InvalidDataException($"YAML in {path} deserialized to null"); } catch (Exception ex) when (ex is not InvalidDataException) diff --git a/src/Pennington/FrontMatter/BufferedYamlParser.cs b/src/Pennington/FrontMatter/BufferedYamlParser.cs deleted file mode 100644 index 1bab8285..00000000 --- a/src/Pennington/FrontMatter/BufferedYamlParser.cs +++ /dev/null @@ -1,24 +0,0 @@ -namespace Pennington.FrontMatter; - -using YamlDotNet.Core; -using YamlDotNet.Core.Events; - -/// -/// Replays a buffered list of s as an , -/// so a single character scan of the YAML can drive both the unknown-key diagnostic -/// pass and the deserialization pass without tokenizing the source twice. -/// -internal sealed class BufferedYamlParser(IReadOnlyList events) : IParser -{ - private int _index = -1; - - /// - public ParsingEvent? Current => _index >= 0 && _index < events.Count ? events[_index] : null; - - /// - public bool MoveNext() - { - _index++; - return _index < events.Count; - } -} diff --git a/src/Pennington/FrontMatter/FrontMatterParser.cs b/src/Pennington/FrontMatter/FrontMatterParser.cs index 06d8b301..214b6e88 100644 --- a/src/Pennington/FrontMatter/FrontMatterParser.cs +++ b/src/Pennington/FrontMatter/FrontMatterParser.cs @@ -3,14 +3,12 @@ namespace Pennington.FrontMatter; using System.Collections.Concurrent; using System.Collections.Frozen; using System.Reflection; +using System.Text.Json; using Diagnostics; using Microsoft.AspNetCore.Http; using Microsoft.Extensions.DependencyInjection; -using YamlDotNet.Core; -using YamlDotNet.Core.Events; -using YamlDotNet.RepresentationModel; -using YamlDotNet.Serialization; -using YamlDotNet.Serialization.NamingConventions; +using SharpYaml; +using SharpYaml.Events; /// /// Parses YAML front matter from markdown content. @@ -19,33 +17,54 @@ public sealed class FrontMatterParser { private const string DiagnosticSource = "FrontMatterParser"; + // Standard YAML tags permitted in front matter. Any other (custom/local) tag is rejected, + // along with anchors and aliases, to block billion-laughs expansion and arbitrary type + // instantiation. SharpYaml's serializer does not reject these on its own, so a single + // event pass enforces the policy before deserialization. + private static readonly FrozenSet AllowedTags = new[] + { + "tag:yaml.org,2002:str", + "tag:yaml.org,2002:int", + "tag:yaml.org,2002:float", + "tag:yaml.org,2002:bool", + "tag:yaml.org,2002:null", + "tag:yaml.org,2002:seq", + "tag:yaml.org,2002:map", + "tag:yaml.org,2002:timestamp", + }.ToFrozenSet(StringComparer.Ordinal); + private readonly FrontMatterParserOptions _options; private readonly IHttpContextAccessor _httpContextAccessor; - private readonly IDeserializer _lenientDeserializer; - private readonly IDeserializer _strictDeserializer; + private readonly PenningtonYamlContextProvider _yaml; private readonly ConcurrentDictionary> _knownKeyCache = new(); /// - /// Initializes the parser with a camelCase YAML deserializer that matches keys case-insensitively. - /// In lenient mode (the default outside build) unknown keys are dropped silently after a warning is emitted; - /// in strict mode unknown keys also throw a . + /// Initializes the parser. Built-in front-matter types deserialize through the source-generated + /// ; other types fall back to reflection. Keys are camelCase + /// matched case-insensitively. In lenient mode (the default outside build) unknown keys are + /// dropped after a warning; in strict mode they additionally throw a . /// /// Parser options controlling strict-mode behavior. /// Used to resolve the request-scoped . - public FrontMatterParser(FrontMatterParserOptions options, IHttpContextAccessor httpContextAccessor) + /// Supplies source-generated contexts with reflection fallback for deserialization. + public FrontMatterParser( + FrontMatterParserOptions options, + IHttpContextAccessor httpContextAccessor, + PenningtonYamlContextProvider yaml) { _options = options; _httpContextAccessor = httpContextAccessor; - _lenientDeserializer = BuildDeserializer(strict: false); - _strictDeserializer = BuildDeserializer(strict: true); + _yaml = yaml; } /// /// Convenience constructor for direct instantiation (tests, scripts) that defaults to - /// lenient mode and emits no diagnostics. Production hosts should resolve the parser - /// from DI so the configured apply. + /// lenient mode, emits no diagnostics, and uses only the built-in serializer context. + /// Production hosts should resolve the parser from DI so the configured + /// and any registered contexts apply. /// - public FrontMatterParser() : this(new FrontMatterParserOptions(), NullHttpContextAccessor.Instance) { } + public FrontMatterParser() + : this(new FrontMatterParserOptions(), NullHttpContextAccessor.Instance, PenningtonYamlContextProvider.Default) { } private sealed class NullHttpContextAccessor : IHttpContextAccessor { @@ -53,19 +72,6 @@ private sealed class NullHttpContextAccessor : IHttpContextAccessor public HttpContext? HttpContext { get => null; set { } } } - private static IDeserializer BuildDeserializer(bool strict) - { - var builder = new DeserializerBuilder() - .WithNamingConvention(CamelCaseNamingConvention.Instance) - .WithCaseInsensitivePropertyMatching(); - if (!strict) - { - builder = builder.IgnoreUnmatchedProperties(); - } - - return builder.Build(); - } - /// /// Parse front matter and return the metadata + remaining markdown body. /// Returns null metadata if no front matter block is present. @@ -106,120 +112,171 @@ public T DeserializeYaml(string yaml, string? sourcePath = null, DiagnosticCo return DeserializeWithScan(yaml, lineOffset: 0, sourcePath, diagnostics) ?? new T(); } - private T? SafeDeserialize(string yaml) - { - var parser = new SafeYamlParser(new Parser(new StringReader(yaml))); - var deserializer = _options.StrictUnknownKeys ? _strictDeserializer : _lenientDeserializer; - return deserializer.Deserialize(parser); - } - - // When diagnostics are inactive there is nothing to scan, so deserialize directly - // (one character scan). When they are active, tokenize once into a buffer and replay - // it for both the unknown-key scan and the deserialize, instead of scanning twice. + // A single event pass enforces the security policy (always) and, when diagnostics are active + // or strict mode is on, collects the top-level keys so unknown ones can be reported. The + // deserialize itself reads the text again (SharpYaml has no parse-from-events overload); for + // tiny front-matter blocks the second parse is negligible. private T? DeserializeWithScan(string yaml, int lineOffset, string? sourcePath, DiagnosticContext? diagnostics) where T : IFrontMatter, new() { - if (diagnostics is null || string.IsNullOrWhiteSpace(yaml)) + if (string.IsNullOrWhiteSpace(yaml)) { - return SafeDeserialize(yaml); + return default; } - List events; - try - { - events = BufferEvents(yaml); - } - catch (YamlException) + var collectKeys = diagnostics is not null || _options.StrictUnknownKeys; + var keys = ScanYaml(yaml, lineOffset, collectKeys, out var malformed); + + if (!malformed && keys is not null) { - // Malformed or security-rejected YAML — fall back to a direct deserialize so - // it surfaces the canonical error (matching the prior scan-then-deserialize flow). - return SafeDeserialize(yaml); + ReportUnknownKeys(keys, sourcePath, diagnostics); } - ScanUnknownKeys(events, lineOffset, sourcePath, diagnostics); - - var deserializer = _options.StrictUnknownKeys ? _strictDeserializer : _lenientDeserializer; - return deserializer.Deserialize(new BufferedYamlParser(events)); + return _yaml.Deserialize(yaml); } - // Drains a single SafeYamlParser pass into a replayable event buffer. The security - // checks (anchors/aliases/tags) run here, once, and a YamlException propagates to the caller. - private static List BufferEvents(string yaml) + private void ReportUnknownKeys(List<(string Name, int Line)> keys, string? sourcePath, DiagnosticContext? diagnostics) + where T : IFrontMatter, new() { - var events = new List(); - var source = new SafeYamlParser(new Parser(new StringReader(yaml))); - while (source.MoveNext()) + var known = _knownKeyCache.GetOrAdd(typeof(T), BuildKnownKeySet); + var location = sourcePath ?? ""; + var unknownFound = false; + + foreach (var (name, line) in keys) { - events.Add(source.Current!); + if (known.Contains(name)) + { + continue; + } + + unknownFound = true; + diagnostics?.AddWarning($"Unknown front-matter key '{name}' in {location}:{line}", DiagnosticSource); } - return events; + // Strict mode throws after the warnings are emitted, matching the prior scan-then-throw flow. + if (unknownFound && _options.StrictUnknownKeys) + { + throw new YamlException($"Unknown front-matter key(s) in {location}."); + } } - private DiagnosticContext? ResolveAmbientDiagnostics() - => _httpContextAccessor.HttpContext?.RequestServices.GetService(); - - private void ScanUnknownKeys(IReadOnlyList events, int lineOffset, string? sourcePath, DiagnosticContext diagnostics) + // One linear pass over the YAML events. Enforces the security policy on every event and, + // when requested, records the root mapping's keys with 1-based line numbers (SharpYaml marks + // are 0-based). A parser failure means malformed YAML — bail and let the deserialize step + // surface the canonical error; a security violation throws immediately. + private List<(string Name, int Line)>? ScanYaml(string yaml, int lineOffset, bool collectKeys, out bool malformed) { - YamlStream stream; - try - { - stream = new YamlStream(); - stream.Load(new BufferedYamlParser(events)); - } - catch (YamlException) - { - // Malformed YAML — let the deserialize step surface the canonical error - // rather than emitting a partial unknown-key list here. - return; - } + malformed = false; + var keys = collectKeys ? new List<(string, int)>() : null; + var parser = Parser.CreateParser(new StringReader(yaml)); - if (stream.Documents.Count == 0 || stream.Documents[0].RootNode is not YamlMappingNode mapping) + var depth = 0; + var rootIsMapping = false; + var rootExpectKey = false; + + while (true) { - return; - } + bool moved; + try + { + moved = parser.MoveNext(); + } + catch (YamlException) + { + malformed = true; + return null; + } - var known = _knownKeyCache.GetOrAdd(typeof(T), BuildKnownKeySet); - var location = sourcePath ?? ""; + if (!moved) + { + break; + } - foreach (var entry in mapping.Children) - { - if (entry.Key is not YamlScalarNode keyNode || keyNode.Value is null) + var current = parser.Current!; + EnforceSecurity(current); + + // Root-mapping children alternate key/value. A key is recorded only when we are + // directly inside the root mapping (depth == 1) and expecting a key. + if (collectKeys && rootIsMapping && depth == 1 && current is Scalar or MappingStart or SequenceStart) { - continue; + if (rootExpectKey) + { + if (current is Scalar { Value: { } value }) + { + keys!.Add((value, current.Start.Line + 1 + lineOffset)); + } + + rootExpectKey = false; + } + else + { + rootExpectKey = true; + } } - var keyName = keyNode.Value; - if (known.Contains(keyName)) + switch (current) { - continue; + case MappingStart: + if (depth == 0) + { + rootIsMapping = true; + rootExpectKey = true; + } + + depth++; + break; + case SequenceStart: + depth++; + break; + case MappingEnd: + case SequenceEnd: + depth--; + break; } + } + + return keys; + } - var line = keyNode.Start.Line + lineOffset; - diagnostics.AddWarning( - $"Unknown front-matter key '{keyName}' in {location}:{line}", - DiagnosticSource); + // Rejects YAML anchors, aliases, and non-standard type tags — preventing billion-laughs + // expansion and arbitrary type instantiation in front matter. + private static void EnforceSecurity(ParsingEvent current) + { + switch (current) + { + case AnchorAlias alias: + throw new YamlException(alias.Start, alias.End, + "YAML aliases are not permitted in front matter."); + + case NodeEvent { Anchor: { Length: > 0 } } node: + throw new YamlException(node.Start, node.End, + "YAML anchors are not permitted in front matter."); + + case NodeEvent node when node.Tag is { Length: > 0 } tag && !AllowedTags.Contains(tag): + throw new YamlException(node.Start, node.End, + $"YAML type tags are not permitted in front matter. Tag: {tag}"); } } + private DiagnosticContext? ResolveAmbientDiagnostics() + => _httpContextAccessor.HttpContext?.RequestServices.GetService(); + private static FrozenSet BuildKnownKeySet(Type t) { - // Mirror what WithNamingConvention(CamelCase) + WithCaseInsensitivePropertyMatching - // accept on the deserializer. Include declared and interface-default members from - // both T and any IFrontMatter capability mixins T implements. - var convention = CamelCaseNamingConvention.Instance; + // Mirror PropertyNamingPolicy.CamelCase + case-insensitive matching on the deserializer. + // Include declared and interface-default members from both T and any IFrontMatter mixins. var seen = new HashSet(StringComparer.OrdinalIgnoreCase); foreach (var prop in t.GetProperties(BindingFlags.Public | BindingFlags.Instance)) { - seen.Add(convention.Apply(prop.Name)); + seen.Add(JsonNamingPolicy.CamelCase.ConvertName(prop.Name)); } foreach (var iface in t.GetInterfaces()) { foreach (var prop in iface.GetProperties(BindingFlags.Public | BindingFlags.Instance)) { - seen.Add(convention.Apply(prop.Name)); + seen.Add(JsonNamingPolicy.CamelCase.ConvertName(prop.Name)); } } @@ -268,4 +325,4 @@ private static bool TryExtractYaml(string content, out string yaml, out string b /// /// Deserialized front matter, or null when the content had no front matter block. /// Markdown body with the front matter block stripped. -public record FrontMatterResult(T? Metadata, string Body) where T : IFrontMatter; \ No newline at end of file +public record FrontMatterResult(T? Metadata, string Body) where T : IFrontMatter; diff --git a/src/Pennington/FrontMatter/PenningtonYaml.cs b/src/Pennington/FrontMatter/PenningtonYaml.cs new file mode 100644 index 00000000..fda2b52b --- /dev/null +++ b/src/Pennington/FrontMatter/PenningtonYaml.cs @@ -0,0 +1,22 @@ +namespace Pennington.FrontMatter; + +using System.Text.Json; +using SharpYaml; + +/// +/// Shared SharpYaml configuration. mirrors the front-matter +/// conventions (camelCase keys, case-insensitive matching) and pins an explicit +/// so reflection-based deserialization works +/// regardless of SharpYaml's IsReflectionEnabledByDefault switch — this is the +/// reflection fallback used for any type no source-generated context covers. +/// +internal static class PenningtonYaml +{ + /// Reflection-backed options used for types not registered with a serializer context. + public static YamlSerializerOptions ReflectionOptions { get; } = new() + { + PropertyNamingPolicy = JsonNamingPolicy.CamelCase, + PropertyNameCaseInsensitive = true, + TypeInfoResolver = ReflectionYamlTypeInfoResolver.Default, + }; +} diff --git a/src/Pennington/FrontMatter/PenningtonYamlContext.cs b/src/Pennington/FrontMatter/PenningtonYamlContext.cs new file mode 100644 index 00000000..c1b18cef --- /dev/null +++ b/src/Pennington/FrontMatter/PenningtonYamlContext.cs @@ -0,0 +1,17 @@ +namespace Pennington.FrontMatter; + +using System.Text.Json.Serialization; +using SharpYaml.Serialization; + +/// +/// Source-generated SharpYaml metadata for Pennington's built-in front-matter records, so they +/// deserialize without reflection (NativeAOT/trim-friendly). Registered automatically by +/// . Types not covered by any +/// registered context fall back to reflection — see . +/// +[YamlSourceGenerationOptions(PropertyNamingPolicy = JsonKnownNamingPolicy.CamelCase, PropertyNameCaseInsensitive = true)] +[YamlSerializable(typeof(DocFrontMatter))] +[YamlSerializable(typeof(BlogFrontMatter))] +internal partial class PenningtonYamlContext : YamlSerializerContext +{ +} diff --git a/src/Pennington/FrontMatter/PenningtonYamlContextProvider.cs b/src/Pennington/FrontMatter/PenningtonYamlContextProvider.cs new file mode 100644 index 00000000..c520c1b7 --- /dev/null +++ b/src/Pennington/FrontMatter/PenningtonYamlContextProvider.cs @@ -0,0 +1,53 @@ +namespace Pennington.FrontMatter; + +using System.Collections.Concurrent; +using SharpYaml; +using SharpYaml.Serialization; + +/// +/// Routes a type to the registered that knows it — the +/// built-in , a satellite package context, or one a user +/// added via — and +/// falls back to reflection for everything else. A source-generated context only serves the +/// types it was generated for and rejects foreign options, so each type is dispatched to its +/// own context rather than combined into a single resolver. +/// +public sealed class PenningtonYamlContextProvider +{ + /// A provider seeded with only the built-in context, for non-DI use (tests, scripts). + public static PenningtonYamlContextProvider Default { get; } = new([PenningtonYamlContext.Default]); + + private readonly YamlSerializerContext[] _contexts; + private readonly ConcurrentDictionary _byType = new(); + + /// Initializes the provider with the serializer contexts registered in DI. + /// Registered contexts; the built-in is always present. + public PenningtonYamlContextProvider(IEnumerable contexts) + => _contexts = contexts as YamlSerializerContext[] ?? [.. contexts]; + + /// + /// Deserializes into using the source-generated + /// context that covers , or reflection when none does. + /// + /// Raw YAML text. + public T? Deserialize(string yaml) + { + var context = _byType.GetOrAdd(typeof(T), ResolveContext); + return context is not null + ? YamlSerializer.Deserialize(yaml, context) + : YamlSerializer.Deserialize(yaml, PenningtonYaml.ReflectionOptions); + } + + private YamlSerializerContext? ResolveContext(Type type) + { + foreach (var context in _contexts) + { + if (context.GetTypeInfo(type, context.Options) is not null) + { + return context; + } + } + + return null; + } +} diff --git a/src/Pennington/FrontMatter/SafeYamlParser.cs b/src/Pennington/FrontMatter/SafeYamlParser.cs deleted file mode 100644 index 6a7944da..00000000 --- a/src/Pennington/FrontMatter/SafeYamlParser.cs +++ /dev/null @@ -1,48 +0,0 @@ -namespace Pennington.FrontMatter; - -using YamlDotNet.Core; -using YamlDotNet.Core.Events; - -/// -/// Wraps an to reject YAML anchors, aliases, and non-standard type tags. -/// Prevents billion-laughs expansion attacks and arbitrary type instantiation. -/// -internal sealed class SafeYamlParser(IParser inner) : IParser -{ - private static readonly HashSet AllowedTags = - [ - "tag:yaml.org,2002:str", - "tag:yaml.org,2002:int", - "tag:yaml.org,2002:float", - "tag:yaml.org,2002:bool", - "tag:yaml.org,2002:null", - "tag:yaml.org,2002:seq", - "tag:yaml.org,2002:map", - "tag:yaml.org,2002:timestamp", - ]; - - public ParsingEvent? Current => inner.Current; - - public bool MoveNext() - { - var result = inner.MoveNext(); - - switch (inner.Current) - { - case AnchorAlias alias: - throw new YamlException(alias.Start, alias.End, - "YAML aliases are not permitted in front matter."); - - case NodeEvent { Anchor.IsEmpty: false } node: - throw new YamlException(node.Start, node.End, - "YAML anchors are not permitted in front matter."); - - case NodeEvent { Tag: { IsNonSpecific: false, IsEmpty: false } tag } node - when !AllowedTags.Contains(tag.Value): - throw new YamlException(node.Start, node.End, - $"YAML type tags are not permitted in front matter. Tag: {tag.Value}"); - } - - return result; - } -} \ No newline at end of file diff --git a/src/Pennington/Infrastructure/PenningtonExtensions.cs b/src/Pennington/Infrastructure/PenningtonExtensions.cs index a280407e..04f8d0fb 100644 --- a/src/Pennington/Infrastructure/PenningtonExtensions.cs +++ b/src/Pennington/Infrastructure/PenningtonExtensions.cs @@ -32,6 +32,7 @@ namespace Pennington.Infrastructure; using Pipeline; using Routing; using Search; +using SharpYaml.Serialization; using Testably.Abstractions; /// @@ -112,6 +113,13 @@ public static IServiceCollection AddPennington(this IServiceCollection services, } services.AddSingleton(options.FrontMatter); + + // YAML deserialization: register the built-in source-generated context and the provider + // that dispatches each type to its context (or reflection). Satellite packages and users + // add their own contexts via AddPenningtonYamlContext. + services.AddPenningtonYamlContext(PenningtonYamlContext.Default); + services.AddSingleton(); + services.AddSingleton(); services.AddFileWatched(); @@ -421,6 +429,18 @@ object Resolve(IServiceProvider sp) return services; } + /// + /// Register a source-generated so the types it covers + /// deserialize without reflection (NativeAOT/trim-friendly). Types not covered by any + /// registered context fall back to reflection. Satellite templates call this for their own + /// front-matter records; end users call it for theirs. + /// + public static IServiceCollection AddPenningtonYamlContext(this IServiceCollection services, YamlSerializerContext context) + { + services.AddSingleton(context); + return services; + } + /// /// Builds the assembly set scanned for routable @page components, /// always including the entry assembly (deduped) so a bare host's pages are diff --git a/src/Pennington/Pennington.csproj b/src/Pennington/Pennington.csproj index 58cdaa8a..04d2705a 100644 --- a/src/Pennington/Pennington.csproj +++ b/src/Pennington/Pennington.csproj @@ -18,7 +18,7 @@ - + diff --git a/tests/Pennington.Tests/FrontMatter/FrontMatterParserDiagnosticsTests.cs b/tests/Pennington.Tests/FrontMatter/FrontMatterParserDiagnosticsTests.cs index 5acf6ac5..3d1a0e82 100644 --- a/tests/Pennington.Tests/FrontMatter/FrontMatterParserDiagnosticsTests.cs +++ b/tests/Pennington.Tests/FrontMatter/FrontMatterParserDiagnosticsTests.cs @@ -1,5 +1,5 @@ using Pennington.FrontMatter; -using YamlDotNet.Core; +using SharpYaml; namespace Pennington.Tests.FrontMatter; @@ -135,7 +135,7 @@ public void Parse_WithoutSourcePath_FallsBackToUnknownLabel() } private static FrontMatterParser CreateParser(bool strict) - => new(new FrontMatterParserOptions { StrictUnknownKeys = strict }, new NoopHttpContextAccessor()); + => new(new FrontMatterParserOptions { StrictUnknownKeys = strict }, new NoopHttpContextAccessor(), PenningtonYamlContextProvider.Default); private sealed class NoopHttpContextAccessor : Microsoft.AspNetCore.Http.IHttpContextAccessor { diff --git a/tests/Pennington.Tests/FrontMatter/FrontMatterParserTests.cs b/tests/Pennington.Tests/FrontMatter/FrontMatterParserTests.cs index ee246e62..9141d62d 100644 --- a/tests/Pennington.Tests/FrontMatter/FrontMatterParserTests.cs +++ b/tests/Pennington.Tests/FrontMatter/FrontMatterParserTests.cs @@ -186,7 +186,7 @@ public void Parse_YamlAnchor_ThrowsToPreventBillionLaughs() { var content = "---\ntitle: &bomb payload\n---\nContent."; - Should.Throw( + Should.Throw( () => _parser.Parse(content)); } @@ -195,7 +195,7 @@ public void DeserializeYaml_YamlAnchor_ThrowsToPreventBillionLaughs() { var yaml = "title: &bomb payload"; - Should.Throw( + Should.Throw( () => _parser.DeserializeYaml(yaml)); } @@ -204,7 +204,7 @@ public void Parse_ArbitraryTypeTag_ThrowsToPreventTypeInstantiation() { var content = "---\ntitle: ! malicious\n---\nContent."; - Should.Throw( + Should.Throw( () => _parser.Parse(content)); } @@ -213,7 +213,7 @@ public void Parse_DotNetTypeTag_ThrowsToPreventTypeInstantiation() { var content = "---\ntitle: ! evil\n---\nContent."; - Should.Throw( + Should.Throw( () => _parser.Parse(content)); }