Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 12 additions & 5 deletions src/Pennington/Generation/AuditRunner.cs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ public sealed class AuditRunner : IHostedService
private readonly AuditCache _cache;
private readonly IFileWatcher _fileWatcher;
private readonly LocalizationOptions _localization;
private readonly IHostApplicationLifetime _lifetime;
private readonly ILogger<AuditRunner> _logger;
private readonly bool _isBuildMode;
private readonly Lock _runLock = new();
Expand All @@ -30,8 +31,9 @@ public AuditRunner(
AuditCache cache,
IFileWatcher fileWatcher,
LocalizationOptions localization,
IHostApplicationLifetime lifetime,
ILogger<AuditRunner> logger)
: this(services, cache, fileWatcher, localization, logger, PenningtonBuildMode.WritesOutput)
: this(services, cache, fileWatcher, localization, lifetime, logger, PenningtonBuildMode.WritesOutput)
{
}

Expand All @@ -41,24 +43,29 @@ internal AuditRunner(
AuditCache cache,
IFileWatcher fileWatcher,
LocalizationOptions localization,
IHostApplicationLifetime lifetime,
ILogger<AuditRunner> logger,
bool isBuildMode)
{
_services = services;
_cache = cache;
_fileWatcher = fileWatcher;
_localization = localization;
_lifetime = lifetime;
_logger = logger;
_isBuildMode = isBuildMode;
}

/// <inheritdoc/>
/// <remarks>
/// Performs no audit work itself. It registers the initial pass to run when
/// <see cref="IHostApplicationLifetime.ApplicationStarted"/> fires, subscribes to file
/// changes, and returns an already-completed task.
/// </remarks>
public Task StartAsync(CancellationToken cancellationToken)
{
    // Defer the initial pass until the application has fully started. A hosted service's
    // StartAsync runs while sibling hosted services — including the web server that backs
    // the in-process self-fetch — may not be up yet, so a build-mode pass that fetches
    // rendered HTML through the projection would race the server start and fail (the empty
    // result would then poison the projection's cache). ApplicationStarted fires only after
    // every hosted service, the server included, has started.
    //
    // No pass is started directly from here: an immediate run would reintroduce exactly
    // that race.
    _lifetime.ApplicationStarted.Register(RunInBackground);

    // File changes reported by the watcher go through the same background entry point.
    _fileWatcher.SubscribeToChanges(() => RunInBackground());
    return Task.CompletedTask;
}
Expand Down
24 changes: 21 additions & 3 deletions src/Pennington/Infrastructure/HttpDispatcher.cs
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,33 @@ public HttpClient CreateClient()
// Path-relative URLs ("/foo/bar") resolve against that and are dispatched
// to the same RequestDelegate Kestrel would have invoked. Wrap its handler
// with the cache so repeat self-fetches replay one render.
var testHandler = new CachingHttpHandler(_cache) { InnerHandler = testServer.CreateHandler() };
HttpMessageHandler innerHandler;
try
{
innerHandler = testServer.CreateHandler();
}
catch (InvalidOperationException ex)
{
// TestServer.Application is null until the host's IServer has started. A
// self-fetch issued before that — e.g. a startup hosted service racing the
// server start — is an infrastructure failure, not a per-page content error.
// Surface it as such so callers retry once the host is up instead of baking
// an empty corpus.
throw new SelfFetchUnavailableException(
"The in-process TestServer has not started yet; a self-fetch was issued before " +
"the host's server was ready.",
ex);
}

var testHandler = new CachingHttpHandler(_cache) { InnerHandler = innerHandler };
return new HttpClient(testHandler) { BaseAddress = new Uri("http://localhost/") };
}

var addresses = _server.Features.Get<IServerAddressesFeature>()?.Addresses;
if (addresses is null || addresses.Count == 0)
{
throw new InvalidOperationException(
"HttpDispatcher requires either a TestServer or a listening Kestrel host. " +
throw new SelfFetchUnavailableException(
"HttpDispatcher requires either a started TestServer or a listening Kestrel host. " +
"IServerAddressesFeature has no addresses — is the app started yet?");
}

Expand Down
25 changes: 25 additions & 0 deletions src/Pennington/Infrastructure/SelfFetchUnavailableException.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
namespace Pennington.Infrastructure;

/// <summary>
/// Thrown by <see cref="IInProcessHttpDispatcher.CreateClient"/> when the in-process
/// transport is not ready — the host's <see cref="Microsoft.AspNetCore.Hosting.Server.IServer"/>
/// has not started yet (a <c>TestServer</c> whose application is still null, or a Kestrel host
/// that has not bound a listening address). Distinct from a per-page content failure:
/// site-crawling consumers (notably <see cref="Pennington.Pipeline.SiteProjection"/>) must let
/// this propagate so a partially-built or empty corpus is never cached as if the crawl had
/// completed.
/// </summary>
public sealed class SelfFetchUnavailableException : Exception
{
    // Fallback text for the parameterless constructor, so the exception never surfaces
    // with the framework's generic "Exception of type ..." message.
    private const string DefaultMessage = "The in-process self-fetch transport is not ready.";

    /// <summary>Initializes the exception with a default message.</summary>
    /// <remarks>
    /// Provided to complete the standard exception constructor set; prefer the overloads
    /// that explain why the transport is unavailable.
    /// </remarks>
    public SelfFetchUnavailableException()
        : base(DefaultMessage)
    {
    }

    /// <summary>Initializes the exception with a message describing why the transport is unavailable.</summary>
    /// <param name="message">Human-readable reason the transport is unavailable.</param>
    public SelfFetchUnavailableException(string message)
        : base(message)
    {
    }

    /// <summary>Initializes the exception with a message and the underlying cause.</summary>
    /// <param name="message">Human-readable reason the transport is unavailable.</param>
    /// <param name="innerException">The lower-level failure that revealed the transport was not ready.</param>
    public SelfFetchUnavailableException(string message, Exception innerException)
        : base(message, innerException)
    {
    }
}
7 changes: 6 additions & 1 deletion src/Pennington/Pipeline/SiteProjection.cs
Original file line number Diff line number Diff line change
Expand Up @@ -450,11 +450,16 @@ await Parallel.ForEachAsync(
Content: fetched,
Sections: BuildSectionsLazy(_extractor, fetched));
}
catch (Exception ex)
catch (Exception ex) when (ex is not SelfFetchUnavailableException)
{
_logger.LogWarning(ex, "SiteProjection: failed to project {Path}, skipping", toc.Route.CanonicalPath.Value);
return null;
}
// SelfFetchUnavailableException is deliberately NOT caught: the in-process server
// wasn't ready, which is an all-or-nothing infrastructure failure, not a per-page
// content error. Letting it propagate faults SeedAsync so AsyncLazy evicts the task
// and the next access retries — otherwise the whole corpus would be cached empty and
// the search index / llms.txt would silently ship with zero pages.
}

private static Lazy<IReadOnlyList<HeadingSection>> BuildSectionsLazy(
Expand Down
26 changes: 25 additions & 1 deletion tests/Pennington.Tests/Generation/AuditRunnerTests.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
using System.Runtime.CompilerServices;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging.Abstractions;
using Pennington.Generation;
using Pennington.Infrastructure;
Expand All @@ -23,17 +24,24 @@ public async Task StartAsync_InBuildMode_RunsRenderedAuditor_AndCachesItsDiagnos

using var sp = services.BuildServiceProvider();
var cache = sp.GetRequiredService<AuditCache>();
var lifetime = new StubLifetime();
var runner = new AuditRunner(
sp,
cache,
sp.GetRequiredService<IFileWatcher>(),
sp.GetRequiredService<LocalizationOptions>(),
lifetime,
NullLogger<AuditRunner>.Instance,
isBuildMode: true);

await runner.StartAsync(TestContext.Current.CancellationToken);

// RunAsync started in StartAsync; give the cache a moment to be populated.
// The initial pass is gated on ApplicationStarted so the server is up before any
// self-fetch; nothing should have run yet.
cache.Diagnostics.ShouldBeEmpty();
lifetime.FireStarted();

// RunAsync started on ApplicationStarted; give the cache a moment to be populated.
for (var i = 0; i < 50 && cache.Diagnostics.IsEmpty; i++)
{
await Task.Delay(10, TestContext.Current.CancellationToken);
Expand All @@ -58,15 +66,18 @@ public async Task StartAsync_InDevMode_SkipsRenderedAuditors()

using var sp = services.BuildServiceProvider();
var cache = sp.GetRequiredService<AuditCache>();
var lifetime = new StubLifetime();
var runner = new AuditRunner(
sp,
cache,
sp.GetRequiredService<IFileWatcher>(),
sp.GetRequiredService<LocalizationOptions>(),
lifetime,
NullLogger<AuditRunner>.Instance,
isBuildMode: false);

await runner.StartAsync(TestContext.Current.CancellationToken);
lifetime.FireStarted();

// Wait long enough for any background run to settle.
await Task.Delay(100, TestContext.Current.CancellationToken);
Expand Down Expand Up @@ -110,4 +121,17 @@ public void SubscribeToChanges(Action onUpdate) { }
public void SubscribeToChanges(Action<FileChangeNotification> onUpdate) { }
public void Dispose() { }
}

// Hand-rolled IHostApplicationLifetime for these tests. Only ApplicationStarted is live:
// it is backed by a token source that FireStarted cancels. The stopping/stopped tokens
// are CancellationToken.None.
private sealed class StubLifetime : IHostApplicationLifetime
{
    private readonly CancellationTokenSource _startedSource = new();

    public CancellationToken ApplicationStarted
    {
        get { return _startedSource.Token; }
    }

    public CancellationToken ApplicationStopping
    {
        get { return CancellationToken.None; }
    }

    public CancellationToken ApplicationStopped
    {
        get { return CancellationToken.None; }
    }

    public void StopApplication()
    {
        // No-op: the stub does not model shutdown.
    }

    // Mirrors the host firing ApplicationStarted once every hosted service (the server
    // included) has started — the gate AuditRunner now waits on before its initial pass.
    public void FireStarted()
    {
        _startedSource.Cancel();
    }
}
}
43 changes: 43 additions & 0 deletions tests/Pennington.Tests/Infrastructure/HttpDispatcherTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
using Microsoft.AspNetCore.Hosting.Server;
using Microsoft.AspNetCore.Http.Features;
using Microsoft.AspNetCore.TestHost;
using Microsoft.Extensions.DependencyInjection;
using Pennington.Infrastructure;

namespace Pennington.Tests.Infrastructure;

/// <summary>
/// Verifies that <see cref="HttpDispatcher.CreateClient"/> reports a not-yet-started server as
/// <see cref="SelfFetchUnavailableException"/> on both the TestServer and the address-based path.
/// </summary>
public class HttpDispatcherTests
{
    [Fact]
    public void CreateClient_UnstartedTestServer_ThrowsSelfFetchUnavailable()
    {
        // A TestServer whose host hasn't started has a null Application, so CreateHandler()
        // throws InvalidOperationException. The dispatcher must surface that as the dedicated
        // infrastructure failure (not a generic exception a per-page catch would swallow) so
        // the projection retries instead of caching an empty corpus. This is the exact
        // condition the Windows build-ordering bug hit when a startup hosted service raced
        // the server start.
        //
        // The provider is disposable; own it here so the test does not leak it. Using
        // declarations dispose in reverse order, so the server goes first, then the provider.
        using var provider = new ServiceCollection().BuildServiceProvider();
        using var server = new TestServer(provider);
        var dispatcher = new HttpDispatcher(server, new BuildHtmlCache([]));

        Should.Throw<SelfFetchUnavailableException>(() => dispatcher.CreateClient());
    }

    [Fact]
    public void CreateClient_NonTestServerWithoutAddresses_ThrowsSelfFetchUnavailable()
    {
        // The Kestrel path with no bound addresses is the same "server isn't ready" condition.
        var dispatcher = new HttpDispatcher(new NoAddressServer(), new BuildHtmlCache([]));

        Should.Throw<SelfFetchUnavailableException>(() => dispatcher.CreateClient());
    }

    // IServer stand-in that is not a TestServer and exposes an empty feature collection,
    // so no IServerAddressesFeature is available to the dispatcher.
    private sealed class NoAddressServer : IServer
    {
        public IFeatureCollection Features { get; } = new FeatureCollection();

        public void Dispose() { }

        public Task StartAsync<TContext>(IHttpApplication<TContext> application, CancellationToken cancellationToken)
            where TContext : notnull => Task.CompletedTask;

        public Task StopAsync(CancellationToken cancellationToken) => Task.CompletedTask;
    }
}
107 changes: 103 additions & 4 deletions tests/Pennington.Tests/Pipeline/SiteProjectionTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ namespace Pennington.Tests.Pipeline;
using Microsoft.AspNetCore.Routing.Patterns;
using Microsoft.Extensions.Logging.Abstractions;
using Microsoft.Extensions.Primitives;
using Pennington.Content;
using Pennington.Infrastructure;
using Pennington.LlmsTxt;
using Pennington.Pipeline;
Expand Down Expand Up @@ -104,15 +105,53 @@ public async Task GetPageAsync_UnknownPath_ReturnsNull()
page.ShouldBeNull();
}

private static SiteProjection CreateProjection(EndpointDataSource? endpointDataSource = null)
[Fact]
public async Task SelfFetchUnavailable_DuringSeed_FaultsRatherThanCachingEmptyCorpus()
{
    // Regression guard for the Windows build-ordering bug. While the in-process server is
    // down, the self-fetch raises SelfFetchUnavailableException; treating that as a per-page
    // skip would cache an empty corpus and ship a zero-page search index / llms.txt. The
    // seed has to fault instead, so AsyncLazy evicts it and a later access — with the
    // server up — rebuilds the real corpus.
    var cancellation = TestContext.Current.CancellationToken;
    var flaky = new FlakyDispatcher();
    var projection = CreateProjection(
        contentServices: [new SinglePageContentService()],
        dispatcher: flaky);

    // Enumerates the whole projection, appending every yielded page to the sink.
    async Task DrainInto(List<RenderedPage> sink)
    {
        await foreach (var rendered in projection.GetPagesAsync(cancellation))
        {
            sink.Add(rendered);
        }
    }

    // Server down: the infrastructure failure must reach the caller.
    await Should.ThrowAsync<SelfFetchUnavailableException>(
        () => DrainInto(new List<RenderedPage>()));

    // Server up: the faulted seed is gone, so this access re-crawls and finds the page.
    flaky.ServerReady = true;
    var collected = new List<RenderedPage>();
    await DrainInto(collected);

    collected.Count.ShouldBe(1);
    collected[0].Route.CanonicalPath.Value.ShouldBe("/page/");
    flaky.CreateClientCalls.ShouldBeGreaterThanOrEqualTo(2);
}

private static SiteProjection CreateProjection(
EndpointDataSource? endpointDataSource = null,
IEnumerable<IContentService>? contentServices = null,
IInProcessHttpDispatcher? dispatcher = null)
{
var dispatcher = new StubDispatcher();
return new SiteProjection(
contentServices: [],
contentServices: contentServices ?? [],
enrichment: new MetadataEnrichmentService([]),
renderer: new StubRenderer(),
xrefResolver: new XrefResolvingService(new XrefResolver([])),
fetcher: new RenderedHtmlFetcher(dispatcher, NullLogger<RenderedHtmlFetcher>.Instance),
fetcher: new RenderedHtmlFetcher(dispatcher ?? new StubDispatcher(), NullLogger<RenderedHtmlFetcher>.Instance),
extractor: new HeadingSectionExtractor(),
options: new SiteProjectionOptions(),
endpointDataSource: endpointDataSource ?? new StubEndpointDataSource(),
Expand Down Expand Up @@ -155,4 +194,64 @@ protected override Task<HttpResponseMessage> SendAsync(HttpRequestMessage reques
=> Task.FromResult(new HttpResponseMessage(System.Net.HttpStatusCode.NotFound));
}
}

// Dispatcher double that fails with the infrastructure exception while ServerReady is
// false, mirroring HttpDispatcher.CreateClient against a TestServer whose Application is
// not yet set. Once ServerReady is true it hands out clients that answer every request
// with a fixed 200 HTML document. Every CreateClient call is counted, failed or not.
private sealed class FlakyDispatcher : IInProcessHttpDispatcher
{
    public bool ServerReady { get; set; }

    public int CreateClientCalls { get; private set; }

    public HttpClient CreateClient()
    {
        // Count first so failed attempts are included in the tally.
        CreateClientCalls += 1;

        return ServerReady
            ? new HttpClient(new OkHandler()) { BaseAddress = new Uri("http://localhost/") }
            : throw new SelfFetchUnavailableException("server not started (test)");
    }

    // Answers any request with the same small HTML page.
    private sealed class OkHandler : HttpMessageHandler
    {
        protected override Task<HttpResponseMessage> SendAsync(HttpRequestMessage request, CancellationToken cancellationToken)
        {
            var body = new StringContent(
                "<html><body><main><h1>Page</h1><p>Body</p></main></body></html>",
                System.Text.Encoding.UTF8,
                "text/html");
            var response = new HttpResponseMessage(System.Net.HttpStatusCode.OK)
            {
                Content = body,
            };
            return Task.FromResult(response);
        }
    }
}

// Content service stub exposing exactly one TOC entry ("/page/") and nothing else: no
// discovered items, nothing to copy or create, no cross-references. The entry carries no
// LlmsOnlySource, so the projection takes the HTTP self-fetch path through the dispatcher.
private sealed class SinglePageContentService : IContentService
{
    private static readonly ContentRoute PageRoute = new()
    {
        CanonicalPath = new UrlPath("/page/"),
        OutputFile = new FilePath("page/index.html"),
    };

    public string DefaultSectionLabel
    {
        get { return ""; }
    }

    public int SearchPriority
    {
        get { return 0; }
    }

    public IAsyncEnumerable<DiscoveredItem> DiscoverAsync()
    {
        return AsyncEnumerable.Empty<DiscoveredItem>();
    }

    public Task<System.Collections.Immutable.ImmutableList<ContentTocItem>> GetContentTocEntriesAsync()
    {
        // The single entry the projection will try to fetch.
        var entries = System.Collections.Immutable.ImmutableList.Create(
            new ContentTocItem("Page", PageRoute, 0, ["page"], null, null));
        return Task.FromResult(entries);
    }

    public Task<System.Collections.Immutable.ImmutableList<ContentToCopy>> GetContentToCopyAsync()
    {
        return Task.FromResult(System.Collections.Immutable.ImmutableList<ContentToCopy>.Empty);
    }

    public Task<System.Collections.Immutable.ImmutableList<ContentToCreate>> GetContentToCreateAsync()
    {
        return Task.FromResult(System.Collections.Immutable.ImmutableList<ContentToCreate>.Empty);
    }

    public Task<System.Collections.Immutable.ImmutableList<CrossReference>> GetCrossReferencesAsync()
    {
        return Task.FromResult(System.Collections.Immutable.ImmutableList<CrossReference>.Empty);
    }
}
}
Loading