-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathProgram.cs
More file actions
95 lines (80 loc) · 4.06 KB
/
Program.cs
File metadata and controls
95 lines (80 loc) · 4.06 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
using System.Text;
using Microsoft.AspNetCore.Http.Features;
using TinyNotebook.Core.Config;
using TinyNotebook.Core.Models;
using TinyNotebook.Core.Prompts;
using TinyNotebook.Core.Services;
using TinyNotebook.Core.Utilities;
namespace TinyNotebook;
/// <summary>
/// Application entry point. Registers services, configures the HTTP pipeline,
/// maps Razor Pages and the streaming <c>/api/chat</c> endpoint, then runs the host.
/// </summary>
public static class Program
{
    /// <summary>
    /// Builds the web application and runs it until the host shuts down.
    /// </summary>
    /// <param name="args">Command-line arguments forwarded to the host builder.</param>
    /// <returns>A task that completes when the host has stopped.</returns>
    public static async Task Main(string[] args)
    {
        // Ensure Chinese characters in ASP.NET Core logs render correctly on Windows.
        Console.OutputEncoding = Encoding.UTF8;
        Console.InputEncoding = Encoding.UTF8;

        // Run the storage-path setup once before the host is built. The container
        // resolves its own RuntimeStoragePaths singleton, which is ensured again
        // after Build() below.
        // NOTE(review): presumably EnsureCreated() creates runtime directories — confirm.
        new RuntimeStoragePaths().EnsureCreated();

        var builder = WebApplication.CreateBuilder(args);
        builder.Configuration.AddJsonFile(
            Path.Combine(AppContext.BaseDirectory, "appsettings.web.json"),
            optional: true,
            reloadOnChange: true);

        builder.Services.Configure<LlmOptions>(builder.Configuration.GetSection(LlmOptions.SectionName));
        builder.Services.Configure<ModelDownloadOptions>(builder.Configuration.GetSection(ModelDownloadOptions.SectionName));

        builder.Services.AddSingleton<ModelPathResolver>();
        builder.Services.AddSingleton<RuntimeStoragePaths>();
        builder.Services.AddSingleton<PromptBuilder>();
        builder.Services.AddSingleton<IConversationStore, FileConversationStore>();
        builder.Services.AddSingleton<IModelDownloader, HuggingFaceModelDownloader>();
        builder.Services.AddSingleton<SessionManager>();
        builder.Services.AddSingleton<ILlamaCppBackend, LlamaCppBackend>();
        builder.Services.AddSingleton<ILlmService, LlmService>();
        builder.Services.AddHostedService<LlmWarmupHostedService>();
        builder.Services.AddRazorPages();

        var app = builder.Build();
        app.Services.GetRequiredService<RuntimeStoragePaths>().EnsureCreated();

        if (!app.Environment.IsDevelopment())
        {
            app.UseExceptionHandler("/Error");
            app.UseHsts();
        }

        app.UseHttpsRedirection();
        app.UseRouting();
        app.UseAuthorization();
        app.MapStaticAssets();
        app.MapRazorPages().WithStaticAssets();

        // ── Streaming endpoints as Minimal API ──────────────────────────────
        // Razor Pages handlers go through the full MVC pipeline which buffers
        // the response. Minimal API endpoints write directly to the response
        // stream, so every token reaches the browser as soon as it is produced.
        app.MapPost("/api/chat", HandleChatAsync).DisableAntiforgery();

        // Await the host instead of blocking the thread with Run(): Main is
        // already async, so this keeps the entry point async end to end.
        await app.RunAsync();
    }

    /// <summary>
    /// Handles <c>POST /api/chat</c>: reads the <c>message</c> form field and streams
    /// each token yielded by <paramref name="llmService"/> to the response as
    /// plain UTF-8 text, flushing after every token.
    /// </summary>
    /// <param name="ctx">The current HTTP context.</param>
    /// <param name="llmService">The service that produces the token stream.</param>
    /// <returns>A task that completes when the response has been fully written or the client disconnects.</returns>
    /// <remarks>
    /// Responds with 415 when the request body is not a form, and with 400 when
    /// the <c>message</c> field is missing or blank.
    /// </remarks>
    private static async Task HandleChatAsync(HttpContext ctx, ILlmService llmService)
    {
        // Disable all response buffering layers (Kestrel + any middleware).
        ctx.Features.Get<IHttpResponseBodyFeature>()?.DisableBuffering();
        ctx.Response.ContentType = "text/plain; charset=utf-8";
        ctx.Response.Headers.CacheControl = "no-cache";
        ctx.Response.Headers["X-Accel-Buffering"] = "no";

        // ReadFormAsync throws on a non-form content type; reject such requests
        // explicitly so the caller gets a client error instead of a 500.
        if (!ctx.Request.HasFormContentType)
        {
            ctx.Response.StatusCode = StatusCodes.Status415UnsupportedMediaType;
            await ctx.Response.WriteAsync("Form content is required.", ctx.RequestAborted);
            return;
        }

        var form = await ctx.Request.ReadFormAsync(ctx.RequestAborted);
        var message = form["message"].ToString().Trim();
        if (string.IsNullOrWhiteSpace(message))
        {
            ctx.Response.StatusCode = StatusCodes.Status400BadRequest;
            await ctx.Response.WriteAsync("Message is required.", ctx.RequestAborted);
            return;
        }

        // Flush the response headers immediately so the browser's fetch()
        // promise resolves right away and the UI can show "connected" status
        // even before the first generated token arrives.
        await ctx.Response.Body.FlushAsync(ctx.RequestAborted);

        try
        {
            var request = new ChatRequest { UserMessage = message };
            await foreach (var token in llmService.StreamAsync(request, ctx.RequestAborted))
            {
                await ctx.Response.WriteAsync(token, ctx.RequestAborted);
                await ctx.Response.Body.FlushAsync(ctx.RequestAborted);
            }
        }
        catch (OperationCanceledException) when (ctx.RequestAborted.IsCancellationRequested)
        {
            // Browser aborted the request; nothing more to send. Cancellations
            // that did not originate from the client are left to propagate.
        }
    }
}