Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
51 commits
Select commit Hold shift + click to select a range
8e96d78
feat: Adds ConceptRecord, ElaborationTask, and ConceptConversation en…
Luburic Mar 25, 2026
c756362
fix: Resolves subtle bug and mildly improves performance for learner …
Luburic Mar 25, 2026
c409601
tests: Adds test suites for Elaboration use cases.
Luburic Mar 25, 2026
860606a
tests: Improves test infrastructure to avoid recreating the DB any ti…
Luburic Mar 26, 2026
b0cc885
fix: Resolves minor security concerns for elaboration authoring.
Luburic Mar 26, 2026
400a508
feat: Enhances capabilities for learning through conversation.
Luburic Mar 26, 2026
09ab494
feat: Expand concept record to include relationships between KPs
Luburic Apr 6, 2026
0f6d9e3
fix: Resolves issue with elaboration task authoring and missing data …
Luburic Apr 10, 2026
dc469e0
fix: Removes order from ConceptRecord child entities.
Luburic Apr 10, 2026
9c766a6
fix: Improves KeyRelation model to better support authoring.
Luburic Apr 11, 2026
c251a46
refactor: Merges ConceptRecord and ElaborationTask into a single aggr…
Luburic Apr 11, 2026
ed059b1
fix: Simplifies HTTP data flow for concept authoring.
Luburic Apr 12, 2026
818c712
refactor: Mild naming and logic reorganization.
Luburic Apr 15, 2026
1537775
refactor: Removes TurnOrchestrator as it was just delegating to agents.
Luburic Apr 15, 2026
e39347c
refactor: Mildly reworks TurnEvaluation and ConversationTurn model.
Luburic Apr 16, 2026
ae2fbc0
refactor: Removes ConversationState dto to simplify code.
Luburic Apr 16, 2026
038b4e3
fix: LLM can now answer clarifying questions instead of treating ever…
Luburic Apr 20, 2026
184bc3b
fix: Improves elaboration conversation UX.
Luburic Apr 20, 2026
3732a0a
feat: Introduces multiple new agents to support more reliable and cos…
Luburic Apr 21, 2026
265acfd
refactor: Extracts Streaming and Structured Agent base clases to enfo…
Luburic Apr 21, 2026
6db6d06
fix: Improves streaming agent reliability and issue with saving broke…
Luburic Apr 22, 2026
a3c23c4
feat: Adds logging of key metrics for LLM conversation observability.
Luburic Apr 22, 2026
5a23deb
feat: Tracks actual token usage per agent during concept elaboration.
Luburic Apr 22, 2026
bda7b15
refactor: Simplifies design to support logging of token usage.
Luburic Apr 22, 2026
c3a3103
refactor: Major. Compressess ConceptRecord into a set of entities and…
Luburic Apr 24, 2026
ea00c7c
refactor: Minor reductions in AgentOrchestrator complexity.
Luburic Apr 24, 2026
380e897
refactor: Removes BoundaryCondition concept and refactors AgentOrches…
Luburic Apr 27, 2026
5da7eca
refactor: Simplifies structured agent data flow.
Luburic Apr 27, 2026
f6869e3
refactor: Simplifies agent configuration.
Luburic Apr 27, 2026
6e18a15
chore: Minor cleanup performed while understanding the data flow.
Luburic Apr 28, 2026
3164c85
refactor: Simplifies data flow.
Luburic Apr 28, 2026
03507bb
refactor: Reworks Probe to become a domain concept with cleaner design.
Luburic Apr 29, 2026
cb32be6
chore: Resolves some sonar issues.
Luburic Apr 29, 2026
c622ab7
refactor: Major rework of Orchestrator that compressess previous rout…
Luburic Apr 29, 2026
84c8066
refactor: Minor cleanup while reviewing Orchestrator.
Luburic May 1, 2026
1689aca
refactor: Reduces 'agents' to 'llmRequests' and simplifies design acc…
Luburic May 1, 2026
ee041f2
refactor: Improves class organization for better code discovery.
Luburic May 1, 2026
be91ed2
chore: Resolves a few SCA errors.
Luburic May 2, 2026
b043612
fix: Removes concept record leakage to learner.
Luburic May 2, 2026
7f0774e
chore: Resolves SCA issues.
Luburic May 2, 2026
21dbc24
fix: Improves prompts.
Luburic May 3, 2026
a21cf42
refactor: Improves scoring mechanism by transitioning from holistic e…
Luburic May 3, 2026
fb46557
chore: Resolves SCA issues.
Luburic May 3, 2026
9e689d3
fix!: Improves CritiquePrompt behavior.
Luburic May 5, 2026
fd8c957
redesign!: Reworks the complete UX of concept elaboration to focus on…
Luburic May 7, 2026
fd73204
refactor: Compresses learner turn, evaluation, system turn, and feedb…
Luburic May 7, 2026
243a487
fix: Resolves build and test issue.
Luburic May 7, 2026
8e424c5
refactor: Renames FeedbackTarget > Probe; TurnEvaluation > RoundEvalu…
Luburic May 7, 2026
4a11bfe
feat: Expands ScoreTarget to include evidence and aligns prompts to u…
Luburic May 8, 2026
a89af5f
refactor: Simplifies prompt infrastructure and improves misconception…
Luburic May 8, 2026
16cd6a9
feat: Utility for extracting elaboration conversations from DB for cr…
Luburic May 11, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -551,4 +551,4 @@ $RECYCLE.BIN/
src/**/Migrations/*

.claude

plan
79 changes: 63 additions & 16 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ An intelligent tutoring system for structured learning with knowledge and skill

## Architecture

27 projects organized as 5 domain modules, each with 4 layers, plus shared BuildingBlocks and host.
Projects organized as 6 domain modules, each with 4 layers, plus shared BuildingBlocks and host.

**Layer Responsibilities:**
- **API** - Public contracts, DTOs, internal service interfaces (what other modules can consume)
Expand Down Expand Up @@ -37,7 +37,7 @@ An intelligent tutoring system for structured learning with knowledge and skill

**Key Entities:**
- **Course** - Top-level container (code, name, description, startDate, isArchived)
- **KnowledgeUnit** - Weekly learning unit within a course, contains KCs and Tasks
- **KnowledgeUnit** - Weekly learning unit within a course, contains Reflections, KCs, Tasks
- **LearnerGroup** - Groups learners for easier management and monitoring
- **WeeklyFeedback** - Instructor's weekly assessment of learner progress (Red/Yellow/Green semaphore + comment)
- **Reflection** - Structured questions for learners to reflect on their learning
Expand All @@ -60,20 +60,16 @@ An intelligent tutoring system for structured learning with knowledge and skill
- **KnowledgeComponent** - Atomic learning objective (code, name, expectedDuration)
- **AssessmentItem** - Questions to test understanding: MCQ (single choice), MRQ (multiple choice), SAQ (short answer)
- **InstructionalItem** - Learning content: Text, Video, or Image with ordering
- **SessionTracker** - Manages a learner's session state for a KC
- **Submission** - Learner's answer to an assessment item
- **Evaluation** - Feedback on a submission (correct/incorrect, hints, explanations)
- **KCMastery** - Tracks whether a learner has mastered a KC
- **MoveOn Criteria** - Rules for when a KC is considered satisfied (Completed, Passed, CompletedAndPassed, CompletedOrPassed)
- **KcMastery** - Tracks whether a learner has mastered a KC

**Use Cases:**
- **Authoring**: Instructors create KCs with expected duration, add/reorder assessment items (MCQ/MRQ/SAQ with feedback patterns), add/reorder instructional items (text/video/image), clone KCs for reuse
- **Learning**: Learners launch a learning session for a KC, system selects appropriate assessment items based on progress, learners view instructional content, submit answers and receive immediate evaluation with feedback, can pause/continue/terminate sessions
- **Mastery**: System tracks completion (all items seen) and passing (sufficient correct answers), applies move-on criteria to determine if KC is satisfied, records mastery status
- **Analytics**: Instructors view KC statistics (submission counts, correctness rates), system detects common misconceptions from wrong answer patterns, tracks most frequent errors per assessment

**Domain Events:** SessionLaunched, KCStarted, KCCompleted, KCPassed, KCSatisfied (used for analytics and cross-module notifications)

**Dependencies:** → Courses.API (for unit context)

### LearningTasks
Expand All @@ -82,19 +78,14 @@ An intelligent tutoring system for structured learning with knowledge and skill
**Key Entities:**
- **LearningTask** - A practical exercise (name, description, maxPoints, isTemplate)
- **Activity** - A step within a task, contains examples, guidance text, and submission requirements
- **StepProgress** - Tracks learner's progress on a single step (answer, submission time)
- **TaskProgress** - Overall progress on a task (started, completed, graded status)
- **StandardEvaluation** - Instructor's grade and comment for a step
- **SubmissionFormat** - Defines how learners should submit (text, file upload, etc.)

**Use Cases:**
- **Authoring**: Instructors create tasks with multiple steps (activities), define examples with video walkthroughs, write guidance text for each step, specify submission format and point values, clone tasks as templates, move tasks between units
- **Learning**: Learners view task list for a unit with progress summaries, open a task to see step-by-step instructions, access examples (watch videos with play/pause/finish tracking), read guidance materials, submit answers for each step
- **Progress**: System creates/updates task progress records, tracks which steps are completed, records submission timestamps and content
- **Grading**: Instructors view learner submissions, grade individual steps with points and comments, view group summaries showing progress across all learners, bulk retrieve progress for a cohort

**Domain Events:** TaskOpened, TaskCompleted, TaskGraded, StepOpened, StepSubmitted, StepGraded, ExampleOpened, GuidanceOpened, VideoPlayed, VideoPaused, VideoFinished (for learning analytics)

**Dependencies:** → Courses.API (for unit context)

### LearningUtils
Expand All @@ -105,7 +96,6 @@ An intelligent tutoring system for structured learning with knowledge and skill

**Use Cases:**
- **Note-taking**: Learners create notes while studying a unit, update note content, reorder notes, delete notes, retrieve all notes for a unit
- **Export**: Learners export their notes to a downloadable file format

**Dependencies:** → Stakeholders.API (for learner context)

Expand Down Expand Up @@ -155,7 +145,6 @@ Generic AI services available for module-specific features. Core defines abstrac
- `IAiChatService` - Chat completions with `CompleteAsync` (returns full response) and `StreamAsync` (token streaming). Configure via `CompletionRequest` (messages, system prompt, temperature, max tokens).
- `ITextEmbeddingService` - Convert text to vectors via `GenerateEmbeddingAsync` (single) or `GenerateEmbeddingsAsync` (batch).
- `IVectorStore<TMetadata>` - Store/search embeddings with custom metadata. Supports `UpsertAsync`, `SearchAsync` (cosine similarity with filters), `DeleteAsync`. Each module registers its own instance with `AddVectorStore<TMetadata>()`.
- `IInputGuardrail` / `IOutputGuardrail` - Validate user input before LLM calls and LLM output before returning to users. Use `CompositeInputGuardrail` / `CompositeOutputGuardrail` to chain multiple validators.

**Registration:**
```csharp
Expand Down Expand Up @@ -225,7 +214,65 @@ When creating a DTO and matching domain object in a Module.Core project, look fo
| `LoggingInterceptor` | Automatic logging of service call results | Cross-cutting logging concern |
| `ProxiedServiceExtensions.AddProxiedScoped` | Register service with interceptors (e.g., logging) | Module DI registration |

# Coding Style
# Code generation guidelines

## 1. Think Before Coding

**Don't assume. Don't hide confusion. Surface tradeoffs.**

Before implementing:
- State your assumptions explicitly. If uncertain, ask.
- If multiple interpretations exist, present them - don't pick silently.
- If a simpler approach exists, say so. Push back when warranted.
- If something is unclear, stop. Name what's confusing. Ask.

## 2. Simplicity First

**Minimum code that solves the problem. Nothing speculative.**

- No features beyond what was asked.
- No abstractions for single-use code.
- No "flexibility" or "configurability" that wasn't requested.
- No error handling for impossible scenarios.
- If you write 200 lines and it could be 50, rewrite it.

Ask yourself: "Would a senior engineer say this is overcomplicated?" If yes, simplify.

## 3. Surgical Changes

**Touch only what you must. Clean up only your own mess.**

When editing existing code:
- Don't "improve" adjacent code, comments, or formatting.
- Don't refactor things that aren't broken.
- Match existing style, even if you'd do it differently.
- If you notice unrelated dead code, mention it - don't delete it.

When your changes create orphans:
- Remove imports/variables/functions that YOUR changes made unused.
- Don't remove pre-existing dead code unless asked.

The test: Every changed line should trace directly to the user's request.

## 4. Goal-Driven Execution

**Define success criteria. Loop until verified.**

Transform tasks into verifiable goals:
- "Add validation" → "Write tests for invalid inputs, then make them pass"
- "Fix the bug" → "Write a test that reproduces it, then make it pass"
- "Refactor X" → "Ensure tests pass before and after"

For multi-step tasks, state a brief plan:
```
1. [Step] → verify: [check]
2. [Step] → verify: [check]
3. [Step] → verify: [check]
```

Strong success criteria let you loop independently. Weak criteria ("make it work") require constant clarification.

# Style guidelines
- Methods with 3 or fewer parameters should have their headers and invocations fit on one row.
- Methods with more than 3 parameters should have their headers and invocations separate into multiple rows, where each row should contain 2 or 3 parameters.
- Methods with more than 3 parameters should have their headers and invocations split across multiple rows, where each row contains 3 parameters.
- Do not write method headers and invocations where one row is one parameter.
6 changes: 6 additions & 0 deletions Clean CaDET Tutor.slnx
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,12 @@
<Project Path="src/Modules/Courses/Tutor.Courses.Infrastructure/Tutor.Courses.Infrastructure.csproj" />
<Project Path="src/Modules/Courses/Tutor.Courses.Tests/Tutor.Courses.Tests.csproj" />
</Folder>
<Folder Name="/src/Modules/Elaborations/">
<Project Path="src/Modules/Elaborations/Tutor.Elaborations.API/Tutor.Elaborations.API.csproj" />
<Project Path="src/Modules/Elaborations/Tutor.Elaborations.Core/Tutor.Elaborations.Core.csproj" />
<Project Path="src/Modules/Elaborations/Tutor.Elaborations.Infrastructure/Tutor.Elaborations.Infrastructure.csproj" />
<Project Path="src/Modules/Elaborations/Tutor.Elaborations.Tests/Tutor.Elaborations.Tests.csproj" />
</Folder>
<Folder Name="/src/Modules/KnowledgeComponents/">
<Project Path="src/Modules/KnowledgeComponents/Tutor.KnowledgeComponents.API/Tutor.KnowledgeComponents.API.csproj" />
<Project Path="src/Modules/KnowledgeComponents/Tutor.KnowledgeComponents.Core/Tutor.KnowledgeComponents.Core.csproj" />
Expand Down
187 changes: 187 additions & 0 deletions src/BuildingBlocks/Tutor.BuildingBlocks.AI.Core/Agents/LlmCaller.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Text.Json;
using FluentResults;
using Microsoft.Extensions.Logging;
using Tutor.BuildingBlocks.AI.Core.Conversations;

namespace Tutor.BuildingBlocks.AI.Core.Agents;

/// <summary>
/// Base class for components that call the LLM through <see cref="IAiChatService"/>.
/// Offers a JSON-returning call with a bounded number of attempts and a token-streaming call.
/// Each call emits one structured log entry (status, duration, token counts, response size, attempts).
/// </summary>
public abstract class LlmCaller
{
    private const int MaxJsonAttempts = 2;

    // Single template shared by both call styles so their log entries stay field-for-field comparable.
    private const string CallLogTemplate =
        "Agent={Agent} Status={Status} DurationMs={DurationMs} PromptTokens={PromptTokens} " +
        "CompletionTokens={CompletionTokens} ResponseChars={ResponseChars} Attempts={Attempts} " +
        "FailureCategory={FailureCategory}";

    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        PropertyNamingPolicy = JsonNamingPolicy.CamelCase
    };

    private readonly IAiChatService _chatService;
    private readonly ITurnUsageTracker _usageTracker;
    private readonly ILogger _logger;

    protected LlmCaller(IAiChatService chatService, ITurnUsageTracker usageTracker, ILogger logger)
    {
        _chatService = chatService;
        _usageTracker = usageTracker;
        _logger = logger;
    }

    /// <summary>
    /// Requests a completion and deserializes its content into <typeparamref name="TResponse"/>,
    /// making up to <see cref="MaxJsonAttempts"/> attempts.
    /// </summary>
    /// <param name="request">The completion request sent unchanged on every attempt.</param>
    /// <param name="label">Name used in log entries and in the failure message.</param>
    /// <param name="ct">Cancellation token passed to the chat service.</param>
    /// <returns>
    /// The parsed response on success; otherwise a failed result with the message "<c>{label} failed.</c>".
    /// A failed completion or unparseable content triggers another attempt; a finish reason listed in
    /// <see cref="ShouldSkipRetry"/> ends the loop immediately.
    /// </returns>
    /// <exception cref="OperationCanceledException">
    /// Propagated unchanged when the chat service throws it; the call is then logged as cancelled.
    /// </exception>
    protected async Task<Result<TResponse>> CompleteJsonAsync<TResponse>(
        CompletionRequest request, string label, CancellationToken ct) where TResponse : class
    {
        var sw = Stopwatch.StartNew();
        var promptTokens = 0;
        var completionTokens = 0;
        var charCount = 0;
        var attempts = 0;
        var status = "failure";
        string? failureCategory = "transient";

        try
        {
            for (var attempt = 0; attempt < MaxJsonAttempts; attempt++)
            {
                attempts = attempt + 1;
                var completion = await _chatService.CompleteAsync(request, ct);
                if (completion.IsFailed)
                {
                    failureCategory = "transient";
                    continue;
                }

                // Usage is summed over every completed attempt, including ones whose content fails to parse.
                promptTokens += completion.Value.Usage.PromptTokens;
                completionTokens += completion.Value.Usage.CompletionTokens;
                charCount += completion.Value.Content.Length;

                if (ShouldSkipRetry(completion.Value.FinishReason))
                {
                    _logger.LogWarning("{Label} skipping retry due to deterministic finish reason '{FinishReason}'.",
                        label, completion.Value.FinishReason);
                    failureCategory = "permanent";
                    break;
                }

                var parsed = TryDeserialize<TResponse>(completion.Value.Content, label);
                if (parsed is null)
                {
                    failureCategory = "parse";
                    continue;
                }

                status = "ok";
                failureCategory = null;
                return parsed;
            }

            return Result.Fail($"{label} failed.");
        }
        catch (OperationCanceledException)
        {
            // Label cancellation the same way StreamAsync does instead of reporting it as a transient failure.
            status = "cancelled";
            failureCategory = "cancelled";
            throw;
        }
        finally
        {
            sw.Stop();
            var level = LogLevelFor(status);
            // Skip argument evaluation entirely when the level is disabled.
            if (_logger.IsEnabled(level))
            {
                _logger.Log(level, CallLogTemplate,
                    label, status, sw.ElapsedMilliseconds,
                    promptTokens, completionTokens, charCount,
                    attempts, failureCategory);
            }
        }
    }

    /// <summary>
    /// Streams a completion as a sequence of <see cref="StreamOutput"/> items.
    /// </summary>
    /// <param name="request">The completion request.</param>
    /// <param name="label">Name used in log entries.</param>
    /// <param name="ct">Cancellation token passed to the chat service and its enumerator.</param>
    /// <returns>
    /// A <see cref="StreamToken"/> for every non-empty token. If enumeration throws a non-cancellation
    /// exception, one <see cref="StreamFailure"/> is yielded and the stream ends. If the stream finishes
    /// without producing any characters, one <see cref="StreamFailure"/> ("Empty response from LLM.") is yielded.
    /// </returns>
    /// <exception cref="OperationCanceledException">Propagated unchanged when enumeration is cancelled.</exception>
    protected async IAsyncEnumerable<StreamOutput> StreamAsync(
        CompletionRequest request, string label, [EnumeratorCancellation] CancellationToken ct)
    {
        var sw = Stopwatch.StartNew();
        // Token counts for the log are the tracker's total after the call minus this snapshot.
        var usageBefore = _usageTracker.Total;
        var charCount = 0;
        var status = "ok";
        string? failureCategory = null;

        try
        {
            // Manual enumeration: C# does not allow 'yield return' inside a try block that has a catch
            // clause, so MoveNextAsync is wrapped on its own and the outcome is yielded afterwards.
            var enumerator = _chatService.StreamAsync(request, ct).GetAsyncEnumerator(ct);
            try
            {
                while (true)
                {
                    string? token = null;
                    string? failure = null;
                    var moved = false;

                    try
                    {
                        moved = await enumerator.MoveNextAsync();
                        if (moved) token = enumerator.Current;
                    }
                    catch (OperationCanceledException)
                    {
                        status = "cancelled";
                        failureCategory = "cancelled";
                        throw;
                    }
                    catch (Exception ex)
                    {
                        // Keep the exception (type and stack trace) in the log; the consumer only receives the message.
                        _logger.LogWarning(ex, "{Label} streaming call failed.", label);
                        failure = $"Streaming call failed: {ex.Message}";
                    }

                    if (failure != null)
                    {
                        status = "failure";
                        failureCategory = "transient";
                        yield return new StreamFailure(failure);
                        yield break;
                    }
                    if (!moved) break;

                    if (!string.IsNullOrEmpty(token))
                    {
                        charCount += token.Length;
                        yield return new StreamToken(token);
                    }
                }
            }
            finally
            {
                await enumerator.DisposeAsync();
            }

            if (charCount == 0)
            {
                status = "empty";
                failureCategory = "empty";
                yield return new StreamFailure("Empty response from LLM.");
            }
        }
        finally
        {
            sw.Stop();
            var level = LogLevelFor(status);
            // Skip the tracker read and argument evaluation when the level is disabled.
            if (_logger.IsEnabled(level))
            {
                var delta = _usageTracker.Total.Subtract(usageBefore);
                _logger.Log(level, CallLogTemplate,
                    label, status, sw.ElapsedMilliseconds,
                    delta.PromptTokens, delta.CompletionTokens, charCount,
                    1, failureCategory);
            }
        }
    }

    // Successful calls log at Information; every other status logs at Warning.
    private static LogLevel LogLevelFor(string status) =>
        status == "ok" ? LogLevel.Information : LogLevel.Warning;

    // Finish reasons for which CompleteJsonAsync stops instead of attempting again.
    private static bool ShouldSkipRetry(string? finishReason) =>
        string.Equals(finishReason, "length", StringComparison.OrdinalIgnoreCase)
        || string.Equals(finishReason, "max_tokens", StringComparison.OrdinalIgnoreCase)
        || string.Equals(finishReason, "content_filter", StringComparison.OrdinalIgnoreCase);

    // Best-effort parse: any exception is logged and reported as null so the caller can attempt again.
    private TResponse? TryDeserialize<TResponse>(string json, string label) where TResponse : class
    {
        try
        {
            return JsonSerializer.Deserialize<TResponse>(json, JsonOptions);
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "{Label} failed to parse LLM response.", label);
            return null;
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
namespace Tutor.BuildingBlocks.AI.Core.Agents;

/// <summary>
/// Output of a streaming agent call. Either a content token or a terminal failure.
/// </summary>
public abstract record StreamOutput;

/// <summary>
/// A piece of streamed content.
/// </summary>
/// <param name="Content">The text of this token.</param>
public sealed record StreamToken(string Content) : StreamOutput;

/// <summary>
/// Terminal failure of a streaming call.
/// </summary>
/// <param name="Reason">Human-readable description of why the stream failed.</param>
public sealed record StreamFailure(string Reason) : StreamOutput;
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,7 @@ public record CompletionResponse
public record TokenUsage(int PromptTokens, int CompletionTokens)
{
    /// <summary>Prompt tokens plus completion tokens.</summary>
    public int TotalTokens
    {
        get { return PromptTokens + CompletionTokens; }
    }

    /// <summary>
    /// Returns a new usage holding this instance's counts minus those of <paramref name="other"/>,
    /// computed component by component.
    /// </summary>
    /// <param name="other">The usage to subtract.</param>
    public TokenUsage Subtract(TokenUsage other)
    {
        var promptDelta = PromptTokens - other.PromptTokens;
        var completionDelta = CompletionTokens - other.CompletionTokens;
        return new TokenUsage(promptDelta, completionDelta);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
namespace Tutor.BuildingBlocks.AI.Core.Conversations;

/// <summary>
/// Accumulates <see cref="TokenUsage"/> across every LLM call within one scope (typically one HTTP request / one conversation turn).
/// Implementations must be thread-safe.
/// </summary>
public interface ITurnUsageTracker
{
/// <summary>Adds the usage of one LLM call to the accumulated total.</summary>
/// <param name="usage">Token usage to add.</param>
void Add(TokenUsage usage);

/// <summary>Usage accumulated so far within the current scope.</summary>
TokenUsage Total { get; }
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ public static IServiceCollection AddAIServices(this IServiceCollection services,

var kernel = kernelBuilder.Build();
services.AddSingleton(kernel);
services.AddSingleton<IAiChatService, SemanticKernelChatService>();
services.AddScoped<ITurnUsageTracker, TurnUsageTracker>();
services.AddScoped<IAiChatService, SemanticKernelChatService>();

if (!string.IsNullOrWhiteSpace(configuration.EmbeddingModelId))
{
Expand Down
Loading
Loading