Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,7 @@ replay_pid*

frontend/node_modules/
backend/local-db

# Gradle may extract Spring Boot 4.x spring.factories into the project root
# during dependency resolution; this file is a build artifact, not project source
META-INF/
20 changes: 18 additions & 2 deletions backend/build.gradle
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
plugins {
id 'java'
id 'org.springframework.boot' version '3.5.15'
id 'org.springframework.boot' version '4.1.0'
id 'io.spring.dependency-management' version '1.1.7'
id 'com.diffplug.spotless'
}
Expand All @@ -26,7 +26,7 @@ repositories {
}

ext {
set('springAiVersion', '1.1.6')
set('springAiVersion', '2.0.0')
}

dependencies {
Expand All @@ -38,12 +38,22 @@ dependencies {
implementation 'org.springframework.boot:spring-boot-starter-data-jdbc'
implementation 'org.postgresql:postgresql'
implementation 'com.h2database:h2'
// Spring Boot 4 extracted FlywayAutoConfiguration out of spring-boot-autoconfigure into the
// dedicated spring-boot-flyway module, which flyway-core does not pull transitively. Without it
// migrations never run (neither at app startup nor in @DataJdbcTest slices).
implementation 'org.springframework.boot:spring-boot-flyway'
implementation 'org.flywaydb:flyway-core'
implementation 'org.flywaydb:flyway-database-postgresql'

implementation 'org.springframework.ai:spring-ai-starter-model-openai'
implementation 'org.springframework.ai:spring-ai-starter-model-chat-memory-repository-jdbc'

// Spring Boot 4 makes Jackson 3 (tools.jackson) the auto-configured JSON mapper and demotes
// the retained Jackson 2 modules to runtime-only. Our internal serialization (ChatMemoryService,
// Spring Data JDBC converters, tool-call persistence) still targets Jackson 2, so pull its
// Java-time module back onto the compile classpath. Version is managed by the Jackson 2 BOM.
implementation 'com.fasterxml.jackson.datatype:jackson-datatype-jsr310'

implementation 'com.google.guava:guava:33.6.0-jre'

implementation 'io.github.bonede:tree-sitter:0.26.6'
Expand All @@ -61,6 +71,10 @@ dependencies {
annotationProcessor 'org.projectlombok:lombok'

testImplementation 'org.springframework.boot:spring-boot-starter-test'
// Spring Boot 4 extracted the per-slice test support out of spring-boot-test-autoconfigure
// into dedicated spring-boot-<module>-test artifacts (@DataJdbcTest, @AutoConfigureTestDatabase).
testImplementation 'org.springframework.boot:spring-boot-data-jdbc-test'
testImplementation 'org.springframework.boot:spring-boot-jdbc-test'
testImplementation 'org.springframework.security:spring-security-test'
testImplementation 'org.testcontainers:postgresql'
testImplementation 'org.testcontainers:junit-jupiter'
Expand All @@ -70,6 +84,8 @@ dependencies {
dependencyManagement {
imports {
mavenBom "org.springframework.ai:spring-ai-bom:${springAiVersion}"
// Spring Boot 4 no longer manages Testcontainers versions in its BOM, so import it directly.
mavenBom "org.testcontainers:testcontainers-bom:1.21.4"
}
}

Expand Down
277 changes: 169 additions & 108 deletions backend/gradle.lockfile

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
package io.github.trialiya.kb.advisor;

import static io.github.trialiya.kb.model.chat.dto.ChatEventType.TOOL_PREPARING;

import io.github.trialiya.kb.service.ChatEventService;
import java.util.Optional;
import java.util.concurrent.atomic.AtomicBoolean;
import org.springframework.ai.chat.client.ChatClientRequest;
import org.springframework.ai.chat.client.ChatClientResponse;
import org.springframework.ai.chat.client.advisor.api.StreamAdvisor;
import org.springframework.ai.chat.client.advisor.api.StreamAdvisorChain;
import org.springframework.ai.chat.memory.ChatMemory;
import org.springframework.ai.chat.metadata.ChatGenerationMetadata;
import org.springframework.ai.chat.model.ChatResponse;
import org.springframework.ai.chat.model.Generation;
import org.springframework.core.Ordered;
import reactor.core.publisher.Flux;

/**
 * Innermost advisor of the streaming path ({@link Ordered#LOWEST_PRECEDENCE} — closest to the
 * model). It sits inside the {@link
 * org.springframework.ai.chat.client.advisor.ToolCallingAdvisor} loop: it is invoked on every
 * iteration and sees the raw model stream before a tool is run.
 *
 * <p>When the model finishes forming a tool call (a chunk carries {@code
 * finishReason=TOOL_CALLS} or {@code hasToolCalls()=true}), it publishes a {@link
 * io.github.trialiya.kb.model.chat.dto.ChatEventType#TOOL_PREPARING} event to {@link
 * ChatEventService}. The frontend shows "preparing data…" with a delay, so fast calls pass
 * unnoticed.
 */
public class ToolPreparingAdvisor implements StreamAdvisor {

    /** Key under which the run id is passed through the request's advisor parameters. */
    public static final String RUN_ID_PARAM = "RUN_ID";

    private final ChatEventService events;

    /**
     * Creates the advisor.
     *
     * @param events service used to publish the {@code TOOL_PREPARING} event
     */
    public ToolPreparingAdvisor(ChatEventService events) {
        this.events = events;
    }

    /** {@inheritDoc} */
    @Override
    public String getName() {
        return "toolPreparingAdvisor";
    }

    /** Returns {@link Ordered#LOWEST_PRECEDENCE}. */
    @Override
    public int getOrder() {
        return Ordered.LOWEST_PRECEDENCE;
    }

    /**
     * Delegates to the rest of the chain and observes every streamed chunk, publishing {@code
     * TOOL_PREPARING} at most once per invocation of this method.
     *
     * @param request the current request; its context is read for {@link
     *     ChatMemory#CONVERSATION_ID} and {@link #RUN_ID_PARAM} (both fall back to {@code "?"})
     * @param chain the remaining advisor chain
     * @return the downstream flux with the side-effecting observer attached
     */
    @Override
    public Flux<ChatClientResponse> adviseStream(
            ChatClientRequest request, StreamAdvisorChain chain) {
        final String conversationId =
                String.valueOf(request.context().getOrDefault(ChatMemory.CONVERSATION_ID, "?"));
        final String runId = String.valueOf(request.context().getOrDefault(RUN_ID_PARAM, "?"));
        // One publication per iteration: the model emits several tool-call deltas, but a
        // single TOOL_PREPARING signal is enough.
        final AtomicBoolean alreadySignalled = new AtomicBoolean(false);

        final Flux<ChatClientResponse> downstream = chain.nextStream(request);
        return downstream.doOnNext(
                chunk -> signalIfToolCall(chunk, alreadySignalled, conversationId, runId));
    }

    /**
     * Publishes {@code TOOL_PREPARING} if the chunk indicates a tool call and the latch has not
     * been flipped yet.
     */
    private void signalIfToolCall(
            ChatClientResponse chunk,
            AtomicBoolean alreadySignalled,
            String conversationId,
            String runId) {
        if (alreadySignalled.get()) {
            return;
        }
        final ChatResponse chat = chunk.chatResponse();
        if (chat == null) {
            return;
        }
        final boolean carriesToolCalls = chat.hasToolCalls();
        final boolean finishedWithToolCalls = "TOOL_CALLS".equalsIgnoreCase(finishReasonOf(chat));
        if (!carriesToolCalls && !finishedWithToolCalls) {
            return;
        }
        // compareAndSet guards against two chunks racing past the cheap get() check above.
        if (alreadySignalled.compareAndSet(false, true)) {
            events.publish(conversationId, TOOL_PREPARING, runId, null, null);
        }
    }

    /**
     * Extracts the finish reason of the response's primary generation.
     *
     * @return the finish reason, or {@code null} if the response, its result, or the result's
     *     metadata is {@code null}
     */
    private static String finishReasonOf(ChatResponse response) {
        if (response == null) {
            return null;
        }
        final Generation result = response.getResult();
        if (result == null) {
            return null;
        }
        final ChatGenerationMetadata metadata = result.getMetadata();
        if (metadata == null) {
            return null;
        }
        return metadata.getFinishReason();
    }
}
30 changes: 28 additions & 2 deletions backend/src/main/java/io/github/trialiya/kb/config/ChatConfig.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package io.github.trialiya.kb.config;

import io.github.trialiya.kb.advisor.ToolPreparingAdvisor;
import io.github.trialiya.kb.config.model.SubAgentConfig;
import io.github.trialiya.kb.functions.AttachmentFunction;
import io.github.trialiya.kb.functions.DocumentFunction;
Expand All @@ -10,6 +11,7 @@
import io.github.trialiya.kb.repository.ChatMessageRepository;
import io.github.trialiya.kb.repository.ChatTopicRepository;
import io.github.trialiya.kb.service.AttachmentService;
import io.github.trialiya.kb.service.ChatEventService;
import io.github.trialiya.kb.service.ChatMemoryService;
import io.github.trialiya.kb.service.DocumentService;
import io.github.trialiya.kb.service.GitService;
Expand All @@ -23,6 +25,8 @@
import lombok.extern.slf4j.Slf4j;
import org.springframework.ai.chat.client.ChatClient;
import org.springframework.ai.chat.client.advisor.MessageChatMemoryAdvisor;
import org.springframework.ai.chat.client.advisor.ToolCallingAdvisor;
import org.springframework.ai.chat.client.advisor.api.Advisor;
import org.springframework.ai.chat.memory.ChatMemory;
import org.springframework.ai.chat.memory.MessageWindowChatMemory;
import org.springframework.ai.chat.model.ChatModel;
Expand Down Expand Up @@ -117,7 +121,8 @@ public ChatClient chatClientBuilder(
GitFunction gitFunction,
DocumentFunction documentFunction,
AttachmentService attachmentService,
ObjectProvider<SearchAgentService> searchAgentService) {
ObjectProvider<SearchAgentService> searchAgentService,
ChatEventService chatEventService) {
log.info("Model: {}", chatModel.getDefaultOptions());

List<Object> functions =
Expand All @@ -135,8 +140,29 @@ public ChatClient chatClientBuilder(
Stream.of(ToolCallbacks.from(functions.toArray()))
.map(RecordingToolCallback::new)
.toArray(ToolCallback[]::new);

// Advisor chain — outermost to innermost (ascending getOrder()):
//
// MessageChatMemoryAdvisor (HIGHEST_PRECEDENCE+200 = MIN+200) — OUTSIDE the loop:
// loads conversation history once before the loop starts and saves only the user
// message + final assistant reply. Tool request/response messages are NOT written to
// the store. This is intentional — our JDBC ChatMemoryRepository does not support
// ToolResponseMessage / tool-call serialization. Matches Spring AI 1.x behaviour.
//
// ToolCallingAdvisor (DEFAULT_ORDER = MIN+300) — drives the tool loop.
// Because MessageChatMemoryAdvisor is OUTSIDE the loop (order < DEFAULT_ORDER),
// ToolCallingAdvisor manages its own internal conversation accumulation across
// iterations and no call to .disableInternalConversationHistory() is needed.
//
// ToolPreparingAdvisor (LOWEST_PRECEDENCE = MAX) — INSIDE the loop:
// called on every iteration; emits TOOL_PREPARING before each tool execution round.
List<Advisor> advisors = new ArrayList<>();
advisors.add(MessageChatMemoryAdvisor.builder(chatMemory).build());
advisors.add(ToolCallingAdvisor.builder().toolCallingManager(toolCallingManager).build());
advisors.add(new ToolPreparingAdvisor(chatEventService));

return ChatClient.builder(chatModel)
.defaultAdvisors(MessageChatMemoryAdvisor.builder(chatMemory).build())
.defaultAdvisors(advisors)
.defaultSystem(sysPrompt)
.defaultToolCallbacks(callbacks)
.build();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ public List<String> createMessage(
.toolContext(buildContext(conversationId, toolCollector))
.advisors(a -> a.param(ChatMemory.CONVERSATION_ID, conversationId));
if (resolvedModel != null) {
spec = spec.options(OpenAiChatOptions.builder().model(resolvedModel).build());
spec = spec.options(OpenAiChatOptions.builder().model(resolvedModel));
}

final ChatResponse chatResponse = spec.call().chatResponse();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ public enum ChatEventType {
/**
* Ранний сигнал: модель начала формировать вызов инструмента (генерирует аргументы), но сам
* инструмент ещё не запущен и его имя пока недоступно. Без payload — фронт показывает «готовлю
* данные…», если ожидание затягивается.
* данные…», если ожидание затягивается. В данный момент работает некорректно, см. <a
* href="docs/todo/tool-preparing.md">TOOL_PREPARING</a>.
*/
TOOL_PREPARING,
/** Обновление одного вызова инструмента ({@code payload}: {@link ToolCallMessage}). */
Expand Down
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
package io.github.trialiya.kb.service;

import static io.github.trialiya.kb.advisor.ToolPreparingAdvisor.RUN_ID_PARAM;
import static io.github.trialiya.kb.model.chat.dto.ChatEventType.RUN_DONE;
import static io.github.trialiya.kb.model.chat.dto.ChatEventType.RUN_ERROR;
import static io.github.trialiya.kb.model.chat.dto.ChatEventType.RUN_STARTED;
import static io.github.trialiya.kb.model.chat.dto.ChatEventType.RUN_STOPPED;
import static io.github.trialiya.kb.model.chat.dto.ChatEventType.STREAM;
import static io.github.trialiya.kb.model.chat.dto.ChatEventType.TOOL_CALL;
import static io.github.trialiya.kb.model.chat.dto.ChatEventType.TOOL_CALLS;
import static io.github.trialiya.kb.model.chat.dto.ChatEventType.TOOL_PREPARING;
import static io.github.trialiya.kb.model.chat.dto.ChatEventType.USER_MESSAGE;

import io.github.trialiya.kb.model.chat.dto.ChatEventType;
Expand Down Expand Up @@ -161,14 +161,9 @@ private void run(
final String conversationId = handle.conversationId();
final String runId = handle.runId();
final AtomicInteger callIndex = new AtomicInteger(0);
// Защёлка «ранний сигнал отправлен»: пока модель формирует вызов инструмента (генерирует
// аргументы), видимого текста нет. Шлём TOOL_PREPARING один раз на такую паузу; сбрасываем,
// когда инструмент реально стартовал, чтобы следующая пауза снова дала сигнал.
final AtomicBoolean preparing = new AtomicBoolean(false);
final Consumer<Object> liveSink =
payload -> {
if (payload instanceof ToolCallMessage tcm) {
preparing.set(false);
if (tcm.toolCall().status() != ToolInvocationStatus.STARTED) {
chatMemoryService.saveToolCallIncremental(
conversationId,
Expand Down Expand Up @@ -198,9 +193,12 @@ private void run(
.toolContext(
ChatUtils.buildContext(
conversationId, toolCollector, handle.user()))
.advisors(a -> a.param(ChatMemory.CONVERSATION_ID, conversationId));
.advisors(
a ->
a.param(ChatMemory.CONVERSATION_ID, conversationId)
.param(RUN_ID_PARAM, runId));
if (resolvedModel != null) {
spec = spec.options(OpenAiChatOptions.builder().model(resolvedModel).build());
spec = spec.options(OpenAiChatOptions.builder().model(resolvedModel));
}

final Disposable disposable =
Expand All @@ -214,7 +212,6 @@ private void run(
runId,
buffer,
liveSink,
preparing,
response),
error -> log.error("Stream error {}", conversationId, error),
() -> onComplete(handle, toolCollector, liveSink));
Expand All @@ -232,7 +229,6 @@ private void onNext(
String runId,
StringBuffer buffer,
Consumer<Object> liveSink,
AtomicBoolean preparing,
ChatResponse response) {
final String chunk =
Optional.ofNullable(response)
Expand All @@ -247,28 +243,9 @@ private void onNext(
.map(ChatGenerationMetadata::getFinishReason)
.orElse(null);

// Ранний сигнал: модель формирует вызов инструмента — в чанке появились tool-call дельты
// (или пришёл finishReason=TOOL_CALLS), но видимого текста нет. Шлём TOOL_PREPARING один
// раз; как только снова идёт текст — снимаем «подготовку», чтобы не залипал индикатор.
final boolean toolDelta = hasToolCallDelta(response) || "TOOL_CALLS".equals(finishReason);
if (chunk != null && !chunk.isEmpty()) {
preparing.set(false);
} else if (toolDelta && preparing.compareAndSet(false, true)) {
events.publish(conversationId, TOOL_PREPARING, runId, null, null);
}
// Копим ВЕСЬ текст ответа — он понадобится для частичного сохранения при stop/ошибке.
// На нормальном завершении ответ сохраняет advisor (по doOnComplete), а на отмене/ошибке
// (doOnComplete не срабатывает) сохраняем только мы. Поэтому на границе сегмента (модель
// пошла звать инструменты, finishReason=TOOL_CALLS) буфер НЕ сбрасываем — иначе при
// остановке после tool-call потеряются ранние сегменты; вместо сброса ставим разделитель.
if (chunk != null && !chunk.isEmpty()) {
buffer.append(chunk);
}
if ("TOOL_CALLS".equals(finishReason)
&& buffer.length() > 0
&& buffer.charAt(buffer.length() - 1) != '\n') {
buffer.append("\n\n");
}
liveSink.accept(new StreamMessage(chunk, finishReason));
printUsageStatistics(conversationId, response, finishReason);
}
Expand Down Expand Up @@ -367,20 +344,6 @@ private void printUsageStatistics(
});
}

/**
* Несёт ли чанк стрима данные вызова инструмента: в стриминге OpenAI имя/аргументы инструмента
* приходят отдельными дельтами с пустым текстом — это и есть самый ранний момент, когда видно,
* что модель готовит вызов (ещё до finishReason=TOOL_CALLS и до запуска самого инструмента).
*/
private static boolean hasToolCallDelta(ChatResponse response) {
return Optional.ofNullable(response)
.map(ChatResponse::getResult)
.map(Generation::getOutput)
.map(AssistantMessage::getToolCalls)
.map(calls -> !calls.isEmpty())
.orElse(false);
}

private static ChatEventType eventType(Object payload) {
return switch (payload) {
case ToolCallMessage _ -> TOOL_CALL;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,6 @@ public SearchAgentResult run(
.model(config.modelId())
.maxTokens(config.maxTokens())
.temperature(0.0)
.internalToolExecutionEnabled(false) // we drive the loop ourselves
.toolCallbacks(toolCallbacks)
.toolContext(buildContext(conversationId))
.build();
Expand Down
5 changes: 3 additions & 2 deletions backend/src/main/resources/application.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -140,8 +140,9 @@ kb:
base-url: ${CONFLUENCE_BASE_URL:}
api-token: ${CONFLUENCE_TOKEN:}

#logging:
# level:
logging:
level:
root: INFO
# org:
# springframework:
# jdbc:
Expand Down
Loading