diff --git a/.claude/settings.local.json b/.claude/settings.local.json
new file mode 100644
index 000000000..8b311a3fc
--- /dev/null
+++ b/.claude/settings.local.json
@@ -0,0 +1,5 @@
+{
+ "permissions": {
+ "allow": []
+ }
+}
diff --git a/.dockerignore b/.dockerignore
index 45c1def32..385a6449f 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -37,8 +37,6 @@ build/
*.tgz
# Backend
-backend/assets/*
-!backend/assets/test.wav
backend/flower_db.sqlite
uploads/
test/
@@ -60,4 +58,4 @@ assets/
.Spotlight-V100
.Trashes
ehthumbs.db
-Thumbs.db
\ No newline at end of file
+Thumbs.db
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 000000000..6313b56c5
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1 @@
+* text=auto eol=lf
diff --git a/.github/workflows/auto-build-data-process-dev.yml b/.github/workflows/auto-build-data-process-dev.yml
index c9885170e..6be8bf638 100644
--- a/.github/workflows/auto-build-data-process-dev.yml
+++ b/.github/workflows/auto-build-data-process-dev.yml
@@ -7,14 +7,14 @@ concurrency:
on:
workflow_dispatch:
pull_request:
- branches: [develop]
+ branches: [develop, 'release/**', 'hotfix/**']
paths:
- 'backend/**'
- 'sdk/**'
- 'make/data_process/**'
- '.github/workflows/**'
push:
- branches: [develop]
+ branches: [develop, 'release/**', 'hotfix/**']
paths:
- 'backend/**'
- 'sdk/**'
diff --git a/.github/workflows/auto-build-doc-dev.yml b/.github/workflows/auto-build-doc-dev.yml
index 697aa0204..7c2cd46d7 100644
--- a/.github/workflows/auto-build-doc-dev.yml
+++ b/.github/workflows/auto-build-doc-dev.yml
@@ -7,12 +7,12 @@ concurrency:
on:
workflow_dispatch:
pull_request:
- branches: [develop]
+ branches: [develop, 'release/**', 'hotfix/**']
paths:
- 'doc/**'
- '.github/workflows/**'
push:
- branches: [develop]
+ branches: [develop, 'release/**', 'hotfix/**']
paths:
- 'doc/**'
- '.github/workflows/**'
diff --git a/.github/workflows/auto-build-main-dev.yml b/.github/workflows/auto-build-main-dev.yml
index dbd69ac12..2815c50df 100644
--- a/.github/workflows/auto-build-main-dev.yml
+++ b/.github/workflows/auto-build-main-dev.yml
@@ -7,14 +7,14 @@ concurrency:
on:
workflow_dispatch:
pull_request:
- branches: [develop]
+ branches: [develop, 'release/**', 'hotfix/**']
paths:
- 'backend/**'
- 'sdk/**'
- 'make/main/**'
- '.github/workflows/**'
push:
- branches: [develop]
+ branches: [develop, 'release/**', 'hotfix/**']
paths:
- 'backend/**'
- 'sdk/**'
diff --git a/.github/workflows/auto-build-mcp-dev.yml b/.github/workflows/auto-build-mcp-dev.yml
index dacf04749..03aea08b2 100644
--- a/.github/workflows/auto-build-mcp-dev.yml
+++ b/.github/workflows/auto-build-mcp-dev.yml
@@ -7,14 +7,14 @@ concurrency:
on:
workflow_dispatch:
pull_request:
- branches: [develop]
+ branches: [develop, 'release/**', 'hotfix/**']
paths:
- 'backend/**'
- 'sdk/**'
- 'make/mcp/**'
- '.github/workflows/**'
push:
- branches: [develop]
+ branches: [develop, 'release/**', 'hotfix/**']
paths:
- 'backend/**'
- 'sdk/**'
diff --git a/.github/workflows/auto-build-terminal-dev.yml b/.github/workflows/auto-build-terminal-dev.yml
index fbc251edb..62fc20165 100644
--- a/.github/workflows/auto-build-terminal-dev.yml
+++ b/.github/workflows/auto-build-terminal-dev.yml
@@ -7,12 +7,12 @@ concurrency:
on:
workflow_dispatch:
pull_request:
- branches: [develop]
+ branches: [develop, 'release/**', 'hotfix/**']
paths:
- 'make/terminal/**'
- '.github/workflows/**'
push:
- branches: [develop]
+ branches: [develop, 'release/**', 'hotfix/**']
paths:
- 'make/terminal/**'
- '.github/workflows/**'
diff --git a/.github/workflows/auto-build-web-dev.yml b/.github/workflows/auto-build-web-dev.yml
index 28f967894..a5abeb0b3 100644
--- a/.github/workflows/auto-build-web-dev.yml
+++ b/.github/workflows/auto-build-web-dev.yml
@@ -7,13 +7,13 @@ concurrency:
on:
workflow_dispatch:
pull_request:
- branches: [develop]
+ branches: [develop, 'release/**', 'hotfix/**']
paths:
- 'frontend/**'
- 'make/web/**'
- '.github/workflows/**'
push:
- branches: [develop]
+ branches: [develop, 'release/**', 'hotfix/**']
paths:
- 'frontend/**'
- 'make/web/**'
diff --git a/.github/workflows/auto-unit-test.yml b/.github/workflows/auto-unit-test.yml
index 6addafa22..dace8dab6 100644
--- a/.github/workflows/auto-unit-test.yml
+++ b/.github/workflows/auto-unit-test.yml
@@ -12,14 +12,14 @@ on:
required: false
default: '["ubuntu-24.04-arm"]'
pull_request:
- branches: [develop]
+ branches: [develop, 'release/**', 'hotfix/**']
paths:
- 'backend/**'
- 'sdk/**'
- 'test/**'
- '.github/workflows/**'
push:
- branches: [develop]
+ branches: [develop, 'release/**', 'hotfix/**']
paths:
- 'backend/**'
- 'sdk/**'
@@ -36,7 +36,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v4
with:
- python-version: '3.10'
+ python-version: '3.11'
- name: Install uv
run: pip install --upgrade uv
@@ -68,26 +68,7 @@ jobs:
echo "✅ All tests passed successfully."
fi
- # Detect architecture
- - name: Detect architecture
- id: arch
- run: echo "arch=$(uname -m)" >> $GITHUB_OUTPUT
-
- # Use Python uploader on ARM
- - name: Upload coverage to Codecov (Python uploader on ARM)
- if: startsWith(steps.arch.outputs.arch, 'arm') || startsWith(steps.arch.outputs.arch, 'aarch64')
- run: |
- pip install --upgrade codecov
- codecov \
- -t ${{ secrets.CODECOV_TOKEN }} \
- -f test/coverage.xml \
- -F unittests \
- -n codecov-umbrella \
- -v
-
- # Use official action on x86
- - name: Upload coverage to Codecov (Official Action on x86)
- if: steps.arch.outputs.arch == 'x86_64'
+ - name: Upload coverage to Codecov
uses: codecov/codecov-action@v4
with:
files: test/coverage.xml
@@ -96,4 +77,3 @@ jobs:
name: codecov-umbrella
fail_ci_if_error: false
verbose: true
- directory: .
diff --git a/.github/workflows/auto-web-check-dev.yml b/.github/workflows/auto-web-check-dev.yml
index cd107b6e5..ae831a3fb 100644
--- a/.github/workflows/auto-web-check-dev.yml
+++ b/.github/workflows/auto-web-check-dev.yml
@@ -11,12 +11,12 @@ on:
description: 'runner array in json format (e.g. ["ubuntu-latest"] or ["self-hosted"])'
default: '["ubuntu-latest"]'
pull_request:
- branches: [develop]
+ branches: [develop, 'release/**', 'hotfix/**']
paths:
- 'frontend/**'
- '.github/workflows/**'
push:
- branches: [develop]
+ branches: [develop, 'release/**', 'hotfix/**']
paths:
- 'frontend/**'
- '.github/workflows/**'
diff --git a/.github/workflows/build-offline-package.yml b/.github/workflows/build-offline-package.yml
new file mode 100644
index 000000000..6619cf764
--- /dev/null
+++ b/.github/workflows/build-offline-package.yml
@@ -0,0 +1,117 @@
+name: Build Offline Deployment Package
+
+# Manually dispatched workflow: builds one offline deployment package per
+# platform in the matrix and uploads each as a ZIP workflow artifact.
+on:
+  workflow_dispatch:
+    inputs:
+      include_source:
+        description: 'Include source code in the package'
+        required: false
+        default: true
+        type: boolean
+
+# Least privilege: the job only needs to read the repository contents.
+permissions:
+  contents: read
+
+jobs:
+  build-offline-package:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        platform: [amd64, arm64]
+
+    steps:
+      - name: Free disk space
+        # Pinned to a release tag rather than the mutable `main` branch.
+        uses: jlumbroso/free-disk-space@v1.3.1
+        with:
+          tool-cache: false
+          android: true
+          dotnet: true
+          haskell: true
+          large-packages: true
+          docker-images: false
+          swap-storage: true
+
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+
+      # Version rules: branch "main" -> "latest"; any other branch or a tag ->
+      # the ref name with "/" replaced by "-". The ref is read from the
+      # runner-provided GITHUB_REF_TYPE / GITHUB_REF_NAME variables instead of
+      # being interpolated into the script, so a crafted ref name cannot
+      # inject shell commands.
+      - name: Set version and platform variables
+        id: set-vars
+        env:
+          PLATFORM: ${{ matrix.platform }}
+        run: |
+          case "${GITHUB_REF_TYPE}:${GITHUB_REF_NAME}" in
+            branch:main)    VERSION="latest" ;;
+            branch:*|tag:*) VERSION="${GITHUB_REF_NAME//\//-}" ;;
+            *)              VERSION="latest" ;;
+          esac
+
+          {
+            echo "version=$VERSION"
+            echo "platform=$PLATFORM"
+            echo "package-name=nexent-offline-${PLATFORM}-${VERSION}"
+          } >> "$GITHUB_OUTPUT"
+
+      # Step outputs and inputs are handed to the scripts below through `env`
+      # so their values are never spliced into shell source text.
+      - name: Build offline package
+        env:
+          VERSION: ${{ steps.set-vars.outputs.version }}
+          PLATFORM: ${{ matrix.platform }}
+          INCLUDE_SOURCE: ${{ inputs.include_source }}
+        run: |
+          chmod +x scripts/offline/build_offline_package.sh
+
+          ./scripts/offline/build_offline_package.sh \
+            --version "$VERSION" \
+            --platform "$PLATFORM" \
+            --output-dir ./offline-output \
+            --include-source "$INCLUDE_SOURCE"
+
+      - name: Create ZIP package
+        env:
+          PACKAGE_NAME: ${{ steps.set-vars.outputs.package-name }}
+        run: |
+          # Archive from inside offline-output so entries are relative to it.
+          (cd offline-output && zip -r "../${PACKAGE_NAME}.zip" .)
+
+          echo "Package created: ${PACKAGE_NAME}.zip"
+          ls -lh "${PACKAGE_NAME}.zip"
+
+      - name: Upload artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: ${{ steps.set-vars.outputs.package-name }}
+          path: ${{ steps.set-vars.outputs.package-name }}.zip
+          retention-days: 30
+
+      - name: Summary
+        env:
+          VERSION: ${{ steps.set-vars.outputs.version }}
+          PLATFORM: ${{ matrix.platform }}
+          PACKAGE_NAME: ${{ steps.set-vars.outputs.package-name }}
+        run: |
+          echo ""
+          echo "========================================"
+          echo "Offline Package Build Summary"
+          echo "========================================"
+          echo "Version: ${VERSION}"
+          echo "Platform: ${PLATFORM}"
+          echo "Package: ${PACKAGE_NAME}.zip"
+          echo "Ref Type: ${GITHUB_REF_TYPE}"
+          echo "Ref Name: ${GITHUB_REF_NAME}"
+          echo "========================================"
+          echo ""
+          echo "Package contents:"
+          unzip -l "${PACKAGE_NAME}.zip" | head -50
diff --git a/.github/workflows/docker-build-push-mainland.yml b/.github/workflows/docker-build-push-mainland.yml
index 1aa41b560..8c215c7ec 100644
--- a/.github/workflows/docker-build-push-mainland.yml
+++ b/.github/workflows/docker-build-push-mainland.yml
@@ -16,10 +16,15 @@ on:
description: 'runner array in json format (e.g. ["ubuntu-latest"] or ["self-hosted"])'
required: true
default: '["ubuntu-latest"]'
+ push:
+ branches:
+ - main
+ tags:
+ - 'v*'
jobs:
build-and-push-main-amd64:
- runs-on: ${{ fromJson(inputs.runner_label_json) }}
+ runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }}
steps:
- name: Set up Docker Buildx
run: |
@@ -32,20 +37,20 @@ jobs:
uses: actions/checkout@v4
- name: Build main image (amd64) and load locally
run: |
- docker buildx build --platform linux/amd64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ inputs.version }}-amd64 -f make/main/Dockerfile --build-arg MIRROR=https://pypi.tuna.tsinghua.edu.cn/simple --build-arg APT_MIRROR=tsinghua .
+ docker buildx build --platform linux/amd64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 -f make/main/Dockerfile --build-arg MIRROR=https://pypi.tuna.tsinghua.edu.cn/simple --build-arg APT_MIRROR=tsinghua .
- name: Login to Tencent Cloud
run: echo ${{ secrets.TCR_PASSWORD }} | docker login ccr.ccs.tencentyun.com --username=${{ secrets.TCR_USERNAME }} --password-stdin
- name: Push main image (amd64) to Tencent Cloud
- run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ inputs.version }}-amd64
+ run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64
- name: Tag main image (amd64) as latest
- if: inputs.push_latest == 'true'
- run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ inputs.version }}-amd64 ccr.ccs.tencentyun.com/nexent-hub/nexent:amd64
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
+ run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 ccr.ccs.tencentyun.com/nexent-hub/nexent:amd64
- name: Push latest main image (amd64) to Tencent Cloud
- if: inputs.push_latest == 'true'
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent:amd64
build-and-push-main-arm64:
- runs-on: ${{ fromJson(inputs.runner_label_json) }}
+ runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }}
steps:
- name: Set up Docker Buildx
run: |
@@ -58,20 +63,20 @@ jobs:
uses: actions/checkout@v4
- name: Build main image (arm64) and load locally
run: |
- docker buildx build --platform linux/arm64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ inputs.version }}-arm64 -f make/main/Dockerfile --build-arg MIRROR=https://pypi.tuna.tsinghua.edu.cn/simple --build-arg APT_MIRROR=tsinghua .
+ docker buildx build --platform linux/arm64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 -f make/main/Dockerfile --build-arg MIRROR=https://pypi.tuna.tsinghua.edu.cn/simple --build-arg APT_MIRROR=tsinghua .
- name: Login to Tencent Cloud
run: echo ${{ secrets.TCR_PASSWORD }} | docker login ccr.ccs.tencentyun.com --username=${{ secrets.TCR_USERNAME }} --password-stdin
- name: Push main image (arm64) to Tencent Cloud
- run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ inputs.version }}-arm64
+ run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64
- name: Tag main image (arm64) as latest
- if: inputs.push_latest == 'true'
- run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ inputs.version }}-arm64 ccr.ccs.tencentyun.com/nexent-hub/nexent:arm64
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
+ run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 ccr.ccs.tencentyun.com/nexent-hub/nexent:arm64
- name: Push latest main image (arm64) to Tencent Cloud
- if: inputs.push_latest == 'true'
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent:arm64
build-and-push-data-process-amd64:
- runs-on: ${{ fromJson(inputs.runner_label_json) }}
+ runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }}
steps:
- name: Free up disk space on GitHub runner
run: |
@@ -93,20 +98,20 @@ jobs:
rm -rf .git .gitattributes
- name: Build data process image (amd64) and load locally
run: |
- docker buildx build --platform linux/amd64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ inputs.version }}-amd64 -f make/data_process/Dockerfile --build-arg MIRROR=https://pypi.tuna.tsinghua.edu.cn/simple --build-arg APT_MIRROR=tsinghua .
+ docker buildx build --platform linux/amd64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 -f make/data_process/Dockerfile --build-arg MIRROR=https://pypi.tuna.tsinghua.edu.cn/simple --build-arg APT_MIRROR=tsinghua .
- name: Login to Tencent Cloud
run: echo ${{ secrets.TCR_PASSWORD }} | docker login ccr.ccs.tencentyun.com --username=${{ secrets.TCR_USERNAME }} --password-stdin
- name: Push data process image (amd64) to Tencent Cloud
- run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ inputs.version }}-amd64
+ run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64
- name: Tag data process image (amd64) as latest
- if: inputs.push_latest == 'true'
- run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ inputs.version }}-amd64 ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:amd64
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
+ run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:amd64
- name: Push latest data process image (amd64) to Tencent Cloud
- if: inputs.push_latest == 'true'
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:amd64
build-and-push-data-process-arm64:
- runs-on: ${{ fromJson(inputs.runner_label_json) }}
+ runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }}
steps:
- name: Free up disk space on GitHub runner
run: |
@@ -128,20 +133,20 @@ jobs:
rm -rf .git .gitattributes
- name: Build data process image (arm64) and load locally
run: |
- docker buildx build --platform linux/arm64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ inputs.version }}-arm64 -f make/data_process/Dockerfile --build-arg MIRROR=https://pypi.tuna.tsinghua.edu.cn/simple --build-arg APT_MIRROR=tsinghua .
+ docker buildx build --platform linux/arm64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 -f make/data_process/Dockerfile --build-arg MIRROR=https://pypi.tuna.tsinghua.edu.cn/simple --build-arg APT_MIRROR=tsinghua .
- name: Login to Tencent Cloud
run: echo ${{ secrets.TCR_PASSWORD }} | docker login ccr.ccs.tencentyun.com --username=${{ secrets.TCR_USERNAME }} --password-stdin
- name: Push data process image (arm64) to Tencent Cloud
- run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ inputs.version }}-arm64
+ run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64
- name: Tag data process image (arm64) as latest
- if: inputs.push_latest == 'true'
- run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ inputs.version }}-arm64 ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:arm64
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
+ run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:arm64
- name: Push latest data process image (arm64) to Tencent Cloud
- if: inputs.push_latest == 'true'
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:arm64
build-and-push-web-amd64:
- runs-on: ${{ fromJson(inputs.runner_label_json) }}
+ runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }}
steps:
- name: Set up Docker Buildx
run: |
@@ -154,20 +159,20 @@ jobs:
uses: actions/checkout@v4
- name: Build web image (amd64) and load locally
run: |
- docker buildx build --platform linux/amd64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ inputs.version }}-amd64 -f make/web/Dockerfile --build-arg MIRROR=https://registry.npmmirror.com --build-arg APK_MIRROR=tsinghua .
+ docker buildx build --platform linux/amd64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 -f make/web/Dockerfile --build-arg MIRROR=https://registry.npmmirror.com --build-arg APK_MIRROR=tsinghua .
- name: Login to Tencent Cloud
run: echo ${{ secrets.TCR_PASSWORD }} | docker login ccr.ccs.tencentyun.com --username=${{ secrets.TCR_USERNAME }} --password-stdin
- name: Push web image (amd64) to Tencent Cloud
- run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ inputs.version }}-amd64
+ run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64
- name: Tag web image (amd64) as latest
- if: inputs.push_latest == 'true'
- run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ inputs.version }}-amd64 ccr.ccs.tencentyun.com/nexent-hub/nexent-web:amd64
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
+ run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 ccr.ccs.tencentyun.com/nexent-hub/nexent-web:amd64
- name: Push latest web image (amd64) to Tencent Cloud
- if: inputs.push_latest == 'true'
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-web:amd64
build-and-push-web-arm64:
- runs-on: ${{ fromJson(inputs.runner_label_json) }}
+ runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }}
steps:
- name: Set up Docker Buildx
run: |
@@ -180,20 +185,20 @@ jobs:
uses: actions/checkout@v4
- name: Build web image (arm64) and load locally
run: |
- docker buildx build --platform linux/arm64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ inputs.version }}-arm64 -f make/web/Dockerfile --build-arg MIRROR=https://registry.npmmirror.com --build-arg APK_MIRROR=tsinghua .
+ docker buildx build --platform linux/arm64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 -f make/web/Dockerfile --build-arg MIRROR=https://registry.npmmirror.com --build-arg APK_MIRROR=tsinghua .
- name: Login to Tencent Cloud
run: echo ${{ secrets.TCR_PASSWORD }} | docker login ccr.ccs.tencentyun.com --username=${{ secrets.TCR_USERNAME }} --password-stdin
- name: Push web image (arm64) to Tencent Cloud
- run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ inputs.version }}-arm64
+ run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64
- name: Tag web image (arm64) as latest
- if: inputs.push_latest == 'true'
- run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ inputs.version }}-arm64 ccr.ccs.tencentyun.com/nexent-hub/nexent-web:arm64
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
+ run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 ccr.ccs.tencentyun.com/nexent-hub/nexent-web:arm64
- name: Push latest web image (arm64) to Tencent Cloud
- if: inputs.push_latest == 'true'
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-web:arm64
build-and-push-terminal-amd64:
- runs-on: ${{ fromJson(inputs.runner_label_json) }}
+ runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }}
steps:
- name: Set up Docker Buildx
run: |
@@ -206,20 +211,20 @@ jobs:
uses: actions/checkout@v4
- name: Build terminal image (amd64) and load locally
run: |
- docker buildx build --platform linux/amd64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ inputs.version }}-amd64 -f make/terminal/Dockerfile .
+ docker buildx build --platform linux/amd64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 -f make/terminal/Dockerfile .
- name: Login to Tencent Cloud
run: echo ${{ secrets.TCR_PASSWORD }} | docker login ccr.ccs.tencentyun.com --username=${{ secrets.TCR_USERNAME }} --password-stdin
- name: Push terminal image (amd64) to Tencent Cloud
- run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ inputs.version }}-amd64
+ run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64
- name: Tag terminal image (amd64) as latest
- if: inputs.push_latest == 'true'
- run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ inputs.version }}-amd64 ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:amd64
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
+ run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:amd64
- name: Push latest terminal image (amd64) to Tencent Cloud
- if: inputs.push_latest == 'true'
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:amd64
build-and-push-terminal-arm64:
- runs-on: ${{ fromJson(inputs.runner_label_json) }}
+ runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }}
steps:
- name: Set up Docker Buildx
run: |
@@ -232,20 +237,20 @@ jobs:
uses: actions/checkout@v4
- name: Build terminal image (arm64) and load locally
run: |
- docker buildx build --platform linux/arm64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ inputs.version }}-arm64 -f make/terminal/Dockerfile .
+ docker buildx build --platform linux/arm64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 -f make/terminal/Dockerfile .
- name: Login to Tencent Cloud
run: echo ${{ secrets.TCR_PASSWORD }} | docker login ccr.ccs.tencentyun.com --username=${{ secrets.TCR_USERNAME }} --password-stdin
- name: Push terminal image (arm64) to Tencent Cloud
- run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ inputs.version }}-arm64
+ run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64
- name: Tag terminal image (arm64) as latest
- if: inputs.push_latest == 'true'
- run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ inputs.version }}-arm64 ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:arm64
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
+ run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:arm64
- name: Push latest terminal image (arm64) to Tencent Cloud
- if: inputs.push_latest == 'true'
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:arm64
build-and-push-mcp-amd64:
- runs-on: ${{ fromJson(inputs.runner_label_json) }}
+ runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }}
steps:
- name: Set up Docker Buildx
run: |
@@ -258,20 +263,20 @@ jobs:
uses: actions/checkout@v4
- name: Build MCP image (amd64) and load locally
run: |
- docker buildx build --platform linux/amd64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ inputs.version }}-amd64 -f make/mcp/Dockerfile --build-arg MIRROR=https://pypi.tuna.tsinghua.edu.cn/simple --build-arg APT_MIRROR=tsinghua .
+ docker buildx build --platform linux/amd64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 -f make/mcp/Dockerfile --build-arg MIRROR=https://pypi.tuna.tsinghua.edu.cn/simple --build-arg APT_MIRROR=tsinghua .
- name: Login to Tencent Cloud
run: echo ${{ secrets.TCR_PASSWORD }} | docker login ccr.ccs.tencentyun.com --username=${{ secrets.TCR_USERNAME }} --password-stdin
- name: Push MCP image (amd64) to Tencent Cloud
- run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ inputs.version }}-amd64
+ run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64
- name: Tag MCP image (amd64) as latest
- if: inputs.push_latest == 'true'
- run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ inputs.version }}-amd64 ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:amd64
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
+ run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:amd64
- name: Push latest MCP image (amd64) to Tencent Cloud
- if: inputs.push_latest == 'true'
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:amd64
build-and-push-mcp-arm64:
- runs-on: ${{ fromJson(inputs.runner_label_json) }}
+ runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }}
steps:
- name: Set up Docker Buildx
run: |
@@ -284,16 +289,16 @@ jobs:
uses: actions/checkout@v4
- name: Build MCP image (arm64) and load locally
run: |
- docker buildx build --platform linux/arm64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ inputs.version }}-arm64 -f make/mcp/Dockerfile --build-arg MIRROR=https://pypi.tuna.tsinghua.edu.cn/simple --build-arg APT_MIRROR=tsinghua .
+ docker buildx build --platform linux/arm64 --load -t ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 -f make/mcp/Dockerfile --build-arg MIRROR=https://pypi.tuna.tsinghua.edu.cn/simple --build-arg APT_MIRROR=tsinghua .
- name: Login to Tencent Cloud
run: echo ${{ secrets.TCR_PASSWORD }} | docker login ccr.ccs.tencentyun.com --username=${{ secrets.TCR_USERNAME }} --password-stdin
- name: Push MCP image (arm64) to Tencent Cloud
- run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ inputs.version }}-arm64
+ run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64
- name: Tag MCP image (arm64) as latest
- if: inputs.push_latest == 'true'
- run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ inputs.version }}-arm64 ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:arm64
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
+ run: docker tag ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:arm64
- name: Push latest MCP image (arm64) to Tencent Cloud
- if: inputs.push_latest == 'true'
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
run: docker push ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:arm64
manifest-push-main:
@@ -305,13 +310,14 @@ jobs:
- name: Login to Tencent Cloud
run: echo ${{ secrets.TCR_PASSWORD }} | docker login ccr.ccs.tencentyun.com --username=${{ secrets.TCR_USERNAME }} --password-stdin
- name: Create and push manifest for main (Tencent Cloud)
+ if: github.event_name != 'push' || github.ref != 'refs/heads/main'
run: |
- docker manifest create ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ inputs.version }} \
- ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ inputs.version }}-amd64 \
- ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ inputs.version }}-arm64
- docker manifest push ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ inputs.version }}
+ docker manifest create ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }} \
+ ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 \
+ ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64
+ docker manifest push ccr.ccs.tencentyun.com/nexent-hub/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}
- name: Create and push latest manifest for main (Tencent Cloud)
- if: inputs.push_latest == 'true'
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
run: |
docker manifest create ccr.ccs.tencentyun.com/nexent-hub/nexent:latest \
ccr.ccs.tencentyun.com/nexent-hub/nexent:amd64 \
@@ -327,13 +333,14 @@ jobs:
- name: Login to Tencent Cloud
run: echo ${{ secrets.TCR_PASSWORD }} | docker login ccr.ccs.tencentyun.com --username=${{ secrets.TCR_USERNAME }} --password-stdin
- name: Create and push manifest for data-process (Tencent Cloud)
+ if: github.event_name != 'push' || github.ref != 'refs/heads/main'
run: |
- docker manifest create ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ inputs.version }} \
- ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ inputs.version }}-amd64 \
- ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ inputs.version }}-arm64
- docker manifest push ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ inputs.version }}
+ docker manifest create ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }} \
+ ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 \
+ ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64
+ docker manifest push ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}
- name: Create and push latest manifest for data-process (Tencent Cloud)
- if: inputs.push_latest == 'true'
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
run: |
docker manifest create ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:latest \
ccr.ccs.tencentyun.com/nexent-hub/nexent-data-process:amd64 \
@@ -349,13 +356,14 @@ jobs:
- name: Login to Tencent Cloud
run: echo ${{ secrets.TCR_PASSWORD }} | docker login ccr.ccs.tencentyun.com --username=${{ secrets.TCR_USERNAME }} --password-stdin
- name: Create and push manifest for web (Tencent Cloud)
+ if: github.event_name != 'push' || github.ref != 'refs/heads/main'
run: |
- docker manifest create ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ inputs.version }} \
- ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ inputs.version }}-amd64 \
- ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ inputs.version }}-arm64
- docker manifest push ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ inputs.version }}
+ docker manifest create ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }} \
+ ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 \
+ ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64
+ docker manifest push ccr.ccs.tencentyun.com/nexent-hub/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}
- name: Create and push latest manifest for web (Tencent Cloud)
- if: inputs.push_latest == 'true'
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
run: |
docker manifest create ccr.ccs.tencentyun.com/nexent-hub/nexent-web:latest \
ccr.ccs.tencentyun.com/nexent-hub/nexent-web:amd64 \
@@ -371,13 +379,14 @@ jobs:
- name: Login to Tencent Cloud
run: echo ${{ secrets.TCR_PASSWORD }} | docker login ccr.ccs.tencentyun.com --username=${{ secrets.TCR_USERNAME }} --password-stdin
- name: Create and push manifest for terminal (Tencent Cloud)
+ if: github.event_name != 'push' || github.ref != 'refs/heads/main'
run: |
- docker manifest create ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ inputs.version }} \
- ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ inputs.version }}-amd64 \
- ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ inputs.version }}-arm64
- docker manifest push ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ inputs.version }}
+ docker manifest create ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }} \
+ ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 \
+ ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64
+ docker manifest push ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}
- name: Create and push latest manifest for terminal (Tencent Cloud)
- if: inputs.push_latest == 'true'
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
run: |
docker manifest create ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:latest \
ccr.ccs.tencentyun.com/nexent-hub/nexent-ubuntu-terminal:amd64 \
@@ -393,13 +402,14 @@ jobs:
- name: Login to Tencent Cloud
run: echo ${{ secrets.TCR_PASSWORD }} | docker login ccr.ccs.tencentyun.com --username=${{ secrets.TCR_USERNAME }} --password-stdin
- name: Create and push manifest for mcp (Tencent Cloud)
+ if: github.event_name != 'push' || github.ref != 'refs/heads/main'
run: |
- docker manifest create ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ inputs.version }} \
- ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ inputs.version }}-amd64 \
- ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ inputs.version }}-arm64
- docker manifest push ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ inputs.version }}
+ docker manifest create ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }} \
+ ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 \
+ ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64
+ docker manifest push ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}
- name: Create and push latest manifest for mcp (Tencent Cloud)
- if: inputs.push_latest == 'true'
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
run: |
docker manifest create ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:latest \
ccr.ccs.tencentyun.com/nexent-hub/nexent-mcp:amd64 \
diff --git a/.github/workflows/docker-build-push-overseas.yml b/.github/workflows/docker-build-push-overseas.yml
index d19c2600a..dcbe9d642 100644
--- a/.github/workflows/docker-build-push-overseas.yml
+++ b/.github/workflows/docker-build-push-overseas.yml
@@ -16,10 +16,15 @@ on:
description: 'runner array in json format (e.g. ["ubuntu-latest"] or ["self-hosted"])'
required: true
default: '["ubuntu-latest"]'
+ push:
+ branches:
+ - main
+ tags:
+ - 'v*'
jobs:
build-and-push-main-amd64:
- runs-on: ${{ fromJson(inputs.runner_label_json) }}
+ runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }}
steps:
- name: Set up Docker Buildx
run: |
@@ -32,20 +37,20 @@ jobs:
uses: actions/checkout@v4
- name: Build main image (amd64) and load locally
run: |
- docker buildx build --platform linux/amd64 -t nexent/nexent:${{ inputs.version }}-amd64 --load -f make/main/Dockerfile .
+ docker buildx build --platform linux/amd64 -t nexent/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 --load -f make/main/Dockerfile .
- name: Login to DockerHub
run: echo ${{ secrets.DOCKERHUB_TOKEN }} | docker login -u nexent --password-stdin
- name: Push main image (amd64) to DockerHub
- run: docker push nexent/nexent:${{ inputs.version }}-amd64
+ run: docker push nexent/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64
- name: Tag main image (amd64) as latest
- if: inputs.push_latest == 'true'
- run: docker tag nexent/nexent:${{ inputs.version }}-amd64 nexent/nexent:amd64
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
+ run: docker tag nexent/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 nexent/nexent:amd64
- name: Push latest main image (amd64) to DockerHub
- if: inputs.push_latest == 'true'
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
run: docker push nexent/nexent:amd64
build-and-push-main-arm64:
- runs-on: ${{ fromJson(inputs.runner_label_json) }}
+ runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }}
steps:
- name: Set up Docker Buildx
run: |
@@ -58,20 +63,20 @@ jobs:
uses: actions/checkout@v4
- name: Build main image (arm64) and load locally
run: |
- docker buildx build --platform linux/arm64 -t nexent/nexent:${{ inputs.version }}-arm64 --load -f make/main/Dockerfile .
+ docker buildx build --platform linux/arm64 -t nexent/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 --load -f make/main/Dockerfile .
- name: Login to DockerHub
run: echo ${{ secrets.DOCKERHUB_TOKEN }} | docker login -u nexent --password-stdin
- name: Push main image (arm64) to DockerHub
- run: docker push nexent/nexent:${{ inputs.version }}-arm64
+ run: docker push nexent/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64
- name: Tag main image (arm64) as latest
- if: inputs.push_latest == 'true'
- run: docker tag nexent/nexent:${{ inputs.version }}-arm64 nexent/nexent:arm64
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
+ run: docker tag nexent/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 nexent/nexent:arm64
- name: Push latest main image (arm64) to DockerHub
- if: inputs.push_latest == 'true'
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
run: docker push nexent/nexent:arm64
build-and-push-data-process-amd64:
- runs-on: ${{ fromJson(inputs.runner_label_json) }}
+ runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }}
steps:
- name: Free up disk space on GitHub runner
run: |
@@ -93,20 +98,20 @@ jobs:
rm -rf .git .gitattributes
- name: Build data process image (amd64) and load locally
run: |
- docker buildx build --platform linux/amd64 -t nexent/nexent-data-process:${{ inputs.version }}-amd64 --load -f make/data_process/Dockerfile .
+ docker buildx build --platform linux/amd64 -t nexent/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 --load -f make/data_process/Dockerfile .
- name: Login to DockerHub
run: echo ${{ secrets.DOCKERHUB_TOKEN }} | docker login -u nexent --password-stdin
- name: Push data process image (amd64) to DockerHub
- run: docker push nexent/nexent-data-process:${{ inputs.version }}-amd64
+ run: docker push nexent/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64
- name: Tag data process image (amd64) as latest
- if: inputs.push_latest == 'true'
- run: docker tag nexent/nexent-data-process:${{ inputs.version }}-amd64 nexent/nexent-data-process:amd64
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
+ run: docker tag nexent/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 nexent/nexent-data-process:amd64
- name: Push latest data process image (amd64) to DockerHub
- if: inputs.push_latest == 'true'
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
run: docker push nexent/nexent-data-process:amd64
build-and-push-data-process-arm64:
- runs-on: ${{ fromJson(inputs.runner_label_json) }}
+ runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }}
steps:
- name: Free up disk space on GitHub runner
run: |
@@ -128,20 +133,20 @@ jobs:
rm -rf .git .gitattributes
- name: Build data process image (arm64) and load locally
run: |
- docker buildx build --platform linux/arm64 -t nexent/nexent-data-process:${{ inputs.version }}-arm64 --load -f make/data_process/Dockerfile .
+ docker buildx build --platform linux/arm64 -t nexent/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 --load -f make/data_process/Dockerfile .
- name: Login to DockerHub
run: echo ${{ secrets.DOCKERHUB_TOKEN }} | docker login -u nexent --password-stdin
- name: Push data process image (arm64) to DockerHub
- run: docker push nexent/nexent-data-process:${{ inputs.version }}-arm64
+ run: docker push nexent/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64
- name: Tag data process image (arm64) as latest
- if: inputs.push_latest == 'true'
- run: docker tag nexent/nexent-data-process:${{ inputs.version }}-arm64 nexent/nexent-data-process:arm64
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
+ run: docker tag nexent/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 nexent/nexent-data-process:arm64
- name: Push latest data process image (arm64) to DockerHub
- if: inputs.push_latest == 'true'
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
run: docker push nexent/nexent-data-process:arm64
build-and-push-web-amd64:
- runs-on: ${{ fromJson(inputs.runner_label_json) }}
+ runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }}
steps:
- name: Set up Docker Buildx
run: |
@@ -154,20 +159,20 @@ jobs:
uses: actions/checkout@v4
- name: Build web image (amd64) and load locally
run: |
- docker buildx build --platform linux/amd64 -t nexent/nexent-web:${{ inputs.version }}-amd64 --load -f make/web/Dockerfile .
+ docker buildx build --platform linux/amd64 -t nexent/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 --load -f make/web/Dockerfile .
- name: Login to DockerHub
run: echo ${{ secrets.DOCKERHUB_TOKEN }} | docker login -u nexent --password-stdin
- name: Push web image (amd64) to DockerHub
- run: docker push nexent/nexent-web:${{ inputs.version }}-amd64
+ run: docker push nexent/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64
- name: Tag web image (amd64) as latest
- if: inputs.push_latest == 'true'
- run: docker tag nexent/nexent-web:${{ inputs.version }}-amd64 nexent/nexent-web:amd64
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
+ run: docker tag nexent/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 nexent/nexent-web:amd64
- name: Push latest web image (amd64) to DockerHub
- if: inputs.push_latest == 'true'
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
run: docker push nexent/nexent-web:amd64
build-and-push-web-arm64:
- runs-on: ${{ fromJson(inputs.runner_label_json) }}
+ runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }}
steps:
- name: Set up Docker Buildx
run: |
@@ -180,20 +185,20 @@ jobs:
uses: actions/checkout@v4
- name: Build web image (arm64) and load locally
run: |
- docker buildx build --platform linux/arm64 -t nexent/nexent-web:${{ inputs.version }}-arm64 --load -f make/web/Dockerfile .
+ docker buildx build --platform linux/arm64 -t nexent/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 --load -f make/web/Dockerfile .
- name: Login to DockerHub
run: echo ${{ secrets.DOCKERHUB_TOKEN }} | docker login -u nexent --password-stdin
- name: Push web image (arm64) to DockerHub
- run: docker push nexent/nexent-web:${{ inputs.version }}-arm64
+ run: docker push nexent/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64
- name: Tag web image (arm64) as latest
- if: inputs.push_latest == 'true'
- run: docker tag nexent/nexent-web:${{ inputs.version }}-arm64 nexent/nexent-web:arm64
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
+ run: docker tag nexent/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 nexent/nexent-web:arm64
- name: Push latest web image (arm64) to DockerHub
- if: inputs.push_latest == 'true'
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
run: docker push nexent/nexent-web:arm64
build-and-push-terminal-amd64:
- runs-on: ${{ fromJson(inputs.runner_label_json) }}
+ runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }}
steps:
- name: Set up Docker Buildx
run: |
@@ -206,20 +211,20 @@ jobs:
uses: actions/checkout@v4
- name: Build terminal image (amd64) and load locally
run: |
- docker buildx build --platform linux/amd64 -t nexent/nexent-ubuntu-terminal:${{ inputs.version }}-amd64 --load -f make/terminal/Dockerfile .
+ docker buildx build --platform linux/amd64 -t nexent/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 --load -f make/terminal/Dockerfile .
- name: Login to DockerHub
run: echo ${{ secrets.DOCKERHUB_TOKEN }} | docker login -u nexent --password-stdin
- name: Push terminal image (amd64) to DockerHub
- run: docker push nexent/nexent-ubuntu-terminal:${{ inputs.version }}-amd64
+ run: docker push nexent/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64
- name: Tag terminal image (amd64) as latest
- if: inputs.push_latest == 'true'
- run: docker tag nexent/nexent-ubuntu-terminal:${{ inputs.version }}-amd64 nexent/nexent-ubuntu-terminal:amd64
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
+ run: docker tag nexent/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 nexent/nexent-ubuntu-terminal:amd64
- name: Push latest terminal image (amd64) to DockerHub
- if: inputs.push_latest == 'true'
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
run: docker push nexent/nexent-ubuntu-terminal:amd64
build-and-push-terminal-arm64:
- runs-on: ${{ fromJson(inputs.runner_label_json) }}
+ runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }}
steps:
- name: Set up Docker Buildx
run: |
@@ -232,20 +237,20 @@ jobs:
uses: actions/checkout@v4
- name: Build terminal image (arm64) and load locally
run: |
- docker buildx build --platform linux/arm64 -t nexent/nexent-ubuntu-terminal:${{ inputs.version }}-arm64 --load -f make/terminal/Dockerfile .
+ docker buildx build --platform linux/arm64 -t nexent/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 --load -f make/terminal/Dockerfile .
- name: Login to DockerHub
run: echo ${{ secrets.DOCKERHUB_TOKEN }} | docker login -u nexent --password-stdin
- name: Push terminal image (arm64) to DockerHub
- run: docker push nexent/nexent-ubuntu-terminal:${{ inputs.version }}-arm64
+ run: docker push nexent/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64
- name: Tag terminal image (arm64) as latest
- if: inputs.push_latest == 'true'
- run: docker tag nexent/nexent-ubuntu-terminal:${{ inputs.version }}-arm64 nexent/nexent-ubuntu-terminal:arm64
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
+ run: docker tag nexent/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 nexent/nexent-ubuntu-terminal:arm64
- name: Push latest terminal image (arm64) to DockerHub
- if: inputs.push_latest == 'true'
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
run: docker push nexent/nexent-ubuntu-terminal:arm64
build-and-push-mcp-amd64:
- runs-on: ${{ fromJson(inputs.runner_label_json) }}
+ runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }}
steps:
- name: Set up Docker Buildx
run: |
@@ -258,20 +263,20 @@ jobs:
uses: actions/checkout@v4
- name: Build MCP image (amd64) and load locally
run: |
- docker buildx build --platform linux/amd64 -t nexent/nexent-mcp:${{ inputs.version }}-amd64 --load -f make/mcp/Dockerfile .
+ docker buildx build --platform linux/amd64 -t nexent/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 --load -f make/mcp/Dockerfile .
- name: Login to DockerHub
run: echo ${{ secrets.DOCKERHUB_TOKEN }} | docker login -u nexent --password-stdin
- name: Push MCP image (amd64) to DockerHub
- run: docker push nexent/nexent-mcp:${{ inputs.version }}-amd64
+ run: docker push nexent/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64
- name: Tag MCP image (amd64) as latest
- if: inputs.push_latest == 'true'
- run: docker tag nexent/nexent-mcp:${{ inputs.version }}-amd64 nexent/nexent-mcp:amd64
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
+ run: docker tag nexent/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 nexent/nexent-mcp:amd64
- name: Push latest MCP image (amd64) to DockerHub
- if: inputs.push_latest == 'true'
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
run: docker push nexent/nexent-mcp:amd64
build-and-push-mcp-arm64:
- runs-on: ${{ fromJson(inputs.runner_label_json) }}
+ runs-on: ${{ fromJson(github.event.inputs.runner_label_json || '["ubuntu-latest"]') }}
steps:
- name: Set up Docker Buildx
run: |
@@ -284,16 +289,16 @@ jobs:
uses: actions/checkout@v4
- name: Build MCP image (arm64) and load locally
run: |
- docker buildx build --platform linux/arm64 -t nexent/nexent-mcp:${{ inputs.version }}-arm64 --load -f make/mcp/Dockerfile .
+ docker buildx build --platform linux/arm64 -t nexent/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 --load -f make/mcp/Dockerfile .
- name: Login to DockerHub
run: echo ${{ secrets.DOCKERHUB_TOKEN }} | docker login -u nexent --password-stdin
- name: Push MCP image (arm64) to DockerHub
- run: docker push nexent/nexent-mcp:${{ inputs.version }}-arm64
+ run: docker push nexent/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64
- name: Tag MCP image (arm64) as latest
- if: inputs.push_latest == 'true'
- run: docker tag nexent/nexent-mcp:${{ inputs.version }}-arm64 nexent/nexent-mcp:arm64
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
+ run: docker tag nexent/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64 nexent/nexent-mcp:arm64
- name: Push latest MCP image (arm64) to DockerHub
- if: inputs.push_latest == 'true'
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
run: docker push nexent/nexent-mcp:arm64
manifest-push-main:
@@ -305,13 +310,14 @@ jobs:
- name: Login to DockerHub
run: echo ${{ secrets.DOCKERHUB_TOKEN }} | docker login -u nexent --password-stdin
- name: Create and push manifest for main (DockerHub)
+ if: github.event_name != 'push' || github.ref != 'refs/heads/main'
run: |
- docker manifest create nexent/nexent:${{ inputs.version }} \
- nexent/nexent:${{ inputs.version }}-amd64 \
- nexent/nexent:${{ inputs.version }}-arm64
- docker manifest push nexent/nexent:${{ inputs.version }}
+ docker manifest create nexent/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }} \
+ nexent/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 \
+ nexent/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64
+ docker manifest push nexent/nexent:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}
- name: Create and push latest manifest for main (DockerHub)
- if: inputs.push_latest == 'true'
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
run: |
docker manifest create nexent/nexent:latest \
nexent/nexent:amd64 \
@@ -327,13 +333,14 @@ jobs:
- name: Login to DockerHub
run: echo ${{ secrets.DOCKERHUB_TOKEN }} | docker login -u nexent --password-stdin
- name: Create and push manifest for data-process (DockerHub)
+ if: github.event_name != 'push' || github.ref != 'refs/heads/main'
run: |
- docker manifest create nexent/nexent-data-process:${{ inputs.version }} \
- nexent/nexent-data-process:${{ inputs.version }}-amd64 \
- nexent/nexent-data-process:${{ inputs.version }}-arm64
- docker manifest push nexent/nexent-data-process:${{ inputs.version }}
+ docker manifest create nexent/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }} \
+ nexent/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 \
+ nexent/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64
+ docker manifest push nexent/nexent-data-process:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}
- name: Create and push latest manifest for data-process (DockerHub)
- if: inputs.push_latest == 'true'
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
run: |
docker manifest create nexent/nexent-data-process:latest \
nexent/nexent-data-process:amd64 \
@@ -349,13 +356,14 @@ jobs:
- name: Login to DockerHub
run: echo ${{ secrets.DOCKERHUB_TOKEN }} | docker login -u nexent --password-stdin
- name: Create and push manifest for web (DockerHub)
+ if: github.event_name != 'push' || github.ref != 'refs/heads/main'
run: |
- docker manifest create nexent/nexent-web:${{ inputs.version }} \
- nexent/nexent-web:${{ inputs.version }}-amd64 \
- nexent/nexent-web:${{ inputs.version }}-arm64
- docker manifest push nexent/nexent-web:${{ inputs.version }}
+ docker manifest create nexent/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }} \
+ nexent/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 \
+ nexent/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64
+ docker manifest push nexent/nexent-web:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}
- name: Create and push latest manifest for web (DockerHub)
- if: inputs.push_latest == 'true'
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
run: |
docker manifest create nexent/nexent-web:latest \
nexent/nexent-web:amd64 \
@@ -371,13 +379,14 @@ jobs:
- name: Login to DockerHub
run: echo ${{ secrets.DOCKERHUB_TOKEN }} | docker login -u nexent --password-stdin
- name: Create and push manifest for terminal (DockerHub)
+ if: github.event_name != 'push' || github.ref != 'refs/heads/main'
run: |
- docker manifest create nexent/nexent-ubuntu-terminal:${{ inputs.version }} \
- nexent/nexent-ubuntu-terminal:${{ inputs.version }}-amd64 \
- nexent/nexent-ubuntu-terminal:${{ inputs.version }}-arm64
- docker manifest push nexent/nexent-ubuntu-terminal:${{ inputs.version }}
+ docker manifest create nexent/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }} \
+ nexent/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 \
+ nexent/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64
+ docker manifest push nexent/nexent-ubuntu-terminal:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}
- name: Create and push latest manifest for terminal (DockerHub)
- if: inputs.push_latest == 'true'
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
run: |
docker manifest create nexent/nexent-ubuntu-terminal:latest \
nexent/nexent-ubuntu-terminal:amd64 \
@@ -393,13 +402,14 @@ jobs:
- name: Login to DockerHub
run: echo ${{ secrets.DOCKERHUB_TOKEN }} | docker login -u nexent --password-stdin
- name: Create and push manifest for mcp (DockerHub)
+ if: github.event_name != 'push' || github.ref != 'refs/heads/main'
run: |
- docker manifest create nexent/nexent-mcp:${{ inputs.version }} \
- nexent/nexent-mcp:${{ inputs.version }}-amd64 \
- nexent/nexent-mcp:${{ inputs.version }}-arm64
- docker manifest push nexent/nexent-mcp:${{ inputs.version }}
+ docker manifest create nexent/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }} \
+ nexent/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-amd64 \
+ nexent/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}-arm64
+ docker manifest push nexent/nexent-mcp:${{ github.event.inputs.version || (github.event_name == 'push' && github.ref == 'refs/heads/main' && 'latest') || github.ref_name }}
- name: Create and push latest manifest for mcp (DockerHub)
- if: inputs.push_latest == 'true'
+ if: (github.event.inputs.push_latest == 'true' && github.event_name == 'workflow_dispatch') || (github.event_name == 'push' && github.ref == 'refs/heads/main')
run: |
docker manifest create nexent/nexent-mcp:latest \
nexent/nexent-mcp:amd64 \
diff --git a/.github/workflows/docker-deploy.yml b/.github/workflows/docker-deploy.yml
index 9d04c8913..a77c2491f 100644
--- a/.github/workflows/docker-deploy.yml
+++ b/.github/workflows/docker-deploy.yml
@@ -38,7 +38,10 @@ jobs:
- name: Check if model is cached locally
id: check-model
run: |
- if [ -f ~/model-assets/clip-vit-base-patch32/config.json ] && [ -d ~/model-assets/nltk_data ]; then
+ if [ -f ~/model-assets/clip-vit-base-patch32/config.json ] && \
+ [ -d ~/model-assets/nltk_data ] && \
+ [ -d ~/model-assets/table-transformer-structure-recognition ] && \
+ [ -d ~/model-assets/yolox ]; then
echo "cache-hit=true" >> "$GITHUB_OUTPUT"
cp -r ~/model-assets ./
else
@@ -105,4 +108,4 @@ jobs:
./deploy.sh --mode 3 --is-mainland N --enable-terminal N --version 2 --root-dir "$HOME/nexent-production-data"
else
./deploy.sh --mode 1 --is-mainland N --enable-terminal N --version 2 --root-dir "$HOME/nexent-development-data"
- fi
\ No newline at end of file
+ fi
diff --git a/.github/workflows/sdk_publish.yml b/.github/workflows/sdk_publish.yml
index 1e5759277..3cc413381 100644
--- a/.github/workflows/sdk_publish.yml
+++ b/.github/workflows/sdk_publish.yml
@@ -21,7 +21,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v5
with:
- python-version: '3.10'
+ python-version: '3.11'
- name: Install build dependencies
run: |
diff --git a/.gitignore b/.gitignore
index 702982568..e0bac2b47 100644
--- a/.gitignore
+++ b/.gitignore
@@ -19,9 +19,16 @@ docker/uploads
docker/openssh-server
docker/volumes/db/data
docker/.env
+docker/monitoring/monitoring.env
docker/.run
docker/deploy.options
-k8s/helm/.deploy.options
+k8s/helm/deploy.options
+scripts/deployment/local-config.yaml
+scripts/deployment/generated/
+docker/.env.generated
+docker/docker-compose.generated.yml
+k8s/helm/nexent/generated-values.yaml
+k8s/helm/nexent/generated-secrets-values.yaml
frontend_standalone/
.pnpm-store/
@@ -34,3 +41,29 @@ model-assets/
*.pytest_cache
*.coverage
*coverage.xml
+
+# Log files
+*.log
+
+.sisyphus/
+.opencode/
+openspec/
+logs/
+
+.agents/
+.devspace/
+devspace.yaml
+k8s/helm/**/*.tgz
+k8s/helm/nexent/Chart.lock
+
+MAC_DEVELOPMENT_GUIDE.md
+data/
+sdk/benchmark/.env
+/docker/.env.bak
+
+.venv
+
+.pytest-tmp
+doc/mermaid
+
+.claude/skills/python-import-triage
\ No newline at end of file
diff --git a/README.md b/README.md
index 894cd1862..7983e6c6c 100644
--- a/README.md
+++ b/README.md
@@ -11,111 +11,111 @@ Nexent is a zero-code platform for auto-generating production-grade AI agents, b
> One prompt. Endless reach.
-### 🌐 Visit our [official website](https://nexent.tech/)
+
-
+# 🚀 Get Started Now
-https://github.com/user-attachments/assets/db6b7f5a-9ee8-4327-ae6f-c5af896126b4
+> ⭐ Before you get started, please star us on [GitHub](https://github.com/ModelEngine-Group/nexent) — your support drives us forward!
-# ⚡ Have a try first
+## Option 1: Try Our Official Demo
-### 📋 Prerequisites
+No installation required — jump right in with our **[online demo environment](http://60.204.251.153:3000/en)** to experience Nexent's capabilities instantly.
-| Resource | Minimum |
-|----------|---------|
-| **CPU** | 2 cores |
-| **RAM** | 6 GiB |
-| **Software** | Docker & Docker Compose installed |
+## Option 2: Deploy on Your Own
-### 🛠️ Quick start with Docker Compose
+If you need to run Nexent locally or in your private infrastructure, we offer two deployment options:
-```bash
-git clone https://github.com/ModelEngine-Group/nexent.git
-cd nexent/docker
-cp .env.example .env # fill only necessary configs
-bash deploy.sh
-```
+### System Requirements
-When the containers are running, open **http://localhost:3000** in your browser and follow the setup wizard.
-
-# 🤝 Join Our Community
-
-> *If you want to go fast, go alone; if you want to go far, go together.*
+| Resource | Docker | Kubernetes |
+|----------|--------|-------------|
+| **CPU** | 4 cores (min) / 8 cores (rec.) | 4 cores (min) / 8 cores (rec.) |
+| **Memory** | 8 GiB (min) / 16 GiB (rec.) | 16 GiB (min) / 64 GiB (rec.) |
+| **Disk** | 40 GiB (min) / 100 GiB (rec.) | 100 GiB (min) / 200 GiB (rec.) |
+| **Architecture** | x86_64 / ARM64 | x86_64 / ARM64 |
+| **Software** | Docker 24+, Docker Compose v2+ | Kubernetes 1.24+, Helm 3+ |
-We have released **Nexent v1**, and the platform is now relatively stable. However, there may still be some bugs, and we are continuously improving and adding new features. Stay tuned: we will announce **v2.0** soon!
+> **Note:** Recommended configurations ensure optimal performance in production environments.
-* **🗺️ Check our [Feature Map](https://github.com/orgs/ModelEngine-Group/projects/6)** to explore current and upcoming features.
-* **🔍 Try the current build** and leave ideas or bugs in the [Issues](https://github.com/ModelEngine-Group/nexent/issues) tab.
-* **🐛 Check our [Known Issues page](https://github.com/orgs/ModelEngine-Group/projects/9)** for the latest issue status and solutions.
-
-> *Rome wasn't built in a day.*
-
-If our vision speaks to you, jump in via the **[Contribution Guide](https://modelengine-group.github.io/nexent/en/contributing)** and shape Nexent with us.
-
-Early contributors won't go unnoticed: from special badges and swag to other tangible rewards, we're committed to thanking the pioneers who help bring Nexent to life.
-
-Most of all, we need visibility. Star ⭐ and watch the repo, share it with friends, and help more developers discover Nexent — your click brings new hands to the project and keeps the momentum growing.
+### Docker Deployment (Recommended for Individuals/Small Teams)
-## 💬 Community & contact
+Quick and straightforward for most users. Prerequisites: Docker 24+ and Docker Compose v2+:
-- Browse the [Documentation](https://modelengine-group.github.io/nexent) for more information.
-- Join our [Discord community](https://discord.gg/tb5H3S3wyv) to chat with other developers and get help!
-- Conntact us by Wechat, find our QR Code in our [website](https://nexent.tech/en/contact)
-
-# ✨ Key Features
-
-`1` **Smart agent prompt generation**
- Turn plain language into runnable prompts. Nexent automatically chooses the right tools and plans the best action path for every request.
+```bash
+git clone https://github.com/ModelEngine-Group/nexent.git
+cd nexent/docker
+bash deploy.sh
+```
- 
+The Docker and Kubernetes deploy scripts share the same deployment configuration model. Interactive runs show Bash TUI menus for component selection, port policy, and image source. `infrastructure` is required; `application` is selected by default but can be disabled. Use `b`/Backspace to return to the previous TUI step and `q` to quit. Non-interactive runs can pass the same choices with `--components`, `--port-policy development|production`, and `--image-source general|mainland|local-latest`. Successful deployments save non-sensitive choices to each deploy directory's `deploy.options` for reuse on the next run.
-`2` **Scalable data process engine**
- Process 20+ data formats with fast OCR and table structure extraction, scaling smoothly from a single process to large-batch pipelines.
+Docker uninstall is handled by `bash uninstall.sh`. It can preserve or delete data volumes: run it interactively, pass `--delete-volumes true|false`, or use `bash uninstall.sh delete-all` to remove containers and persistent data.
- 
+For detailed deployment instructions, see [Docker Installation](https://modelengine-group.github.io/nexent/en/quick-start/installation.html).
-`3` **Personal-grade knowledge base**
- Import files in real time, auto-summarise them, and let agents access both personal and global knowledge instantly, also knowing what it can get from each knowledge base.
+### Kubernetes Deployment (For Enterprise Production)
- 
+Ideal for enterprise scenarios requiring high availability and elastic scaling. Prerequisites: Kubernetes 1.24+ and Helm 3+:
-`4` **Internet knowledge search**
- Connect to 5+ web search providers so agents can mix fresh internet facts with your private data.
+```bash
+git clone https://github.com/ModelEngine-Group/nexent.git
+cd nexent/k8s/helm
+./deploy.sh
+```
- 
+Kubernetes uninstall is handled by `bash uninstall.sh`. It removes the Helm release first, then can optionally delete the namespace and local hostPath data. Use `--delete-namespace true|false`, `--delete-local-data true|false`, or `bash uninstall.sh delete-all`; pass `--keep-local-data` with `delete-all` to preserve local volume contents.
-`5` **Knowledge-level traceability**
- Serve answers with precise citations from web and knowledge-base sources, making every fact verifiable.
+For detailed deployment instructions, see [Kubernetes Installation](https://modelengine-group.github.io/nexent/en/quick-start/kubernetes-installation.html).
- 
+# ✨ Core Features
-`6` **Multimodal understanding & dialogue**
- Speak, type, files, or show images. Nexent understands voice, text, and pictures, and can even generate new images on demand.
+Nexent provides a comprehensive feature set for building powerful AI agents:
- 
+| Feature | Description |
+|---------|-------------|
+| **⚙️ Multi-Model Integration** | OpenAI-compatible with any provider, full LLM/Embedding/VLM/STT/TTS coverage, supports domestic model switching |
+| **🤖 Zero-Code Agent Generation** | Describe requirements in natural language, generate executable agents instantly, what you think is what you get |
+| **🤝 A2A Agent Collaboration** | Agent-to-Agent protocol enables seamless multi-agent cooperation and distributed workflows |
+| **🧠 Layered Memory Mechanism** | Two-tier memory (user-level + user-agent-level) for persistent context across conversations |
+| **📝 Progressive Skill Disclosure** | Dynamically loads Skill content into context, maximizing context window efficiency |
+| **🗄️ Personal-Grade Knowledge Base** | Real-time import and intelligent retrieval for 20+ document formats, auto summaries, fine-grained access control |
+| **🔧 MCP Tool Ecosystem** | Plug-and-play extension system with custom development and third-party MCP service support |
+| **🌐 Internet Knowledge Integration** | Multi-source search blending real-time information with private data |
+| **🔍 Knowledge-Level Traceability** | Precise citations and source verification, full transparency for every fact |
+| **🎭 Multimodal Interaction** | Voice, text, images, files — comprehensive natural dialogue |
+| **🔢 Agent Version Management** | Version iteration and history rollback, safe and controllable |
+| **🏪 Agent Marketplace** | Official and community curated agents, one-click install and use |
+| **👥 Multi-Tenancy & RBAC** | Multi-tenant isolation, role-based access control, fine-grained resource management |
-`7` **MCP tool ecosystem**
- Drop in or build Python plug-ins that follow the MCP spec; swap models, tools, and chains without touching core code.
+# 🤝 Join Our Community
- 
+> *If you want to go fast, go alone; if you want to go far, go together.*
-# 🌱 MCP Tool Ecosystem
+We have released **Nexent v2.0** — a comprehensive upgrade from v1.0, featuring A2A protocol support, progressive Skill disclosure, a layered memory mechanism, user management with multi-tenancy, agent version management, an agent marketplace, and more.
-Check our [MCP Ecosystem page](https://modelengine-group.github.io/nexent/en/mcp-ecosystem/overview.html) for detailed information about the MCP tool ecosystem, including community hubs, recommended tools, and integration guides.
+- **🗺️ Check our [Feature Map](https://github.com/orgs/ModelEngine-Group/projects/6)** to explore current and upcoming features.
+- **🔍 Try the current build** and leave ideas or bugs in the [Issues](https://github.com/ModelEngine-Group/nexent/issues) tab.
-# 🛠️ Developer Guide
+> *Rome wasn't built in a day.*
-### 🤖 Model Configuration & Provider Recommendations
+If our vision speaks to you, jump in via the **[Contribution Guide](https://modelengine-group.github.io/nexent/en/contributing)** and shape Nexent with us.
-Check our [Model Providers page](https://modelengine-group.github.io/nexent/en/getting-started/model-providers.html) for detailed model configuration guides and recommended provider information.
+Early contributors won't go unnoticed: from special badges and swag to other tangible rewards, we're committed to thanking the pioneers who help bring Nexent to life.
-### 🔧 Hack on Nexent
+Most of all, we need visibility. Star ⭐ and watch the repo, share it with friends, and help more developers discover Nexent — your click brings new hands to the project and keeps the momentum growing.
-Want to build from source or add new features? Check the [Contribution Guide](https://modelengine-group.github.io/nexent/en/contributing) for step-by-step instructions.
+# 📖 What's Next
-### 🛠️ Build from Source
+Ready to dive deeper? Here are the main documentation entry points:
-Prefer to run Nexent from source code? Follow our [Developer Guide](https://modelengine-group.github.io/nexent/en/getting-started/development-guide) for detailed setup instructions and customization options.
+- **[Quick Start](https://modelengine-group.github.io/nexent/en/quick-start/installation.html)** — System requirements and deployment guide
+- **[Core Features](https://modelengine-group.github.io/nexent/en/getting-started/features.html)** — Comprehensive feature documentation
+- **[User Guide](https://modelengine-group.github.io/nexent/en/user-guide/home-page.html)** — Agent development and usage
+- **[Developer Guide](https://modelengine-group.github.io/nexent/en/developer-guide/overview)** — Build from source and customization
+- **[FAQ](https://modelengine-group.github.io/nexent/en/quick-start/faq.html)** — Common questions and troubleshooting
# 📄 License
diff --git a/README_CN.md b/README_CN.md
index c16de5d32..032776418 100644
--- a/README_CN.md
+++ b/README_CN.md
@@ -11,111 +11,104 @@ Nexent 是一个基于 **Harness Engineering** 原则打造的零代码智能体
> 一个提示词,无限种可能。
-### 🌐 访问我们的[官方网站](https://nexent.tech/)
+
-
+# 🚀 先来试试看
-https://github.com/user-attachments/assets/b844e05d-5277-4509-9463-1c5b3516f11e
+> ⭐ 在您开始使用前,请您顺手在 [GitHub](https://github.com/ModelEngine-Group/nexent) 为我们点个 Star,您的支持是我们前进的动力!
-# ⚡ 先来试试看
+## 方式一:使用官方体验环境
-### 📋 系统要求
+无需安装,直接访问我们的 **[在线体验环境](http://60.204.251.153:3000/zh)**,快速体验 Nexent 的强大功能。
-| 资源 | 最低要求 |
-|----------|---------|
-| **CPU** | 2 核 |
-| **内存** | 6 GiB |
-| **软件** | 已安装 Docker 和 Docker Compose |
+## 方式二:自行部署
-### 🛠️ 使用 Docker Compose 快速开始
+如果需要在本地或私有环境中部署 Nexent,我们提供两种部署方式:
-```bash
-git clone https://github.com/ModelEngine-Group/nexent.git
-cd nexent/docker
-cp .env.example .env # fill only necessary configs
-bash deploy.sh
-```
-
-当容器运行后,在浏览器中打开 **http://localhost:3000** 并按照设置向导操作。
-
-# 🤝 加入我们的社区
-
-> *If you want to go fast, go alone; if you want to go far, go together.*
-
-我们已经发布了 **Nexent v1**,平台现在相对稳定。但是,可能仍然存在一些 bug,我们正在持续改进并添加新功能。敬请期待:我们很快将宣布 **v2.0**!
-
-* **🗺️ 查看我们的 [功能地图](https://github.com/orgs/ModelEngine-Group/projects/6)** 探索当前和即将推出的功能。
-* **🔍 试用当前版本** 并在 [问题反馈](https://github.com/ModelEngine-Group/nexent/issues) 中留下想法或报告错误。
-* **🐛 查看我们的[已知问题页面](https://github.com/orgs/ModelEngine-Group/projects/9)** 了解最新的问题状态和解决方案。
-
-> *Rome wasn't built in a day.*
-
-如果我们的愿景与您产生共鸣,请通过 **[贡献指南](https://modelengine-group.github.io/nexent/zh/contributing)** 加入我们,共同塑造 Nexent。
-
-早期贡献者不会被忽视:从特殊徽章和纪念品到其他实质性奖励,我们致力于感谢那些帮助 Nexent 诞生的先驱者。
+### 系统要求
-最重要的是,我们需要关注度。请为仓库点星 ⭐ 并关注,与朋友分享,帮助更多开发者发现 Nexent —— 您的每一次点击都能为项目带来新的参与者,保持发展势头。
+| 资源 | Docker 部署 | Kubernetes 部署 |
+|------|------------|----------------|
+| **CPU** | 4 核(最低)/ 8 核(推荐) | 4 核(最低)/ 8 核(推荐) |
+| **内存** | 8 GiB(最低)/ 16 GiB(推荐) | 16 GiB(最低)/ 64 GiB(推荐) |
+| **磁盘** | 40 GiB(最低)/ 100 GiB(推荐) | 100 GiB(最低)/ 200 GiB(推荐) |
+| **架构** | x86_64 / ARM64 | x86_64 / ARM64 |
+| **软件** | Docker 24+, Docker Compose v2+ | Kubernetes 1.24+, Helm 3+ |
-## 💬 社区与联系方式
+> **注意:** 推荐配置可确保生产环境下的最佳性能。
-- 浏览 [文档](https://modelengine-group.github.io/nexent) 了解更多信息。
-- 加入我们的 [Discord 社区](https://discord.gg/tb5H3S3wyv) 与其他开发者交流并获取帮助!
-- 通过微信联系我们,在我们的[网站](https://nexent.tech/zh/contact)找到二维码
+### Docker 部署(推荐个人/小团队使用)
-# ✨ 主要特性
+适用于大多数用户,快速简单。部署前需准备 Docker 24+ 和 Docker Compose v2+:
-`1` **智能体提示词自动生成**
- 将自然语言转化为可被Agent执行的提示词。Nexent可以根据你的需要自动选择正确的工具并为每个请求规划最佳执行路径。
-
- 
-
-`2` **可扩展数据处理引擎**
- 支持 20+ 数据格式的快速 OCR 和表格结构提取,从单进程到大规模批处理管道都能平滑扩展。
-
- 
-
-`3` **个人级知识库**
- 实时导入文件,自动总结,让智能体能够即时访问个人和全局知识,并了解每个知识库能提供什么。
+```bash
+git clone https://github.com/ModelEngine-Group/nexent.git
+cd nexent/docker
+cp .env.example .env
+bash deploy.sh
+```
- 
+详细部署指南请参考 [Docker 安装部署](https://modelengine-group.github.io/nexent/zh/quick-start/installation.html)。
-`4` **互联网知识搜索**
- 连接 5+ 个网络搜索提供商,让智能体能够将最新的互联网信息与您的私有数据结合。
+### Kubernetes 部署(适合企业级生产环境)
- 
+适用于需要高可用、弹性扩展的企业场景。部署前需准备 Kubernetes 集群(1.24+)和 Helm 3+:
-`5` **知识级可追溯性**
- 提供来自网络和知识库来源的精确引用,使每个事实都可验证。
+```bash
+git clone https://github.com/ModelEngine-Group/nexent.git
+cd nexent/k8s/helm
+./deploy-helm.sh apply
+```
- 
+详细部署指南请参考 [Kubernetes 安装部署](https://modelengine-group.github.io/nexent/zh/quick-start/kubernetes-installation.html)。
-`6` **多模态理解与对话**
- 说话、打字、文件或展示图片。Nexent 理解语音、文本和图片,甚至可以根据需求生成新图像。
+# ✨ 核心特性
- 
+Nexent 为构建强大的 AI 智能体提供全面的功能集:
-`7` **MCP 工具生态系统**
- 插入或构建符合 MCP 规范的 Python 插件;无需修改核心代码即可更换模型、工具和链。
+| 特性 | 描述 |
+|------|------|
+| **⚙️ 多模型集成** | OpenAI 兼容任意提供商,LLM/Embedding/VLM/STT/TTS 全覆盖,支持灵活切换 |
+| **🤖 零代码智能体生成** | 纯自然语言描述需求,一键生成可执行智能体,所想即所得 |
+| **🤝 A2A 智能体协作** | Agent-to-Agent 协议支持多智能体无缝协作,构建分布式工作流 |
+| **🧠 分层记忆机制** | 两层记忆体系(用户级+用户-智能体级),跨对话持续积累上下文 |
+| **📝 Skill 渐进式披露** | 动态加载 Skill 内容至上下文,高效利用上下文窗口 |
+| **🗄️ 个人级知识库** | 20+ 文档格式实时导入与智能检索,自动摘要,细粒度权限控制 |
+| **🔧 MCP 工具生态** | 即插即用的扩展工具体系,支持自定义开发和第三方 MCP 服务 |
+| **🌐 互联网知识集成** | 多搜索源混合,实时信息与私有数据融合 |
+| **🔍 知识级溯源** | 精确引用与来源验证,每个事实透明可查 |
+| **🎭 多模态交互** | 语音、文字、图像、文件,全方位自然对话 |
+| **🔢 智能体版本管理** | 版本迭代与历史回溯,安全可控 |
+| **🏪 智能体市场** | 官方与社区优质智能体一键安装即用 |
+| **👥 分权分域管理** | 多租户隔离,RBAC 权限体系,资源级精细管控 |
- 
+# 🤝 加入我们的社区
-# 🌱 MCP 工具生态
+> *If you want to go fast, go alone; if you want to go far, go together.*
-查看我们的[MCP 生态系统页面](https://modelengine-group.github.io/nexent/zh/mcp-ecosystem/overview.html)了解 MCP 工具生态系统的详细信息,包括社区中心、推荐工具和集成指南。
+- **🗺️ 查看我们的 [功能地图](https://github.com/orgs/ModelEngine-Group/projects/6)** 探索当前和即将推出的功能。
+- **🔍 试用当前版本** 并在 [问题反馈](https://github.com/ModelEngine-Group/nexent/issues) 中留下想法或报告错误。
-# 🛠️ 开发者指南
+> *Rome wasn't built in a day.*
-### 🤖 模型配置与模型提供商推荐
+如果我们的愿景与您产生共鸣,请通过 **[贡献指南](https://modelengine-group.github.io/nexent/zh/contributing)** 加入我们,共同塑造 Nexent。
-查看我们的[模型提供商页面](https://modelengine-group.github.io/nexent/zh/getting-started/model-providers.html)了解详细的模型配置指南和推荐的提供商信息。
+早期贡献者不会被忽视:从特殊徽章和纪念品到其他实质性奖励,我们致力于感谢那些帮助 Nexent 诞生的先驱者。
-### 🔧 开发 Nexent
+最重要的是,我们需要关注度。请 [前往 GitHub](https://github.com/ModelEngine-Group/nexent) 为我们点星 ⭐ 并关注,与朋友分享,帮助更多开发者发现 Nexent —— 您的每一次点击都能为项目带来新的参与者,保持发展势头。
-想要从源代码构建或添加新功能?查看 [贡献指南](https://modelengine-group.github.io/nexent/zh/contributing) 获取分步说明。
+# 📖 下一步
-### 🛠️ 从源码构建
+准备好深入了解了吗?以下是主要文档入口:
-想要从源码运行 Nexent?查看我们的[开发者指南](https://modelengine-group.github.io/nexent/zh/getting-started/development-guide)获取详细的设置说明和自定义选项。
+- **[快速开始](https://modelengine-group.github.io/nexent/zh/quick-start/installation.html)** — 系统要求和部署指南
+- **[核心特性详解](https://modelengine-group.github.io/nexent/zh/getting-started/features.html)** — 完整的功能说明
+- **[用户指南](https://modelengine-group.github.io/nexent/zh/user-guide/home-page.html)** — 智能体开发与使用
+- **[开发者指南](https://modelengine-group.github.io/nexent/zh/developer-guide/overview)** — 从源码构建和自定义
+- **[常见问题](https://modelengine-group.github.io/nexent/zh/quick-start/faq.html)** — 常见问题和故障排除
# 📄 许可证
diff --git a/backend/adapters/__init__.py b/backend/adapters/__init__.py
new file mode 100644
index 000000000..ed46fc888
--- /dev/null
+++ b/backend/adapters/__init__.py
@@ -0,0 +1,13 @@
+from adapters.exception import JiuwenSDKError, JiuwenSDKUnavailableError, NexentCapabilityError
+
+try:
+ from adapters.jiuwen_sdk_adapter import JiuwenSDKAdapter
+except ModuleNotFoundError:  # the adapter module, or a module it imports, could not be found
+ JiuwenSDKAdapter = None # type: ignore[assignment, misc]
+
+__all__ = [
+ "JiuwenSDKError",
+ "JiuwenSDKUnavailableError",
+ "NexentCapabilityError",
+ "JiuwenSDKAdapter",  # bound to None when the guarded import above raised ModuleNotFoundError
+]
diff --git a/backend/adapters/exception.py b/backend/adapters/exception.py
new file mode 100644
index 000000000..63812d3af
--- /dev/null
+++ b/backend/adapters/exception.py
@@ -0,0 +1,13 @@
+class JiuwenSDKError(Exception):
+ """Generic exception for a failed Jiuwen SDK call."""
+ pass
+
+
+class JiuwenSDKUnavailableError(JiuwenSDKError):
+ """The Jiuwen SDK is unavailable (a dependency is missing or the SDK is not enabled)."""
+ pass
+
+
+class NexentCapabilityError(Exception):
+ """The requested capability is not supported in nexent native mode."""
+ pass
diff --git a/backend/adapters/jiuwen_sdk_adapter.py b/backend/adapters/jiuwen_sdk_adapter.py
new file mode 100644
index 000000000..f62ce9d06
--- /dev/null
+++ b/backend/adapters/jiuwen_sdk_adapter.py
@@ -0,0 +1,514 @@
+"""
+openjiuwen SDK adapter for Nexent.
+
+This module must be imported lazily (not at module load time) because
+openjiuwen 0.1.13 has circular import bugs in its __init__.py files that
+prevent the SDK from loading unless we bypass them.
+
+Import flow:
+ backend/adapters/__init__.py -> try/except -> JiuwenSDKAdapter = None
+ -> when needed: _install_jiuwen_bypasser() -> openjiuwen imports work
+"""
+import asyncio
+import importlib.abc
+import importlib.machinery
+import json
+import logging
+import os
+import sys
+import types
+from typing import Any, List, Literal, Optional
+
+logger = logging.getLogger("jiuwen_adapter")
+
+from adapters.exception import JiuwenSDKError
+
+
+# ----------------------------------------------------------------------
+# Circular import bypasser for openjiuwen 0.1.13
+#
+# openjiuwen has broken __init__.py files that create circular import chains:
+# tune/__init__.py -> tune.optimizer -> core.operator -> agent_evolving -> ...
+# This bypasser prevents those __init__.py files from executing while still
+# allowing regular .py submodule files to load normally.
+# ----------------------------------------------------------------------
+# Fully qualified openjiuwen module names consulted by _JiuwenInitBypasser.find_spec.
+# A name listed here is replaced by an empty stub package, provided it maps to a
+# directory that contains an __init__.py; names that are not such packages are
+# declined by the finder and load through the regular import machinery.
+_CIRCULAR_CHAIN = {
+    "openjiuwen.agent_evolving",
+    "openjiuwen.agent_evolving.trainer",
+    "openjiuwen.agent_evolving.trainer.trainer",
+    "openjiuwen.agent_evolving.trainer.progress",
+    "openjiuwen.core",
+    "openjiuwen.dev_tools",
+    "openjiuwen.dev_tools.tune",
+    "openjiuwen.dev_tools.tune.optimizer",
+    "openjiuwen.dev_tools.tune.optimizer.instruction_optimizer",
+    "openjiuwen.dev_tools.prompt_builder",
+    "openjiuwen.dev_tools.prompt_builder.builder",
+}
+
+
+class _JiuwenInitBypasser(importlib.abc.MetaPathFinder, importlib.abc.Loader):
+    """
+    Meta path finder/loader that swaps selected openjiuwen packages for stubs.
+
+    For a package listed in ``_CIRCULAR_CHAIN`` the real ``__init__.py`` is never
+    executed: the stub module only receives ``__path__`` and ``__file__`` so the
+    regular import machinery can still locate the submodules inside it. Every
+    other name is declined (``find_spec`` returns ``None``) and is resolved by
+    the remaining finders on ``sys.meta_path``.
+
+    No ``__getattr__`` is defined on purpose. Attribute probes performed on
+    ``sys.meta_path`` entries (for instance for ``find_distributions``) must
+    fail with a plain ``AttributeError`` and must never trigger an import.
+    """
+
+    @staticmethod
+    def _package_dir(fullname: str) -> Optional[str]:
+        """Return the directory that backs ``fullname`` inside the openjiuwen
+        installation, or ``None`` when openjiuwen itself cannot be imported."""
+        try:
+            import openjiuwen as _oj
+        except ImportError:
+            return None
+        # Drop the leading "openjiuwen" component and join the rest onto the root.
+        return os.path.join(_oj.__path__[0], *fullname.split(".")[1:])
+
+    def find_spec(self, fullname: str, path: Any, target: Any = None) -> Any:
+        """Return a stub package spec for blocked packages, ``None`` otherwise."""
+        # Only dotted names below the openjiuwen root can be in the chain.
+        if not fullname.startswith("openjiuwen."):
+            return None
+        # Cheap set lookup first so unrelated imports never touch the filesystem.
+        if fullname not in _CIRCULAR_CHAIN:
+            return None
+
+        package_dir = self._package_dir(fullname)
+        if package_dir is None or not os.path.isdir(package_dir):
+            # Not a package (e.g. a plain .py module): let the normal finders load it.
+            return None
+        if not os.path.exists(os.path.join(package_dir, "__init__.py")):
+            return None
+
+        spec = importlib.machinery.ModuleSpec(
+            fullname, self, is_package=True, origin=""
+        )
+        spec.submodule_search_locations = [package_dir]
+        return spec
+
+    def create_module(self, module: Any) -> None:
+        """Use the default module object.
+
+        Despite the parameter name, importlib passes the module *spec* here;
+        returning ``None`` asks the import system to create the module itself.
+        """
+        return None
+
+    def exec_module(self, module: Any) -> None:
+        """Initialise the stub: set ``__path__``/``__file__`` and run no package code."""
+        package_dir = self._package_dir(module.__name__)
+        if package_dir is None:
+            raise ImportError(
+                f"openjiuwen is not importable; cannot stub {module.__name__}",
+                name=module.__name__,
+            )
+        module.__path__ = [package_dir]
+        module.__file__ = os.path.join(package_dir, "__init__.py")
+
+
+_bypasser_installed = False
+
+
+def _ensure_stub_module(name: str, attrs: Optional[dict] = None, is_package: bool = True) -> None:
+    """Register an empty stand-in for ``name`` in ``sys.modules``, but only when
+    the real module is neither already loaded nor importable."""
+    if name in sys.modules:
+        return
+
+    import importlib.util
+
+    try:
+        if importlib.util.find_spec(name) is not None:
+            # The real module is installed: never shadow it with a stub.
+            return
+    except Exception:
+        # Resolving a dotted name imports its parent, which may fail or may be
+        # one of our spec-less stubs; in both cases treat the module as missing.
+        pass
+
+    stub = types.ModuleType(name)
+    if is_package:
+        # An empty __path__ marks the stub as a package for dotted sub-imports.
+        stub.__path__ = []
+    for key, value in (attrs or {}).items():
+        setattr(stub, key, value)
+    sys.modules[name] = stub
+
+    # Mirror what the import system does for real submodules: expose the child
+    # as an attribute of its parent so ``parent.child`` access works too.
+    parent_name, _, child_name = name.rpartition(".")
+    parent = sys.modules.get(parent_name) if parent_name else None
+    if parent is not None:
+        setattr(parent, child_name, stub)
+
+
+def _install_jiuwen_bypasser() -> bool:
+    """
+    Install the circular import bypasser for openjiuwen (idempotent).
+
+    Missing optional dependencies of openjiuwen (pymilvus, dashscope,
+    pdfplumber and the submodules listed below) are stubbed in ``sys.modules``
+    first, because importing openjiuwen may reach them.
+
+    Returns:
+        True when the bypasser is present on ``sys.meta_path`` (newly installed
+        or already there), False when openjiuwen cannot be imported.
+    """
+    global _bypasser_installed
+    if _bypasser_installed:
+        return True
+
+    def _always_successful(*args, **kwargs):
+        return True
+
+    # (module name, attributes, is_package) - parents are listed before children.
+    stub_table = [
+        ("pymilvus", {"is_successful": _always_successful}, True),
+        ("dashscope", {}, True),
+        ("pdfplumber", {}, True),
+        ("pymilvus.client", {}, True),
+        ("pymilvus.client.utils", {"is_successful": _always_successful}, False),
+        ("dashscope.api_entities", {}, True),
+        ("dashscope.api_entities.data", {}, True),
+        ("dashscope.api_entities.dashscope_response", {"DashScopeAPIResponse": object}, True),
+        ("dashscope.common", {"REQUEST_TIMEOUT_KEYWORD": "timeout"}, True),
+        ("dashscope.common.constants", {"REQUEST_TIMEOUT_KEYWORD": "timeout"}, True),
+    ]
+    for stub_name, stub_attrs, stub_is_package in stub_table:
+        _ensure_stub_module(stub_name, stub_attrs, stub_is_package)
+
+    try:
+        import openjiuwen  # noqa: F401
+    except ImportError:
+        return False
+
+    # Another code path may already have put a bypasser on sys.meta_path.
+    if not any(isinstance(finder, _JiuwenInitBypasser) for finder in sys.meta_path):
+        sys.meta_path.insert(0, _JiuwenInitBypasser())
+    _bypasser_installed = True
+    return True
+
+
+# ----------------------------------------------------------------------
+# Language helpers
+# ----------------------------------------------------------------------
+LANGUAGE_MAP = {"zh": "zh-CN", "en": "en-US"}
+
+
+def normalize_language(language: str) -> str:
+    """
+    Map a language code to the locale tag passed to the Jiuwen builders.
+
+    Exact keys of ``LANGUAGE_MAP`` are looked up first. Otherwise the primary
+    subtag is used, case-insensitively and accepting ``_`` as separator, so
+    values such as ``"en-US"``, ``"EN"`` or ``"zh_CN"`` resolve as well.
+
+    Returns:
+        The mapped locale, or ``"zh-CN"`` for unknown, empty or non-string input.
+    """
+    default_locale = "zh-CN"
+    if not isinstance(language, str):
+        return default_locale
+    if language in LANGUAGE_MAP:
+        return LANGUAGE_MAP[language]
+    primary_subtag = language.strip().lower().replace("_", "-").split("-", 1)[0]
+    return LANGUAGE_MAP.get(primary_subtag, default_locale)
+
+
+def run_async(coro):
+    """
+    Run ``coro`` to completion from synchronous code and return its result.
+
+    * No event loop is running in this thread: ``asyncio.run`` is used.
+    * A loop is already running in this thread: when ``nest_asyncio`` is
+      installed and able to patch that loop, the coroutine is driven
+      re-entrantly on it. Otherwise it runs on a fresh loop in a helper thread
+      while the calling thread blocks for the result.
+
+    Exceptions raised by the coroutine propagate to the caller unchanged.
+    """
+    try:
+        loop = asyncio.get_running_loop()
+    except RuntimeError:
+        return asyncio.run(coro)
+
+    # get_running_loop() only returns a loop that is running, so from here on a
+    # plain loop.run_until_complete() would raise; re-entrancy or a thread is needed.
+    try:
+        import nest_asyncio
+
+        # Raises ValueError for loop types it cannot patch (e.g. uvloop).
+        nest_asyncio.apply(loop)
+    except (ImportError, ValueError):
+        pass
+    else:
+        return loop.run_until_complete(coro)
+
+    import concurrent.futures
+
+    # asyncio.run creates, uses and closes a private event loop in the worker thread.
+    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
+        return executor.submit(asyncio.run, coro).result()
+
+
+# ----------------------------------------------------------------------
+# Jiuwen SDK lazy import helpers
+# ----------------------------------------------------------------------
+def _lazy_import_jiuwen_config():
+    """
+    Lazily import only the lightweight Jiuwen config classes.
+
+    Returns:
+        The tuple ``(ModelRequestConfig, ModelClientConfig, ProviderType)``.
+
+    Raises:
+        JiuwenSDKError: openjiuwen is not installed, or its config module
+            cannot be imported.
+    """
+    _install_jiuwen_bypasser()
+
+    try:
+        import openjiuwen  # noqa: F401
+    except ImportError as e:
+        raise JiuwenSDKError(f"Jiuwen SDK 未安装: {e}") from e
+
+    # The submodule import can still fail (e.g. a circular import inside the
+    # SDK); report that as JiuwenSDKError too so callers see one error type.
+    try:
+        from openjiuwen.core.foundation.llm.schema.config import (
+            ModelRequestConfig,
+            ModelClientConfig,
+            ProviderType,
+        )
+    except ImportError as e:
+        raise JiuwenSDKError(f"Jiuwen SDK 配置模块导入失败: {e}") from e
+
+    return ModelRequestConfig, ModelClientConfig, ProviderType
+
+
+def build_jiuwen_model_configs(model_id: int, tenant_id: str):
+    """
+    Convert a Nexent model record into Jiuwen configuration objects.
+
+    Args:
+        model_id: Identifier of the model record to load.
+        tenant_id: Tenant that owns the model record.
+
+    Returns:
+        The tuple ``(request_config, client_config)`` holding a
+        ``ModelRequestConfig`` and a ``ModelClientConfig``.
+
+    Raises:
+        JiuwenSDKError: no model record exists for ``model_id``.
+    """
+    from database.model_management_db import get_model_by_model_id
+    from utils.config_utils import get_model_name_from_config
+
+    ModelRequestConfig, ModelClientConfig, ProviderType = _lazy_import_jiuwen_config()
+
+    model_config = get_model_by_model_id(model_id, tenant_id)
+    if not model_config:
+        raise JiuwenSDKError(f"model_id={model_id} not found")
+
+    # A missing or blank base_url falls back to the public OpenAI endpoint.
+    api_base = (model_config.get("base_url", "") or "").strip()
+    if not api_base:
+        api_base = "https://api.openai.com/v1"
+
+    # Jiuwen ModelClientConfig defaults to timeout=60.0, max_retries=3.
+    # For prompt optimization calls, 60s can be too small. Reuse Nexent model config timeout_seconds.
+    timeout_seconds = model_config.get("timeout_seconds")
+    if timeout_seconds is None:
+        timeout_seconds = 120
+
+    ssl_cert = model_config.get("ssl_cert") or None
+    ssl_verify = model_config.get("ssl_verify", True)
+    if ssl_verify and not ssl_cert:
+        # NOTE(review): verification is switched OFF whenever no certificate is
+        # configured. Presumably the SDK needs a certificate path when
+        # verify_ssl is on - confirm. The downgrade is logged so that it is at
+        # least visible to operators.
+        logger.warning(
+            "[jiuwen-adapter] ssl_verify is enabled for model_id=%s but no ssl_cert is configured; "
+            "TLS certificate verification is disabled for Jiuwen SDK calls",
+            model_id,
+        )
+        ssl_verify = False
+
+    client_config = ModelClientConfig(
+        client_provider=ProviderType.OpenAI,
+        api_key=model_config["api_key"],
+        api_base=api_base,
+        timeout=float(timeout_seconds),
+        verify_ssl=ssl_verify,
+        ssl_cert=ssl_cert,
+    )
+
+    request_config = ModelRequestConfig(
+        model_name=get_model_name_from_config(model_config),
+        temperature=0.3,
+    )
+    return request_config, client_config
+
+
+def _lazy_import_jiuwen_builders():
+    """
+    Lazily import the prompt builders; only optimization paths need them.
+
+    Returns:
+        The tuple ``(FeedbackPromptBuilder, BadCasePromptBuilder)``.
+
+    Raises:
+        JiuwenSDKError: openjiuwen is not installed, or the builder modules
+            cannot be imported.
+    """
+    _install_jiuwen_bypasser()
+
+    try:
+        import openjiuwen  # noqa: F401
+    except ImportError as e:
+        raise JiuwenSDKError(f"Jiuwen SDK 未安装: {e}") from e
+
+    # Import failures below the package root (e.g. circular imports inside the
+    # SDK) are reported as JiuwenSDKError as well, so callers see one error type.
+    try:
+        from openjiuwen.dev_tools.prompt_builder.builder.feedback_prompt_builder import (
+            FeedbackPromptBuilder,
+        )
+        from openjiuwen.dev_tools.prompt_builder.builder.badcase_prompt_builder import (
+            BadCasePromptBuilder,
+        )
+    except ImportError as e:
+        raise JiuwenSDKError(f"Jiuwen SDK 提示词构建器导入失败: {e}") from e
+
+    return FeedbackPromptBuilder, BadCasePromptBuilder
+
+
+def _unwrap_prompt_response(text: str) -> str:
+    """
+    Strip the markdown fence and/or JSON wrapper an LLM may put around a prompt.
+
+    Steps:
+      1. If the text starts with a fence line, remove it together with a
+         trailing closing fence. Any info string (``json``, ``markdown``, ...)
+         and CRLF line endings are accepted. A fence without a closing
+         counterpart is only removed for the bare and ``json`` forms, because
+         other unterminated fences are more likely real prompt content.
+      2. If the remainder is a JSON object, return the stripped string stored
+         under ``"prompt"``, or else under ``"result"``.
+      3. Otherwise return the (stripped) text unchanged.
+    """
+    _logger = logging.getLogger("jiuwen_adapter")
+    _logger.debug("[unwrap] raw (%d chars): %s", len(text), text[:200])
+
+    # Step 1: strip markdown code fences
+    text = text.strip()
+    if text.startswith("```"):
+        opening_line, newline, remainder = text.partition("\n")
+        info_string = opening_line[3:].strip()
+        remainder = remainder.rstrip()
+        has_closing_fence = remainder.endswith("```")
+        # A backtick in the info string means a longer or inline fence: leave it alone.
+        if newline and "`" not in info_string and (
+            has_closing_fence or info_string in ("", "json")
+        ):
+            text = remainder[:-3] if has_closing_fence else remainder
+        text = text.strip()
+        _logger.debug("[unwrap] after fence strip (%d chars)", len(text))
+
+    # Step 2: try a standard JSON parse and pull the prompt out of the wrapper
+    if text.startswith("{"):
+        try:
+            parsed = json.loads(text)
+        except (ValueError, RecursionError):
+            # Not valid JSON: the text is a prompt that merely starts with "{".
+            parsed = None
+        if isinstance(parsed, dict):
+            for key in ("prompt", "result"):
+                value = parsed.get(key)
+                if isinstance(value, str):
+                    result = value.strip()
+                    _logger.debug("[unwrap] extracted %s (%d chars)", key, len(result))
+                    return result
+
+    # Step 3: raw text (possibly multi-line), return as-is
+    _logger.debug("[unwrap] no JSON wrapper, returning raw (%d chars)", len(text))
+    return text
+
+
+def _lazy_import_jiuwen_tune_types():
+    """
+    Lazily import the Jiuwen tune types; only the badcase flow needs them.
+
+    Returns:
+        The tuple ``(Case, EvaluatedCase)``.
+
+    Raises:
+        JiuwenSDKError: the tune module cannot be imported (SDK missing or
+            broken), matching the behaviour of the other lazy import helpers.
+    """
+    _install_jiuwen_bypasser()
+    try:
+        from openjiuwen.dev_tools.tune.base import Case, EvaluatedCase
+    except ImportError as e:
+        raise JiuwenSDKError(f"Jiuwen SDK 未安装: {e}") from e
+    return Case, EvaluatedCase
+
+
+def to_jiuwen_evaluated_case(bad_case) -> Any:
+    """
+    Convert a nexent BadCase into a Jiuwen ``EvaluatedCase``.
+
+    The question becomes the case input and the label (empty string when
+    falsy) the expected answer. The produced answer is attached with a fixed
+    score of 0.0 and the reason (empty string when falsy).
+    """
+    case_cls, evaluated_case_cls = _lazy_import_jiuwen_tune_types()
+
+    question_payload = {"question": bad_case.question}
+    label_payload = {"answer": bad_case.label or ""}
+    jiuwen_case = case_cls(inputs=question_payload, label=label_payload)
+
+    answer_payload = {"content": bad_case.answer}
+    reason_text = bad_case.reason or ""
+    return evaluated_case_cls(
+        case=jiuwen_case,
+        answer=answer_payload,
+        score=0.0,
+        reason=reason_text,
+    )
+
+
+# ----------------------------------------------------------------------
+# Main adapter class
+# ----------------------------------------------------------------------
+class JiuwenSDKAdapter:
+    """
+    Adapter around the Jiuwen SDK prompt builders.
+
+    Every Jiuwen SDK call goes through this class. It performs no fallback of
+    its own: failures are raised as ``JiuwenSDKError`` and the calling layer
+    (PromptOptimizationService, per the original design note) decides whether
+    to degrade.
+    """
+
+    def __init__(self, model_id: int, tenant_id: str):
+        self.model_id = model_id
+        self.tenant_id = tenant_id
+        self.logger = logging.getLogger("jiuwen_adapter")
+
+    def _ensure_available(self):
+        """Make sure the Jiuwen SDK can be imported.
+
+        Raises:
+            JiuwenSDKError: openjiuwen is not installed.
+        """
+        if not _bypasser_installed:
+            _install_jiuwen_bypasser()
+
+        try:
+            import openjiuwen  # noqa: F401
+        except ImportError as e:
+            raise JiuwenSDKError(f"Jiuwen SDK 未安装: {e}") from e
+
+    def optimize(
+        self,
+        prompt: str,
+        feedback: str,
+        mode: Literal["general", "insert", "select"] = "general",
+        start_pos: Optional[int] = None,
+        end_pos: Optional[int] = None,
+        language: str = "zh",
+    ) -> str:
+        """
+        Optimize ``prompt`` with the Jiuwen FeedbackPromptBuilder.
+
+        Args:
+            prompt: Prompt text to optimize.
+            feedback: Feedback forwarded to the builder.
+            mode: Builder mode, forwarded unchanged.
+            start_pos: Forwarded unchanged to the builder.
+            end_pos: Forwarded unchanged to the builder.
+            language: Language code, normalized before it is forwarded.
+
+        Returns:
+            The builder output with any JSON or markdown wrapper removed.
+
+        Raises:
+            JiuwenSDKError: the SDK is unavailable, the call failed, or the
+                builder returned ``None``.
+        """
+        self._ensure_available()
+
+        self.logger.info(f"[jiuwen-adapter] mode={mode}, start_pos={start_pos}, end_pos={end_pos}")
+
+        request_config, client_config = build_jiuwen_model_configs(
+            self.model_id, self.tenant_id
+        )
+        self.logger.info(
+            f"[jiuwen-adapter] model_id={self.model_id}, tenant_id={self.tenant_id}, "
+            f"api_base={client_config.api_base}, model={request_config.model_name}, "
+            f"timeout={getattr(client_config, 'timeout', None)}, max_retries={getattr(client_config, 'max_retries', None)}"
+        )
+        FeedbackPromptBuilder, _ = _lazy_import_jiuwen_builders()
+
+        builder = FeedbackPromptBuilder(
+            model_config=request_config,
+            model_client_config=client_config,
+        )
+
+        try:
+            result = run_async(
+                builder.build(
+                    prompt=prompt,
+                    feedback=feedback,
+                    mode=mode,
+                    start_pos=start_pos,
+                    end_pos=end_pos,
+                    language=normalize_language(language),
+                )
+            )
+            if result is None:
+                raise JiuwenSDKError("Jiuwen FeedbackPromptBuilder 返回为空")
+            return _unwrap_prompt_response(str(result))
+        except JiuwenSDKError as e:
+            # Already the adapter's own error type: log it, do not wrap it again.
+            self.logger.error(f"Jiuwen FeedbackPromptBuilder 调用失败: {e}")
+            raise
+        except Exception as e:
+            self.logger.error(f"Jiuwen FeedbackPromptBuilder 调用失败: {e}")
+            raise JiuwenSDKError(f"优化调用失败: {e}") from e
+
+    def optimize_badcase(
+        self,
+        prompt: str,
+        bad_cases: List,
+        language: str = "zh",
+    ) -> str:
+        """
+        Optimize ``prompt`` with the Jiuwen BadCasePromptBuilder.
+
+        Args:
+            prompt: Prompt text to optimize.
+            bad_cases: Bad cases; each one is converted with
+                ``to_jiuwen_evaluated_case`` before the call.
+            language: Language code, normalized before it is forwarded.
+
+        Returns:
+            The builder output with any JSON or markdown wrapper removed.
+
+        Raises:
+            JiuwenSDKError: the SDK is unavailable, the call failed, or the
+                builder returned ``None``.
+        """
+        self._ensure_available()
+
+        _, BadCasePromptBuilder = _lazy_import_jiuwen_builders()
+
+        request_config, client_config = build_jiuwen_model_configs(
+            self.model_id, self.tenant_id
+        )
+        builder = BadCasePromptBuilder(
+            model_config=request_config,
+            model_client_config=client_config,
+        )
+
+        jiuwen_cases = [to_jiuwen_evaluated_case(bc) for bc in bad_cases]
+
+        try:
+            result = run_async(
+                builder.build(
+                    prompt=prompt,
+                    cases=jiuwen_cases,
+                    language=normalize_language(language),
+                )
+            )
+            if result is None:
+                raise JiuwenSDKError("Jiuwen BadCasePromptBuilder 返回为空")
+            return _unwrap_prompt_response(str(result))
+        except JiuwenSDKError as e:
+            # Already the adapter's own error type: log it, do not wrap it again.
+            self.logger.error(f"Jiuwen BadCasePromptBuilder 调用失败: {e}")
+            raise
+        except Exception as e:
+            self.logger.error(f"Jiuwen BadCasePromptBuilder 调用失败: {e}")
+            raise JiuwenSDKError(f"BadCasePromptBuilder 调用失败: {e}") from e
+
+    def generate(self, **kwargs) -> dict:
+        """Prompt generation through Jiuwen; not implemented yet.
+
+        Raises:
+            JiuwenSDKError: always (after the availability check).
+        """
+        self._ensure_available()
+        raise JiuwenSDKError("Jiuwen 提示词生成能力尚未实现")
diff --git a/backend/agents/agent_run_manager.py b/backend/agents/agent_run_manager.py
index 5f7920f17..83a05aa2a 100644
--- a/backend/agents/agent_run_manager.py
+++ b/backend/agents/agent_run_manager.py
@@ -1,69 +1,107 @@
-import logging
-import threading
-from typing import Dict
-
-from nexent.core.agents.agent_model import AgentRunInfo
-
-logger = logging.getLogger("agent_run_manager")
-
-
-class AgentRunManager:
- _instance = None
- _lock = threading.Lock()
-
- def __new__(cls):
- if cls._instance is None:
- with cls._lock:
- if cls._instance is None:
- cls._instance = super(AgentRunManager, cls).__new__(cls)
- cls._instance._initialized = False
- return cls._instance
-
- def __init__(self):
- if not self._initialized:
- # user_id:conversation_id -> agent_run_info
- self.agent_runs: Dict[str, AgentRunInfo] = {}
- self._initialized = True
-
- def _get_run_key(self, conversation_id: int, user_id: str) -> str:
- """Generate unique key for agent run using user_id and conversation_id"""
- return f"{user_id}:{conversation_id}"
-
- def register_agent_run(self, conversation_id: int, agent_run_info, user_id: str):
- """register agent run instance"""
- with self._lock:
- run_key = self._get_run_key(conversation_id, user_id)
- self.agent_runs[run_key] = agent_run_info
- logger.info(
- f"register agent run instance, user_id: {user_id}, conversation_id: {conversation_id}")
-
- def unregister_agent_run(self, conversation_id: int, user_id: str):
- """unregister agent run instance"""
- with self._lock:
- run_key = self._get_run_key(conversation_id, user_id)
- if run_key in self.agent_runs:
- del self.agent_runs[run_key]
- logger.info(
- f"unregister agent run instance, user_id: {user_id}, conversation_id: {conversation_id}")
- else:
- logger.info(
- f"no agent run instance found for user_id: {user_id}, conversation_id: {conversation_id}")
-
- def get_agent_run_info(self, conversation_id: int, user_id: str):
- """get agent run instance"""
- run_key = self._get_run_key(conversation_id, user_id)
- return self.agent_runs.get(run_key)
-
- def stop_agent_run(self, conversation_id: int, user_id: str) -> bool:
- """stop agent run for specified conversation_id and user_id"""
- agent_run_info = self.get_agent_run_info(conversation_id, user_id)
- if agent_run_info is not None:
- agent_run_info.stop_event.set()
- logger.info(
- f"agent run stopped, user_id: {user_id}, conversation_id: {conversation_id}")
- return True
- return False
-
-
-# create singleton instance
-agent_run_manager = AgentRunManager()
+import logging
+import threading
+from typing import Dict, Union
+
+from nexent.core.agents.agent_model import AgentRunInfo
+from nexent.core.agents.agent_context import ContextManager, ContextManagerConfig
+
+logger = logging.getLogger("agent_run_manager")
+
+
+class AgentRunManager:
+    """
+    Singleton registry of agent runs and conversation-level ContextManagers.
+
+    Runs are keyed by ``"{user_id}:{conversation_id}"``. ContextManagers and
+    active-run counters are keyed by ``str(conversation_id)``. Mutations are
+    serialized through the class-level lock.
+    """
+    _instance = None
+    _lock = threading.Lock()
+
+    def __new__(cls):
+        # Check, lock, then check again so that only one instance is ever created.
+        if cls._instance is None:
+            with cls._lock:
+                if cls._instance is None:
+                    cls._instance = super(AgentRunManager, cls).__new__(cls)
+                    cls._instance._initialized = False
+        return cls._instance
+
+    def __init__(self):
+        # __init__ runs on every AgentRunManager() call; initialise state only once.
+        if not self._initialized:
+            # user_id:conversation_id -> agent_run_info
+            self.agent_runs: Dict[str, AgentRunInfo] = {}
+            # conversation_id -> ContextManager (conversation-level lifetime)
+            self._conversation_context_managers: Dict[str, ContextManager] = {}
+            # conversation_id -> active run count for safe cleanup
+            self._conversation_run_counts: Dict[str, int] = {}
+            self._initialized = True
+
+    def _get_run_key(self, conversation_id: Union[int, str], user_id: str) -> str:
+        """Generate unique key for agent run using user_id and conversation_id"""
+        return f"{user_id}:{conversation_id}"
+
+    def register_agent_run(self, conversation_id: Union[int, str], agent_run_info, user_id: str):
+        """Register an agent run, replacing any run stored under the same key.
+
+        The conversation's active-run counter is incremented only when the key
+        is new, so replacing a run does not inflate the count.
+        """
+        with self._lock:
+            run_key = self._get_run_key(conversation_id, user_id)
+            is_new_run = run_key not in self.agent_runs
+            self.agent_runs[run_key] = agent_run_info
+            if is_new_run:
+                conv_key = str(conversation_id)
+                self._conversation_run_counts[conv_key] = self._conversation_run_counts.get(conv_key, 0) + 1
+            logger.info(
+                f"register agent run instance, user_id: {user_id}, conversation_id: {conversation_id}")
+
+    def unregister_agent_run(self, conversation_id: Union[int, str], user_id: str):
+        """Unregister an agent run and decrement the conversation's run counter.
+
+        A counter that reaches zero is removed, so finished conversations do
+        not leave entries behind.
+        """
+        with self._lock:
+            run_key = self._get_run_key(conversation_id, user_id)
+            if run_key in self.agent_runs:
+                del self.agent_runs[run_key]
+                conv_key = str(conversation_id)
+                remaining = self._conversation_run_counts.get(conv_key, 0) - 1
+                if remaining > 0:
+                    self._conversation_run_counts[conv_key] = remaining
+                else:
+                    self._conversation_run_counts.pop(conv_key, None)
+                logger.info(
+                    f"unregister agent run instance, user_id: {user_id}, conversation_id: {conversation_id}")
+            else:
+                logger.info(
+                    f"no agent run instance found for user_id: {user_id}, conversation_id: {conversation_id}")
+
+    def get_agent_run_info(self, conversation_id: Union[int, str], user_id: str):
+        """Return the registered run info for the key, or None when absent."""
+        run_key = self._get_run_key(conversation_id, user_id)
+        return self.agent_runs.get(run_key)
+
+    def stop_agent_run(self, conversation_id: Union[int, str], user_id: str) -> bool:
+        """Set the stop event of the matching run.
+
+        Returns:
+            True when a run was found and signalled, False otherwise.
+        """
+        agent_run_info = self.get_agent_run_info(conversation_id, user_id)
+        if agent_run_info is not None:
+            agent_run_info.stop_event.set()
+            logger.info(
+                f"agent run stopped, user_id: {user_id}, conversation_id: {conversation_id}")
+            return True
+        return False
+
+    def get_or_create_context_manager(
+        self,
+        conversation_id: Union[int, str],
+        config: ContextManagerConfig,
+        max_steps: int
+    ) -> ContextManager:
+        """Get or create a conversation-level ContextManager instance.
+
+        ``config`` and ``max_steps`` are only used when a new instance is
+        created; an existing instance is returned as is.
+        """
+        conv_key = str(conversation_id)
+        with self._lock:
+            cm = self._conversation_context_managers.get(conv_key)
+            if cm is None:
+                cm = ContextManager(config=config, max_steps=max_steps)
+                self._conversation_context_managers[conv_key] = cm
+                logger.info(
+                    f"Created new ContextManager for conversation_id: {conv_key}")
+            return cm
+
+    def clear_conversation_context_manager(self, conversation_id: Union[int, str]):
+        """Explicitly drop the ContextManager and run counter of a conversation."""
+        conv_key = str(conversation_id)
+        with self._lock:
+            cm = self._conversation_context_managers.pop(conv_key, None)
+            self._conversation_run_counts.pop(conv_key, None)
+            if cm:
+                logger.info(
+                    f"Cleared ContextManager for conversation_id: {conv_key}")
+
+# create singleton instance
+agent_run_manager = AgentRunManager()
diff --git a/backend/agents/create_agent_info.py b/backend/agents/create_agent_info.py
index ea3ba24e8..7e3b42e28 100644
--- a/backend/agents/create_agent_info.py
+++ b/backend/agents/create_agent_info.py
@@ -1,41 +1,130 @@
+import json
import threading
import logging
-from typing import List, Optional
+from typing import Any, Dict, List, Optional
from urllib.parse import urljoin
-from datetime import datetime
from jinja2 import Template, StrictUndefined
from nexent.core.utils.observer import MessageObserver
-from nexent.core.agents.agent_model import AgentRunInfo, ModelConfig, AgentConfig, ToolConfig, ExternalA2AAgentConfig
+from nexent.core.agents.agent_model import AgentRunInfo, ModelConfig, AgentConfig, ToolConfig, ExternalA2AAgentConfig, AgentHistory, AgentVerificationConfig
+from nexent.core.agents.agent_context import ContextManagerConfig
from nexent.memory.memory_service import search_memory_in_levels
-from services.file_management_service import get_llm_model
+from services.file_management_service import get_llm_model, validate_urls_access
from services.vectordatabase_service import (
ElasticSearchService,
get_vector_db_core,
- get_embedding_model,
+ get_embedding_model_by_index_name,
get_rerank_model,
)
from services.remote_mcp_service import get_remote_mcp_server_list
from database.a2a_agent_db import PROTOCOL_JSONRPC
from services.memory_config_service import build_memory_context
-from services.image_service import get_vlm_model
-from database.agent_db import search_agent_info_by_agent_id, query_sub_agents_id_list
+from services.image_service import get_video_understanding_model, get_vlm_model
+from database.agent_db import (
+ search_agent_info_by_agent_id,
+ query_sub_agent_relations,
+ resolve_sub_agent_version_no,
+)
from database.agent_version_db import query_current_version_no
from database.tool_db import search_tools_for_sub_agent
from database.model_management_db import get_model_records, get_model_by_model_id
+from database.knowledge_db import get_knowledge_name_map_by_index_names
from database.client import minio_client
from utils.model_name_utils import add_repo_to_name
from utils.prompt_template_utils import get_agent_prompt_template
from utils.config_utils import tenant_config_manager, get_model_name_from_config
-from consts.const import LOCAL_MCP_SERVER, MODEL_CONFIG_MAPPING, LANGUAGE, DATA_PROCESS_SERVICE
-import re
+from utils.context_utils import build_context_components
+from consts.const import LOCAL_MCP_SERVER, MODEL_CONFIG_MAPPING, LANGUAGE, DATA_PROCESS_SERVICE, MINIO_DEFAULT_BUCKET
+from consts.model import AgentToolParamsRequest, ToolParamsRequest
+from consts.exceptions import ValidationError
logger = logging.getLogger("create_agent_info")
logger.setLevel(logging.DEBUG)
+def _normalize_tool_params_request(tool_params: Optional[ToolParamsRequest | Dict[str, Any]]) -> ToolParamsRequest:
+    """Coerce request-scoped tool parameter overrides into a ToolParamsRequest.
+
+    ``None`` yields an empty request, an existing ToolParamsRequest is returned
+    as is, and a dict is validated through ``ToolParamsRequest.model_validate``.
+
+    Raises:
+        ValidationError: the value is of another type, or the dict fails validation.
+    """
+    if isinstance(tool_params, ToolParamsRequest):
+        return tool_params
+    if tool_params is None:
+        return ToolParamsRequest()
+    if isinstance(tool_params, dict):
+        try:
+            return ToolParamsRequest.model_validate(tool_params)
+        except Exception as exc:
+            raise ValidationError(f"Invalid tool_params payload: {exc}") from exc
+    raise ValidationError("tool_params must be an object.")
+
+
+def _get_agent_tool_overrides(
+    tool_params: Optional[ToolParamsRequest],
+    agent_name: Optional[str],
+) -> Dict[str, Dict[str, Any]]:
+    """Look up the tool overrides registered for one agent, by agent name.
+
+    Returns an empty dict when there are no overrides at all, when no agent
+    name is given, or when that agent has no entry. Otherwise returns a
+    shallow copy of the agent's ``tools`` mapping.
+    """
+    if tool_params is None or not agent_name:
+        return {}
+    agent_override = tool_params.agents.get(agent_name)
+    return {} if agent_override is None else dict(agent_override.tools)
+
+
+def _merge_tool_params(
+    tool_record: Dict[str, Any],
+    override_params: Optional[Dict[str, Any]],
+    extra_params: Optional[Dict[str, Any]] = None,
+) -> Dict[str, Any]:
+    """Merge request overrides on top of tool instance defaults from DB.
+
+    Precedence, lowest to highest: DB defaults, ``override_params``,
+    ``extra_params``.
+
+    Args:
+        tool_record: Tool configuration from database. A missing or ``None``
+            ``"params"`` entry is treated as "no parameters".
+        override_params: Request-scoped overrides from tool_params
+        extra_params: Additional internal params not in DB schema (e.g., document_paths)
+
+    Returns:
+        Merged params dict with DB defaults, overrides, and extra params
+    """
+    # ``or []`` also covers records where the key exists but holds None.
+    merged_params: Dict[str, Any] = {
+        param["name"]: param.get("default")
+        for param in (tool_record.get("params") or [])
+    }
+
+    if override_params:
+        merged_params.update(override_params)
+
+    # Extra params (e.g., internal access control params) always take precedence
+    if extra_params:
+        merged_params.update(extra_params)
+
+    return merged_params
+
+
+def _build_internal_s3_url(file: dict) -> str:
+    """Build a valid S3 URL for internal tools from uploaded file metadata.
+
+    Resolution order:
+      1. ``object_name`` -> ``s3://<MINIO_DEFAULT_BUCKET or "nexent">/<object_name>``
+      2. ``url`` already in ``s3://`` form -> returned unchanged
+      3. ``url`` in the malformed ``s3:/`` form -> repaired to ``s3://``
+      4. any other ``url`` -> treated as a ``bucket/key`` path and prefixed
+         with ``s3://`` (leading slashes are dropped first)
+
+    Returns:
+        The S3 URL, or ``""`` when ``file`` is not a dict, carries no usable
+        location, or only references a browser ``blob:`` URL.
+    """
+    if not isinstance(file, dict):
+        return ""
+
+    object_name = str(file.get("object_name") or "").strip().lstrip("/")
+    if object_name:
+        bucket = MINIO_DEFAULT_BUCKET or "nexent"
+        return f"s3://{bucket}/{object_name}"
+
+    url = str(file.get("url") or "").strip()
+    # blob: URLs only exist inside the browser and cannot be fetched server-side.
+    if not url or url.startswith("blob:") or url.startswith("s3:/blob:"):
+        return ""
+
+    if url.startswith("s3://"):
+        return url
+
+    if url.startswith("s3:/"):
+        return "s3://" + url[len("s3:/"):].lstrip("/")
+
+    # Normalise leading slashes so both "/bucket/key" and "bucket/key" yield a
+    # well-formed "s3://bucket/key" (never the single-slash "s3:/bucket/key").
+    # NOTE(review): URLs with another scheme (http://...) are still prefixed
+    # blindly here - confirm whether such input can reach this function.
+    return "s3://" + url.lstrip("/")
+
+
def _get_skills_for_template(
agent_id: int,
tenant_id: str,
@@ -245,7 +334,9 @@ async def create_model_config_list(tenant_id):
),
url=record["base_url"],
ssl_verify=record.get("ssl_verify", True),
- model_factory=record.get("model_factory")))
+ model_factory=record.get("model_factory"),
+ timeout_seconds=record.get("timeout_seconds"),
+ concurrency_limit=record.get("concurrency_limit")))
# fit for old version, main_model and sub_model use default model
main_model_config = tenant_config_manager.get_model_config(
key=MODEL_CONFIG_MAPPING["llm"], tenant_id=tenant_id)
@@ -256,7 +347,9 @@ async def create_model_config_list(tenant_id):
"model_name") else "",
url=main_model_config.get("base_url", ""),
ssl_verify=main_model_config.get("ssl_verify", True),
- model_factory=main_model_config.get("model_factory")))
+ model_factory=main_model_config.get("model_factory"),
+ timeout_seconds=main_model_config.get("timeout_seconds"),
+ concurrency_limit=main_model_config.get("concurrency_limit")))
model_list.append(
ModelConfig(cite_name="sub_model",
api_key=main_model_config.get("api_key", ""),
@@ -264,7 +357,9 @@ async def create_model_config_list(tenant_id):
"model_name") else "",
url=main_model_config.get("base_url", ""),
ssl_verify=main_model_config.get("ssl_verify", True),
- model_factory=main_model_config.get("model_factory")))
+ model_factory=main_model_config.get("model_factory"),
+ timeout_seconds=main_model_config.get("timeout_seconds"),
+ concurrency_limit=main_model_config.get("concurrency_limit")))
return model_list
@@ -278,18 +373,23 @@ async def create_agent_config(
allow_memory_search: bool = True,
version_no: int = 0,
override_model_id: int | None = None,
+ tool_params: Optional[ToolParamsRequest | Dict[str, Any]] = None,
):
+ normalized_tool_params = _normalize_tool_params_request(tool_params)
agent_info = search_agent_info_by_agent_id(
agent_id=agent_id, tenant_id=tenant_id, version_no=version_no)
# create sub agent
- sub_agent_id_list = query_sub_agents_id_list(
+ sub_agent_relations = query_sub_agent_relations(
main_agent_id=agent_id, tenant_id=tenant_id, version_no=version_no)
managed_agents = []
- for sub_agent_id in sub_agent_id_list:
- # Get the current published version for this sub-agent (from draft version 0)
- sub_agent_version_no = query_current_version_no(
- agent_id=sub_agent_id, tenant_id=tenant_id) or 0
+ for rel in sub_agent_relations:
+ sub_agent_id = rel['selected_agent_id']
+ sub_agent_version_no = resolve_sub_agent_version_no(
+ selected_agent_id=sub_agent_id,
+ selected_agent_version_no=rel.get('selected_agent_version_no'),
+ tenant_id=tenant_id,
+ )
sub_agent_config = await create_agent_config(
agent_id=sub_agent_id,
tenant_id=tenant_id,
@@ -299,13 +399,20 @@ async def create_agent_config(
allow_memory_search=allow_memory_search,
version_no=sub_agent_version_no,
override_model_id=None,
+ tool_params=normalized_tool_params,
)
managed_agents.append(sub_agent_config)
# create external A2A agents (synchronous function, no await needed)
external_a2a_agents = _get_external_a2a_agents(agent_id, tenant_id, version_no)
- tool_list = await create_tool_config_list(agent_id, tenant_id, user_id, version_no=version_no)
+ tool_list = await create_tool_config_list(
+ agent_id,
+ tenant_id,
+ user_id,
+ version_no=version_no,
+ tool_params=normalized_tool_params,
+ )
# Build system prompt: prioritize segmented fields, fallback to original prompt field if not available
duty_prompt = agent_info.get("duty_prompt", "")
@@ -351,6 +458,77 @@ async def create_agent_config(
# Bubble up to streaming layer so it can emit and fall back
raise Exception(f"Failed to retrieve memory list: {e}")
+ # Append active memory tools if memory is enabled
+ if memory_context.user_config.memory_switch and memory_context.memory_config:
+ try:
+ memory_metadata = {
+ "memory_config": memory_context.memory_config,
+ "memory_user_config": memory_context.user_config,
+ "tenant_id": memory_context.tenant_id,
+ "user_id": memory_context.user_id,
+ "agent_id": memory_context.agent_id,
+ }
+
+ store_tool_config = ToolConfig(
+ class_name="StoreMemoryTool",
+ name="store_memory",
+ description=(
+ "Save important information to long-term memory for future recall. "
+ "Use this when the user shares personal preferences, facts about themselves, "
+ "project context, or instructions that should persist across conversations. "
+ "Do NOT store transient information like temporary calculations, information "
+ "already in the knowledge base, or data the user explicitly says to forget."
+ ),
+ inputs=json.dumps({
+ "content": {
+ "type": "string",
+ "description": "The information to remember",
+ "description_zh": "需要记住的信息"
+ }
+ }, ensure_ascii=False),
+ output_type="string",
+ params={},
+ source="local",
+ usage=None,
+ metadata=memory_metadata,
+ )
+ tool_list.append(store_tool_config)
+
+ search_tool_config = ToolConfig(
+ class_name="SearchMemoryTool",
+ name="search_memory",
+ description=(
+ "Search long-term memory for relevant information from previous interactions. "
+ "Use this when you need context about the user's preferences, past decisions, "
+ "or previously discussed topics that aren't in the current conversation. "
+ "The system already provides some memory context automatically -- use this tool "
+ "when you need to search for specific information not already available."
+ ),
+ inputs=json.dumps({
+ "query": {
+ "type": "string",
+ "description": "Natural language query describing what to search for",
+ "description_zh": "描述要搜索内容的自然语言查询"
+ },
+ "top_k": {
+ "type": "integer",
+ "description": "Maximum number of results to return",
+ "description_zh": "返回结果的最大数量",
+ "default": 5,
+ "nullable": True
+ }
+ }, ensure_ascii=False),
+ output_type="string",
+ params={},
+ source="local",
+ usage=None,
+ metadata=memory_metadata,
+ )
+ tool_list.append(search_tool_config)
+ logger.debug("Active memory tools appended to agent tool list")
+ except Exception as e:
+ logger.warning(f"Failed to append active memory tools: {e}")
+
# Build knowledge base summary
knowledge_base_summary = ""
try:
@@ -358,11 +536,15 @@ async def create_agent_config(
if "KnowledgeBaseSearchTool" == tool.class_name:
index_names = tool.params.get("index_names")
if index_names:
+ # Reuse the index_name -> display_name mapping from tool.metadata
+ # (already computed in create_tool_config_list to avoid redundant DB query)
+ index_name_to_display_map = tool.metadata.get("index_name_to_display_map", {}) if tool.metadata else {}
for index_name in index_names:
try:
+ display_name = index_name_to_display_map.get(index_name, index_name)
message = ElasticSearchService().get_summary(index_name=index_name)
summary = message.get("summary", "")
- knowledge_base_summary += f"**{index_name}**: {summary}\n\n"
+ knowledge_base_summary += f"**{display_name}**: {summary}\n\n"
except Exception as e:
logger.warning(
f"Failed to get summary for knowledge base {index_name}: {e}")
@@ -377,6 +559,8 @@ async def create_agent_config(
# Get skills list for prompt template
skills = _get_skills_for_template(agent_id, tenant_id, version_no)
+ is_manager = len(managed_agents) > 0 or len(external_a2a_agents) > 0
+
render_kwargs = {
"duty": duty_prompt,
"constraint": constraint_prompt,
@@ -389,17 +573,49 @@ async def create_agent_config(
"APP_DESCRIPTION": app_description,
"memory_list": memory_list,
"knowledge_base_summary": knowledge_base_summary,
- "time": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
"user_id": user_id,
}
system_prompt = Template(prompt_template["system_prompt"], undefined=StrictUndefined).render(render_kwargs)
model_id_to_use = override_model_id if override_model_id else agent_info.get("model_id")
+ model_max_tokens = 10000
if model_id_to_use is not None:
model_info = get_model_by_model_id(model_id_to_use, tenant_id=tenant_id)
model_name = model_info["display_name"] if model_info is not None else "main_model"
+ if model_info is not None and model_info.get("max_tokens"):
+ model_max_tokens = model_info["max_tokens"]
else:
model_name = "main_model"
+
+ # Use agent-level setting for context management, default to False.
+ # When ContextManager is disabled, do not attach context_components because
+ # downstream runtime may prefer component-based prompt assembly over the
+ # rendered system_prompt, causing the actual model input to diverge from the
+ # template output.
+ enable_context_manager = agent_info.get("enable_context_manager", False)
+ context_components = []
+ if enable_context_manager:
+ context_components = build_context_components(
+ duty=duty_prompt,
+ constraint=constraint_prompt,
+ few_shots=few_shots_prompt,
+ app_name=app_name,
+ app_description=app_description,
+ user_id=user_id,
+ language=language,
+ is_manager=is_manager,
+ tools=render_kwargs["tools"],
+ skills=skills,
+ managed_agents=render_kwargs["managed_agents"],
+ external_a2a_agents=render_kwargs["external_a2a_agents"],
+ memory_list=memory_list,
+ memory_search_query=last_user_query,
+ knowledge_base_summary=knowledge_base_summary,
+ )
+ cm_config = ContextManagerConfig(
+ enabled=enable_context_manager,
+ token_threshold=model_max_tokens,
+ )
agent_config = AgentConfig(
name="undefined" if agent_info["name"] is None else agent_info["name"],
description="undefined" if agent_info["description"] is None else agent_info["description"],
@@ -410,26 +626,55 @@ async def create_agent_config(
agent_id=agent_id
),
tools=tool_list + _get_skill_script_tools(agent_id, tenant_id, version_no),
- max_steps=agent_info.get("max_steps", 10),
+ max_steps=agent_info.get("max_steps", 15),
model_name=model_name,
provide_run_summary=agent_info.get("provide_run_summary", False),
managed_agents=managed_agents,
- external_a2a_agents=external_a2a_agents
+ external_a2a_agents=external_a2a_agents,
+ context_manager_config=cm_config,
+ context_components=context_components,
+ verification_config=AgentVerificationConfig.model_validate(agent_info.get("verification_config") or {}),
)
return agent_config
-async def create_tool_config_list(agent_id, tenant_id, user_id, version_no: int = 0):
- # create tool
+async def create_tool_config_list(
+ agent_id,
+ tenant_id,
+ user_id,
+ version_no: int = 0,
+ tool_params: Optional[ToolParamsRequest | Dict[str, Any]] = None,
+):
tool_config_list = []
langchain_tools = await discover_langchain_tools()
+ normalized_tool_params = _normalize_tool_params_request(tool_params)
# now only admin can modify the agent, user_id is not used
tools_list = search_tools_for_sub_agent(agent_id, tenant_id, version_no=version_no)
+
+ # Look up agent name for use in error messages.
+ # Agent name is optional for tool_params matching (matching uses tool identifiers only),
+ # but we include it in error messages so callers can identify which agent/tool caused a failure.
+ agent_info = search_agent_info_by_agent_id(agent_id=agent_id, tenant_id=tenant_id, version_no=version_no)
+ agent_name = agent_info.get("name") if agent_info else None
+ agent_tool_overrides = _get_agent_tool_overrides(normalized_tool_params, agent_name)
+
+ tool_keys_seen = set()
for tool in tools_list:
- param_dict = {}
- for param in tool.get("params", []):
- param_dict[param["name"]] = param.get("default")
+ tool_identifier = tool.get("name") or tool.get("class_name")
+ if tool_identifier in tool_keys_seen:
+ raise ValidationError(
+ f"Duplicate tool identifier '{tool_identifier}' found in agent '{agent_name or agent_id}'."
+ )
+ tool_keys_seen.add(tool_identifier)
+
+ override_params = None
+ if tool.get("name") in agent_tool_overrides:
+ override_params = agent_tool_overrides[tool.get("name")]
+ elif tool.get("class_name") in agent_tool_overrides:
+ override_params = agent_tool_overrides[tool.get("class_name")]
+
+ param_dict = _merge_tool_params(tool, override_params)
tool_config = ToolConfig(
class_name=tool.get("class_name"),
name=tool.get("name"),
@@ -448,24 +693,62 @@ async def create_tool_config_list(agent_id, tenant_id, user_id, version_no: int
tool_config.metadata = langchain_tool
break
+ # Extract document_paths for KnowledgeBaseSearchTool (internal access control, not in DB schema)
+ document_paths = None
+ if override_params and "document_paths" in override_params:
+ document_paths = override_params.get("document_paths")
+ # Also check using the tool name as key
+ if not document_paths:
+ kb_overrides = agent_tool_overrides.get("knowledge_base_search")
+ if kb_overrides and "document_paths" in kb_overrides:
+ document_paths = kb_overrides.get("document_paths")
+
# special logic for search tools that may use reranking models
if tool_config.class_name == "KnowledgeBaseSearchTool":
- rerank = param_dict.get("rerank", False)
- rerank_model_name = param_dict.get("rerank_model_name", "")
+ rerank = tool_config.params.get("rerank", False)
+ rerank_model_name = tool_config.params.get("rerank_model_name", "")
rerank_model = None
if rerank and rerank_model_name:
rerank_model = get_rerank_model(
tenant_id=tenant_id, model_name=rerank_model_name
)
+ # Build display_name to index_name mapping for LLM parameter conversion
+ # Also build reverse mapping (index_name -> display_name) for knowledge_base_summary
+ index_names = tool_config.params.get("index_names", [])
+ display_name_to_index_map = {}
+ index_name_to_display_map = {}
+ if index_names:
+ knowledge_name_map = get_knowledge_name_map_by_index_names(index_names)
+ # Reverse the mapping: display_name (knowledge_name) -> index_name
+ for idx_name, kb_name in knowledge_name_map.items():
+ display_name_to_index_map[kb_name] = idx_name
+ index_name_to_display_map[idx_name] = kb_name
+
tool_config.metadata = {
"vdb_core": get_vector_db_core(),
- "embedding_model": get_embedding_model(tenant_id=tenant_id),
+ "embedding_model": None,
"rerank_model": rerank_model,
+ "display_name_to_index_map": display_name_to_index_map,
+ "index_name_to_display_map": index_name_to_display_map,
+ # Internal access control: restrict results to specific document paths (path_or_urls)
+ "document_paths": document_paths,
}
+
+ if not index_names:
+ raise ValidationError(
+ f"[{agent_name or agent_id}] knowledge_base_search tool requires index_names, "
+ f"but it is not configured in the agent and not provided via tool_params.")
+
+ embedding_model, _, _ = get_embedding_model_by_index_name(tenant_id, index_names[0])
+ if not embedding_model:
+ raise ValidationError(
+ f"No embedding model found for index '{index_names[0]}'. "
+ f"Please configure an embedding model for this knowledge base.")
+ tool_config.metadata["embedding_model"] = embedding_model
elif tool_config.class_name in ["DifySearchTool", "DataMateSearchTool"]:
- rerank = param_dict.get("rerank", False)
- rerank_model_name = param_dict.get("rerank_model_name", "")
+ rerank = tool_config.params.get("rerank", False)
+ rerank_model_name = tool_config.params.get("rerank_model_name", "")
rerank_model = None
if rerank and rerank_model_name:
rerank_model = get_rerank_model(
@@ -479,12 +762,21 @@ async def create_tool_config_list(agent_id, tenant_id, user_id, version_no: int
tool_config.metadata = {
"llm_model": get_llm_model(tenant_id=tenant_id),
"storage_client": minio_client,
- "data_process_service_url": DATA_PROCESS_SERVICE
+ "data_process_service_url": DATA_PROCESS_SERVICE,
+ "validate_url_access": lambda urls: validate_urls_access(urls, user_id)
}
elif tool_config.class_name == "AnalyzeImageTool":
tool_config.metadata = {
+ # get_vlm_model reads the first multimodal slot, now shown as image understanding.
"vlm_model": get_vlm_model(tenant_id=tenant_id),
"storage_client": minio_client,
+ "validate_url_access": lambda urls: validate_urls_access(urls, user_id)
+ }
+ elif tool_config.class_name in ["AnalyzeAudioTool", "AnalyzeVideoTool"]:
+ tool_config.metadata = {
+ "vlm_model": get_video_understanding_model(tenant_id=tenant_id),
+ "storage_client": minio_client,
+ "validate_url_access": lambda urls: validate_urls_access(urls, user_id)
}
tool_config_list.append(tool_config)
@@ -552,20 +844,167 @@ async def prepare_prompt_templates(
return prompt_templates
-async def join_minio_file_description_to_query(minio_files, query):
+async def join_minio_file_description_to_query(
+    minio_files,
+    query,
+    history=None,
+    max_files: int = 50,
+    max_chars: int = 10000,
+):
+    """
+    Join MinIO file descriptions to the user query.
+
+    Formats uploaded file information into a structured description that lists,
+    per file, the internal S3 URL (for internal tools) and, when present, the
+    presigned_url (for external MCP tools). Files are taken from the current
+    message first and then from historical messages.
+
+    De-duplication uses the internal S3 URL returned by ``_build_internal_s3_url``
+    as the unique key. A maximum file count and a total character limit are
+    enforced to prevent prompt bloat. The character limit applies to the whole
+    returned text: the fixed prefix, the blank-line-separated descriptions, and
+    the suffix that embeds the query.
+
+    Args:
+        minio_files: List of file info dicts from current message upload
+        query: Original user query
+        history: Optional list of historical messages. Each item may be a dict
+            with a "minio_files" key or an object with a ``minio_files`` attribute
+        max_files: Maximum number of files to include (default 50)
+        max_chars: Maximum total characters of the returned text (default 10000)
+
+    Returns:
+        The query with file descriptions wrapped around it, or the query
+        unchanged when no usable file description fits within the limits
+    """
     final_query = query
+    seen_urls: set[str] = set()
+    all_files: list[dict] = []
+
+    # Collect files from current message first (higher priority)
     if minio_files and isinstance(minio_files, list):
-        file_descriptions = []
         for file in minio_files:
-            if isinstance(file, dict) and "url" in file and file["url"] and "name" in file and file["name"]:
-                file_descriptions.append(f"File name: {file['name']}, S3 URL: s3:/{file['url']}")
+            if isinstance(file, dict) and file.get("name") and (file.get("url") or file.get("object_name")):
+                s3_url = _build_internal_s3_url(file)
+                if not s3_url:
+                    continue
+                if s3_url not in seen_urls:
+                    seen_urls.add(s3_url)
+                    all_files.append(file)
+
+    # Collect files from historical messages (lower priority, de-duplicated
+    # against the files already collected from the current message)
+    if history and isinstance(history, list):
+        for msg in history:
+            # History items may be plain dicts or objects exposing a
+            # ``minio_files`` attribute; accept both so attachments are not
+            # silently dropped for object-style items.
+            if isinstance(msg, dict):
+                msg_files = msg.get("minio_files")
+            else:
+                msg_files = getattr(msg, "minio_files", None)
+            if not msg_files or not isinstance(msg_files, (list, tuple)):
+                continue
+            for file in msg_files:
+                if isinstance(file, dict) and file.get("name") and (file.get("url") or file.get("object_name")):
+                    s3_url = _build_internal_s3_url(file)
+                    if not s3_url:
+                        continue
+                    if s3_url not in seen_urls:
+                        seen_urls.add(s3_url)
+                        all_files.append(file)
+
+    # Enforce the file count limit: keep the first entries collected
+    # (current-message files first, then history files in iteration order).
+    # The original count is captured before slicing so the log is accurate.
+    collected_count = len(all_files)
+    file_limit = max(max_files, 0)
+    if collected_count > file_limit:
+        all_files = all_files[:file_limit]
+        logger.debug(f"File list truncated from {collected_count} to {file_limit} files")
+
+    if all_files:
+        file_descriptions: list[str] = []
+        # Fixed overhead that is added only once
+        prefix = "User uploaded files. The file information is as follows:\n"
+        suffix = f"\n\nUser wants to answer questions based on the information in the above files: {query}"
+        fixed_overhead = len(prefix) + len(suffix)
+        # Running length of "\n\n".join(file_descriptions), separators included
+        joined_len = 0
+
+        for file in all_files:
+            s3_url = _build_internal_s3_url(file)
+            presigned_url = file.get("presigned_url", "")
+
+            # Build description with both URLs
+            if presigned_url:
+                desc = (
+                    f"File name: {file['name']}\n"
+                    f"- S3 URL: {s3_url} [for tools WITHOUT [MCP] prefix, like analyze_text_file]\n"
+                    f"- presigned_url: {presigned_url} [for tools WITH [MCP] prefix]"
+                )
+            else:
+                desc = f"File name: {file['name']}, S3 URL: {s3_url} [permanent]"
+
+            # Every description after the first is preceded by a 2-char "\n\n"
+            # separator; joined_len already contains the earlier separators.
+            separator_chars = 2 if file_descriptions else 0
+            candidate_len = joined_len + separator_chars + len(desc)
+
+            # Stop before the complete text would exceed the character limit
+            if candidate_len + fixed_overhead > max_chars:
+                logger.debug(
+                    f"File descriptions truncated at {len(file_descriptions)} files "
+                    f"to stay within {max_chars} character limit"
+                )
+                break
+
+            file_descriptions.append(desc)
+            joined_len = candidate_len
+
         if file_descriptions:
-            final_query = "User uploaded files. The file information is as follows:\n"
-            final_query += "\n".join(file_descriptions) + "\n\n"
-            final_query += f"User wants to answer questions based on the information in the above files: {query}"
+            final_query = prefix + "\n\n".join(file_descriptions) + suffix
+
     return final_query
+def _format_minio_files_for_content(minio_files: Optional[List[dict]], max_files: int = 20) -> str:
+    """Render attached files as a text block suitable for appending to message content.
+
+    Args:
+        minio_files: File info dicts attached to one message
+        max_files: Number of leading entries examined before a summary line
+            ("... and N more files") is emitted instead
+
+    Returns:
+        A block starting with "[Attached files]:" followed by one line per
+        usable file, or an empty string when nothing can be listed
+    """
+    if not minio_files or not isinstance(minio_files, list):
+        return ""
+
+    rendered = []
+    for position, entry in enumerate(minio_files):
+        # Past the cap: summarise the remainder and stop.
+        if position >= max_files:
+            remaining = len(minio_files) - max_files
+            rendered.append(f" - ... (and {remaining} more files)")
+            break
+
+        # Skip anything that is not a dict with a name and a location.
+        if not isinstance(entry, dict):
+            continue
+        if not entry.get("name"):
+            continue
+        if not (entry.get("url") or entry.get("object_name")):
+            continue
+
+        internal_url = _build_internal_s3_url(entry)
+        if not internal_url:
+            continue
+
+        signed_url = entry.get("presigned_url", "")
+        line = f" - {entry['name']}: {internal_url}"
+        if signed_url:
+            line += f" (for non-MCP tools), presigned_url: {signed_url} (for [MCP] tools)"
+        rendered.append(line)
+
+    return "\n[Attached files]:\n" + "\n".join(rendered) if rendered else ""
+
+
+def _convert_history_with_minio_files(history: List) -> Optional[List[AgentHistory]]:
+    """Build AgentHistory entries from history items, folding file info into the text.
+
+    Args:
+        history: History items exposing ``role``, ``content`` and ``minio_files``
+
+    Returns:
+        One AgentHistory per input item, with the formatted attachment block
+        appended to (or used as) the content; None when ``history`` is None
+    """
+    if history is None:
+        return None
+
+    converted = []
+    for entry in history:
+        text = entry.content
+        # Only items that carry attachments get the formatted block.
+        attachment_block = (
+            _format_minio_files_for_content(entry.minio_files) if entry.minio_files else ""
+        )
+        if attachment_block:
+            # Empty/None content is replaced by the block rather than concatenated.
+            text = text + attachment_block if text else attachment_block
+        converted.append(AgentHistory(role=entry.role, content=text))
+    return converted
+
+
def filter_mcp_servers_and_tools(input_agent_config: AgentConfig, mcp_info_dict) -> list:
"""
Filter mcp servers and tools, only keep the actual used mcp servers
@@ -603,6 +1042,7 @@ async def create_agent_run_info(
is_debug: bool = False,
override_version_no: int | None = None,
override_model_id: int | None = None,
+ tool_params: Optional[ToolParamsRequest | Dict[str, Any]] = None,
):
# Determine which version_no to use based on is_debug flag
# If is_debug=false, use the current published version (current_version_no)
@@ -617,7 +1057,11 @@ async def create_agent_run_info(
version_no = 0
logger.info(f"Agent {agent_id} has no published version, using draft version 0")
- final_query = await join_minio_file_description_to_query(minio_files=minio_files, query=query)
+ final_query = await join_minio_file_description_to_query(
+ minio_files=minio_files,
+ query=query,
+ history=history
+ )
model_list = await create_model_config_list(tenant_id)
create_config_kwargs = {
"agent_id": agent_id,
@@ -631,7 +1075,7 @@ async def create_agent_run_info(
if override_model_id is not None:
create_config_kwargs["override_model_id"] = override_model_id
- agent_config = await create_agent_config(**create_config_kwargs)
+ agent_config = await create_agent_config(**create_config_kwargs, tool_params=tool_params)
remote_mcp_list = await get_remote_mcp_server_list(tenant_id=tenant_id, is_need_auth=True)
default_mcp_url = urljoin(LOCAL_MCP_SERVER, "sse")
@@ -646,7 +1090,7 @@ async def create_agent_run_info(
# Filter MCP servers and tools, and build mcp_host with authorization
used_mcp_urls = filter_mcp_servers_and_tools(agent_config, remote_mcp_dict)
- # Build mcp_host list with authorization tokens
+ # Build mcp_host list with authorization tokens and custom headers
mcp_host = []
for url in used_mcp_urls:
# Find the MCP record for this URL
@@ -661,22 +1105,30 @@ async def create_agent_run_info(
"url": url,
"transport": "sse" if url.endswith("/sse") else "streamable-http"
}
- # Add authorization if present
+ headers = {}
auth_token = mcp_record.get("authorization_token")
if auth_token:
- mcp_config["authorization"] = auth_token
+ headers["Authorization"] = auth_token
+ custom_headers = mcp_record.get("custom_headers")
+ if custom_headers and isinstance(custom_headers, dict):
+ headers.update(custom_headers)
+ if headers:
+ mcp_config["headers"] = headers
mcp_host.append(mcp_config)
else:
# Fallback to string format if record not found
mcp_host.append(url)
+ # Convert HistoryItem (from API) to AgentHistory (expected by SDK)
+ converted_history = _convert_history_with_minio_files(history)
+
agent_run_info = AgentRunInfo(
query=final_query,
model_config_list=model_list,
observer=MessageObserver(lang=language),
agent_config=agent_config,
mcp_host=mcp_host,
- history=history,
+ history=converted_history,
stop_event=threading.Event()
)
return agent_run_info
diff --git a/backend/agents/skill_creation_agent.py b/backend/agents/skill_creation_agent.py
index 3dc0cfa80..37c3ec2ad 100644
--- a/backend/agents/skill_creation_agent.py
+++ b/backend/agents/skill_creation_agent.py
@@ -86,7 +86,7 @@ def run_skill_creation_agent(
agent_run_thread(agent_run_info)
-def create_simple_skill_from_request(
+def create_skill_from_request(
system_prompt: str,
user_prompt: str,
model_config_list: List[ModelConfig],
diff --git a/backend/apps/a2a_client_app.py b/backend/apps/a2a_client_app.py
index db7acd108..ea149ac31 100644
--- a/backend/apps/a2a_client_app.py
+++ b/backend/apps/a2a_client_app.py
@@ -5,6 +5,7 @@
Used internally for configuring A2A sub-agents.
"""
import logging
+import uuid
from typing import Annotated, List, Optional
from http import HTTPStatus
@@ -45,6 +46,14 @@ class UpdateAgentProtocolRequest(BaseModel):
)
+class TestNacosConnectionRequest(BaseModel):
+ """Request to test Nacos connectivity without saving the config."""
+ # NOTE(review): the class name starts with "Test"; pytest may try to collect it
+ # if it is imported into a test module -- confirm this causes no warnings.
+ # Address of the Nacos server to probe (required).
+ nacos_addr: str = Field(description="Nacos server address (e.g., http://nacos-server:8848)")
+ # Optional credentials; the test-connection endpoint hands them to NacosClient.
+ nacos_username: Optional[str] = None
+ nacos_password: Optional[str] = None
+ # Namespace to probe; the test-connection endpoint falls back to "public" when empty.
+ namespace_id: Optional[str] = "public"
+
+
# =============================================================================
# External Agent Discovery
# =============================================================================
@@ -102,7 +111,7 @@ async def discover_from_nacos(
results = await a2a_client_service.discover_from_nacos(
nacos_config_id=request.nacos_config_id,
- agent_names=request.agent_names,
+ agent_names=[name.strip() for name in request.agent_names],
tenant_id=tenant_id,
user_id=user_id,
namespace=request.namespace
@@ -482,6 +491,17 @@ class CreateNacosConfigRequest(BaseModel):
description: Optional[str] = None
+class UpdateNacosConfigRequest(BaseModel):
+ """Request to update a Nacos config."""
+ # Every field is optional and defaults to None; the update endpoint forwards
+ # all of them as keyword arguments to a2a_agent_db.update_nacos_config.
+ # NOTE(review): presumably None means "leave unchanged" -- confirm in the DB layer.
+ name: Optional[str] = None
+ nacos_addr: Optional[str] = None
+ nacos_username: Optional[str] = None
+ nacos_password: Optional[str] = None
+ namespace_id: Optional[str] = None
+ description: Optional[str] = None
+ is_active: Optional[bool] = None
+
+
@router.post("/nacos-configs")
async def create_nacos_config(
request: CreateNacosConfigRequest,
@@ -577,6 +597,51 @@ async def get_nacos_config(
)
+@router.put("/nacos-configs/{config_id}")
+async def update_nacos_config(
+    config_id: str,
+    request: UpdateNacosConfigRequest,
+    authorization: Annotated[Optional[str], Header()] = None,
+    http_request: Request = None
+):
+    """Apply the supplied field values to a stored Nacos configuration.
+
+    Responds with 200 and the value returned by the database layer, 404 when
+    that value is falsy, and 500 for any other failure.
+    """
+    try:
+        user_id, tenant_id, _ = get_current_user_info(authorization, http_request)
+
+        # Request fields are forwarded one-to-one to the database layer.
+        field_values = {
+            "name": request.name,
+            "nacos_addr": request.nacos_addr,
+            "nacos_username": request.nacos_username,
+            "nacos_password": request.nacos_password,
+            "namespace_id": request.namespace_id,
+            "description": request.description,
+            "is_active": request.is_active,
+        }
+        updated = a2a_agent_db.update_nacos_config(
+            config_id=config_id,
+            tenant_id=tenant_id,
+            user_id=user_id,
+            **field_values
+        )
+
+        if updated:
+            return JSONResponse(
+                status_code=HTTPStatus.OK,
+                content={"status": "success", "data": updated}
+            )
+
+        raise HTTPException(
+            status_code=HTTPStatus.NOT_FOUND,
+            detail=f"Nacos config {config_id} not found"
+        )
+
+    except HTTPException:
+        # Let the 404 above (and any other HTTPException) through untouched.
+        raise
+    except Exception as exc:
+        logger.error(f"Update Nacos config failed: {exc}", exc_info=True)
+        raise HTTPException(
+            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+            detail="Failed to update Nacos config"
+        )
+
+
@router.delete("/nacos-configs/{config_id}")
async def delete_nacos_config(
config_id: str,
@@ -610,6 +675,62 @@ async def delete_nacos_config(
)
+@router.post("/nacos-configs/test-connection")
+async def test_nacos_connection(
+    request: TestNacosConnectionRequest,
+    authorization: Annotated[Optional[str], Header()] = None,
+    http_request: Request = None
+):
+    """Probe a Nacos server with the given settings; nothing is saved.
+
+    Every outcome is answered with HTTP 200; the probe result is carried in
+    ``data.success`` and ``data.message``.
+
+    NOTE(review): ``get_current_user_info`` runs inside the same ``try``, so a
+    failure there is also reported as HTTP 200 with ``success: False`` --
+    confirm this is intended.
+    """
+    from utils.nacos_client import NacosClient, NacosConnectionError
+
+    def _probe_response(succeeded, message):
+        # Single place that builds the response envelope shared by all outcomes.
+        return JSONResponse(
+            status_code=HTTPStatus.OK,
+            content={
+                "status": "success",
+                "data": {
+                    "success": succeeded,
+                    "message": message
+                }
+            }
+        )
+
+    try:
+        get_current_user_info(authorization, http_request)
+
+        async with NacosClient(
+            nacos_addr=request.nacos_addr,
+            username=request.nacos_username,
+            password=request.nacos_password
+        ) as client:
+            outcome = await client.test_connectivity(namespace=request.namespace_id or "public")
+
+        return _probe_response(outcome["success"], outcome["message"])
+
+    except NacosConnectionError as exc:
+        logger.warning(f"Nacos connection test failed: {exc}")
+        return _probe_response(False, str(exc))
+    except Exception as exc:
+        logger.error(f"Test Nacos connection failed: {exc}", exc_info=True)
+        return _probe_response(False, f"Failed to test Nacos connection: {exc}")
+
+
# =============================================================================
# External Agent Chat
# =============================================================================
@@ -648,11 +769,11 @@ async def chat_with_external_agent(
# Build A2A message format following A2A protocol with parts array
a2a_message = {
+ "message_id": f"msg_{uuid.uuid4().hex}",
"role": "ROLE_USER",
"parts": [
{
"text": request_body.message.strip(),
- "mediaType": "text/plain"
}
],
}
diff --git a/backend/apps/agent_app.py b/backend/apps/agent_app.py
index b4f932dc5..87abbf9e8 100644
--- a/backend/apps/agent_app.py
+++ b/backend/apps/agent_app.py
@@ -1,12 +1,17 @@
+import json
import logging
from http import HTTPStatus
from typing import Optional
from fastapi import APIRouter, Body, Header, HTTPException, Request, Query
from fastapi.encoders import jsonable_encoder
-from starlette.responses import JSONResponse
+from starlette.responses import JSONResponse, Response
+from consts.const import ASSET_OWNER_TENANT_ID
from consts.model import AgentRequest, AgentInfoRequest, AgentIDRequest, ConversationResponse, AgentImportRequest, AgentNameBatchCheckRequest, AgentNameBatchRegenerateRequest, VersionPublishRequest, VersionListResponse, VersionDetailResponse, VersionRollbackRequest, VersionStatusRequest, CurrentVersionResponse, VersionCompareRequest, VersionUpdateRequest
+from consts.exceptions import SkillDuplicateError
+from services.asset_owner_visibility import apply_agent_detail_prompt_visibility
+
from services.agent_service import (
get_agent_info_impl,
get_creating_sub_agent_info_impl,
@@ -22,6 +27,8 @@
get_agent_call_relationship_impl,
clear_agent_new_mark_impl,
get_agent_by_name_impl,
+ export_agent_with_skills_impl,
+ import_agent_with_skills_impl,
)
from services.agent_version_service import (
publish_version_impl,
@@ -38,9 +45,6 @@
)
from utils.auth_utils import get_current_user_info, get_current_user_id
-# Import monitoring utilities
-from utils.monitoring import monitoring_manager
-
agent_runtime_router = APIRouter(prefix="/agent")
agent_config_router = APIRouter(prefix="/agent")
logger = logging.getLogger("agent_app")
@@ -48,7 +52,6 @@
# Define API route
@agent_runtime_router.post("/run")
-@monitoring_manager.monitor_endpoint("agent.run", exclude_params=["authorization"])
async def agent_run_api(agent_request: AgentRequest, http_request: Request, authorization: str = Header(None)):
"""
Agent execution API endpoint
@@ -61,8 +64,11 @@ async def agent_run_api(agent_request: AgentRequest, http_request: Request, auth
)
except Exception as e:
logger.error(f"Agent run error: {str(e)}")
+ # Only expose actual error in debug mode for better diagnosis
+ # Keep generic message in normal mode for user experience
+ error_detail = str(e) if agent_request.is_debug else "Agent run error."
raise HTTPException(
- status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Agent run error.")
+ status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail=error_detail)
@agent_runtime_router.get("/stop/{conversation_id}")
@@ -85,12 +91,14 @@ async def search_agent_info_api(
"""
Search agent info by agent_id and version_no
version_no defaults to 0 (current/draft version)
+ Returns permission field indicating whether the user can edit this agent.
"""
try:
- _, auth_tenant_id = get_current_user_id(authorization)
+ user_id, auth_tenant_id = get_current_user_id(authorization)
# Use explicit tenant_id if provided, otherwise fall back to auth tenant_id
effective_tenant_id = tenant_id or auth_tenant_id
- return await get_agent_info_impl(agent_id, effective_tenant_id, version_no)
+ agent_info = await get_agent_info_impl(agent_id, effective_tenant_id, version_no, user_id)
+ return apply_agent_detail_prompt_visibility(auth_tenant_id, agent_info)
except Exception as e:
logger.error(f"Agent search info error: {str(e)}")
raise HTTPException(
@@ -157,7 +165,8 @@ async def delete_agent_api(
Delete an agent
"""
try:
- user_id, auth_tenant_id, _ = get_current_user_info(authorization, http_request)
+ user_id, auth_tenant_id, _ = get_current_user_info(
+ authorization, http_request)
# Use explicit tenant_id if provided, otherwise fall back to auth tenant_id
effective_tenant_id = tenant_id or auth_tenant_id
await delete_agent_impl(request.agent_id, effective_tenant_id, user_id)
@@ -171,11 +180,22 @@ async def delete_agent_api(
@agent_config_router.post("/export")
async def export_agent_api(request: AgentIDRequest, authorization: Optional[str] = Header(None)):
"""
- export an agent
+ export an agent.
+
+ Returns a ZIP file if the agent has skill instances, otherwise returns plain JSON.
+ The response Content-Type and body differ based on the agent's skill configuration.
"""
try:
- agent_info_str = await export_agent_impl(request.agent_id, authorization)
- return ConversationResponse(code=0, message="success", data=agent_info_str)
+ result = await export_agent_with_skills_impl(request.agent_id, authorization)
+ if isinstance(result, dict) and result.get("_zip"):
+ return Response(
+ content=result["data"],
+ media_type="application/zip",
+ headers={
+ "Content-Disposition": f"attachment; filename=\"{result.get('filename', 'agent_export.zip')}\""
+ }
+ )
+ return ConversationResponse(code=0, message="success", data=result)
except Exception as e:
logger.error(f"Agent export error: {str(e)}")
raise HTTPException(
@@ -185,15 +205,32 @@ async def export_agent_api(request: AgentIDRequest, authorization: Optional[str]
@agent_config_router.post("/import")
async def import_agent_api(request: AgentImportRequest, authorization: Optional[str] = Header(None)):
"""
- import an agent
+ import an agent.
+
+ Accepts both plain JSON (agent without skills) and JSON with embedded skill ZIPs
+ (agent with skills). The skills field, if present, should contain base64-encoded
+ ZIP packages for each skill.
"""
try:
- await import_agent_impl(
- request.agent_info,
- authorization,
- force_import=request.force_import
- )
+ if request.skills:
+ await import_agent_with_skills_impl(
+ request.agent_info,
+ request.skills,
+ authorization,
+ force_import=request.force_import
+ )
+ else:
+ await import_agent_impl(
+ request.agent_info,
+ authorization,
+ force_import=request.force_import
+ )
return {}
+ except SkillDuplicateError as exc:
+ raise HTTPException(status_code=409, detail={
+ "type": "skill_duplicate",
+ "duplicate_skills": exc.duplicate_names
+ })
except Exception as e:
logger.error(f"Agent import error: {str(e)}")
raise HTTPException(
@@ -256,10 +293,18 @@ async def list_all_agent_info_api(
list all agent info
"""
try:
- user_id, auth_tenant_id, _ = get_current_user_info(authorization, request)
- # Use explicit tenant_id if provided, otherwise fall back to auth tenant_id
- effective_tenant_id = tenant_id or auth_tenant_id
- return await list_all_agent_info_impl(tenant_id=effective_tenant_id, user_id=user_id)
+ user_id, tenant_id, _ = get_current_user_info(
+ authorization, request)
+
+ agent_list = await list_all_agent_info_impl(
+ tenant_id=tenant_id, user_id=user_id
+ )
+ if tenant_id != ASSET_OWNER_TENANT_ID:
+ asset_agent_list = await list_all_agent_info_impl(
+ tenant_id=ASSET_OWNER_TENANT_ID, user_id=user_id
+ )
+ return agent_list + asset_agent_list
+ return agent_list
except Exception as e:
logger.error(f"Agent list error: {str(e)}")
raise HTTPException(
@@ -308,7 +353,8 @@ async def publish_version_api(
raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(e))
except Exception as e:
logger.error(f"Publish version error: {str(e)}")
- raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Publish version error.")
+ raise HTTPException(
+ status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Publish version error.")
@agent_config_router.post("/{agent_id}/versions/compare")
@@ -333,7 +379,8 @@ async def compare_versions_api(
raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(e))
except Exception as e:
logger.error(f"Compare versions error: {str(e)}")
- raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Compare versions error.")
+ raise HTTPException(
+ status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Compare versions error.")
@agent_config_router.get("/{agent_id}/versions", response_model=VersionListResponse)
@@ -344,14 +391,14 @@ async def get_version_list_api(
authorization: Optional[str] = Header(None),
request: Request = None
):
- """
+ """
Get version list for an agent
"""
try:
- user_id, auth_tenant_id, _ = get_current_user_info(authorization, request)
+ _, auth_tenant_id, _ = get_current_user_info(
+ authorization, request)
# Use explicit tenant_id if provided, otherwise fall back to auth tenant_id
effective_tenant_id = tenant_id or auth_tenant_id
- logger.info(f"Get version list for agent_id: {agent_id}, tenant_id: {effective_tenant_id}")
result = get_version_list_impl(
agent_id=agent_id,
tenant_id=effective_tenant_id,
@@ -360,7 +407,8 @@ async def get_version_list_api(
return JSONResponse(status_code=HTTPStatus.OK, content=jsonable_encoder(result))
except Exception as e:
logger.error(f"Get version list error: {str(e)}")
- raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Get version list error.")
+ raise HTTPException(
+ status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Get version list error.")
@agent_config_router.get("/{agent_id}/versions/{version_no}", response_model=VersionDetailResponse)
@@ -384,7 +432,9 @@ async def get_version_api(
raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail=str(e))
except Exception as e:
logger.error(f"Get version detail error: {str(e)}")
- raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Get version detail error.")
+ raise HTTPException(
+ status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Get version detail error.")
+
@agent_config_router.get("/{agent_id}/versions/{version_no}/detail", response_model=VersionDetailResponse)
async def get_version_detail_api(
@@ -407,7 +457,8 @@ async def get_version_detail_api(
raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail=str(e))
except Exception as e:
logger.error(f"Get version detail error: {str(e)}")
- raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Get version detail error.")
+ raise HTTPException(
+ status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Get version detail error.")
@agent_config_router.post("/{agent_id}/versions/{version_no}/rollback")
@@ -417,9 +468,10 @@ async def rollback_version_api(
authorization: str = Header(None),
):
"""
- Rollback to a specific version by updating current_version_no only.
- This does NOT create a new version - the draft will point to the target version.
- Use the publish endpoint to create an actual new version after rollback.
+ Rollback to a specific version by restoring draft data from that version.
+ This copies the target version's snapshot (agent, tools, relations, skills)
+ into the draft (version_no=0) and updates current_version_no.
+ The user can then edit or re-publish from the restored state.
"""
try:
_, tenant_id = get_current_user_id(authorization)
@@ -433,7 +485,8 @@ async def rollback_version_api(
raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(e))
except Exception as e:
logger.error(f"Rollback version error: {str(e)}")
- raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Rollback version error.")
+ raise HTTPException(
+ status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Rollback version error.")
@agent_config_router.patch("/{agent_id}/versions/{version_no}/status")
@@ -460,7 +513,8 @@ async def update_version_status_api(
raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(e))
except Exception as e:
logger.error(f"Update version status error: {str(e)}")
- raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Update version status error.")
+ raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+ detail="Update version status error.")
@agent_config_router.put("/{agent_id}/versions/{version_no}")
@@ -488,7 +542,8 @@ async def update_version_api(
raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(e))
except Exception as e:
logger.error(f"Update version error: {str(e)}")
- raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Update version error.")
+ raise HTTPException(
+ status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Update version error.")
@agent_config_router.delete("/{agent_id}/versions/{version_no}")
@@ -513,7 +568,8 @@ async def delete_version_api(
raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(e))
except Exception as e:
logger.error(f"Delete version error: {str(e)}")
- raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Delete version error.")
+ raise HTTPException(
+ status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Delete version error.")
@agent_config_router.get("/{agent_id}/current_version", response_model=CurrentVersionResponse)
@@ -535,7 +591,8 @@ async def get_current_version_api(
raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail=str(e))
except Exception as e:
logger.error(f"Get current version error: {str(e)}")
- raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Get current version error.")
+ raise HTTPException(
+ status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Get current version error.")
@agent_config_router.get("/published_list")
@@ -548,10 +605,19 @@ async def list_published_agents_api(
"""
try:
user_id, tenant_id, _ = get_current_user_info(authorization, request)
- return await list_published_agents_impl(tenant_id=tenant_id, user_id=user_id)
+ agent_list = await list_published_agents_impl(
+ tenant_id=tenant_id, user_id=user_id
+ )
+ if tenant_id != ASSET_OWNER_TENANT_ID:
+ asset_agent_list = await list_published_agents_impl(
+ tenant_id=ASSET_OWNER_TENANT_ID, user_id=user_id
+ )
+ return agent_list + asset_agent_list
+ return agent_list
except Exception as e:
logger.error(f"Published agents list error: {str(e)}")
raise HTTPException(
status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Published agents list error."
)
+
diff --git a/backend/apps/agent_repository_app.py b/backend/apps/agent_repository_app.py
new file mode 100644
index 000000000..e9da2fde0
--- /dev/null
+++ b/backend/apps/agent_repository_app.py
@@ -0,0 +1,134 @@
+import logging
+from http import HTTPStatus
+from typing import Optional
+
+from fastapi import APIRouter, Body, Header, HTTPException, Query
+from starlette.responses import JSONResponse
+
+from consts.exceptions import SkillDuplicateError, UnauthorizedError
+from services.agent_repository_service import (
+ create_agent_repository_listing_impl,
+ import_agent_from_repository_impl,
+ list_agent_repository_listings_impl,
+ update_agent_repository_status_impl,
+)
+from utils.auth_utils import get_current_user_id
+
+agent_repository_router = APIRouter(prefix="/repository/agent")
+logger = logging.getLogger("agent_repository_app")
+
+
+@agent_repository_router.get("")
+async def list_agent_repository_listings_api(
+ status: Optional[str] = Query(None, description="Filter by listing status"),
+ authorization: str = Header(None),
+):
+ """List marketplace repository listings, optionally filtered by status.
+
+ Args:
+ status: Optional listing status; forwarded unchanged to the service layer.
+ authorization: Value of the Authorization header identifying the caller.
+
+ Returns:
+ JSONResponse with status 200 whose content is the service-layer result.
+
+ Raises:
+ HTTPException: 401 on UnauthorizedError, 400 on ValueError, 500 on any other error.
+ """
+ try:
+ # Return value is discarded; the call is made only so that an auth failure
+ # surfaces here (presumably as UnauthorizedError - confirm in auth_utils).
+ get_current_user_id(authorization)
+ result = list_agent_repository_listings_impl(status=status)
+ return JSONResponse(status_code=HTTPStatus.OK, content=result)
+ except UnauthorizedError as e:
+ raise HTTPException(status_code=HTTPStatus.UNAUTHORIZED, detail=str(e))
+ except ValueError as e:
+ raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(e))
+ except Exception as e:
+ # Log the real error but return a fixed message so internals are not exposed.
+ logger.error(f"List agent repository listings error: {str(e)}")
+ raise HTTPException(
+ status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+ detail="List agent repository listings error.",
+ )
+
+
+@agent_repository_router.patch("/{agent_repository_id}/status")
+async def update_agent_repository_status_api(
+ agent_repository_id: int,
+ status: str = Body(
+ ...,
+ embed=True,
+ description=(
+ "New status: NOT_SHARED (未共享) / PENDING_REVIEW (待审核) / "
+ "REJECTED (审核驳回) / SHARED (已共享)"
+ ),
+ ),
+ authorization: str = Header(None),
+):
+ """Change the status of a marketplace repository listing.
+
+ Args:
+ agent_repository_id: Identifier of the listing, taken from the URL path.
+ status: Required new status; with embed=True it is read from a JSON object
+ under the "status" key. The description names NOT_SHARED, PENDING_REVIEW,
+ REJECTED and SHARED.
+ authorization: Value of the Authorization header identifying the caller.
+
+ Returns:
+ JSONResponse with status 200 whose content is the service-layer result.
+
+ Raises:
+ HTTPException: 401 on UnauthorizedError, 400 on ValueError, 500 on any other error.
+ """
+ try:
+ # Only the user id is needed; the second element of the tuple is ignored.
+ user_id, _ = get_current_user_id(authorization)
+ result = update_agent_repository_status_impl(
+ agent_repository_id=agent_repository_id,
+ status=status,
+ user_id=user_id,
+ )
+ return JSONResponse(status_code=HTTPStatus.OK, content=result)
+ except UnauthorizedError as e:
+ raise HTTPException(status_code=HTTPStatus.UNAUTHORIZED, detail=str(e))
+ except ValueError as e:
+ raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(e))
+ except Exception as e:
+ # Log the real error but return a fixed message so internals are not exposed.
+ logger.error(f"Update agent repository status error: {str(e)}")
+ raise HTTPException(
+ status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+ detail="Update agent repository status error.",
+ )
+
+
+@agent_repository_router.post("/{agent_id}/versions/{version_no}")
+async def create_agent_repository_listing_api(
+ agent_id: int,
+ version_no: int,
+ authorization: str = Header(None),
+):
+ """Create or update a marketplace repository listing from an agent version snapshot.
+
+ Args:
+ agent_id: Identifier of the agent, taken from the URL path.
+ version_no: Version number of the snapshot, taken from the URL path.
+ authorization: Value of the Authorization header identifying the caller.
+
+ Returns:
+ JSONResponse with status 200 whose content is the awaited service-layer result.
+
+ Raises:
+ HTTPException: 401 on UnauthorizedError, 400 on ValueError, 500 on any other error.
+ """
+ try:
+ # Both the user and the tenant resolved from the header are forwarded to the service.
+ user_id, tenant_id = get_current_user_id(authorization)
+ result = await create_agent_repository_listing_impl(
+ agent_id=agent_id,
+ tenant_id=tenant_id,
+ user_id=user_id,
+ version_no=version_no,
+ )
+ return JSONResponse(status_code=HTTPStatus.OK, content=result)
+ except UnauthorizedError as e:
+ raise HTTPException(status_code=HTTPStatus.UNAUTHORIZED, detail=str(e))
+ except ValueError as e:
+ raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(e))
+ except Exception as e:
+ # Log the real error but return a fixed message so internals are not exposed.
+ logger.error(f"Create agent repository listing error: {str(e)}")
+ raise HTTPException(
+ status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+ detail="Create agent repository listing error.",
+ )
+
+
+@agent_repository_router.post("/{agent_repository_id}/import")
+async def import_agent_from_repository_api(
+ agent_repository_id: int,
+ authorization: Optional[str] = Header(None),
+):
+ """Import an agent tree from a marketplace repository listing into the current tenant.
+
+ Args:
+ agent_repository_id: Identifier of the listing, taken from the URL path.
+ authorization: Raw Authorization header; passed to the service untouched
+ rather than being resolved to a user here.
+
+ Returns:
+ JSONResponse with status 200 and an empty JSON object.
+
+ Raises:
+ HTTPException: 401 on UnauthorizedError; 409 on SkillDuplicateError with a
+ detail listing the duplicate skill names; 404 on ValueError; 500 otherwise.
+ """
+ try:
+ await import_agent_from_repository_impl(
+ agent_repository_id=agent_repository_id,
+ authorization=authorization,
+ )
+ return JSONResponse(status_code=HTTPStatus.OK, content={})
+ except UnauthorizedError as e:
+ raise HTTPException(status_code=HTTPStatus.UNAUTHORIZED, detail=str(e))
+ except SkillDuplicateError as exc:
+ # Structured detail so the client can show which skills collided.
+ raise HTTPException(
+ status_code=HTTPStatus.CONFLICT,
+ detail={
+ "type": "skill_duplicate",
+ "duplicate_skills": exc.duplicate_names,
+ },
+ )
+ except ValueError as e:
+ # NOTE(review): ValueError maps to 404 here, while the other endpoints in
+ # this router map it to 400 - confirm this difference is intentional.
+ raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail=str(e))
+ except Exception as e:
+ # Log the real error but return a fixed message so internals are not exposed.
+ logger.error(f"Import agent from repository error: {str(e)}")
+ raise HTTPException(
+ status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+ detail="Import agent from repository error.",
+ )
diff --git a/backend/apps/aidp_app.py b/backend/apps/aidp_app.py
new file mode 100644
index 000000000..eae9cb678
--- /dev/null
+++ b/backend/apps/aidp_app.py
@@ -0,0 +1,43 @@
+"""
+AIDP App Layer
+FastAPI endpoints for AIDP knowledge base list proxy.
+"""
+import logging
+from http import HTTPStatus
+from typing import Annotated
+
+from fastapi import APIRouter, Query
+from fastapi.responses import JSONResponse
+
+from consts.error_code import ErrorCode
+from consts.exceptions import AppException
+from services.aidp_service import fetch_aidp_knowledge_bases_impl
+
+router = APIRouter(prefix="/aidp")
+logger = logging.getLogger("aidp_app")
+
+
+@router.get("/knowledge-bases")
+async def fetch_aidp_knowledge_bases_api(
+ server_url: Annotated[str, Query(description="AIDP API server URL")],
+ api_key: Annotated[str, Query(description="AIDP API key")],
+ page: Annotated[int, Query(ge=1, description="Page number starting from 1")] = 1,
+ page_size: Annotated[int, Query(ge=1, le=100, description="Page size from 1 to 100")] = 20,
+) -> JSONResponse:
+ """Fetch paginated knowledge bases from the external AIDP API.
+
+ Args:
+ server_url: Required query parameter with the AIDP API server URL.
+ api_key: Required query parameter with the AIDP API key.
+ page: Page number, validated to be >= 1; defaults to 1.
+ page_size: Page size, validated to be between 1 and 100; defaults to 20.
+
+ Returns:
+ JSONResponse with status 200 whose content is the service-layer result.
+
+ Raises:
+ AppException: Re-raised unchanged when the service raises one; any other
+ exception is wrapped in AppException with ErrorCode.AIDP_SERVICE_ERROR.
+ """
+ # NOTE(review): api_key travels in the query string, which commonly ends up in
+ # access logs and browser history - consider a header or request body instead.
+ try:
+ # NOTE(review): the service function is called without await inside an async
+ # endpoint; if it performs blocking network I/O it will stall the event loop - confirm.
+ result = fetch_aidp_knowledge_bases_impl(
+ server_url=server_url,
+ api_key=api_key,
+ page=page,
+ page_size=page_size,
+ )
+ return JSONResponse(status_code=HTTPStatus.OK, content=result)
+ except AppException:
+ # Already in the application's error format; let the app-level handler deal with it.
+ raise
+ except Exception as e:
+ logger.exception("Failed to fetch AIDP knowledge bases: %s", e)
+ # The original exception text is included in the message returned to the caller.
+ raise AppException(
+ ErrorCode.AIDP_SERVICE_ERROR,
+ f"Failed to fetch AIDP knowledge bases: {str(e)}",
+ )
diff --git a/backend/apps/app_factory.py b/backend/apps/app_factory.py
index 219da5b82..02816cec1 100644
--- a/backend/apps/app_factory.py
+++ b/backend/apps/app_factory.py
@@ -101,6 +101,16 @@ async def generic_exception_handler(request, exc):
if isinstance(exc, AppException):
return await app_exception_handler(request, exc)
+ # Handle NexentCapabilityError with a friendly message
+ from adapters.exception import NexentCapabilityError as _NCE
+
+ if isinstance(exc, _NCE):
+ logger.warning(f"NexentCapabilityError: {exc}")
+ return JSONResponse(
+ status_code=400,
+ content={"message": str(exc)},
+ )
+
logger.error(f"Generic Exception: {exc}")
return JSONResponse(
status_code=500,
diff --git a/backend/apps/cas_app.py b/backend/apps/cas_app.py
new file mode 100644
index 000000000..dbf4815f8
--- /dev/null
+++ b/backend/apps/cas_app.py
@@ -0,0 +1,156 @@
+import html
+import logging
+from http import HTTPStatus
+from typing import Optional
+from urllib.parse import parse_qs, urlsplit
+
+from fastapi import APIRouter, HTTPException, Query, Request
+from fastapi.responses import HTMLResponse, JSONResponse, RedirectResponse
+
+from services.cas_service import (
+ CAS_SERVER_URL,
+ CasAuthenticationError,
+ build_login_url,
+ build_renew_url,
+ get_cas_config,
+ login_with_ticket,
+ renew_with_ticket,
+ revoke_from_logout_request,
+)
+
+logger = logging.getLogger(__name__)
+router = APIRouter(prefix="/user/cas", tags=["cas"])
+
+
+@router.get("/config")
+async def config():
+ """Return the value of get_cas_config() wrapped in a {"message", "data"} envelope (HTTP 200)."""
+ return JSONResponse(
+ status_code=HTTPStatus.OK,
+ content={"message": "success", "data": get_cas_config()},
+ )
+
+
+@router.get("/login")
+async def login(redirect: str = Query("/", description="URL to return to after login")):
+ """Redirect the browser (HTTP 302) to the CAS login URL built for `redirect`.
+
+ Args:
+ redirect: URL to return to after login; defaults to "/".
+
+ Raises:
+ HTTPException: 400 with a fixed message when CasAuthenticationError is raised,
+ either by build_login_url or by the redirect-target check.
+ """
+ try:
+ # The built URL must point at the configured CAS server before we redirect to it.
+ login_url = _require_cas_server_redirect(build_login_url(redirect))
+ return RedirectResponse(url=login_url, status_code=HTTPStatus.FOUND)
+ except CasAuthenticationError as exc:
+ # The specific reason is logged only; the client gets a generic message.
+ logger.warning("CAS login rejected: %s", exc)
+ raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail="CAS login is not available")
+
+
+@router.get("/callback")
+async def callback(ticket: str = "", redirect: str = "/"):
+ """Complete a CAS login by exchanging `ticket` via login_with_ticket.
+
+ Args:
+ ticket: CAS service ticket from the query string; defaults to "".
+ redirect: Return URL forwarded to login_with_ticket; defaults to "/".
+
+ Returns:
+ JSONResponse with status 200 carrying the login result under "data".
+
+ Raises:
+ HTTPException: 401 on CasAuthenticationError, 500 on any other error; both
+ use fixed detail messages.
+ """
+ try:
+ result = await login_with_ticket(ticket, redirect)
+ return JSONResponse(
+ status_code=HTTPStatus.OK,
+ content={"message": "CAS login successful", "data": result},
+ )
+ except CasAuthenticationError as exc:
+ # The specific reason is logged only; the client gets a generic message.
+ logger.warning("CAS callback rejected: %s", exc)
+ raise HTTPException(status_code=HTTPStatus.UNAUTHORIZED, detail="CAS authentication failed")
+ except Exception as exc:
+ logger.error(f"CAS callback failed: {exc}")
+ raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="CAS login failed")
+
+
+@router.post("/callback")
+async def callback_logout(request: Request, logout_request: Optional[str] = None):
+ """Handle a POST to the callback URL as a CAS logout request.
+
+ Delegates to _handle_logout_request, tagging log lines with endpoint="callback".
+ """
+ return await _handle_logout_request(request, logout_request, endpoint="callback")
+
+
+@router.get("/renew")
+async def renew():
+ """Redirect (HTTP 302) to the URL returned by build_renew_url().
+
+ If build_renew_url raises CasAuthenticationError, the failure is logged and the
+ "failed" renew HTML response is returned instead of an HTTP error.
+ """
+ try:
+ # NOTE(review): unlike `login`, this URL is not passed through
+ # _require_cas_server_redirect before redirecting - confirm that is intended.
+ return RedirectResponse(url=build_renew_url(), status_code=HTTPStatus.FOUND)
+ except CasAuthenticationError as exc:
+ logger.warning("CAS renew rejected: %s", exc)
+ return _renew_html(False, "CAS renew failed")
+
+
+@router.get("/renew_callback")
+async def renew_callback(ticket: str = ""):
+ """Complete a CAS renew by exchanging `ticket` via renew_with_ticket.
+
+ Args:
+ ticket: CAS service ticket from the query string; defaults to "".
+
+ Returns:
+ On success, a JSONResponse (200) carrying the renew result under "data".
+ With an empty ticket, or on any exception, the "failed" renew HTML response;
+ this endpoint does not raise HTTP errors itself.
+ """
+ # An empty ticket is treated as "no active CAS session" without calling the service.
+ if not ticket:
+ return _renew_html(False, "CAS session is not active")
+ try:
+ result = await renew_with_ticket(ticket)
+ return JSONResponse(
+ status_code=HTTPStatus.OK,
+ content={"message": "CAS renew successful", "data": result},
+ )
+ except Exception as exc:
+ # Every failure is downgraded to a warning plus the failure page.
+ logger.warning(f"CAS renew failed: {exc}")
+ return _renew_html(False, "CAS renew failed")
+
+
+@router.post("/logout_callback")
+async def logout_callback(
+ request: Request,
+ logout_request: Optional[str] = None,
+):
+ """Handle a CAS logout request posted to the dedicated logout callback URL.
+
+ Delegates to _handle_logout_request, tagging log lines with endpoint="logout_callback".
+ """
+ return await _handle_logout_request(request, logout_request, endpoint="logout_callback")
+
+
+async def _handle_logout_request(
+ request: Request,
+ logout_request: Optional[str] = None,
+ endpoint: str = "unknown",
+):
+ """Shared implementation of the CAS single-logout (SLO) POST endpoints.
+
+ Args:
+ request: Incoming request, used to locate the logout payload.
+ logout_request: Payload already bound by the route, if any.
+ endpoint: Label included in log lines to tell the calling routes apart.
+
+ Returns:
+ JSONResponse with status 200 carrying the revoke result under "data".
+ Exceptions from the helpers are not caught here.
+ """
+ logout_request = await _extract_logout_request(request, logout_request)
+ # Only presence and length are logged, never the payload itself.
+ logger.info(
+ "CAS SLO %s received logoutRequest: present=%s length=%s",
+ endpoint,
+ bool(logout_request),
+ len(logout_request or ""),
+ )
+ result = revoke_from_logout_request(logout_request)
+ logger.info("CAS SLO %s revoke result: %s", endpoint, result)
+ return JSONResponse(
+ status_code=HTTPStatus.OK,
+ content={"message": "success", "data": result},
+ )
+
+
+async def _extract_logout_request(request: Request, logout_request: Optional[str] = None) -> str:
+    """Resolve the CAS logout payload from the first source that provides one.
+
+    Sources are tried in this order:
+      1. the `logout_request` value already bound by the route;
+      2. the `logoutRequest` / `logout_request` query parameter;
+      3. the `logoutRequest` / `logout_request` field of a form-encoded body;
+      4. the raw body text itself, when it contains neither field.
+
+    Args:
+        request: Incoming request whose query string and body are inspected.
+        logout_request: Payload already extracted by the framework, if any.
+
+    Returns:
+        The payload as text, or "" when no source provides one.
+    """
+    if logout_request:
+        return logout_request
+
+    query_logout_request = request.query_params.get("logoutRequest") or request.query_params.get("logout_request")
+    if query_logout_request:
+        return query_logout_request
+
+    body = await request.body()
+    # Decode leniently: this endpoint takes callbacks from an external system, and a
+    # strict decode would raise UnicodeDecodeError on a non-UTF-8 body, which nothing
+    # here catches. Invalid bytes become U+FFFD so the revoke step can still run.
+    raw_body = body.decode("utf-8", errors="replace") if body else ""
+    if not raw_body:
+        return ""
+
+    parsed = parse_qs(raw_body)
+    # parse_qs maps each field to a list of values; take the first. When neither
+    # field is present, fall back to treating the whole body as the payload.
+    return (parsed.get("logoutRequest") or parsed.get("logout_request") or [raw_body])[0]
+
+
+def _renew_html(success: bool, reason: str = "") -> HTMLResponse:
+ """Build the HTML response (always HTTP 200) returned by the renew endpoints.
+
+ Args:
+ success: Whether the renew succeeded; mapped to "success" or "failed".
+ reason: Human-readable reason; HTML-escaped before use.
+
+ NOTE(review): the template body is empty in this view, so `status` and
+ `safe_reason` are computed but never used - confirm whether markup was lost.
+ """
+ status = "success" if success else "failed"
+ safe_reason = html.escape(reason)
+ return HTMLResponse(
+ status_code=HTTPStatus.OK,
+ content=f"""
+""",
+ )
+
+
+def _require_cas_server_redirect(url: str) -> str:
+ """Return `url` unchanged if it targets the configured CAS server, else raise.
+
+ The URL is accepted only when its scheme is http or https, it has a network
+ location, and both scheme and network location equal those of CAS_SERVER_URL.
+ The path and query are not inspected.
+
+ Raises:
+ CasAuthenticationError: If any of the checks fails.
+ """
+ parsed_url = urlsplit(url)
+ parsed_cas = urlsplit(CAS_SERVER_URL)
+ if (
+ parsed_url.scheme not in {"http", "https"}
+ or not parsed_url.netloc
+ or parsed_url.scheme != parsed_cas.scheme
+ or parsed_url.netloc != parsed_cas.netloc
+ ):
+ # The rejected URL is written to the log; the exception carries a generic message.
+ logger.warning("Blocked CAS redirect outside configured server: %s", url)
+ raise CasAuthenticationError("Invalid CAS redirect URL")
+ return url
diff --git a/backend/apps/config_app.py b/backend/apps/config_app.py
index fc6267555..9ffadfe5e 100644
--- a/backend/apps/config_app.py
+++ b/backend/apps/config_app.py
@@ -2,17 +2,23 @@
from apps.app_factory import create_app
from apps.agent_app import agent_config_router as agent_router
+from apps.agent_repository_app import agent_repository_router
from apps.config_sync_app import router as config_sync_router
from apps.datamate_app import router as datamate_router
from apps.vectordatabase_app import router as vectordatabase_router
from apps.dify_app import router as dify_router
from apps.idata_app import router as idata_router
-from apps.file_management_app import file_management_config_router as file_manager_router
+from apps.file_management_app import (
+ file_management_config_router as file_manager_router,
+)
from apps.image_app import router as proxy_router
from apps.knowledge_summary_app import router as summary_router
from apps.mock_user_management_app import router as mock_user_management_router
from apps.model_managment_app import router as model_manager_router
+from apps.oauth_app import router as oauth_router
from apps.prompt_app import router as prompt_router
+from apps.prompt_template_app import router as prompt_template_router
+from apps.mcp_management_app import router as mcp_management_router
from apps.remote_mcp_app import router as remote_mcp_router
from apps.skill_app import router as skill_router
from apps.tenant_config_app import router as tenant_config_router
@@ -24,8 +30,13 @@
from apps.user_app import router as user_router
from apps.invitation_app import router as invitation_router
from apps.a2a_client_app import router as a2a_client_router
+from apps.monitoring_app import router as monitoring_router
from apps.a2a_server_app import router as a2a_server_router
+from apps.haotian_app import router as haotian_router
+from apps.aidp_app import router as aidp_router
+from apps.cas_app import router as cas_router
from consts.const import IS_SPEED_MODE
+from services.prompt_template_service import sync_system_default_prompt_template
# Create logger instance
logger = logging.getLogger("base_app")
@@ -33,9 +44,20 @@
# Create FastAPI app with common configurations
app = create_app(title="Nexent Config API", description="Configuration APIs")
+
+@app.on_event("startup")
+async def sync_default_prompt_template_on_startup():
+ """Sync the YAML-backed system default prompt template into the database on startup.
+
+ A failure is logged and swallowed, so the application still starts when the sync fails.
+ """
+ # NOTE(review): FastAPI documents on_event as deprecated in favour of lifespan
+ # handlers - consider migrating when this module is next touched.
+ try:
+ sync_system_default_prompt_template()
+ logger.info("System default prompt template synced successfully.")
+ except Exception as exc:
+ logger.error(f"Failed to sync system default prompt template: {str(exc)}")
+
app.include_router(model_manager_router)
app.include_router(config_sync_router)
app.include_router(agent_router)
+app.include_router(agent_repository_router)
app.include_router(vectordatabase_router)
app.include_router(datamate_router)
app.include_router(voice_router)
@@ -44,6 +66,7 @@
app.include_router(tool_config_router)
app.include_router(dify_router)
app.include_router(idata_router)
+app.include_router(monitoring_router)
# Choose user management router based on IS_SPEED_MODE
if IS_SPEED_MODE:
@@ -53,14 +76,21 @@
logger.info("Normal mode - using real user management router")
app.include_router(user_management_router)
+app.include_router(oauth_router)
+app.include_router(cas_router)
+
app.include_router(summary_router)
app.include_router(prompt_router)
+app.include_router(prompt_template_router)
app.include_router(skill_router)
app.include_router(tenant_config_router)
+app.include_router(mcp_management_router)
app.include_router(remote_mcp_router)
app.include_router(tenant_router)
app.include_router(group_router)
app.include_router(user_router)
app.include_router(invitation_router)
app.include_router(a2a_client_router)
-app.include_router(a2a_server_router)
\ No newline at end of file
+app.include_router(a2a_server_router)
+app.include_router(haotian_router)
+app.include_router(aidp_router)
diff --git a/backend/apps/data_process_app.py b/backend/apps/data_process_app.py
index 9138d5ef1..693eb987e 100644
--- a/backend/apps/data_process_app.py
+++ b/backend/apps/data_process_app.py
@@ -204,9 +204,14 @@ async def get_index_tasks(index_name: str):
Returns tasks that are being processed or waiting to be processed
"""
+ import time
+ start = time.time()
try:
- return await service.get_index_tasks(index_name)
+ result = await service.get_index_tasks(index_name)
+ logger.info(f"[get_index_tasks] index={index_name}, tasks={len(result)}, duration={time.time()-start:.3f}s")
+ return result
except Exception as e:
+ logger.error(f"[get_index_tasks] error: {e}")
raise HTTPException(
status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail=str(e))
diff --git a/backend/apps/file_management_app.py b/backend/apps/file_management_app.py
index 50224c952..427bde6f3 100644
--- a/backend/apps/file_management_app.py
+++ b/backend/apps/file_management_app.py
@@ -14,7 +14,9 @@
from consts.model import ProcessParams
from services.file_management_service import upload_to_minio, upload_files_impl, \
get_file_url_impl, get_file_stream_impl, delete_file_impl, list_files_impl, \
- resolve_preview_file, get_preview_stream
+ resolve_preview_file, get_preview_stream, check_file_access, check_file_access_batch, \
+ resolve_minio_upload_folder
+from utils.auth_utils import get_current_user_id
from utils.file_management_utils import trigger_data_process
logger = logging.getLogger("file_management_app")
@@ -91,37 +93,49 @@ async def upload_files(
folder: str = Form(
"attachments", description="Storage folder path for MinIO (optional)"),
index_name: Optional[str] = Form(
- None, description="Knowledge base index for conflict resolution")
+ None, description="Knowledge base index for conflict resolution"),
+ authorization: Optional[str] = Header(None, alias="Authorization")
):
- if not file:
- raise HTTPException(status_code=HTTPStatus.BAD_REQUEST,
- detail="No files in the request")
-
- errors, uploaded_file_paths, uploaded_filenames = await upload_files_impl(destination, file, folder, index_name)
+ try:
+ if not file:
+ raise HTTPException(status_code=HTTPStatus.BAD_REQUEST,
+ detail="No files in the request")
- if uploaded_file_paths:
- return JSONResponse(
- status_code=HTTPStatus.OK,
- content={
- "message": f"Files uploaded successfully to {destination}, ready for processing.",
- "uploaded_filenames": uploaded_filenames,
- "uploaded_file_paths": uploaded_file_paths,
- "errors": errors
- }
+ user_id, tenant_id = get_current_user_id(authorization)
+ errors, uploaded_file_paths, uploaded_filenames = await upload_files_impl(
+ destination, file, folder, index_name, user_id, uploader_tenant_id=tenant_id
)
- else:
- raise HTTPException(status_code=HTTPStatus.BAD_REQUEST,
- detail="No valid files uploaded")
+
+ if uploaded_file_paths:
+ return JSONResponse(
+ status_code=HTTPStatus.OK,
+ content={
+ "message": f"Files uploaded successfully to {destination}, ready for processing.",
+ "uploaded_filenames": uploaded_filenames,
+ "uploaded_file_paths": uploaded_file_paths,
+ "errors": errors
+ }
+ )
+ else:
+ raise HTTPException(status_code=HTTPStatus.BAD_REQUEST,
+ detail="No valid files uploaded")
+ except HTTPException:
+ raise
+ except Exception as e:
+ logger.error(f"File upload error: {str(e)}")
+ raise HTTPException(
+ status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="File upload error.")
@file_management_config_router.post("/process")
async def process_files(
- files: List[dict] = Body(
- ..., description="List of file details to process, including path_or_url and filename"),
- chunking_strategy: Optional[str] = Body("basic"),
- index_name: str = Body(...),
- destination: str = Body(...),
- authorization: Optional[str] = Header(None)
+ files: Annotated[List[dict], Body(
+ ..., description="List of file details to process, including path_or_url and filename")],
+ index_name: Annotated[str, Body(...)],
+ destination: Annotated[str, Body(...)],
+ chunking_strategy: Annotated[Optional[str], Body(...)] = "basic",
+ model_id: Annotated[Optional[int], Body(...)] = None,
+ authorization: Annotated[Optional[str], Header()] = None
):
"""
Trigger data processing for a list of uploaded files.
@@ -134,7 +148,8 @@ async def process_files(
chunking_strategy=chunking_strategy,
source_type=destination,
index_name=index_name,
- authorization=authorization
+ authorization=authorization,
+ model_id=model_id
)
process_result = await trigger_data_process(files, process_params)
@@ -168,39 +183,48 @@ async def get_storage_file(
"'base64' (return base64-encoded content for images)."
),
),
- expires: int = Query(3600, description="URL validity period (seconds)"),
- filename: Optional[str] = Query(None, description="Original filename for download (optional)")
+ expires: int = Query(86400, description="URL validity period (seconds)"),
+ filename: Optional[str] = Query(None, description="Original filename for download (optional)"),
+ authorization: Optional[str] = Header(None, alias="Authorization")
):
"""
- Get information, download link, or file stream for a single file
+ Get information, download link, or file stream for a single file.
+
+ Access control:
+ - knowledge_base/*: All authenticated users can access
+ - attachments/{user_id}/*: Only the owner (user_id) can access
- **object_name**: File object name
- **download**: Download mode: ignore (default, return file info), stream (return file stream), redirect (redirect to download URL)
- - **expires**: URL validity period in seconds (default 3600)
+ - **expires**: URL validity period in seconds (default 86400 = 24 hours)
- **filename**: Original filename for download (optional, if not provided, will use object_name)
Returns file information, download link, or file content
"""
try:
+ user_id, tenant_id = get_current_user_id(authorization)
+
+ if not check_file_access(object_name, user_id, tenant_id):
+ logger.warning(f"[get_storage_file] Access denied: object_name={object_name}, user_id={user_id}")
+ raise HTTPException(
+ status_code=HTTPStatus.FORBIDDEN,
+ detail="You don't have permission to access this file"
+ )
+
logger.info(f"[get_storage_file] Route matched! object_name={object_name}, download={download}, filename={filename}")
if download == "redirect":
- # return a redirect download URL
result = await get_file_url_impl(object_name=object_name, expires=expires)
return RedirectResponse(url=result["url"])
elif download == "stream":
- # return a readable file stream
file_stream, content_type = await get_file_stream_impl(object_name=object_name)
logger.info(f"Streaming file: object_name={object_name}, content_type={content_type}")
-
- # Use provided filename or extract from object_name
+
download_filename = filename
if not download_filename:
- # Extract filename from object_name (get the last part after the last slash)
download_filename = object_name.split("/")[-1] if "/" in object_name else object_name
-
- # Build Content-Disposition header with proper encoding for non-ASCII characters
+
content_disposition = build_content_disposition_header(download_filename)
-
+
return StreamingResponse(
file_stream,
media_type=content_type,
@@ -211,7 +235,6 @@ async def get_storage_file(
}
)
elif download == "base64":
- # Return base64 encoded file content (primarily for images)
file_stream, content_type = await get_file_stream_impl(object_name=object_name)
try:
data = file_stream.read()
@@ -233,13 +256,13 @@ async def get_storage_file(
},
)
else:
- # return file metadata
return await get_file_url_impl(object_name=object_name, expires=expires)
+ except HTTPException:
+ raise
except Exception as e:
logger.error(f"Failed to get file: object_name={object_name}, error={str(e)}")
raise HTTPException(
- status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
- detail=f"Failed to get file information: {str(e)}"
+ status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Failed to get file."
)
@@ -248,17 +271,38 @@ async def get_storage_file(
async def storage_upload_files(
files: List[UploadFile] = File(..., description="List of files to upload"),
folder: str = Form(
- "attachments", description="Storage folder path (optional)")
+ "attachments", description="Storage folder path (optional)"),
+ authorization: Optional[str] = Header(None, alias="Authorization")
):
"""
- Upload one or more files to MinIO storage
+ Upload one or more files to MinIO storage.
- **files**: List of files to upload
- **folder**: Storage folder path (optional, defaults to 'attachments')
+ Use 'knowledge_base' for shared files accessible by all users.
+ Other folders (like 'attachments') will be isolated by user_id.
Returns upload results including file information and access URLs
"""
- results = await upload_to_minio(files=files, folder=folder)
+ try:
+ user_id, tenant_id = get_current_user_id(authorization)
+
+ actual_folder = resolve_minio_upload_folder(folder, user_id, tenant_id)
+ results = await upload_to_minio(files=files, folder=actual_folder)
+
+ return {
+ "message": f"Processed {len(results)} files",
+ "success_count": sum(1 for r in results if r.get("success", False)),
+ "failed_count": sum(1 for r in results if not r.get("success", False)),
+ "results": results
+ }
+ except HTTPException:
+ raise
+ except Exception as e:
+ logger.error(f"Storage upload error: {str(e)}")
+ raise HTTPException(
+ status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Storage upload error."
+ )
# Return upload results for all files
return {
@@ -274,10 +318,16 @@ async def get_storage_files(
prefix: str = Query("", description="File prefix filter"),
limit: int = Query(100, description="Maximum number of files to return"),
include_urls: bool = Query(
- True, description="Whether to include presigned URLs")
+ True, description="Whether to include presigned URLs"),
+ authorization: Optional[str] = Header(None, alias="Authorization")
):
"""
- Get list of files from MinIO storage
+ Get list of files from MinIO storage.
+
+ Access control:
+ - Returns only files the user has permission to access:
+ - knowledge_base/*: All authenticated users can access
+ - attachments/{user_id}/*: Only the owner's files
- **prefix**: File prefix filter (optional)
- **limit**: Maximum number of files to return (default 100)
@@ -286,8 +336,22 @@ async def get_storage_files(
Returns file list and metadata
"""
try:
+ user_id, tenant_id = get_current_user_id(authorization)
files = await list_files_impl(prefix, limit)
- # Remove URLs if not needed
+
+ if user_id:
+ filtered_files = [
+ f for f in files
+ if f.get("key") and check_file_access(f.get("key"), user_id, tenant_id)
+ ]
+ else:
+ filtered_files = [
+ f for f in files
+ if f.get("key") and f.get("key", "").startswith("knowledge_base/")
+ ]
+
+ files = filtered_files
+
if not include_urls:
for file in files:
if "url" in file:
@@ -297,10 +361,12 @@ async def get_storage_files(
"total": len(files),
"files": files
}
+ except HTTPException:
+ raise
except Exception as e:
+ logger.error(f"Get storage files error: {str(e)}")
raise HTTPException(
- status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
- detail=f"Failed to get file list: {str(e)}"
+ status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Get storage files error."
)
@@ -481,7 +547,7 @@ async def download_datamate_file(
# Build Content-Disposition header with proper encoding for non-ASCII characters
content_disposition = build_content_disposition_header(download_filename)
-
+
return StreamingResponse(
iter([response.content]),
media_type=content_type,
@@ -507,25 +573,41 @@ async def download_datamate_file(
@file_management_config_router.delete("/storage/{object_name:path}")
async def remove_storage_file(
- object_name: str = PathParam(..., description="File object name to delete")
+ object_name: str = PathParam(..., description="File object name to delete"),
+ authorization: Optional[str] = Header(None, alias="Authorization")
):
"""
- Delete file from MinIO storage
+ Delete file from MinIO storage.
+
+ Access control:
+ - knowledge_base/*: Only allow deletion (admin operation)
+ - attachments/{user_id}/*: Only the owner (user_id) can delete
- **object_name**: File object name to delete
Returns deletion operation result
"""
try:
+ user_id, tenant_id = get_current_user_id(authorization)
+
+ if not check_file_access(object_name, user_id, tenant_id):
+ logger.warning(f"[remove_storage_file] Access denied: object_name={object_name}, user_id={user_id}")
+ raise HTTPException(
+ status_code=HTTPStatus.FORBIDDEN,
+ detail="You don't have permission to delete this file"
+ )
+
await delete_file_impl(object_name=object_name)
return {
"success": True,
"message": f"File {object_name} successfully deleted"
}
+ except HTTPException:
+ raise
except Exception as e:
+ logger.error(f"Remove storage file error: {str(e)}")
raise HTTPException(
- status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
- detail=f"Failed to delete file: {str(e)}"
+ status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Remove storage file error."
)
@@ -533,57 +615,83 @@ async def remove_storage_file(
async def get_storage_file_batch_urls(
request_data: dict = Body(...,
description="JSON containing list of file object names"),
- expires: int = Query(3600, description="URL validity period (seconds)")
+ expires: int = Query(3600, description="URL validity period (seconds)"),
+ authorization: Optional[str] = Header(None, alias="Authorization")
):
"""
- Batch get download URLs for multiple files (JSON request)
+ Batch get download URLs for multiple files (JSON request).
+
+ Access control:
+ - knowledge_base/*: All authenticated users can access
+ - attachments/{user_id}/*: Only the owner (user_id) can access
- **request_data**: JSON request body containing object_names list
- - **expires**: URL validity period in seconds (default 3600)
+    - **expires**: URL validity period in seconds (default 3600 = 1 hour)
Returns URL and status information for each file
"""
- # Extract object_names from request body
- object_names = request_data.get("object_names", [])
- if not object_names or not isinstance(object_names, list):
- raise HTTPException(
- status_code=400, detail="Request body must contain object_names array")
+ try:
+ user_id, tenant_id = get_current_user_id(authorization)
- results = []
+ object_names = request_data.get("object_names", [])
+ if not object_names or not isinstance(object_names, list):
+ raise HTTPException(
+ status_code=HTTPStatus.BAD_REQUEST, detail="Request body must contain object_names array")
- for object_name in object_names:
- try:
- # Get file URL
- result = get_file_url_impl(
- object_name=object_name, expires=expires)
- results.append({
- "object_name": object_name,
- "success": result["success"],
- "url": result.get("url"),
- "error": result.get("error")
- })
- except Exception as e:
- results.append({
- "object_name": object_name,
- "success": False,
- "error": str(e)
- })
+ results = []
- return {
- "total": len(results),
- "success_count": sum(1 for r in results if r.get("success", False)),
- "failed_count": sum(1 for r in results if not r.get("success", False)),
- "results": results
- }
+ for object_name in object_names:
+ if not check_file_access(object_name, user_id, tenant_id):
+ results.append({
+ "object_name": object_name,
+ "success": False,
+ "error": "Access denied"
+ })
+ continue
+
+ try:
+ result = get_file_url_impl(object_name=object_name, expires=expires)
+ results.append({
+ "object_name": object_name,
+ "success": result["success"],
+ "url": result.get("url"),
+ "error": result.get("error")
+ })
+ except Exception as e:
+ results.append({
+ "object_name": object_name,
+ "success": False,
+ "error": str(e)
+ })
+
+ return {
+ "total": len(results),
+ "success_count": sum(1 for r in results if r.get("success", False)),
+ "failed_count": sum(1 for r in results if not r.get("success", False)),
+ "results": results
+ }
+ except HTTPException:
+ raise
+ except Exception as e:
+ logger.error(f"Batch URLs error: {str(e)}")
+ raise HTTPException(
+ status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Batch URLs error."
+ )
@file_management_config_router.get("/preview/{object_name:path}")
async def preview_file(
object_name: str = PathParam(..., description="File object name to preview"),
filename: Annotated[Optional[str], Query(description="Original filename for display (optional)")] = None,
range_header: Annotated[Optional[str], Header(alias="range")] = None,
+ authorization: Optional[str] = Header(None, alias="Authorization")
):
"""
- Preview file inline in browser
+ Preview file inline in browser.
+
+ Access control:
+ - knowledge_base/*: All authenticated users can access
+ - attachments/{user_id}/*: Only the owner (user_id) can access
+ - attachments/asset_owner/{user_id}/*: ASSET_OWNER virtual tenant and owner only
- **object_name**: File object name in storage
- **filename**: Original filename for Content-Disposition header (optional)
@@ -592,6 +700,15 @@ async def preview_file(
Returns 206 Partial Content when a valid Range header is present.
"""
try:
+ user_id, tenant_id = get_current_user_id(authorization)
+
+ if not check_file_access(object_name, user_id, tenant_id):
+ logger.warning(f"[preview_file] Access denied: object_name={object_name}, user_id={user_id}")
+ raise HTTPException(
+ status_code=HTTPStatus.FORBIDDEN,
+ detail="You don't have permission to access this file"
+ )
+
actual_name, content_type, total_size = await resolve_preview_file(object_name=object_name)
except FileTooLargeException as e:
logger.warning(f"[preview_file] File too large: object_name={object_name}, error={str(e)}")
@@ -608,13 +725,15 @@ async def preview_file(
except UnsupportedFileTypeException as e:
logger.error(f"[preview_file] Unsupported file type: object_name={object_name}, error={str(e)}")
raise HTTPException(
- status_code=HTTPStatus.BAD_REQUEST,
+ status_code=HTTPStatus.BAD_REQUEST,
detail=f"File format not supported for preview: {str(e)}"
)
+ except HTTPException:
+ raise
except Exception as e:
logger.error(f"[preview_file] Unexpected error: object_name={object_name}, error={str(e)}")
raise HTTPException(
- status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+ status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
detail="Failed to preview file"
)
diff --git a/backend/apps/haotian_app.py b/backend/apps/haotian_app.py
new file mode 100644
index 000000000..c0f3682b5
--- /dev/null
+++ b/backend/apps/haotian_app.py
@@ -0,0 +1,92 @@
+"""
+Haotian App Layer
+FastAPI endpoints for Haotian external knowledge base operations.
+
+This module provides proxy APIs so the frontend does not call external services directly.
+"""
+
+import logging
+from http import HTTPStatus
+from typing import Optional, Dict
+
+from fastapi import APIRouter, Header, HTTPException, Body
+from fastapi.responses import JSONResponse
+from pydantic import BaseModel, Field
+
+from services.haotian_service import (
+ fetch_haotian_knowledge_sets_impl,
+ test_haotian_connection_impl,
+)
+
+router = APIRouter(prefix="/haotian")
+logger = logging.getLogger("haotian_app")
+
+
+class HaotianListRequest(BaseModel):  # JSON body of POST /haotian/knowledge-sets
+    list_url: str = Field(..., description="Haotian knowledge sets list URL")  # required
+    authorization: str = Field(  # required; forwarded to the service layer as external_authorization
+        ..., description="Authorization header value, e.g. 'Bearer xxx'"
+    )
+
+
+class HaotianTestConnectionRequest(BaseModel):  # JSON body of POST /haotian/test-connection
+    list_url: str = Field(..., description="Haotian knowledge sets list URL")  # required
+    authorization: str = Field(  # required; forwarded to the service layer as external_authorization
+        ..., description="Authorization header value, e.g. 'Bearer xxx'"
+    )
+
+
+@router.post("/knowledge-sets")
+async def fetch_haotian_knowledge_sets_api(
+    authorization: Optional[str] = Header(None),
+    request: HaotianListRequest = Body(...),
+) -> JSONResponse:
+    """
+    Fetch knowledge sets from the external Haotian list_url via the service layer; 200 with its result, 400 on any error.
+    """
+    _ = authorization  # NOTE(review): inbound header is accepted but not validated in this handler
+    try:
+        result: Dict[str, object] = await fetch_haotian_knowledge_sets_impl(  # opaque payload for JSONResponse
+            list_url=request.list_url,
+            external_authorization=request.authorization,
+        )
+        return JSONResponse(status_code=HTTPStatus.OK, content=result)
+    except Exception as e:
+        logger.error(f"Failed to fetch Haotian knowledge sets: {e}")
+        raise HTTPException(
+            status_code=HTTPStatus.BAD_REQUEST,
+            detail=f"Failed to fetch Haotian knowledge sets: {str(e)}",
+        )
+
+
+@router.post("/test-connection")
+async def test_haotian_connection_api(
+    authorization: Optional[str] = Header(None),
+    request: HaotianTestConnectionRequest = Body(...),
+) -> JSONResponse:
+    """
+    Probe the Haotian list_url with the supplied credential: 200 when it succeeds, 400 when it does not, 500 on errors.
+    """
+    _ = authorization  # inbound header is discarded; it is not validated in this handler
+    try:
+        reachable, failure_reason = await test_haotian_connection_impl(
+            list_url=request.list_url,
+            external_authorization=request.authorization,
+        )
+        if not reachable:
+            raise HTTPException(
+                status_code=HTTPStatus.BAD_REQUEST,
+                detail=f"Cannot connect to Haotian server: {failure_reason}",
+            )
+        return JSONResponse(
+            status_code=HTTPStatus.OK,
+            content={"success": True, "message": "Connection successful"},
+        )
+    except HTTPException:
+        raise  # keeps the 400 raised above from being rewrapped as a 500
+    except Exception as err:
+        logger.error(f"Error testing Haotian connection: {err}")
+        raise HTTPException(
+            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+            detail=f"Error testing Haotian connection: {str(err)}",
+        )
diff --git a/backend/apps/invitation_app.py b/backend/apps/invitation_app.py
index 2aa3edc9e..55bbac998 100644
--- a/backend/apps/invitation_app.py
+++ b/backend/apps/invitation_app.py
@@ -69,6 +69,12 @@ async def list_invitations_endpoint(
status_code=HTTPStatus.UNAUTHORIZED,
detail=str(exc)
)
+ except ValidationError as exc:
+ logger.warning(f"Invitation list rejected by feature flag: {str(exc)}")
+ raise HTTPException(
+ status_code=HTTPStatus.BAD_REQUEST,
+ detail=str(exc)
+ )
except Exception as exc:
logger.error(f"Unexpected error retrieving invitation list: {str(exc)}")
raise HTTPException(
@@ -131,6 +137,12 @@ async def create_invitation_endpoint(
status_code=HTTPStatus.BAD_REQUEST,
detail=str(exc)
)
+ except ValidationError as exc:
+ logger.warning(f"Invitation creation rejected by feature flag: {str(exc)}")
+ raise HTTPException(
+ status_code=HTTPStatus.BAD_REQUEST,
+ detail=str(exc)
+ )
except DuplicateError as exc:
logger.warning(f"Duplicate invitation code: {str(exc)}")
raise HTTPException(
diff --git a/backend/apps/knowledge_summary_app.py b/backend/apps/knowledge_summary_app.py
index e4e11ace9..ab45170fb 100644
--- a/backend/apps/knowledge_summary_app.py
+++ b/backend/apps/knowledge_summary_app.py
@@ -8,6 +8,7 @@
from consts.model import ChangeSummaryRequest
from services.vectordatabase_service import ElasticSearchService, get_vector_db_core
from utils.auth_utils import get_current_user_id, get_current_user_info
+from utils.config_utils import tenant_config_manager
router = APIRouter(prefix="/summary")
logger = logging.getLogger("knowledge_summary_app")
@@ -31,6 +32,19 @@ async def auto_summary(
authorization, http_request)
service = ElasticSearchService()
+ # Get model_id from tenant config if not provided
+ if model_id is None and tenant_id:
+ try:
+ tenant_config = tenant_config_manager.load_config(tenant_id)
+ model_id_str = tenant_config.get("LLM_ID")
+ if model_id_str:
+ model_id = int(model_id_str)
+ logger.info(f"Using LLM_ID {model_id} from tenant config for auto-summary")
+ else:
+ logger.warning(f"No LLM_ID configured for tenant {tenant_id}, summary may be placeholder")
+ except Exception as e:
+ logger.warning(f"Failed to get LLM_ID from tenant config: {e}")
+
return await service.summary_index_name(
index_name=index_name,
batch_size=batch_size,
diff --git a/backend/apps/mcp_management_app.py b/backend/apps/mcp_management_app.py
new file mode 100644
index 000000000..cfb0c292a
--- /dev/null
+++ b/backend/apps/mcp_management_app.py
@@ -0,0 +1,302 @@
+import logging
+from typing import Optional
+
+from fastapi import APIRouter, Depends, Header, HTTPException, Query, Request
+from fastapi.responses import JSONResponse
+from http import HTTPStatus
+
+from consts.exceptions import (
+ MCPConnectionError,
+ McpNotFoundError,
+ McpValidationError,
+ UnauthorizedError,
+)
+from consts.model import (
+ RegistryListQuery,
+ CommunityListRequest,
+ CommunityPublishRequest,
+ CommunityUpdateRequest,
+)
+from services.mcp_management_service import (
+ list_community_mcp_services,
+ list_community_mcp_tag_stats,
+ list_my_community_mcp_services,
+ list_registry_mcp_services,
+ publish_community_mcp_service,
+ update_community_mcp_service,
+ delete_community_mcp_service,
+)
+from utils.auth_utils import get_current_user_info
+
+router = APIRouter(prefix="/mcp-tools")
+logger = logging.getLogger("mcp_management_app")
+
+
+# ---------------------------------------------------------------------------
+# Registry Endpoints (MCP Registry - external service)
+# ---------------------------------------------------------------------------
+
+@router.get("/registry/list")
+async def list_registry_mcp_services_api(
+    query: RegistryListQuery = Depends(),
+    authorization: Optional[str] = Header(None),
+    http_request: Request = None,
+):
+    """
+    List MCP services from the official MCP Registry; the service-layer result is returned as the response body.
+    """
+    try:
+        get_current_user_info(authorization, http_request)  # result unused; only its failure path matters here
+
+        data = await list_registry_mcp_services(
+            search=query.search,
+            include_deleted=query.include_deleted,
+            updated_since=query.updated_since,
+            version=query.version,
+            cursor=query.cursor,
+            limit=query.limit,
+        )
+        return JSONResponse(
+            status_code=HTTPStatus.OK,
+            content=data,  # no {"status": ..., "data": ...} envelope on this route
+        )
+    except UnauthorizedError as exc:  # -> 401 with the exception text
+        raise HTTPException(
+            status_code=HTTPStatus.UNAUTHORIZED,
+            detail=str(exc),
+        )
+    except HTTPException:  # already an HTTP error: pass through unchanged
+        raise
+    except Exception as exc:  # anything else -> 500; details go to the log only
+        logger.error(f"Failed to list MCP registry services: {exc}")
+        raise HTTPException(
+            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+            detail="Failed to list MCP registry services"
+        )
+
+
+# ---------------------------------------------------------------------------
+# Community Endpoints
+# ---------------------------------------------------------------------------
+
+@router.get("/community/list")
+async def list_community_mcp_services_api(
+    query: CommunityListRequest = Depends(),
+    authorization: Optional[str] = Header(None),
+    http_request: Request = None,
+):
+    """
+    List public community MCP services, filtered by search/tag/transport_type and paged by cursor/limit.
+    """
+    try:
+        get_current_user_info(authorization, http_request)  # result unused; only its failure path matters here
+        data = await list_community_mcp_services(
+            search=query.search,
+            tag=query.tag,
+            transport_type=query.transport_type,
+            cursor=query.cursor,
+            limit=query.limit,
+        )
+        return JSONResponse(
+            status_code=HTTPStatus.OK,
+            content={"status": "success", "data": data},
+        )
+    except UnauthorizedError as exc:  # -> 401 with the exception text
+        raise HTTPException(
+            status_code=HTTPStatus.UNAUTHORIZED,
+            detail=str(exc),
+        )
+    except HTTPException:  # already an HTTP error: pass through unchanged
+        raise
+    except Exception as exc:  # anything else -> 500; details go to the log only
+        logger.error(f"Failed to list MCP community services: {exc}")
+        raise HTTPException(
+            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+            detail="Failed to list MCP community services"
+        )
+
+
+@router.get("/community/tags/stats")
+async def list_community_mcp_tag_stats_api(
+    authorization: Optional[str] = Header(None),
+    http_request: Request = None,
+):
+    """
+    Get community MCP tag statistics, wrapped in a {"status": "success", "data": ...} envelope.
+    """
+    try:
+        get_current_user_info(authorization, http_request)  # result unused; only its failure path matters here
+        stats = list_community_mcp_tag_stats()  # called without await, so a synchronous service function is expected
+        return JSONResponse(
+            status_code=HTTPStatus.OK,
+            content={"status": "success", "data": stats},
+        )
+    except UnauthorizedError as exc:  # -> 401 with the exception text
+        raise HTTPException(
+            status_code=HTTPStatus.UNAUTHORIZED,
+            detail=str(exc),
+        )
+    except HTTPException:  # already an HTTP error: pass through unchanged
+        raise
+    except Exception as exc:  # anything else -> 500; details go to the log only
+        logger.error(f"Failed to list community MCP tag stats: {exc}")
+        raise HTTPException(
+            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+            detail="Failed to list community MCP tag stats"
+        )
+
+
+@router.post("/community/publish")
+async def publish_community_mcp_service_api(
+    payload: CommunityPublishRequest,
+    authorization: Optional[str] = Header(None),
+    http_request: Request = None,
+):
+    """
+    Publish a local MCP service to the community and return the new community_id.
+    """
+    try:
+        user_id, tenant_id, _ = get_current_user_info(authorization, http_request)  # third element is unused
+        community_id = await publish_community_mcp_service(
+            tenant_id=tenant_id,
+            user_id=user_id,
+            mcp_id=payload.mcp_id,
+            name=payload.name,
+            description=payload.description,
+            version=payload.version,
+            tags=payload.tags,
+            mcp_server=payload.mcp_server,
+            config_json=payload.config_json,
+        )
+        return JSONResponse(
+            status_code=HTTPStatus.OK,
+            content={"status": "success", "data": {"community_id": community_id}},
+        )
+    except McpNotFoundError as exc:  # -> 404
+        raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail=str(exc))
+    except McpValidationError as exc:  # -> 400
+        raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(exc))
+    except UnauthorizedError as exc:  # -> 401
+        raise HTTPException(
+            status_code=HTTPStatus.UNAUTHORIZED,
+            detail=str(exc),
+        )
+    except HTTPException:  # already an HTTP error: pass through unchanged
+        raise
+    except Exception as exc:  # anything else -> 500; details go to the log only
+        logger.error(f"Failed to publish MCP community service: {exc}")
+        raise HTTPException(
+            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+            detail="Failed to publish MCP community service"
+        )
+
+
+@router.put("/community/update")
+async def update_community_mcp_service_api(
+    payload: CommunityUpdateRequest,
+    authorization: Optional[str] = Header(None),
+    http_request: Request = None,
+):
+    """
+    Update the community MCP service identified by payload.community_id on behalf of the caller.
+    """
+    try:
+        user_id, tenant_id, _ = get_current_user_info(authorization, http_request)  # third element is unused
+        await update_community_mcp_service(
+            tenant_id=tenant_id,
+            user_id=user_id,
+            community_id=payload.community_id,
+            name=payload.name,
+            description=payload.description,
+            tags=payload.tags,
+            version=payload.version,
+            registry_json=payload.registry_json,
+        )
+        return JSONResponse(
+            status_code=HTTPStatus.OK,
+            content={"status": "success"},
+        )
+    except McpNotFoundError as exc:  # -> 404
+        raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail=str(exc))
+    except McpValidationError as exc:  # -> 400
+        raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(exc))
+    except UnauthorizedError as exc:  # -> 401
+        raise HTTPException(
+            status_code=HTTPStatus.UNAUTHORIZED,
+            detail=str(exc),
+        )
+    except HTTPException:  # already an HTTP error: pass through unchanged
+        raise
+    except Exception as exc:  # anything else -> 500; details go to the log only
+        logger.error(f"Failed to update MCP community service: {exc}")
+        raise HTTPException(
+            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+            detail="Failed to update MCP community service"
+        )
+
+
+@router.delete("/community/delete")
+async def delete_community_mcp_service_api(
+    community_id: int = Query(gt=0),
+    authorization: Optional[str] = Header(None),
+    http_request: Request = None,
+):
+    """
+    Delete the community MCP service with the given positive community_id on behalf of the caller.
+    """
+    try:
+        user_id, tenant_id, _ = get_current_user_info(authorization, http_request)  # third element is unused
+        await delete_community_mcp_service(
+            tenant_id=tenant_id,
+            user_id=user_id,
+            community_id=community_id,
+        )
+        return JSONResponse(
+            status_code=HTTPStatus.OK,
+            content={"status": "success"},
+        )
+    except McpNotFoundError as exc:  # -> 404
+        raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail=str(exc))
+    except UnauthorizedError as exc:  # -> 401
+        raise HTTPException(
+            status_code=HTTPStatus.UNAUTHORIZED,
+            detail=str(exc),
+        )
+    except HTTPException:  # already an HTTP error: pass through unchanged
+        raise
+    except Exception as exc:  # anything else -> 500; details go to the log only
+        logger.error(f"Failed to delete MCP community service: {exc}")
+        raise HTTPException(
+            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+            detail="Failed to delete MCP community service"
+        )
+
+
+@router.get("/community/mine")
+async def list_my_community_mcp_services_api(
+    authorization: Optional[str] = Header(None),
+    http_request: Request = None,
+):
+    """
+    List community MCP services for the caller's tenant (only tenant_id is forwarded to the service layer).
+    """
+    try:
+        _, tenant_id, _ = get_current_user_info(authorization, http_request)  # NOTE(review): user_id is discarded
+        data = await list_my_community_mcp_services(tenant_id=tenant_id)
+        return JSONResponse(
+            status_code=HTTPStatus.OK,
+            content={"status": "success", "data": data},
+        )
+    except UnauthorizedError as exc:  # -> 401 with the exception text
+        raise HTTPException(
+            status_code=HTTPStatus.UNAUTHORIZED,
+            detail=str(exc),
+        )
+    except HTTPException:  # already an HTTP error: pass through unchanged
+        raise
+    except Exception as exc:  # anything else -> 500; details go to the log only
+        logger.error(f"Failed to list my MCP community services: {exc}")
+        raise HTTPException(
+            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+            detail="Failed to list my MCP community services"
+        )
diff --git a/backend/apps/model_managment_app.py b/backend/apps/model_managment_app.py
index 0a5a04139..53dfebb02 100644
--- a/backend/apps/model_managment_app.py
+++ b/backend/apps/model_managment_app.py
@@ -33,7 +33,7 @@
from fastapi.responses import JSONResponse
from fastapi.encoders import jsonable_encoder
from http import HTTPStatus
-from typing import List, Optional
+from typing import Annotated, List, Optional
from services.model_health_service import (
check_model_connectivity,
verify_model_config_connectivity,
@@ -264,6 +264,7 @@ async def get_model_list(authorization: Optional[str] = Header(None)):
Returns each model enriched with repo-qualified `model_name` and a normalized
`connect_status` value.
"""
+
try:
user_id, tenant_id = get_current_user_id(authorization)
logger.debug(
@@ -297,7 +298,8 @@ async def get_llm_model_list(authorization: Optional[str] = Header(None)):
@router.post("/healthcheck")
async def check_model_health(
- display_name: str = Query(..., description="Display name to check"),
+ display_name: Annotated[str, Query(..., description="Display name to check")],
+ model_type: Annotated[str, Query(..., description="...")],
authorization: Optional[str] = Header(None)
):
"""Check and update model connectivity, returning the latest status.
@@ -308,7 +310,7 @@ async def check_model_health(
"""
try:
_, tenant_id = get_current_user_id(authorization)
- result = await check_model_connectivity(display_name, tenant_id)
+ result = await check_model_connectivity(display_name, tenant_id, model_type)
return JSONResponse(status_code=HTTPStatus.OK, content={
"message": "Successfully checked model connectivity",
"data": result
@@ -372,7 +374,10 @@ async def manage_check_model_health(
f"Start to check model connectivity for tenant, user_id: {user_id}, "
f"target_tenant_id: {request.tenant_id}, display_name: {request.display_name}")
- result = await check_model_connectivity(request.display_name, request.tenant_id)
+ result = await check_model_connectivity(
+ request.display_name,
+ request.tenant_id
+ )
return JSONResponse(status_code=HTTPStatus.OK, content={
"message": "Successfully checked model connectivity",
"data": result
diff --git a/backend/apps/monitoring_app.py b/backend/apps/monitoring_app.py
new file mode 100644
index 000000000..f89f4312f
--- /dev/null
+++ b/backend/apps/monitoring_app.py
@@ -0,0 +1,149 @@
+"""
+Model Monitoring API endpoints.
+
+Provides model performance metrics aggregated from model_monitoring_record_t.
+Uses an independent database connection pool to avoid impacting business operations.
+"""
+
+import logging
+from http import HTTPStatus
+from typing import Annotated, Any
+
+from fastapi import APIRouter, Header, HTTPException, Query
+from sqlalchemy import text
+
+from consts.const import (
+ ENABLE_TELEMETRY,
+ MONITORING_DASHBOARD_URL,
+ MONITORING_PROVIDER,
+)
+from consts.model import ConversationResponse
+from database.client import get_monitoring_db_session
+from utils.auth_utils import get_current_user_id
+
+logger = logging.getLogger("monitoring_app")
+
+router = APIRouter(prefix="/monitoring")
+
+
+def _normalize_monitoring_provider(value: str | None) -> str:
+    return (str(value) if value else "otlp").strip().lower()  # falsy input falls back to "otlp"
+
+
+def get_monitoring_status() -> dict[str, Any]:
+    """Return telemetry state and the monitoring UI entrypoint for frontend use; dashboard_url is None when unset or blank."""
+    telemetry_enabled = ENABLE_TELEMETRY
+    provider = _normalize_monitoring_provider(MONITORING_PROVIDER)
+    dashboard_url = (MONITORING_DASHBOARD_URL or "").strip() or None  # None-safe: a missing setting must not raise
+
+    return {
+        "telemetry_enabled": telemetry_enabled,
+        "provider": provider,
+        "dashboard_url": dashboard_url,
+        "dashboard_port": None,  # always None in this implementation
+        "dashboard_path": None,  # always None in this implementation
+    }
+
+
+def _compute_time_range_filter(time_range: str) -> str:
+    """Build the create_time lower-bound SQL condition; unrecognised ranges fall back to 24 hours."""
+    window_hours = 168 if time_range == "7d" else 720 if time_range == "30d" else 24
+    return f"m.create_time >= NOW() - INTERVAL '{window_hours} hours'"
+
+
+def _query_model_metrics_from_db(
+    time_range: str, tenant_id: str | None = None
+) -> list[dict[str, Any]]:
+    """Aggregate per-model metrics (grouped by model_id, model_name) for the time window; returns [] if the query fails."""
+    time_filter = _compute_time_range_filter(time_range)
+    tenant_filter = ""
+    params: dict[str, str] = {}
+    if tenant_id:  # no tenant filter is applied when tenant_id is falsy
+        tenant_filter = "AND m.tenant_id = :tenant_id"  # value is bound as a parameter, not interpolated
+        params["tenant_id"] = tenant_id
+
+    query_sql = f"""
+        SELECT
+            m.model_id,
+            m.model_name,
+            MAX(COALESCE(m.model_type, 'llm')) AS model_type,
+            MAX(COALESCE(m.display_name, split_part(m.model_name, '/', -1), 'Unknown')) AS display_name,
+            COUNT(*) AS request_count,
+            ROUND(
+                COALESCE(
+                    SUM(CASE WHEN m.is_error = TRUE THEN 1 ELSE 0 END)::numeric
+                    * 100.0 / NULLIF(COUNT(*), 0), 0
+                ), 2
+            ) AS error_rate,
+            ROUND(AVG(COALESCE(m.request_duration_ms, 0))::numeric, 1) AS avg_duration,
+            ROUND(AVG(CASE WHEN m.is_streaming = TRUE THEN m.ttft_ms ELSE NULL END)::numeric, 1) AS avg_ttft,
+            ROUND(AVG(CASE WHEN m.is_streaming = TRUE THEN m.generation_rate ELSE NULL END)::numeric, 1) AS token_generation_rate,
+            COALESCE(SUM(COALESCE(m.total_tokens, 0)), 0) AS total_tokens
+        FROM nexent.model_monitoring_record_t m
+        WHERE {time_filter}
+            {tenant_filter}
+            AND m.delete_flag = 'N'
+        GROUP BY m.model_id, m.model_name
+        ORDER BY request_count DESC
+    """
+
+    try:
+        with get_monitoring_db_session() as session:
+            result = session.execute(text(query_sql), params)  # time_filter text comes from a fixed hour table
+            rows = result.fetchall()
+            return [
+                {
+                    "model_id": row.model_id,
+                    "model_name": row.model_name,
+                    "model_type": row.model_type,
+                    "display_name": row.display_name,
+                    "request_count": row.request_count,
+                    "error_rate": float(row.error_rate) if row.error_rate else 0,  # NULL or zero -> 0
+                    "avg_duration": float(row.avg_duration) if row.avg_duration else 0,
+                    "avg_ttft": float(row.avg_ttft) if row.avg_ttft else 0,  # averaged over streaming rows only
+                    "token_generation_rate": float(row.token_generation_rate)
+                    if row.token_generation_rate
+                    else 0,
+                    "total_tokens": int(row.total_tokens) if row.total_tokens else 0,
+                }
+                for row in rows
+            ]
+    except Exception as e:  # best-effort: failures are logged and reported to the caller as "no data"
+        logger.error(f"Failed to query model metrics from DB: {e}")
+        return []
+
+
+@router.get("/models", response_model=ConversationResponse)
+async def list_models_endpoint(
+    time_range: Annotated[str, Query(
+        description="Time range: 24h, 7d, 30d")] = "24h",
+    page: Annotated[int, Query(ge=1, description="Page number")] = 1,
+    page_size: Annotated[int, Query(
+        ge=1, le=100, description="Items per page")] = 20,
+    authorization: Annotated[str | None, Header()] = None,
+):
+    """List models with aggregated monitoring metrics for the caller's tenant, one page per request."""
+    try:
+        _, tenant_id = get_current_user_id(authorization)
+        all_metrics = _query_model_metrics_from_db(time_range, tenant_id)
+
+        # Pagination is applied in memory to the full aggregated result.
+        start = (page - 1) * page_size
+        paginated = all_metrics[start:start + page_size]
+        return ConversationResponse(code=0, message="success", data=paginated)
+    except HTTPException:
+        raise  # keep explicit HTTP status codes instead of rewrapping them as 500
+    except Exception as e:
+        logger.error(f"Failed to list monitoring models: {str(e)}")
+        raise HTTPException(
+            status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail=str(e))
+
+
+@router.get("/status", response_model=ConversationResponse)
+async def get_monitoring_status_endpoint():
+    """Expose get_monitoring_status() in the code/message/data envelope; this handler takes no auth header."""
+    status_payload = get_monitoring_status()
+    return ConversationResponse(
+        code=0, message="success",
+        data=status_payload,
+    )
diff --git a/backend/apps/northbound_app.py b/backend/apps/northbound_app.py
index 3f1580271..9f3b7e323 100644
--- a/backend/apps/northbound_app.py
+++ b/backend/apps/northbound_app.py
@@ -1,12 +1,16 @@
import logging
from http import HTTPStatus
from typing import Optional, Dict, Any
+from urllib.parse import urlparse, unquote
+import re
import uuid
-from fastapi import APIRouter, Body, Header, Request, HTTPException, Query
-from fastapi.responses import JSONResponse
+import httpx
+from fastapi import APIRouter, Body, File, Header, HTTPException, Query, Request, UploadFile
+from fastapi.responses import JSONResponse, StreamingResponse
-from consts.exceptions import LimitExceededError, UnauthorizedError
+from consts.exceptions import LimitExceededError, UnauthorizedError, ConversationNotFoundError
+from consts.model import ToolParamsRequest
from services.northbound_service import (
NorthboundContext,
get_conversation_history,
@@ -15,16 +19,35 @@
stop_chat,
get_agent_info_list,
update_conversation_title,
+ upload_files_for_northbound,
)
from utils.auth_utils import validate_bearer_token, get_user_and_tenant_by_access_key
+from .file_management_app import build_content_disposition_header
+
router = APIRouter(prefix="/nb/v1", tags=["northbound"])
__all__ = ["router", "_get_northbound_context"]
+def _resolve_proxy_download_filename(presigned_url: str, content_disposition: str) -> str:
+    """Pick a download name: header filename* (UTF-8), then header filename, then URL path tail, else "download"."""
+    if content_disposition:
+        filename_star_match = re.search(r"filename\*=UTF-8''([^;]+)", content_disposition, re.IGNORECASE)
+        if filename_star_match:  # parameter names and charset labels are case-insensitive in this header
+            return unquote(filename_star_match.group(1)).strip() or "download"
+
+        filename_match = re.search(r'filename="?([^";]+)"?', content_disposition, re.IGNORECASE)
+        if filename_match:  # strip so an unquoted value followed by " ;" has no trailing blank
+            return filename_match.group(1).strip() or "download"
+
+    path = unquote(urlparse(presigned_url).path)  # query string (signature etc.) is excluded by .path
+    filename = path.split("/")[-1].strip()
+    return filename or "download"
+
+
async def _get_northbound_context(request: Request) -> NorthboundContext:
"""
Build northbound context from request.
@@ -107,13 +130,119 @@ async def health_check():
return {"status": "healthy", "service": "northbound-api"}
-@router.post("/chat/run")
+@router.post(
+    "/chat/attachments/upload",
+    summary="Upload chat attachments for northbound runs",
+    description=(
+        "Upload one or more files for later use in `/nb/v1/chat/run`. "
+        "Successful uploads return reusable `s3_url` references."
+    ),
+)
+async def upload_chat_attachments(
+    request: Request,
+    files: list[UploadFile] = File(
+        ...,
+        description="List of files to upload",
+        examples=["report.pdf", "diagram.png"],
+    ),
+):
+    """Upload chat attachments on behalf of a northbound caller.
+
+    Builds the northbound context from the request, passes the files to
+    ``upload_files_for_northbound`` and returns its result as a 200 JSON
+    response.
+
+    Raises:
+        HTTPException: 429 on ``LimitExceededError``, 400 on ``ValueError``,
+            403 on ``PermissionError`` and 500 on any other failure. An
+            ``HTTPException`` raised further down is passed through unchanged.
+    """
+    try:
+        nb_ctx: NorthboundContext = await _get_northbound_context(request)
+        upload_result = await upload_files_for_northbound(ctx=nb_ctx, files=files)
+        return JSONResponse(status_code=HTTPStatus.OK, content=upload_result)
+    except LimitExceededError as exc:
+        logging.error(f"Too Many Requests: rate limit exceeded: {str(exc)}", exc_info=exc)
+        raise HTTPException(
+            status_code=HTTPStatus.TOO_MANY_REQUESTS,
+            detail="Too Many Requests: rate limit exceeded",
+        )
+    except ValueError as exc:
+        logging.error(f"Invalid northbound upload request: {str(exc)}", exc_info=exc)
+        raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(exc))
+    except PermissionError as exc:
+        logging.error(
+            f"Permission denied while uploading northbound files: {str(exc)}", exc_info=exc)
+        raise HTTPException(status_code=HTTPStatus.FORBIDDEN, detail=str(exc))
+    except HTTPException:
+        # Already an HTTP error: let it reach the client as-is.
+        raise
+    except Exception as exc:
+        logging.error(f"Failed to upload northbound files: {str(exc)}", exc_info=exc)
+        raise HTTPException(
+            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+            detail="Internal Server Error",
+        )
+
+
+@router.post(
+ "/chat/run",
+ summary="Start a northbound chat run with optional attachments",
+ description=(
+ "Run a northbound chat request. Upload attachments first through "
+ "`/nb/v1/chat/attachments/upload`, then pass the returned `s3_url` values "
+ "through the `attachments` field."
+ ),
+)
async def run_chat(
request: Request,
- conversation_id: Optional[int] = Body(None, embed=True),
- agent_name: str = Body(..., embed=True),
- query: str = Body(..., embed=True),
- meta_data: Optional[Dict[str, Any]] = Body(None, embed=True),
+ conversation_id: Optional[int] = Body(
+ None,
+ embed=True,
+ description="Existing conversation ID. Omit to create a new conversation.",
+ examples=[123],
+ ),
+ agent_name: str = Body(
+ ...,
+ embed=True,
+ description="Target agent name.",
+ examples=["general-assistant"],
+ ),
+ query: str = Body(
+ ...,
+ embed=True,
+ description="User input to send to the agent.",
+ examples=["Summarize the uploaded report and list the key risks."],
+ ),
+ attachments: Optional[list] = Body(
+ None,
+ embed=True,
+ description="Attachments for the chat. Can be either a list of S3 URL strings"
+ "or a list of attachment objects with full metadata.",
+ examples=[["s3://nexent/attachments/user123/20260609_report.pdf"]],
+ ),
+ meta_data: Optional[Dict[str, Any]] = Body(
+ None,
+ embed=True,
+ description="Optional metadata passed through for audit and usage logging.",
+ examples=[{"source": "crm", "ticket_id": "INC-1001"}],
+ ),
+ tool_params: Optional[ToolParamsRequest] = Body(
+ None,
+ embed=True,
+ description="Optional request-scoped overrides for tool initialization parameters. "
+ "Overrides DB-persisted params (ag_tool_instance_t.params) on a per-run basis. "
+ "Conflict resolution: request value wins over DB value. "
+ "Structure: agents -> {agent_name} -> tools -> {tool_name} -> {param_name: param_value}. "
+ "tool_name matching: first by tool.name, then by tool.class_name. "
+ "Unknown param names cause a ValidationError (400). "
+ "Metadata-derived fields (e.g., vdb_core, embedding_model) are recalculated "
+ "from merged params for tools like KnowledgeBaseSearchTool, DifySearchTool, DataMateSearchTool.",
+ examples=[{
+ "agents": {
+ "common_sense_qa_assistant": {
+ "tools": {
+ "analyze_text_file": {
+ "chunk_size": 4000,
+ "summary_only": True,
+ "prompt": "Please provide a concise summary of this document focusing on key facts."
+ },
+ "knowledge_base_search": {
+ "top_k": 10,
+ "rerank": True,
+ "rerank_model_name": "gte-rerank-v2",
+ "index_names": ["nexent-docs", "faq-index"]
+ }
+ }
+ }
+ }
+ }],
+ ),
idempotency_key: Optional[str] = Header(None, alias="Idempotency-Key"),
):
try:
@@ -123,13 +252,21 @@ async def run_chat(
conversation_id=conversation_id,
agent_name=agent_name,
query=query,
+ attachments=attachments,
meta_data=meta_data,
+ tool_params=tool_params,
idempotency_key=idempotency_key,
)
except LimitExceededError as e:
logging.error(f"Too Many Requests: rate limit exceeded: {str(e)}", exc_info=e)
raise HTTPException(status_code=HTTPStatus.TOO_MANY_REQUESTS,
detail="Too Many Requests: rate limit exceeded")
+ except ValueError as e:
+ logging.error(f"Invalid northbound chat request: {str(e)}", exc_info=e)
+ raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(e))
+ except PermissionError as e:
+ logging.error(f"Permission denied while running northbound chat: {str(e)}", exc_info=e)
+ raise HTTPException(status_code=HTTPStatus.FORBIDDEN, detail=str(e))
except HTTPException as e:
raise e
except Exception as e:
@@ -252,9 +389,98 @@ async def update_convs_title(
logging.error(f"Too Many Requests: rate limit exceeded: {str(e)}", exc_info=e)
raise HTTPException(status_code=HTTPStatus.TOO_MANY_REQUESTS,
detail="Too Many Requests: rate limit exceeded")
+ except ConversationNotFoundError as e:
+ logging.error(f"Conversation not found while updating title: {str(e)}", exc_info=e)
+ raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail=str(e))
except HTTPException as e:
raise e
except Exception as e:
logging.error(f"Failed to update conversation title: {str(e)}", exc_info=e)
raise HTTPException(
status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Internal Server Error")
+
+
+@router.get("/file/fetch")
+async def fetch_file_from_presigned_url(
+    presigned_url: str = Query(..., description="Presigned URL from MinIO storage"),
+):
+    """
+    Fetch file content from a MinIO presigned URL.
+
+    This endpoint acts as a proxy - it downloads the file from MinIO
+    (which is only accessible from within the container network) and
+    returns the file content to external callers (e.g., MCP tools).
+
+    The presigned_url parameter should be URL-encoded by the caller.
+
+    NOTE: No authentication required for this endpoint.
+
+    Responses:
+        200: the upstream body, with the upstream Content-Type and a rebuilt
+            Content-Disposition header.
+        400: missing URL, a scheme other than http/https, or a URL without a host.
+        502: the upstream answered with a non-200 status or the request failed.
+        504: the upstream request timed out (30 second httpx timeout).
+        500: any other error.
+    """
+    # NOTE(review): this endpoint is unauthenticated and fetches a caller-supplied
+    # URL from inside the container network, so any internal http(s) service is
+    # reachable through it (SSRF). Consider restricting the host to the configured
+    # MinIO endpoint before exposing this publicly.
+    if not presigned_url:
+        raise HTTPException(
+            status_code=HTTPStatus.BAD_REQUEST,
+            detail="presigned_url is required"
+        )
+
+    try:
+        parsed = urlparse(presigned_url)
+        if parsed.scheme not in ("http", "https"):
+            raise HTTPException(
+                status_code=HTTPStatus.BAD_REQUEST,
+                detail="Invalid URL scheme. Must be http or https"
+            )
+        # A URL such as "http:///object" has a valid scheme but nothing to connect
+        # to; reject it here as a client error instead of failing later.
+        if not parsed.hostname:
+            raise HTTPException(
+                status_code=HTTPStatus.BAD_REQUEST,
+                detail="Invalid presigned_url format"
+            )
+    except HTTPException:
+        raise
+    except Exception as e:
+        logging.error(f"Invalid presigned_url format: {str(e)}")
+        raise HTTPException(
+            status_code=HTTPStatus.BAD_REQUEST,
+            detail="Invalid presigned_url format"
+        )
+
+    try:
+        async with httpx.AsyncClient(timeout=httpx.Timeout(30.0)) as client:
+            # NOTE(review): client.get() reads the complete upstream body before
+            # returning, so the file is held in memory and aiter_bytes() below only
+            # replays that buffer. Large files would need httpx's streaming API.
+            response = await client.get(presigned_url)
+
+            if response.status_code != 200:
+                logging.error(f"Failed to fetch file from presigned_url, status: {response.status_code}")
+                raise HTTPException(
+                    status_code=HTTPStatus.BAD_GATEWAY,
+                    detail=f"Failed to fetch file from storage, status: {response.status_code}"
+                )
+
+            content_type = response.headers.get("Content-Type", "application/octet-stream")
+            content_disposition = response.headers.get("Content-Disposition", "")
+            download_filename = _resolve_proxy_download_filename(presigned_url, content_disposition)
+
+            headers = {
+                "Content-Type": content_type,
+                "Content-Disposition": build_content_disposition_header(download_filename),
+            }
+
+            return StreamingResponse(
+                content=response.aiter_bytes(),
+                status_code=HTTPStatus.OK,
+                headers=headers,
+                media_type=content_type
+            )
+
+    except httpx.TimeoutException:
+        logging.error("Timeout fetching file from presigned_url")
+        raise HTTPException(
+            status_code=HTTPStatus.GATEWAY_TIMEOUT,
+            detail="Timeout fetching file from storage"
+        )
+    except httpx.RequestError as e:
+        logging.error(f"Request error fetching file from presigned_url: {str(e)}")
+        raise HTTPException(
+            status_code=HTTPStatus.BAD_GATEWAY,
+            detail=f"Failed to fetch file from storage: {str(e)}"
+        )
+    except HTTPException:
+        raise
+    except Exception as e:
+        logging.error(f"Unexpected error fetching file: {str(e)}", exc_info=e)
+        raise HTTPException(
+            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+            detail="Internal server error"
+        )
diff --git a/backend/apps/northbound_base_app.py b/backend/apps/northbound_base_app.py
index db303e00f..66d937b52 100644
--- a/backend/apps/northbound_base_app.py
+++ b/backend/apps/northbound_base_app.py
@@ -16,6 +16,7 @@
from apps.app_factory import create_app
from .northbound_app import router as northbound_router
+from .northbound_knowledge_app import router as northbound_knowledge_router
class A2AServerSettings(BaseModel):
@@ -49,6 +50,7 @@ class A2AServerSettings(BaseModel):
)
northbound_app.include_router(northbound_router)
+northbound_app.include_router(northbound_knowledge_router)
# =============================================================================
diff --git a/backend/apps/northbound_knowledge_app.py b/backend/apps/northbound_knowledge_app.py
new file mode 100644
index 000000000..02739d138
--- /dev/null
+++ b/backend/apps/northbound_knowledge_app.py
@@ -0,0 +1,505 @@
+import base64
+import logging
+from http import HTTPStatus
+from typing import Optional, Dict, Any, List, Annotated
+
+from fastapi import APIRouter, Body, File, Form, Path, Path as PathParam, Query, Request, HTTPException, UploadFile
+from fastapi.responses import JSONResponse, RedirectResponse, StreamingResponse
+
+from consts.const import ASSET_OWNER_TENANT_ID, VectorDatabaseType
+from consts.exceptions import (
+ LimitExceededError,
+ UnauthorizedError,
+)
+from consts.model import ProcessParams
+from services.file_management_service import (
+ upload_files_impl,
+ get_file_url_impl,
+ get_file_stream_impl,
+ check_file_access,
+)
+from services.northbound_service import NorthboundContext
+from services.redis_service import get_redis_service
+from services.vectordatabase_service import ElasticSearchService, get_vector_db_core
+from utils.auth_utils import generate_session_jwt
+from utils.file_management_utils import trigger_data_process
+
+from .file_management_app import build_content_disposition_header
+from .northbound_app import _get_northbound_context
+
+
+# Module logger; the name is set explicitly rather than derived from __name__.
+logger = logging.getLogger("northbound_knowledge_app")
+
+# Every route in this module is served under /nb/v1/knowledge.
+router = APIRouter(prefix="/nb/v1/knowledge", tags=["northbound"])
+
+__all__ = ["router"]
+
+# Shared detail text for every 429 response raised by the handlers below.
+RATE_LIMIT_EXCEEDED_DETAIL = "Too Many Requests: rate limit exceeded"
+
+
+async def _require_asset_owner_context(request: Request) -> NorthboundContext:
+    """Return the caller's northbound context, rejecting non asset-owner tenants.
+
+    Raises:
+        HTTPException: 403 when the resolved tenant is not ``ASSET_OWNER_TENANT_ID``.
+    """
+    nb_ctx = await _get_northbound_context(request)
+    if nb_ctx.tenant_id == ASSET_OWNER_TENANT_ID:
+        return nb_ctx
+    raise HTTPException(
+        status_code=HTTPStatus.FORBIDDEN,
+        detail="This endpoint is restricted to asset administrators.",
+    )
+
+
+@router.get("/indices")
+async def get_list_indices(
+    request: Request,
+    pattern: Annotated[str, Query(
+        description="Pattern to match index names")] = "*",
+):
+    """List knowledge bases visible to the asset-owner tenant.
+
+    Restricted to asset administrators (same auth as create_new_index).
+    The response is whatever ``ElasticSearchService.list_indices`` returns for
+    the caller's tenant and user.
+
+    Raises:
+        HTTPException: 429 on ``LimitExceededError``, 401 on
+            ``UnauthorizedError``, 500 on any other failure.
+    """
+    try:
+        nb_ctx = await _require_asset_owner_context(request)
+        es_core = get_vector_db_core(db_type=VectorDatabaseType.ELASTICSEARCH)
+        indices = ElasticSearchService.list_indices(
+            pattern, True, nb_ctx.tenant_id, nb_ctx.user_id, es_core
+        )
+        return indices
+    except LimitExceededError:
+        logger.exception("Rate limit exceeded while listing knowledge bases")
+        raise HTTPException(
+            status_code=HTTPStatus.TOO_MANY_REQUESTS,
+            detail=RATE_LIMIT_EXCEEDED_DETAIL,
+        )
+    except UnauthorizedError as exc:
+        raise HTTPException(status_code=HTTPStatus.UNAUTHORIZED, detail=str(exc))
+    except HTTPException:
+        # Includes the 403 raised for non asset-owner callers.
+        raise
+    except Exception:
+        logger.exception("Error listing knowledge bases")
+        raise HTTPException(
+            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+            detail="Error listing knowledge bases",
+        )
+
+
+@router.post("/indices/{index_name}")
+async def create_new_index(
+    request: Request,
+    index_name: Annotated[str, Path(..., description="Name of the index to create")],
+    embedding_dim: Annotated[
+        Optional[int],
+        Query(description="Dimension of the embedding vectors"),
+    ] = None,
+    body: Annotated[
+        Optional[Dict[str, Any]],
+        Body(
+            description=(
+                "Request body with optional fields (ingroup_permission, group_ids, embedding_model_name, preserve_source_file)"
+            ),
+        ),
+    ] = None,
+):
+    """Create a new vector index and store it in the knowledge table.
+
+    Restricted to the asset-owner tenant: only callers whose access key resolves
+    to the asset-owner tenant are allowed to create knowledge bases through the
+    northbound API.
+
+    Optional body fields that are absent are forwarded as ``None``.
+
+    Raises:
+        HTTPException: 429 on ``LimitExceededError``, 401 on
+            ``UnauthorizedError``, 500 on any other failure.
+    """
+    try:
+        nb_ctx = await _require_asset_owner_context(request)
+        es_core = get_vector_db_core(db_type=VectorDatabaseType.ELASTICSEARCH)
+
+        # A missing or empty body behaves like a body with no optional fields set.
+        options = body if body else {}
+
+        return ElasticSearchService.create_knowledge_base(
+            knowledge_name=index_name,
+            embedding_dim=embedding_dim,
+            vdb_core=es_core,
+            user_id=nb_ctx.user_id,
+            tenant_id=nb_ctx.tenant_id,
+            ingroup_permission=options.get("ingroup_permission"),
+            group_ids=options.get("group_ids"),
+            embedding_model_name=options.get("embedding_model_name"),
+            preserve_source_file=options.get("preserve_source_file"),
+        )
+    except LimitExceededError:
+        logger.exception("Rate limit exceeded while creating index")
+        raise HTTPException(
+            status_code=HTTPStatus.TOO_MANY_REQUESTS,
+            detail=RATE_LIMIT_EXCEEDED_DETAIL,
+        )
+    except UnauthorizedError as exc:
+        raise HTTPException(status_code=HTTPStatus.UNAUTHORIZED, detail=str(exc))
+    except HTTPException:
+        raise
+    except Exception:
+        logger.exception("Error creating index")
+        raise HTTPException(
+            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+            detail="Error creating index",
+        )
+
+
+@router.delete("/indices/{index_name}")
+async def delete_index(
+    request: Request,
+    index_name: Annotated[str, Path(..., description="Name of the index to delete")],
+):
+    """Delete a knowledge base and all related data.
+
+    Restricted to asset administrators (same auth as create_new_index).
+    The deletion is delegated to
+    ``ElasticSearchService.full_delete_knowledge_base`` and its result is
+    returned unchanged.
+
+    Raises:
+        HTTPException: 429 on ``LimitExceededError``, 401 on
+            ``UnauthorizedError``, 500 on any other failure.
+    """
+    logger.debug("Received northbound request to delete knowledge base")
+    try:
+        nb_ctx = await _require_asset_owner_context(request)
+        es_core = get_vector_db_core(db_type=VectorDatabaseType.ELASTICSEARCH)
+        deletion = await ElasticSearchService.full_delete_knowledge_base(
+            index_name, es_core, nb_ctx.user_id
+        )
+        return deletion
+    except LimitExceededError:
+        logger.exception("Rate limit exceeded while deleting index")
+        raise HTTPException(
+            status_code=HTTPStatus.TOO_MANY_REQUESTS,
+            detail=RATE_LIMIT_EXCEEDED_DETAIL,
+        )
+    except UnauthorizedError as exc:
+        raise HTTPException(status_code=HTTPStatus.UNAUTHORIZED, detail=str(exc))
+    except HTTPException:
+        raise
+    except Exception:
+        logger.exception("Error deleting index")
+        raise HTTPException(
+            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+            detail="Error deleting index",
+        )
+
+
+@router.get("/indices/{index_name}/files")
+async def get_index_files(
+    request: Request,
+    index_name: Annotated[str, Path(..., description="Name of the index")],
+):
+    """Get all files from an index, including those that are not yet stored in ES.
+
+    Restricted to asset administrators (same auth as get_list_indices).
+
+    Returns:
+        ``{"status": "success", "files": [...]}`` where ``files`` is the
+        ``"files"`` entry of the service result, or an empty list when absent.
+
+    Raises:
+        HTTPException: 429 on ``LimitExceededError``, 401 on
+            ``UnauthorizedError``, 500 on any other failure.
+    """
+    try:
+        nb_ctx = await _require_asset_owner_context(request)
+        es_core = get_vector_db_core(db_type=VectorDatabaseType.ELASTICSEARCH)
+        logger.debug(
+            "Listing files for index %s, tenant_id=%s, user_id=%s",
+            index_name,
+            nb_ctx.tenant_id,
+            nb_ctx.user_id,
+        )
+        listing = await ElasticSearchService.list_files(
+            index_name, include_chunks=False, vdb_core=es_core
+        )
+        file_entries = listing.get("files", [])
+        return {"status": "success", "files": file_entries}
+    except LimitExceededError:
+        logger.exception("Rate limit exceeded while listing files")
+        raise HTTPException(
+            status_code=HTTPStatus.TOO_MANY_REQUESTS,
+            detail=RATE_LIMIT_EXCEEDED_DETAIL,
+        )
+    except UnauthorizedError as exc:
+        raise HTTPException(status_code=HTTPStatus.UNAUTHORIZED, detail=str(exc))
+    except HTTPException:
+        raise
+    except Exception:
+        logger.exception("Error getting files for index")
+        raise HTTPException(
+            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+            detail="Error getting index files",
+        )
+
+
+@router.delete("/indices/{index_name}/documents")
+async def delete_documents(
+ request: Request,
+ index_name: Annotated[str, Path(..., description="Name of the index")],
+ path_or_url: Annotated[str, Query(..., description="Path or URL of documents to delete")],
+ scope: Annotated[
+ str,
+ Query(
+ description=(
+ "source_only: delete MinIO source only; "
+ "full: delete ES, MinIO, and Redis records"
+ ),
+ ),
+ ] = "full",
+):
+ """Delete a document by scope. Restricted to asset administrators.
+
+ The deletion itself is delegated to
+ ``ElasticSearchService.delete_document_by_scope``. When ``scope`` is
+ ``"full"`` the matching Redis records are removed as well; a failure of that
+ cleanup is reported inside the returned payload instead of failing the request.
+
+ Returns:
+ The service result, extended for ``scope == "full"`` with either
+ ``redis_cleanup`` (plus ``redis_warnings`` when the cleanup reported errors)
+ or ``redis_cleanup_error``, and an updated ``message``.
+
+ Raises:
+ HTTPException: 400 on ``ValueError``, 429 on ``LimitExceededError``,
+ 401 on ``UnauthorizedError``, 500 on any other failure.
+ """
+ try:
+ # Only the authorization side effect is needed here; the context is not used.
+ await _require_asset_owner_context(request)
+ vdb_core = get_vector_db_core(db_type=VectorDatabaseType.ELASTICSEARCH)
+ logger.debug(
+ "Deleting documents for index %s scope=%s", index_name, scope
+ )
+ result = await ElasticSearchService.delete_document_by_scope(
+ index_name, path_or_url, scope, vdb_core
+ )
+
+ if scope == "full":
+ # Best-effort cleanup: any error below is recorded on the result, not raised.
+ try:
+ redis_service = get_redis_service()
+ redis_cleanup_result = redis_service.delete_document_records(
+ index_name, path_or_url
+ )
+ result["redis_cleanup"] = redis_cleanup_result
+ original_message = result.get(
+ "message", "Documents deleted successfully"
+ )
+ # The counters are read with [] so a missing key raises KeyError, which the
+ # handler below reports as a cleanup error.
+ result["message"] = (
+ f"{original_message}. "
+ f"Cleaned up {redis_cleanup_result['total_deleted']} Redis records "
+ f"({redis_cleanup_result['celery_tasks_deleted']} tasks, "
+ f"{redis_cleanup_result['cache_keys_deleted']} cache keys)."
+ )
+ if redis_cleanup_result.get("errors"):
+ result["redis_warnings"] = redis_cleanup_result["errors"]
+ except Exception as redis_error:
+ logger.warning(
+ "Redis cleanup failed for index %s: %s",
+ index_name,
+ redis_error,
+ )
+ result["redis_cleanup_error"] = str(redis_error)
+ original_message = result.get(
+ "message", "Documents deleted successfully"
+ )
+ result["message"] = (
+ f"{original_message}, but Redis cleanup encountered an error: "
+ f"{str(redis_error)}"
+ )
+
+ return result
+ except ValueError as exc:
+ raise HTTPException(
+ status_code=HTTPStatus.BAD_REQUEST, detail=str(exc)
+ )
+ except LimitExceededError as e:
+ logger.exception("Rate limit exceeded while deleting documents")
+ raise HTTPException(
+ status_code=HTTPStatus.TOO_MANY_REQUESTS,
+ detail=RATE_LIMIT_EXCEEDED_DETAIL)
+ except UnauthorizedError as e:
+ raise HTTPException(
+ status_code=HTTPStatus.UNAUTHORIZED, detail=str(e))
+ except HTTPException:
+ raise
+ except Exception:
+ logger.exception("Error deleting documents for index")
+ raise HTTPException(
+ status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+ detail="Error deleting documents")
+
+
+@router.post("/file/upload")
+async def upload_files(
+    request: Request,
+    file: Annotated[List[UploadFile], File(..., alias="file")],
+    index_name: str = Form(..., description="Knowledge base index"),
+):
+    """Upload files to MinIO and trigger knowledge base data processing.
+
+    Uses chunking_strategy=basic. Restricted to asset administrators
+    (same auth as create_new_index).
+
+    Returns:
+        A 201 JSON response listing the uploaded filenames and paths, the
+        per-file upload errors and the result of the data-process trigger.
+
+    Raises:
+        HTTPException: 400 when the request has no files or none were stored,
+            500 when the data-process trigger fails or on any other error,
+            429 on ``LimitExceededError``, 401 on ``UnauthorizedError``.
+    """
+    try:
+        nb_ctx = await _require_asset_owner_context(request)
+        destination = "minio"
+        if not file:
+            raise HTTPException(
+                status_code=HTTPStatus.BAD_REQUEST,
+                detail="No files in the request",
+            )
+
+        errors, uploaded_file_paths, uploaded_filenames = await upload_files_impl(
+            destination, file, None, index_name, nb_ctx.user_id,
+            uploader_tenant_id=nb_ctx.tenant_id,
+        )
+
+        # Nothing was stored, so there is nothing to process.
+        if not uploaded_file_paths:
+            raise HTTPException(
+                status_code=HTTPStatus.BAD_REQUEST,
+                detail="No valid files uploaded",
+            )
+
+        stored_files = []
+        for stored_path, stored_name in zip(uploaded_file_paths, uploaded_filenames):
+            stored_files.append({"path_or_url": stored_path, "filename": stored_name})
+
+        # Internal data-process / ES indexing expects JWT, not northbound API key
+        session_jwt = generate_session_jwt(nb_ctx.user_id)
+        params = ProcessParams(
+            chunking_strategy="basic",
+            source_type="minio",
+            index_name=index_name,
+            authorization=session_jwt,
+        )
+        process_result = await trigger_data_process(stored_files, params)
+
+        result_is_dict = isinstance(process_result, dict)
+        trigger_failed = process_result is None or (
+            result_is_dict and process_result.get("status") == "error"
+        )
+        if trigger_failed:
+            # Prefer the message supplied by the data-process service, when there is one.
+            if result_is_dict and "message" in process_result:
+                failure_detail = process_result["message"]
+            else:
+                failure_detail = "Data process service failed"
+            raise HTTPException(
+                status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+                detail=failure_detail,
+            )
+
+        response_body = {
+            "message": "Files uploaded and processing triggered successfully",
+            "uploaded_filenames": uploaded_filenames,
+            "uploaded_file_paths": uploaded_file_paths,
+            "errors": errors,
+            "process_tasks": process_result,
+        }
+        return JSONResponse(status_code=HTTPStatus.CREATED, content=response_body)
+    except LimitExceededError:
+        logger.exception("Rate limit exceeded while uploading files")
+        raise HTTPException(
+            status_code=HTTPStatus.TOO_MANY_REQUESTS,
+            detail=RATE_LIMIT_EXCEEDED_DETAIL,
+        )
+    except UnauthorizedError as exc:
+        raise HTTPException(status_code=HTTPStatus.UNAUTHORIZED, detail=str(exc))
+    except HTTPException:
+        raise
+    except Exception:
+        logger.exception("File upload error")
+        raise HTTPException(
+            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+            detail="File upload error.",
+        )
+
+
+@router.get("/file/download/{object_name:path}")
+async def get_storage_file(
+    request: Request,
+    object_name: str = PathParam(..., description="File object name"),
+    download: str = Query(
+        "ignore",
+        description=(
+            "How to get the file: "
+            "'ignore' (default, return file info), "
+            "'stream' (return file stream), "
+            "'redirect' (redirect to download URL), "
+            "'base64' (return base64-encoded content for images)."
+        ),
+    ),
+    expires: int = Query(86400, description="URL validity period (seconds)"),
+    filename: Optional[str] = Query(
+        None, description="Original filename for download (optional)"),
+):
+    """Get file information, download link, or file stream.
+
+    Restricted to asset administrators (same auth as create_new_index).
+
+    The ``download`` mode selects the response: ``redirect`` answers with a
+    redirect to the file URL, ``stream`` streams the object as an attachment,
+    ``base64`` returns the content base64-encoded in JSON, and every other
+    value returns the file URL information.
+
+    Raises:
+        HTTPException: 403 when ``check_file_access`` denies the object,
+            429 on ``LimitExceededError``, 401 on ``UnauthorizedError``,
+            500 on any other failure.
+    """
+    try:
+        nb_ctx = await _require_asset_owner_context(request)
+
+        access_granted = check_file_access(object_name, nb_ctx.user_id, nb_ctx.tenant_id)
+        if not access_granted:
+            logger.warning(
+                "[get_storage_file] Access denied: user_id=%s",
+                nb_ctx.user_id,
+            )
+            raise HTTPException(
+                status_code=HTTPStatus.FORBIDDEN,
+                detail="You don't have permission to access this file",
+            )
+
+        logger.info("[get_storage_file] download=%s", download)
+
+        if download == "redirect":
+            url_info = await get_file_url_impl(
+                object_name=object_name, expires=expires)
+            return RedirectResponse(url=url_info["url"])
+
+        if download == "stream":
+            file_stream, content_type = await get_file_stream_impl(
+                object_name=object_name)
+            logger.info(
+                "Streaming file: object_name=%s, content_type=%s",
+                object_name,
+                content_type,
+            )
+
+            # Fall back to the last path segment of the object name when the
+            # caller did not supply a filename.
+            download_filename = filename or object_name.split("/")[-1]
+
+            # NOTE(review): the response is access-controlled but marked
+            # "public" cacheable, and the ETag is derived from the object name
+            # rather than its content - confirm both are intended.
+            stream_headers = {
+                "Content-Disposition": build_content_disposition_header(download_filename),
+                "Cache-Control": "public, max-age=3600",
+                "ETag": f'"{object_name}"',
+            }
+            return StreamingResponse(
+                file_stream,
+                media_type=content_type,
+                headers=stream_headers,
+            )
+
+        if download == "base64":
+            file_stream, content_type = await get_file_stream_impl(
+                object_name=object_name)
+            try:
+                raw_bytes = file_stream.read()
+            except Exception as exc:
+                logger.error(
+                    "Failed to read file stream for base64: %s", str(exc))
+                raise HTTPException(
+                    status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+                    detail="Failed to read file content for base64 encoding",
+                )
+
+            encoded = base64.b64encode(raw_bytes).decode("utf-8")
+            payload = {
+                "success": True,
+                "base64": encoded,
+                "content_type": content_type,
+                "object_name": object_name,
+            }
+            return JSONResponse(status_code=HTTPStatus.OK, content=payload)
+
+        # "ignore" and any unrecognised mode: return the URL information only.
+        return await get_file_url_impl(
+            object_name=object_name, expires=expires)
+    except LimitExceededError as exc:
+        logger.error(
+            "%s: %s",
+            RATE_LIMIT_EXCEEDED_DETAIL,
+            str(exc),
+            exc_info=exc,
+        )
+        raise HTTPException(
+            status_code=HTTPStatus.TOO_MANY_REQUESTS,
+            detail=RATE_LIMIT_EXCEEDED_DETAIL,
+        )
+    except UnauthorizedError as exc:
+        raise HTTPException(status_code=HTTPStatus.UNAUTHORIZED, detail=str(exc))
+    except HTTPException:
+        raise
+    except Exception:
+        logger.exception("Failed to get file")
+        raise HTTPException(
+            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+            detail="Failed to get file.",
+        )
diff --git a/backend/apps/oauth_app.py b/backend/apps/oauth_app.py
new file mode 100644
index 000000000..f05102d0c
--- /dev/null
+++ b/backend/apps/oauth_app.py
@@ -0,0 +1,352 @@
+import logging
+
+from fastapi import APIRouter, Header, HTTPException, Request
+from fastapi.responses import JSONResponse, RedirectResponse
+from http import HTTPStatus
+from typing import Optional
+
+from pydantic import ValidationError as PydanticValidationError
+
+from consts.model import OAuthCompleteRequest
+from consts.exceptions import OAuthLinkError, OAuthProviderError, UnauthorizedError
+from consts.oauth_providers import get_all_provider_definitions
+from database.oauth_account_db import get_oauth_account_by_provider
+from services.oauth_service import (
+ complete_pending_oauth_account,
+ create_or_update_oauth_account,
+ ensure_user_tenant_exists,
+ exchange_code_for_provider_token,
+ find_supabase_user_id_by_email,
+ generate_pending_oauth_token,
+ get_authorize_url,
+ get_enabled_providers,
+ get_pending_oauth_info,
+ get_provider_user_info,
+ list_linked_accounts,
+ parse_state,
+ unlink_account,
+)
+from utils.auth_utils import (
+ calculate_expires_at,
+ generate_session_jwt,
+ get_current_user_id,
+ get_supabase_admin_client,
+)
+
+# Module logger, named after the importing module path.
+logger = logging.getLogger(__name__)
+# Every route in this module is served under /user/oauth.
+router = APIRouter(prefix="/user/oauth", tags=["oauth"])
+
+
+@router.get("/providers")
+async def get_providers():
+    """Return the enabled OAuth providers in a ``{"message", "data"}`` envelope."""
+    enabled = get_enabled_providers()
+    envelope = {"message": "success", "data": enabled}
+    return JSONResponse(status_code=HTTPStatus.OK, content=envelope)
+
+
+@router.get("/authorize")
+async def authorize(provider: str):
+    """Redirect (302) the caller to the authorization URL of ``provider``.
+
+    Raises:
+        HTTPException: 400 on ``OAuthProviderError``, 500 on any other failure.
+    """
+    try:
+        authorize_url = get_authorize_url(provider)
+    except OAuthProviderError as exc:
+        raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(exc))
+    except Exception as exc:
+        logger.error(f"OAuth authorize failed: {exc}")
+        raise HTTPException(
+            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+            detail="OAuth authorization failed",
+        )
+    try:
+        return RedirectResponse(url=authorize_url, status_code=HTTPStatus.FOUND)
+    except OAuthProviderError as exc:
+        raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(exc))
+    except Exception as exc:
+        logger.error(f"OAuth authorize failed: {exc}")
+        raise HTTPException(
+            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+            detail="OAuth authorization failed",
+        )
+
+
+@router.get("/link")
+async def link(provider: str, authorization: Optional[str] = Header(None)):
+    """Redirect (302) a logged-in user to ``provider`` to link an OAuth account.
+
+    The current user's id is passed to ``get_authorize_url`` as ``link_user_id``.
+
+    Raises:
+        HTTPException: 401 when the Authorization header is missing or rejected,
+            400 on ``OAuthProviderError``, 500 on any other failure.
+    """
+    if authorization is None or not authorization:
+        raise HTTPException(status_code=HTTPStatus.UNAUTHORIZED, detail="Not logged in")
+
+    try:
+        current_user_id, _unused = get_current_user_id(authorization)
+        authorize_url = get_authorize_url(provider, link_user_id=current_user_id)
+        return RedirectResponse(url=authorize_url, status_code=HTTPStatus.FOUND)
+    except UnauthorizedError:
+        raise HTTPException(status_code=HTTPStatus.UNAUTHORIZED, detail="Not logged in")
+    except OAuthProviderError as exc:
+        raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(exc))
+    except Exception as exc:
+        logger.error(f"OAuth link failed: {exc}")
+        raise HTTPException(
+            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+            detail="OAuth link failed",
+        )
+
+
+@router.get("/callback")
+async def callback(
+ provider: str,
+ code: str = "",
+ state: str = "",
+ error: Optional[str] = None,
+ error_description: Optional[str] = None,
+):
+ """Handle the OAuth provider redirect and sign the user in or link the account.
+
+ Every outcome is returned as a JSON body with ``message`` and ``data``; errors
+ carry ``oauth_error`` / ``oauth_error_description`` instead of raising.
+
+ Flow:
+ 1. Reject provider-reported errors, a missing code and unknown providers (400).
+ 2. Exchange the code for a provider token and fetch the provider user info.
+ 3. Pick the local user: the ``link_user_id`` from ``state``, else an existing
+ binding for this provider account, else a user with the same email.
+ 4. With no local user, return a ``pending_token`` so the client can complete
+ the account (200, ``requires_account_completion``).
+ 5. Otherwise store the binding and return a one-hour session JWT (200).
+ """
+ if error:
+ return JSONResponse(
+ status_code=HTTPStatus.BAD_REQUEST,
+ content={
+ "message": "OAuth provider returned an error",
+ "data": {
+ "oauth_error": error,
+ "oauth_error_description": error_description or "Unknown error",
+ },
+ },
+ )
+
+ if not code:
+ return JSONResponse(
+ status_code=HTTPStatus.BAD_REQUEST,
+ content={
+ "message": "No authorization code received",
+ "data": {
+ "oauth_error": "no_code",
+ "oauth_error_description": "No authorization code received",
+ },
+ },
+ )
+
+ if provider not in get_all_provider_definitions():
+ return JSONResponse(
+ status_code=HTTPStatus.BAD_REQUEST,
+ content={
+ "message": "Unsupported OAuth provider",
+ "data": {
+ "oauth_error": "unsupported_provider",
+ "oauth_error_description": f"Provider '{provider}' is not supported",
+ },
+ },
+ )
+
+ # NOTE(review): link_user_id is taken from the state parameter and used as the
+ # target user below; this is only safe if parse_state verifies the state's
+ # integrity - confirm. parse_state also runs outside the try block, so an
+ # exception from it is not converted into the JSON error body.
+ state_info = parse_state(state)
+ link_user_id = state_info.get("link_user_id", "")
+
+ try:
+ token_data = exchange_code_for_provider_token(provider, code)
+ provider_access_token = token_data["access_token"]
+
+ user_info = get_provider_user_info(
+ provider,
+ provider_access_token,
+ openid=token_data.get("openid", ""),
+ )
+
+ # A missing key here raises KeyError and ends in the generic 500 branch below.
+ provider_user_id = user_info["id"]
+ email = user_info["email"]
+ username = user_info["username"]
+
+ if link_user_id:
+ supabase_user_id = link_user_id
+ else:
+ # First check if this OAuth account is already bound to a user
+ existing_binding = get_oauth_account_by_provider(provider, provider_user_id)
+ if existing_binding:
+ supabase_user_id = existing_binding["user_id"]
+ else:
+ supabase_user_id = None
+ # NOTE(review): an existing user is matched purely by the email the
+ # provider reports; presumably providers only return verified emails -
+ # confirm, otherwise this allows signing in as another user.
+ if email:
+ admin_client = get_supabase_admin_client()
+ if not admin_client:
+ raise RuntimeError("Supabase admin client not available")
+ supabase_user_id = find_supabase_user_id_by_email(
+ admin_client,
+ email,
+ )
+
+ # No local account could be resolved: hand back a pending token instead of a session.
+ if not supabase_user_id:
+ pending_token = generate_pending_oauth_token(
+ provider=provider,
+ provider_user_id=provider_user_id,
+ provider_email=email,
+ provider_username=username,
+ )
+ return JSONResponse(
+ status_code=HTTPStatus.OK,
+ content={
+ "message": "OAuth account information required",
+ "data": {
+ "requires_account_completion": True,
+ "pending_token": pending_token,
+ "provider": provider,
+ "provider_username": username,
+ "provider_email": email,
+ "email_required": not bool(email),
+ },
+ },
+ )
+
+ ensure_user_tenant_exists(user_id=supabase_user_id, email=email)
+
+ create_or_update_oauth_account(
+ user_id=supabase_user_id,
+ provider=provider,
+ provider_user_id=provider_user_id,
+ email=email,
+ username=username,
+ )
+
+ # Session lifetime in seconds (one hour).
+ expiry_seconds = 3600
+ jwt_token = generate_session_jwt(supabase_user_id, expires_in=expiry_seconds)
+ expires_at = calculate_expires_at(jwt_token)
+
+ return JSONResponse(
+ status_code=HTTPStatus.OK,
+ content={
+ "message": "OAuth login successful",
+ "data": {
+ "user": {
+ "id": str(supabase_user_id),
+ "email": email,
+ },
+ "session": {
+ "access_token": jwt_token,
+ # No refresh token is issued for OAuth sessions; the field is always empty.
+ "refresh_token": "",
+ "expires_at": expires_at,
+ "expires_in_seconds": expiry_seconds,
+ },
+ },
+ },
+ )
+
+ except OAuthLinkError as e:
+ logger.warning(f"OAuth callback link failed for provider={provider}: {e}")
+ return JSONResponse(
+ status_code=HTTPStatus.BAD_REQUEST,
+ content={
+ "message": "OAuth account link failed",
+ "data": {
+ "oauth_error": "oauth_account_already_bound",
+ "oauth_error_description": "OAuth account is already bound to another user",
+ },
+ },
+ )
+ except Exception as e:
+ # Only the exception message is logged here; no traceback is recorded.
+ logger.error(f"OAuth callback failed for provider={provider}: {e}")
+ return JSONResponse(
+ status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+ content={
+ "message": "OAuth login failed",
+ "data": {
+ "oauth_error": "callback_failed",
+ "oauth_error_description": "OAuth login failed",
+ },
+ },
+ )
+
+
+@router.get("/pending")
+async def get_pending(
+    pending_token: Optional[str] = Header(None, alias="X-OAuth-Pending-Token"),
+):
+    """Return the pending OAuth information identified by ``X-OAuth-Pending-Token``.
+
+    A missing header is forwarded to ``get_pending_oauth_info`` as an empty string.
+
+    Raises:
+        HTTPException: 401 on ``OAuthLinkError``, 500 on ``OAuthProviderError``
+            or any other failure.
+    """
+    token = pending_token or ""
+    try:
+        pending_info = get_pending_oauth_info(token)
+        envelope = {"message": "success", "data": pending_info}
+        return JSONResponse(status_code=HTTPStatus.OK, content=envelope)
+    except OAuthLinkError as exc:
+        raise HTTPException(status_code=HTTPStatus.UNAUTHORIZED, detail=str(exc))
+    except OAuthProviderError as exc:
+        raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail=str(exc))
+    except Exception as exc:
+        logger.error(f"Failed to get pending OAuth info: {exc}")
+        raise HTTPException(
+            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+            detail="Failed to get pending OAuth info",
+        )
+
+
+@router.post("/complete")
+async def complete(
+    request: Request,
+    pending_token: Optional[str] = Header(None, alias="X-OAuth-Pending-Token"),
+):
+    """Complete a pending OAuth sign-up with the data supplied by the client.
+
+    The JSON body is validated as ``OAuthCompleteRequest`` and forwarded,
+    together with the ``X-OAuth-Pending-Token`` header (empty string when
+    missing), to ``complete_pending_oauth_account``.
+
+    Raises:
+        HTTPException: 400 when the body is not valid JSON or not a JSON
+            object, 422 when it fails model validation, 409 when the
+            ``OAuthLinkError`` message contains "Email already exists" (400
+            for other link errors), 500 on ``OAuthProviderError`` or any
+            other failure.
+    """
+    # The body is parsed before the main try block because the catch-all below
+    # would otherwise turn a malformed client payload into a 500.
+    try:
+        payload = await request.json()
+    except ValueError as exc:
+        # json.JSONDecodeError and UnicodeDecodeError are both ValueError subclasses.
+        raise HTTPException(
+            status_code=HTTPStatus.BAD_REQUEST,
+            detail="Request body must be valid JSON",
+        ) from exc
+    if not isinstance(payload, dict):
+        # The model is built with ** below, which requires a mapping.
+        raise HTTPException(
+            status_code=HTTPStatus.BAD_REQUEST,
+            detail="Request body must be a JSON object",
+        )
+
+    try:
+        request_data = OAuthCompleteRequest(**payload)
+        result = await complete_pending_oauth_account(
+            pending_token=pending_token or "",
+            email=str(request_data.email) if request_data.email else None,
+            password=request_data.password,
+            invite_code=request_data.invite_code,
+        )
+        return JSONResponse(
+            status_code=HTTPStatus.OK,
+            content={"message": "OAuth account completed", "data": result},
+        )
+    except OAuthLinkError as e:
+        # The conflict case is recognised by its message text.
+        status_code = (
+            HTTPStatus.CONFLICT
+            if "Email already exists" in str(e)
+            else HTTPStatus.BAD_REQUEST
+        )
+        raise HTTPException(status_code=status_code, detail=str(e))
+    except PydanticValidationError as e:
+        raise HTTPException(
+            status_code=HTTPStatus.UNPROCESSABLE_ENTITY,
+            detail=e.errors(),
+        )
+    except OAuthProviderError as e:
+        raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail=str(e))
+    except Exception as e:
+        logger.error(f"Failed to complete OAuth account: {e}")
+        raise HTTPException(
+            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+            detail="Failed to complete OAuth account",
+        )
+
+
+@router.get("/accounts")
+async def get_accounts(authorization: Optional[str] = Header(None)):
+    """List the OAuth accounts linked to the logged-in user.
+
+    Raises:
+        HTTPException: 401 when the Authorization header is missing or
+            rejected, 500 on any other failure.
+    """
+    if authorization is None or not authorization:
+        raise HTTPException(status_code=HTTPStatus.UNAUTHORIZED, detail="Not logged in")
+
+    try:
+        current_user_id, _unused = get_current_user_id(authorization)
+        linked = list_linked_accounts(current_user_id)
+        envelope = {"message": "success", "data": linked}
+        return JSONResponse(status_code=HTTPStatus.OK, content=envelope)
+    except UnauthorizedError:
+        raise HTTPException(status_code=HTTPStatus.UNAUTHORIZED, detail="Not logged in")
+    except Exception as exc:
+        logger.error(f"Failed to get OAuth accounts: {exc}")
+        raise HTTPException(
+            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+            detail="Failed to get OAuth accounts",
+        )
+
+
+@router.delete("/accounts/{provider}")
+async def delete_account(provider: str, authorization: Optional[str] = Header(None)):
+    """Unlink the logged-in user's OAuth account for ``provider``.
+
+    Raises:
+        HTTPException: 401 when the Authorization header is missing or
+            rejected, 400 on ``OAuthLinkError``, 500 on any other failure.
+    """
+    if authorization is None or not authorization:
+        raise HTTPException(status_code=HTTPStatus.UNAUTHORIZED, detail="Not logged in")
+
+    try:
+        current_user_id, _unused = get_current_user_id(authorization)
+        unlink_account(current_user_id, provider)
+        envelope = {
+            "message": "success",
+            "data": {"provider": provider, "unlinked": True},
+        }
+        return JSONResponse(status_code=HTTPStatus.OK, content=envelope)
+    except OAuthLinkError as exc:
+        raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(exc))
+    except UnauthorizedError:
+        raise HTTPException(status_code=HTTPStatus.UNAUTHORIZED, detail="Not logged in")
+    except Exception as exc:
+        logger.error(f"Failed to unlink OAuth account: {exc}")
+        raise HTTPException(
+            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+            detail="Failed to unlink OAuth account",
+        )
diff --git a/backend/apps/prompt_app.py b/backend/apps/prompt_app.py
index 7c0b799dc..6b82a5c82 100644
--- a/backend/apps/prompt_app.py
+++ b/backend/apps/prompt_app.py
@@ -1,11 +1,22 @@
import logging
from http import HTTPStatus
from typing import Optional
-from fastapi import APIRouter, Header, HTTPException, Request
-from fastapi.responses import StreamingResponse
+from fastapi import APIRouter, Header, Request
+from fastapi.responses import JSONResponse, StreamingResponse
-from consts.model import GeneratePromptRequest
-from services.prompt_service import gen_system_prompt_streamable
+from consts.model import (
+ GeneratePromptRequest,
+ OptimizePromptSectionRequest,
+ OptimizePromptBadCaseRequest,
+ OptimizePromptFromDebugRequest,
+)
+from services.prompt_service import (
+ gen_system_prompt_streamable,
+ OptimizeRequest,
+ OptimizeResult,
+ PromptOptimizationService,
+)
+from adapters.exception import NexentCapabilityError
from utils.auth_utils import get_current_user_info
router = APIRouter(prefix="/prompt")
@@ -25,13 +36,160 @@ async def generate_and_save_system_prompt_api(
agent_id=prompt_request.agent_id,
model_id=prompt_request.model_id,
task_description=prompt_request.task_description,
+ prompt_template_id=prompt_request.prompt_template_id,
user_id=user_id,
tenant_id=tenant_id,
language=language,
tool_ids=prompt_request.tool_ids,
- sub_agent_ids=prompt_request.sub_agent_ids
+ sub_agent_ids=prompt_request.sub_agent_ids,
+ knowledge_base_display_names=prompt_request.knowledge_base_display_names,
+ has_selected_resources=prompt_request.has_selected_resources,
), media_type="text/event-stream")
except Exception as e:
logger.exception(f"Error occurred while generating system prompt: {e}")
- raise HTTPException(
- status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Error occurred while generating system prompt.")
+ raise
+
+
+@router.post("/optimize")
+async def optimize_prompt_section_api(
+    optimize_request: OptimizePromptSectionRequest,
+    http_request: Request,
+    authorization: Optional[str] = Header(None)
+):
+    """Optimize a single prompt section and return the rewritten content.
+
+    Tenant and language come from the caller's credentials; all other inputs
+    are copied field by field from ``optimize_request`` into an
+    ``OptimizeRequest``. ``result.source`` is exposed through the
+    ``X-Prompt-Source`` response header.
+
+    Returns:
+        200 with ``{"message": "Success", "data": {...}}`` on success, or 400
+        with ``{"message": <error text>}`` when ``NexentCapabilityError`` is
+        raised. Any other exception is logged and re-raised unchanged.
+    """
+    _, tenant_id, language = get_current_user_info(authorization, http_request)
+
+    service = PromptOptimizationService(
+        model_id=optimize_request.model_id,
+        tenant_id=tenant_id,
+        language=language,
+    )
+
+    try:
+        service_request = OptimizeRequest(
+            agent_id=optimize_request.agent_id,
+            model_id=optimize_request.model_id,
+            task_description=optimize_request.task_description,
+            section_type=optimize_request.section_type,
+            section_title=optimize_request.section_title,
+            current_content=optimize_request.current_content,
+            feedback=optimize_request.feedback,
+            mode=optimize_request.mode,
+            start_pos=optimize_request.start_pos,
+            end_pos=optimize_request.end_pos,
+            tool_ids=optimize_request.tool_ids,
+            sub_agent_ids=optimize_request.sub_agent_ids,
+            knowledge_base_display_names=optimize_request.knowledge_base_display_names,
+        )
+        result = service.optimize(service_request)
+
+        data = {
+            "optimized_content": result.optimized_content,
+            "section_type": result.section_type,
+            "section_title": result.section_title,
+            "original_content": result.original_content,
+        }
+        return JSONResponse(
+            status_code=HTTPStatus.OK,
+            content={"message": "Success", "data": data},
+            headers={"X-Prompt-Source": result.source},
+        )
+    except NexentCapabilityError as e:
+        # Capability errors are reported in the response body, not raised.
+        return JSONResponse(
+            status_code=HTTPStatus.BAD_REQUEST,
+            content={"message": str(e)},
+        )
+    except Exception as exc:
+        logger.exception(f"Error occurred while optimizing prompt section: {exc}")
+        raise
+
+
+@router.post("/optimize/badcase")
+async def optimize_prompt_badcase_api(
+    badcase_request: OptimizePromptBadCaseRequest,
+    http_request: Request,
+    authorization: Optional[str] = Header(None)
+):
+    """Optimize a prompt section using a list of bad cases.
+
+    Tenant and language come from the caller's credentials; the remaining
+    inputs are forwarded from ``badcase_request`` to
+    ``PromptOptimizationService.optimize_badcase``. ``result.source`` is
+    exposed through the ``X-Prompt-Source`` response header.
+
+    Returns:
+        200 with ``{"message": "Success", "data": {...}}`` on success, or 400
+        with ``{"message": <error text>}`` when ``NexentCapabilityError`` is
+        raised. Any other exception is logged and re-raised unchanged.
+    """
+    _, tenant_id, language = get_current_user_info(
+        authorization, http_request)
+
+    service = PromptOptimizationService(
+        model_id=badcase_request.model_id,
+        tenant_id=tenant_id,
+        language=language,
+    )
+
+    try:
+        result = service.optimize_badcase(
+            current_content=badcase_request.current_content,
+            bad_cases=badcase_request.bad_cases,
+            agent_id=badcase_request.agent_id,
+            section_type=badcase_request.section_type,
+            section_title=badcase_request.section_title,
+            tool_ids=badcase_request.tool_ids,
+            sub_agent_ids=badcase_request.sub_agent_ids,
+            knowledge_base_display_names=badcase_request.knowledge_base_display_names,
+        )
+        return JSONResponse(
+            status_code=HTTPStatus.OK,
+            content={
+                "message": "Success",
+                "data": {
+                    "optimized_content": result.optimized_content,
+                    "section_type": result.section_type,
+                    "section_title": result.section_title,
+                    "original_content": result.original_content,
+                }
+            },
+            headers={"X-Prompt-Source": result.source},
+        )
+    except NexentCapabilityError as e:
+        # Capability errors are reported in the response body, not raised.
+        return JSONResponse(
+            status_code=HTTPStatus.BAD_REQUEST,
+            content={"message": str(e)},
+        )
+    except Exception as exc:
+        # Log with traceback before propagating, matching the sibling
+        # /optimize and /optimize/from_debug endpoints.
+        logger.exception(f"Error occurred while optimizing prompt from bad cases: {exc}")
+        raise
+
+
+@router.post("/optimize/from_debug")
+async def optimize_prompt_from_debug_api(
+    optimize_request: OptimizePromptFromDebugRequest,
+    http_request: Request,
+    authorization: Optional[str] = Header(None)
+):
+    """Optimize an agent's full prompt from debug-session feedback.
+
+    Tenant and language come from the caller's credentials; ``agent_id``,
+    ``feedback``, ``selected`` and ``history`` are forwarded to
+    ``PromptOptimizationService.optimize_from_debug``. ``result.source`` is
+    exposed through the ``X-Prompt-Source`` response header.
+
+    Returns:
+        200 with the original and optimized full prompt on success, or 400
+        with ``{"message": <error text>}`` when ``NexentCapabilityError`` is
+        raised. Any other exception is logged and re-raised unchanged.
+    """
+    _, tenant_id, language = get_current_user_info(authorization, http_request)
+
+    service = PromptOptimizationService(
+        model_id=optimize_request.model_id,
+        tenant_id=tenant_id,
+        language=language,
+    )
+
+    try:
+        result = service.optimize_from_debug(
+            agent_id=optimize_request.agent_id,
+            feedback=optimize_request.feedback,
+            selected=optimize_request.selected,
+            history=optimize_request.history,
+        )
+
+        data = {
+            "original_full_prompt": result.original_content,
+            "optimized_full_prompt": result.optimized_content,
+        }
+        return JSONResponse(
+            status_code=HTTPStatus.OK,
+            content={"message": "Success", "data": data},
+            headers={"X-Prompt-Source": result.source},
+        )
+    except NexentCapabilityError as e:
+        # Capability errors are reported in the response body, not raised.
+        return JSONResponse(
+            status_code=HTTPStatus.BAD_REQUEST,
+            content={"message": str(e)},
+        )
+    except Exception as exc:
+        logger.exception(f"Error occurred while optimizing prompt from debug: {exc}")
+        raise
diff --git a/backend/apps/prompt_template_app.py b/backend/apps/prompt_template_app.py
new file mode 100644
index 000000000..0f12bd614
--- /dev/null
+++ b/backend/apps/prompt_template_app.py
@@ -0,0 +1,143 @@
+import logging
+from http import HTTPStatus
+from typing import Optional
+
+from fastapi import APIRouter, Header, HTTPException
+from starlette.responses import JSONResponse
+
+from consts.exceptions import DuplicateError, NotFoundException, ValidationError
+from consts.model import PromptTemplateRequest
+from services.prompt_template_service import (
+ create_prompt_template_impl,
+ delete_prompt_template_impl,
+ get_prompt_template_detail_impl,
+ list_prompt_templates_impl,
+ update_prompt_template_impl,
+)
+from utils.auth_utils import get_current_user_id
+
+router = APIRouter(prefix="/prompt_templates")
+logger = logging.getLogger("prompt_template_app")
+
+
+@router.get("")
+async def list_prompt_templates_api(
+    authorization: Optional[str] = Header(None),
+):
+    """Return the prompt templates visible to the current user.
+
+    Returns:
+        A 200 ``JSONResponse`` whose body is the value returned by
+        ``list_prompt_templates_impl``.
+
+    Raises:
+        HTTPException: 500 with a generic message for any failure.
+
+    NOTE(review): ``get_current_user_id`` runs inside the ``try``, so whatever
+    it raises (presumably including authentication failures - confirm) is
+    reported as 500.
+    """
+    try:
+        user_id, tenant_id = get_current_user_id(authorization)
+        templates = list_prompt_templates_impl(tenant_id=tenant_id, user_id=user_id)
+        return JSONResponse(status_code=HTTPStatus.OK, content=templates)
+    except Exception as exc:
+        logger.error(f"Prompt template list error: {str(exc)}")
+        raise HTTPException(
+            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+            detail="Prompt template list error.",
+        )
+
+
+@router.get("/{template_id}")
+async def get_prompt_template_api(
+    template_id: int,
+    authorization: Optional[str] = Header(None),
+):
+    """Return the detail of one prompt template.
+
+    Args:
+        template_id: Template identifier taken from the URL path.
+        authorization: Value of the ``Authorization`` request header.
+
+    Returns:
+        A 200 ``JSONResponse`` whose body is the value returned by
+        ``get_prompt_template_detail_impl``.
+
+    Raises:
+        HTTPException: 404 carrying the error text on ``NotFoundException``;
+            500 with a generic message for any other failure.
+    """
+    try:
+        user_id, tenant_id = get_current_user_id(authorization)
+        detail = get_prompt_template_detail_impl(
+            template_id=template_id, tenant_id=tenant_id, user_id=user_id
+        )
+        return JSONResponse(status_code=HTTPStatus.OK, content=detail)
+    except NotFoundException as exc:
+        raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail=str(exc))
+    except Exception as exc:
+        logger.error(f"Prompt template detail error: {str(exc)}")
+        raise HTTPException(
+            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+            detail="Prompt template detail error.",
+        )
+
+
+@router.post("")
+async def create_prompt_template_api(
+    request: PromptTemplateRequest,
+    authorization: Optional[str] = Header(None),
+):
+    """Create a prompt template for the current user.
+
+    Args:
+        request: Template payload from the request body.
+        authorization: Value of the ``Authorization`` request header.
+
+    Returns:
+        A 200 ``JSONResponse`` whose body is the value returned by
+        ``create_prompt_template_impl``.
+
+    Raises:
+        HTTPException: 400 carrying the error text on ``DuplicateError`` or
+            ``ValidationError``; 500 with a generic message otherwise.
+    """
+    try:
+        user_id, tenant_id = get_current_user_id(authorization)
+        created = create_prompt_template_impl(
+            request=request, tenant_id=tenant_id, user_id=user_id
+        )
+        return JSONResponse(status_code=HTTPStatus.OK, content=created)
+    except (DuplicateError, ValidationError) as exc:
+        # Both are client errors and share the same 400 response.
+        raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(exc))
+    except Exception as exc:
+        logger.error(f"Prompt template create error: {str(exc)}")
+        raise HTTPException(
+            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+            detail="Prompt template create error.",
+        )
+
+
+@router.put("/{template_id}")
+async def update_prompt_template_api(
+    template_id: int,
+    request: PromptTemplateRequest,
+    authorization: Optional[str] = Header(None),
+):
+    """Update an existing prompt template.
+
+    Args:
+        template_id: Template identifier taken from the URL path.
+        request: New template payload from the request body.
+        authorization: Value of the ``Authorization`` request header.
+
+    Returns:
+        A 200 ``JSONResponse`` whose body is the value returned by
+        ``update_prompt_template_impl``.
+
+    Raises:
+        HTTPException: 404 on ``NotFoundException``; 400 on ``DuplicateError``
+            or ``ValidationError`` (each carrying the error text); 500 with a
+            generic message otherwise.
+    """
+    try:
+        user_id, tenant_id = get_current_user_id(authorization)
+        updated = update_prompt_template_impl(
+            template_id=template_id,
+            request=request,
+            tenant_id=tenant_id,
+            user_id=user_id,
+        )
+        return JSONResponse(status_code=HTTPStatus.OK, content=updated)
+    except NotFoundException as exc:
+        raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail=str(exc))
+    except (DuplicateError, ValidationError) as exc:
+        # Both are client errors and share the same 400 response.
+        raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(exc))
+    except Exception as exc:
+        logger.error(f"Prompt template update error: {str(exc)}")
+        raise HTTPException(
+            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+            detail="Prompt template update error.",
+        )
+
+
+@router.delete("/{template_id}")
+async def delete_prompt_template_api(
+    template_id: int,
+    authorization: Optional[str] = Header(None),
+):
+    """Delete one prompt template.
+
+    Args:
+        template_id: Template identifier taken from the URL path.
+        authorization: Value of the ``Authorization`` request header.
+
+    Returns:
+        A 200 ``JSONResponse`` whose body is the value returned by
+        ``delete_prompt_template_impl``.
+
+    Raises:
+        HTTPException: 404 on ``NotFoundException``; 400 on
+            ``ValidationError`` (each carrying the error text); 500 with a
+            generic message otherwise.
+    """
+    try:
+        user_id, tenant_id = get_current_user_id(authorization)
+        outcome = delete_prompt_template_impl(
+            template_id=template_id, tenant_id=tenant_id, user_id=user_id
+        )
+        return JSONResponse(status_code=HTTPStatus.OK, content=outcome)
+    except NotFoundException as exc:
+        raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail=str(exc))
+    except ValidationError as exc:
+        raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(exc))
+    except Exception as exc:
+        logger.error(f"Prompt template delete error: {str(exc)}")
+        raise HTTPException(
+            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+            detail="Prompt template delete error.",
+        )
diff --git a/backend/apps/remote_mcp_app.py b/backend/apps/remote_mcp_app.py
index 0dd6127fd..3993e24ce 100644
--- a/backend/apps/remote_mcp_app.py
+++ b/backend/apps/remote_mcp_app.py
@@ -6,12 +6,27 @@
from fastapi.responses import JSONResponse, StreamingResponse
from http import HTTPStatus
-from consts.const import NEXENT_MCP_DOCKER_IMAGE, ENABLE_UPLOAD_IMAGE
-from consts.exceptions import MCPConnectionError, MCPNameIllegal, MCPContainerError
-from consts.model import MCPConfigRequest, MCPUpdateRequest
+from consts.const import ENABLE_UPLOAD_IMAGE
+from consts.exceptions import (
+ MCPConnectionError,
+ MCPNameIllegal,
+ MCPContainerError,
+ McpNotFoundError,
+ McpValidationError,
+ McpNameConflictError,
+ McpPortConflictError,
+)
+from consts.model import (
+ MCPConfigRequest,
+ AddMcpServiceRequest,
+ AddContainerMcpServiceRequest,
+ UpdateMcpServiceRequest,
+ EnableMcpServiceRequest,
+ DisableMcpServiceRequest,
+ HealthcheckMcpServiceRequest,
+ ListMcpServicesQuery,
+)
from services.remote_mcp_service import (
- add_remote_mcp_server_list,
- delete_remote_mcp_server_list,
get_remote_mcp_server_list,
check_mcp_health_and_update_db,
delete_mcp_by_container_id,
@@ -19,8 +34,16 @@
update_remote_mcp_server_list,
attach_mcp_container_permissions,
get_mcp_record_by_id,
+ list_mcp_service_tools_by_id,
+ add_mcp_service,
+ add_container_mcp_service,
+ update_mcp_service,
+ update_mcp_service_enabled,
+ delete_mcp_service,
+ check_mcp_service_health,
+ check_container_port_conflict,
+ suggest_container_port,
)
-from database.remote_mcp_db import check_mcp_name_exists
from services.tool_configuration_service import get_tool_from_remote_mcp_server
from services.mcp_container_service import MCPContainerManager
from utils.auth_utils import get_current_user_info
@@ -29,454 +52,388 @@
logger = logging.getLogger("remote_mcp_app")
-@router.post("/tools")
-async def get_tools_from_remote_mcp(
- service_name: str,
- mcp_url: str,
+# ---------------------------------------------------------------------------
+# Tools Endpoint
+# ---------------------------------------------------------------------------
+
+@router.get("/tools")
+async def get_tools_from_mcp(
+ mcp_id: int = Query(..., description="MCP service ID"),
authorization: Optional[str] = Header(None),
http_request: Request = None
):
- """ Used to list tool information from the remote MCP server """
+ """
+ Get tools from MCP server by MCP ID.
+
+ The tenant is resolved from the caller's credentials and the lookup is
+ delegated to ``list_mcp_service_tools_by_id``.
+
+ Returns:
+ JSONResponse (200) with ``tools`` and ``status`` keys.
+
+ Raises:
+ HTTPException: 404 on ``McpNotFoundError``, 503 on
+ ``MCPConnectionError``, 500 on any other error.
+ """
try:
- _, tenant_id, _ = get_current_user_info(
- authorization, http_request)
- tools_info = await get_tool_from_remote_mcp_server(
- mcp_server_name=service_name,
- remote_mcp_server=mcp_url,
- tenant_id=tenant_id
+ _, tenant_id, _ = get_current_user_info(authorization, http_request)
+
+ tools_info = await list_mcp_service_tools_by_id(
+ tenant_id=tenant_id,
+ mcp_id=mcp_id,
)
+
return JSONResponse(
status_code=HTTPStatus.OK,
content={
- "tools": [tool.__dict__ for tool in tools_info], "status": "success"}
+ # Items exposing model_dump() are serialized with it; anything
+ # else is passed through unchanged.
+ "tools": [t.model_dump() if hasattr(t, 'model_dump') else t for t in tools_info],
+ "status": "success"
+ }
)
+ except McpNotFoundError as e:
+ raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail=str(e))
except MCPConnectionError as e:
- logger.error(f"Failed to get tools from remote MCP server: {e}")
- raise HTTPException(status_code=HTTPStatus.SERVICE_UNAVAILABLE,
- detail="MCP connection failed")
+ logger.error(f"Failed to get tools from MCP server: {e}")
+ raise HTTPException(
+ status_code=HTTPStatus.SERVICE_UNAVAILABLE,
+ detail="MCP connection failed"
+ )
except Exception as e:
- logger.error(f"get tools from remote MCP server failed, error: {e}")
- raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
- detail="Failed to get tools from remote MCP server.")
+ logger.error(f"get tools from MCP server failed, error: {e}")
+ raise HTTPException(
+ status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+ detail="Failed to get tools from MCP server."
+ )
+# ---------------------------------------------------------------------------
+# Add Endpoints
+# ---------------------------------------------------------------------------
+
@router.post("/add")
-async def add_remote_proxies(
- mcp_url: str,
- service_name: str,
- authorization_token: Optional[str] = Query(
- None, description="Authorization token for MCP server authentication (e.g., Bearer token)"),
- tenant_id: Optional[str] = Query(
- None, description="Tenant ID for filtering (uses auth if not provided)"),
+async def add_mcp_service_endpoint(
+ payload: AddMcpServiceRequest,
authorization: Optional[str] = Header(None),
http_request: Request = None
):
- """ Used to add a remote MCP server """
+ """
+ Add an MCP service.
+ Supports both remote MCP (URL-based) and local MCP (record-based).
+
+ The user and tenant are resolved from the caller's credentials; all other
+ values are forwarded from ``payload`` to ``add_mcp_service``.
+
+ Returns:
+ JSONResponse (200) with ``message`` and ``status`` keys.
+
+ Raises:
+ HTTPException: 409 on ``MCPNameIllegal``, 503 on
+ ``MCPConnectionError``, 400 on ``McpValidationError``, 500 on any
+ other error.
+ """
try:
- user_id, auth_tenant_id, _ = get_current_user_info(
- authorization, http_request)
- # Use explicit tenant_id if provided, otherwise fall back to auth tenant_id
- effective_tenant_id = tenant_id or auth_tenant_id
- await add_remote_mcp_server_list(tenant_id=effective_tenant_id,
- user_id=user_id,
- remote_mcp_server=mcp_url,
- remote_mcp_server_name=service_name,
- container_id=None,
- authorization_token=authorization_token)
+ user_id, tenant_id, _ = get_current_user_info(authorization, http_request)
+
+ await add_mcp_service(
+ tenant_id=tenant_id,
+ user_id=user_id,
+ name=payload.name,
+ description=payload.description,
+ # Use .value when source has one; otherwise pass it through as is.
+ source=payload.source.value if hasattr(payload.source, 'value') else payload.source,
+ server_url=payload.server_url,
+ tags=payload.tags,
+ authorization_token=payload.authorization_token,
+ custom_headers=payload.custom_headers,
+ container_config=payload.container_config,
+ registry_json=payload.registry_json,
+ # An omitted "enabled" flag is treated as False.
+ enabled=payload.enabled if payload.enabled is not None else False,
+ )
+
return JSONResponse(
status_code=HTTPStatus.OK,
- content={"message": "Successfully added remote MCP proxy",
- "status": "success"}
+ content={"message": "Successfully added MCP service", "status": "success"}
)
except MCPNameIllegal as e:
- logger.error(f"Failed to add remote MCP proxy: {e}")
- raise HTTPException(status_code=HTTPStatus.CONFLICT,
- detail="MCP name already exists")
+ logger.error(f"Failed to add MCP service: {e}")
+ raise HTTPException(status_code=HTTPStatus.CONFLICT, detail="MCP name already exists")
except MCPConnectionError as e:
- logger.error(f"Failed to add remote MCP proxy: {e}")
- raise HTTPException(status_code=HTTPStatus.SERVICE_UNAVAILABLE,
- detail="MCP connection failed")
+ logger.error(f"Failed to add MCP service: {e}")
+ raise HTTPException(status_code=HTTPStatus.SERVICE_UNAVAILABLE, detail="MCP connection failed")
+ except McpValidationError as e:
+ raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(e))
except Exception as e:
- logger.error(f"Failed to add remote MCP proxy: {e}")
- raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
- detail="Failed to add remote MCP proxy")
+ logger.error(f"Failed to add MCP service: {e}")
+ raise HTTPException(
+ status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+ detail="Failed to add MCP service"
+ )
-@router.delete("")
-async def delete_remote_proxies(
- service_name: str,
- mcp_url: str,
- tenant_id: Optional[str] = Query(
- None, description="Tenant ID for filtering (uses auth if not provided)"),
+@router.post("/add-from-config")
+async def add_container_mcp_service_endpoint(
+ payload: AddContainerMcpServiceRequest,
authorization: Optional[str] = Header(None),
http_request: Request = None
):
- """ Used to delete a remote MCP server """
+ """
+ Add a container-based MCP service with full configuration.
+ Endpoint path is kept as /add-from-config for backward compatibility.
+
+ The user and tenant are resolved from the caller's credentials; the
+ remaining values are forwarded from ``payload`` to
+ ``add_container_mcp_service``.
+
+ Returns:
+ JSONResponse (200) whose ``data`` holds ``service_name``, ``mcp_url``,
+ ``container_id``, ``container_name`` and ``host_port`` read from the
+ mapping returned by the service (missing keys become ``None``).
+
+ Raises:
+ HTTPException: 409 on ``McpNameConflictError`` or
+ ``McpPortConflictError``, 400 on ``McpValidationError``, 503 on
+ ``MCPContainerError`` or ``MCPConnectionError``, 500 on any other
+ error.
+ """
try:
- user_id, auth_tenant_id, _ = get_current_user_info(
- authorization, http_request)
- # Use explicit tenant_id if provided, otherwise fall back to auth tenant_id
- effective_tenant_id = tenant_id or auth_tenant_id
- await delete_remote_mcp_server_list(tenant_id=effective_tenant_id,
- user_id=user_id,
- remote_mcp_server=mcp_url,
- remote_mcp_server_name=service_name)
+ user_id, tenant_id, _ = get_current_user_info(authorization, http_request)
+
+ container_info = await add_container_mcp_service(
+ tenant_id=tenant_id,
+ user_id=user_id,
+ name=payload.name,
+ description=payload.description,
+ # Use .value when source has one; otherwise pass it through as is.
+ source=payload.source.value if hasattr(payload.source, 'value') else payload.source,
+ tags=payload.tags,
+ authorization_token=payload.authorization_token,
+ registry_json=payload.registry_json,
+ port=payload.port,
+ mcp_config=payload.mcp_config,
+ )
+
return JSONResponse(
status_code=HTTPStatus.OK,
- content={"message": "Successfully deleted remote MCP proxy",
- "status": "success"}
+ content={
+ "status": "success",
+ "data": {
+ "service_name": container_info.get("service_name"),
+ "mcp_url": container_info.get("mcp_url"),
+ "container_id": container_info.get("container_id"),
+ "container_name": container_info.get("container_name"),
+ "host_port": container_info.get("host_port"),
+ },
+ },
+ )
+
+ except McpNameConflictError as e:
+ raise HTTPException(status_code=HTTPStatus.CONFLICT, detail=str(e))
+ except McpPortConflictError as e:
+ raise HTTPException(status_code=HTTPStatus.CONFLICT, detail=str(e))
+ except McpValidationError as e:
+ raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(e))
+ except MCPContainerError as e:
+ logger.error(f"Failed to start MCP container service: {e}")
+ raise HTTPException(
+ status_code=HTTPStatus.SERVICE_UNAVAILABLE,
+ detail="Docker service unavailable"
+ )
+ except MCPConnectionError as e:
+ logger.error(f"MCP connection failed when adding container service: {e}")
+ raise HTTPException(
+ status_code=HTTPStatus.SERVICE_UNAVAILABLE,
+ detail="MCP connection failed"
)
except Exception as e:
- logger.error(f"Failed to delete remote MCP proxy: {e}")
- raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
- detail="Failed to delete remote MCP proxy")
+ logger.error(f"Failed to add container MCP service: {e}")
+ raise HTTPException(
+ status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+ detail="Failed to add container MCP service"
+ )
+
+# ---------------------------------------------------------------------------
+# Update Endpoint
+# ---------------------------------------------------------------------------
@router.put("/update")
-async def update_remote_proxy(
- update_data: MCPUpdateRequest,
+async def update_mcp_service_endpoint(
+ payload: UpdateMcpServiceRequest,
tenant_id: Optional[str] = Query(
None, description="Tenant ID for filtering (uses auth if not provided)"),
authorization: Optional[str] = Header(None),
http_request: Request = None
):
- """ Used to update an existing remote MCP server """
+ """Update an existing MCP service by ID.
+
+ The ``tenant_id`` query parameter, when given, takes precedence over the
+ tenant resolved from the caller's credentials.
+
+ Returns:
+ JSONResponse (200) with ``message`` and ``status`` keys.
+
+ Raises:
+ HTTPException: 404 on ``McpNotFoundError``, 400 on
+ ``McpValidationError``, 500 on any other error.
+ """
try:
- user_id, auth_tenant_id, _ = get_current_user_info(
- authorization, http_request)
- # Use explicit tenant_id if provided, otherwise fall back to auth tenant_id
+ user_id, auth_tenant_id, _ = get_current_user_info(authorization, http_request)
effective_tenant_id = tenant_id or auth_tenant_id
- await update_remote_mcp_server_list(
- update_data=update_data,
+
+ # NOTE(review): this call is not awaited, while the call it replaces and
+ # the sibling add/delete service calls are - confirm update_mcp_service
+ # is synchronous, otherwise the update never runs.
+ update_mcp_service(
tenant_id=effective_tenant_id,
- user_id=user_id
+ user_id=user_id,
+ mcp_id=payload.mcp_id,
+ new_name=payload.name,
+ description=payload.description,
+ server_url=payload.server_url,
+ authorization_token=payload.authorization_token,
+ custom_headers=payload.custom_headers,
+ tags=payload.tags,
)
+
return JSONResponse(
status_code=HTTPStatus.OK,
- content={"message": "Successfully updated remote MCP proxy",
- "status": "success"}
+ content={"message": "Successfully updated MCP service", "status": "success"}
)
- except MCPNameIllegal as e:
- logger.error(f"Failed to update remote MCP proxy: {e}")
- raise HTTPException(status_code=HTTPStatus.CONFLICT,
- detail=str(e))
- except MCPConnectionError as e:
- logger.error(f"Failed to update remote MCP proxy: {e}")
- raise HTTPException(status_code=HTTPStatus.SERVICE_UNAVAILABLE,
- detail=str(e))
+
+ except McpNotFoundError as e:
+ raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail=str(e))
+ except McpValidationError as e:
+ raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(e))
except Exception as e:
- logger.error(f"Failed to update remote MCP proxy: {e}")
- raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
- detail="Failed to update remote MCP proxy")
+ logger.error(f"Failed to update MCP service: {e}")
+ raise HTTPException(
+ status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+ detail="Failed to update MCP service"
+ )
-@router.get("/list")
-async def get_remote_proxies(
+# ---------------------------------------------------------------------------
+# Delete Endpoints
+# ---------------------------------------------------------------------------
+
+@router.delete("/{mcp_id}")
+async def delete_mcp_by_id(
+ mcp_id: int,
tenant_id: Optional[str] = Query(
None, description="Tenant ID for filtering (uses auth if not provided)"),
authorization: Optional[str] = Header(None),
http_request: Request = None
):
- """ Used to get the list of remote MCP servers """
+ """Delete MCP service by ID.
+
+ The ``tenant_id`` query parameter, when given, takes precedence over the
+ tenant resolved from the caller's credentials.
+
+ Returns:
+ JSONResponse (200) with ``message`` and ``status`` keys.
+
+ Raises:
+ HTTPException: 404 on ``McpNotFoundError``, 500 on any other error.
+ """
try:
- user_id, auth_tenant_id, _ = get_current_user_info(
- authorization, http_request)
- # Use explicit tenant_id if provided, otherwise fall back to auth tenant_id
+ user_id, auth_tenant_id, _ = get_current_user_info(authorization, http_request)
effective_tenant_id = tenant_id or auth_tenant_id
- remote_mcp_server_list = await get_remote_mcp_server_list(
+
+ await delete_mcp_service(
tenant_id=effective_tenant_id,
user_id=user_id,
- is_need_auth=False
+ mcp_id=mcp_id
)
+
return JSONResponse(
status_code=HTTPStatus.OK,
- content={"remote_mcp_server_list": remote_mcp_server_list,
- "enable_upload_image": ENABLE_UPLOAD_IMAGE,
- "status": "success"}
+ content={"message": "Successfully deleted MCP service", "status": "success"}
)
+ except McpNotFoundError as e:
+ raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail=str(e))
except Exception as e:
- logger.error(f"Failed to get remote MCP proxy: {e}")
- raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
- detail="Failed to get remote MCP proxy")
+ logger.error(f"Failed to delete MCP service: {e}")
+ raise HTTPException(
+ status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+ detail="Failed to delete MCP service"
+ )
-@router.get("/record/{mcp_id}")
-async def get_mcp_record(
- mcp_id: int,
+@router.delete("/container/{container_id}")
+async def stop_mcp_container(
+ container_id: str,
tenant_id: Optional[str] = Query(
None, description="Tenant ID for filtering (uses auth if not provided)"),
authorization: Optional[str] = Header(None),
http_request: Request = None
):
- """ Get single MCP record by ID """
+ """Stop and remove MCP container.
+
+ When the container manager reports success, the MCP record tied to the
+ container is also removed through ``delete_mcp_by_container_id``.
+
+ Returns:
+ JSONResponse (200) on success, or JSONResponse (404) with
+ ``"Container not found"`` when the manager reports failure.
+
+ Raises:
+ HTTPException: 503 when ``MCPContainerManager`` cannot be created,
+ 500 on any other error (the detail includes the exception text).
+ """
try:
- user_id, auth_tenant_id, _ = get_current_user_info(
- authorization, http_request)
- # Use explicit tenant_id if provided, otherwise fall back to auth tenant_id
+ user_id, auth_tenant_id, _ = get_current_user_info(authorization, http_request)
effective_tenant_id = tenant_id or auth_tenant_id
- mcp_record = await get_mcp_record_by_id(
- mcp_id=mcp_id,
- tenant_id=effective_tenant_id
- )
-
- if not mcp_record:
+ try:
+ container_manager = MCPContainerManager()
+ except MCPContainerError as e:
+ logger.error(f"Failed to initialize container manager: {e}")
raise HTTPException(
- status_code=HTTPStatus.NOT_FOUND,
- detail="MCP record not found"
+ status_code=HTTPStatus.SERVICE_UNAVAILABLE,
+ detail="Docker service unavailable"
)
- return JSONResponse(
- status_code=HTTPStatus.OK,
- content={
- "mcp_name": mcp_record.get("mcp_name"),
- "mcp_server": mcp_record.get("mcp_server"),
- "authorization_token": mcp_record.get("authorization_token"),
- "status": "success"
- }
- )
+ success = await container_manager.stop_mcp_container(container_id)
+
+ if success:
+ # Remove the MCP record associated with this container as well.
+ await delete_mcp_by_container_id(
+ tenant_id=effective_tenant_id,
+ user_id=user_id,
+ container_id=container_id,
+ )
+ return JSONResponse(
+ status_code=HTTPStatus.OK,
+ content={
+ "message": "Container and MCP service stopped successfully",
+ "status": "success",
+ },
+ )
+ else:
+ return JSONResponse(
+ status_code=HTTPStatus.NOT_FOUND,
+ content={"message": "Container not found", "status": "error"},
+ )
except HTTPException:
raise
except Exception as e:
- logger.error(f"Failed to get MCP record: {e}")
+ logger.error(f"Failed to stop container: {e}")
raise HTTPException(
status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
- detail="Failed to get MCP record"
+ detail=f"Failed to stop container: {str(e)}"
)
-@router.get("/healthcheck")
-async def check_mcp_health(
- mcp_url: str,
- service_name: str,
- tenant_id: Optional[str] = Query(
- None, description="Tenant ID for filtering (uses auth if not provided)"),
- authorization: Optional[str] = Header(None),
- http_request: Request = None
-):
- """ Used to check the health of the MCP server, the front end can call it,
- and automatically update the database status """
- try:
- user_id, auth_tenant_id, _ = get_current_user_info(
- authorization, http_request)
- # Use explicit tenant_id if provided, otherwise fall back to auth tenant_id
- effective_tenant_id = tenant_id or auth_tenant_id
- await check_mcp_health_and_update_db(mcp_url, service_name, effective_tenant_id, user_id)
- return JSONResponse(
- status_code=HTTPStatus.OK,
- content={"status": "success"}
- )
- except MCPConnectionError as e:
- logger.error(f"MCP connection failed: {e}")
- raise HTTPException(status_code=HTTPStatus.SERVICE_UNAVAILABLE,
- detail="MCP connection failed")
- except Exception as e:
- logger.error(f"Failed to check the health of the MCP server: {e}")
- raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
- detail="Failed to check the health of the MCP server")
+# ---------------------------------------------------------------------------
+# List Endpoints
+# ---------------------------------------------------------------------------
-
-@router.post("/add-from-config")
-async def add_mcp_from_config(
- mcp_config: MCPConfigRequest,
+@router.get("/list")
+async def get_mcp_list(
tenant_id: Optional[str] = Query(
None, description="Tenant ID for filtering (uses auth if not provided)"),
authorization: Optional[str] = Header(None),
http_request: Request = None
):
"""
- Add MCP server by starting a container with command+args config.
- Similar to Cursor's MCP server configuration format.
-
- Example request:
- {
- "mcpServers": {
- "12306-mcp": {
- "command": "npx",
- "args": ["-y", "12306-mcp"],
- "env": {"NODE_ENV": "production"}
- }
- }
- }
+ Get list of MCP services.
+ Returns remote MCP list with full details including container_id, description,
+ enabled, source, update_time, tags, container_port, registry_json, config_json,
+ container_status, and authorization_token.
+
+ The ``tenant_id`` query parameter, when given, takes precedence over the
+ tenant resolved from the caller's credentials. The response also carries
+ the ``ENABLE_UPLOAD_IMAGE`` setting under ``enable_upload_image``.
+
+ Raises:
+ HTTPException: 500 with a generic message on any error.
"""
try:
- user_id, auth_tenant_id, _ = get_current_user_info(
- authorization, http_request)
- # Use explicit tenant_id if provided, otherwise fall back to auth tenant_id
+ user_id, auth_tenant_id, _ = get_current_user_info(authorization, http_request)
effective_tenant_id = tenant_id or auth_tenant_id
- # Initialize container manager
- try:
- container_manager = MCPContainerManager()
- except MCPContainerError as e:
- logger.error(f"Failed to initialize container manager: {e}")
- raise HTTPException(
- status_code=HTTPStatus.SERVICE_UNAVAILABLE,
- detail="Docker service unavailable. Please ensure Docker socket is mounted."
- )
-
- results = []
- errors = []
-
- for service_name, config in mcp_config.mcpServers.items():
- try:
- command = config.command
- args = config.args or []
- env_vars = config.env or {}
- port = config.port
-
- if not command:
- errors.append(f"{service_name}: command is required")
- continue
-
- if port is None:
- errors.append(f"{service_name}: port is required")
- continue
-
- # Check if MCP service name already exists before starting container
- if check_mcp_name_exists(mcp_name=service_name, tenant_id=effective_tenant_id):
- errors.append(f"{service_name}: MCP name already exists")
- continue
-
- # Build full command to run inside nexent/nexent-mcp image
- full_command = [
- "python",
- "-m",
- "mcp_proxy",
- "--host",
- "0.0.0.0",
- "--port",
- str(port),
- "--transport",
- "streamablehttp",
- "--",
- command,
- *args,
- ]
-
- # Start container
- container_info = await container_manager.start_mcp_container(
- service_name=service_name,
- tenant_id=effective_tenant_id,
- user_id=user_id,
- env_vars=env_vars,
- host_port=port,
- image=config.image or NEXENT_MCP_DOCKER_IMAGE,
- full_command=full_command,
- )
-
- # Register to remote MCP server list
- await add_remote_mcp_server_list(
- tenant_id=effective_tenant_id,
- user_id=user_id,
- remote_mcp_server=container_info["mcp_url"],
- remote_mcp_server_name=service_name,
- container_id=container_info["container_id"],
- )
-
- results.append({
- "service_name": service_name,
- "status": "success",
- "mcp_url": container_info["mcp_url"],
- "container_id": container_info["container_id"],
- "container_name": container_info.get("container_name"),
- "host_port": container_info.get("host_port")
- })
-
- except MCPContainerError as e:
- logger.error(
- f"Failed to start MCP container {service_name}: {e}")
- error_str = str(e)
- # Check if error is related to image not found
- if "not found" in error_str.lower() or "404" in error_str:
- errors.append(
- f"{service_name}: Image not found - MCP service startup image is missing")
- else:
- errors.append(f"{service_name}: {error_str}")
- except Exception as e:
- logger.error(
- f"Unexpected error adding MCP {service_name}: {e}")
- errors.append(f"{service_name}: {str(e)}")
-
- if errors and not results:
- raise HTTPException(
- status_code=HTTPStatus.BAD_REQUEST,
- detail=f"All MCP servers failed: {errors}"
- )
+ # NOTE(review): the previous /list handler passed is_need_auth=False;
+ # presumably this flag controls whether authorization_token is returned
+ # - confirm that exposing it from a list endpoint is intended.
+ remote_mcp_list = await get_remote_mcp_server_list(
+ tenant_id=effective_tenant_id,
+ user_id=user_id,
+ is_need_auth=True
+ )
return JSONResponse(
status_code=HTTPStatus.OK,
content={
- "message": "MCP servers processed",
- "results": results,
- "errors": errors if errors else None,
+ "remote_mcp_server_list": remote_mcp_list,
+ "enable_upload_image": ENABLE_UPLOAD_IMAGE,
"status": "success"
}
)
-
- except HTTPException:
- raise
except Exception as e:
- logger.error(f"Failed to add MCP from config: {e}")
+ logger.error(f"Failed to get MCP list: {e}")
raise HTTPException(
status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
- detail=f"Failed to add MCP servers: {str(e)}"
+ detail="Failed to get MCP list"
)
-@router.delete("/container/{container_id}")
-async def stop_mcp_container(
- container_id: str,
+@router.get("/record/{mcp_id}")
+async def get_mcp_record(
+ mcp_id: int,
tenant_id: Optional[str] = Query(
None, description="Tenant ID for filtering (uses auth if not provided)"),
authorization: Optional[str] = Header(None),
http_request: Request = None
):
- """ Stop and remove MCP container """
+ """Get single MCP record by ID.
+
+ The ``tenant_id`` query parameter, when given, takes precedence over the
+ tenant resolved from the caller's credentials.
+
+ Returns:
+ JSONResponse (200) with ``mcp_name``, ``mcp_server``,
+ ``authorization_token``, ``custom_headers`` and ``status`` keys.
+
+ Raises:
+ HTTPException: 404 when no record is found, 500 on any other error.
+ """
try:
- user_id, auth_tenant_id, _ = get_current_user_info(
- authorization, http_request)
- # Use explicit tenant_id if provided, otherwise fall back to auth tenant_id
+ user_id, auth_tenant_id, _ = get_current_user_info(authorization, http_request)
effective_tenant_id = tenant_id or auth_tenant_id
- try:
- container_manager = MCPContainerManager()
- except MCPContainerError as e:
- logger.error(f"Failed to initialize container manager: {e}")
- raise HTTPException(
- status_code=HTTPStatus.SERVICE_UNAVAILABLE,
- detail="Docker service unavailable"
- )
-
- success = await container_manager.stop_mcp_container(container_id)
+ mcp_record = await get_mcp_record_by_id(
+ mcp_id=mcp_id,
+ tenant_id=effective_tenant_id
+ )
- if success:
- # Soft delete the corresponding MCP record (if any) by container ID
- await delete_mcp_by_container_id(
- tenant_id=effective_tenant_id,
- user_id=user_id,
- container_id=container_id,
- )
- return JSONResponse(
- status_code=HTTPStatus.OK,
- content={
- "message": "Container and MCP service stopped successfully",
- "status": "success",
- },
- )
- else:
- return JSONResponse(
+ if not mcp_record:
+ raise HTTPException(
status_code=HTTPStatus.NOT_FOUND,
- content={"message": "Container not found", "status": "error"},
+ detail="MCP record not found"
)
+
+ return JSONResponse(
+ status_code=HTTPStatus.OK,
+ content={
+ "mcp_name": mcp_record.get("mcp_name"),
+ "mcp_server": mcp_record.get("mcp_server"),
+ "authorization_token": mcp_record.get("authorization_token"),
+ "custom_headers": mcp_record.get("custom_headers"),
+ "status": "success"
+ }
+ )
except HTTPException:
+ # Let the 404 raised above pass through instead of becoming a 500.
raise
except Exception as e:
- logger.error(f"Failed to stop container: {e}")
+ logger.error(f"Failed to get MCP record: {e}")
raise HTTPException(
status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
- detail=f"Failed to stop container: {str(e)}"
+ detail="Failed to get MCP record"
)
@@ -487,11 +444,10 @@ async def list_mcp_containers(
authorization: Optional[str] = Header(None),
http_request: Request = None
):
- """ List all MCP containers for the current tenant """
+ """List all MCP containers for the current tenant."""
try:
user_id, auth_tenant_id, _ = get_current_user_info(
authorization, http_request)
- # Use explicit tenant_id if provided, otherwise fall back to auth tenant_id
effective_tenant_id = tenant_id or auth_tenant_id
try:
@@ -539,11 +495,10 @@ async def get_container_logs(
authorization: Optional[str] = Header(None),
http_request: Request = None
):
- """ Get logs from MCP container via SSE stream """
+ """Get logs from MCP container via SSE stream."""
try:
user_id, auth_tenant_id, _ = get_current_user_info(
authorization, http_request)
- # Use explicit tenant_id if provided, otherwise fall back to auth tenant_id
effective_tenant_id = tenant_id or auth_tenant_id
try:
@@ -556,12 +511,11 @@ async def get_container_logs(
)
async def generate_log_stream():
- """Generate SSE stream of container logs"""
+ """Generate SSE stream of container logs."""
try:
async for log_line in container_manager.stream_container_logs(
container_id, tail=tail, follow=follow
):
- # Format as SSE: data: {json}\n\n
payload = json.dumps(
{"logs": log_line, "status": "success"},
ensure_ascii=False
@@ -597,7 +551,185 @@ async def generate_log_stream():
)
-# Conditionally add upload-image route based on ENABLE_UPLOAD_IMAGE setting
+@router.get("/healthcheck")
+async def check_mcp_health(
+    mcp_id: int = Query(..., description="MCP service ID"),
+    authorization: Optional[str] = Header(None),
+    http_request: Request = None
+):
+    """Report the health status of a single MCP service.
+
+    The caller is resolved with ``get_current_user_info`` and the resulting
+    user/tenant IDs are forwarded, together with ``mcp_id``, to
+    ``check_mcp_service_health``.
+
+    Returns:
+        JSONResponse (OK) shaped as
+        ``{"status": "success", "data": {"health_status": ...}}``.
+
+    Raises:
+        HTTPException: NOT_FOUND for McpNotFoundError, BAD_REQUEST for
+            McpValidationError, SERVICE_UNAVAILABLE for MCPConnectionError,
+            INTERNAL_SERVER_ERROR for anything else.
+    """
+    try:
+        caller_id, caller_tenant, _ = get_current_user_info(authorization, http_request)
+        probe_result = await check_mcp_service_health(
+            tenant_id=caller_tenant,
+            user_id=caller_id,
+            mcp_id=mcp_id,
+        )
+        body = {"status": "success", "data": {"health_status": probe_result}}
+        return JSONResponse(status_code=HTTPStatus.OK, content=body)
+    except McpNotFoundError as exc:
+        raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail=str(exc))
+    except McpValidationError as exc:
+        raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(exc))
+    except MCPConnectionError as exc:
+        logger.error(f"MCP connection failed: {exc}")
+        # Fall back to a fixed message when the exception carries no text.
+        reason = str(exc)
+        if not reason:
+            reason = "MCP connection failed"
+        raise HTTPException(status_code=HTTPStatus.SERVICE_UNAVAILABLE, detail=reason)
+    except Exception as exc:
+        logger.error(f"Failed to check MCP health: {exc}")
+        raise HTTPException(
+            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+            detail="Failed to check MCP health"
+        )
+
+
+# ---------------------------------------------------------------------------
+# Port Management Endpoints
+# ---------------------------------------------------------------------------
+
+@router.get("/port/check")
+async def check_mcp_port(
+    port: int = Query(..., ge=1, le=65535),
+    authorization: Optional[str] = Header(None),
+    http_request: Request = None
+):
+    """Tell the caller whether ``port`` can be used for an MCP container.
+
+    Returns:
+        JSONResponse (OK) shaped as
+        ``{"status": "success", "data": {"available": ...}}`` and sent with
+        cache-disabling headers.
+
+    Raises:
+        HTTPException: BAD_REQUEST for McpValidationError,
+            INTERNAL_SERVER_ERROR for anything else.
+    """
+    try:
+        # The return value is discarded; presumably this call only exists to
+        # reject unauthenticated callers - confirm against get_current_user_info.
+        get_current_user_info(authorization, http_request)
+        # NOTE(review): the helper is named "...port_conflict" but its result is
+        # published as "available" - confirm it returns True when the port is free.
+        is_available = check_container_port_conflict(port=port)
+        return JSONResponse(
+            status_code=HTTPStatus.OK,
+            content={"status": "success", "data": {"available": is_available}},
+            headers={
+                "Cache-Control": "no-cache, no-store, must-revalidate",
+                "Pragma": "no-cache",
+                "Expires": "0",
+            },
+        )
+    except McpValidationError as exc:
+        raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(exc))
+    except Exception as exc:
+        logger.error(f"Failed to check MCP port: {exc}")
+        raise HTTPException(
+            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+            detail="Failed to check MCP port"
+        )
+
+
+@router.get("/port/suggest")
+async def suggest_mcp_port(
+    authorization: Optional[str] = Header(None),
+    http_request: Request = None
+):
+    """Propose a port for a new MCP container.
+
+    Returns:
+        JSONResponse (OK) shaped as
+        ``{"status": "success", "data": {"port": ...}}`` where the port is
+        whatever ``suggest_container_port`` returned.
+
+    Raises:
+        HTTPException: CONFLICT for McpPortConflictError,
+            INTERNAL_SERVER_ERROR for anything else.
+    """
+    try:
+        # Result unused; presumably called only to validate the caller.
+        get_current_user_info(authorization, http_request)
+        suggested_port = suggest_container_port()
+        body = {"status": "success", "data": {"port": suggested_port}}
+        return JSONResponse(status_code=HTTPStatus.OK, content=body)
+    except McpPortConflictError as exc:
+        raise HTTPException(status_code=HTTPStatus.CONFLICT, detail=str(exc))
+    except Exception as exc:
+        logger.error(f"Failed to suggest MCP port: {exc}")
+        raise HTTPException(
+            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+            detail="Failed to suggest MCP port"
+        )
+
+
+# ---------------------------------------------------------------------------
+# Enable/Disable Endpoints
+# ---------------------------------------------------------------------------
+
+@router.post("/enable")
+async def enable_mcp_service(
+    payload: EnableMcpServiceRequest,
+    authorization: Optional[str] = Header(None),
+    http_request: Request = None
+):
+    """Switch an MCP service on.
+
+    Calls ``update_mcp_service_enabled`` with ``enabled=True`` for
+    ``payload.mcp_id`` on behalf of the authenticated caller.
+
+    Returns:
+        JSONResponse (OK) with ``{"status": "success"}``.
+
+    Raises:
+        HTTPException: NOT_FOUND for McpNotFoundError, CONFLICT for
+            McpNameConflictError / McpPortConflictError, BAD_REQUEST for
+            McpValidationError, SERVICE_UNAVAILABLE for MCPConnectionError,
+            INTERNAL_SERVER_ERROR for anything else.
+    """
+    try:
+        caller_id, caller_tenant, _ = get_current_user_info(authorization, http_request)
+        await update_mcp_service_enabled(
+            tenant_id=caller_tenant,
+            user_id=caller_id,
+            mcp_id=payload.mcp_id,
+            enabled=True,
+        )
+        return JSONResponse(status_code=HTTPStatus.OK, content={"status": "success"})
+    except McpNotFoundError as exc:
+        raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail=str(exc))
+    except (McpNameConflictError, McpPortConflictError) as exc:
+        # Both conflict flavours map to the same HTTP status.
+        raise HTTPException(status_code=HTTPStatus.CONFLICT, detail=str(exc))
+    except McpValidationError as exc:
+        raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(exc))
+    except MCPConnectionError as exc:
+        logger.error(f"MCP connection failed while enabling service: {exc}")
+        raise HTTPException(
+            status_code=HTTPStatus.SERVICE_UNAVAILABLE,
+            detail="MCP connection failed"
+        )
+    except Exception as exc:
+        logger.error(f"Failed to enable MCP service: {exc}")
+        raise HTTPException(
+            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+            detail="Failed to update MCP service status"
+        )
+
+
+@router.post("/disable")
+async def disable_mcp_service(
+    payload: DisableMcpServiceRequest,
+    authorization: Optional[str] = Header(None),
+    http_request: Request = None
+):
+    """Switch an MCP service off.
+
+    Calls ``update_mcp_service_enabled`` with ``enabled=False`` for
+    ``payload.mcp_id`` on behalf of the authenticated caller.
+
+    Returns:
+        JSONResponse (OK) with ``{"status": "success"}``.
+
+    Raises:
+        HTTPException: NOT_FOUND for McpNotFoundError, BAD_REQUEST for
+            McpValidationError, INTERNAL_SERVER_ERROR for anything else.
+    """
+    try:
+        caller_id, caller_tenant, _ = get_current_user_info(authorization, http_request)
+        await update_mcp_service_enabled(
+            tenant_id=caller_tenant,
+            user_id=caller_id,
+            mcp_id=payload.mcp_id,
+            enabled=False,
+        )
+        return JSONResponse(status_code=HTTPStatus.OK, content={"status": "success"})
+    except McpNotFoundError as exc:
+        raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail=str(exc))
+    except McpValidationError as exc:
+        raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(exc))
+    except Exception as exc:
+        logger.error(f"Failed to disable MCP service: {exc}")
+        raise HTTPException(
+            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+            detail="Failed to update MCP service status"
+        )
+
+
+# ---------------------------------------------------------------------------
+# Image Upload Endpoint
+# ---------------------------------------------------------------------------
+
if ENABLE_UPLOAD_IMAGE:
@router.post("/upload-image")
async def upload_mcp_image(
@@ -621,13 +753,10 @@ async def upload_mcp_image(
try:
user_id, auth_tenant_id, _ = get_current_user_info(
authorization, http_request)
- # Use explicit tenant_id if provided, otherwise fall back to auth tenant_id
effective_tenant_id = tenant_id or auth_tenant_id
- # Read file content
content = await file.read()
- # Call service layer to handle the business logic
result = await upload_and_start_mcp_image(
tenant_id=effective_tenant_id,
user_id=user_id,
diff --git a/backend/apps/skill_app.py b/backend/apps/skill_app.py
index c9e35b690..a2a3b38cf 100644
--- a/backend/apps/skill_app.py
+++ b/backend/apps/skill_app.py
@@ -1,23 +1,28 @@
"""Skill management HTTP endpoints."""
-import asyncio
+from nexent.core.agents.agent_model import ModelConfig
import logging
-import os
-import threading
from typing import Any, Dict, List, Optional
from fastapi import APIRouter, HTTPException, Query, UploadFile, File, Form, Header
from starlette.responses import JSONResponse, StreamingResponse
-from pydantic import BaseModel
+from http import HTTPStatus
+from pydantic import BaseModel, Field
+from consts.const import APP_VERSION, STREAMABLE_CONTENT_TYPES
from consts.exceptions import SkillException, UnauthorizedError
-from services.skill_service import SkillService
-from consts.model import SkillInstanceInfoRequest
+from services.skill_service import (
+ SkillService,
+ skill_creation_task_manager,
+ stream_skill_creation,
+ update_skill_list,
+ get_official_skills_with_status,
+)
+from consts.model import SkillInstanceInfoRequest, SkillCreateRequest, SkillCreateInteractiveRequest, SkillUpdateRequest, SkillResponse
from utils.auth_utils import get_current_user_id, get_current_user_info
-from utils.prompt_template_utils import get_skill_creation_simple_prompt_template
-from nexent.core.agents.agent_model import ModelConfig
-from agents.skill_creation_agent import create_simple_skill_from_request
-from nexent.core.utils.observer import MessageObserver
+from services.asset_owner_visibility import can_view_skill
+
+ASSET_OWNER_SKILL_VIEW_DENIED = {"content": "您无权限查看"}
logger = logging.getLogger(__name__)
@@ -25,52 +30,27 @@
skill_creator_router = APIRouter(prefix="/skills", tags=["nl2skill"])
-class SkillCreateRequest(BaseModel):
- """Request model for creating a skill."""
- name: str
- description: str
- content: str
- tool_ids: Optional[List[int]] = [] # Use tool_id list, link to ag_tool_info_t
- tool_names: Optional[List[str]] = [] # Alternative: use tool name list, will be converted to tool_ids
- tags: Optional[List[str]] = []
- source: Optional[str] = "custom" # official, custom, partner
- params: Optional[Dict[str, Any]] = None # Skill config (JSON object)
-
-
-class SkillUpdateRequest(BaseModel):
- """Request model for updating a skill."""
- description: Optional[str] = None
- content: Optional[str] = None
- tool_ids: Optional[List[int]] = None # Use tool_id list
- tool_names: Optional[List[str]] = None # Alternative: use tool name list, will be converted to tool_ids
- tags: Optional[List[str]] = None
- source: Optional[str] = None
- params: Optional[Dict[str, Any]] = None
-
-
-class SkillResponse(BaseModel):
- """Response model for skill data."""
- skill_id: int
- name: str
- description: str
- content: str
- tool_ids: List[int]
- tags: List[str]
- source: str
- params: Optional[Dict[str, Any]] = None
- created_by: Optional[str] = None
- create_time: Optional[str] = None
- updated_by: Optional[str] = None
- update_time: Optional[str] = None
+def _asset_owner_skill_view_denied_response(skill: Optional[Dict[str, Any]], tenant_id: str):
+    """Build the "view denied" response for a skill the caller may not see.
+
+    Args:
+        skill: Skill record, or None/empty when no skill was found.
+        tenant_id: Tenant of the caller, checked against the skill's
+            ``tenant_id`` field via ``can_view_skill``.
+
+    Returns:
+        A JSONResponse carrying ``ASSET_OWNER_SKILL_VIEW_DENIED`` when the
+        skill exists and ``can_view_skill`` rejects the caller; otherwise None.
+        No explicit status code is passed, so the response uses
+        JSONResponse's default status.
+    """
+    if not skill:
+        return None
+    owner_tenant_id = skill.get("tenant_id")
+    if can_view_skill(tenant_id, owner_tenant_id):
+        return None
+    return JSONResponse(content=ASSET_OWNER_SKILL_VIEW_DENIED)
# List routes first (no path parameters)
@router.get("")
-async def list_skills() -> JSONResponse:
- """List all available skills."""
+async def list_skills(
+ tenant_id: Optional[str] = Query(
+ None, description="Tenant ID for super admin to query specific tenant's skills"),
+ authorization: Optional[str] = Header(None)
+) -> JSONResponse:
+ """List all available skills for the current tenant (or a specific tenant for super admin)."""
try:
- service = SkillService()
- skills = service.list_skills()
+ _, current_tenant_id = get_current_user_id(authorization)
+ # Super admin can query a specific tenant's skills; otherwise use current user's tenant
+ effective_tenant_id = tenant_id if tenant_id else current_tenant_id
+ service = SkillService(tenant_id=effective_tenant_id)
+ skills = service.list_skills(tenant_id=effective_tenant_id)
return JSONResponse(content={"skills": skills})
except SkillException as e:
raise HTTPException(status_code=500, detail=str(e))
@@ -79,6 +59,68 @@ async def list_skills() -> JSONResponse:
raise HTTPException(status_code=500, detail="Internal server error")
+@router.get("/official")
+async def list_official_skills(
+    tenant_id: Optional[str] = Query(
+        None, description="Tenant ID for super admin to query specific tenant's skills"),
+    authorization: Optional[str] = Header(None)
+) -> JSONResponse:
+    """List all official skills with installation status for the current tenant (or a specific tenant for super admin).
+
+    Returns skills that have source='official', each with a status field:
+    - installable: skill exists globally but not yet installed for this tenant
+    - installed: skill already exists for this tenant
+
+    Raises:
+        HTTPException: 401 when authentication fails, 500 on any other error.
+    """
+    try:
+        _, current_tenant_id = get_current_user_id(authorization)
+        # NOTE(review): the tenant_id query override is applied here without any
+        # visible super-admin check - confirm the role is enforced elsewhere.
+        effective_tenant_id = tenant_id if tenant_id else current_tenant_id
+        skills = get_official_skills_with_status(tenant_id=effective_tenant_id)
+        return JSONResponse(content={"skills": skills})
+    except UnauthorizedError as e:
+        # Same mapping as the other skill endpoints in this module: an auth
+        # failure must surface as 401 instead of falling into the 500 branch.
+        raise HTTPException(status_code=401, detail=str(e))
+    except Exception as e:
+        logger.error(f"Error listing official skills: {e}")
+        raise HTTPException(status_code=500, detail="Internal server error")
+
+
+class InstallSkillsRequest(BaseModel):
+    """Request body of the skill-install endpoint: skill names plus the UI locale."""
+
+    # Required: names of the skills to install.
+    skill_names: List[str] = Field(
+        ...,
+        description="List of skill names to install",
+    )
+    # Optional: defaults to "en" when the client sends nothing.
+    locale: Optional[str] = Field(
+        "en",
+        description="Frontend locale (zh or en)",
+    )
+
+
+@router.post("/install")
+async def install_skills(
+    request: InstallSkillsRequest,
+    tenant_id: Optional[str] = Query(
+        None, description="Tenant ID for super admin to install skills for a specific tenant"),
+    authorization: Optional[str] = Header(None)
+) -> JSONResponse:
+    """Install official skills for the current tenant (or a specific tenant for super admin).
+
+    Uses ZIP-based installation for each skill name provided.
+    Skills that already exist are skipped.
+
+    Returns:
+        JSONResponse with a message, the list returned by
+        ``install_skills_from_zip_for_tenant`` under "installed", and its
+        length under "total".
+
+    Raises:
+        HTTPException: 401 when authentication fails, 500 on any other error.
+    """
+    try:
+        user_id, current_tenant_id = get_current_user_id(authorization)
+        from services.skill_service import install_skills_from_zip_for_tenant
+
+        # NOTE(review): the tenant_id query override is applied here without any
+        # visible super-admin check - confirm the role is enforced elsewhere.
+        effective_tenant_id = tenant_id if tenant_id else current_tenant_id
+        installed_names = install_skills_from_zip_for_tenant(
+            skill_names=request.skill_names,
+            tenant_id=effective_tenant_id,
+            user_id=user_id,
+            locale=request.locale
+        )
+        return JSONResponse(content={
+            "message": "Skills installed successfully",
+            "installed": installed_names,
+            "total": len(installed_names)
+        })
+    except UnauthorizedError as e:
+        # Same mapping as the other skill endpoints in this module: an auth
+        # failure must surface as 401 instead of falling into the 500 branch.
+        raise HTTPException(status_code=401, detail=str(e))
+    except Exception as e:
+        logger.error(f"Error installing skills: {e}")
+        raise HTTPException(status_code=500, detail="Internal server error")
+
+
# POST routes
@router.post("")
async def create_skill(
@@ -88,12 +130,13 @@ async def create_skill(
"""Create a new skill (JSON format)."""
try:
user_id, tenant_id = get_current_user_id(authorization)
- service = SkillService()
+ service = SkillService(tenant_id=tenant_id)
# Convert tool_names to tool_ids if provided
tool_ids = request.tool_ids or []
if request.tool_names:
- tool_ids = service.repository.get_tool_ids_by_names(request.tool_names, tenant_id)
+ raise NotImplementedError(
+ "Tool names are not supported for skill creation")
skill_data = {
"name": request.name,
@@ -102,9 +145,12 @@ async def create_skill(
"tool_ids": tool_ids,
"tags": request.tags,
"source": request.source,
- "params": request.params,
+ "config_schemas": request.config_schemas,
+ "config_values": request.config_values,
+ "files": request.files if request.files else [],
}
- skill = service.create_skill(skill_data, user_id=user_id)
+ skill = service.create_skill(
+ skill_data, tenant_id=tenant_id, user_id=user_id)
return JSONResponse(content=skill, status_code=201)
except UnauthorizedError as e:
raise HTTPException(status_code=401, detail=str(e))
@@ -121,7 +167,9 @@ async def create_skill(
@router.post("/upload")
async def create_skill_from_file(
file: UploadFile = File(..., description="SKILL.md file or ZIP archive"),
- skill_name: Optional[str] = Form(None, description="Optional skill name override"),
+ skill_name: Optional[str] = Form(
+ None, description="Optional skill name override"),
+ source: Optional[str] = Form("自定义", description="Skill source"),
authorization: Optional[str] = Header(None)
) -> JSONResponse:
"""Create a skill from file upload.
@@ -132,8 +180,7 @@ async def create_skill_from_file(
"""
try:
user_id, tenant_id = get_current_user_id(authorization)
- service = SkillService()
-
+ service = SkillService(tenant_id=tenant_id)
content = await file.read()
file_type = "auto"
@@ -147,34 +194,54 @@ async def create_skill_from_file(
file_content=content,
skill_name=skill_name,
file_type=file_type,
+ source=source,
user_id=user_id,
tenant_id=tenant_id
)
return JSONResponse(content=skill, status_code=201)
except UnauthorizedError as e:
+ logger.warning(f"Unauthorized: {e}")
raise HTTPException(status_code=401, detail=str(e))
except SkillException as e:
error_msg = str(e).lower()
+ logger.warning(f"SkillException: {e}")
if "already exists" in error_msg:
raise HTTPException(status_code=409, detail=str(e))
raise HTTPException(status_code=400, detail=str(e))
except Exception as e:
- logger.error(f"Error creating skill from file: {e}")
+ logger.error(
+ f"Unexpected error: {type(e).__name__}: {e}", exc_info=True)
raise HTTPException(status_code=500, detail="Internal server error")
# Routes with path parameters
@router.get("/{skill_name}/files")
-async def get_skill_file_tree(skill_name: str) -> JSONResponse:
+async def get_skill_file_tree(
+ skill_name: str,
+ authorization: Optional[str] = Header(None)
+) -> JSONResponse:
"""Get file tree structure of a skill."""
try:
- service = SkillService()
+ _, tenant_id = get_current_user_id(authorization)
+ service = SkillService(tenant_id=tenant_id)
+ skill = service.get_skill(skill_name)
+ if not skill:
+ raise HTTPException(
+ status_code=404, detail=f"Skill not found: {skill_name}")
+
+ denied = _asset_owner_skill_view_denied_response(skill, tenant_id)
+ if denied:
+ return denied
+
tree = service.get_skill_file_tree(skill_name)
if not tree:
- raise HTTPException(status_code=404, detail=f"Skill not found: {skill_name}")
+ raise HTTPException(
+ status_code=404, detail=f"Skill not found: {skill_name}")
return JSONResponse(content=tree)
except HTTPException:
raise
+ except UnauthorizedError as e:
+ raise HTTPException(status_code=401, detail=str(e))
except SkillException as e:
raise HTTPException(status_code=500, detail=str(e))
except Exception as e:
@@ -185,7 +252,8 @@ async def get_skill_file_tree(skill_name: str) -> JSONResponse:
@router.get("/{skill_name}/files/{file_path:path}")
async def get_skill_file_content(
skill_name: str,
- file_path: str
+ file_path: str,
+ authorization: Optional[str] = Header(None)
) -> JSONResponse:
"""Get content of a specific file within a skill.
@@ -194,13 +262,26 @@ async def get_skill_file_content(
file_path: Relative path to the file within the skill directory
"""
try:
- service = SkillService()
+ _, tenant_id = get_current_user_id(authorization)
+ service = SkillService(tenant_id=tenant_id)
+ skill = service.get_skill(skill_name)
+ if not skill:
+ raise HTTPException(
+ status_code=404, detail=f"Skill not found: {skill_name}")
+
+ denied = _asset_owner_skill_view_denied_response(skill, tenant_id)
+ if denied:
+ return denied
+
content = service.get_skill_file_content(skill_name, file_path)
if content is None:
- raise HTTPException(status_code=404, detail=f"File not found: {file_path}")
+ raise HTTPException(
+ status_code=404, detail=f"File not found: {file_path}")
return JSONResponse(content={"content": content})
except HTTPException:
raise
+ except UnauthorizedError as e:
+ raise HTTPException(status_code=401, detail=str(e))
except SkillException as e:
raise HTTPException(status_code=500, detail=str(e))
except Exception as e:
@@ -220,7 +301,7 @@ async def update_skill_from_file(
"""
try:
user_id, tenant_id = get_current_user_id(authorization)
- service = SkillService()
+ service = SkillService(tenant_id=tenant_id)
content = await file.read()
@@ -263,7 +344,7 @@ async def get_skill_instance(
try:
_, tenant_id = get_current_user_id(authorization)
- service = SkillService()
+ service = SkillService(tenant_id=tenant_id)
instance = service.get_skill_instance(
agent_id=agent_id,
skill_id=skill_id,
@@ -277,13 +358,22 @@ async def get_skill_instance(
detail=f"Skill instance not found for agent {agent_id} and skill {skill_id}"
)
- # Enrich with skill info from ag_skill_info_t (skill_name, skill_description, skill_content, params)
- skill = service.get_skill_by_id(skill_id)
+ # Enrich with skill info from ag_skill_info_t (skill_name, skill_description, skill_content, config_schemas, config_values)
+ # The instance's per-agent overrides are mapped to config_values for the frontend.
+ skill = service.get_skill_by_id(skill_id, tenant_id)
if skill:
instance["skill_name"] = skill.get("name")
instance["skill_description"] = skill.get("description", "")
instance["skill_content"] = skill.get("content", "")
- instance["skill_params"] = skill.get("params") or {}
+            # Template defaults from YAML-enriched skill
+            instance["config_schemas"] = skill.get("config_schemas") or []
+            # Read the per-agent overrides BEFORE "config_values" is rewritten;
+            # assigning the template first would discard them.
+            instance_overrides = instance.get("config_values") or {}
+            # Start from the template defaults, then let per-agent overrides win.
+            merged = dict(skill.get("config_values") or {})
+            merged.update(instance_overrides)
+            instance["config_values"] = merged
return JSONResponse(content=instance)
except UnauthorizedError as e:
@@ -309,10 +399,11 @@ async def update_skill_instance(
user_id, tenant_id = get_current_user_id(authorization)
# Validate skill exists
- service = SkillService()
- skill = service.get_skill_by_id(request.skill_id)
+ service = SkillService(tenant_id=tenant_id)
+ skill = service.get_skill_by_id(request.skill_id, tenant_id)
if not skill:
- raise HTTPException(status_code=404, detail=f"Skill with ID {request.skill_id} not found")
+ raise HTTPException(
+ status_code=404, detail=f"Skill with ID {request.skill_id} not found")
# Create or update skill instance
instance = service.create_or_update_skill_instance(
@@ -322,6 +413,18 @@ async def update_skill_instance(
version_no=request.version_no
)
+        # Enrich with template info so the frontend gets config_schemas and config_values
+        instance["skill_name"] = skill.get("name")
+        instance["skill_description"] = skill.get("description", "")
+        instance["skill_content"] = skill.get("content", "")
+        instance["config_schemas"] = skill.get("config_schemas") or []
+        # Read the per-agent values before the key is overwritten, then layer them
+        # over the template defaults so the overrides win on key collisions.
+        instance_overrides = instance.get("config_values") or {}
+        merged = dict(skill.get("config_values") or {})
+        merged.update(instance_overrides)
+        instance["config_values"] = merged
+
return JSONResponse(content={"message": "Skill instance updated", "instance": instance})
except UnauthorizedError as e:
raise HTTPException(status_code=401, detail=str(e))
@@ -336,7 +439,8 @@ async def update_skill_instance(
@router.get("/instance/list")
async def list_skill_instances(
- agent_id: int = Query(..., description="Agent ID to query skill instances"),
+ agent_id: int = Query(...,
+ description="Agent ID to query skill instances"),
version_no: int = Query(0, description="Version number (0 for draft)"),
authorization: Optional[str] = Header(None)
) -> JSONResponse:
@@ -344,7 +448,7 @@ async def list_skill_instances(
try:
_, tenant_id = get_current_user_id(authorization)
- service = SkillService()
+ service = SkillService(tenant_id=tenant_id)
instances = service.list_skill_instances(
agent_id=agent_id,
@@ -352,14 +456,21 @@ async def list_skill_instances(
version_no=version_no
)
- # Enrich with skill info from ag_skill_info_t (skill_name, skill_description, skill_content, params)
+ # Enrich with skill info from ag_skill_info_t (skill_name, skill_description, skill_content, config_values)
+ # Also include config_schemas and config_values from the template (via YAML enrichment).
+ # The instance's per-agent overrides (config_values) are used as-is for the frontend.
for instance in instances:
- skill = service.get_skill_by_id(instance.get("skill_id"))
+ skill = service.get_skill_by_id(
+ instance.get("skill_id"), tenant_id)
if skill:
instance["skill_name"] = skill.get("name")
instance["skill_description"] = skill.get("description", "")
instance["skill_content"] = skill.get("content", "")
- instance["skill_params"] = skill.get("params") or {}
+ # Template defaults from YAML-enriched skill
+ instance["config_schemas"] = skill.get("config_schemas") or []
+ # Per-agent config_values from SkillInstance override template defaults
+ instance["config_values"] = instance.get(
+ "config_values") or skill.get("config_values") or {}
return JSONResponse(content={"instances": instances})
except UnauthorizedError as e:
@@ -369,14 +480,32 @@ async def list_skill_instances(
raise HTTPException(status_code=500, detail="Internal server error")
+@router.get("/scan_skill")
+async def scan_and_update_skill(authorization: Optional[str] = Header(None)):
+    """Scan local skill directories and update skill list in database.
+
+    Delegates to ``update_skill_list`` with the caller's tenant and user IDs.
+
+    Returns:
+        JSONResponse (OK) with a success message.
+
+    Raises:
+        HTTPException: 401 when authentication fails,
+            INTERNAL_SERVER_ERROR on any other error.
+    """
+    try:
+        user_id, tenant_id = get_current_user_id(authorization)
+        await update_skill_list(tenant_id=tenant_id, user_id=user_id)
+        return JSONResponse(
+            status_code=HTTPStatus.OK,
+            content={"message": "Successfully update skill", "status": "success"}
+        )
+    except UnauthorizedError as e:
+        # Same mapping as the other skill endpoints in this module: an auth
+        # failure must surface as 401 instead of falling into the 500 branch.
+        raise HTTPException(status_code=401, detail=str(e))
+    except Exception as e:
+        logger.error(f"Failed to update skill: {e}")
+        raise HTTPException(
+            status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Failed to update skill")
+
+
@router.get("/{skill_name}")
-async def get_skill(skill_name: str) -> JSONResponse:
+async def get_skill(skill_name: str, authorization: Optional[str] = Header(None)) -> JSONResponse:
"""Get a specific skill by name."""
try:
- service = SkillService()
- skill = service.get_skill(skill_name)
+ _, tenant_id = get_current_user_id(authorization)
+ service = SkillService(tenant_id=tenant_id)
+ skill = service.get_skill(skill_name, tenant_id=tenant_id)
if not skill:
- raise HTTPException(status_code=404, detail=f"Skill not found: {skill_name}")
+ raise HTTPException(
+ status_code=404, detail=f"Skill not found: {skill_name}")
return JSONResponse(content=skill)
except HTTPException:
raise
@@ -399,32 +528,32 @@ async def update_skill(
"""
try:
user_id, tenant_id = get_current_user_id(authorization)
- service = SkillService()
+ service = SkillService(tenant_id=tenant_id)
update_data = {}
if request.description is not None:
update_data["description"] = request.description
if request.content is not None:
update_data["content"] = request.content
- if request.tool_ids is not None:
- # Convert tool_names to tool_ids if tool_names provided, else use tool_ids directly
- if request.tool_names:
- update_data["tool_ids"] = service.repository.get_tool_ids_by_names(request.tool_names, tenant_id)
- else:
- update_data["tool_ids"] = request.tool_ids
- elif request.tool_names is not None:
- # Only tool_names provided, convert to tool_ids
- update_data["tool_ids"] = service.repository.get_tool_ids_by_names(request.tool_names, tenant_id)
if request.tags is not None:
update_data["tags"] = request.tags
if request.source is not None:
update_data["source"] = request.source
- if request.params is not None:
- update_data["params"] = request.params
+ if request.config_schemas is not None:
+ update_data["config_schemas"] = request.config_schemas
+ if request.config_values is not None:
+ update_data["config_values"] = request.config_values
+ if request.files is not None:
+ update_data["files"] = [f.model_dump() for f in request.files]
if not update_data:
raise HTTPException(status_code=400, detail="No fields to update")
- skill = service.update_skill(skill_name, update_data, user_id=user_id)
+ skill = service.update_skill(
+ skill_name,
+ update_data,
+ tenant_id=tenant_id,
+ user_id=user_id,
+ )
return JSONResponse(content=skill)
except UnauthorizedError as e:
raise HTTPException(status_code=401, detail=str(e))
@@ -446,9 +575,9 @@ async def delete_skill(
) -> JSONResponse:
"""Delete a skill."""
try:
- user_id, _ = get_current_user_id(authorization)
- service = SkillService()
- service.delete_skill(skill_name, user_id=user_id)
+ user_id, tenant_id = get_current_user_id(authorization)
+ service = SkillService(tenant_id=tenant_id)
+ service.delete_skill(skill_name, tenant_id=tenant_id, user_id=user_id)
return JSONResponse(content={"message": f"Skill {skill_name} deleted successfully"})
except UnauthorizedError as e:
raise HTTPException(status_code=401, detail=str(e))
@@ -459,12 +588,6 @@ async def delete_skill(
raise HTTPException(status_code=500, detail="Internal server error")
-class SkillCreateSimpleRequest(BaseModel):
- """Request model for interactive skill creation."""
- user_request: str
- existing_skill: Optional[Dict[str, Any]] = None
-
-
def _build_model_config_from_tenant(tenant_id: str) -> ModelConfig:
"""Build ModelConfig from tenant's quick-config LLM model."""
from utils.config_utils import tenant_config_manager, get_model_name_from_config
@@ -489,117 +612,66 @@ def _build_model_config_from_tenant(tenant_id: str) -> ModelConfig:
)
-@skill_creator_router.post("/create-simple")
-async def create_simple_skill(
- request: SkillCreateSimpleRequest,
+@skill_creator_router.post("/create")
+async def create_skill(
+ request: SkillCreateInteractiveRequest,
authorization: Optional[str] = Header(None)
):
- """Create a simple skill interactively via LLM agent.
+ """Create a skill interactively via LLM agent.
- Loads the skill_creation_simple prompt template, runs an internal agent
- with WriteSkillFileTool and ReadSkillMdTool, extracts the block
+ Loads the skill creation prompt template (simple or complicated based on complexity),
+ runs an internal agent with WriteSkillFileTool and ReadSkillMdTool, extracts the skill content
from the final answer, and streams step progress and token content via SSE.
Yields SSE events:
- step_count: Current agent step number
- skill_content: Token-level content (thinking, code, deep_thinking, tool output)
- - final_answer: Complete skill content
+ - final_answer: Complete skill content with and delimiters
- done: Stream completion signal
"""
- # Message types to stream as skill_content (token-level output)
- STREAMABLE_CONTENT_TYPES = frozenset([
- "model_output_thinking",
- "model_output_code",
- "model_output_deep_thinking",
- "tool",
- "execution_logs",
- ])
-
- async def generate():
- import json
- try:
- _, tenant_id, language = get_current_user_info(authorization)
-
- template = get_skill_creation_simple_prompt_template(
- language,
- existing_skill=request.existing_skill
- )
+ try:
+ _, tenant_id, user_language = get_current_user_info(authorization)
+ except Exception as e:
+ logger.error(f"Unauthorized access attempt: {e}")
+ raise HTTPException(status_code=401, detail="Unauthorized")
+
+ # Build model config from tenant
+ model_config = _build_model_config_from_tenant(tenant_id)
+
+ # Get language from request or user preference
+ lang = request.language or user_language or "zh"
+
+ # Delegate to service layer
+ task_id, generator = stream_skill_creation(
+ user_request=request.user_request,
+ language=lang,
+ model_config=model_config,
+ existing_skill=request.existing_skill,
+ complexity=request.complexity or "simple"
+ )
+
+ return StreamingResponse(generator(), media_type="text/event-stream", headers={"X-Task-ID": task_id})
+
+
+@skill_creator_router.get("/stop/{task_id}")
+async def stop_skill_creation(
+ task_id: str,
+ authorization: Optional[str] = Header(None)
+):
+ """Stop an active skill creation task.
+
+ Args:
+ task_id: The task ID returned from the /create endpoint (passed via X-Task-ID header)
+ """
+ try:
+ _, _ = get_current_user_id(authorization)
+ except Exception as e:
+ logger.error(f"Unauthorized access attempt: {e}")
+ raise HTTPException(status_code=401, detail="Unauthorized")
+
+ success = skill_creation_task_manager.stop_task(task_id)
- model_config = _build_model_config_from_tenant(tenant_id)
- observer = MessageObserver(lang=language)
- stop_event = threading.Event()
-
- # Get local_skills_dir from SkillManager
- skill_service = SkillService()
- local_skills_dir = skill_service.skill_manager.local_skills_dir or ""
-
- # Start skill creation in background thread
- def run_task():
- create_simple_skill_from_request(
- system_prompt=template.get("system_prompt", ""),
- user_prompt=request.user_request,
- model_config_list=[model_config],
- observer=observer,
- stop_event=stop_event,
- local_skills_dir=local_skills_dir
- )
-
- thread = threading.Thread(target=run_task)
- thread.start()
-
- # Poll observer for step_count and token content messages
- while thread.is_alive():
- cached = observer.get_cached_message()
- for msg in cached:
- if isinstance(msg, str):
- try:
- data = json.loads(msg)
- msg_type = data.get("type", "")
- content = data.get("content", "")
-
- # Stream step progress
- if msg_type == "step_count":
- yield f"data: {json.dumps({'type': 'step_count', 'content': content}, ensure_ascii=False)}\n\n"
- # Stream token content (thinking, code, deep_thinking, tool output)
- elif msg_type in STREAMABLE_CONTENT_TYPES:
- yield f"data: {json.dumps({'type': 'skill_content', 'content': content}, ensure_ascii=False)}\n\n"
- # Stream final_answer content separately
- elif msg_type == "final_answer":
- yield f"data: {json.dumps({'type': 'final_answer', 'content': content}, ensure_ascii=False)}\n\n"
- except (json.JSONDecodeError, Exception):
- pass
- await asyncio.sleep(0.1)
-
- thread.join()
-
- # Stream any remaining cached messages after thread completes
- remaining = observer.get_cached_message()
- for msg in remaining:
- if isinstance(msg, str):
- try:
- data = json.loads(msg)
- msg_type = data.get("type", "")
- content = data.get("content", "")
-
- if msg_type == "step_count":
- yield f"data: {json.dumps({'type': 'step_count', 'content': content}, ensure_ascii=False)}\n\n"
- elif msg_type in STREAMABLE_CONTENT_TYPES:
- yield f"data: {json.dumps({'type': 'skill_content', 'content': content}, ensure_ascii=False)}\n\n"
- elif msg_type == "final_answer":
- yield f"data: {json.dumps({'type': 'final_answer', 'content': content}, ensure_ascii=False)}\n\n"
- except (json.JSONDecodeError, Exception):
- pass
-
- # Stream final answer content from observer
- final_result = observer.get_final_answer()
- if final_result:
- yield f"data: {json.dumps({'type': 'final_answer', 'content': final_result}, ensure_ascii=False)}\n\n"
-
- # Send done signal
- yield f"data: {json.dumps({'type': 'done'}, ensure_ascii=False)}\n\n"
-
- except Exception as e:
- logger.error(f"Error in create_simple_skill stream: {e}")
- yield f"data: {json.dumps({'type': 'error', 'message': str(e)}, ensure_ascii=False)}\n\n"
-
- return StreamingResponse(generate(), media_type="text/event-stream")
+ if success:
+ return JSONResponse(content={"status": "success", "message": "Skill creation task stopped"})
+ else:
+ return JSONResponse(content={"status": "not_found", "message": "Task not found or already completed"}, status_code=404)
diff --git a/backend/apps/tenant_app.py b/backend/apps/tenant_app.py
index e0d612902..291cd22fa 100644
--- a/backend/apps/tenant_app.py
+++ b/backend/apps/tenant_app.py
@@ -49,7 +49,10 @@ async def create_tenant_endpoint(
# Create tenant
tenant_info = create_tenant(
tenant_name=request.tenant_name,
- created_by=user_id
+ created_by=user_id,
+ skill_ids=request.skill_ids,
+ skill_names=request.skill_names,
+ locale=request.locale,
)
logger.info(f"Created tenant {tenant_info['tenant_id']} by user {user_id}")
diff --git a/backend/apps/tool_config_app.py b/backend/apps/tool_config_app.py
index f0b7f9304..bfc8d5ca0 100644
--- a/backend/apps/tool_config_app.py
+++ b/backend/apps/tool_config_app.py
@@ -160,12 +160,14 @@ async def import_openapi_service_api(
server_url: Base URL of the REST API server
openapi_json: Complete OpenAPI JSON specification
service_description: Optional service description
+ headers_template: Optional default headers template
force_update: If True, replace all existing tools for this service
"""
service_name = openapi_service_request.get("service_name")
server_url = openapi_service_request.get("server_url")
openapi_json = openapi_service_request.get("openapi_json")
service_description = openapi_service_request.get("service_description")
+ headers_template = openapi_service_request.get("headers_template")
force_update = openapi_service_request.get("force_update", False)
if not service_name:
@@ -192,6 +194,7 @@ async def import_openapi_service_api(
tenant_id=tenant_id,
user_id=user_id,
service_description=service_description,
+ headers_template=headers_template,
force_update=force_update
)
diff --git a/backend/apps/user_management_app.py b/backend/apps/user_management_app.py
index d50cdc1f0..e79fde887 100644
--- a/backend/apps/user_management_app.py
+++ b/backend/apps/user_management_app.py
@@ -8,18 +8,29 @@
from supabase_auth.errors import AuthApiError, AuthWeakPasswordError
-from consts.model import UserSignInRequest, UserSignUpRequest
-from consts.exceptions import NoInviteCodeException, IncorrectInviteCodeException, UserRegistrationException
+from consts.const import ASSET_OWNER_SIGNUP_USE_OAUTH_DETAIL
+from consts.model import UserSignInRequest, UserSignUpRequest, UpdatePasswordRequest
+from consts.exceptions import (
+ NoInviteCodeException,
+ IncorrectInviteCodeException,
+ UserRegistrationException,
+ AppException,
+ UnauthorizedError,
+ ValidationError,
+)
+from consts.error_code import ErrorCode
+from services.cas_service import build_logout_url, CasAuthenticationError
from services.user_management_service import get_authorized_client, validate_token, \
check_auth_service_health, signup_user_with_invitation, signin_user, refresh_user_token, \
- get_session_by_authorization, get_user_info, create_token, list_tokens_by_user, delete_token
+ get_session_by_authorization, get_user_info, create_token, list_tokens_by_user, delete_token, \
+ update_password
from services.user_service import delete_user_and_cleanup
-from consts.exceptions import UnauthorizedError
-from utils.auth_utils import get_current_user_id
+from utils.auth_utils import get_current_user_id, extract_session_id_from_authorization
load_dotenv()
logging.getLogger("httpx").setLevel(logging.WARNING)
+logger = logging.getLogger("user_management_app")
router = APIRouter(prefix="/user", tags=["user"])
@@ -33,10 +44,12 @@ async def service_health():
content={"message": "Auth service is available"})
except ConnectionError as e:
logging.error(f"Auth service health check failed: {str(e)}")
- raise HTTPException(status_code=HTTPStatus.SERVICE_UNAVAILABLE, detail="Auth service is unavailable")
+ raise HTTPException(
+ status_code=HTTPStatus.SERVICE_UNAVAILABLE, detail="Auth service is unavailable")
except Exception as e:
logging.error(f"Auth service health check failed: {str(e)}")
- raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Auth service is unavailable")
+ raise HTTPException(
+ status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Auth service is unavailable")
@router.post("/signup")
@@ -49,7 +62,7 @@ async def signup(request: UserSignUpRequest):
auto_login=request.auto_login)
success_message = "🎉 User account registered successfully! Please start experiencing the AI assistant service."
return JSONResponse(status_code=HTTPStatus.OK,
- content={"message":success_message, "data":user_data})
+ content={"message": success_message, "data": user_data})
except NoInviteCodeException as e:
logging.error(f"User registration failed by invite code: {str(e)}")
raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
@@ -58,18 +71,28 @@ async def signup(request: UserSignUpRequest):
logging.error(f"User registration failed by invite code: {str(e)}")
raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
detail="INVITE_CODE_INVALID")
+ except ValidationError as e:
+ detail = str(e)
+ if detail == ASSET_OWNER_SIGNUP_USE_OAUTH_DETAIL:
+ logging.warning(
+ "User registration rejected: asset owner invite requires OAuth")
+ else:
+ logging.warning(
+ f"User registration rejected by validation: {detail}")
+ raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=detail)
except UserRegistrationException as e:
- logging.error(f"User registration failed by registration service: {str(e)}")
+ logging.error(
+ f"User registration failed by registration service: {str(e)}")
raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
detail="REGISTRATION_SERVICE_ERROR")
- except AuthApiError as e:
- logging.error(f"User registration failed by email already exists: {str(e)}")
- raise HTTPException(status_code=HTTPStatus.CONFLICT,
- detail="EMAIL_ALREADY_EXISTS")
except AuthWeakPasswordError as e:
logging.error(f"User registration failed by weak password: {str(e)}")
- raise HTTPException(status_code=HTTPStatus.NOT_ACCEPTABLE,
+ raise HTTPException(status_code=HTTPStatus.BAD_REQUEST,
detail="WEAK_PASSWORD")
+ except AuthApiError as e:
+ logging.error(f"User registration failed by auth error: {str(e)}")
+ raise HTTPException(status_code=HTTPStatus.CONFLICT,
+ detail="EMAIL_ALREADY_EXISTS")
except Exception as e:
logging.error(f"User registration failed, unknown error: {str(e)}")
raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
@@ -81,13 +104,16 @@ async def signin(request: UserSignInRequest):
"""User login"""
try:
signin_content = await signin_user(email=request.email,
- password=request.password)
+ password=request.password)
return JSONResponse(status_code=HTTPStatus.OK,
content=signin_content)
except AuthApiError as e:
logging.error(f"User login failed: {str(e)}")
raise HTTPException(status_code=HTTPStatus.UNAUTHORIZED,
detail="Email or password error")
+ except ValidationError as e:
+ logging.warning(f"User login rejected by feature flag: {str(e)}")
+ raise HTTPException(status_code=HTTPStatus.BAD_REQUEST, detail=str(e))
except Exception as e:
logging.error(f"User login failed, unknown error: {str(e)}")
raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
@@ -108,7 +134,7 @@ async def user_refresh_token(request: Request):
raise ValueError("No refresh token provided")
session_info = await refresh_user_token(authorization, refresh_token)
return JSONResponse(status_code=HTTPStatus.OK,
- content={"message":"Token refresh successful", "data":{"session": session_info}})
+ content={"message": "Token refresh successful", "data": {"session": session_info}})
except ValueError as e:
logging.error(f"Refresh token failed: {str(e)}")
raise HTTPException(status_code=HTTPStatus.UNPROCESSABLE_ENTITY,
@@ -125,7 +151,18 @@ async def logout(request: Request):
authorization = request.headers.get("Authorization")
try:
# Make logout idempotent: if no token or token expired, still return success
+ session_id = None
+ cas_logout_url = ""
if authorization:
+ session_id = extract_session_id_from_authorization(authorization)
+ if session_id:
+ from database.cas_session_db import revoke_cas_session_by_session_id
+
+ revoke_cas_session_by_session_id(session_id, actor="user")
+ try:
+ cas_logout_url = build_logout_url()
+ except CasAuthenticationError as cas_err:
+ logging.warning(f"CAS logout URL is unavailable: {str(cas_err)}")
client = get_authorized_client(authorization)
try:
client.auth.sign_out()
@@ -134,7 +171,12 @@ async def logout(request: Request):
logging.warning(
f"Sign out encountered an error but will be ignored: {str(signout_err)}")
return JSONResponse(status_code=HTTPStatus.OK,
- content={"message":"Logout successful"})
+ content={
+ "message": "Logout successful",
+ "data": {
+ "cas_logout_url": cas_logout_url
+ }
+ })
except Exception as e:
logging.error(f"User logout failed: {str(e)}")
@@ -154,8 +196,8 @@ async def get_session(request: Request):
try:
data = await get_session_by_authorization(authorization)
return JSONResponse(status_code=HTTPStatus.OK,
- content={"message": "Session is valid",
- "data": data})
+ content={"message": "Session is valid",
+ "data": data})
except UnauthorizedError as e:
logging.error(f"Get user session unauthorized: {str(e)}")
raise HTTPException(status_code=HTTPStatus.UNAUTHORIZED,
@@ -189,6 +231,10 @@ async def get_user_information(request: Request):
if not user_info:
raise UnauthorizedError("User information not found")
+ user_info["user"]["auth_provider"] = (
+ "cas" if extract_session_id_from_authorization(authorization) else "local"
+ )
+
return JSONResponse(status_code=HTTPStatus.OK,
content={"message": "Success",
"data": user_info})
@@ -276,6 +322,7 @@ async def revoke_user_account(request: Request):
raise HTTPException(
status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="User revoke failed")
+
@router.post("/tokens")
async def create_token_endpoint(
authorization: Optional[str] = Header(None)
@@ -379,3 +426,49 @@ async def delete_token_endpoint(
logging.error(f"Failed to delete token: {str(e)}", exc_info=e)
raise HTTPException(
status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Internal Server Error")
+
+
+@router.put("/password")
+async def update_password_endpoint(
+    request: UpdatePasswordRequest,
+    authorization: Optional[str] = Header(None)
+):
+    """Update current user's password.
+
+    This endpoint requires the user to provide their current password for verification
+    before setting a new password.
+
+    Args:
+        request: Payload carrying ``old_password`` and ``new_password``.
+        authorization: Value of the ``Authorization`` header.
+
+    Returns:
+        JSONResponse with HTTP 200 and a success message.
+
+    Raises:
+        HTTPException: 401 when the header is missing or no user id can be
+            resolved from it; 500 for any unexpected failure.
+        AppException: ``PROFILE_INVALID_CREDENTIALS`` when an
+            ``UnauthorizedError`` is raised; any other ``AppException`` is
+            re-raised unchanged.
+    """
+    try:
+        if not authorization:
+            raise HTTPException(status_code=HTTPStatus.UNAUTHORIZED,
+                                detail="Unauthorized: No authorization token provided")
+
+        user_id, _ = get_current_user_id(authorization)
+        if not user_id:
+            raise HTTPException(status_code=HTTPStatus.UNAUTHORIZED,
+                                detail="Unauthorized: missing user_id in JWT token")
+
+        await update_password(
+            user_id=str(user_id),
+            old_password=request.old_password,
+            new_password=request.new_password
+        )
+
+        logger.info(f"Password updated successfully for user {user_id}")
+
+        return JSONResponse(
+            status_code=HTTPStatus.OK,
+            content={"message": "Password updated successfully"}
+        )
+
+    except HTTPException:
+        # Pass through the 401 responses raised above; without this clause
+        # the generic handler below would rewrite them as a 500.
+        raise
+    except UnauthorizedError as e:
+        logger.warning(f"Password update unauthorized for user: {str(e)}")
+        raise AppException(ErrorCode.PROFILE_INVALID_CREDENTIALS, str(e))
+    except AppException as e:
+        logger.warning(
+            f"Password update business error: {e.error_code} - {str(e)}")
+        raise e  # Let app_exception_handler format the response
+    except Exception as e:
+        logging.error(f"Failed to update password: {str(e)}", exc_info=e)
+        raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+                            detail="Internal Server Error")
diff --git a/backend/apps/vectordatabase_app.py b/backend/apps/vectordatabase_app.py
index 872b5387b..505c39559 100644
--- a/backend/apps/vectordatabase_app.py
+++ b/backend/apps/vectordatabase_app.py
@@ -1,29 +1,51 @@
import logging
import json
from http import HTTPStatus
-from typing import Any, Dict, List, Optional
+from typing import Annotated, Any, Dict, List, Optional
from fastapi import APIRouter, Body, Depends, Header, HTTPException, Path, Query
from fastapi.responses import JSONResponse
import re
+from consts.const import ASSET_OWNER_TENANT_ID, PERMISSION_READ
from consts.model import ChunkCreateRequest, ChunkUpdateRequest, HybridSearchRequest, IndexingResponse
+from consts.scheduler import VALID_SUMMARY_FREQUENCIES, SUMMARY_FREQUENCY_OPTIONS_FOR_API
from nexent.vector_database.base import VectorDatabaseCore
from services.vectordatabase_service import (
ElasticSearchService,
- get_embedding_model,
+ get_embedding_model_by_id,
get_vector_db_core,
check_knowledge_base_exist_impl,
+ KnowledgeBaseNeedsModelConfigError,
)
+from services.file_management_service import check_file_access
from services.redis_service import get_redis_service
from utils.auth_utils import get_current_user_id
from utils.file_management_utils import get_all_files_status
from database.knowledge_db import get_index_name_by_knowledge_name, get_knowledge_record
+from database.model_management_db import get_model_by_model_id
router = APIRouter(prefix="/indices")
service = ElasticSearchService()
logger = logging.getLogger("vectordatabase_app")
+INTERNAL_INDEX_NAME_DESC = "Internal index_name from knowledge_record_t"
+
+
+@router.get("/summary_frequency_options")
+async def get_summary_frequency_options():
+    """
+    Return the summary-frequency choices for the frontend.
+
+    The response body exposes the option list under ``options`` and the
+    accepted values under ``valid_values``.
+    """
+    payload = {
+        "options": SUMMARY_FREQUENCY_OPTIONS_FOR_API,
+        "valid_values": VALID_SUMMARY_FREQUENCIES,
+    }
+    return JSONResponse(status_code=HTTPStatus.OK, content=payload)
+
@router.post("/check_exist")
async def check_knowledge_base_exist(
@@ -54,7 +76,7 @@ def create_new_index(
embedding_dim: Optional[int] = Query(
None, description="Dimension of the embedding vectors"),
request: Dict[str, Any] = Body(
- None, description="Request body with optional fields (ingroup_permission, group_ids, embedding_model_name)"),
+ None, description="Request body with optional fields (ingroup_permission, group_ids, embedding_model_name, preserve_source_file)"),
vdb_core: VectorDatabaseCore = Depends(get_vector_db_core),
authorization: Optional[str] = Header(None)
):
@@ -65,11 +87,15 @@ def create_new_index(
# Extract optional fields from request body
ingroup_permission = None
group_ids = None
- embedding_model_name = None
+ embedding_model_name: Optional[str] = None
+ is_multimodal: Optional[bool] = None
+ preserve_source_file: Optional[bool] = None
if request:
ingroup_permission = request.get("ingroup_permission")
group_ids = request.get("group_ids")
- embedding_model_name = request.get("embedding_model_name")
+ embedding_model_name = request.get("embeddingModel")
+ is_multimodal = request.get("is_multimodal")
+ preserve_source_file = request.get("preserve_source_file")
# Treat path parameter as user-facing knowledge base name for new creations
return ElasticSearchService.create_knowledge_base(
@@ -81,6 +107,8 @@ def create_new_index(
ingroup_permission=ingroup_permission,
group_ids=group_ids,
embedding_model_name=embedding_model_name,
+ is_multimodal=is_multimodal,
+ preserve_source_file=preserve_source_file,
)
except Exception as e:
raise HTTPException(
@@ -160,6 +188,222 @@ async def update_index(
status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail=f"Error updating index: {str(exc)}")
+@router.patch("/{index_name}/summary_frequency")
+async def update_summary_frequency_endpoint(
+    index_name: Annotated[str, Path(..., description="Name of the index to update")],
+    request: Annotated[Dict[str, Any], Body(..., description="Update payload with summary_frequency")],
+    authorization: Annotated[Optional[str], Header()] = None,
+):
+    """Set the auto-summary frequency of a knowledge base.
+
+    Responds with 400 when ``summary_frequency`` is not one of
+    ``VALID_SUMMARY_FREQUENCIES``, 404 when the update reports no success,
+    and 500 for any other error.
+    """
+    try:
+        user_id, tenant_id = get_current_user_id(authorization)
+        requested = request.get("summary_frequency")
+
+        valid_frequencies = VALID_SUMMARY_FREQUENCIES
+        if requested not in valid_frequencies:
+            raise HTTPException(
+                status_code=HTTPStatus.BAD_REQUEST,
+                detail=f"Invalid summary_frequency. Must be one of: {valid_frequencies}"
+            )
+
+        from database.knowledge_db import update_summary_frequency
+        updated = update_summary_frequency(
+            index_name=index_name,
+            summary_frequency=requested,
+            _tenant_id=tenant_id,
+            user_id=user_id
+        )
+
+        # Guard clause: a falsy result is reported as "not found".
+        if not updated:
+            raise HTTPException(
+                status_code=HTTPStatus.NOT_FOUND,
+                detail=f"Knowledge base '{index_name}' not found"
+            )
+
+        body = {
+            "message": "Summary frequency updated successfully", "status": "success"}
+        return JSONResponse(status_code=HTTPStatus.OK, content=body)
+    except HTTPException:
+        raise
+    except Exception as exc:
+        logger.exception("Error updating summary frequency")
+        raise HTTPException(
+            status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail=f"Error updating summary frequency: {str(exc)}"
+        )
+
+
+@router.get("/{index_name}/embedding-model-status")
+def get_embedding_model_status(
+    index_name: str = Path(..., description="Name of the index to check"),
+    authorization: Optional[str] = Header(None)
+):
+    """
+    Check the embedding model status of a knowledge base.
+    Returns information about whether a model is configured and if an update is needed.
+
+    This endpoint is used by the frontend to determine whether to show
+    a dialog prompting the user to select an embedding model for knowledge bases
+    that were created before the model ID feature was added.
+
+    Note: The path parameter is the internal index_name.
+
+    Args:
+        index_name: Internal index name used to look up the knowledge record.
+        authorization: Authorization header used to resolve the tenant.
+
+    Returns:
+        Dict with ``status`` ("configured", "legacy" or "missing"),
+        ``needs_config``, ``index_name``, ``knowledge_name``, ``model_id``,
+        ``embedding_model_name``, ``model_info`` and ``message``.
+
+    Raises:
+        HTTPException: 404 when no knowledge record matches, 500 on any
+            other failure.
+    """
+    try:
+        _, tenant_id = get_current_user_id(authorization)
+
+        # Get the knowledge base record by index_name
+        knowledge_record = get_knowledge_record({
+            "index_name": index_name,
+            "tenant_id": tenant_id,
+            "include_asset_owner_assets": True,
+        })
+
+        if not knowledge_record:
+            raise HTTPException(
+                status_code=HTTPStatus.NOT_FOUND,
+                detail=f"Knowledge base '{index_name}' not found"
+            )
+
+        # Check if model_id exists
+        model_id = knowledge_record.get("embedding_model_id")
+        embedding_model_name = knowledge_record.get("embedding_model_name")
+
+        # Get model info if model_id exists
+        model_info = None
+        if model_id:
+            model = get_model_by_model_id(model_id, tenant_id)
+            if model:
+                model_info = {
+                    "model_id": model.get("model_id"),
+                    "model_name": model.get("model_name"),
+                    "display_name": model.get("display_name"),
+                    "model_type": model.get("model_type"),
+                }
+
+        # Determine status
+        if model_id and model_info:
+            status = "configured"
+            # model_info always contains a "display_name" key, so a dict.get
+            # default would never be used; fall back explicitly when the
+            # display name is empty.
+            shown_name = model_info.get(
+                "display_name") or model_info.get("model_name")
+            message = f"Embedding model '{shown_name}' is configured"
+            needs_config = False
+        elif embedding_model_name:
+            # Has model name but no model_id (legacy data)
+            status = "legacy"
+            message = "This knowledge base was created with an older version. Please select an embedding model to ensure proper functionality."
+            needs_config = True
+        else:
+            # No model configured at all
+            status = "missing"
+            message = "No embedding model configured. Please select an embedding model."
+            needs_config = True
+
+        # Get actual internal index_name from the database record
+        actual_index_name = knowledge_record.get("index_name")
+
+        return {
+            "status": status,
+            "needs_config": needs_config,
+            "index_name": actual_index_name,
+            "knowledge_name": knowledge_record.get("knowledge_name"),
+            "model_id": model_id,
+            "embedding_model_name": embedding_model_name,
+            "model_info": model_info,
+            "message": message,
+        }
+
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.error(
+            f"Error getting embedding model status for '{index_name}': {e}", exc_info=True)
+        raise HTTPException(
+            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+            detail=f"Error checking embedding model status: {str(e)}"
+        )
+
+
+@router.put("/{index_name}/embedding-model")
+def update_embedding_model(
+    index_name: str = Path(
+        ..., description="Internal index name of the knowledge base to update"),
+    request: Dict[str, Any] = Body(...,
+                                   description="Update payload with model_id"),
+    authorization: Optional[str] = Header(None)
+):
+    """
+    Assign an embedding model to a knowledge base.
+
+    Used when a user picks a model in the dialog shown for knowledge bases
+    that have no model configured. Responds with 400 when ``model_id`` is
+    missing or falsy, 404 when a ``ValueError`` is raised, and 500 for any
+    other error.
+    """
+    try:
+        user_id, tenant_id = get_current_user_id(authorization)
+
+        chosen_model_id = request.get("model_id")
+        if not chosen_model_id:
+            raise HTTPException(
+                status_code=HTTPStatus.BAD_REQUEST, detail="model_id is required")
+
+        service_response = ElasticSearchService.update_embedding_model(
+            index_name=index_name,
+            model_id=chosen_model_id,
+            tenant_id=tenant_id,
+            user_id=user_id,
+        )
+        return JSONResponse(status_code=HTTPStatus.OK, content=service_response)
+
+    except ValueError as exc:
+        raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail=str(exc))
+    except HTTPException:
+        raise
+    except Exception as exc:
+        logger.error(
+            f"Error updating embedding model for '{index_name}': {exc}", exc_info=True)
+        raise HTTPException(
+            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+            detail=f"Error updating embedding model: {str(exc)}"
+        )
+
+
+def _apply_read_only_to_asset_indices_info(asset_result: Dict[str, Any]) -> Dict[str, Any]:
+    """Return asset-owner results with every indices_info entry set to PERMISSION_READ.
+
+    The input is returned untouched when it has no (or an empty)
+    ``indices_info``; otherwise a shallow copy with rewritten entries is
+    returned and the input is not mutated.
+    """
+    entries = asset_result.get("indices_info")
+    if entries:
+        read_only_entries = []
+        for entry in entries:
+            patched = dict(entry)
+            patched["permission"] = PERMISSION_READ
+            read_only_entries.append(patched)
+        result = dict(asset_result)
+        result["indices_info"] = read_only_entries
+        return result
+    return asset_result
+
+
+def _merge_list_indices_results(
+    primary: Dict[str, Any],
+    asset_owner: Dict[str, Any],
+) -> Dict[str, Any]:
+    """Concatenate tenant and ASSET_OWNER list_indices responses without de-duplication.
+
+    ``indices`` from both inputs are joined (primary first) and ``count`` is
+    the length of the joined list. ``indices_info`` is included only when at
+    least one input has that key.
+    """
+    combined = primary.get("indices", []) + asset_owner.get("indices", [])
+    merged: Dict[str, Any] = {"indices": combined, "count": len(combined)}
+
+    # Neither side carries per-index details: nothing more to add.
+    if "indices_info" not in primary and "indices_info" not in asset_owner:
+        return merged
+
+    primary_info = primary.get("indices_info", [])
+    asset_info = asset_owner.get("indices_info", [])
+    merged["indices_info"] = primary_info + asset_info
+    return merged
+
+
@router.get("")
def get_list_indices(
pattern: str = Query("*", description="Pattern to match index names"),
@@ -173,9 +417,21 @@ def get_list_indices(
"""List all user indices with optional stats"""
try:
user_id, auth_tenant_id = get_current_user_id(authorization)
- # Use explicit tenant_id if provided, otherwise fall back to auth tenant_id
- effective_tenant_id = tenant_id or auth_tenant_id
- return ElasticSearchService.list_indices(pattern, include_stats, effective_tenant_id, user_id, vdb_core)
+ if tenant_id is None:
+ result = ElasticSearchService.list_indices(
+ pattern, include_stats, auth_tenant_id, user_id, vdb_core
+ )
+ if auth_tenant_id != ASSET_OWNER_TENANT_ID:
+ asset_result = ElasticSearchService.list_indices(
+ pattern, include_stats, ASSET_OWNER_TENANT_ID, user_id, vdb_core
+ )
+ asset_result = _apply_read_only_to_asset_indices_info(
+ asset_result)
+ return _merge_list_indices_results(result, asset_result)
+ return result
+ return ElasticSearchService.list_indices(
+ pattern, include_stats, tenant_id, user_id, vdb_core
+ )
except Exception as e:
raise HTTPException(
status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail=f"Error get index: {str(e)}")
@@ -191,6 +447,8 @@ def create_index_documents(
authorization: Optional[str] = Header(None),
task_id: Optional[str] = Header(
None, alias="X-Task-Id", description="Task ID for progress tracking"),
+ large_mode: bool = Query(
+ False, description="Force large-batch path when current request chunk count is below threshold"),
):
"""
Index documents with embeddings, creating the index if it doesn't exist.
@@ -198,22 +456,26 @@ def create_index_documents(
"""
try:
user_id, tenant_id = get_current_user_id(authorization)
-
+
# Get the knowledge base record to retrieve the saved embedding model
knowledge_record = get_knowledge_record({'index_name': index_name})
- saved_embedding_model_name = None
+ saved_embedding_model_id = None
if knowledge_record:
- saved_embedding_model_name = knowledge_record.get('embedding_model_name')
-
- # Use the saved model from knowledge base, fallback to tenant default if not set
- embedding_model = get_embedding_model(tenant_id, saved_embedding_model_name)
-
+ saved_embedding_model_id = knowledge_record.get(
+ 'embedding_model_id')
+
+ # Use the saved model from knowledge base by model_id
+ embedding_model, _ = get_embedding_model_by_id(
+ tenant_id, saved_embedding_model_id) if saved_embedding_model_id else (None, None)
+
return ElasticSearchService.index_documents(
embedding_model=embedding_model,
index_name=index_name,
data=data,
vdb_core=vdb_core,
task_id=task_id,
+ large_mode=large_mode,
+ model_id=saved_embedding_model_id,
)
except Exception as e:
error_msg = str(e)
@@ -246,54 +508,70 @@ async def get_index_files(
@router.delete("/{index_name}/documents")
-def delete_documents(
+async def delete_documents(
index_name: str = Path(..., description="Name of the index"),
path_or_url: str = Query(...,
description="Path or URL of documents to delete"),
+ scope: str = Query(
+ "full",
+ description=(
+ "source_only: delete MinIO source only, keep ES chunks/vectors; "
+ "full: delete ES documents, MinIO source, and Redis task records"
+ ),
+ ),
vdb_core: VectorDatabaseCore = Depends(get_vector_db_core)
):
- """Delete documents by path or URL and clean up related Redis records"""
+ """Delete a document by scope: source file only or full removal from the index."""
try:
- # First delete the documents using existing service
- result = ElasticSearchService.delete_documents(
- index_name, path_or_url, vdb_core)
-
- # Then clean up Redis records related to this specific document
- try:
- redis_service = get_redis_service()
- redis_cleanup_result = redis_service.delete_document_records(
- index_name, path_or_url)
-
- # Add Redis cleanup info to the result
- result["redis_cleanup"] = redis_cleanup_result
-
- # Update the message to include Redis cleanup info
- original_message = result.get(
- "message", "Documents deleted successfully")
- result["message"] = (
- f"{original_message}. "
- f"Cleaned up {redis_cleanup_result['total_deleted']} Redis records "
- f"({redis_cleanup_result['celery_tasks_deleted']} tasks, "
- f"{redis_cleanup_result['cache_keys_deleted']} cache keys)."
- )
-
- if redis_cleanup_result.get("errors"):
- result["redis_warnings"] = redis_cleanup_result["errors"]
+ result = await ElasticSearchService.delete_document_by_scope(
+ index_name, path_or_url, scope, vdb_core
+ )
- except Exception as redis_error:
- logger.warning(
- f"Redis cleanup failed for document {path_or_url} in index {index_name}: {str(redis_error)}")
- result["redis_cleanup_error"] = str(redis_error)
- original_message = result.get(
- "message", "Documents deleted successfully")
- result[
- "message"] = f"{original_message}, but Redis cleanup encountered an error: {str(redis_error)}"
+ if scope == "full":
+ try:
+ redis_service = get_redis_service()
+ redis_cleanup_result = redis_service.delete_document_records(
+ index_name, path_or_url
+ )
+ result["redis_cleanup"] = redis_cleanup_result
+ original_message = result.get(
+ "message", "Documents deleted successfully"
+ )
+ result["message"] = (
+ f"{original_message}. "
+ f"Cleaned up {redis_cleanup_result['total_deleted']} Redis records "
+ f"({redis_cleanup_result['celery_tasks_deleted']} tasks, "
+ f"{redis_cleanup_result['cache_keys_deleted']} cache keys)."
+ )
+ if redis_cleanup_result.get("errors"):
+ result["redis_warnings"] = redis_cleanup_result["errors"]
+ except Exception as redis_error:
+ logger.warning(
+ "Redis cleanup failed for document %s in index %s: %s",
+ path_or_url,
+ index_name,
+ redis_error,
+ )
+ result["redis_cleanup_error"] = str(redis_error)
+ original_message = result.get(
+ "message", "Documents deleted successfully"
+ )
+ result["message"] = (
+ f"{original_message}, but Redis cleanup encountered an error: "
+ f"{str(redis_error)}"
+ )
return result
+ except ValueError as exc:
+ raise HTTPException(
+ status_code=HTTPStatus.BAD_REQUEST, detail=str(exc)
+ )
except Exception as e:
raise HTTPException(
- status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail=f"Error delete indexing documents: {e}")
+ status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+ detail=f"Error delete indexing documents: {e}",
+ )
@router.get("/{index_name}/documents/{path_or_url:path}/error-info")
@@ -364,13 +642,14 @@ def health_check(vdb_core: VectorDatabaseCore = Depends(get_vector_db_core)):
# Try to list indices as a health check
return ElasticSearchService.health_check(vdb_core)
except Exception as e:
- raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail=f"{str(e)}")
+ raise HTTPException(
+ status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail=f"{str(e)}")
@router.post("/{index_name}/chunks")
def get_index_chunks(
index_name: str = Path(...,
- description="Name of the index (or knowledge_name) to get chunks from"),
+ description=INTERNAL_INDEX_NAME_DESC),
page: int = Query(
None, description="Page number (1-based) for pagination"),
page_size: int = Query(
@@ -382,12 +661,18 @@ def get_index_chunks(
):
"""Get chunks from the specified index, with optional pagination support"""
try:
- _, tenant_id = get_current_user_id(authorization)
- actual_index_name = get_index_name_by_knowledge_name(
- index_name, tenant_id)
+ user_id, tenant_id = get_current_user_id(authorization)
+
+ if path_or_url is not None and not check_file_access(
+ path_or_url, user_id, tenant_id
+ ):
+ raise HTTPException(
+ status_code=HTTPStatus.FORBIDDEN,
+ detail="You don't have permission to access this file",
+ )
result = ElasticSearchService.get_index_chunks(
- index_name=actual_index_name,
+ index_name=index_name,
page=page,
page_size=page_size,
path_or_url=path_or_url,
@@ -401,8 +686,9 @@ def get_index_chunks(
         )
+    except HTTPException:
+        # Re-raise deliberate HTTP errors (e.g. the 403 raised in the try body) instead of masking them as 500.
+        raise
     except Exception as e:
         error_msg = str(e)
-        logger.error(
-            f"Error getting chunks for index '{index_name}': {error_msg}")
         raise HTTPException(
             status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail=f"Error getting chunks: {error_msg}")
@@ -410,7 +693,7 @@ def get_index_chunks(
@router.post("/{index_name}/chunk")
def create_chunk(
index_name: str = Path(...,
- description="Name of the index (or knowledge_name)"),
+ description=INTERNAL_INDEX_NAME_DESC),
payload: ChunkCreateRequest = Body(..., description="Chunk data"),
vdb_core: VectorDatabaseCore = Depends(get_vector_db_core),
authorization: Optional[str] = Header(None),
@@ -418,10 +701,8 @@ def create_chunk(
"""Create a manual chunk."""
try:
user_id, tenant_id = get_current_user_id(authorization)
- actual_index_name = get_index_name_by_knowledge_name(
- index_name, tenant_id)
result = ElasticSearchService.create_chunk(
- index_name=actual_index_name,
+ index_name=index_name,
chunk_request=payload,
vdb_core=vdb_core,
user_id=user_id,
@@ -445,7 +726,7 @@ def create_chunk(
@router.put("/{index_name}/chunk/{chunk_id}")
def update_chunk(
index_name: str = Path(...,
- description="Name of the index (or knowledge_name)"),
+ description=INTERNAL_INDEX_NAME_DESC),
chunk_id: str = Path(..., description="Chunk identifier"),
payload: ChunkUpdateRequest = Body(...,
description="Chunk update payload"),
@@ -455,14 +736,13 @@ def update_chunk(
"""Update an existing chunk."""
try:
user_id, tenant_id = get_current_user_id(authorization)
- actual_index_name = get_index_name_by_knowledge_name(
- index_name, tenant_id)
result = ElasticSearchService.update_chunk(
- index_name=actual_index_name,
+ index_name=index_name,
chunk_id=chunk_id,
chunk_request=payload,
vdb_core=vdb_core,
user_id=user_id,
+ tenant_id=tenant_id,
)
return JSONResponse(status_code=HTTPStatus.OK, content=result)
except ValueError as e:
@@ -486,18 +766,16 @@ def update_chunk(
@router.delete("/{index_name}/chunk/{chunk_id}")
def delete_chunk(
index_name: str = Path(...,
- description="Name of the index (or knowledge_name)"),
+ description=INTERNAL_INDEX_NAME_DESC),
chunk_id: str = Path(..., description="Chunk identifier"),
vdb_core: VectorDatabaseCore = Depends(get_vector_db_core),
authorization: Optional[str] = Header(None),
):
"""Delete a chunk."""
try:
- _, tenant_id = get_current_user_id(authorization)
- actual_index_name = get_index_name_by_knowledge_name(
- index_name, tenant_id)
+ get_current_user_id(authorization)
result = ElasticSearchService.delete_chunk(
- index_name=actual_index_name,
+ index_name=index_name,
chunk_id=chunk_id,
vdb_core=vdb_core,
)
@@ -529,8 +807,17 @@ async def hybrid_search(
"""Run a hybrid (accurate + semantic) search across indices."""
try:
_, tenant_id = get_current_user_id(authorization)
+        resolved_index_names: List[str] = []
+        for requested_name in payload.index_names:
+            try:
+                resolved_index_names.append(
+                    get_index_name_by_knowledge_name(requested_name, tenant_id))
+            except Exception as resolve_error:
+                # Best-effort: an unresolvable name is passed through unchanged, but leave a trace.
+                logger.debug("Index name %r not resolved, using as-is: %s", requested_name, resolve_error)
+                resolved_index_names.append(requested_name)
result = ElasticSearchService.search_hybrid(
- index_names=payload.index_names,
+ index_names=resolved_index_names,
query=payload.query,
tenant_id=tenant_id,
top_k=payload.top_k,
@@ -538,9 +825,20 @@ async def hybrid_search(
vdb_core=vdb_core,
)
return JSONResponse(status_code=HTTPStatus.OK, content=result)
+ except KnowledgeBaseNeedsModelConfigError as exc:
+ # Return a specific error that frontend can detect to show the config dialog
+ raise HTTPException(
+ status_code=HTTPStatus.CONFLICT,
+ detail={
+ "error_type": "KNOWLEDGE_BASE_NEEDS_MODEL_CONFIG",
+ "index_name": exc.index_name,
+ "message": exc.message,
+ "suggestion": "Please select an embedding model for this knowledge base before searching."
+ }
+ )
except ValueError as exc:
- raise HTTPException(status_code=HTTPStatus.BAD_REQUEST,
- detail=str(exc))
+ raise HTTPException(
+ status_code=HTTPStatus.BAD_REQUEST, detail=str(exc))
except Exception as exc:
logger.error(f"Hybrid search failed: {exc}", exc_info=True)
raise HTTPException(
diff --git a/backend/apps/voice_app.py b/backend/apps/voice_app.py
index 8f517cd07..cc1b37e87 100644
--- a/backend/apps/voice_app.py
+++ b/backend/apps/voice_app.py
@@ -2,14 +2,14 @@
import logging
from http import HTTPStatus
-from fastapi import APIRouter, WebSocket, HTTPException, Body, Query
+from fastapi import APIRouter, WebSocket, HTTPException
from fastapi.responses import JSONResponse
from consts.exceptions import (
VoiceServiceException,
STTConnectionException,
TTSConnectionException,
- VoiceConfigException
+ VoiceConfigException,
)
from consts.model import VoiceConnectivityRequest, VoiceConnectivityResponse
from services.voice_service import get_voice_service
@@ -26,10 +26,29 @@ async def stt_websocket(websocket: WebSocket):
logger.info("STT WebSocket connection attempt...")
await websocket.accept()
logger.info("STT WebSocket connection accepted")
-
+
+    # Receive config from client (first frame: JSON sent as text or UTF-8 bytes)
+    client_config = {}
+    try:
+        import json
+        msg = await websocket.receive()
+        # ASGI reports text and binary frames alike as "websocket.receive";
+        # the payload sits under the "text" or "bytes" key (either may be None).
+        if msg["type"] == "websocket.receive":
+            raw = msg.get("text")
+            if raw is None and msg.get("bytes") is not None:
+                raw = msg["bytes"].decode('utf-8')
+            parsed = json.loads(raw) if raw else {}
+            client_config = parsed if isinstance(parsed, dict) else {}
+            # Log key names only: the config may carry credentials.
+            logger.info(f"Received client config keys: {sorted(client_config)}")
+    except Exception as e:
+        logger.error(f"Error receiving config: {e}")
+        client_config = {}
+
try:
voice_service = get_voice_service()
- await voice_service.start_stt_streaming_session(websocket)
+ await voice_service.start_stt_streaming_session(websocket, stt_config=client_config)
except STTConnectionException as e:
logger.error(f"STT WebSocket error: {str(e)}")
await websocket.send_json({"error": str(e)})
@@ -48,18 +67,60 @@ async def tts_websocket(websocket: WebSocket):
logger.info("TTS WebSocket connection accepted")
try:
- # Receive text from client (single request)
- data = await websocket.receive_json()
- text = data.get("text")
+        # Receive config and text from client
+        msg = await websocket.receive()
+        client_config = {}
+        text = None
+
+        if msg["type"] == "websocket.receive":
+            import json
+            # ASGI allows "text"/"bytes" to be present but None, so test the value, not the key.
+            if msg.get("text") is not None:
+                client_config = json.loads(msg["text"])
+                text = client_config.get("text")
+            elif msg.get("bytes") is not None:
+                try:
+                    client_config = json.loads(msg["bytes"].decode('utf-8'))
+                    text = client_config.get("text")
+                except Exception as e:
+                    logger.warning(f"Failed to parse bytes as JSON: {e}")
if not text:
if websocket.client_state.name == "CONNECTED":
await websocket.send_json({"error": "No text provided"})
return
+ # Extract config from client
+ tenant_id = client_config.get("tenant_id")
+ model_factory = client_config.get("model_factory")
+ model_name = client_config.get("model_name")
+ api_key = client_config.get("api_key")
+ model_appid = client_config.get("model_appid")
+ access_token = client_config.get("access_token")
+ base_url = client_config.get("base_url")
+
+ logger.info(f"TTS request - model_name: {model_name}, model_factory: {model_factory}, "
+ f"has_api_key: {bool(api_key)}")
+
+ # Build tts_config dict for voice service
+ tts_config = {
+ "model_factory": model_factory,
+ "api_key": api_key,
+ "model_appid": model_appid,
+ "access_token": access_token,
+ "base_url": base_url,
+ "model_name": model_name,
+ }
+
# Stream TTS audio to WebSocket
voice_service = get_voice_service()
- await voice_service.stream_tts_to_websocket(websocket, text)
+ await voice_service.stream_tts_to_websocket(
+ websocket,
+ text,
+ tenant_id=tenant_id,
+ model_name=model_name,
+ tts_config=tts_config
+ )
except TTSConnectionException as e:
logger.error(f"TTS WebSocket error: {str(e)}")
@@ -78,17 +139,17 @@ async def tts_websocket(websocket: WebSocket):
async def check_voice_connectivity(request: VoiceConnectivityRequest):
"""
Check voice service connectivity
-
+
Args:
request: VoiceConnectivityRequest containing model_type
-
+
Returns:
VoiceConnectivityResponse with connectivity status
"""
try:
voice_service = get_voice_service()
connected = await voice_service.check_voice_connectivity(request.model_type)
-
+
return JSONResponse(
status_code=HTTPStatus.OK,
content=VoiceConnectivityResponse(
diff --git a/backend/assets/test_voice.pcm b/backend/assets/test_voice.pcm
new file mode 100644
index 000000000..0a78f9a15
Binary files /dev/null and b/backend/assets/test_voice.pcm differ
diff --git a/backend/consts/agent_unavailable_reasons.py b/backend/consts/agent_unavailable_reasons.py
new file mode 100644
index 000000000..4e710ee7d
--- /dev/null
+++ b/backend/consts/agent_unavailable_reasons.py
@@ -0,0 +1,43 @@
+"""
+Agent Unavailable Reason Constants
+
+Centralized definition of all possible reasons why an agent may be unavailable.
+These values are returned to the frontend via the 'unavailable_reasons' field.
+"""
+
+
+class AgentUnavailableReason:
+    """Namespace of string codes describing why an agent is unavailable."""
+
+    # Identity (naming) conflicts
+    DUPLICATE_NAME = "duplicate_name"
+    DUPLICATE_DISPLAY_NAME = "duplicate_display_name"
+
+    # Model-related reasons
+    MODEL_NOT_CONFIGURED = "model_not_configured"
+    MODEL_UNAVAILABLE = "model_unavailable"
+
+    # Tool-related reasons
+    TOOL_UNAVAILABLE = "tool_unavailable"
+    ALL_TOOLS_DISABLED = "all_tools_disabled"
+
+    # Agent-related reasons
+    AGENT_NOT_FOUND = "agent_not_found"
+
+    @classmethod
+    def all_reasons(cls) -> list[str]:
+        """Build a fresh list of every reason code, in declaration order."""
+        return [getattr(cls, constant) for constant in (
+            "DUPLICATE_NAME",
+            "DUPLICATE_DISPLAY_NAME",
+            "MODEL_NOT_CONFIGURED",
+            "MODEL_UNAVAILABLE",
+            "TOOL_UNAVAILABLE",
+            "ALL_TOOLS_DISABLED",
+            "AGENT_NOT_FOUND",
+        )]
+
+    @classmethod
+    def is_valid_reason(cls, reason: str) -> bool:
+        """Tell whether ``reason`` equals one of the declared reason codes."""
+        return cls.all_reasons().count(reason) > 0
diff --git a/backend/consts/const.py b/backend/consts/const.py
index bccb91ccd..574d550c0 100644
--- a/backend/consts/const.py
+++ b/backend/consts/const.py
@@ -7,9 +7,12 @@
load_dotenv(override=True)
# TODO: Analyze every variable if this is used
-# Test voice file path
+# Test voice file path (WAV format for volcengine STT)
TEST_VOICE_PATH = os.path.join(os.path.dirname(
os.path.dirname(__file__)), 'assets', 'test.wav')
+# Test PCM file path (raw PCM format for Ali STT)
+TEST_PCM_PATH = os.path.join(os.path.dirname(
+ os.path.dirname(__file__)), 'assets', 'test_voice.pcm')
# Vector database providers
@@ -28,6 +31,10 @@ class VectorDatabaseType(str, Enum):
# Data Processing Service Configuration
DATA_PROCESS_SERVICE = os.getenv("DATA_PROCESS_SERVICE")
CLIP_MODEL_PATH = os.getenv("CLIP_MODEL_PATH")
+TABLE_TRANSFORMER_MODEL_PATH = os.getenv("TABLE_TRANSFORMER_MODEL_PATH")
+UNSTRUCTURED_DEFAULT_MODEL_INITIALIZE_PARAMS_JSON_PATH = os.getenv(
+ "UNSTRUCTURED_DEFAULT_MODEL_INITIALIZE_PARAMS_JSON_PATH"
+)
# Upload Configuration
@@ -36,9 +43,16 @@ class VectorDatabaseType(str, Enum):
UPLOAD_FOLDER = os.getenv('UPLOAD_FOLDER', 'uploads')
ROOT_DIR = os.getenv("ROOT_DIR")
+PER_WAVE_TIMEOUT = int(os.getenv("DP_SPLIT_WAIT_TIMEOUT_PER_WAVE_S", "30"))
+MAX_TIMEOUT = int(os.getenv("DP_SPLIT_WAIT_TIMEOUT_MAX_S", "1800"))
+
+
# Container-internal skills storage path
CONTAINER_SKILLS_PATH = os.getenv("SKILLS_PATH")
+# Container-internal official skills ZIP directory
+OFFICIAL_SKILLS_ZIP_PATH = "/mnt/nexent/official-skills-zip"
+
# Preview Configuration
FILE_PREVIEW_SIZE_LIMIT = 100 * 1024 * 1024 # 100MB
@@ -66,7 +80,39 @@ class VectorDatabaseType(str, Enum):
SERVICE_ROLE_KEY = os.getenv('SERVICE_ROLE_KEY', SUPABASE_KEY)
# JWT secret for verifying Supabase-signed access tokens.
# GoTrue uses GOTRUE_JWT_SECRET (= JWT_SECRET in docker setup) to sign tokens.
-SUPABASE_JWT_SECRET = os.getenv('SUPABASE_JWT_SECRET') or os.getenv('JWT_SECRET', '')
+SUPABASE_JWT_SECRET = os.getenv(
+ 'SUPABASE_JWT_SECRET') or os.getenv('JWT_SECRET', '')
+
+
+# OAuth Configuration
+OAUTH_CALLBACK_BASE_URL = os.getenv("OAUTH_CALLBACK_BASE_URL", "")
+OAUTH_SSL_VERIFY = os.getenv("OAUTH_SSL_VERIFY", "true").lower() == "true"
+OAUTH_CA_BUNDLE = os.getenv("OAUTH_CA_BUNDLE", "")
+
+
+# CAS SSO Configuration
+CAS_ENABLED = os.getenv("CAS_ENABLED", "false").lower() in ("true", "1", "yes", "on")
+CAS_SERVER_URL = os.getenv("CAS_SERVER_URL", "").rstrip("/")
+CAS_VALIDATE_PATH = os.getenv("CAS_VALIDATE_PATH", "/p3/serviceValidate")
+CAS_CALLBACK_BASE_URL = os.getenv("CAS_CALLBACK_BASE_URL", OAUTH_CALLBACK_BASE_URL).rstrip("/")
+# CAS login mode:
+# - disabled: disable CAS login entry and automatic CAS redirects.
+# - button: show CAS as an optional login entry.
+# - force: automatically redirect unauthenticated users to CAS login.
+CAS_LOGIN_MODE = os.getenv("CAS_LOGIN_MODE", "disabled").lower()
+CAS_USER_ATTRIBUTE = os.getenv("CAS_USER_ATTRIBUTE", "")
+CAS_EMAIL_ATTRIBUTE = os.getenv("CAS_EMAIL_ATTRIBUTE", "email")
+CAS_ROLE_ATTRIBUTE = os.getenv("CAS_ROLE_ATTRIBUTE", "role")
+CAS_TENANT_ATTRIBUTE = os.getenv("CAS_TENANT_ATTRIBUTE", "tenant_id")
+CAS_ROLE_MAP_JSON = os.getenv("CAS_ROLE_MAP_JSON", "")
+CAS_SESSION_MAX_AGE_SECONDS = int(os.getenv("CAS_SESSION_MAX_AGE_SECONDS", "3600") or 3600)
+LOCAL_SESSION_MAX_AGE_SECONDS = int(os.getenv("LOCAL_SESSION_MAX_AGE_SECONDS", "3600") or 3600)
+CAS_RENEW_BEFORE_SECONDS = int(os.getenv("CAS_RENEW_BEFORE_SECONDS", "300") or 300)
+CAS_RENEW_TIMEOUT_SECONDS = int(os.getenv("CAS_RENEW_TIMEOUT_SECONDS", "10") or 10)
+CAS_SYNTHETIC_EMAIL_DOMAIN = os.getenv("CAS_SYNTHETIC_EMAIL_DOMAIN", "cas.local")
+CAS_LOGOUT_URL = os.getenv("CAS_LOGOUT_URL", "")
+CAS_SSL_VERIFY = os.getenv("CAS_SSL_VERIFY", "true").lower() == "true"
+CAS_CA_BUNDLE = os.getenv("CAS_CA_BUNDLE", "")
# ===== To be migrated to frontend configuration =====
@@ -91,15 +137,37 @@ class VectorDatabaseType(str, Enum):
DEFAULT_USER_ID = "user_id"
DEFAULT_TENANT_ID = "tenant_id"
+# Invitation code type for asset administrator registration
+ASSET_OWNER_INVITE_CODE_TYPE = "ASSET_OWNER_INVITE"
+
+# User role identifier for asset administrators
+ASSET_OWNER_ROLE = "ASSET_OWNER"
+
+# Tenant ID for asset administrators (virtual tenant, not a real tenant)
+ASSET_OWNER_TENANT_ID = "asset_owner_tenant_id"
+
+# MinIO prefix for ASSET_OWNER-scoped attachment uploads (attachments/asset_owner/{user_id}/...)
+ASSET_OWNER_ATTACHMENTS_PREFIX = "attachments/asset_owner"
+
+# When false, block ASSET_OWNER invites, registrations, and sign-in.
+ENABLE_ASSET_OWNER_ROLE = os.getenv(
+ "ENABLE_ASSET_OWNER_ROLE", "false").lower() == "true"
+
+# HTTP detail key: asset owner must register via OAuth, not email/password signup.
+ASSET_OWNER_SIGNUP_USE_OAUTH_DETAIL = "ASSET_OWNER_USE_OAUTH"
+
# Roles that can edit all resources within a tenant (permission = EDIT).
# Keep this centralized to avoid drifting role logic across modules.
-CAN_EDIT_ALL_USER_ROLES = {"SU", "ADMIN", "SPEED"}
+CAN_EDIT_ALL_USER_ROLES = {"SU", "ADMIN", "SPEED", "ASSET_OWNER"}
# Permission constants used by list endpoints (e.g., /agent/list, /mcp/list).
PERMISSION_READ = "READ_ONLY"
PERMISSION_EDIT = "EDIT"
PERMISSION_PRIVATE = "PRIVATE"
+# Response flag when system prompts are withheld from non-ASSET_OWNER callers.
+AGENT_PROMPTS_HIDDEN_FLAG = "prompts_hidden"
+
# Deployment Version Configuration
DEPLOYMENT_VERSION = os.getenv("DEPLOYMENT_VERSION", "speed")
@@ -115,6 +183,7 @@ class VectorDatabaseType(str, Enum):
MINIO_SECRET_KEY = os.getenv("MINIO_SECRET_KEY")
MINIO_REGION = os.getenv("MINIO_REGION")
MINIO_DEFAULT_BUCKET = os.getenv("MINIO_DEFAULT_BUCKET")
+S3_URL_PREFIX = "s3://"
# Postgres Configuration
@@ -143,7 +212,7 @@ class VectorDatabaseType(str, Enum):
RAY_ACTOR_NUM_CPUS = int(os.getenv("RAY_ACTOR_NUM_CPUS", "2"))
RAY_DASHBOARD_PORT = int(os.getenv("RAY_DASHBOARD_PORT", "8265"))
RAY_DASHBOARD_HOST = os.getenv("RAY_DASHBOARD_HOST", "0.0.0.0")
-RAY_NUM_CPUS = os.getenv("RAY_NUM_CPUS")
+RAY_NUM_CPUS = int(os.getenv("RAY_NUM_CPUS", "4"))
RAY_OBJECT_STORE_MEMORY_GB = float(
os.getenv("RAY_OBJECT_STORE_MEMORY_GB", "0.25"))
RAY_TEMP_DIR = os.getenv("RAY_TEMP_DIR", "/tmp/ray")
@@ -164,6 +233,7 @@ class VectorDatabaseType(str, Enum):
"NEXENT_MCP_DOCKER_IMAGE", "nexent/nexent-mcp:latest")
ENABLE_UPLOAD_IMAGE = os.getenv(
"ENABLE_UPLOAD_IMAGE", "false").lower() == "true"
+ENABLE_JIUWEN_SDK = os.getenv("NEXENT_ENABLE_JIUWEN_SDK", "true").lower() == "true"
# Celery Configuration
@@ -176,10 +246,21 @@ class VectorDatabaseType(str, Enum):
# Worker Configuration
RAY_ADDRESS = os.getenv("RAY_ADDRESS", "auto")
-QUEUES = os.getenv("QUEUES", "process_q,forward_q")
+QUEUES = os.getenv("QUEUES", "process_q,process_part_q,forward_q")
# Will be dynamically set based on PID if not provided
WORKER_NAME = os.getenv("WORKER_NAME")
WORKER_CONCURRENCY = int(os.getenv("WORKER_CONCURRENCY", "4"))
+RAY_WARM_ACTOR_POOL_SIZE_PART = int(
+ os.getenv("RAY_WARM_ACTOR_POOL_SIZE_PART", "2"))
+RAY_WARM_ACTOR_POOL_SIZE_PROCESS = int(
+ os.getenv("RAY_WARM_ACTOR_POOL_SIZE_PROCESS", "1"))
+# Global Ray actor pool (shared by process_q/process_part_q workers)
+RAY_GLOBAL_ACTOR_POOL_SIZE = int(os.getenv("RAY_GLOBAL_ACTOR_POOL_SIZE", "3"))
+RAY_ACTOR_WARM_TIMEOUT_S = float(os.getenv("RAY_ACTOR_WARM_TIMEOUT_S", "60"))
+RAY_GLOBAL_ACTOR_POOL_NAME = os.getenv(
+ "RAY_GLOBAL_ACTOR_POOL_NAME", "nexent_global_data_processor_pool")
+RAY_GLOBAL_ACTOR_POOL_NAMESPACE = os.getenv(
+ "RAY_GLOBAL_ACTOR_POOL_NAMESPACE", "nexent-data-process")
# Voice Service Configuration
@@ -279,6 +360,8 @@ class VectorDatabaseType(str, Enum):
"multiEmbedding": "MULTI_EMBEDDING_ID",
"rerank": "RERANK_ID",
"vlm": "VLM_ID",
+ "vlm2": "VLM2_ID",
+ "vlm3": "VLM3_ID",
"stt": "STT_ID",
"tts": "TTS_ID"
}
@@ -310,19 +393,78 @@ class VectorDatabaseType(str, Enum):
THINK_END_PATTERN = ""
-# Telemetry and Monitoring Configuration
-ENABLE_TELEMETRY = os.getenv("ENABLE_TELEMETRY", "false").lower() == "true"
-SERVICE_NAME = os.getenv("SERVICE_NAME", "nexent-backend")
-JAEGER_ENDPOINT = os.getenv(
- "JAEGER_ENDPOINT", "http://localhost:14268/api/traces")
-PROMETHEUS_PORT = int(os.getenv("PROMETHEUS_PORT", "8000"))
-TELEMETRY_SAMPLE_RATE = float(os.getenv("TELEMETRY_SAMPLE_RATE", "1.0"))
-
-# Performance monitoring thresholds
-LLM_SLOW_REQUEST_THRESHOLD_SECONDS = float(
- os.getenv("LLM_SLOW_REQUEST_THRESHOLD_SECONDS", "5.0"))
-LLM_SLOW_TOKEN_RATE_THRESHOLD = float(
- os.getenv("LLM_SLOW_TOKEN_RATE_THRESHOLD", "10.0")) # tokens per second
+# Telemetry and Monitoring Configuration (OTLP Protocol)
+MONITORING_PROVIDER = os.getenv("MONITORING_PROVIDER", "")
+ENABLE_TELEMETRY_RAW = os.getenv("ENABLE_TELEMETRY")
+ENABLE_TELEMETRY = (ENABLE_TELEMETRY_RAW or "false").lower() == "true"
+OTEL_SERVICE_NAME_RAW = os.getenv("OTEL_SERVICE_NAME")
+OTEL_SERVICE_NAME = OTEL_SERVICE_NAME_RAW or "nexent-backend"
+OTEL_EXPORTER_OTLP_ENDPOINT_RAW = os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT")
+OTEL_EXPORTER_OTLP_ENDPOINT = OTEL_EXPORTER_OTLP_ENDPOINT_RAW or "http://localhost:4318"
+OTEL_EXPORTER_OTLP_TRACES_ENDPOINT = os.getenv(
+ "OTEL_EXPORTER_OTLP_TRACES_ENDPOINT", "")
+OTEL_EXPORTER_OTLP_METRICS_ENDPOINT = os.getenv(
+ "OTEL_EXPORTER_OTLP_METRICS_ENDPOINT", "")
+OTEL_EXPORTER_OTLP_PROTOCOL_RAW = os.getenv("OTEL_EXPORTER_OTLP_PROTOCOL")
+OTEL_EXPORTER_OTLP_PROTOCOL = OTEL_EXPORTER_OTLP_PROTOCOL_RAW or "http"
+OTEL_EXPORTER_OTLP_HEADERS_RAW = os.getenv("OTEL_EXPORTER_OTLP_HEADERS")
+OTEL_EXPORTER_OTLP_HEADERS = OTEL_EXPORTER_OTLP_HEADERS_RAW or ""
+OTEL_EXPORTER_OTLP_AUTHORIZATION = os.getenv(
+ "OTEL_EXPORTER_OTLP_AUTHORIZATION", "")
+OTEL_EXPORTER_OTLP_X_API_KEY = os.getenv("OTEL_EXPORTER_OTLP_X_API_KEY", "")
+OTEL_EXPORTER_OTLP_LANGFUSE_INGESTION_VERSION = os.getenv(
+ "OTEL_EXPORTER_OTLP_LANGFUSE_INGESTION_VERSION", "")
+LANGSMITH_API_KEY = os.getenv("LANGSMITH_API_KEY", "")
+LANGSMITH_PROJECT = os.getenv("LANGSMITH_PROJECT", "")
+OTEL_EXPORTER_OTLP_METRICS_ENABLED_RAW = os.getenv(
+ "OTEL_EXPORTER_OTLP_METRICS_ENABLED")
+OTEL_EXPORTER_OTLP_METRICS_ENABLED = (
+ OTEL_EXPORTER_OTLP_METRICS_ENABLED_RAW or "true").lower() == "true"
+MONITORING_INSTRUMENT_REQUESTS_RAW = os.getenv(
+ "MONITORING_INSTRUMENT_REQUESTS")
+MONITORING_INSTRUMENT_REQUESTS = (
+ MONITORING_INSTRUMENT_REQUESTS_RAW or "false").lower() == "true"
+MONITORING_FASTAPI_INCLUDED_URLS = os.getenv(
+ "MONITORING_FASTAPI_INCLUDED_URLS", "")
+MONITORING_FASTAPI_EXCLUDED_URLS = os.getenv(
+ "MONITORING_FASTAPI_EXCLUDED_URLS", "")
+MONITORING_FASTAPI_EXCLUDE_SPANS = os.getenv(
+ "MONITORING_FASTAPI_EXCLUDE_SPANS", "receive,send")
+MONITORING_PROJECT_NAME = os.getenv("MONITORING_PROJECT_NAME", "")
+MONITORING_DASHBOARD_URL = os.getenv("MONITORING_DASHBOARD_URL", "")
+MONITORING_TRACE_CONTENT_MODE = os.getenv(
+ "MONITORING_TRACE_CONTENT_MODE", "summary")
+MONITORING_TRACE_MAX_CHARS = os.getenv("MONITORING_TRACE_MAX_CHARS", "4000")
+MONITORING_TRACE_MAX_ITEMS = os.getenv("MONITORING_TRACE_MAX_ITEMS", "20")
+TELEMETRY_SAMPLE_RATE_RAW = os.getenv("TELEMETRY_SAMPLE_RATE")
+TELEMETRY_SAMPLE_RATE = float(TELEMETRY_SAMPLE_RATE_RAW or "1.0")
+
+# Parse OTLP headers into dict format
+
+
+def _parse_otlp_headers(headers_str: str) -> dict:
+    """Parse 'k1=v1,k2=v2' into a dict, percent-decoding names/values; pairs lacking '=' or a name are skipped."""
+    from urllib.parse import unquote  # local import keeps this helper self-contained
+    headers = {}
+    for pair in (headers_str or "").split(","):
+        key, sep, value = pair.partition("=")
+        key = unquote(key).strip()
+        if sep and key:
+            headers[key] = unquote(value).strip()
+    return headers
+
+
+OTLP_HEADERS = _parse_otlp_headers(OTEL_EXPORTER_OTLP_HEADERS)
+if OTEL_EXPORTER_OTLP_AUTHORIZATION:
+ OTLP_HEADERS["Authorization"] = OTEL_EXPORTER_OTLP_AUTHORIZATION
+if OTEL_EXPORTER_OTLP_X_API_KEY:
+ OTLP_HEADERS["x-api-key"] = OTEL_EXPORTER_OTLP_X_API_KEY
+elif LANGSMITH_API_KEY:
+ OTLP_HEADERS["x-api-key"] = LANGSMITH_API_KEY
+if LANGSMITH_PROJECT:
+ OTLP_HEADERS["Langsmith-Project"] = LANGSMITH_PROJECT
+if OTEL_EXPORTER_OTLP_LANGFUSE_INGESTION_VERSION:
+ OTLP_HEADERS["x-langfuse-ingestion-version"] = OTEL_EXPORTER_OTLP_LANGFUSE_INGESTION_VERSION
DEFAULT_ZH_TITLE = "新对话"
@@ -334,15 +476,24 @@ class VectorDatabaseType(str, Enum):
# Container Platform Configuration
-IS_DEPLOYED_BY_KUBERNETES = os.getenv("IS_DEPLOYED_BY_KUBERNETES", "false").lower() == "true"
+IS_DEPLOYED_BY_KUBERNETES = os.getenv(
+ "IS_DEPLOYED_BY_KUBERNETES", "false").lower() == "true"
KUBERNETES_NAMESPACE = os.getenv("KUBERNETES_NAMESPACE", "nexent")
-# Northbound API External URL (used for A2A Agent Card URLs)
-# When accessed through reverse proxy, set this to the public-facing URL
-# Falls back to http://localhost:5013 for local development
-_northbound_url = os.getenv("NORTHBOUND_EXTERNAL_URL", "")
-NORTHBOUND_EXTERNAL_URL = _northbound_url.rstrip("/") if _northbound_url else "http://localhost:5013"
+# Northbound API public base URL (A2A agent cards, external file proxy links); unset or empty falls back to local dev.
+NORTHBOUND_EXTERNAL_URL = (os.getenv("NORTHBOUND_EXTERNAL_URL")
+                           or "http://localhost:5013/api").rstrip("/")
# APP Version
-APP_VERSION = "v2.0.2"
+APP_VERSION = "v2.2.1"
+
+
+# Skill Creation Streaming Configuration
+STREAMABLE_CONTENT_TYPES = frozenset([
+ "model_output_thinking",
+ "model_output_code",
+ "model_output_deep_thinking",
+ "tool",
+ "execution_logs",
+])
diff --git a/backend/consts/error_code.py b/backend/consts/error_code.py
index 072243de4..fd2987309 100644
--- a/backend/consts/error_code.py
+++ b/backend/consts/error_code.py
@@ -141,6 +141,23 @@ class ErrorCode(Enum):
PROFILE_UPDATE_FAILED = "110102" # Profile update failed
PROFILE_USER_ALREADY_EXISTS = "110103" # User already exists
PROFILE_INVALID_CREDENTIALS = "110104" # Invalid credentials
+ # 02 - Password
+ PROFILE_PASSWORD_WEAK = "110201" # Password does not meet strength requirements
+ PROFILE_PASSWORD_SAME_AS_OLD = "110202" # New password cannot be the same as old password
+
+ # ==================== 16 OAuth / 第三方登录 ====================
+ # 01 - Provider
+ OAUTH_PROVIDER_NOT_CONFIGURED = "160101" # OAuth provider not configured
+ OAUTH_PROVIDER_DISABLED = "160102" # OAuth provider disabled
+ OAUTH_PROVIDER_UNSUPPORTED = "160103" # OAuth provider not supported
+ OAUTH_PROVIDER_ERROR = "160104" # OAuth provider returned an error
+
+ # 02 - Account Linking
+ OAUTH_LINK_FAILED = "160201" # Failed to link OAuth account
+ OAUTH_UNLINK_FAILED = "160202" # Failed to unlink OAuth account
+ OAUTH_UNLINK_LAST_METHOD = "160203" # Cannot unlink last auth method
+ OAUTH_ACCOUNT_NOT_FOUND = "160204" # OAuth account link not found
+ OAUTH_ACCOUNT_ALREADY_LINKED = "160205" # OAuth account already linked
# ==================== 12 TenantResource / 租户资源 ====================
# 01 - Tenant
@@ -172,6 +189,12 @@ class ErrorCode(Enum):
IDATA_RATE_LIMIT = "130405" # iData rate limit
IDATA_RESPONSE_ERROR = "130406" # iData response error
+ # 05 - AIDP Service
+ AIDP_SERVICE_ERROR = "130501" # AIDP service error
+ AIDP_CONFIG_INVALID = "130502" # Invalid AIDP configuration
+ AIDP_CONNECTION_ERROR = "130503" # AIDP connection error
+ AIDP_AUTH_ERROR = "130504" # AIDP auth error
+
# ==================== 14 Northbound / 北向接口 ====================
# 01 - Request
NORTHBOUND_REQUEST_FAILED = "140101" # Northbound request failed
@@ -237,4 +260,22 @@ class ErrorCode(Enum):
ErrorCode.IDATA_CONNECTION_ERROR: 502,
ErrorCode.IDATA_RESPONSE_ERROR: 502,
ErrorCode.IDATA_RATE_LIMIT: 429,
+ # AIDP (module 13)
+ ErrorCode.AIDP_CONFIG_INVALID: 400,
+ ErrorCode.AIDP_AUTH_ERROR: 401,
+ ErrorCode.AIDP_CONNECTION_ERROR: 502,
+ # OAuth (module 16)
+ ErrorCode.OAUTH_PROVIDER_NOT_CONFIGURED: 400,
+ ErrorCode.OAUTH_PROVIDER_DISABLED: 400,
+ ErrorCode.OAUTH_PROVIDER_UNSUPPORTED: 400,
+ ErrorCode.OAUTH_PROVIDER_ERROR: 502,
+ ErrorCode.OAUTH_LINK_FAILED: 500,
+ ErrorCode.OAUTH_UNLINK_FAILED: 500,
+ ErrorCode.OAUTH_UNLINK_LAST_METHOD: 400,
+ ErrorCode.OAUTH_ACCOUNT_NOT_FOUND: 404,
+ ErrorCode.OAUTH_ACCOUNT_ALREADY_LINKED: 409,
+ # Profile - Password (module 11)
+ ErrorCode.PROFILE_INVALID_CREDENTIALS: 400,
+ ErrorCode.PROFILE_PASSWORD_WEAK: 400,
+ ErrorCode.PROFILE_PASSWORD_SAME_AS_OLD: 400,
}
diff --git a/backend/consts/error_message.py b/backend/consts/error_message.py
index 4ff1141c7..bb3641604 100644
--- a/backend/consts/error_message.py
+++ b/backend/consts/error_message.py
@@ -5,6 +5,8 @@
Frontend should use i18n for localized messages.
"""
+from typing import Dict, Tuple
+
from .error_code import ErrorCode
@@ -102,6 +104,9 @@ class ErrorMessage:
ErrorCode.PROFILE_UPDATE_FAILED: "Profile update failed.",
ErrorCode.PROFILE_USER_ALREADY_EXISTS: "User already exists.",
ErrorCode.PROFILE_INVALID_CREDENTIALS: "Invalid username or password.",
+ # Profile - Password
+ ErrorCode.PROFILE_PASSWORD_WEAK: "Password does not meet security requirements. Please use a stronger password.",
+ ErrorCode.PROFILE_PASSWORD_SAME_AS_OLD: "New password cannot be the same as the old password.",
# ==================== 12 TenantResource / 租户资源 ====================
ErrorCode.TENANT_NOT_FOUND: "Tenant not found.",
@@ -118,6 +123,16 @@ class ErrorMessage:
ErrorCode.DIFY_AUTH_ERROR: "Dify authentication failed. Please check your API key.",
ErrorCode.DIFY_RATE_LIMIT: "Dify API rate limit exceeded. Please try again later.",
ErrorCode.ME_CONNECTION_FAILED: "Failed to connect to ME service.",
+ ErrorCode.IDATA_SERVICE_ERROR: "iData service error.",
+ ErrorCode.IDATA_CONFIG_INVALID: "iData configuration invalid. Please check URL and API key format.",
+ ErrorCode.IDATA_CONNECTION_ERROR: "Failed to connect to iData. Please check network connection and URL.",
+ ErrorCode.IDATA_RESPONSE_ERROR: "Failed to parse iData response. Please check API URL.",
+ ErrorCode.IDATA_AUTH_ERROR: "iData authentication failed. Please check your API key.",
+ ErrorCode.IDATA_RATE_LIMIT: "iData API rate limit exceeded. Please try again later.",
+ ErrorCode.AIDP_SERVICE_ERROR: "AIDP service error.",
+ ErrorCode.AIDP_CONFIG_INVALID: "AIDP configuration invalid. Please check URL and API key format.",
+ ErrorCode.AIDP_CONNECTION_ERROR: "Failed to connect to AIDP. Please check network connection and URL.",
+ ErrorCode.AIDP_AUTH_ERROR: "AIDP authentication failed. Please check your API key.",
# ==================== 14 Northbound / 北向接口 ====================
ErrorCode.NORTHBOUND_REQUEST_FAILED: "Northbound request failed.",
@@ -145,11 +160,11 @@ def get_message(cls, error_code: ErrorCode) -> str:
return cls._MESSAGES.get(error_code, "An error occurred. Please try again later.")
@classmethod
- def get_message_with_code(cls, error_code: ErrorCode) -> tuple[int, str]:
+ def get_message_with_code(cls, error_code: ErrorCode) -> Tuple[int, str]:
"""Get error code and message as tuple."""
return (error_code.value, cls.get_message(error_code))
@classmethod
- def get_all_messages(cls) -> dict:
+ def get_all_messages(cls) -> Dict:
"""Get all error code to message mappings."""
return {code.value: msg for code, msg in cls._MESSAGES.items()}
diff --git a/backend/consts/exceptions.py b/backend/consts/exceptions.py
index 074b4a5b0..e5e4c7a89 100644
--- a/backend/consts/exceptions.py
+++ b/backend/consts/exceptions.py
@@ -6,13 +6,13 @@
1. New Framework (with ErrorCode):
from consts.error_code import ErrorCode
from consts.exceptions import AppException
-
+
raise AppException(ErrorCode.COMMON_VALIDATION_ERROR, "Validation failed")
raise AppException(ErrorCode.MCP_CONNECTION_FAILED, "Connection timeout", details={"host": "localhost"})
2. Legacy Framework (simple exceptions):
from consts.exceptions import ValidationError, NotFoundException, MCPConnectionError
-
+
raise ValidationError("Tenant name cannot be empty")
raise NotFoundException("Tenant 123 not found")
raise MCPConnectionError("MCP connection failed")
@@ -22,10 +22,12 @@
from .error_code import ErrorCode, ERROR_CODE_HTTP_STATUS
from .error_message import ErrorMessage
+from typing import List
# ==================== New Framework: AppException with ErrorCode ====================
+
class AppException(Exception):
"""
Base application exception with ErrorCode.
@@ -35,7 +37,9 @@ class AppException(Exception):
raise AppException(ErrorCode.MCP_CONNECTION_FAILED, "Timeout", details={"host": "x"})
"""
- def __init__(self, error_code: ErrorCode, message: str = None, details: dict = None):
+ def __init__(
+ self, error_code: ErrorCode, message: str = None, details: dict = None
+ ):
self.error_code = error_code
self.message = message or ErrorMessage.get_message(error_code)
self.details = details or {}
@@ -43,9 +47,11 @@ def __init__(self, error_code: ErrorCode, message: str = None, details: dict = N
def to_dict(self) -> dict:
return {
- "code": str(self.error_code.value), # Keep as string to preserve leading zeros
+ "code": str(
+ self.error_code.value
+ ), # Keep as string to preserve leading zeros
"message": self.message,
- "details": self.details if self.details else None
+ "details": self.details if self.details else None,
}
@property
@@ -70,136 +76,200 @@ def raise_error(error_code: ErrorCode, message: str = None, details: dict = None
# These do NOT require ErrorCode - they are simple Exception subclasses.
# Exception handler will infer ErrorCode from class name.
+
class AgentRunException(Exception):
"""Exception raised when agent run fails."""
+
pass
class LimitExceededError(Exception):
"""Raised when an outer platform calling too frequently"""
+
pass
class UnauthorizedError(Exception):
"""Raised when a user from outer platform is unauthorized."""
+
pass
class SignatureValidationError(Exception):
"""Raised when X-Signature header is missing or does not match the expected HMAC value."""
+
pass
class MemoryPreparationException(Exception):
"""Raised when memory preprocessing or retrieval fails prior to agent run."""
+
pass
class MCPConnectionError(Exception):
"""Raised when MCP connection fails."""
+
pass
class MCPNameIllegal(Exception):
"""Raised when MCP name is illegal."""
+
+ pass
+
+
+class McpNotFoundError(Exception):
+ """Raised when MCP resource is not found."""
+ pass
+
+
+class McpValidationError(Exception):
+ """Raised when MCP payload or runtime data is invalid."""
+ pass
+
+
+class McpNameConflictError(Exception):
+ """Raised when MCP name conflicts with an existing enabled service."""
+ pass
+
+
+class McpPortConflictError(Exception):
+ """Raised when an MCP container port conflicts with an existing service or runtime port."""
pass
class NoInviteCodeException(Exception):
"""Raised when invite code is not found."""
+
pass
class IncorrectInviteCodeException(Exception):
"""Raised when invite code is incorrect."""
+
pass
class OfficeConversionException(Exception):
"""Raised when Office-to-PDF conversion via data-process service fails."""
+
pass
class UnsupportedFileTypeException(Exception):
"""Raised when a file type is not supported for the requested operation."""
+
pass
class FileTooLargeException(Exception):
"""Raised when a file exceeds the maximum allowed size for the requested operation."""
+
pass
class UserRegistrationException(Exception):
"""Raised when user registration fails."""
+
pass
class TimeoutException(Exception):
"""Raised when timeout occurs."""
+
pass
class ValidationError(Exception):
"""Raised when validation fails."""
+
pass
class NotFoundException(Exception):
"""Raised when not found exception occurs."""
+
pass
class MEConnectionException(Exception):
"""Raised when ME connection fails."""
+
pass
class VoiceServiceException(Exception):
"""Raised when voice service fails."""
+
+ pass
+
+
+class VoiceConfigException(Exception):
+ """Raised when voice configuration is invalid or missing."""
+
pass
class STTConnectionException(Exception):
"""Raised when STT service connection fails."""
+
pass
class TTSConnectionException(Exception):
"""Raised when TTS service connection fails."""
- pass
-
-class VoiceConfigException(Exception):
- """Raised when voice configuration is invalid."""
pass
class ToolExecutionException(Exception):
"""Raised when mcp tool execution failed."""
+
pass
class MCPContainerError(Exception):
"""Raised when MCP container operation fails."""
+
pass
class DuplicateError(Exception):
"""Raised when a duplicate resource already exists."""
+
pass
class DataMateConnectionError(Exception):
"""Raised when DataMate connection fails or URL is not configured."""
+
pass
+class SkillDuplicateError(Exception):
+ """Raised when importing an agent with skills that have duplicate names in target tenant."""
+ def __init__(self, duplicate_names: List[str]):
+ self.duplicate_names = duplicate_names
+
+
class SkillException(Exception):
"""Raised when skill operations fail."""
pass
+class OAuthProviderError(Exception):
+ """Raised when OAuth provider configuration is invalid or provider returns an error."""
+
+ pass
+
+
+class OAuthLinkError(Exception):
+ """Raised when linking or unlinking an OAuth account fails."""
+
+ pass
+
+
class TaskNotFoundError(Exception):
"""Raised when A2A task is not found (per A2A spec Section 3.4.2)."""
pass
@@ -251,5 +321,10 @@ class UnsupportedOperationError(Exception):
DifyServiceException = Exception # Generic fallback
ExternalAPIError = Exception # Generic fallback
+# OAuth aliases
+OAuthProviderNotConfiguredError = OAuthProviderError
+OAuthProviderDisabledError = OAuthProviderError
+OAuthAccountNotFoundError = NotFoundException
+
# Signature aliases
# SignatureValidationError already defined above
diff --git a/backend/consts/model.py b/backend/consts/model.py
index 91cf7d1b6..00e5b8a0a 100644
--- a/backend/consts/model.py
+++ b/backend/consts/model.py
@@ -1,8 +1,10 @@
from enum import Enum
-from typing import Optional, Any, List, Dict
+from typing import Optional, Any, List, Dict, Literal
-from pydantic import BaseModel, Field, EmailStr
-from nexent.core.agents.agent_model import ToolConfig
+from pydantic import BaseModel, Field, EmailStr, ConfigDict, field_validator
+from nexent.core.agents.agent_model import AgentVerificationConfig, ToolConfig
+
+from consts.prompt_template import PROMPT_GENERATE_TEMPLATE_FIELD_ALIAS_MAP
class ModelConnectStatusEnum(Enum):
@@ -29,7 +31,7 @@ def get_value(cls, status: Optional[str]) -> str:
class UserSignUpRequest(BaseModel):
"""User registration request model"""
email: EmailStr
- password: str = Field(..., min_length=6)
+ password: str = Field(..., min_length=8)
invite_code: Optional[str] = None
auto_login: Optional[bool] = True # Whether to return session after signup
@@ -40,6 +42,19 @@ class UserSignInRequest(BaseModel):
password: str
+class OAuthCompleteRequest(BaseModel):
+    """Complete a pending OAuth signup (optional email, password, required invite code)."""
+    email: Optional[EmailStr] = None
+    password: str = Field(..., min_length=8)  # same minimum as UserSignUpRequest / UpdatePasswordRequest
+    invite_code: str = Field(..., min_length=1)
+
+
+class UpdatePasswordRequest(BaseModel):
+ """Password update request model for changing user password"""
+ old_password: str = Field(..., min_length=1, description="Current password for verification")
+ new_password: str = Field(..., min_length=8, description="New password to set (min 8 characters)")
+
+
class UserUpdateRequest(BaseModel):
"""User update request model"""
username: Optional[str] = Field(None, min_length=1, max_length=50)
@@ -52,6 +67,52 @@ class UserDeleteRequest(BaseModel):
new_owner_id: Optional[str] = None
+class OAuthProviderDefinition(BaseModel):
+ name: str
+ display_name: str
+ icon: str
+
+ authorize_url: str
+ authorize_method: str = "GET"
+ authorize_params: Dict[str, str] = {}
+ authorize_fragment: str = ""
+ authorize_param_map: Dict[str, str] = {
+ "client_id": "client_id",
+ "redirect_uri": "redirect_uri",
+ "scope": "scope",
+ "state": "state",
+ }
+ encode_redirect_uri: bool = False
+
+ token_url: str
+ token_method: str = "POST"
+ token_params_map: Dict[str, str] = {
+ "client_id": "client_id",
+ "client_secret": "client_secret",
+ "code": "code",
+ "grant_type": "grant_type",
+ }
+ token_extra_params: Dict[str, str] = {}
+ token_error_key: Optional[str] = None
+ token_error_message_key: Optional[str] = None
+ token_response_id_key: Optional[str] = None
+
+ userinfo_url: str
+ userinfo_auth_scheme: str = "Bearer"
+ userinfo_params: Dict[str, str] = {}
+ userinfo_field_map: Dict[str, str] = {
+ "id": "id",
+ "email": "email",
+ "username": "login",
+ }
+ userinfo_needs_email_fetch: bool = False
+ userinfo_email_url: Optional[str] = None
+
+ client_id_env: str
+ client_secret_env: str
+ enabled_check: Optional[str] = None
+
+
# Response models for model management
class ModelResponse(BaseModel):
code: int = 200
@@ -72,6 +133,11 @@ class ModelRequest(BaseModel):
expected_chunk_size: Optional[int] = None
maximum_chunk_size: Optional[int] = None
chunk_batch: Optional[int] = None
+ # STT specific fields
+ model_appid: Optional[str] = None
+ access_token: Optional[str] = None
+ timeout_seconds: Optional[int] = None
+ concurrency_limit: Optional[int] = None
class ProviderModelRequest(BaseModel):
@@ -101,14 +167,44 @@ class SingleModelConfig(BaseModel):
dimension: Optional[int] = None
+class STTModelConfig(BaseModel):
+ """STT model specific configuration with factory, appid, and access token fields"""
+ modelName: str
+ displayName: str
+ apiConfig: Optional[ModelApiConfig] = None
+ modelFactory: Optional[str] = None
+ modelAppid: Optional[str] = None
+ accessToken: Optional[str] = None
+
+
+def _empty_model_config() -> SingleModelConfig:
+ return SingleModelConfig(
+ modelName="",
+ displayName="",
+ apiConfig=ModelApiConfig(apiKey="", modelUrl="")
+ )
+
+
+class TTSModelConfig(BaseModel):
+ """TTS model specific configuration with factory, appid, and access token fields"""
+ modelName: str
+ displayName: str
+ apiConfig: Optional[ModelApiConfig] = None
+ modelFactory: Optional[str] = None
+ modelAppid: Optional[str] = None
+ accessToken: Optional[str] = None
+
+
class ModelConfig(BaseModel):
llm: SingleModelConfig
embedding: SingleModelConfig
multiEmbedding: SingleModelConfig
rerank: SingleModelConfig
vlm: SingleModelConfig
- stt: SingleModelConfig
- tts: SingleModelConfig
+ vlm2: SingleModelConfig = Field(default_factory=_empty_model_config)
+ vlm3: SingleModelConfig = Field(default_factory=_empty_model_config)
+ stt: STTModelConfig
+ tts: TTSModelConfig
class AppConfig(BaseModel):
@@ -128,16 +224,41 @@ class GlobalConfig(BaseModel):
# Request models
+class HistoryItem(BaseModel):
+ role: str
+ content: str
+ minio_files: Optional[List[Dict[str, Any]]] = None
+
+
+class AgentToolParamsRequest(BaseModel):
+ """Request-scoped tool parameter overrides for a single agent."""
+
+ tools: Dict[str, Dict[str, Any]] = Field(
+ default_factory=dict,
+ description="Mapping from tool identifier to request-scoped override params",
+ )
+
+
+class ToolParamsRequest(BaseModel):
+ """Request-scoped tool parameter overrides for main and managed agents."""
+
+ agents: Dict[str, AgentToolParamsRequest] = Field(
+ default_factory=dict,
+ description="Mapping from agent identifier to tool parameter overrides",
+ )
+
+
class AgentRequest(BaseModel):
query: str
conversation_id: Optional[int] = None
- history: Optional[List[Dict]] = None
+ history: Optional[List[HistoryItem]] = None
# Complete list of attachment information
minio_files: Optional[List[Dict[str, Any]]] = None
agent_id: Optional[int] = None
model_id: Optional[int] = None
version_no: Optional[int] = None
is_debug: Optional[bool] = False
+ tool_params: Optional[ToolParamsRequest] = None
class MessageUnit(BaseModel):
@@ -236,6 +357,7 @@ class ProcessParams(BaseModel):
source_type: str
index_name: str
authorization: Optional[str] = None
+ model_id: Optional[int] = None
class OpinionRequest(BaseModel):
@@ -248,10 +370,110 @@ class GeneratePromptRequest(BaseModel):
task_description: str
agent_id: int
model_id: int
+ prompt_template_id: Optional[int] = None
tool_ids: Optional[List[int]] = Field(
None, description="Optional: tool IDs from frontend (takes precedence over database query)")
sub_agent_ids: Optional[List[int]] = Field(
None, description="Optional: sub-agent IDs from frontend (takes precedence over database query)")
+ knowledge_base_display_names: Optional[List[str]] = Field(
+ None, description="Optional: knowledge base display names from frontend (takes precedence over database query)")
+ has_selected_resources: bool = Field(
+ True, description="Whether tools or sub-agents are selected; when False, skips generating constraint and few_shots sections")
+
+
+class PromptTemplateContentRequest(BaseModel):
+ model_config = ConfigDict(populate_by_name=True)
+
+ duty_system_prompt: str = Field(
+ alias=PROMPT_GENERATE_TEMPLATE_FIELD_ALIAS_MAP["duty_system_prompt"]
+ )
+ constraint_system_prompt: str = Field(
+ alias=PROMPT_GENERATE_TEMPLATE_FIELD_ALIAS_MAP["constraint_system_prompt"]
+ )
+ few_shots_system_prompt: str = Field(
+ alias=PROMPT_GENERATE_TEMPLATE_FIELD_ALIAS_MAP["few_shots_system_prompt"]
+ )
+ agent_variable_name_system_prompt: str = Field(
+ alias=PROMPT_GENERATE_TEMPLATE_FIELD_ALIAS_MAP["agent_variable_name_system_prompt"]
+ )
+ agent_display_name_system_prompt: str = Field(
+ alias=PROMPT_GENERATE_TEMPLATE_FIELD_ALIAS_MAP["agent_display_name_system_prompt"]
+ )
+ agent_description_system_prompt: str = Field(
+ alias=PROMPT_GENERATE_TEMPLATE_FIELD_ALIAS_MAP["agent_description_system_prompt"]
+ )
+ user_prompt: str = Field(
+ alias=PROMPT_GENERATE_TEMPLATE_FIELD_ALIAS_MAP["user_prompt"]
+ )
+ agent_name_regenerate_system_prompt: str = Field(
+ alias=PROMPT_GENERATE_TEMPLATE_FIELD_ALIAS_MAP["agent_name_regenerate_system_prompt"]
+ )
+ agent_name_regenerate_user_prompt: str = Field(
+ alias=PROMPT_GENERATE_TEMPLATE_FIELD_ALIAS_MAP["agent_name_regenerate_user_prompt"]
+ )
+ agent_display_name_regenerate_system_prompt: str = Field(
+ alias=PROMPT_GENERATE_TEMPLATE_FIELD_ALIAS_MAP["agent_display_name_regenerate_system_prompt"]
+ )
+ agent_display_name_regenerate_user_prompt: str = Field(
+ alias=PROMPT_GENERATE_TEMPLATE_FIELD_ALIAS_MAP["agent_display_name_regenerate_user_prompt"]
+ )
+
+
+class PromptTemplateRequest(BaseModel):
+ template_name: str
+ description: Optional[str] = None
+ template_type: str = "agent_generate"
+ template_content_zh: PromptTemplateContentRequest
+ template_content_en: Optional[PromptTemplateContentRequest] = None
+class OptimizePromptSectionRequest(BaseModel):
+ task_description: str
+ agent_id: int
+ model_id: int
+ section_type: str
+ section_title: str
+ current_content: str
+ feedback: str
+ mode: Literal["general", "insert", "select"] = "general"
+ start_pos: Optional[int] = Field(None, description="Start position for insert/select mode")
+ end_pos: Optional[int] = Field(None, description="End position for insert/select mode")
+ tool_ids: Optional[List[int]] = Field(
+ None, description="Optional: tool IDs from frontend (takes precedence over database query)")
+ sub_agent_ids: Optional[List[int]] = Field(
+ None, description="Optional: sub-agent IDs from frontend (takes precedence over database query)")
+ knowledge_base_display_names: Optional[List[str]] = Field(
+ None, description="Optional: knowledge base display names from frontend (takes precedence over database query)")
+
+
+class BadCaseItem(BaseModel):
+ question: str
+ answer: str
+ label: Optional[str] = None
+ reason: Optional[str] = None
+
+
+class OptimizePromptBadCaseRequest(BaseModel):
+ agent_id: int
+ model_id: int
+ current_content: str
+ bad_cases: List[BadCaseItem]
+ section_type: str
+ section_title: str
+ tool_ids: Optional[List[int]] = Field(None)
+ sub_agent_ids: Optional[List[int]] = Field(None)
+ knowledge_base_display_names: Optional[List[str]] = Field(None)
+
+
+class OptimizeFromDebugSelected(BaseModel):
+ user_question: str
+ assistant_answer: str
+
+
+class OptimizePromptFromDebugRequest(BaseModel):
+ agent_id: int
+ model_id: int
+ feedback: str
+ selected: OptimizeFromDebugSelected
+ history: Optional[List[HistoryItem]] = None
class GenerateTitleRequest(BaseModel):
@@ -269,7 +491,7 @@ class AgentInfoRequest(BaseModel):
author: Optional[str] = None
model_name: Optional[str] = None
model_id: Optional[int] = None
- max_steps: Optional[int] = None
+ max_steps: Optional[int] = Field(default=None, ge=1, le=30)
provide_run_summary: Optional[bool] = None
duty_prompt: Optional[str] = None
constraint_prompt: Optional[str] = None
@@ -277,13 +499,27 @@ class AgentInfoRequest(BaseModel):
enabled: Optional[bool] = None
business_logic_model_name: Optional[str] = None
business_logic_model_id: Optional[int] = None
+ prompt_template_id: Optional[int] = None
+ prompt_template_name: Optional[str] = None
enabled_tool_ids: Optional[List[int]] = None
enabled_skill_ids: Optional[List[int]] = None
related_agent_ids: Optional[List[int]] = None
+ related_external_agent_ids: Optional[List[int]] = None
group_ids: Optional[List[int]] = None
ingroup_permission: Optional[str] = None
+ enable_context_manager: Optional[bool] = None
+ verification_config: Optional[Dict[str, Any]] = None
+ greeting_message: Optional[str] = None
+ example_questions: Optional[List[str]] = None
version_no: int = 0
+ @field_validator("verification_config", mode="before")
+ @classmethod
+ def normalize_verification_config(cls, value):
+ if value is None:
+ return None
+ return AgentVerificationConfig.model_validate(value).model_dump()
+
class AgentIDRequest(BaseModel):
agent_id: int
@@ -307,6 +543,7 @@ class SkillInstanceInfoRequest(BaseModel):
agent_id: int
enabled: bool = True
version_no: int = 0
+ config_values: Optional[Dict[str, Any]] = None
class ToolInstanceSearchRequest(BaseModel):
@@ -347,6 +584,7 @@ class MessageIdRequest(BaseModel):
class ExportAndImportAgentInfo(BaseModel):
agent_id: int
+ tenant_id: Optional[str] = None
name: str
display_name: Optional[str] = None
description: str
@@ -354,6 +592,7 @@ class ExportAndImportAgentInfo(BaseModel):
author: Optional[str] = None
max_steps: int
provide_run_summary: bool
+ verification_config: Optional[Dict[str, Any]] = None
duty_prompt: Optional[str] = None
constraint_prompt: Optional[str] = None
few_shots_prompt: Optional[str] = None
@@ -364,6 +603,9 @@ class ExportAndImportAgentInfo(BaseModel):
model_name: Optional[str] = None
business_logic_model_id: Optional[int] = None
business_logic_model_name: Optional[str] = None
+ skill_names: Optional[List[str]] = None
+ prompt_template_id: Optional[int] = None
+ prompt_template_name: Optional[str] = None
class Config:
arbitrary_types_allowed = True
@@ -380,9 +622,21 @@ class ExportAndImportDataFormat(BaseModel):
mcp_info: List[MCPInfo]
+class AgentRepositorySnapshot(ExportAndImportDataFormat):
+ """Frozen marketplace snapshot: export format plus optional skill ZIP payloads."""
+ skills: Optional[List["SkillZipEntry"]] = None
+
+
+class SkillZipEntry(BaseModel):
+ """A skill bundled inside an agent export ZIP."""
+ skill_name: str
+ skill_zip_base64: str
+
+
class AgentImportRequest(BaseModel):
agent_info: ExportAndImportDataFormat
force_import: bool = False
+ skills: Optional[List[SkillZipEntry]] = None
class AgentNameBatchRegenerateItem(BaseModel):
@@ -448,19 +702,6 @@ class VoiceConnectivityResponse(BaseModel):
message: str = Field(..., description="Status message")
-class TTSRequest(BaseModel):
- """Request model for TTS text-to-speech conversion"""
- text: str = Field(..., min_length=1,
- description="Text to convert to speech")
- stream: bool = Field(True, description="Whether to stream the audio")
-
-
-class TTSResponse(BaseModel):
- """Response model for TTS conversion"""
- status: str = Field(..., description="Status of the TTS conversion")
- message: Optional[str] = Field(None, description="Additional message")
-
-
class ToolValidateRequest(BaseModel):
"""Request model for tool validation"""
name: str = Field(..., description="Tool name to validate")
@@ -510,6 +751,8 @@ class MCPUpdateRequest(BaseModel):
new_mcp_url: str = Field(..., description="New MCP server URL")
new_authorization_token: Optional[str] = Field(
None, description="New authorization token for MCP server authentication (e.g., Bearer token)")
+ custom_headers: Optional[Dict[str, Any]] = Field(
+ None, description="Custom HTTP headers as JSON object")
# Tenant Management Data Models
@@ -518,6 +761,22 @@ class TenantCreateRequest(BaseModel):
"""Request model for creating a tenant"""
tenant_name: str = Field(..., min_length=1,
description="Tenant display name")
+ skill_ids: Optional[List[int]] = Field(
+ default=None,
+ description="Skill IDs to install for the new tenant (legacy, use skill_names instead)"
+ )
+ skill_names: Optional[List[str]] = Field(
+ default=None,
+ description="Skill names to install for the new tenant. "
+ "Each name is used to derive a .zip filename from "
+ "OFFICIAL_SKILLS_ZIP_PATH and installed via upload."
+ )
+ locale: Optional[str] = Field(
+ default=None,
+ description="Frontend locale when creating the tenant (e.g. 'zh' or 'en'). "
+ "Determines the source label for auto-installed skills: "
+ "'zh' → '官方', other locales → 'official'."
+ )
class TenantUpdateRequest(BaseModel):
@@ -689,15 +948,20 @@ class ManageTenantModelCreateRequest(BaseModel):
tenant_id: str = Field(..., min_length=1, description="Target tenant ID to create model for")
model_repo: Optional[str] = Field('', description="Model repository path")
model_name: str = Field(..., description="Model name")
- model_type: str = Field(..., description="Model type (e.g., 'llm', 'embedding', 'vlm', 'tts', 'stt')")
+ model_type: str = Field(..., description="Model type (e.g., 'llm', 'embedding', 'vlm', 'stt')")
api_key: Optional[str] = Field('', description="API key for the model")
base_url: Optional[str] = Field('', description="Base URL for the model API")
max_tokens: Optional[int] = Field(0, description="Maximum tokens for the model")
display_name: Optional[str] = Field('', description="Display name for the model")
- model_factory: Optional[str] = Field('OpenAI-API-Compatible', description="Model factory/provider name")
+ model_factory: Optional[str] = Field(None, description="Model factory/vendor for the model")
expected_chunk_size: Optional[int] = Field(None, description="Expected chunk size for embedding models")
maximum_chunk_size: Optional[int] = Field(None, description="Maximum chunk size for embedding models")
chunk_batch: Optional[int] = Field(None, description="Batch size for chunking")
+ # STT specific fields
+ model_appid: Optional[str] = Field(None, description="Application ID for STT models (e.g., Volcano Engine)")
+ access_token: Optional[str] = Field(None, description="Access token for STT models (e.g., Volcano Engine)")
+ timeout_seconds: Optional[int] = Field(None, description="Request timeout in seconds")
+ concurrency_limit: Optional[int] = Field(None, description="Maximum concurrent requests for this model")
class ManageTenantModelUpdateRequest(BaseModel):
@@ -711,10 +975,15 @@ class ManageTenantModelUpdateRequest(BaseModel):
base_url: Optional[str] = Field(None, description="Base URL for the model API")
max_tokens: Optional[int] = Field(None, description="Maximum tokens for the model")
display_name: Optional[str] = Field(None, description="New display name for the model")
- model_factory: Optional[str] = Field(None, description="Model factory/provider name")
+ model_factory: Optional[str] = Field(None, description="Model factory/vendor for the model")
expected_chunk_size: Optional[int] = Field(None, description="Expected chunk size for embedding models")
maximum_chunk_size: Optional[int] = Field(None, description="Maximum chunk size for embedding models")
chunk_batch: Optional[int] = Field(None, description="Batch size for chunking")
+ # STT specific fields
+ model_appid: Optional[str] = Field(None, description="Application ID for STT models")
+ access_token: Optional[str] = Field(None, description="Access token for STT models")
+ timeout_seconds: Optional[int] = Field(None, description="Request timeout in seconds")
+ concurrency_limit: Optional[int] = Field(None, description="Maximum concurrent requests for this model")
class ManageTenantModelDeleteRequest(BaseModel):
@@ -772,6 +1041,7 @@ class VersionListItemResponse(BaseModel):
source_version_no: Optional[int] = Field(None, description="Source version number if rollback")
source_type: Optional[str] = Field(None, description="Source type: NORMAL / ROLLBACK")
status: str = Field(..., description="Version status: RELEASED / DISABLED / ARCHIVED")
+ is_a2a: bool = Field(False, description="Whether this version is published as an A2A Server agent")
created_by: str = Field(..., description="User who published this version")
create_time: Optional[str] = Field(None, description="Publish timestamp")
@@ -791,6 +1061,7 @@ class VersionDetailResponse(BaseModel):
source_version_no: Optional[int] = Field(None, description="Source version number")
source_type: Optional[str] = Field(None, description="Source type")
status: str = Field(..., description="Version status")
+ is_a2a: bool = Field(False, description="Whether this version is published as an A2A Server agent")
created_by: str = Field(..., description="User who published this version")
create_time: Optional[str] = Field(None, description="Publish timestamp")
agent_info: Optional[dict] = Field(None, description="Agent info snapshot")
@@ -831,3 +1102,261 @@ class CurrentVersionResponse(BaseModel):
release_note: Optional[str] = Field(None, description="Release notes")
created_by: str = Field(..., description="User who published this version")
create_time: Optional[str] = Field(None, description="Publish timestamp")
+
+
+# Skill Management Data Models
+# ---------------------------------------------------------------------------
+class SkillCreateRequest(BaseModel):
+ """Request model for creating a skill via JSON."""
+ name: str
+ description: str
+ content: str
+ tool_ids: Optional[List[int]] = []
+ tool_names: Optional[List[str]] = []
+ tags: Optional[List[str]] = []
+ source: Optional[str] = "custom"
+ config_schemas: Optional[Dict[str, Any]] = None
+ config_values: Optional[Dict[str, Any]] = None
+ files: Optional[List[Dict[str, str]]] = Field(
+ default_factory=list,
+ description="Additional skill files beyond SKILL.md. "
+ "Each entry has 'path' (relative path) and 'content'. "
+ "SKILL.md may also be sent here; the 'content' field is the primary SKILL.md source."
+ )
+
+
+class SkillFileData(BaseModel):
+ """A single file within a skill."""
+ path: str = Field(description="Relative file path within the skill (e.g. 'SKILL.md', 'scripts/run.py')")
+ content: str = Field(description="Full file content")
+
+
+class SkillUpdateRequest(BaseModel):
+    """Request model for updating a skill; every field is optional."""
+    description: Optional[str] = None
+    content: Optional[str] = None
+    tool_ids: Optional[List[int]] = None
+    tool_names: Optional[List[str]] = None
+    tags: Optional[List[str]] = None
+    source: Optional[str] = None
+    config_schemas: Optional[Dict[str, Any]] = None
+    config_values: Optional[Dict[str, Any]] = None
+    files: Optional[List[SkillFileData]] = Field(
+        default_factory=list,
+        description="Updated skill files. Each entry has 'path' and 'content'. "
+                    "Pass 'SKILL.md' here to update the main skill file; other files are written as-is."
+    )
+
+
+class SkillResponse(BaseModel):
+ """Response model for skill data."""
+ skill_id: int
+ name: str
+ description: str
+ content: str
+ tool_ids: List[int]
+ tags: List[str]
+ source: str
+ config_schemas: Optional[Dict[str, Any]] = None
+ config_values: Optional[Dict[str, Any]] = None
+ created_by: Optional[str] = None
+ create_time: Optional[str] = None
+ updated_by: Optional[str] = None
+ update_time: Optional[str] = None
+
+
+class SkillCreateInteractiveRequest(BaseModel):
+ """Request model for interactive skill creation via LLM agent."""
+ user_request: str
+ existing_skill: Optional[Dict[str, Any]] = None
+ complexity: Optional[str] = "simple"
+ language: Optional[str] = "zh"
+
+
+# ---------------------------------------------------------------------------
+# MCP Management Data Models
+# ---------------------------------------------------------------------------
+
+class MCPSourceType(str, Enum):
+ """MCP source type enumeration"""
+ LOCAL = "local"
+ MCP_REGISTRY = "mcp_registry"
+ COMMUNITY = "community"
+
+
+class AddMcpServiceRequest(BaseModel):
+ """Request model for adding an MCP service"""
+ name: str = Field(..., min_length=1, description="MCP service name")
+ server_url: str = Field(..., min_length=1, description="MCP server URL")
+ description: Optional[str] = Field(None, description="MCP service description")
+ source: MCPSourceType = Field(default=MCPSourceType.LOCAL, description="MCP source type")
+ tags: List[str] = Field(default_factory=list, description="MCP tags")
+ authorization_token: Optional[str] = Field(None, description="Authorization token for MCP server")
+ custom_headers: Optional[Dict[str, Any]] = Field(None, description="Custom HTTP headers as JSON object")
+ container_config: Optional[Dict[str, Any]] = Field(None, description="Container configuration")
+ registry_json: Optional[Dict[str, Any]] = Field(None, description="Registry metadata JSON")
+ enabled: Optional[bool] = Field(default=False, description="Whether the MCP is enabled after creation")
+
+ @field_validator("name", "server_url", "description", "authorization_token", mode="before")
+ @classmethod
+ def _strip_text(cls, value: Any):
+ if isinstance(value, str):
+ return value.strip()
+ return value
+
+
+class AddContainerMcpServiceRequest(BaseModel):
+ """Request model for adding a container-based MCP service"""
+ name: str = Field(..., min_length=1, description="MCP service name")
+ description: Optional[str] = Field(None, description="MCP service description")
+ source: MCPSourceType = Field(default=MCPSourceType.LOCAL, description="MCP source type")
+ tags: List[str] = Field(default_factory=list, description="MCP tags")
+ authorization_token: Optional[str] = Field(None, description="Authorization token for MCP server")
+ registry_json: Optional[Dict[str, Any]] = Field(None, description="Registry metadata JSON")
+ port: int = Field(..., ge=1, le=65535, description="Host port for the container")
+ mcp_config: MCPConfigRequest = Field(..., description="MCP server configuration")
+
+ @field_validator("name", "description", "authorization_token", mode="before")
+ @classmethod
+ def _strip_text(cls, value: Any):
+ if isinstance(value, str):
+ return value.strip()
+ return value
+
+
+class UpdateMcpServiceRequest(BaseModel):
+ """Request model for updating an MCP service"""
+ mcp_id: int = Field(..., gt=0, description="MCP record ID")
+ name: str = Field(..., min_length=1, description="New MCP service name")
+ description: Optional[str] = Field(None, description="MCP service description")
+ server_url: str = Field(..., min_length=1, description="New MCP server URL")
+ tags: List[str] = Field(default_factory=list, description="MCP tags")
+ authorization_token: Optional[str] = Field(None, description="Authorization token for MCP server")
+ custom_headers: Optional[Dict[str, Any]] = Field(None, description="Custom HTTP headers as JSON object")
+
+ @field_validator("name", "server_url", "description", "authorization_token", mode="before")
+ @classmethod
+ def _strip_text(cls, value: Any):
+ if isinstance(value, str):
+ return value.strip()
+ return value
+
+
+class EnableMcpServiceRequest(BaseModel):
+ """Request model for enabling an MCP service"""
+ mcp_id: int = Field(..., gt=0, description="MCP record ID to enable")
+
+
+class DisableMcpServiceRequest(BaseModel):
+ """Request model for disabling an MCP service"""
+ mcp_id: int = Field(..., gt=0, description="MCP record ID to disable")
+
+
+class HealthcheckMcpServiceRequest(BaseModel):
+ """Request model for checking MCP service health"""
+ mcp_id: int = Field(..., gt=0, description="MCP record ID to health check")
+
+
+class ListMcpToolsRequest(BaseModel):
+ """Request model for listing MCP service tools"""
+ mcp_id: int = Field(..., gt=0, description="MCP record ID")
+
+
+class PortConflictCheckRequest(BaseModel):
+ """Request model for checking port availability"""
+ port: int = Field(..., ge=1, le=65535, description="Port number to check")
+
+
+class ListMcpServicesQuery(BaseModel):
+ """Query parameters for listing MCP services"""
+ tag: Optional[str] = Field(None, description="Filter by tag")
+
+ @field_validator("tag", mode="before")
+ @classmethod
+ def _strip_tag(cls, value: Any):
+ if isinstance(value, str):
+ stripped = value.strip()
+ return stripped or None
+ return value
+
+
+class RegistryListQuery(BaseModel):
+ """Query parameters for listing MCP registry services"""
+ search: Optional[str] = Field(None, description="Search keyword")
+ include_deleted: bool = Field(default=False, description="Include deleted records")
+ updated_since: Optional[str] = Field(None, description="Filter by update time")
+ version: Optional[str] = Field(None, description="Filter by version")
+ cursor: Optional[str] = Field(None, description="Pagination cursor")
+ limit: int = Field(default=30, ge=1, le=100, description="Items per page")
+
+ @field_validator("search", "updated_since", "version", "cursor", mode="before")
+ @classmethod
+ def _strip_text(cls, value: Any):
+ if isinstance(value, str):
+ stripped = value.strip()
+ return stripped or None
+ return value
+
+
+class CommunityListRequest(BaseModel):
+ """Request model for listing community MCP services"""
+ search: Optional[str] = Field(None, description="Search keyword")
+ tag: Optional[str] = Field(None, description="Filter by tag")
+ transport_type: Optional[str] = Field(None,description="Filter by transport: url or container")
+ cursor: Optional[str] = Field(None, description="Pagination cursor")
+ limit: int = Field(default=30, ge=1, le=100, description="Items per page")
+
+ @field_validator("search", "tag", "cursor", "transport_type", mode="before")
+ @classmethod
+ def _strip_text(cls, value: Any):
+ if isinstance(value, str):
+ stripped = value.strip()
+ return stripped or None
+ return value
+
+
+class CommunityPublishRequest(BaseModel):
+ """Publish a local MCP to the community; optional fields override the snapshot."""
+
+ mcp_id: int = Field(..., gt=0, description="MCP record ID to publish")
+ name: Optional[str] = Field(None, description="Community display name override")
+ description: Optional[str] = Field(None, description="Description override")
+ version: Optional[str] = Field(None, description="Version override")
+ tags: Optional[List[str]] = Field(None, description="Tags override")
+ mcp_server: Optional[str] = Field(None, max_length=500, description="Remote MCP server URL override (URL / HTTP / SSE transports)")
+ config_json: Optional[Dict[str, Any]] = Field(None, description="Container MCP configuration JSON override")
+
+ @field_validator("name", "description", "version", "mcp_server", mode="before")
+ @classmethod
+ def _strip_publish_optional_text(cls, value: Any):
+ if isinstance(value, str):
+ stripped = value.strip()
+ return stripped or None
+ return value
+
+
+class CommunityUpdateRequest(BaseModel):
+ """Request model for updating community MCP service"""
+ community_id: int = Field(..., gt=0, description="Community record ID")
+ name: Optional[str] = Field(default=None, min_length=1, description="New MCP service name")
+ description: Optional[str] = Field(None, description="MCP service description")
+ tags: List[str] = Field(default_factory=list, description="MCP tags")
+ version: Optional[str] = Field(None, description="MCP version")
+ registry_json: Optional[Dict[str, Any]] = Field(None, description="Registry metadata JSON")
+ config_json: Optional[Dict[str, Any]] = Field(
+ None,
+ description="Container MCP configuration JSON (omit to leave unchanged)",
+ )
+
+ @field_validator("name", "description", "version", mode="before")
+ @classmethod
+ def _strip_text(cls, value: Any):
+ if isinstance(value, str):
+ stripped = value.strip()
+ return stripped or None
+ return value
+
+
+class DeleteMcpServiceRequest(BaseModel):
+ """Request model for deleting an MCP service"""
+ mcp_id: int = Field(..., gt=0, description="MCP record ID to delete")
diff --git a/backend/consts/oauth_providers.py b/backend/consts/oauth_providers.py
new file mode 100644
index 000000000..7429855b6
--- /dev/null
+++ b/backend/consts/oauth_providers.py
@@ -0,0 +1,140 @@
+import os
+from typing import Dict
+
+from consts.model import OAuthProviderDefinition
+
+# GitHub OAuth provider definition: fixed github.com / api.github.com endpoints,
+# with credentials referenced by environment-variable name.
+GITHUB_PROVIDER = OAuthProviderDefinition(
+    name="github",
+    display_name="GitHub",
+    icon="github",
+    authorize_url="https://github.com/login/oauth/authorize",
+    authorize_params={"scope": "read:user user:email"},
+    token_url="https://github.com/login/oauth/access_token",
+    token_error_key="error",
+    token_error_message_key="error_description",
+    userinfo_url="https://api.github.com/user",
+    # Maps the normalized field names (keys) to fields of the userinfo response (values).
+    userinfo_field_map={
+        "id": "id",
+        "email": "email",
+        "username": "login",
+    },
+    # NOTE(review): presumably triggers an extra request to userinfo_email_url when the
+    # profile lacks an email — confirm against the consumer of this definition.
+    userinfo_needs_email_fetch=True,
+    userinfo_email_url="https://api.github.com/user/emails",
+    client_id_env="GITHUB_OAUTH_CLIENT_ID",
+    client_secret_env="GITHUB_OAUTH_CLIENT_SECRET",
+)
+
+# Base URL of the GDE deployment. Defaults to "" so that an unset GDE_URL does not
+# render the literal text "None" into the endpoint URLs; a trailing slash is removed
+# to avoid "//" when the path is appended.
+_GDE_BASE_URL = (os.getenv("GDE_URL") or "").rstrip("/")
+
+# GDE OAuth provider definition; all endpoints are derived from GDE_URL.
+GDE_PROVIDER = OAuthProviderDefinition(
+    name="gde",
+    display_name="Gde",
+    icon="gde",
+    authorize_url=f"{_GDE_BASE_URL}/dspcas/oauth2.0/authorize",
+    authorize_param_map={"client_id": "client_id", "redirect_uri": "redirect_uri"},
+    token_url=f"{_GDE_BASE_URL}/dspcas/v2/oauth2.0/accessToken",
+    # The client secret is sent under the parameter name "secret" for this provider.
+    token_params_map={
+        "client_id": "client_id",
+        "client_secret": "secret",
+        "code": "code",
+        "grant_type": "grant_type",
+        "redirect_uri": "redirect_uri",
+    },
+    token_error_key="errorCode",
+    token_error_message_key="errorMessage",
+    userinfo_url=f"{_GDE_BASE_URL}/dspcas/oauth2.0/profile",
+    userinfo_params={"access_token": "{access_token}"},
+    userinfo_field_map={"id": "attributes.userId", "username": "id"},
+    client_id_env="GDE_OAUTH_CLIENT_ID",
+    client_secret_env="GDE_OAUTH_CLIENT_SECRET",
+)
+
+# Base URL of the Link App deployment. Defaults to "" so that an unset LINK_APP_URL
+# does not render the literal text "None" into the endpoint URLs; a trailing slash is
+# removed to avoid "//" when the path is appended.
+_LINK_APP_BASE_URL = (os.getenv("LINK_APP_URL") or "").rstrip("/")
+
+# Link App OAuth provider definition; all endpoints are derived from LINK_APP_URL.
+LINK_APP_PROVIDER = OAuthProviderDefinition(
+    name="link_app",
+    display_name="Link App",
+    icon="link_app",
+    authorize_url=f"{_LINK_APP_BASE_URL}/CNS/oauth2/authorize",
+    authorize_params={"response_type": "code", "scope": "read write"},
+    token_url=f"{_LINK_APP_BASE_URL}/CNS/oauth2/token",
+    token_params_map={
+        "client_id": "client_id",
+        "client_secret": "client_secret",
+        "code": "code",
+        "grant_type": "grant_type",
+        "redirect_uri": "redirect_uri",
+    },
+    token_error_key="error",
+    token_error_message_key="error_description",
+    userinfo_url=f"{_LINK_APP_BASE_URL}/CNS/getUserInfo",
+    # User attributes are nested under "data" in the userinfo response.
+    userinfo_field_map={
+        "id": "data.id",
+        "email": "data.email",
+        "username": "data.username",
+    },
+    client_id_env="LINK_APP_OAUTH_CLIENT_ID",
+    client_secret_env="LINK_APP_OAUTH_CLIENT_SECRET",
+)
+
+# WeChat OAuth provider definition (QR-connect login). Parameter names differ from the
+# other providers here: client id/secret are sent as "appid"/"secret".
+WECHAT_PROVIDER = OAuthProviderDefinition(
+    name="wechat",
+    display_name="WeChat",
+    icon="wechat",
+    authorize_url="https://open.weixin.qq.com/connect/qrconnect",
+    authorize_params={"response_type": "code", "scope": "snsapi_login"},
+    authorize_fragment="#wechat_redirect",
+    authorize_param_map={
+        "client_id": "appid",
+        "redirect_uri": "redirect_uri",
+        "scope": "scope",
+        "state": "state",
+    },
+    encode_redirect_uri=True,
+    token_url="https://api.weixin.qq.com/sns/oauth2/access_token",
+    token_method="GET",
+    token_params_map={
+        "client_id": "appid",
+        "client_secret": "secret",
+        "code": "code",
+        "grant_type": "grant_type",
+    },
+    token_error_key="errcode",
+    token_error_message_key="errmsg",
+    token_response_id_key="openid",
+    userinfo_url="https://api.weixin.qq.com/sns/userinfo",
+    # NOTE(review): empty auth scheme presumably means no Authorization header scheme
+    # is used for the userinfo call — confirm against the consumer.
+    userinfo_auth_scheme="",
+    userinfo_params={"openid": "{openid}"},
+    # NOTE(review): "email" maps to an empty path; presumably no email is read from the
+    # WeChat profile — confirm.
+    userinfo_field_map={
+        "id": "openid",
+        "email": "",
+        "username": "nickname",
+    },
+    client_id_env="WECHAT_OAUTH_APP_ID",
+    client_secret_env="WECHAT_OAUTH_APP_SECRET",
+    # Enablement is driven by this env flag instead of credential presence
+    # (see is_provider_enabled).
+    enabled_check="ENABLE_WECHAT_OAUTH",
+)
+
+# Lookup table from provider key to its definition; insertion order is
+# github, wechat, gde, link_app.
+OAUTH_PROVIDER_REGISTRY: Dict[str, OAuthProviderDefinition] = dict(
+    github=GITHUB_PROVIDER,
+    wechat=WECHAT_PROVIDER,
+    gde=GDE_PROVIDER,
+    link_app=LINK_APP_PROVIDER,
+)
+
+
+def get_provider_definition(provider: str) -> OAuthProviderDefinition:
+    """Return the registered definition for ``provider``.
+
+    Raises:
+        KeyError: if ``provider`` is not a key of ``OAUTH_PROVIDER_REGISTRY``.
+    """
+    definition = OAUTH_PROVIDER_REGISTRY[provider]
+    return definition
+
+
+def is_provider_enabled(definition: OAuthProviderDefinition) -> bool:
+    """Tell whether a provider is enabled in the current environment.
+
+    When the definition names an ``enabled_check`` env variable, only that flag
+    decides (truthy values: "true", "1", "yes", case-insensitive). Otherwise the
+    provider is enabled when both its client id and client secret env variables
+    are non-empty.
+    """
+    flag_name = definition.enabled_check
+    if flag_name:
+        flag_value = os.getenv(flag_name, "false").lower()
+        return flag_value in ("true", "1", "yes")
+
+    configured_id = os.getenv(definition.client_id_env, "")
+    configured_secret = os.getenv(definition.client_secret_env, "")
+    if configured_id and configured_secret:
+        return True
+    return False
+
+
+def get_all_provider_definitions() -> Dict[str, OAuthProviderDefinition]:
+    """Return a shallow copy of the provider registry, safe for callers to mutate."""
+    return OAUTH_PROVIDER_REGISTRY.copy()
diff --git a/backend/consts/prompt_template.py b/backend/consts/prompt_template.py
new file mode 100644
index 000000000..febcaeca5
--- /dev/null
+++ b/backend/consts/prompt_template.py
@@ -0,0 +1,15 @@
+# Every alias is the upper-cased field name, so the map is derived from the
+# ordered field list instead of being spelled out pair by pair.
+PROMPT_GENERATE_TEMPLATE_FIELD_ALIAS_MAP = {
+    field_name: field_name.upper()
+    for field_name in (
+        "duty_system_prompt",
+        "constraint_system_prompt",
+        "few_shots_system_prompt",
+        "agent_variable_name_system_prompt",
+        "agent_display_name_system_prompt",
+        "agent_description_system_prompt",
+        "user_prompt",
+        "agent_name_regenerate_system_prompt",
+        "agent_name_regenerate_user_prompt",
+        "agent_display_name_regenerate_system_prompt",
+        "agent_display_name_regenerate_user_prompt",
+    )
+}
+
+# Field names in declaration order.
+PROMPT_GENERATE_TEMPLATE_FIELDS = tuple(PROMPT_GENERATE_TEMPLATE_FIELD_ALIAS_MAP)
diff --git a/backend/consts/provider.py b/backend/consts/provider.py
index 38bbc4027..fe49332b7 100644
--- a/backend/consts/provider.py
+++ b/backend/consts/provider.py
@@ -17,6 +17,8 @@ class ProviderEnum(str, Enum):
# Dashcope
DASHSCOPE_BASE_URL = "https://dashscope.aliyuncs.com/compatible-mode/v1/"
DASHSCOPE_GET_URL = "https://dashscope.aliyuncs.com/api/v1/models"
+# WebSocket (wss) endpoint of the DashScope realtime API.
+DASHSCOPE_REALTIME_BASE_URL = "wss://dashscope.aliyuncs.com/api-ws/v1/realtime"
+# STT uses the same endpoint as the realtime API.
+DASHSCOPE_STT_BASE_URL = DASHSCOPE_REALTIME_BASE_URL
# TokenPony
TOKENPONY_BASE_URL = "https://api.tokenpony.cn/v1/"
diff --git a/backend/consts/scheduler.py b/backend/consts/scheduler.py
new file mode 100644
index 000000000..6820a9687
--- /dev/null
+++ b/backend/consts/scheduler.py
@@ -0,0 +1,28 @@
+"""
+Scheduler frequency constants
+Centralized definition for auto-summary frequency options
+"""
+from datetime import timedelta
+
+# Core frequency config: includes value, timedelta, and label; this is the single source of truth
+SUMMARY_FREQUENCY_CONFIG = [
+    {"value": "1h", "timedelta": timedelta(hours=1), "label": "1h"},
+    {"value": "3h", "timedelta": timedelta(hours=3), "label": "3h"},
+    {"value": "6h", "timedelta": timedelta(hours=6), "label": "6h"},
+    {"value": "1d", "timedelta": timedelta(days=1), "label": "1d"},
+    {"value": "1w", "timedelta": timedelta(weeks=1), "label": "1w"},
+]
+
+# Generate valid frequency list from config (for validation); None is also accepted
+VALID_SUMMARY_FREQUENCIES = [item["value"] for item in SUMMARY_FREQUENCY_CONFIG] + [None]
+
+# Generate frequency to timedelta mapping from config (direct value, no loop conversion needed)
+FREQUENCY_MAP = {item["value"]: item["timedelta"] for item in SUMMARY_FREQUENCY_CONFIG}
+
+# Generate API options from config (for frontend). Labels are read from the config's
+# "label" field so that changing a label in the config is reflected in the API.
+SUMMARY_FREQUENCY_OPTIONS_FOR_API = [
+    {"value": "disabled", "label": "Disabled"},
+] + [{"value": item["value"], "label": item["label"]} for item in SUMMARY_FREQUENCY_CONFIG]
+
+# Scheduler check interval (seconds): 30 minutes
+SCHEDULER_CHECK_INTERVAL_SECONDS = 30 * 60
diff --git a/backend/data_process/ray_actors.py b/backend/data_process/ray_actors.py
index 2fa590bec..c3879c007 100644
--- a/backend/data_process/ray_actors.py
+++ b/backend/data_process/ray_actors.py
@@ -1,11 +1,20 @@
+from io import BytesIO
import logging
import json
+import time
from typing import Any, Dict, List, Optional
import ray
-from consts.const import RAY_ACTOR_NUM_CPUS, REDIS_BACKEND_URL, DEFAULT_EXPECTED_CHUNK_SIZE, DEFAULT_MAXIMUM_CHUNK_SIZE
-from database.attachment_db import get_file_stream
+from consts.const import (
+ RAY_ACTOR_NUM_CPUS,
+ REDIS_BACKEND_URL,
+ DEFAULT_EXPECTED_CHUNK_SIZE,
+ DEFAULT_MAXIMUM_CHUNK_SIZE,
+ TABLE_TRANSFORMER_MODEL_PATH,
+ UNSTRUCTURED_DEFAULT_MODEL_INITIALIZE_PARAMS_JSON_PATH,
+)
+from database.attachment_db import build_s3_url, get_file_stream, upload_fileobj
from database.model_management_db import get_model_by_model_id
from nexent.data_process import DataProcessCore
@@ -27,6 +36,60 @@ def __init__(self):
f"Ray actor initialized using {RAY_ACTOR_NUM_CPUS} CPU cores...")
self._processor = DataProcessCore()
+    def ping(self) -> bool:
+        """Lightweight health check used by prewarm logic.
+
+        Does no work and always returns True.
+        """
+        return True
+
+    def _prepare_process_params(
+        self,
+        task_id: Optional[str],
+        model_id: Optional[int],
+        tenant_id: Optional[str],
+        params: Dict[str, Any],
+    ) -> Dict[str, Any]:
+        """
+        Build the keyword arguments handed to the processor.
+
+        Works on a shallow copy of ``params`` (the caller's dict is not modified),
+        then adds the model paths, the task id when one is given, and the
+        model-derived chunk settings.
+        """
+        prepared: Dict[str, Any] = {**params}
+        self._apply_model_paths(prepared)
+        if task_id:
+            prepared["task_id"] = task_id
+
+        # Shared helper: sets chunk sizes and model_type when the model is found.
+        self._apply_model_chunk_sizes(
+            model_id=model_id, tenant_id=tenant_id, params=prepared)
+        return prepared
+
+    def _run_file_process(
+        self,
+        file_data: bytes,
+        filename: str,
+        chunking_strategy: str,
+        process_params: Dict[str, Any],
+        log_subject: str,
+    ) -> List[Dict[str, Any]]:
+        """
+        Run the processor on raw bytes and return the validated chunk list.
+
+        Extracted images (if any) are appended as extra chunks before validation.
+        Returns an empty list when validation leaves no chunks. ``log_subject``
+        only labels the filename in the completion log line.
+        """
+        raw_result = self._processor.file_process(
+            file_data=file_data,
+            filename=filename,
+            chunking_strategy=chunking_strategy,
+            **process_params
+        )
+        produced, extracted_images = self._normalize_processor_result(raw_result)
+
+        if extracted_images:
+            self._append_image_chunks(
+                source=filename, chunks=produced, images_info=extracted_images)
+
+        produced = self._validate_chunks(produced, filename)
+        if produced:
+            logger.info(
+                f"[RayActor] Processing done: produced {len(produced)} chunks for {log_subject}='{filename}'")
+            return produced
+        return []
+
def process_file(
self,
source: str,
@@ -54,54 +117,143 @@ def process_file(
"""
logger.info(
f"[RayActor] Processing start: source='{source}', destination='{destination}', strategy='{chunking_strategy}', task_id='{task_id}', model_id='{model_id}'")
-
- if task_id:
- params['task_id'] = task_id
-
- # Get chunk size parameters from embedding model if model_id is provided
- if model_id and tenant_id:
- try:
- # Get embedding model details directly by model_id
- model_record = get_model_by_model_id(
- model_id=model_id, tenant_id=tenant_id)
- if model_record:
- expected_chunk_size = model_record.get(
- 'expected_chunk_size', DEFAULT_EXPECTED_CHUNK_SIZE)
- maximum_chunk_size = model_record.get(
- 'maximum_chunk_size', DEFAULT_MAXIMUM_CHUNK_SIZE)
- model_name = model_record.get('display_name')
-
- # Pass chunk sizes to processing parameters
- params['max_characters'] = maximum_chunk_size
- params['new_after_n_chars'] = expected_chunk_size
-
- logger.info(
- f"[RayActor] Using chunk sizes from embedding model '{model_name}' (ID: {model_id}): "
- f"max_characters={maximum_chunk_size}, new_after_n_chars={expected_chunk_size}")
- else:
- logger.warning(
- f"[RayActor] Embedding model with ID {model_id} not found for tenant '{tenant_id}', using default chunk sizes")
- except Exception as e:
- logger.warning(
- f"[RayActor] Failed to retrieve chunk sizes from embedding model ID {model_id}: {e}. Using default chunk sizes")
+ process_params = self._prepare_process_params(
+ task_id=task_id,
+ model_id=model_id,
+ tenant_id=tenant_id,
+ params=params,
+ )
try:
+ fetch_start = time.perf_counter()
file_stream = get_file_stream(source)
if file_stream is None:
raise FileNotFoundError(
f"Unable to fetch file from URL: {source}")
file_data = file_stream.read()
+ fetch_elapsed = time.perf_counter() - fetch_start
+ logger.info(
+ f"[RayActor] Fetch file bytes done: destination='{destination}', source='{source}', "
+ f"bytes={len(file_data)}, elapsed={fetch_elapsed:.3f}s")
except Exception as e:
logger.error(f"Failed to fetch file from {source}: {e}")
raise
- chunks = self._processor.file_process(
+ return self._run_file_process(
file_data=file_data,
filename=source,
chunking_strategy=chunking_strategy,
- **params
- )
+ process_params=process_params,
+ log_subject="source",
+ )
+
+    def _apply_model_paths(self, params: Dict[str, Any]) -> None:
+        """Write the configured model paths into ``params`` in place, overwriting any existing values."""
+        params.update({
+            "table_transformer_model_path": TABLE_TRANSFORMER_MODEL_PATH,
+            "unstructured_default_model_initialize_params_json_path":
+                UNSTRUCTURED_DEFAULT_MODEL_INITIALIZE_PARAMS_JSON_PATH,
+        })
+
+    def _apply_model_chunk_sizes(
+        self,
+        model_id: Optional[int],
+        tenant_id: Optional[str],
+        params: Dict[str, Any],
+    ) -> None:
+        """
+        Copy chunk-size settings of the embedding model into ``params`` in place.
+
+        Does nothing unless both ``model_id`` and ``tenant_id`` are given. When the
+        model record is found, sets ``max_characters`` and ``new_after_n_chars``
+        (falling back to the module defaults for missing keys) and ``model_type``
+        when the record has one. Lookup failures are logged and swallowed, leaving
+        ``params`` as it was at the point of failure.
+        """
+        if not model_id or not tenant_id:
+            return
+
+        try:
+            record = get_model_by_model_id(
+                model_id=model_id, tenant_id=tenant_id)
+            if not record:
+                logger.warning(
+                    f"[RayActor] Embedding model with ID {model_id} not found for tenant '{tenant_id}', using default chunk sizes")
+                return
+
+            soft_limit = record.get(
+                'expected_chunk_size', DEFAULT_EXPECTED_CHUNK_SIZE)
+            hard_limit = record.get(
+                'maximum_chunk_size', DEFAULT_MAXIMUM_CHUNK_SIZE)
+            display_name = record.get('display_name')
+            embedding_type = record.get('model_type')
+
+            params.update({
+                'max_characters': hard_limit,
+                'new_after_n_chars': soft_limit,
+            })
+            if embedding_type:
+                params['model_type'] = embedding_type
+
+            logger.info(
+                f"[RayActor] Using chunk sizes from embedding model '{display_name}' (ID: {model_id}): "
+                f"max_characters={hard_limit}, new_after_n_chars={soft_limit}")
+        except Exception as e:
+            logger.warning(
+                f"[RayActor] Failed to retrieve chunk sizes from embedding model ID {model_id}: {e}. Using default chunk sizes")
+
+    def _read_file_bytes(self, source: str) -> bytes:
+        """
+        Fetch ``source`` and return its full content.
+
+        Raises FileNotFoundError when no stream is returned; any failure is logged
+        and re-raised.
+        """
+        try:
+            stream = get_file_stream(source)
+            if stream is None:
+                raise FileNotFoundError(
+                    f"Unable to fetch file from URL: {source}")
+            payload = stream.read()
+        except Exception as e:
+            logger.error(f"Failed to fetch file from {source}: {e}")
+            raise
+        return payload
+
+    def _normalize_processor_result(
+        self, result: Any
+    ) -> tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
+        """
+        Coerce a processor result into a ``(chunks, images_info)`` pair.
+
+        A 2-tuple is unpacked as-is; anything else is treated as the chunk list
+        with no images. Falsy members are replaced by empty lists.
+        """
+        is_pair = isinstance(result, tuple) and len(result) == 2
+        if not is_pair:
+            return result or [], []
+        chunk_list, image_list = result
+        return chunk_list or [], image_list or []
+    def _append_image_chunks(
+        self,
+        source: str,
+        chunks: List[Dict[str, Any]],
+        images_info: List[Dict[str, Any]],
+    ) -> None:
+        """
+        Upload extracted images and append one metadata chunk per image to ``chunks``.
+
+        Entries that are not dicts or that lack ``image_bytes`` are skipped with a
+        warning. Each accepted entry is uploaded under the ``images_in_attachments``
+        prefix, annotated in place with ``source_file`` and ``image_url``, and
+        described by a new chunk whose content is a JSON string.
+
+        Args:
+            source: Name/URL of the file the images were extracted from.
+            chunks: Chunk list to extend in place.
+            images_info: Image descriptors; ``image_bytes``, ``image_format`` and
+                ``position`` are read from each accepted entry.
+
+        Raises:
+            KeyError: if an accepted entry lacks ``image_format`` or ``position``.
+        """
+        folder = "images_in_attachments"
+        for index, image_data in enumerate(images_info):
+            if not isinstance(image_data, dict):
+                logger.warning(
+                    f"[RayActor] Skipping image entry at index {index}: unexpected type {type(image_data)}"
+                )
+                continue
+            if "image_bytes" not in image_data:
+                logger.warning(
+                    f"[RayActor] Skipping image entry at index {index}: missing image_bytes"
+                )
+                continue
+
+            img_obj = BytesIO(image_data["image_bytes"])
+            result = upload_fileobj(
+                file_obj=img_obj,
+                file_name=f"{index}.{image_data['image_format']}",
+                prefix=folder)
+            image_url = build_s3_url(result.get("object_name", ""))
+
+            image_data["source_file"] = source
+            image_data["image_url"] = image_url
+
+            chunks.append({
+                "content": json.dumps({
+                    "source_file": source,
+                    "position": image_data["position"],
+                    "image_url": image_url,
+                }),
+                "filename": source,
+                "metadata": {
+                    # len(chunks) already includes the image chunks appended so far,
+                    # so it is the next free index; adding `index` on top of it
+                    # counted them twice and left gaps in the numbering.
+                    "chunk_index": len(chunks),
+                    "process_source": "UniversalImageExtractor",
+                    "image_url": image_url,
+                }
+            })
+
+ def _validate_chunks(
+ self, chunks: Any, source: str
+ ) -> List[Dict[str, Any]]:
if chunks is None:
logger.warning(
f"[RayActor] file_process returned None for source='{source}'")
@@ -114,10 +266,97 @@ def process_file(
logger.warning(
f"[RayActor] file_process returned empty list for source='{source}'")
return []
+ return chunks
+
+    def process_bytes(
+        self,
+        file_bytes: bytes,
+        filename: str,
+        chunking_strategy: str,
+        task_id: Optional[str] = None,
+        model_id: Optional[int] = None,
+        tenant_id: Optional[str] = None,
+        **params
+    ) -> List[Dict[str, Any]]:
+        """
+        Chunk a file that is already held in memory.
+
+        Prepares the processing parameters from the task/model arguments and the
+        extra ``params``, then runs the shared file-processing path with
+        ``filename`` as both the processor filename and the log subject.
+        """
+        logger.info(
+            f"[RayActor] Processing bytes: filename='{filename}', strategy='{chunking_strategy}', task_id='{task_id}', model_id='{model_id}'"
+        )
+        prepared = self._prepare_process_params(
+            task_id=task_id, model_id=model_id, tenant_id=tenant_id, params=params)
+        chunks = self._run_file_process(
+            file_data=file_bytes,
+            filename=filename,
+            chunking_strategy=chunking_strategy,
+            process_params=prepared,
+            log_subject="filename",
+        )
+        return chunks
+
+    def split_file(
+        self,
+        source: str,
+        destination: str,
+        task_id: Optional[str] = None,
+        max_size: int = 5 * 1024 * 1024,
+        file_data: Optional[bytes] = None,
+        **params
+    ) -> List[bytes]:
+        """
+        Split file into parts using DataProcessCore.file_split and return raw bytes list.
+
+        Args:
+            source: File URL/path; fetched only when ``file_data`` is None, and also
+                passed to the splitter as the filename.
+            destination: Used for logging only.
+            task_id: Used for logging only.
+            max_size: Forwarded to ``file_split`` (default 5 MiB).
+            file_data: Optional pre-fetched content; skips the fetch when given.
+            **params: Extra keyword arguments forwarded to ``file_split``.
+
+        Returns:
+            The bytes of every part that could be read; empty when the splitter
+            returns nothing. Parts that cannot be read are skipped with a warning.
+
+        Raises:
+            FileNotFoundError: if the source cannot be fetched.
+        """
+        logger.info(
+            f"[RayActor] Splitting file: source='{source}', destination='{destination}', task_id='{task_id}', max_size={max_size}"
+        )
+
+        if file_data is None:
+            try:
+                fetch_start = time.perf_counter()
+                file_stream = get_file_stream(source)
+                if file_stream is None:
+                    raise FileNotFoundError(
+                        f"Unable to fetch file from URL: {source}")
+                file_data = file_stream.read()
+                fetch_elapsed = time.perf_counter() - fetch_start
+                logger.info(
+                    f"[RayActor] Fetch file bytes for split done: destination='{destination}', source='{source}', "
+                    f"bytes={len(file_data)}, elapsed={fetch_elapsed:.3f}s")
+            except Exception as e:
+                logger.error(f"Failed to fetch file from {source}: {e}")
+                raise
+
+        split_start = time.perf_counter()
+        parts = self._processor.file_split(
+            file_data=file_data,
+            filename=source,
+            max_size=max_size,
+            **params
+        )
+        split_elapsed = time.perf_counter() - split_start
+
+        if not parts:
+            logger.info(
+                f"[RayActor] Split done: destination='{destination}', source='{source}', "
+                f"parts=0, elapsed={split_elapsed:.3f}s")
+            return []
+
+        bytes_parts: List[bytes] = []
+        for part_index, part in enumerate(parts):
+            try:
+                bytes_parts.append(part.getvalue())
+            except Exception as exc:
+                # Still best-effort, but a dropped part means missing content
+                # downstream, so make it visible instead of discarding it silently.
+                logger.warning(
+                    f"[RayActor] Skipping unreadable split part {part_index} for source='{source}': {exc}")
+                continue
         logger.info(
-            f"[RayActor] Processing done: produced {len(chunks)} chunks for source='{source}'")
-        return chunks
+            f"[RayActor] Split done: destination='{destination}', source='{source}', "
+            f"parts={len(bytes_parts)}, elapsed={split_elapsed:.3f}s")
+        return bytes_parts
def store_chunks_in_redis(self, redis_key: str, chunks: List[Dict[str, Any]]) -> bool:
"""
diff --git a/backend/data_process/tasks.py b/backend/data_process/tasks.py
index 50414b711..4dd6edd69 100644
--- a/backend/data_process/tasks.py
+++ b/backend/data_process/tasks.py
@@ -4,32 +4,185 @@
import asyncio
import json
import logging
+import math
import os
import threading
import time
-from typing import Any, Dict, Optional
+from dataclasses import dataclass
+from typing import Any, Dict, Optional, List, Tuple
import aiohttp
+import requests
import re
import ray
-from celery import Task, chain, states
+from celery import Task, chain, states, group, chord
from celery.exceptions import Retry
+from celery.result import allow_join_result
-from consts.const import ELASTICSEARCH_SERVICE
from utils.file_management_utils import get_file_size
+from database.attachment_db import get_file_stream
+from database.knowledge_db import get_knowledge_record
from services.redis_service import get_redis_service
from .app import app
from .ray_actors import DataProcessorRayActor
from consts.const import (
+ ELASTICSEARCH_SERVICE,
REDIS_BACKEND_URL,
FORWARD_REDIS_RETRY_DELAY_S,
FORWARD_REDIS_RETRY_MAX,
+ DP_REDIS_CHUNKS_WAIT_TIMEOUT_S,
+ DP_REDIS_CHUNKS_POLL_INTERVAL_MS,
+ RAY_ACTOR_NUM_CPUS,
+ RAY_NUM_CPUS,
DISABLE_RAY_DASHBOARD,
ROOT_DIR,
+ PER_WAVE_TIMEOUT,
+ MAX_TIMEOUT,
+ RAY_GLOBAL_ACTOR_POOL_SIZE,
+ RAY_ACTOR_WARM_TIMEOUT_S,
+ RAY_GLOBAL_ACTOR_POOL_NAME,
+ RAY_GLOBAL_ACTOR_POOL_NAMESPACE
)
logger = logging.getLogger("data_process.tasks")
+# Retry budget while waiting for async split results: five times the regular
+# forward retry budget, and never below the regular budget itself.
+ASYNC_SPLIT_RETRY_MAX = max(
+    FORWARD_REDIS_RETRY_MAX * 5, FORWARD_REDIS_RETRY_MAX)
+# Default maximum number of chunks per batch (see _build_balanced_batches).
+FORWARD_ES_CHUNK_BATCH_SIZE = 64
+# "process_source" value that identifies image-metadata chunks.
+IMAGE_METADATA_PROCESS_SOURCE = "UniversalImageExtractor"
+
+
+def _wait_for_split_ready(redis_key: str, timeout_s: int, poll_interval_ms: int) -> int:
+    """
+    Poll Redis until the ``<redis_key>:ready`` flag is set, then report the chunk count.
+
+    Returns the length of the JSON list stored at ``redis_key``, or 0 when the
+    payload is missing, is not valid JSON, or is not a list.
+    Raises RuntimeError when REDIS_BACKEND_URL is not configured and TimeoutError
+    when the flag does not appear within ``timeout_s`` seconds.
+    """
+    if not REDIS_BACKEND_URL:
+        raise RuntimeError("REDIS_BACKEND_URL not configured")
+
+    import redis
+
+    client = redis.Redis.from_url(REDIS_BACKEND_URL, decode_responses=True)
+    ready_key = f"{redis_key}:ready"
+    deadline = time.time() + timeout_s
+
+    while time.time() < deadline:
+        if not client.get(ready_key):
+            # Sleep at least 10 ms between polls.
+            time.sleep(max(0.01, poll_interval_ms / 1000.0))
+            continue
+
+        payload = client.get(redis_key)
+        if not payload:
+            return 0
+        try:
+            decoded = json.loads(payload)
+        except Exception:
+            return 0
+        if isinstance(decoded, list):
+            return len(decoded)
+        return 0
+
+    raise TimeoutError(
+        f"Timed out waiting for async split aggregation at key '{ready_key}' after {timeout_s}s"
+    )
+
+
+def _estimate_parallel_parts() -> int:
+    """
+    Estimate how many parts can be processed at the same time.
+
+    Computed as total CPUs divided by CPUs per actor, never less than 1. Falls back
+    to ``os.cpu_count()`` (or 1) when RAY_NUM_CPUS cannot be read as an integer.
+    """
+    try:
+        # Reading the bare name cannot fail, which left the fallback unreachable.
+        # Converting here makes an unset or non-numeric value take the fallback
+        # instead of failing later in the integer division.
+        # NOTE(review): assumes RAY_NUM_CPUS may be None or a string — confirm in consts.
+        total_cpus = int(RAY_NUM_CPUS)
+    except Exception:
+        total_cpus = os.cpu_count() or 1
+    actor_cpus = max(1, int(RAY_ACTOR_NUM_CPUS))
+    return max(1, total_cpus // actor_cpus)
+
+
+def _compute_split_wait_timeout(parts_count: int) -> int:
+    """
+    Derive the wait timeout for ``parts_count`` split parts.
+
+    Starts from the base timeout and adds one per-wave allowance (at least 1)
+    for every processing wave after the first; the result is capped at MAX_TIMEOUT.
+    """
+    base_timeout = DP_REDIS_CHUNKS_WAIT_TIMEOUT_S
+    waves = math.ceil(max(1, parts_count) / _estimate_parallel_parts())
+    extra_waves = max(0, waves - 1)
+    per_wave = max(1, PER_WAVE_TIMEOUT)
+    dynamic_timeout = base_timeout + extra_waves * per_wave
+    floor_applied = max(base_timeout, dynamic_timeout)
+    return min(MAX_TIMEOUT, floor_applied)
+
+
+def _count_image_metadata_chunks(chunks: Optional[List[Dict[str, Any]]]) -> int:
+    """Count dict chunks whose top-level "process_source" marks them as image metadata."""
+    if not chunks:
+        return 0
+    count = 0
+    for chunk in chunks:
+        if not isinstance(chunk, dict):
+            continue
+        if chunk.get("process_source") == IMAGE_METADATA_PROCESS_SOURCE:
+            count += 1
+    return count
+
+
+def _get_next_available_batch_index(
+    batches: List[List[Dict[str, Any]]],
+    start_idx: int,
+    batch_size: int,
+) -> int:
+    """
+    Find the first batch with spare capacity, scanning circularly from ``start_idx``.
+
+    Each batch is inspected at most once. Raises RuntimeError when every batch
+    already holds ``batch_size`` items (or when ``batches`` is empty).
+    """
+    position = start_idx
+    remaining = len(batches)
+    while remaining > 0:
+        if len(batches[position]) < batch_size:
+            return position
+        position = (position + 1) % len(batches)
+        remaining -= 1
+    raise RuntimeError("No available batch capacity")
+
+
+def _distribute_chunks_round_robin(
+    batches: List[List[Dict[str, Any]]],
+    chunks: List[Dict[str, Any]],
+    batch_size: int,
+    error_context: str,
+) -> None:
+    """
+    Append ``chunks`` to ``batches`` in place, one per batch in turn.
+
+    Full batches are skipped. Raises RuntimeError naming ``error_context`` when
+    no batch has room left for a chunk.
+    """
+    cursor = 0
+    for item in chunks:
+        try:
+            cursor = _get_next_available_batch_index(batches, cursor, batch_size)
+        except RuntimeError as exc:
+            raise RuntimeError(
+                f"No available batch capacity while distributing {error_context}"
+            ) from exc
+        target = batches[cursor]
+        target.append(item)
+        # Start the next search at the following batch.
+        cursor = (cursor + 1) % len(batches)
+
+
+def _build_balanced_batches(
+    formatted_chunks: List[Dict[str, Any]],
+    batch_size: int = FORWARD_ES_CHUNK_BATCH_SIZE,
+) -> List[List[Dict[str, Any]]]:
+    """
+    Group chunks into batches of at most ``batch_size`` items.
+
+    Returns ``[]`` for no chunks and a single batch (the input list itself) when
+    everything fits. Otherwise ceil(total / batch_size) batches are created;
+    image-metadata chunks are dealt round-robin first, then the remaining
+    chunks, so image chunks end up spread across the batches.
+    """
+    chunk_total = len(formatted_chunks)
+    if chunk_total == 0:
+        return []
+    if chunk_total <= batch_size:
+        return [formatted_chunks]
+
+    batch_count = math.ceil(chunk_total / batch_size)
+
+    image_chunks: List[Dict[str, Any]] = []
+    text_chunks: List[Dict[str, Any]] = []
+    for chunk in formatted_chunks:
+        if chunk.get("process_source") == IMAGE_METADATA_PROCESS_SOURCE:
+            image_chunks.append(chunk)
+        else:
+            text_chunks.append(chunk)
+
+    batches: List[List[Dict[str, Any]]] = [[] for _ in range(batch_count)]
+
+    for group_chunks, group_label in (
+        (image_chunks, "image metadata chunks"),
+        (text_chunks, "text chunks"),
+    ):
+        _distribute_chunks_round_robin(
+            batches=batches,
+            chunks=group_chunks,
+            batch_size=batch_size,
+            error_context=group_label,
+        )
+
+    return batches
+
# Thread lock for initializing Ray to prevent race conditions
ray_init_lock = threading.Lock()
@@ -179,21 +332,489 @@ def run_in_thread():
raise
-# Initialize the data processing core LAZILY
-# This will be initialized on first task run by a worker process
-def get_ray_actor() -> Any:
+def _delete_source_file_via_http_sync(
+    *,
+    base_url: str,
+    index_name: str,
+    path_or_url: str,
+    scope: str,
+    timeout_s: float = 30.0,
+) -> Dict[str, Any]:
+    """
+    Send a blocking DELETE to ``<base_url>/indices/<index_name>/documents``.
+
+    ``path_or_url`` and ``scope`` are sent as query parameters. Returns a dict with
+    ``http_status``, ``response_json`` (only when the body parses to a dict) and
+    ``response_text`` (the raw body otherwise). Raises RuntimeError when
+    ``base_url`` is empty.
+    """
+    base = (base_url or "").rstrip("/")
+    if not base:
+        raise RuntimeError("ELASTICSEARCH_SERVICE is not configured")
+
+    resp = requests.delete(
+        f"{base}/indices/{index_name}/documents",
+        params={"path_or_url": path_or_url, "scope": scope},
+        timeout=timeout_s,
+    )
+    body_text = getattr(resp, "text", "")
+
+    try:
+        parsed = resp.json()
+    except Exception:
+        # Fall back to parsing the raw text when the response cannot decode itself.
+        parsed = _parse_json_or_none(body_text) if body_text else None
+
+    body_is_dict = isinstance(parsed, dict)
+    return {
+        "http_status": getattr(resp, "status_code", None),
+        "response_json": parsed if body_is_dict else None,
+        "response_text": None if body_is_dict else body_text,
+    }
+
+
+def _build_forward_error(
+    message: str,
+    index_name: str,
+    source: Optional[str],
+    original_filename: Optional[str],
+) -> Exception:
+    """Return (not raise) an Exception whose text is a JSON description of a forward-task failure."""
+    payload = {
+        "message": message,
+        "index_name": index_name,
+        "task_name": "forward",
+        "source": source,
+        "original_filename": original_filename
+    }
+    serialized = json.dumps(payload, ensure_ascii=False)
+    return Exception(serialized)
+
+
+def _parse_json_or_none(text: str) -> Optional[Dict[str, Any]]:
+    """Decode ``text`` as JSON and return it only when it is an object; otherwise None."""
+    try:
+        decoded = json.loads(text)
+    except Exception:
+        return None
+    if isinstance(decoded, dict):
+        return decoded
+    return None
+
+
+@dataclass(frozen=True)
+class _ForwardContext:
+    """Immutable bundle of the identifiers a forward-task run carries around."""
+    task_id: str
+    request_id: str
+    # NOTE(review): presumably the timestamp at which the forward task started — confirm unit/clock.
+    start_time: float
+    source: str
+    index_name: str
+    source_type: str
+    original_filename: Optional[str]
+
+
+def _init_forward_context(
+    *,
+    task_id: str,
+    request_id: str,
+    start_time: float,
+    source: str,
+    index_name: str,
+    source_type: str,
+    original_filename: Optional[str],
+) -> _ForwardContext:
+    """Build a ``_ForwardContext`` from keyword-only arguments."""
+    # Positional order follows the dataclass field order.
+    return _ForwardContext(
+        task_id,
+        request_id,
+        start_time,
+        source,
+        index_name,
+        source_type,
+        original_filename,
+    )
+
+
+def _is_forward_task_cancelled(ctx: _ForwardContext) -> bool:
+    """
+    Ask the Redis service whether the task was cancelled.
+
+    Any failure during the lookup is logged as a warning and treated as
+    "not cancelled".
+    """
+    try:
+        cancelled = get_redis_service().is_task_cancelled(ctx.task_id)
+        return bool(cancelled)
+    except Exception as exc:
+        logger.warning(
+            f"[{ctx.request_id}] FORWARD TASK: Failed to check cancellation flag for task {ctx.task_id}: "
+            f"{exc}"
+        )
+        return False
+
+
+def _build_forward_cancelled_result(ctx: _ForwardContext) -> Dict[str, Any]:
+    """Build the result payload for a forward task that stopped because of cancellation."""
+    result: Dict[str, Any] = {
+        'task_id': ctx.task_id,
+        'source': ctx.source,
+        'index_name': ctx.index_name,
+        'original_filename': ctx.original_filename,
+        'chunks_stored': 0,
+        'storage_time': 0,
+    }
+    # Nothing was submitted or indexed.
+    result['es_result'] = {
+        "success": False,
+        "message": "Indexing cancelled because document was deleted.",
+        "total_indexed": 0,
+        "total_submitted": 0,
+    }
+    return result
+
+
+def _load_forward_chunks(
+    self: Task,
+    *,
+    processed_data: Dict[str, Any],
+    original_source: str,
+    original_index_name: str,
+    filename: Optional[str],
+) -> Tuple[Optional[List[Dict[str, Any]]], bool, str, str, Optional[str]]:
+    """
+    Resolve the chunks a forward task should index.
+
+    Chunks come from ``processed_data['chunks']`` or, when those are empty and a
+    ``redis_key`` is present, from the JSON payload stored under that key. While
+    the payload is not available the Celery task is retried (with the larger
+    async-split budget when ``split_async`` is set). ``source``, ``index_name``
+    and ``original_filename`` from ``processed_data`` override the arguments.
+
+    Returns:
+        ``(chunks, split_async, source, index_name, filename)``.
+
+    Raises:
+        Retry: when the task was re-scheduled to wait for Redis data.
+        Exception: with a JSON message for configuration, Redis or missing-chunk failures.
+    """
+
+    def _failure(message: str) -> Exception:
+        # Uses the current values of the enclosing source/index/filename variables.
+        return _build_forward_error(
+            message=message,
+            index_name=original_index_name,
+            source=original_source,
+            original_filename=filename,
+        )
+
+    def _schedule_retry(reason: str, error_message: str, max_retries: int):
+        # Logs the attempt, then delegates to Celery's retry.
+        retry_num = getattr(self.request, 'retries', 0)
+        logger.info(
+            f"[{self.request.id}] FORWARD TASK: {reason}. "
+            f"Retry {retry_num + 1}/{max_retries} in {FORWARD_REDIS_RETRY_DELAY_S}s")
+        return self.retry(
+            countdown=FORWARD_REDIS_RETRY_DELAY_S,
+            max_retries=max_retries,
+            exc=_failure(error_message),
+        )
+
+    chunks = processed_data.get('chunks')
+    split_async = bool(processed_data.get('split_async'))
+    redis_key = processed_data.get('redis_key')
+
+    # If chunks are not in payload, try loading from Redis via the redis_key
+    if (not chunks) and redis_key:
+        if not REDIS_BACKEND_URL:
+            raise _failure("REDIS_BACKEND_URL not configured to retrieve chunks")
+        try:
+            import redis
+            client = redis.Redis.from_url(
+                REDIS_BACKEND_URL, decode_responses=True)
+
+            if split_async and not client.get(f"{redis_key}:ready"):
+                raise _schedule_retry(
+                    f"Async split not ready for key {redis_key}",
+                    "Async split not ready; will retry",
+                    ASYNC_SPLIT_RETRY_MAX,
+                )
+
+            cached = client.get(redis_key)
+            if not cached:
+                if split_async:
+                    raise _schedule_retry(
+                        f"Async split ready but chunks missing for key {redis_key}",
+                        "Async split ready but chunks missing; will retry",
+                        ASYNC_SPLIT_RETRY_MAX,
+                    )
+                # No busy-wait: release the worker slot and retry later
+                raise _schedule_retry(
+                    f"Chunks not yet available for key {redis_key}",
+                    "Chunks not ready in Redis; will retry",
+                    FORWARD_REDIS_RETRY_MAX,
+                )
+
+            try:
+                logger.debug(
+                    f"[{self.request.id}] FORWARD TASK: Retrieved Redis key '{redis_key}', payload_length={len(cached)}")
+                chunks = json.loads(cached)
+            except json.JSONDecodeError as jde:
+                # Log raw prefix to help diagnose incorrect writes
+                if isinstance(cached, str):
+                    raw_preview = cached[:120]
+                else:
+                    raw_preview = str(type(cached))
+                logger.error(
+                    f"[{self.request.id}] FORWARD TASK: JSON decode error for key '{redis_key}': {str(jde)}; raw_prefix={raw_preview!r}")
+                raise
+        except Retry:
+            raise
+        except Exception as exc:
+            raise _failure(f"Failed to retrieve chunks from Redis: {str(exc)}")
+
+    # Values carried in the payload take precedence over the task arguments.
+    original_source = processed_data.get('source') or original_source
+    original_index_name = processed_data.get('index_name') or original_index_name
+    filename = processed_data.get('original_filename') or filename
+
+    logger.info(
+        f"[{self.request.id}] FORWARD TASK: Received data for source '{original_source}' with {len(chunks) if chunks else 'None'} chunks")
+
+    if chunks is None:
+        raise _failure("No chunks received for forwarding")
+    if len(chunks) == 0:
+        if split_async and redis_key:
+            raise _schedule_retry(
+                "Empty chunks while waiting for async split",
+                "Chunks not ready in Redis (empty); will retry",
+                ASYNC_SPLIT_RETRY_MAX,
+            )
+        logger.warning(
+            f"[{self.request.id}] FORWARD TASK: Empty chunks list received for source {original_source}")
+
+    return chunks, split_async, original_source, original_index_name, filename
+
+
+def _extract_error_code_from_es_response(
+    parsed_body: Optional[Dict[str, Any]],
+    text: str,
+) -> Optional[str]:
+    """
+    Pull an ``error_code`` out of an error response.
+
+    Looks at the parsed body first: the top-level ``error_code``, overridden by
+    one found in ``detail`` (either a dict or a JSON string). When that yields
+    nothing, falls back to a regex search of the raw ``text``. Returns None when
+    no code is found.
+    """
+    found = None
+    if isinstance(parsed_body, dict):
+        found = parsed_body.get("error_code")
+        detail = parsed_body.get("detail")
+        if isinstance(detail, dict):
+            nested_code = detail.get("error_code")
+            if nested_code:
+                found = nested_code
+        elif isinstance(detail, str):
+            nested = _parse_json_or_none(detail)
+            if isinstance(nested, dict):
+                found = nested.get("error_code", found)
+
+    if found:
+        return found
+
+    try:
+        hit = re.search(
+            r'["\']error_code["\']\s*:\s*["\']([^"\']+)["\']', text)
+    except Exception:
+        return None
+    if hit:
+        return hit.group(1)
+    return None
+
+
+def _send_chunks_to_es(
+    chunks: List[Dict[str, Any]],
+    index_name: str,
+    authorization: Optional[str],
+    task_id: Optional[str] = None,
+    source: str = "",
+    original_filename: str = "",
+    large_mode: bool = False,
+) -> Dict[str, Any]:
+    """
+    POST a batch of chunks to ``{ELASTICSEARCH_SERVICE}/indices/{index_name}/documents``.
+
+    The coroutine is driven to completion through ``run_async``.
+
+    Args:
+        chunks: JSON-serialisable chunk payload sent as the request body.
+        index_name: Target index, interpolated into the route.
+        authorization: Value for the ``Authorization`` header; omitted when falsy.
+        task_id: Value for the ``X-Task-Id`` header; omitted when falsy.
+        source: Only used to enrich error payloads.
+        original_filename: Only used to enrich error payloads.
+        large_mode: When true, adds ``large_mode=true`` to the query string.
+
+    Returns:
+        The decoded JSON response body.
+
+    Raises:
+        The exception produced by ``_build_forward_error`` when the service
+        URL is unset, the connection fails, the request times out, the
+        service answers with HTTP >= 400, or anything else goes wrong.
+    """
+    async def _post():
+        elasticsearch_url = ELASTICSEARCH_SERVICE
+        if not elasticsearch_url:
+            raise _build_forward_error(
+                message="ELASTICSEARCH_SERVICE env is not set",
+                index_name=index_name,
+                source=source,
+                original_filename=original_filename,
+            )
+        route_url = f"/indices/{index_name}/documents"
+        full_url = elasticsearch_url + route_url
+        headers = {"Content-Type": "application/json"}
+        if authorization:
+            headers["Authorization"] = authorization
+        if task_id:
+            headers["X-Task-Id"] = task_id
+        try:
+            # `ssl=False` is the current spelling of the deprecated
+            # `verify_ssl=False`; certificate verification stays disabled.
+            connector = aiohttp.TCPConnector(ssl=False)
+            timeout = aiohttp.ClientTimeout(total=600)
+
+            request_params: Dict[str, str] = {}
+
+            if large_mode:
+                request_params["large_mode"] = "true"
+
+            async with aiohttp.ClientSession(connector=connector, timeout=timeout) as session:
+                async with session.post(
+                    full_url,
+                    headers=headers,
+                    json=chunks,
+                    params=request_params,
+                    raise_for_status=False
+                ) as response:
+                    text = await response.text()
+                    status = response.status
+                    parsed_body = _parse_json_or_none(text)
+
+                    if status >= 400:
+                        # Both raises below are caught by the generic
+                        # handler at the bottom and re-wrapped, so the
+                        # error_code JSON ends up embedded in its message.
+                        error_code = _extract_error_code_from_es_response(
+                            parsed_body, text)
+                        if error_code:
+                            raise Exception(json.dumps({
+                                "error_code": error_code
+                            }, ensure_ascii=False))
+
+                        raise Exception(
+                            f"ElasticSearch service returned HTTP {status}")
+
+                    result = parsed_body if isinstance(parsed_body, dict) else await response.json()
+                    return result
+
+        except aiohttp.ClientConnectorError as e:
+            logger.error(
+                f"[{task_id}] FORWARD TASK: Connection error to {full_url}: {str(e)}")
+            raise _build_forward_error(
+                message=f"Failed to connect to API: {str(e)}",
+                index_name=index_name,
+                source=source,
+                original_filename=original_filename,
+            )
+        except asyncio.TimeoutError as e:
+            logger.warning(
+                f"[{task_id}] FORWARD TASK: Timeout when indexing documents: {str(e)}.")
+            raise _build_forward_error(
+                message=f"Timeout when indexing documents: {str(e)}",
+                index_name=index_name,
+                source=source,
+                original_filename=original_filename,
+            )
+        except Exception as e:
+            logger.error(
+                f"[{task_id}] FORWARD TASK: Unexpected error when indexing documents: {str(e)}.")
+            raise _build_forward_error(
+                message=f"Unexpected error when indexing documents: {str(e)}",
+                index_name=index_name,
+                source=source,
+                original_filename=original_filename,
+            )
+
+    return run_async(_post())
+
+
+@ray.remote(num_cpus=0)
+class GlobalRayActorPoolManager:
"""
- Creates a new, anonymous DataProcessorRayActor instance for each call.
- This allows for parallel execution of data processing tasks, with each
- task running in its own actor.
+ Cluster-wide shared actor pool manager.
+ A single detached manager serves all Celery worker processes.
"""
+
+ # warm_timeout_s: seconds to wait for a freshly created actor to answer ping().
+ # actors: handles of the actors that passed warm-up.
+ # rr_index: ever-increasing counter used by get_actor() for round-robin selection.
+ def __init__(self, warm_timeout_s: float):
+ self.warm_timeout_s = warm_timeout_s
+ self.actors: List[Any] = []
+ self.rr_index = 0
+
+ # Create one DataProcessorRayActor and block until its ping() returns or
+ # warm_timeout_s elapses. On any failure the actor is killed (best-effort),
+ # a warning is logged and None is returned instead of raising.
+ def _create_and_warm_actor(self) -> Optional[Any]:
+ actor = DataProcessorRayActor.remote()
+ try:
+ ray.get(actor.ping.remote(), timeout=self.warm_timeout_s)
+ return actor
+ except Exception as exc:
+ try:
+ ray.kill(actor, no_restart=True)
+ except Exception:
+ pass
+ logger.warning(
+ f"[GlobalRayActorPoolManager] Warm actor failed in {self.warm_timeout_s:.1f}s: {exc}"
+ )
+ return None
+
+ # Grow the pool towards `desired`, capped at `max_allowed` (itself floored at 1).
+ # The pool is never shrunk here, and actors that fail warm-up are skipped, so
+ # the returned pool size may be smaller or larger than `desired`.
+ def ensure_pool(self, desired: int, max_allowed: int) -> int:
+ desired = max(0, int(desired))
+ max_allowed = max(1, int(max_allowed))
+ desired = min(desired, max_allowed)
+ missing = max(0, desired - len(self.actors))
+ for _ in range(missing):
+ actor = self._create_and_warm_actor()
+ if actor is not None:
+ self.actors.append(actor)
+ return len(self.actors)
+
+ # Return the next actor in round-robin order, creating one on demand when the
+ # pool is empty. Raises RuntimeError if that on-demand warm-up fails.
+ # NOTE(review): actors are not re-checked after warm-up, so a handle to an
+ # actor that died later can still be returned - confirm this is acceptable.
+ def get_actor(self) -> Any:
+ if not self.actors:
+ actor = self._create_and_warm_actor()
+ if actor is None:
+ raise RuntimeError(
+ "Global actor pool is empty and actor warm-up failed")
+ self.actors.append(actor)
+ idx = self.rr_index % len(self.actors)
+ self.rr_index += 1
+ return self.actors[idx]
+
+
+# Return a handle to the named, detached GlobalRayActorPoolManager, creating
+# it when it does not exist yet. Three strategies are tried in order:
+# get-or-create in one call, plain lookup by name, then plain creation with a
+# final lookup if creation fails.
+def _get_or_create_global_pool_manager() -> Any:
with ray_init_lock:
init_ray_in_worker()
- actor = DataProcessorRayActor.remote()
- logger.debug(
- "Successfully created a new DataProcessorRayActor for a task.")
- return actor
+ # Prefer atomic get/create when supported.
+ # NOTE(review): the fallback below only triggers on TypeError - confirm that
+ # is what the installed Ray raises when `get_if_exists` is not supported.
+ try:
+ return GlobalRayActorPoolManager.options(
+ name=RAY_GLOBAL_ACTOR_POOL_NAME,
+ namespace=RAY_GLOBAL_ACTOR_POOL_NAMESPACE,
+ lifetime="detached",
+ get_if_exists=True,
+ ).remote(RAY_ACTOR_WARM_TIMEOUT_S)
+ except TypeError:
+ pass
+
+ # Fallback 1: the manager may already exist; any lookup failure is ignored
+ # and we move on to creating it.
+ try:
+ return ray.get_actor(
+ RAY_GLOBAL_ACTOR_POOL_NAME, namespace=RAY_GLOBAL_ACTOR_POOL_NAMESPACE)
+ except Exception:
+ pass
+
+ # Fallback 2: create it under the shared name.
+ try:
+ return GlobalRayActorPoolManager.options(
+ name=RAY_GLOBAL_ACTOR_POOL_NAME,
+ namespace=RAY_GLOBAL_ACTOR_POOL_NAMESPACE,
+ lifetime="detached",
+ ).remote(RAY_ACTOR_WARM_TIMEOUT_S)
+ except Exception:
+ # Name race: another worker may have created it in the meantime.
+ # If this lookup fails too, its exception propagates to the caller.
+ return ray.get_actor(
+ RAY_GLOBAL_ACTOR_POOL_NAME, namespace=RAY_GLOBAL_ACTOR_POOL_NAMESPACE)
+
+
+def prewarm_ray_actors(target_size: Optional[int] = None) -> int:
+    """
+    Grow the global shared Ray actor pool and report its resulting size.
+
+    Args:
+        target_size: Desired number of warm actors. ``None`` falls back to
+            ``RAY_GLOBAL_ACTOR_POOL_SIZE``; any other value is coerced with
+            ``int()`` and floored at 0.
+
+    Returns:
+        The pool size returned by the manager's ``ensure_pool`` call.
+    """
+    if target_size is None:
+        wanted = RAY_GLOBAL_ACTOR_POOL_SIZE
+    else:
+        wanted = max(0, int(target_size))
+
+    pool_manager = _get_or_create_global_pool_manager()
+    ensure_ref = pool_manager.ensure_pool.remote(
+        desired=wanted, max_allowed=_estimate_parallel_parts())
+    pool_size = ray.get(ensure_ref)
+
+    logger.info(
+        f"Global Ray actor pool ready: current={pool_size}, desired={wanted}"
+    )
+    return pool_size
+
+
+def get_ray_actor() -> Any:
+    """
+    Fetch one actor handle from the global shared pool.
+
+    Blocks on the manager's ``get_actor`` call and returns its result.
+    """
+    pool_manager = _get_or_create_global_pool_manager()
+    actor_ref = pool_manager.get_actor.remote()
+    return ray.get(actor_ref)
+
+
+def _get_split_actor() -> Any:
+    """
+    Actor used for split operations.
+
+    Simply delegates to ``get_ray_actor`` so that splitting draws from the
+    same actor pool as processing.
+    """
+    split_actor = get_ray_actor()
+    return split_actor
class LoggingTask(Task):
@@ -221,6 +842,473 @@ def on_retry(self, exc, task_id, args, kwargs, einfo):
return super().on_retry(exc, task_id, args, kwargs, einfo)
+@app.task(bind=True, base=LoggingTask, name='data_process.tasks.process_part', queue='process_part_q')
+def process_part(
+    self,
+    part_bytes: bytes,
+    filename: str,
+    chunking_strategy: str,
+    part_redis_key: str,
+    source: Optional[str] = None,
+    source_type: Optional[str] = None,
+    model_id: Optional[int] = None,
+    tenant_id: Optional[str] = None,
+    **params
+) -> Dict[str, Any]:
+    """
+    Hidden sub-task: process one file part with a Ray actor and cache the
+    resulting chunks in Redis under ``part_redis_key``.
+
+    Args:
+        part_bytes: Raw bytes of this part.
+        filename: File name handed to the actor's ``process_bytes``.
+        chunking_strategy: Chunking strategy handed to ``process_bytes``.
+        part_redis_key: Redis key that receives the JSON-encoded chunk list.
+        source: Accepted for signature compatibility; not used here.
+        source_type: Accepted for signature compatibility; not used here.
+        model_id: Forwarded to ``process_bytes``.
+        tenant_id: Forwarded to ``process_bytes``.
+        **params: Extra keyword arguments forwarded to ``process_bytes``.
+
+    Returns:
+        ``{"part_redis_key": ..., "chunks_count": n}``. Failures inside the
+        ``try`` block are logged and reported as ``chunks_count == 0`` rather
+        than raised; a failure to obtain the actor still propagates.
+    """
+    part_ttl_s = 2 * 60 * 60
+    actor = get_ray_actor()
+    try:
+        # Fail fast: without Redis the result could not be stored anyway, so
+        # do not spend Ray time on the part first.
+        if not REDIS_BACKEND_URL:
+            raise RuntimeError("REDIS_BACKEND_URL not configured")
+
+        chunks_ref = actor.process_bytes.remote(
+            part_bytes,
+            filename,
+            chunking_strategy,
+            task_id=None,
+            model_id=model_id,
+            tenant_id=tenant_id,
+            **params
+        )
+        chunks = ray.get(chunks_ref) or []
+
+        import redis
+        client = redis.Redis.from_url(REDIS_BACKEND_URL, decode_responses=True)
+        # One SET with EX writes value and TTL atomically, so a crash can no
+        # longer leave the payload behind without an expiry.
+        client.set(
+            part_redis_key,
+            json.dumps(chunks, ensure_ascii=False),
+            ex=part_ttl_s,
+        )
+
+        return {
+            "part_redis_key": part_redis_key,
+            "chunks_count": len(chunks),
+        }
+    except Exception as e:
+        logger.error(
+            f"[process_part] Failed to process part for '{filename}': {str(e)}")
+        return {
+            "part_redis_key": part_redis_key,
+            "chunks_count": 0,
+        }
+
+
+@app.task(bind=True, base=LoggingTask, name='data_process.tasks.aggregate_parts', queue='process_part_q')
+def aggregate_parts(
+    self,
+    parts_results: List[List[Dict[str, Any]]],
+    source: Optional[str] = None,
+    index_name: Optional[str] = None,
+    original_filename: Optional[str] = None
+) -> Dict[str, Any]:
+    """
+    Hidden sub-task that flattens per-part chunk lists into one list.
+
+    Falsy entries in ``parts_results`` (and a falsy ``parts_results`` itself)
+    contribute nothing. The remaining arguments are echoed back unchanged.
+    """
+    flattened: List[Dict[str, Any]] = [
+        chunk
+        for part in (parts_results or [])
+        if part
+        for chunk in part
+    ]
+    result: Dict[str, Any] = {}
+    result["chunks"] = flattened
+    result["source"] = source
+    result["index_name"] = index_name
+    result["original_filename"] = original_filename
+    return result
+
+
+@app.task(bind=True, base=LoggingTask, name='data_process.tasks.aggregate_store_chunks', queue='process_part_q')
+def aggregate_store_chunks(
+    self,
+    parts_results: List[Dict[str, Any]],
+    redis_key: str,
+    source: Optional[str] = None,
+    index_name: Optional[str] = None,
+    original_filename: Optional[str] = None
+) -> Dict[str, Any]:
+    """
+    Hidden sub-task to aggregate part chunks and store them into Redis for the forward task.
+
+    Each entry of ``parts_results`` is expected to carry a ``part_redis_key``.
+    The JSON list stored under that key is appended to the merged result and
+    the part key is deleted (best-effort). The merged list is written to
+    ``redis_key`` and a ``"{redis_key}:ready"`` flag is set afterwards; both
+    expire after two hours.
+
+    Args:
+        parts_results: Results of the part tasks.
+        redis_key: Destination key for the merged chunk list.
+        source: Echoed in the result and in error payloads.
+        index_name: Echoed in the result and in error payloads.
+        original_filename: Echoed in the result and in error payloads.
+
+    Returns:
+        A dict with ``chunks_count``, ``redis_key``, ``source``,
+        ``index_name`` and ``original_filename``.
+
+    Raises:
+        Exception: With a JSON payload, when Redis is not configured or any
+            Redis operation fails.
+    """
+    if not REDIS_BACKEND_URL:
+        raise Exception(json.dumps({
+            "message": "REDIS_BACKEND_URL not configured to store chunks",
+            "index_name": index_name,
+            "task_name": "process",
+            "source": source,
+            "original_filename": original_filename
+        }, ensure_ascii=False))
+
+    ttl_s = 2 * 60 * 60
+    try:
+        import redis
+        client = redis.Redis.from_url(
+            REDIS_BACKEND_URL, decode_responses=True)
+
+        merged: List[Dict[str, Any]] = []
+        for part_result in parts_results or []:
+            part_key = (part_result or {}).get("part_redis_key")
+            if not part_key:
+                continue
+            cached = client.get(part_key)
+            if not cached:
+                # A missing payload means that part contributes no chunks;
+                # make the gap visible instead of dropping it silently.
+                logger.warning(
+                    f"[{self.request.id}] PROCESS TASK: No cached payload for part key '{part_key}'; skipping")
+                continue
+            try:
+                part_chunks = json.loads(cached)
+            except Exception as decode_exc:
+                logger.warning(
+                    f"[{self.request.id}] PROCESS TASK: Undecodable payload for part key '{part_key}': {decode_exc}")
+                part_chunks = None
+            if isinstance(part_chunks, list):
+                merged.extend(part_chunks)
+            # best-effort cleanup for part payload key (now also runs for
+            # undecodable payloads, which previously lingered until expiry)
+            try:
+                client.delete(part_key)
+            except Exception:
+                pass
+
+        serialized = json.dumps(merged, ensure_ascii=False)
+        # SET with EX writes value and TTL atomically. The ready flag is
+        # written last so it is never visible before the data.
+        client.set(redis_key, serialized, ex=ttl_s)
+        ready_key = f"{redis_key}:ready"
+        client.set(ready_key, "1", ex=ttl_s)
+        logger.info(
+            f"[{self.request.id}] PROCESS TASK: Stored aggregated chunks in Redis at key '{redis_key}', count={len(merged)}")
+    except Exception as exc:
+        raise Exception(json.dumps({
+            "message": f"Failed to store chunks to Redis: {str(exc)}",
+            "index_name": index_name,
+            "task_name": "process",
+            "source": source,
+            "original_filename": original_filename
+        }, ensure_ascii=False)) from exc
+
+    return {
+        "chunks_count": len(merged),
+        "redis_key": redis_key,
+        "source": source,
+        "index_name": index_name,
+        "original_filename": original_filename
+    }
+
+
+@app.task(bind=True, base=LoggingTask, name='data_process.tasks.forward_part', queue='forward_q')
+def forward_part(
+    self,
+    chunks: List[Dict[str, Any]],
+    index_name: str,
+    authorization: Optional[str] = None,
+    parent_task_id: Optional[str] = None,
+    parent_total_chunks: Optional[int] = None,
+    source: Optional[str] = None,
+    original_filename: Optional[str] = None,
+    batch_index: Optional[int] = None,
+    total_batches: Optional[int] = None,
+    large_mode: Optional[bool] = False,
+) -> Dict[str, Any]:
+    """
+    Forward sub-task that indexes one chunk batch.
+
+    Args:
+        chunks: Batch of formatted chunks to index.
+        index_name: Target index.
+        authorization: Optional ``Authorization`` header value.
+        parent_task_id: Id of the parent forward task; enables the
+            cancellation check and per-batch progress updates.
+        parent_total_chunks: Total chunk count reported with progress updates.
+        source: Used in error payloads.
+        original_filename: Used in error payloads.
+        batch_index: Position of this batch, echoed in the result and logs.
+        total_batches: Number of batches, echoed in the result and logs.
+        large_mode: Forwarded to ``_send_chunks_to_es``.
+
+    Returns:
+        A dict with ``success``, ``total_indexed``, ``total_submitted``,
+        ``batch_index`` and ``total_batches``. When the parent task is marked
+        as cancelled the batch is skipped and the dict has ``success`` False,
+        zero counts and ``cancelled`` True.
+
+    Raises:
+        celery.exceptions.Retry: On any indexing failure, until
+            ``FORWARD_REDIS_RETRY_MAX`` retries are used up.
+    """
+    # Respect cancellation from parent task if available. The check sits
+    # outside the retrying try-block and only the *lookup* is guarded:
+    # previously the cancellation error was raised inside a try whose
+    # `except Exception: pass` swallowed it, so cancelled batches were still
+    # indexed.
+    if parent_task_id:
+        cancelled = False
+        try:
+            redis_service = get_redis_service()
+            cancelled = bool(redis_service.is_task_cancelled(parent_task_id))
+        except Exception as cancel_check_exc:
+            logger.warning(
+                f"[{self.request.id}] FORWARD PART: Failed to check cancellation flag "
+                f"for task {parent_task_id}: {cancel_check_exc}"
+            )
+        if cancelled:
+            logger.info(
+                f"[{self.request.id}] FORWARD PART: Parent task {parent_task_id} marked as cancelled; "
+                f"skipping batch {batch_index}/{total_batches}"
+            )
+            # Return instead of raising: raising here would only feed the
+            # retry loop below with a condition that cannot recover.
+            return {
+                "success": False,
+                "cancelled": True,
+                "total_indexed": 0,
+                "total_submitted": 0,
+                "batch_index": batch_index,
+                "total_batches": total_batches,
+            }
+
+    try:
+        es_result = _send_chunks_to_es(
+            chunks=chunks,
+            index_name=index_name,
+            authorization=authorization,
+            task_id=None,
+            source=source,
+            original_filename=original_filename,
+            large_mode=large_mode,
+        )
+
+        if not isinstance(es_result, dict) or not es_result.get("success"):
+            error_message = es_result.get(
+                "message", "Unknown error from main_server") if isinstance(es_result, dict) else "Unknown error"
+            raise Exception(json.dumps({
+                "message": f"main_server API error: {error_message}",
+                "index_name": index_name,
+                "task_name": "forward_part",
+                "source": source,
+                "original_filename": original_filename
+            }, ensure_ascii=False))
+
+        # Update parent task progress per finished batch so frontend can show real-time indexing count.
+        # Progress is cosmetic: a failure here must not fail the batch.
+        if parent_task_id:
+            try:
+                processed_delta = int(es_result.get("total_indexed", 0) or 0)
+                redis_service = get_redis_service()
+                redis_service.increment_progress_info(
+                    task_id=parent_task_id,
+                    delta_processed=processed_delta,
+                    total_chunks=parent_total_chunks,
+                )
+            except Exception as progress_exc:
+                logger.warning(
+                    f"[{self.request.id}] FORWARD PART: Failed to update parent progress "
+                    f"for task {parent_task_id}: {progress_exc}"
+                )
+
+        return {
+            "success": True,
+            "total_indexed": es_result.get("total_indexed", 0),
+            "total_submitted": es_result.get("total_submitted", len(chunks)),
+            "batch_index": batch_index,
+            "total_batches": total_batches,
+        }
+    except Exception as e:
+        retry_num = getattr(self.request, 'retries', 0)
+        logger.warning(
+            f"[{self.request.id}] FORWARD PART: Failed batch {batch_index}/{total_batches} "
+            f"(retry {retry_num + 1}/{FORWARD_REDIS_RETRY_MAX}): {str(e)}"
+        )
+        raise self.retry(
+            countdown=FORWARD_REDIS_RETRY_DELAY_S,
+            max_retries=FORWARD_REDIS_RETRY_MAX,
+            exc=e
+        )
+
+
+@app.task(bind=True, base=LoggingTask, name='data_process.tasks.aggregate_forward_parts', queue='forward_q')
+def aggregate_forward_parts(
+    self,
+    parts_results: List[Dict[str, Any]],
+    source: Optional[str] = None,
+    index_name: Optional[str] = None,
+    original_filename: Optional[str] = None
+) -> Dict[str, Any]:
+    """
+    Sum the ``total_indexed`` / ``total_submitted`` counters of all
+    ``forward_part`` results.
+
+    Falsy results are ignored and missing or falsy counters count as 0.
+    The returned dict always carries ``success`` True and echoes ``source``,
+    ``index_name`` and ``original_filename``.
+    """
+    summary: Dict[str, Any] = {
+        "success": True,
+        "total_indexed": 0,
+        "total_submitted": 0,
+        "source": source,
+        "index_name": index_name,
+        "original_filename": original_filename
+    }
+    for part in (parts_results or []):
+        if part:
+            summary["total_indexed"] += int(part.get("total_indexed", 0) or 0)
+            summary["total_submitted"] += int(part.get("total_submitted", 0) or 0)
+    return summary
+
+
+def _split_file_for_processing(
+    request_id: str,
+    source: str,
+    source_type: str,
+    task_id: str,
+    params: Dict[str, Any],
+    file_data: Optional[bytes] = None,
+) -> List[bytes]:
+    """
+    Ask a Ray actor to split the source into parts of at most 5 MiB.
+
+    Note: ``max_size`` is removed from the caller's ``params`` dict in place
+    and replaced by the fixed 5 MiB limit for the split call.
+
+    Args:
+        request_id: Only used as a log prefix.
+        source: Source path or URL, passed to ``split_file``.
+        source_type: Passed to ``split_file`` as ``destination``.
+        task_id: Passed to ``split_file``.
+        params: Extra keyword arguments for ``split_file`` (mutated, see above).
+        file_data: Optional pre-fetched content; only passed when not ``None``.
+
+    Returns:
+        Whatever ``split_file`` returns (logged as a list of byte parts).
+    """
+    max_size = 5 * 1024 * 1024
+    params.pop("max_size", None)
+    logger.info(
+        f"[{request_id}] PROCESS TASK: Splitting file before processing (max_size={max_size})")
+
+    # Time how long it takes to obtain an actor handle.
+    actor_t0 = time.perf_counter()
+    split_actor = _get_split_actor()
+    actor_wait = time.perf_counter() - actor_t0
+    logger.info(
+        f"[{request_id}] PROCESS TASK: split actor ready in {actor_wait:.3f}s")
+
+    # Time the split call itself.
+    rpc_t0 = time.perf_counter()
+    split_kwargs: Dict[str, Any] = dict(
+        source=source,
+        destination=source_type,
+        task_id=task_id,
+        max_size=max_size,
+    )
+    split_kwargs.update(params)
+    if file_data is not None:
+        split_kwargs["file_data"] = file_data
+
+    parts = ray.get(split_actor.split_file.remote(**split_kwargs))
+    rpc_elapsed = time.perf_counter() - rpc_t0
+    logger.info(
+        f"[{request_id}] PROCESS TASK: split_file RPC done in {rpc_elapsed:.3f}s "
+        f"(source_type={source_type})")
+
+    if not parts:
+        return parts
+
+    sizes = [len(part) for part in parts]
+    total_bytes = sum(sizes)
+    smallest = min(sizes)
+    largest = max(sizes)
+    mean_size = total_bytes / len(sizes)
+    logger.info(
+        f"[{request_id}] PROCESS TASK: Split stats: parts={len(sizes)}, "
+        f"total={total_bytes/1024/1024:.2f}MB, "
+        f"min={smallest/1024:.2f}KB, max={largest/1024:.2f}KB, avg={mean_size/1024:.2f}KB")
+    return parts
+
+
+def _run_processing_for_parts(
+    request_id: str,
+    source: str,
+    source_type: str,
+    task_id: str,
+    chunking_strategy: str,
+    filename_for_processing: str,
+    parts: List[bytes],
+    index_name: Optional[str],
+    original_filename: Optional[str],
+    embedding_model_id: Optional[int],
+    tenant_id: Optional[str],
+    params: Dict[str, Any],
+) -> Tuple[bool, Optional[List[Dict[str, Any]]], Optional[int]]:
+    """
+    Process the split parts, choosing a strategy by part count.
+
+    * no parts: process the whole file through the actor's ``process_file``;
+    * one part: process it directly through ``process_bytes``;
+    * several parts: dispatch one ``process_part`` task per part in a chord
+      whose callback is ``aggregate_store_chunks``, then wait through
+      ``_wait_for_split_ready`` on ``dp:{task_id}:chunks``.
+
+    Returns:
+        ``(split_async, chunks, split_chunk_count)``. For the first two
+        strategies this is ``(False, <chunks>, None)``; for the chord it is
+        ``(True, None, <value returned by _wait_for_split_ready>)``.
+    """
+    pending_ref = None
+    if not parts:
+        logger.warning(
+            f"[{request_id}] PROCESS TASK: Split returned no parts; fallback to full-file processing")
+        pending_ref = get_ray_actor().process_file.remote(
+            source,
+            chunking_strategy,
+            destination=source_type,
+            task_id=task_id,
+            model_id=embedding_model_id,
+            tenant_id=tenant_id,
+            **params
+        )
+    elif len(parts) == 1:
+        pending_ref = get_ray_actor().process_bytes.remote(
+            parts[0],
+            filename_for_processing,
+            chunking_strategy,
+            task_id=None,
+            model_id=embedding_model_id,
+            tenant_id=tenant_id,
+            **params
+        )
+
+    if pending_ref is not None:
+        # Synchronous path: block until the single Ray call finishes.
+        logger.info(
+            f"[{request_id}] PROCESS TASK: Waiting for Ray processing to complete...")
+        return False, ray.get(pending_ref), None
+
+    # Asynchronous path: fan out one Celery task per part.
+    part_count = len(parts)
+    redis_key = f"dp:{task_id}:chunks"
+    part_signatures = [
+        process_part.s(
+            part_bytes=part,
+            filename=filename_for_processing,
+            chunking_strategy=chunking_strategy,
+            part_redis_key=f"dp:{task_id}:part:{idx}",
+            source=source,
+            source_type=source_type,
+            model_id=embedding_model_id,
+            tenant_id=tenant_id,
+            **params
+        )
+        for idx, part in enumerate(parts)
+    ]
+    on_all_done = aggregate_store_chunks.s(
+        redis_key=redis_key,
+        source=source,
+        index_name=index_name,
+        original_filename=original_filename
+    ).set(queue='process_part_q')
+    logger.info(
+        f"[{request_id}] PROCESS TASK: Dispatching {part_count} part tasks...")
+    chord(group(part_signatures))(on_all_done)
+
+    split_wait_timeout = _compute_split_wait_timeout(part_count)
+    logger.info(
+        f"[{request_id}] PROCESS TASK: Waiting split aggregation, timeout={split_wait_timeout}s, "
+        f"parts={part_count}, est_parallel={_estimate_parallel_parts()}")
+    aggregated_count = _wait_for_split_ready(
+        redis_key=redis_key,
+        timeout_s=split_wait_timeout,
+        poll_interval_ms=DP_REDIS_CHUNKS_POLL_INTERVAL_MS,
+    )
+    return True, None, aggregated_count
+
+
+def _process_source_with_split(
+    request_id: str,
+    source: str,
+    source_type: str,
+    task_id: str,
+    chunking_strategy: str,
+    index_name: Optional[str],
+    original_filename: Optional[str],
+    embedding_model_id: Optional[int],
+    tenant_id: Optional[str],
+    params: Dict[str, Any],
+    file_data: Optional[bytes] = None,
+) -> Tuple[bool, Optional[List[Dict[str, Any]]], Optional[int]]:
+    """
+    Split the source, process the parts and make sure the chunks are sent to
+    Redis under ``dp:{task_id}:chunks``.
+
+    For the synchronous path the chunks are handed to an actor's
+    ``store_chunks_in_redis``; for the asynchronous path nothing is stored here.
+
+    Returns:
+        The ``(split_async, chunks, split_chunk_count)`` triple produced by
+        ``_run_processing_for_parts``.
+    """
+    parts = _split_file_for_processing(
+        request_id=request_id,
+        source=source,
+        source_type=source_type,
+        task_id=task_id,
+        params=params,
+        file_data=file_data,
+    )
+    # Prefer the user-facing name; fall back to the last path component.
+    filename_for_processing = original_filename or os.path.basename(source)
+    outcome = _run_processing_for_parts(
+        request_id=request_id,
+        source=source,
+        source_type=source_type,
+        task_id=task_id,
+        chunking_strategy=chunking_strategy,
+        filename_for_processing=filename_for_processing,
+        parts=parts,
+        index_name=index_name,
+        original_filename=original_filename,
+        embedding_model_id=embedding_model_id,
+        tenant_id=tenant_id,
+        params=params,
+    )
+    split_async, chunks, split_chunk_count = outcome
+
+    if split_async:
+        logger.info(
+            f"[{request_id}] PROCESS TASK: Async split finished with {split_chunk_count or 0} chunks")
+        return split_async, chunks, split_chunk_count
+
+    logger.info(
+        f"[{request_id}] PROCESS TASK: Ray processing completed, got {len(chunks) if chunks else 0} chunks")
+    redis_key = f"dp:{task_id}:chunks"
+    # NOTE(review): the returned object ref is not awaited, so the log line
+    # below is emitted without confirmation that the write has finished.
+    get_ray_actor().store_chunks_in_redis.remote(redis_key, chunks)
+    logger.info(
+        f"[{request_id}] PROCESS TASK: Stored chunks in Redis at key '{redis_key}'")
+    return split_async, chunks, split_chunk_count
+
+
+def _build_no_valid_chunks_error(
+    split_async: bool,
+    index_name: Optional[str],
+    source: str,
+    original_filename: Optional[str],
+) -> Exception:
+    """
+    Build (not raise) the exception used when processing yields zero chunks.
+
+    The exception message is a JSON object with ``message``, ``index_name``,
+    ``task_name`` ("process"), ``source``, ``original_filename`` and
+    ``error_code`` ("no_valid_chunks"). Only ``message`` depends on
+    ``split_async``.
+    """
+    if split_async:
+        reason = "Async split completed but produced 0 chunks"
+    else:
+        reason = "Ray processing completed but produced 0 chunks"
+
+    payload = {
+        "message": reason,
+        "index_name": index_name,
+        "task_name": "process",
+        "source": source,
+        "original_filename": original_filename,
+        "error_code": "no_valid_chunks"
+    }
+    return Exception(json.dumps(payload, ensure_ascii=False))
+
+
@app.task(bind=True, base=LoggingTask, name='data_process.tasks.process', queue='process_q')
def process(
self,
@@ -248,6 +1336,7 @@ def process(
"""
start_time = time.time()
task_id = self.request.id
+ # _warn_if_queue_mismatch("PROCESS TASK", "process_q", self.request)
logger.info(
f"[{self.request.id}] PROCESS TASK: source_type: {source_type}")
@@ -264,51 +1353,39 @@ def process(
'stage': 'extracting_text'
}
)
- # Get the data processor instance
- actor = get_ray_actor()
-
try:
# Process the file based on the source type
file_size_mb = 0
+ split_chunk_count = None
+ image_metadata_chunk_count = 0
+ elapsed_time = 0.0
+ chunks: Optional[List[Dict[str, Any]]] = None
+ split_async = False
+
if source_type == "local":
# Check file existence and size for optimization
if not os.path.exists(source):
raise FileNotFoundError(f"File does not exist: {source}")
file_size = os.path.getsize(source)
- file_size_mb = file_size / (1024 * 1024)
+ file_size_mb = file_size / (5 * 1024 * 1024)
logger.info(
f"[{self.request.id}] PROCESS TASK: File size: {file_size_mb:.2f}MB")
- # The unified actor call, mapping 'file' source_type to 'local' destination
- # Submit Ray work and WAIT for processing to complete
- logger.info(
- f"[{self.request.id}] PROCESS TASK: Submitting Ray processing for source='{source}', strategy='{chunking_strategy}', destination='{source_type}', model_id={embedding_model_id}")
- chunks_ref = actor.process_file.remote(
- source,
- chunking_strategy,
- destination=source_type,
+ split_async, chunks, split_chunk_count = _process_source_with_split(
+ request_id=self.request.id,
+ source=source,
+ source_type=source_type,
task_id=task_id,
- model_id=embedding_model_id,
+ chunking_strategy=chunking_strategy,
+ index_name=index_name,
+ original_filename=original_filename,
+ embedding_model_id=embedding_model_id,
tenant_id=tenant_id,
- **params
+ params=params,
)
- # Wait for Ray processing to complete (this keeps task in STARTED/"PROCESSING" state)
- logger.info(
- f"[{self.request.id}] PROCESS TASK: Waiting for Ray processing to complete...")
- chunks = ray.get(chunks_ref)
- logger.info(
- f"[{self.request.id}] PROCESS TASK: Ray processing completed, got {len(chunks) if chunks else 0} chunks")
-
- # Persist chunks into Redis via Ray (synchronous to ensure data is ready before forward task)
- redis_key = f"dp:{task_id}:chunks"
- actor.store_chunks_in_redis.remote(redis_key, chunks)
- logger.info(
- f"[{self.request.id}] PROCESS TASK: Stored chunks in Redis at key '{redis_key}'")
-
- end_time = time.time()
- elapsed_time = end_time - start_time
+ elapsed_time = time.time() - start_time
processing_speed = file_size_mb / \
elapsed_time if file_size_mb > 0 and elapsed_time > 0 else 0
logger.info(
@@ -318,33 +1395,32 @@ def process(
logger.info(
f"[{self.request.id}] PROCESS TASK: Processing from URL: {source}")
- # For URL source, core.py expects a non-local destination to trigger URL fetching
+ # Measure MinIO fetch time in process worker logs for observability
+ fetch_start = time.perf_counter()
+ file_stream = get_file_stream(source)
+ if file_stream is None:
+ raise FileNotFoundError(
+ f"Unable to fetch file from URL: {source}")
+ file_data = file_stream.read()
+ fetch_elapsed = time.perf_counter() - fetch_start
logger.info(
- f"[{self.request.id}] PROCESS TASK: Submitting Ray processing for URL='{source}', strategy='{chunking_strategy}', destination='{source_type}', model_id={embedding_model_id}")
- chunks_ref = actor.process_file.remote(
- source,
- chunking_strategy,
- destination=source_type,
+ f"[{self.request.id}] PROCESS TASK: MinIO fetch done in {fetch_elapsed:.3f}s, "
+ f"bytes={len(file_data)}")
+
+ split_async, chunks, split_chunk_count = _process_source_with_split(
+ request_id=self.request.id,
+ source=source,
+ source_type=source_type,
task_id=task_id,
- model_id=embedding_model_id,
+ chunking_strategy=chunking_strategy,
+ index_name=index_name,
+ original_filename=original_filename,
+ embedding_model_id=embedding_model_id,
tenant_id=tenant_id,
- **params
+ params=params,
+ file_data=file_data,
)
- # Wait for Ray processing to complete (this keeps task in STARTED/"PROCESSING" state)
- logger.info(
- f"[{self.request.id}] PROCESS TASK: Waiting for Ray processing to complete...")
- chunks = ray.get(chunks_ref)
- logger.info(
- f"[{self.request.id}] PROCESS TASK: Ray processing completed, got {len(chunks) if chunks else 0} chunks")
-
- # Persist chunks into Redis via Ray (synchronous to ensure data is ready before forward task)
- redis_key = f"dp:{task_id}:chunks"
- actor.store_chunks_in_redis.remote(redis_key, chunks)
- logger.info(
- f"[{self.request.id}] PROCESS TASK: Stored chunks in Redis at key '{redis_key}'")
-
- end_time = time.time()
- elapsed_time = end_time - start_time
+ elapsed_time = time.time() - start_time
logger.info(
f"[{self.request.id}] PROCESS TASK: URL processing completed in {elapsed_time:.2f}s")
@@ -353,23 +1429,52 @@ def process(
raise NotImplementedError(
f"Source type '{source_type}' not yet supported")
- chunk_count = len(chunks) if chunks else 0
- if chunk_count == 0:
- raise Exception(json.dumps({
- "message": "Ray processing completed but produced 0 chunks",
- "index_name": index_name,
- "task_name": "process",
- "source": source,
- "original_filename": original_filename,
- "error_code": "no_valid_chunks"
- }, ensure_ascii=False))
+ if split_async:
+ chunk_count = split_chunk_count or 0
+ if chunk_count == 0:
+ raise _build_no_valid_chunks_error(
+ split_async=True,
+ index_name=index_name,
+ source=source,
+ original_filename=original_filename,
+ )
+ # For async split, chunks are persisted in Redis; count image-metadata chunks from cached payload.
+ try:
+ if REDIS_BACKEND_URL:
+ import redis
+ redis_key = f"dp:{task_id}:chunks"
+ client = redis.Redis.from_url(
+ REDIS_BACKEND_URL, decode_responses=True)
+ cached = client.get(redis_key)
+ if cached:
+ cached_chunks = json.loads(cached)
+ if isinstance(cached_chunks, list):
+ image_metadata_chunk_count = _count_image_metadata_chunks(
+ cached_chunks)
+ except Exception as image_count_exc:
+ logger.warning(
+ f"[{self.request.id}] PROCESS TASK: Failed counting image metadata chunks for async split: {image_count_exc}")
+ else:
+ chunk_count = len(chunks) if chunks else 0
+ if chunk_count == 0:
+ raise _build_no_valid_chunks_error(
+ split_async=False,
+ index_name=index_name,
+ source=source,
+ original_filename=original_filename,
+ )
+ image_metadata_chunk_count = _count_image_metadata_chunks(chunks)
+
+ logger.info(
+ f"[{self.request.id}] PROCESS TASK: Chunk composition: total={chunk_count}, "
+ f"image_metadata={image_metadata_chunk_count}, text={max(0, chunk_count - image_metadata_chunk_count)}")
# Update task state to SUCCESS after Ray processing completes
# This transitions from STARTED (PROCESSING) to SUCCESS (WAIT_FOR_FORWARDING)
self.update_state(
state=states.SUCCESS,
meta={
- 'chunks_count': len(chunks) if chunks else 0,
+ 'chunks_count': chunk_count,
'processing_time': elapsed_time,
'source': source,
'index_name': index_name,
@@ -391,7 +1496,9 @@ def process(
'source': source,
'index_name': index_name,
'original_filename': original_filename,
- 'task_id': task_id
+ 'task_id': task_id,
+ 'split_async': split_async,
+ 'image_metadata_chunk_count': image_metadata_chunk_count,
}
return returned_data
@@ -537,122 +1644,46 @@ def forward(
"""
start_time = time.time()
task_id = self.request.id
+ # _warn_if_queue_mismatch("FORWARD TASK", "forward_q", self.request)
original_source = source
original_index_name = index_name
filename = original_filename
try:
- # Before doing any heavy work, check whether this task has been
- # explicitly cancelled (for example, because the user deleted the
- # document from the knowledge base configuration page).
- try:
- redis_service = get_redis_service()
- if redis_service.is_task_cancelled(task_id):
- logger.info(
- f"[{self.request.id}] FORWARD TASK: Detected cancellation flag for task {task_id}; "
- f"skipping chunk forwarding for source '{source}' in index '{index_name}'."
- )
- # Treat this as a graceful early exit. We still return a
- # structured payload so callers can consider the task done.
- return {
- 'task_id': task_id,
- 'source': source,
- 'index_name': index_name,
- 'original_filename': original_filename,
- 'chunks_stored': 0,
- 'storage_time': 0,
- 'es_result': {
- "success": False,
- "message": "Indexing cancelled because document was deleted.",
- "total_indexed": 0,
- "total_submitted": 0,
- },
- }
- except Exception as cancel_check_exc:
- logger.warning(
- f"[{self.request.id}] FORWARD TASK: Failed to check cancellation flag for task {task_id}: "
- f"{cancel_check_exc}"
- )
+ ctx = _init_forward_context(
+ task_id=task_id,
+ request_id=str(self.request.id),
+ start_time=start_time,
+ source=source,
+ index_name=index_name,
+ source_type=source_type,
+ original_filename=original_filename,
+ )
- chunks = processed_data.get('chunks')
- # If chunks are not in payload, try loading from Redis via the redis_key
- if (not chunks) and processed_data.get('redis_key'):
- redis_key = processed_data.get('redis_key')
- if not REDIS_BACKEND_URL:
- raise Exception(json.dumps({
- "message": "REDIS_BACKEND_URL not configured to retrieve chunks",
- "index_name": original_index_name,
- "task_name": "forward",
- "source": original_source,
- "original_filename": filename
- }, ensure_ascii=False))
- try:
- import redis
- client = redis.Redis.from_url(
- REDIS_BACKEND_URL, decode_responses=True)
- cached = client.get(redis_key)
- if cached:
- try:
- logger.debug(
- f"[{self.request.id}] FORWARD TASK: Retrieved Redis key '{redis_key}', payload_length={len(cached)}")
- chunks = json.loads(cached)
- except json.JSONDecodeError as jde:
- # Log raw prefix to help diagnose incorrect writes
- raw_preview = cached[:120] if isinstance(
- cached, str) else str(type(cached))
- logger.error(
- f"[{self.request.id}] FORWARD TASK: JSON decode error for key '{redis_key}': {str(jde)}; raw_prefix={raw_preview!r}")
- raise
- else:
- # No busy-wait: release the worker slot and retry later
- retry_num = getattr(self.request, 'retries', 0)
- logger.info(
- f"[{self.request.id}] FORWARD TASK: Chunks not yet available for key {redis_key}. Retry {retry_num + 1}/{FORWARD_REDIS_RETRY_MAX} in {FORWARD_REDIS_RETRY_DELAY_S}s")
- raise self.retry(
- countdown=FORWARD_REDIS_RETRY_DELAY_S,
- max_retries=FORWARD_REDIS_RETRY_MAX,
- exc=Exception(json.dumps({
- "message": "Chunks not ready in Redis; will retry",
- "index_name": original_index_name,
- "task_name": "forward",
- "source": original_source,
- "original_filename": filename
- }, ensure_ascii=False))
- )
- except Retry:
- raise
- except Exception as exc:
- raise Exception(json.dumps({
- "message": f"Failed to retrieve chunks from Redis: {str(exc)}",
- "index_name": original_index_name,
- "task_name": "forward",
- "source": original_source,
- "original_filename": filename
- }, ensure_ascii=False))
- if processed_data.get('source'):
- original_source = processed_data.get('source')
- if processed_data.get('index_name'):
- original_index_name = processed_data.get('index_name')
- if processed_data.get('original_filename'):
- filename = processed_data.get('original_filename')
- logger.info(
- f"[{self.request.id}] FORWARD TASK: Received data for source '{original_source}' with {len(chunks) if chunks else 'None'} chunks")
+ # Before doing any heavy work, check whether this task has been explicitly cancelled.
+ if _is_forward_task_cancelled(ctx):
+ logger.info(
+ f"[{self.request.id}] FORWARD TASK: Detected cancellation flag for task {task_id}; "
+ f"skipping chunk forwarding for source '{source}' in index '{index_name}'."
+ )
+ return _build_forward_cancelled_result(ctx)
+
+ chunks, split_async, original_source, original_index_name, filename = _load_forward_chunks(
+ self,
+ processed_data=processed_data,
+ original_source=original_source,
+ original_index_name=original_index_name,
+ filename=filename,
+ )
# Calculate total chunks for progress tracking
total_chunks = len(chunks) if chunks else 0
-
- if chunks is None:
- raise Exception(json.dumps({
- "message": "No chunks received for forwarding",
- "index_name": original_index_name,
- "task_name": "forward",
- "source": original_source,
- "original_filename": original_filename
- }, ensure_ascii=False))
- if len(chunks) == 0:
- logger.warning(
- f"[{self.request.id}] FORWARD TASK: Empty chunks list received for source {original_source}")
formatted_chunks = []
+ # Compute once per file to avoid repeated IO/MinIO calls inside loop
+ file_size = get_file_size(source_type, original_source) if isinstance(
+ original_source, str) else 0
+ filename_resolved = filename or (os.path.basename(original_source) if original_source and isinstance(
+ original_source, str) else "")
for i, chunk in enumerate(chunks):
# Extract text and metadata
content = chunk.get("content", "")
@@ -664,20 +1695,18 @@ def forward(
f"[{self.request.id}] FORWARD TASK: Chunk {i+1} has empty text content, skipping")
continue
- file_size = get_file_size(source_type, original_source) if isinstance(
- original_source, str) else 0
-
# Format as expected by the Elasticsearch API
formatted_chunk = {
"metadata": metadata,
- "filename": filename or (os.path.basename(original_source) if original_source and isinstance(original_source, str) else ""),
+ "filename": filename_resolved,
"path_or_url": original_source,
"content": content,
- "process_source": "Unstructured",
+ "process_source": chunk.get("process_source", "Unstructured"),
"source_type": source_type,
"file_size": file_size,
"create_time": metadata.get("creation_date"),
"date": metadata.get("date"),
+ "index": i,
}
formatted_chunks.append(formatted_chunk)
@@ -691,112 +1720,6 @@ def forward(
"error_code": "no_valid_chunks"
}, ensure_ascii=False))
- async def index_documents():
- elasticsearch_url = ELASTICSEARCH_SERVICE
- if not elasticsearch_url:
- raise Exception(json.dumps({
- "message": "ELASTICSEARCH_SERVICE env is not set",
- "index_name": original_index_name,
- "task_name": "forward",
- "source": original_source,
- "original_filename": original_filename
- }, ensure_ascii=False))
- route_url = f"/indices/{original_index_name}/documents"
- full_url = elasticsearch_url + route_url
- headers = {"Content-Type": "application/json"}
- if authorization:
- headers["Authorization"] = authorization
- # Add task_id header for progress tracking
- headers["X-Task-Id"] = task_id
-
- try:
- connector = aiohttp.TCPConnector(verify_ssl=False)
- timeout = aiohttp.ClientTimeout(total=600)
-
- async with aiohttp.ClientSession(connector=connector, timeout=timeout) as session:
- async with session.post(
- full_url,
- headers=headers,
- json=formatted_chunks,
- raise_for_status=False
- ) as response:
- text = await response.text()
- status = response.status
- # Try parse JSON body for structured error_code/message
- parsed_body = None
- try:
- parsed_body = json.loads(text)
- except Exception:
- parsed_body = None
-
- if status >= 400:
- error_code = None
- if isinstance(parsed_body, dict):
- error_code = parsed_body.get("error_code")
- detail = parsed_body.get("detail")
- if isinstance(detail, dict) and detail.get("error_code"):
- error_code = detail.get("error_code")
- elif isinstance(detail, str):
- try:
- parsed_detail = json.loads(detail)
- if isinstance(parsed_detail, dict):
- error_code = parsed_detail.get(
- "error_code", error_code)
- except Exception:
- pass
-
- if not error_code:
- try:
- match = re.search(
- r'["\']error_code["\']\s*:\s*["\']([^"\']+)["\']', text)
- if match:
- error_code = match.group(1)
- except Exception:
- pass
-
- if error_code:
- # Raise flat payload to avoid nested JSON and preserve error_code
- raise Exception(json.dumps({
- "error_code": error_code
- }, ensure_ascii=False))
-
- raise Exception(
- f"ElasticSearch service returned HTTP {status}")
-
- result = parsed_body if isinstance(parsed_body, dict) else await response.json()
- return result
-
- except aiohttp.ClientConnectorError as e:
- logger.error(
- f"[{self.request.id}] FORWARD TASK: Connection error to {full_url}: {str(e)}")
- raise Exception(json.dumps({
- "message": f"Failed to connect to API: {str(e)}",
- "index_name": original_index_name,
- "task_name": "forward",
- "source": original_source,
- "original_filename": original_filename
- }, ensure_ascii=False))
- except asyncio.TimeoutError as e:
- logger.warning(
- f"[{self.request.id}] FORWARD TASK: Timeout when indexing documents: {str(e)}.")
- raise Exception(json.dumps({
- "message": f"Timeout when indexing documents: {str(e)}",
- "index_name": original_index_name,
- "task_name": "forward",
- "source": original_source,
- "original_filename": original_filename
- }, ensure_ascii=False))
- except Exception as e:
- logger.error(
- f"[{self.request.id}] FORWARD TASK: Unexpected error when indexing documents: {str(e)}.")
- raise Exception(json.dumps({
- "message": f"Unexpected error when indexing documents: {str(e)}",
- "index_name": original_index_name,
- "task_name": "forward",
- "source": original_source,
- "original_filename": original_filename
- }, ensure_ascii=False))
-
logger.info(
f"[{self.request.id}] FORWARD TASK: Starting ES indexing for {len(formatted_chunks)} chunks to index '{original_index_name}'...")
@@ -814,8 +1737,69 @@ async def index_documents():
'processed_chunks': 0 # Will be updated during vectorization via Redis
}
)
+ try:
+ redis_service = get_redis_service()
+ redis_service.save_progress_info(task_id, 0, total_chunks)
+ except Exception as progress_init_exc:
+ logger.warning(
+ f"[{self.request.id}] FORWARD TASK: Failed to initialize progress in Redis: "
+ f"{progress_init_exc}"
+ )
- es_result = run_async(index_documents())
+ if len(formatted_chunks) < FORWARD_ES_CHUNK_BATCH_SIZE:
+ es_result = _send_chunks_to_es(
+ chunks=formatted_chunks,
+ index_name=original_index_name,
+ authorization=authorization,
+ task_id=task_id,
+ source=original_source,
+ original_filename=original_filename,
+ large_mode=False,
+ )
+ else:
+ batches = _build_balanced_batches(
+ formatted_chunks=formatted_chunks,
+ batch_size=FORWARD_ES_CHUNK_BATCH_SIZE,
+ )
+ total_batches = len(batches)
+ image_chunks_total = sum(
+ 1 for chunk in formatted_chunks if chunk.get("process_source") == IMAGE_METADATA_PROCESS_SOURCE
+ )
+ image_distribution = [
+ sum(
+ 1
+ for chunk in batch
+ if chunk.get("process_source") == IMAGE_METADATA_PROCESS_SOURCE
+ )
+ for batch in batches
+ ]
+ logger.info(
+ f"[{self.request.id}] FORWARD TASK: Batch distribution ready: total_batches={total_batches}, "
+ f"batch_size={FORWARD_ES_CHUNK_BATCH_SIZE}, image_metadata_total={image_chunks_total}, "
+ f"image_per_batch={image_distribution}")
+ group_tasks = group(
+ forward_part.s(
+ chunks=batch,
+ index_name=original_index_name,
+ authorization=authorization,
+ parent_task_id=task_id,
+ parent_total_chunks=total_chunks,
+ source=original_source,
+ original_filename=original_filename,
+ batch_index=idx + 1,
+ total_batches=total_batches,
+ # If request was split into multiple groups, force all groups to use large path.
+ large_mode=True,
+ ).set(queue='forward_q') for idx, batch in enumerate(batches)
+ )
+ callback = aggregate_forward_parts.s(
+ source=original_source,
+ index_name=original_index_name,
+ original_filename=original_filename
+ ).set(queue='forward_q')
+ result = chord(group_tasks)(callback)
+ with allow_join_result():
+ es_result = result.get()
logger.debug(
f"[{self.request.id}] FORWARD TASK: API response from main_server for source '{original_source}': {es_result}")
@@ -884,6 +1868,7 @@ async def index_documents():
logger.info(
f"[{self.request.id}] FORWARD TASK: Successfully stored {len(chunks)} chunks to index {original_index_name} in {end_time - start_time:.2f}s")
+
return {
'task_id': task_id,
'source': original_source,
@@ -966,9 +1951,106 @@ async def index_documents():
raise
-@app.task(bind=True, base=LoggingTask, name='data_process.tasks.process_and_forward')
-def process_and_forward(
- self,
+@app.task(
+ bind=True,
+ base=LoggingTask,
+ name="data_process.tasks.cleanup_source",
+ queue="forward_q",
+)
+def cleanup_source(self, forward_result: Dict[str, Any]) -> Dict[str, Any]:
+ """
+ Conditionally delete the MinIO source file after successful indexing.
+
+ If the knowledge base is configured with preserve_source_file=false, call:
+ DELETE /indices/{index_name}/documents?path_or_url=...&scope=source_only
+ """
+ index_name = (forward_result or {}).get("index_name")
+ source = (forward_result or {}).get("source")
+
+ cleanup_info: Dict[str, Any] = {
+ "attempted": False,
+ "skipped_reason": None,
+ "success": None,
+ "http_status": None,
+ "response": None,
+ "error": None,
+ }
+
+ if not index_name or not source:
+ cleanup_info["skipped_reason"] = "missing_index_name_or_source"
+ forward_result = dict(forward_result or {})
+ forward_result["source_cleanup"] = cleanup_info
+ return forward_result
+
+ try:
+ record = get_knowledge_record({"index_name": index_name}) or {}
+ preserve_source_file = record.get("preserve_source_file", True)
+ except Exception as exc:
+ logger.warning(
+ "[%s] CLEANUP TASK: Failed to load knowledge config for index '%s': %s",
+ getattr(self.request, "id", "unknown"),
+ index_name,
+ exc,
+ )
+ cleanup_info["skipped_reason"] = "knowledge_record_lookup_failed"
+ forward_result = dict(forward_result or {})
+ forward_result["source_cleanup"] = cleanup_info
+ return forward_result
+
+ if preserve_source_file:
+ cleanup_info["skipped_reason"] = "preserve_source_file_true"
+ forward_result = dict(forward_result or {})
+ forward_result["source_cleanup"] = cleanup_info
+ return forward_result
+
+ cleanup_info["attempted"] = True
+ try:
+ resp = _delete_source_file_via_http_sync(
+ base_url=ELASTICSEARCH_SERVICE,
+ index_name=index_name,
+ path_or_url=source,
+ scope="source_only",
+ )
+ cleanup_info["http_status"] = resp.get("http_status")
+ cleanup_info["response"] = (
+ resp.get("response_json")
+ if resp.get("response_json") is not None
+ else resp.get("response_text")
+ )
+
+ ok = False
+ if isinstance(resp.get("response_json"), dict):
+ ok = bool(resp["response_json"].get("status") == "success")
+ elif resp.get("http_status") and 200 <= int(resp["http_status"]) < 300:
+ ok = True
+
+ cleanup_info["success"] = ok
+ if not ok:
+ logger.warning(
+ "[%s] CLEANUP TASK: Source-only delete did not succeed. index='%s' source='%s' http_status=%s",
+ getattr(self.request, "id", "unknown"),
+ index_name,
+ source,
+ cleanup_info["http_status"],
+ )
+ except Exception as exc:
+ cleanup_info["success"] = False
+ cleanup_info["error"] = str(exc)
+ logger.warning(
+ "[%s] CLEANUP TASK: Source-only delete failed. index='%s' source='%s' error=%s",
+ getattr(self.request, "id", "unknown"),
+ index_name,
+ source,
+ exc,
+ )
+
+ forward_result = dict(forward_result or {})
+ forward_result["source_cleanup"] = cleanup_info
+ return forward_result
+
+
+def submit_process_forward_chain(
+ *,
source: str,
source_type: str,
chunking_strategy: str,
@@ -976,30 +2058,14 @@ def process_and_forward(
original_filename: Optional[str] = None,
authorization: Optional[str] = None,
embedding_model_id: Optional[int] = None,
- tenant_id: Optional[str] = None
+ tenant_id: Optional[str] = None,
) -> str:
"""
- Combined task that chains processing and forwarding
-
- This task delegates to a chain of process -> forward
-
- Args:
- source: Source file path, URL, or text content
- source_type: source of the file("local", "minio")
- chunking_strategy: Strategy for chunking the document
- index_name: Name of the index to store documents
- original_filename: The original name of the file
- authorization: Authorization header for API calls
- embedding_model_id: Embedding model ID for chunk size configuration
- tenant_id: Tenant ID for retrieving model configuration
+ Build and enqueue a Celery chain: process -> forward -> cleanup_source.
Returns:
- Task ID of the chain
+ Celery chain task ID, or empty string if enqueue failed.
"""
- logger.info(
- f"Starting processing chain for {source}, original_filename={original_filename}, strategy={chunking_strategy}, index={index_name}, model_id={embedding_model_id}")
-
- # Create a task chain
task_chain = chain(
process.s(
source=source,
@@ -1016,20 +2082,66 @@ def process_and_forward(
source_type=source_type,
original_filename=original_filename,
authorization=authorization
- ).set(queue='forward_q')
+ ).set(queue='forward_q'),
+ cleanup_source.s().set(queue='forward_q'),
)
- # Execute the chain
result = task_chain.apply_async()
if result is None or not hasattr(result, 'id') or result.id is None:
logger.error(
"Celery chain apply_async() did not return a valid result or result.id")
return ""
- logger.info(f"Created task chain ID: {result.id}")
-
return result.id
+@app.task(bind=True, base=LoggingTask, name='data_process.tasks.process_and_forward')
+def process_and_forward(
+ self,
+ source: str,
+ source_type: str,
+ chunking_strategy: str,
+ index_name: Optional[str] = None,
+ original_filename: Optional[str] = None,
+ authorization: Optional[str] = None,
+ embedding_model_id: Optional[int] = None,
+ tenant_id: Optional[str] = None
+) -> str:
+ """
+ Combined task that chains processing and forwarding
+
+ This task delegates to a chain of process -> forward -> cleanup_source
+
+ Args:
+ source: Source file path, URL, or text content
+ source_type: source of the file("local", "minio")
+ chunking_strategy: Strategy for chunking the document
+ index_name: Name of the index to store documents
+ original_filename: The original name of the file
+ authorization: Authorization header for API calls
+ embedding_model_id: Embedding model ID for chunk size configuration
+ tenant_id: Tenant ID for retrieving model configuration
+
+ Returns:
+ Task ID of the chain, or an empty string if the chain could not be enqueued
+ """
+ logger.info(
+ f"Starting processing chain for {source}, original_filename={original_filename}, strategy={chunking_strategy}, index={index_name}, model_id={embedding_model_id}")
+
+ chain_id = submit_process_forward_chain(
+ source=source,
+ source_type=source_type,
+ chunking_strategy=chunking_strategy,
+ index_name=index_name,
+ original_filename=original_filename,
+ authorization=authorization,
+ embedding_model_id=embedding_model_id,
+ tenant_id=tenant_id,
+ )
+ if chain_id:
+ logger.info(f"Created task chain ID: {chain_id}")
+ return chain_id
+
+
@app.task(bind=True, base=LoggingTask, name='data_process.tasks.process_sync')
def process_sync(
self,
diff --git a/backend/data_process/worker.py b/backend/data_process/worker.py
index a5f5f4a27..48323869b 100644
--- a/backend/data_process/worker.py
+++ b/backend/data_process/worker.py
@@ -1,4 +1,4 @@
-"""
+"""
Celery worker script for data processing tasks
This script is used to start Celery workers for processing data
@@ -21,6 +21,7 @@
import os
import sys
import time
+import threading
import traceback
import ray
@@ -44,6 +45,7 @@
REDIS_URL,
WORKER_CONCURRENCY,
WORKER_NAME,
+ RAY_GLOBAL_ACTOR_POOL_SIZE,
)
from .app import app
@@ -200,6 +202,60 @@ def worker_ready_handler(**kwargs):
# Register health check endpoints, start monitoring, etc.
logger.debug("🔍 Worker is ready to receive tasks")
+ # Prewarm Ray actors for process-related queues to reduce first-task latency.
+ # IMPORTANT: run asynchronously so worker queue registration is never blocked.
+ try:
+ queue_set = {q.strip() for q in QUEUES.split(",") if q.strip()}
+ if "process_q" in queue_set or "process_part_q" in queue_set:
+ from data_process.tasks import prewarm_ray_actors
+
+ # Prewarm a cluster-global shared actor pool once at startup.
+ # Multiple workers may trigger this, but pool manager is idempotent.
+ target = RAY_GLOBAL_ACTOR_POOL_SIZE
+
+ def _prewarm_in_background():
+ try:
+ warmed = prewarm_ray_actors(target_size=target)
+ logger.info(
+ f"Prewarmed Ray actor pool in background, warmed_actors={warmed}, target={target}, queues={sorted(queue_set)}"
+ )
+ except Exception as exc:
+ logger.warning(f"Background prewarm failed: {exc}")
+
+ threading.Thread(target=_prewarm_in_background, daemon=True).start()
+ except Exception as exc:
+ logger.warning(f"Failed to schedule Ray actor prewarm on worker ready: {exc}")
+
+ # Periodic concurrency + Ray CPU availability log for process_part_q.
+ try:
+ queue_set = {q.strip() for q in QUEUES.split(",") if q.strip()}
+ if "process_part_q" in queue_set:
+ def _log_part_concurrency():
+ while True:
+ try:
+ inspector = app.control.inspect(timeout=1)
+ active = inspector.active() or {}
+ part_active = 0
+ for _, tasks in active.items():
+ for t in tasks or []:
+ if t.get("name") == "data_process.tasks.process_part":
+ part_active += 1
+ try:
+ ray_available = ray.available_resources() if ray.is_initialized() else {}
+ except Exception:
+ ray_available = {}
+ avail_cpu = ray_available.get("CPU", 0.0)
+ logger.info(
+ f"[process_part] active={part_active}, ray_available_cpu={avail_cpu}"
+ )
+ except Exception as exc:
+ logger.debug(f"Failed to collect process_part concurrency stats: {exc}")
+ time.sleep(5)
+
+ threading.Thread(target=_log_part_concurrency, daemon=True).start()
+ except Exception as exc:
+ logger.warning(f"Failed to start process_part concurrency logger: {exc}")
+
@worker_shutting_down.connect
def worker_shutdown_handler(**kwargs):
@@ -289,9 +345,9 @@ def validate_redis_connection() -> bool:
def start_worker():
"""Start Celery worker with appropriate settings"""
- # Get configuration parameters
+ # Use configured values as-is (no local worker-<pid> fallback), so launcher-assigned values always win.
queues = QUEUES
- worker_name = WORKER_NAME or f'worker-{os.getpid()}'
+ worker_name = WORKER_NAME
concurrency = WORKER_CONCURRENCY
logger.info(f"Start Celery worker '{worker_name}' with queues: {queues}")
diff --git a/backend/data_process_service.py b/backend/data_process_service.py
index 0576e01fc..23d3497d9 100644
--- a/backend/data_process_service.py
+++ b/backend/data_process_service.py
@@ -206,13 +206,21 @@ def start_workers(self):
logger.debug(f"Process-worker concurrency set to: {process_worker_concurrency}")
logger.debug(f"Forward-worker concurrency set to: {forward_worker_concurrency}")
- # Define worker configurations based on new architecture
+ # Define worker configurations based on split architecture:
+ # - process-worker handles orchestration (process_q)
+ # - process-part-worker handles split sub-tasks (process_part_q)
+ # - forward-worker handles vectorization/storage (forward_q)
workers_config = [
{
'name': 'process-worker',
'queue': 'process_q',
'concurrency': process_worker_concurrency
},
+ {
+ 'name': 'process-part-worker',
+ 'queue': 'process_part_q',
+ 'concurrency': process_worker_concurrency
+ },
{
'name': 'forward-worker',
'queue': 'forward_q',
@@ -243,7 +251,7 @@ def start_workers(self):
logging.basicConfig(level=logging.INFO, format='[%(asctime)s: %(levelname)s/%(name)s] %(message)s')
logger = logging.getLogger("data_process.worker_launcher")
-os.environ["QUEUES"] = "{config['queue']}"
+os.environ["QUEUES"] = "{config['queue']}" # backward compatibility
os.environ["WORKER_NAME"] = "{config['name']}"
os.environ["WORKER_CONCURRENCY"] = "{config['concurrency']}"
@@ -254,6 +262,10 @@ def start_workers(self):
logger.debug(f"Celery app instance: {{celery_app}}")
logger.debug(f"Attempting to start worker for queue: {config['queue']}")
from data_process.worker import start_worker
+ # Re-apply launcher values after imports in case .env override changed them.
+ os.environ["QUEUES"] = "{config['queue']}"
+ os.environ["WORKER_NAME"] = "{config['name']}"
+ os.environ["WORKER_CONCURRENCY"] = "{config['concurrency']}"
start_worker()
except ImportError as e:
logger.error(f"Import error: {{e}}")
@@ -564,7 +576,11 @@ def start_all_services(self):
if success_count > 0:
self.log_service_info()
-
+
+ # Start auto-summary scheduler
+ from services.auto_summary_scheduler import auto_summary_scheduler
+ auto_summary_scheduler.start()
+
return success_count == enabled_count
def log_service_info(self):
@@ -700,7 +716,11 @@ def stop_all_services(self):
logger.error(f"Final attempt to kill Flower process failed: {final_e}")
finally:
service_processes['flower'] = None
-
+
+ # Stop auto-summary scheduler
+ from services.auto_summary_scheduler import auto_summary_scheduler
+ auto_summary_scheduler.stop()
+
# Stop Redis last
if service_processes['redis']:
try:
diff --git a/backend/database/a2a_agent_db.py b/backend/database/a2a_agent_db.py
index 9becdd67b..c1d998272 100644
--- a/backend/database/a2a_agent_db.py
+++ b/backend/database/a2a_agent_db.py
@@ -29,6 +29,22 @@ def _get_db_session():
# Default cache TTL in seconds (24 hours)
DEFAULT_CACHE_TTL_HOURS = 24
+
+def _extract_base_url(url: str) -> str:
+ """Extract base URL (scheme + host + port) from a full URL.
+
+ Args:
+ url: Full URL, e.g., http://example.com/path/to/agent.json
+
+ Returns:
+ Base URL, e.g., http://example.com
+ """
+ from urllib.parse import urlparse
+ parsed = urlparse(url)
+ if parsed.port:
+ return f"{parsed.scheme}://{parsed.hostname}:{parsed.port}"
+ return f"{parsed.scheme}://{parsed.hostname}"
+
# Standard human-readable protocol label
PROTOCOL_HTTP_JSON = "HTTP+JSON"
PROTOCOL_JSONRPC = "JSONRPC"
@@ -51,27 +67,19 @@ def _generate_endpoint_id(agent_id: int) -> str:
def _extract_primary_interface(supported_interfaces: List[Dict[str, Any]]) -> tuple[str, str]:
- """Extract the primary interface (HTTP+JSON) from supported interfaces.
+ """Extract the primary interface (first one) from supported interfaces.
Args:
supported_interfaces: List of interface objects with protocolBinding, url, protocolVersion.
Returns:
Tuple of (agent_url, protocol_version).
- Falls back to first interface if HTTP+JSON not found.
+ Returns empty string for url if no interfaces found.
"""
if not supported_interfaces:
return "", "1.0"
- # Prefer HTTP+JSON
- for iface in supported_interfaces:
- if iface.get("protocolBinding", "").upper() in (PROTOCOL_HTTP_JSON, PROTOCOL_JSONRPC, PROTOCOL_GRPC):
- return (
- iface.get("url", ""),
- iface.get("protocolVersion", "1.0")
- )
-
- # Fall back to first interface
+ # Return the first interface to ensure URL and protocol are from the same interface
first = supported_interfaces[0]
return (
first.get("url", ""),
@@ -148,6 +156,7 @@ def create_external_agent_from_url(
version: Optional[str] = None,
streaming: bool = False,
supported_interfaces: Optional[List[Dict[str, Any]]] = None,
+ base_url: Optional[str] = None,
) -> Dict[str, Any]:
"""Create or update an external A2A agent discovered from URL.
@@ -162,6 +171,7 @@ def create_external_agent_from_url(
version: Agent version from Agent Card.
streaming: Whether this agent supports SSE streaming.
supported_interfaces: All supported protocol interfaces.
+ base_url: Base URL for health checks (service root address).
Returns:
Created agent information dict.
@@ -170,6 +180,10 @@ def create_external_agent_from_url(
expires_at = now + timedelta(hours=DEFAULT_CACHE_TTL_HOURS)
protocol_type = _extract_protocol_type(supported_interfaces)
+ # Extract base_url from source_url if not provided
+ if not base_url and source_url:
+ base_url = _extract_base_url(source_url)
+
with _get_db_session() as session:
# Check if agent already exists by source_url
existing = session.query(A2AExternalAgent).filter(
@@ -191,6 +205,8 @@ def create_external_agent_from_url(
existing.cached_at = now
existing.cache_expires_at = expires_at
existing.updated_by = user_id
+ if base_url:
+ existing.base_url = base_url
agent = existing
else:
# Create new record
@@ -210,6 +226,7 @@ def create_external_agent_from_url(
raw_card=raw_card,
cached_at=now,
cache_expires_at=expires_at,
+ base_url=base_url,
delete_flag='N'
)
session.add(agent)
@@ -226,6 +243,7 @@ def create_external_agent_from_url(
"streaming": agent.streaming,
"supported_interfaces": agent.supported_interfaces,
"source_type": agent.source_type,
+ "base_url": agent.base_url,
"is_available": agent.is_available,
"cached_at": agent.cached_at.isoformat() if agent.cached_at else None,
"cache_expires_at": agent.cache_expires_at.isoformat() if agent.cache_expires_at else None,
@@ -244,6 +262,7 @@ def create_external_agent_from_nacos(
version: Optional[str] = None,
streaming: bool = False,
supported_interfaces: Optional[List[Dict[str, Any]]] = None,
+ base_url: Optional[str] = None,
) -> Dict[str, Any]:
"""Create or update an external A2A agent discovered from Nacos.
@@ -259,6 +278,7 @@ def create_external_agent_from_nacos(
version: Agent version from Agent Card.
streaming: Whether this agent supports SSE streaming.
supported_interfaces: All supported protocol interfaces.
+ base_url: Base URL for health checks (service root address).
Returns:
Created agent information dict.
@@ -267,6 +287,10 @@ def create_external_agent_from_nacos(
expires_at = now + timedelta(hours=DEFAULT_CACHE_TTL_HOURS)
protocol_type = _extract_protocol_type(supported_interfaces)
+ # Extract base_url from agent_url if not provided
+ if not base_url and agent_url:
+ base_url = _extract_base_url(agent_url)
+
with _get_db_session() as session:
# Check if agent already exists by nacos_config_id + nacos_agent_name
existing = session.query(A2AExternalAgent).filter(
@@ -288,6 +312,8 @@ def create_external_agent_from_nacos(
existing.cached_at = now
existing.cache_expires_at = expires_at
existing.updated_by = user_id
+ if base_url:
+ existing.base_url = base_url
agent = existing
else:
agent = A2AExternalAgent(
@@ -307,6 +333,7 @@ def create_external_agent_from_nacos(
raw_card=raw_card,
cached_at=now,
cache_expires_at=expires_at,
+ base_url=base_url,
delete_flag='N'
)
session.add(agent)
@@ -323,6 +350,7 @@ def create_external_agent_from_nacos(
"streaming": agent.streaming,
"supported_interfaces": agent.supported_interfaces,
"source_type": agent.source_type,
+ "base_url": agent.base_url,
"is_available": agent.is_available,
"cached_at": agent.cached_at.isoformat() if agent.cached_at else None,
"cache_expires_at": agent.cache_expires_at.isoformat() if agent.cache_expires_at else None,
@@ -360,6 +388,7 @@ def get_external_agent_by_id(external_agent_id: int, tenant_id: str) -> Optional
"supported_interfaces": agent.supported_interfaces,
"source_type": agent.source_type,
"source_url": agent.source_url,
+ "base_url": agent.base_url,
"nacos_config_id": agent.nacos_config_id,
"nacos_agent_name": agent.nacos_agent_name,
"raw_card": agent.raw_card,
@@ -416,6 +445,8 @@ def list_external_agents(
"protocol_type": agent.protocol_type,
"supported_interfaces": agent.supported_interfaces,
"source_type": agent.source_type,
+ "source_url": agent.source_url,
+ "base_url": agent.base_url,
"is_available": agent.is_available,
"last_check_result": agent.last_check_result,
"create_time": agent.create_time.isoformat() if agent.create_time else None,
@@ -1714,6 +1745,7 @@ def get_nacos_config_by_id(config_id: str, tenant_id: str) -> Optional[Dict[str,
"name": config.name,
"nacos_addr": config.nacos_addr,
"nacos_username": config.nacos_username,
+ "nacos_password": config.nacos_password,
"namespace_id": config.namespace_id,
"description": config.description,
"is_active": config.is_active,
@@ -1749,6 +1781,8 @@ def list_nacos_configs(tenant_id: str, is_active: Optional[bool] = None) -> List
"name": config.name,
"nacos_addr": config.nacos_addr,
"namespace_id": config.namespace_id,
+ "nacos_username": config.nacos_username,
+ "nacos_password": config.nacos_password,
"is_active": config.is_active,
"last_scan_at": config.last_scan_at.isoformat() if config.last_scan_at else None,
}
@@ -1804,6 +1838,75 @@ def delete_nacos_config(config_id: str, tenant_id: str) -> bool:
return True
+def update_nacos_config(
+ config_id: str,
+ tenant_id: str,
+ user_id: str,
+ name: Optional[str] = None,
+ nacos_addr: Optional[str] = None,
+ nacos_username: Optional[str] = None,
+ nacos_password: Optional[str] = None,
+ namespace_id: Optional[str] = None,
+ description: Optional[str] = None,
+ is_active: Optional[bool] = None
+) -> Optional[Dict[str, Any]]:
+ """Update a Nacos config.
+
+ Args:
+ config_id: The config ID.
+ tenant_id: Tenant ID.
+ user_id: User who is updating this config.
+ name: Optional new display name.
+ nacos_addr: Optional new Nacos server address.
+ nacos_username: Optional new Nacos username.
+ nacos_password: Optional new Nacos password.
+ namespace_id: Optional new Nacos namespace.
+ description: Optional new description.
+ is_active: Optional active status.
+
+ Returns:
+ Updated config information dict, or None if not found.
+ """
+ with _get_db_session() as session:
+ config = session.query(A2ANacosConfig).filter(
+ A2ANacosConfig.config_id == config_id,
+ A2ANacosConfig.tenant_id == tenant_id,
+ A2ANacosConfig.delete_flag != 'Y'
+ ).first()
+
+ if not config:
+ return None
+
+ if name is not None:
+ config.name = name
+ if nacos_addr is not None:
+ config.nacos_addr = nacos_addr
+ if nacos_username is not None:
+ config.nacos_username = nacos_username
+ if nacos_password is not None:
+ config.nacos_password = nacos_password
+ if namespace_id is not None:
+ config.namespace_id = namespace_id
+ if description is not None:
+ config.description = description
+ if is_active is not None:
+ config.is_active = is_active
+
+ config.updated_by = user_id
+ session.flush()
+
+ return {
+ "id": config.id,
+ "config_id": config.config_id,
+ "name": config.name,
+ "nacos_addr": config.nacos_addr,
+ "namespace_id": config.namespace_id,
+ "nacos_username": config.nacos_username,
+ "nacos_password": config.nacos_password,
+ "is_active": config.is_active,
+ }
+
+
# =============================================================================
# A2A Artifact Operations
# =============================================================================
diff --git a/backend/database/agent_db.py b/backend/database/agent_db.py
index 3ced7625b..533659b0f 100644
--- a/backend/database/agent_db.py
+++ b/backend/database/agent_db.py
@@ -1,9 +1,11 @@
import logging
-from typing import List
-from sqlalchemy import update
+from typing import List, Optional
+from sqlalchemy import or_, update
from database.client import get_db_session, as_dict, filter_property
from database.db_models import AgentInfo, ToolInstance, AgentRelation
+from database.agent_version_db import query_current_version_no
+from consts.const import ASSET_OWNER_TENANT_ID
from utils.str_utils import convert_list_to_string
logger = logging.getLogger("agent_db")
@@ -22,9 +24,12 @@ def search_agent_info_by_agent_id(agent_id: int, tenant_id: str, version_no: int
with get_db_session() as session:
agent = session.query(AgentInfo).filter(
AgentInfo.agent_id == agent_id,
- AgentInfo.tenant_id == tenant_id,
AgentInfo.version_no == version_no,
- AgentInfo.delete_flag != 'Y'
+ or_(
+ AgentInfo.tenant_id == tenant_id,
+ AgentInfo.tenant_id == ASSET_OWNER_TENANT_ID,
+ ),
+ AgentInfo.delete_flag != 'Y',
).first()
if not agent:
@@ -98,6 +103,40 @@ def query_sub_agents_id_list(main_agent_id: int, tenant_id: str, version_no: int
return [relation.selected_agent_id for relation in relations]
+def query_sub_agent_relations(main_agent_id: int, tenant_id: str, version_no: int = 0) -> List[dict]:
+ """
+ Query sub-agent relations by main agent id, including pinned version info.
+ Default version_no=0 queries the draft version.
+
+ Args:
+ main_agent_id: Parent agent ID
+ tenant_id: Tenant ID
+ version_no: Version number to filter. Default 0 = draft/editing state
+ """
+ with get_db_session() as session:
+ query = session.query(AgentRelation).filter(
+ AgentRelation.parent_agent_id == main_agent_id,
+ AgentRelation.tenant_id == tenant_id,
+ AgentRelation.version_no == version_no,
+ AgentRelation.delete_flag != 'Y')
+ relations = query.all()
+ return [as_dict(relation) for relation in relations]
+
+
+def resolve_sub_agent_version_no(
+ selected_agent_id: int,
+ selected_agent_version_no: Optional[int],
+ tenant_id: str,
+) -> int:
+ """
+ Resolve the effective version number for a sub-agent relation.
+ Uses pinned version when set; otherwise the child's current published version, or 0 when it has none.
+ """
+ if selected_agent_version_no is not None:
+ return selected_agent_version_no
+ return query_current_version_no(agent_id=selected_agent_id, tenant_id=tenant_id) or 0
+
+
def clear_agent_new_mark(agent_id: int, tenant_id: str, user_id: str, version_no: int = 0):
"""
Clear the NEW mark for an agent.
@@ -158,7 +197,8 @@ def create_agent(agent_info, tenant_id: str, user_id: str):
:return: Created agent object
"""
info_with_metadata = dict(agent_info)
- info_with_metadata.setdefault("max_steps", 5)
+ info_with_metadata.setdefault("max_steps", 15)
+ info_with_metadata.setdefault("verification_config", None)
info_with_metadata.update({
"tenant_id": tenant_id,
"version_no": 0, # Default to draft version
@@ -192,8 +232,14 @@ def create_agent(agent_info, tenant_id: str, user_id: str):
"business_description": new_agent.business_description,
"business_logic_model_id": new_agent.business_logic_model_id,
"business_logic_model_name": new_agent.business_logic_model_name,
+ "prompt_template_id": new_agent.prompt_template_id,
+ "prompt_template_name": new_agent.prompt_template_name,
"group_ids": new_agent.group_ids,
"is_new": new_agent.is_new,
+ "enable_context_manager": new_agent.enable_context_manager,
+ "verification_config": new_agent.verification_config,
+ "greeting_message": new_agent.greeting_message,
+ "example_questions": new_agent.example_questions,
"current_version_no": new_agent.current_version_no,
"version_no": new_agent.version_no,
"created_by": new_agent.created_by,
diff --git a/backend/database/agent_repository_db.py b/backend/database/agent_repository_db.py
new file mode 100644
index 000000000..a6bb4f48b
--- /dev/null
+++ b/backend/database/agent_repository_db.py
@@ -0,0 +1,358 @@
+import logging
+import math
+from typing import Any, Dict, List, Optional
+
+from sqlalchemy import func, or_, update
+
+from database.client import as_dict, filter_property, get_db_session
+from database.db_models import AgentRepository
+
+logger = logging.getLogger("agent_repository_db")
+
+# Listing status values: NOT_SHARED (not shared), PENDING_REVIEW (awaiting review),
+# REJECTED (review rejected), SHARED (shared)
+STATUS_NOT_SHARED = "NOT_SHARED"
+STATUS_PENDING_REVIEW = "PENDING_REVIEW"
+STATUS_REJECTED = "REJECTED"
+STATUS_SHARED = "SHARED"
+
+# Complete set of the status values declared above.
+VALID_REPOSITORY_STATUSES = frozenset({
+ STATUS_NOT_SHARED,
+ STATUS_PENDING_REVIEW,
+ STATUS_REJECTED,
+ STATUS_SHARED,
+})
+
+# Identity columns of a listing.
+# NOTE(review): not referenced by any function visible in this module; presumably
+# documents the columns an upsert must never overwrite - confirm against callers.
+_UPSERT_IMMUTABLE_FIELDS = frozenset({
+ "agent_id",
+ "agent_repository_id",
+ "publisher_tenant_id",
+})
+
+# Columns copied from the incoming payload by upsert_agent_repository_record
+# when the incoming source_version_no differs from the stored one.
+_UPSERT_SNAPSHOT_FIELDS = frozenset({
+ "source_version_no",
+ "name",
+ "display_name",
+ "description",
+ "author",
+ "category_id",
+ "tags",
+ "tool_count",
+ "version_label",
+ "agent_info_json",
+})
+
+
+def insert_agent_repository_record(
+    repository_data: Dict[str, Any],
+    publisher_tenant_id: str,
+    publisher_user_id: str,
+) -> int:
+    """
+    Create a new agent repository listing row.
+
+    Args:
+        repository_data: Column values for the listing. Keys that are not
+            columns of ``AgentRepository`` are dropped by ``filter_property``.
+        publisher_tenant_id: Tenant recorded as the publisher.
+        publisher_user_id: User recorded as publisher, creator and last updater.
+
+    Returns:
+        The ``agent_repository_id`` of the inserted row.
+    """
+    record_values = dict(repository_data)
+    # Publisher / audit columns always win over whatever the caller supplied.
+    record_values.update({
+        "publisher_tenant_id": publisher_tenant_id,
+        "publisher_user_id": publisher_user_id,
+        "created_by": publisher_user_id,
+        "updated_by": publisher_user_id,
+        "delete_flag": "N",
+    })
+    # A missing or explicit-None status falls back to NOT_SHARED.
+    if record_values.get("status") is None:
+        record_values["status"] = STATUS_NOT_SHARED
+
+    with get_db_session() as session:
+        listing = AgentRepository(**filter_property(record_values, AgentRepository))
+        session.add(listing)
+        # Flush so the generated primary key is available before returning.
+        session.flush()
+        return int(listing.agent_repository_id)
+
+
+def get_agent_repository_by_id(repository_id: int) -> Optional[dict]:
+    """
+    Look up a non-deleted repository listing by its primary key.
+
+    Args:
+        repository_id: ``agent_repository_id`` of the listing.
+
+    Returns:
+        The listing as a dict, or None when no active row matches.
+    """
+    with get_db_session() as session:
+        listing = (
+            session.query(AgentRepository)
+            .filter(
+                AgentRepository.agent_repository_id == repository_id,
+                AgentRepository.delete_flag != "Y",
+            )
+            .first()
+        )
+        if not listing:
+            return None
+        return as_dict(listing)
+
+
+def get_agent_repository_by_id_and_publisher(
+    repository_id: int,
+    publisher_tenant_id: str,
+) -> Optional[dict]:
+    """
+    Look up a non-deleted listing that belongs to a specific publisher tenant.
+
+    Args:
+        repository_id: ``agent_repository_id`` of the listing.
+        publisher_tenant_id: Tenant that must own the listing.
+
+    Returns:
+        The listing as a dict, or None when no active row matches both the
+        id and the publisher tenant.
+    """
+    with get_db_session() as session:
+        listing = (
+            session.query(AgentRepository)
+            .filter(
+                AgentRepository.agent_repository_id == repository_id,
+                AgentRepository.publisher_tenant_id == publisher_tenant_id,
+                AgentRepository.delete_flag != "Y",
+            )
+            .first()
+        )
+        if not listing:
+            return None
+        return as_dict(listing)
+
+
+def get_agent_repository_by_agent_id(agent_id: int) -> Optional[dict]:
+    """
+    Look up the first non-deleted repository listing for an agent.
+
+    The lookup is not scoped to a tenant.
+
+    Args:
+        agent_id: ``agent_id`` stored on the listing.
+
+    Returns:
+        The listing as a dict, or None when the agent has no active listing.
+    """
+    with get_db_session() as session:
+        listing = (
+            session.query(AgentRepository)
+            .filter(
+                AgentRepository.agent_id == agent_id,
+                AgentRepository.delete_flag != "Y",
+            )
+            .first()
+        )
+        if not listing:
+            return None
+        return as_dict(listing)
+
+
+def upsert_agent_repository_record(
+    repository_data: Dict[str, Any],
+    publisher_tenant_id: str,
+    publisher_user_id: str,
+) -> tuple[int, bool]:
+    """Insert or update a repository listing keyed by agent_id.
+
+    When no active record exists for the agent, a new listing is inserted.
+    When a record exists:
+    - Same source_version_no: only status and updated_by are written.
+    - Different source_version_no: every snapshot field present in
+      ``repository_data`` is written as well, plus publisher_user_id;
+      agent_id, agent_repository_id and publisher_tenant_id are never touched.
+
+    A missing or explicit-None ``status`` is treated as NOT_SHARED on both the
+    insert and the update path.
+
+    The existence lookup is not tenant-scoped but the UPDATE is. If the
+    existing listing belongs to another tenant the UPDATE matches no row;
+    this is logged as a warning and the return value is unchanged.
+
+    Args:
+        repository_data: Listing payload; must contain ``agent_id``.
+        publisher_tenant_id: Tenant that owns (or will own) the listing.
+        publisher_user_id: User performing the operation.
+
+    Returns:
+        Tuple of (agent_repository_id, is_updated). is_updated is False on insert.
+
+    Raises:
+        ValueError: If ``repository_data`` has no ``agent_id``.
+    """
+    agent_id = repository_data.get("agent_id")
+    if agent_id is None:
+        raise ValueError("agent_id is required for repository upsert")
+
+    existing = get_agent_repository_by_agent_id(int(agent_id))
+    if not existing:
+        repository_id = insert_agent_repository_record(
+            repository_data=repository_data,
+            publisher_tenant_id=publisher_tenant_id,
+            publisher_user_id=publisher_user_id,
+        )
+        return repository_id, False
+
+    repository_id = int(existing["agent_repository_id"])
+
+    # Mirror the insert path: an explicit None must not be written as NULL.
+    status = repository_data.get("status")
+    if status is None:
+        status = STATUS_NOT_SHARED
+
+    update_fields: Dict[str, Any] = {
+        "status": status,
+        "updated_by": publisher_user_id,
+    }
+    if existing.get("source_version_no") != repository_data.get("source_version_no"):
+        # A new source version replaces the stored snapshot.
+        update_fields.update(
+            (key, repository_data[key])
+            for key in _UPSERT_SNAPSHOT_FIELDS
+            if key in repository_data
+        )
+        update_fields["publisher_user_id"] = publisher_user_id
+
+    with get_db_session() as session:
+        result = session.execute(
+            update(AgentRepository)
+            .where(
+                AgentRepository.agent_repository_id == repository_id,
+                AgentRepository.publisher_tenant_id == publisher_tenant_id,
+                AgentRepository.delete_flag != "Y",
+            )
+            .values(**update_fields)
+        )
+        if result.rowcount == 0:
+            # Listing exists but is not owned by this tenant (or was removed
+            # concurrently); surface it instead of failing silently.
+            logger.warning(
+                "Upsert matched no repository row: agent_repository_id=%s, publisher_tenant_id=%s",
+                repository_id,
+                publisher_tenant_id,
+            )
+    return repository_id, True
+
+
+def list_agent_repository_summaries(
+    *,
+    status: Optional[str] = None,
+) -> List[dict]:
+    """
+    Return lightweight summaries of every non-deleted listing.
+
+    Only the id, author, name, display name, description and status columns
+    are selected, so heavy JSON columns are never loaded.
+
+    Args:
+        status: When truthy, restrict the result to listings with this status.
+
+    Returns:
+        Summary dicts ordered by ``agent_repository_id`` descending.
+    """
+    summary_columns = (
+        "agent_repository_id",
+        "author",
+        "name",
+        "display_name",
+        "description",
+        "status",
+    )
+    with get_db_session() as session:
+        summary_query = session.query(
+            *(getattr(AgentRepository, column) for column in summary_columns)
+        ).filter(AgentRepository.delete_flag != "Y")
+        if status:
+            summary_query = summary_query.filter(AgentRepository.status == status)
+        newest_first = summary_query.order_by(
+            AgentRepository.agent_repository_id.desc()
+        )
+        return [
+            {column: getattr(row, column) for column in summary_columns}
+            for row in newest_first.all()
+        ]
+
+
+def query_agent_repository_list(
+    *,
+    page: int = 1,
+    page_size: int = 20,
+    search: Optional[str] = None,
+    tag: Optional[str] = None,
+    category_id: Optional[int] = None,
+    status: Optional[str] = STATUS_SHARED,
+    publisher_tenant_id: Optional[str] = None,
+) -> Dict[str, Any]:
+    """
+    Query non-deleted repository listings with offset pagination.
+
+    Args:
+        page: 1-based page number; values below 1 are clamped to 1.
+        page_size: Rows per page; clamped to the range 1..100.
+        search: Case-insensitive substring matched against name, display name,
+            description, author and the comma-joined tags. LIKE metacharacters
+            (``%``, ``_``, ``\\``) in the term are escaped and matched literally.
+        tag: When truthy, only listings whose tags contain this exact value.
+        category_id: When not None, only listings in this category.
+        status: When truthy, only listings with this status (default SHARED).
+        publisher_tenant_id: When truthy, only listings from this tenant.
+
+    Returns:
+        Dict with ``items`` (listing dicts, newest id first) and ``pagination``
+        (``page``, ``page_size``, ``total``, ``total_pages``).
+    """
+    page = max(page, 1)
+    page_size = max(min(page_size, 100), 1)
+    offset = (page - 1) * page_size
+
+    with get_db_session() as session:
+        query = session.query(AgentRepository).filter(
+            AgentRepository.delete_flag != "Y",
+        )
+
+        if status:
+            query = query.filter(AgentRepository.status == status)
+        if publisher_tenant_id:
+            query = query.filter(
+                AgentRepository.publisher_tenant_id == publisher_tenant_id
+            )
+        if category_id is not None:
+            query = query.filter(AgentRepository.category_id == category_id)
+        if tag:
+            query = query.filter(AgentRepository.tags.any(tag))
+        if search:
+            # Escape the escape character first, then the LIKE wildcards, so a
+            # term such as "100%" or "a_b" is not treated as a pattern.
+            escaped = (
+                search.replace("\\", "\\\\")
+                .replace("%", "\\%")
+                .replace("_", "\\_")
+            )
+            keyword = f"%{escaped}%"
+            query = query.filter(
+                or_(
+                    AgentRepository.name.ilike(keyword, escape="\\"),
+                    AgentRepository.display_name.ilike(keyword, escape="\\"),
+                    AgentRepository.description.ilike(keyword, escape="\\"),
+                    AgentRepository.author.ilike(keyword, escape="\\"),
+                    func.array_to_string(AgentRepository.tags, ",").ilike(
+                        keyword, escape="\\"
+                    ),
+                )
+            )
+
+        total = query.count()
+        rows = (
+            query.order_by(AgentRepository.agent_repository_id.desc())
+            .offset(offset)
+            .limit(page_size)
+            .all()
+        )
+
+        total_pages = math.ceil(total / page_size) if total else 0
+        return {
+            "items": [as_dict(row) for row in rows],
+            "pagination": {
+                "page": page,
+                "page_size": page_size,
+                "total": total,
+                "total_pages": total_pages,
+            },
+        }
+
+
+def update_agent_repository_by_id(
+    *,
+    repository_id: int,
+    publisher_tenant_id: str,
+    user_id: str,
+    updates: Dict[str, Any],
+) -> int:
+    """
+    Apply whitelisted column updates to a listing owned by a publisher tenant.
+
+    Args:
+        repository_id: ``agent_repository_id`` of the listing.
+        publisher_tenant_id: Tenant that must own the listing.
+        user_id: User recorded in ``updated_by``.
+        updates: Requested changes; keys outside the editable set are ignored.
+
+    Returns:
+        Number of rows affected; 0 when no editable key was supplied or no
+        active row matched.
+    """
+    editable_columns = frozenset((
+        "display_name",
+        "description",
+        "author",
+        "category_id",
+        "tags",
+        "tool_count",
+        "version_label",
+        "source_version_no",
+        "agent_info_json",
+        "status",
+    ))
+    update_fields: Dict[str, Any] = {}
+    for column, new_value in updates.items():
+        if column in editable_columns:
+            update_fields[column] = new_value
+    if not update_fields:
+        # Nothing editable was requested, so skip the round trip entirely.
+        return 0
+
+    update_fields["updated_by"] = user_id
+
+    statement = (
+        update(AgentRepository)
+        .where(
+            AgentRepository.agent_repository_id == repository_id,
+            AgentRepository.publisher_tenant_id == publisher_tenant_id,
+            AgentRepository.delete_flag != "Y",
+        )
+        .values(**update_fields)
+    )
+    with get_db_session() as session:
+        outcome = session.execute(statement)
+        return int(outcome.rowcount or 0)
+
+
+def update_agent_repository_status_by_id(
+    *,
+    repository_id: int,
+    status: str,
+    user_id: str,
+) -> int:
+    """
+    Set the status of a non-deleted listing identified by primary key.
+
+    The update is not scoped to a publisher tenant, and ``status`` is written
+    as given without validation here.
+
+    Args:
+        repository_id: ``agent_repository_id`` of the listing.
+        status: New status value.
+        user_id: User recorded in ``updated_by``.
+
+    Returns:
+        Number of rows affected.
+    """
+    statement = (
+        update(AgentRepository)
+        .where(
+            AgentRepository.agent_repository_id == repository_id,
+            AgentRepository.delete_flag != "Y",
+        )
+        .values(status=status, updated_by=user_id)
+    )
+    with get_db_session() as session:
+        outcome = session.execute(statement)
+        return int(outcome.rowcount or 0)
+
+
+def soft_delete_agent_repository_by_id(
+    *,
+    repository_id: int,
+    publisher_tenant_id: str,
+    user_id: str,
+) -> int:
+    """
+    Mark a listing owned by a publisher tenant as deleted.
+
+    The row is kept; only ``delete_flag`` is set to "Y".
+
+    Args:
+        repository_id: ``agent_repository_id`` of the listing.
+        publisher_tenant_id: Tenant that must own the listing.
+        user_id: User recorded in ``updated_by``.
+
+    Returns:
+        Number of rows affected (0 when already deleted or not owned).
+    """
+    statement = (
+        update(AgentRepository)
+        .where(
+            AgentRepository.agent_repository_id == repository_id,
+            AgentRepository.publisher_tenant_id == publisher_tenant_id,
+            AgentRepository.delete_flag != "Y",
+        )
+        .values(delete_flag="Y", updated_by=user_id)
+    )
+    with get_db_session() as session:
+        outcome = session.execute(statement)
+        return int(outcome.rowcount or 0)
+
+
+def list_agent_repository_by_publisher(
+    publisher_tenant_id: str,
+    *,
+    publisher_user_id: Optional[str] = None,
+) -> List[dict]:
+    """
+    List the non-deleted listings published by a tenant.
+
+    Args:
+        publisher_tenant_id: Tenant whose listings are returned.
+        publisher_user_id: When truthy, keep only listings published by this user.
+
+    Returns:
+        Listing dicts ordered by ``agent_repository_id`` descending.
+    """
+    conditions = [
+        AgentRepository.publisher_tenant_id == publisher_tenant_id,
+        AgentRepository.delete_flag != "Y",
+    ]
+    if publisher_user_id:
+        conditions.append(AgentRepository.publisher_user_id == publisher_user_id)
+
+    with get_db_session() as session:
+        listings = (
+            session.query(AgentRepository)
+            .filter(*conditions)
+            .order_by(AgentRepository.agent_repository_id.desc())
+            .all()
+        )
+        return [as_dict(listing) for listing in listings]
diff --git a/backend/database/agent_version_db.py b/backend/database/agent_version_db.py
index 4df0158a8..c895cb249 100644
--- a/backend/database/agent_version_db.py
+++ b/backend/database/agent_version_db.py
@@ -1,9 +1,10 @@
import logging
from typing import List, Optional, Tuple
-from sqlalchemy import select, insert, update, func
+from sqlalchemy import or_, select, insert, update, delete, func
from database.client import get_db_session, as_dict
from database.db_models import AgentInfo, ToolInstance, AgentRelation, AgentVersion, SkillInstance
+from consts.const import ASSET_OWNER_TENANT_ID
logger = logging.getLogger("agent_version_db")
@@ -28,7 +29,6 @@ def search_version_by_version_no(
with get_db_session() as session:
version = session.query(AgentVersion).filter(
AgentVersion.agent_id == agent_id,
- AgentVersion.tenant_id == tenant_id,
AgentVersion.version_no == version_no,
AgentVersion.delete_flag == 'N',
).first()
@@ -77,7 +77,10 @@ def query_current_version_no(
with get_db_session() as session:
agent = session.query(AgentInfo).filter(
AgentInfo.agent_id == agent_id,
- AgentInfo.tenant_id == tenant_id,
+ or_(
+ AgentInfo.tenant_id == tenant_id,
+ AgentInfo.tenant_id == ASSET_OWNER_TENANT_ID,
+ ),
AgentInfo.version_no == 0,
AgentInfo.delete_flag == 'N',
).first()
@@ -96,11 +99,17 @@ def query_agent_snapshot(
# Query agent info snapshot
agent = session.query(AgentInfo).filter(
AgentInfo.agent_id == agent_id,
- AgentInfo.tenant_id == tenant_id,
+ or_(
+ AgentInfo.tenant_id == tenant_id,
+ AgentInfo.tenant_id == ASSET_OWNER_TENANT_ID,
+ ),
AgentInfo.version_no == version_no,
AgentInfo.delete_flag == 'N',
).first()
+ if agent is not None:
+ tenant_id = agent.tenant_id
+
# Query tool instances snapshot
tools = session.query(ToolInstance).filter(
ToolInstance.agent_id == agent_id,
@@ -370,6 +379,96 @@ def delete_relation_snapshot(
return result.rowcount
+# ============== Restore Draft from Version Snapshot ==============
+# Used by rollback: copies a published version's data back into draft (version_no=0)
+
+def restore_agent_draft(
+    agent_id: int,
+    tenant_id: str,
+    target_version_no: int,
+    target_agent_snapshot: dict,
+    target_tool_snapshots: List[dict],
+    target_relation_snapshots: List[dict],
+    target_skill_snapshots: List[dict],
+) -> None:
+    """
+    Overwrite the agent draft (version_no=0) with a published version snapshot.
+
+    Everything runs inside a single ``get_db_session()`` block, in this order:
+    1. Hard-delete the draft tool rows (version_no=0).
+    2. Hard-delete the draft relation rows.
+    3. Hard-delete the draft skill rows.
+    4. Update the draft agent row with the snapshot values and set
+       current_version_no to target_version_no.
+    5. Insert the tool snapshots with version_no=0.
+    6. Insert the relation snapshots with version_no=0.
+    7. Insert the skill snapshots with version_no=0.
+
+    Args:
+        agent_id: Agent whose draft is replaced.
+        tenant_id: Tenant owning the draft rows.
+        target_version_no: Version being restored; stored as current_version_no.
+        target_agent_snapshot: Column values of the agent at the target version.
+        target_tool_snapshots: Tool rows of the target version.
+        target_relation_snapshots: Relation rows of the target version.
+        target_skill_snapshots: Skill rows of the target version.
+    """
+
+    def _as_draft_row(snapshot: dict) -> dict:
+        # Same columns as the snapshot, re-homed onto the draft version.
+        draft_row = {key: value for key, value in snapshot.items() if key != 'version_no'}
+        draft_row['version_no'] = 0
+        return draft_row
+
+    # (model, column that holds the owning agent id, snapshots to re-insert)
+    draft_children = (
+        (ToolInstance, ToolInstance.agent_id, target_tool_snapshots),
+        (AgentRelation, AgentRelation.parent_agent_id, target_relation_snapshots),
+        (SkillInstance, SkillInstance.agent_id, target_skill_snapshots),
+    )
+
+    with get_db_session() as session:
+        # Steps 1-3: deletes are hard so the (id, version_no=0) keys are free
+        # again before the snapshot rows are inserted.
+        for model, owner_column, _ in draft_children:
+            session.execute(
+                delete(model).where(
+                    owner_column == agent_id,
+                    model.tenant_id == tenant_id,
+                    model.version_no == 0,
+                )
+            )
+
+        # Step 4: the draft row keeps version_no=0; only its pointer to the
+        # current published version changes.
+        draft_values = {
+            key: value
+            for key, value in target_agent_snapshot.items()
+            if key not in ('version_no', 'current_version_no')
+        }
+        draft_values['current_version_no'] = target_version_no
+        session.execute(
+            update(AgentInfo)
+            .where(
+                AgentInfo.agent_id == agent_id,
+                AgentInfo.tenant_id == tenant_id,
+                AgentInfo.version_no == 0,
+                AgentInfo.delete_flag == 'N',
+            )
+            .values(**draft_values)
+        )
+
+        # Steps 5-7: re-insert tools, then relations, then skills.
+        for model, _, snapshots in draft_children:
+            for snapshot in snapshots:
+                session.execute(insert(model).values(**_as_draft_row(snapshot)))
+
+
def delete_skill_snapshot(
agent_id: int,
tenant_id: str,
diff --git a/backend/database/attachment_db.py b/backend/database/attachment_db.py
index 1faabac23..06b84e5ac 100644
--- a/backend/database/attachment_db.py
+++ b/backend/database/attachment_db.py
@@ -2,9 +2,81 @@
import os
import uuid
from datetime import datetime
-from typing import Any, BinaryIO, Dict, List, Optional
+from typing import Any, BinaryIO, Dict, List, Optional, Tuple
from .client import minio_client
+from consts.const import S3_URL_PREFIX
+from consts.const import NORTHBOUND_EXTERNAL_URL
+from urllib.parse import quote
+
+
+def _normalize_object_and_bucket(object_name: str, bucket: Optional[str] = None) -> Tuple[str, Optional[str]]:
+    """
+    Split a storage reference into an object key and a bucket.
+
+    Accepted forms of ``object_name``:
+    - ``<S3_URL_PREFIX>bucket/key``
+    - ``/bucket/key`` (any number of leading slashes)
+    - ``key`` - returned unchanged together with the given ``bucket``
+
+    Args:
+        object_name: Reference in one of the forms above.
+        bucket: Bucket to fall back on when the reference carries none.
+
+    Returns:
+        Tuple ``(key, bucket)``. The key is "" when the reference holds only a
+        bucket. A falsy ``object_name`` is returned as-is with ``bucket``.
+    """
+    if not object_name:
+        return object_name, bucket
+
+    if object_name.startswith(S3_URL_PREFIX):
+        location = object_name[len(S3_URL_PREFIX):]
+    elif object_name.startswith("/"):
+        location = object_name.lstrip("/")
+    else:
+        # Plain key: nothing to parse.
+        return object_name, bucket
+
+    # First path segment is the bucket; everything after the first "/" is the key.
+    embedded_bucket, _, key = location.partition("/")
+    return key, embedded_bucket or bucket
+
+
+def build_s3_url(object_name: str, bucket: Optional[str] = None) -> str:
+    """
+    Produce an ``<S3_URL_PREFIX>bucket/key`` URL for a storage reference.
+
+    Args:
+        object_name: An S3-style URL (returned unchanged), a ``/bucket/key``
+            path, or a bare object key.
+        bucket: Bucket for a bare key; when falsy, ``minio_client.default_bucket``
+            is used.
+
+    Returns:
+        The S3-style URL, or "" for a falsy ``object_name``. A bare key with no
+        resolvable bucket yields the prefix followed directly by the key.
+    """
+    if not object_name:
+        return ""
+    if object_name.startswith(S3_URL_PREFIX):
+        return object_name
+
+    if object_name.startswith("/"):
+        # "/bucket/key" -> bucket + key; "/bucket" -> bucket with an empty key.
+        path_bucket, _, path_key = object_name.lstrip("/").partition("/")
+        return f"{S3_URL_PREFIX}{path_bucket}/{path_key}"
+
+    target_bucket = bucket or minio_client.default_bucket
+    bucket_segment = f"{target_bucket}/" if target_bucket else ""
+    return f"{S3_URL_PREFIX}{bucket_segment}{object_name}"
+
+
+def _build_mcp_presigned_url(presigned_url: str) -> str:
+    """
+    Wrap a presigned storage URL in the northbound file-fetch proxy URL.
+
+    Args:
+        presigned_url: Original presigned URL.
+
+    Returns:
+        ``<NORTHBOUND_EXTERNAL_URL>/nb/v1/file/fetch?presigned_url=<encoded>``,
+        where the original URL is fully percent-encoded, or "" when
+        ``presigned_url`` is falsy.
+    """
+    if presigned_url:
+        # safe='' also encodes "/" so the whole URL survives as one query value.
+        encoded_presigned_url = quote(presigned_url, safe='')
+        return f"{NORTHBOUND_EXTERNAL_URL}/nb/v1/file/fetch?presigned_url={encoded_presigned_url}"
+    return ""
def generate_object_name(file_name: str, prefix: str = "attachments") -> str:
@@ -28,7 +100,13 @@ def generate_object_name(file_name: str, prefix: str = "attachments") -> str:
return f"{prefix}/{timestamp}_{unique_id}{ext}"
-def upload_file(file_path: str, object_name: Optional[str] = None, bucket: Optional[str] = None) -> Dict[str, Any]:
+def upload_file(
+ file_path: str,
+ object_name: Optional[str] = None,
+ bucket: Optional[str] = None,
+ generate_presigned_url: bool = True,
+ presigned_url_expires: int = 86400
+) -> Dict[str, Any]:
"""
Upload local file to MinIO
@@ -36,6 +114,8 @@ def upload_file(file_path: str, object_name: Optional[str] = None, bucket: Optio
file_path: Local file path
object_name: Object name, if not specified will be auto-generated
bucket: Bucket name, if not specified will use default bucket
+ generate_presigned_url: Whether to generate presigned URL for external access (default True)
+ presigned_url_expires: Expiration time in seconds for presigned URL (default 86400 = 24 hours)
Returns:
Dict[str, Any]: Upload result, containing success flag, URL and error message (if any)
@@ -55,6 +135,12 @@ def upload_file(file_path: str, object_name: Optional[str] = None, bucket: Optio
if success:
response["url"] = result
+ # Generate presigned URL for external access if requested
+ if generate_presigned_url:
+ presigned_result = get_file_url(object_name, bucket, presigned_url_expires)
+ if presigned_result.get("success"):
+ # Only expose MCP URL (with proxy prefix), not raw MinIO URL
+ response["presigned_url"] = _build_mcp_presigned_url(presigned_result["url"])
else:
response["error"] = result
@@ -65,7 +151,10 @@ def upload_fileobj(
file_obj: BinaryIO,
file_name: str,
bucket: Optional[str] = None,
- prefix: str = "attachments"
+ prefix: str = "attachments",
+ generate_presigned_url: bool = True,
+ presigned_url_expires: int = 86400,
+ file_size: Optional[int] = None
) -> Dict[str, Any]:
"""
Upload file object to MinIO
@@ -75,6 +164,9 @@ def upload_fileobj(
file_name: File name
bucket: Bucket name, if not specified will use default bucket
prefix: Object name prefix, default is "attachments"
+ generate_presigned_url: Whether to generate presigned URL for external access (default True)
+ presigned_url_expires: Expiration time in seconds for presigned URL (default 86400 = 24 hours)
+ file_size: Pre-calculated file size in bytes. If not provided, will be calculated internally.
Returns:
Dict[str, Any]: Upload result, containing success flag, URL and error message (if any)
@@ -82,26 +174,39 @@ def upload_fileobj(
# Generate object name
object_name = generate_object_name(file_name, prefix=prefix)
- # Get current position
- current_pos = file_obj.tell()
-
- # Calculate file size
- file_obj.seek(0, os.SEEK_END)
- file_size = file_obj.tell()
-
- # Reset to original position
- file_obj.seek(current_pos)
+ # Calculate file size if not provided
+ if file_size is None:
+ try:
+ current_pos = file_obj.tell()
+ file_obj.seek(0, os.SEEK_END)
+ file_size = file_obj.tell()
+ file_obj.seek(0) # Seek to beginning for upload
+ except (ValueError, IOError):
+ file_size = 0
+ file_obj.seek(0) # Try to seek to beginning anyway
# Upload file
success, result = minio_client.upload_fileobj(
file_obj, object_name, bucket)
+ # Restore original position (if file is still open)
+ try:
+ file_obj.seek(0)
+ except (ValueError, IOError):
+ pass # File is closed, ignore
+
# Build response
response = {"success": success, "object_name": object_name, "file_name": file_name, "file_size": file_size,
"content_type": get_content_type(file_name), "upload_time": datetime.now().isoformat()}
if success:
response["url"] = result
+ # Generate presigned URL for external access if requested
+ if generate_presigned_url:
+ presigned_result = get_file_url(object_name, bucket, presigned_url_expires)
+ if presigned_result.get("success"):
+ # Only expose MCP URL (with proxy prefix), not raw MinIO URL
+ response["presigned_url"] = _build_mcp_presigned_url(presigned_result["url"])
else:
response["error"] = result
@@ -134,14 +239,14 @@ def download_file(object_name: str, file_path: str, bucket: Optional[str] = None
return response
-def get_file_url(object_name: str, bucket: Optional[str] = None, expires: int = 3600) -> Dict[str, Any]:
+def get_file_url(object_name: str, bucket: Optional[str] = None, expires: int = 86400) -> Dict[str, Any]:
"""
Get presigned URL for file
Args:
object_name: Object name
bucket: Bucket name, if not specified will use default bucket
- expires: URL expiration time in seconds
+ expires: URL expiration time in seconds (default 86400 = 24 hours)
Returns:
Dict[str, Any]: Result containing success flag, URL and error message (if any)
@@ -165,6 +270,9 @@ def get_file_size_from_minio(object_name: str, bucket: Optional[str] = None) ->
"""
Get file size by object name
"""
+ object_name, bucket = _normalize_object_and_bucket(object_name, bucket)
+ # Ensure minio_client is initialized before accessing storage_config
+ minio_client._ensure_initialized()
bucket = bucket or minio_client.storage_config.default_bucket
return minio_client.get_file_size(object_name, bucket)
@@ -172,15 +280,16 @@ def get_file_size_from_minio(object_name: str, bucket: Optional[str] = None) ->
def file_exists(object_name: str, bucket: Optional[str] = None) -> bool:
"""
Check if a file exists in the bucket.
-
+
Args:
object_name: Object name in storage
bucket: Bucket name, if not specified will use default bucket
-
+
Returns:
bool: True if file exists, False otherwise
"""
try:
+ object_name, bucket = _normalize_object_and_bucket(object_name, bucket)
return minio_client.file_exists(object_name, bucket)
except Exception:
return False
@@ -189,15 +298,17 @@ def file_exists(object_name: str, bucket: Optional[str] = None) -> bool:
def copy_file(source_object: str, dest_object: str, bucket: Optional[str] = None) -> Dict[str, Any]:
"""
Copy a file within the same bucket (atomic operation in MinIO).
-
+
Args:
source_object: Source object name
dest_object: Destination object name
bucket: Bucket name, if not specified will use default bucket
-
+
Returns:
Dict[str, Any]: Result containing success flag and error message (if any)
"""
+ source_object, bucket = _normalize_object_and_bucket(source_object, bucket)
+ dest_object, bucket = _normalize_object_and_bucket(dest_object, bucket)
success, result = minio_client.copy_file(source_object, dest_object, bucket)
if success:
return {"success": True, "object_name": result}
@@ -223,8 +334,8 @@ def list_files(prefix: str = "", bucket: Optional[str] = None) -> List[Dict[str,
for file in files:
file["content_type"] = get_content_type(file["key"])
- # Get presigned URL (valid for 1 hour)
- success, url = minio_client.get_file_url(file["key"], bucket, 3600)
+ # Get presigned URL (valid for 24 hours)
+ success, url = minio_client.get_file_url(file["key"], bucket, 86400)
if success:
file["url"] = url
@@ -242,7 +353,9 @@ def delete_file(object_name: str, bucket: Optional[str] = None) -> Dict[str, Any
Returns:
Dict[str, Any]: Delete result, containing success flag and error message (if any)
"""
+ object_name, bucket = _normalize_object_and_bucket(object_name, bucket)
if not bucket:
+ minio_client._ensure_initialized()
bucket = minio_client.storage_config.default_bucket
success, result = minio_client.delete_file(object_name, bucket)
@@ -265,6 +378,7 @@ def get_file_stream(object_name: str, bucket: Optional[str] = None) -> Optional[
Returns:
Optional[BinaryIO]: Standard BinaryIO stream object, or None if failed
"""
+ object_name, bucket = _normalize_object_and_bucket(object_name, bucket)
success, result = minio_client.get_file_stream(object_name, bucket)
if not success:
return None
@@ -341,6 +455,7 @@ def get_content_type(file_path: str) -> str:
'.html': 'text/html',
'.htm': 'text/html',
'.json': 'application/json',
+ '.epub': 'application/epub+zip',  # IANA-registered media type for EPUB
'.xml': 'application/xml',
'.zip': 'application/zip',
'.rar': 'application/x-rar-compressed',
diff --git a/backend/database/cas_session_db.py b/backend/database/cas_session_db.py
new file mode 100644
index 000000000..57d1aa8ea
--- /dev/null
+++ b/backend/database/cas_session_db.py
@@ -0,0 +1,134 @@
+"""
+Database operations for CAS-backed web sessions.
+"""
+
+from datetime import datetime
+from typing import Any, Dict, Optional
+
+from database.client import as_dict, get_db_session
+from database.db_models import UserCasSession
+
+# Values stored in UserCasSession.status: sessions are created as "active"
+# and switched to "revoked" by the revoke_* functions in this module.
+CAS_SESSION_ACTIVE = "active"
+CAS_SESSION_REVOKED = "revoked"
+
+
+def create_cas_session(
+    *,
+    session_id: str,
+    user_id: str,
+    cas_user_id: str,
+    expires_at: datetime,
+    cas_session_index: Optional[str] = None,
+) -> Dict[str, Any]:
+    """
+    Persist a new CAS-backed web session in the active state.
+
+    Args:
+        session_id: Identifier of the web session.
+        user_id: Local user; also recorded as creator and last updater.
+        cas_user_id: User identifier on the CAS side.
+        expires_at: Moment after which the session is no longer active.
+        cas_session_index: Optional CAS session index stored with the row.
+
+    Returns:
+        The stored row as a dict.
+    """
+    record_fields = dict(
+        session_id=session_id,
+        user_id=user_id,
+        cas_user_id=cas_user_id,
+        cas_session_index=cas_session_index,
+        status=CAS_SESSION_ACTIVE,
+        expires_at=expires_at,
+        created_by=user_id,
+        updated_by=user_id,
+    )
+    with get_db_session() as session:
+        cas_session = UserCasSession(**record_fields)
+        session.add(cas_session)
+        # Flush before converting so database-generated values are populated.
+        session.flush()
+        return as_dict(cas_session)
+
+
+def get_cas_session_by_session_id(session_id: str) -> Optional[Dict[str, Any]]:
+    """
+    Fetch a non-deleted CAS session row by its session id.
+
+    Status and expiry are not checked here.
+
+    Args:
+        session_id: Identifier of the web session.
+
+    Returns:
+        The row as a dict, or None when ``session_id`` is falsy or no row matches.
+    """
+    if not session_id:
+        return None
+    conditions = (
+        UserCasSession.session_id == session_id,
+        UserCasSession.delete_flag == "N",
+    )
+    with get_db_session() as session:
+        cas_session = session.query(UserCasSession).filter(*conditions).first()
+        if not cas_session:
+            return None
+        return as_dict(cas_session)
+
+
+def is_cas_session_active(session_id: str) -> bool:
+    """
+    Tell whether a CAS session exists, is active and has not expired.
+
+    Expiry is compared against ``datetime.now()`` evaluated when the query is built.
+
+    Args:
+        session_id: Identifier of the web session.
+
+    Returns:
+        True when a matching non-deleted row is found; False otherwise,
+        including when ``session_id`` is falsy.
+    """
+    if not session_id:
+        return False
+    conditions = (
+        UserCasSession.session_id == session_id,
+        UserCasSession.status == CAS_SESSION_ACTIVE,
+        UserCasSession.expires_at > datetime.now(),
+        UserCasSession.delete_flag == "N",
+    )
+    with get_db_session() as session:
+        live_session = session.query(UserCasSession).filter(*conditions).first()
+        return live_session is not None
+
+
+def revoke_cas_session_by_session_id(session_id: str, actor: str = "cas") -> int:
+    """
+    Revoke the active CAS session with the given session id.
+
+    Args:
+        session_id: Identifier of the web session.
+        actor: Value recorded in ``updated_by``.
+
+    Returns:
+        Number of rows switched to revoked; 0 when ``session_id`` is falsy.
+    """
+    if not session_id:
+        return 0
+    revocation = {
+        "status": CAS_SESSION_REVOKED,
+        "revoked_at": datetime.now(),
+        "updated_by": actor,
+    }
+    with get_db_session() as session:
+        active_sessions = session.query(UserCasSession).filter(
+            UserCasSession.session_id == session_id,
+            UserCasSession.status == CAS_SESSION_ACTIVE,
+            UserCasSession.delete_flag == "N",
+        )
+        return active_sessions.update(revocation)
+
+
+def revoke_cas_sessions_by_user_id(cas_user_id: str, actor: str = "cas") -> int:
+    """
+    Revoke every active CAS session of a CAS user.
+
+    Args:
+        cas_user_id: User identifier on the CAS side.
+        actor: Value recorded in ``updated_by``.
+
+    Returns:
+        Number of rows switched to revoked; 0 when ``cas_user_id`` is falsy.
+    """
+    if not cas_user_id:
+        return 0
+    revocation = {
+        "status": CAS_SESSION_REVOKED,
+        "revoked_at": datetime.now(),
+        "updated_by": actor,
+    }
+    with get_db_session() as session:
+        active_sessions = session.query(UserCasSession).filter(
+            UserCasSession.cas_user_id == cas_user_id,
+            UserCasSession.status == CAS_SESSION_ACTIVE,
+            UserCasSession.delete_flag == "N",
+        )
+        return active_sessions.update(revocation)
+
+
+def revoke_cas_session_by_index(cas_session_index: str, actor: str = "cas") -> int:
+    """
+    Revoke the active CAS session(s) carrying the given CAS session index.
+
+    Args:
+        cas_session_index: CAS session index stored on the row.
+        actor: Value recorded in ``updated_by``.
+
+    Returns:
+        Number of rows switched to revoked; 0 when ``cas_session_index`` is falsy.
+    """
+    if not cas_session_index:
+        return 0
+    revocation = {
+        "status": CAS_SESSION_REVOKED,
+        "revoked_at": datetime.now(),
+        "updated_by": actor,
+    }
+    with get_db_session() as session:
+        active_sessions = session.query(UserCasSession).filter(
+            UserCasSession.cas_session_index == cas_session_index,
+            UserCasSession.status == CAS_SESSION_ACTIVE,
+            UserCasSession.delete_flag == "N",
+        )
+        return active_sessions.update(revocation)
diff --git a/backend/database/client.py b/backend/database/client.py
index 9b0b97a52..e095c5636 100644
--- a/backend/database/client.py
+++ b/backend/database/client.py
@@ -89,6 +89,9 @@ def __init__(self):
if MinioClient._initialized:
return
MinioClient._initialized = True
+ # Explicitly initialize attributes so external callers never hit missing-attribute errors.
+ self._storage_client = None
+ self.storage_config = None
def _ensure_initialized(self):
"""Lazily initialize the storage client on first use."""
@@ -108,6 +111,23 @@ def _ensure_initialized(self):
return True
return False
+    @property
+    def default_bucket(self) -> Optional[str]:
+        """
+        Name of the default bucket, resolved without raising.
+
+        Lazy initialization is attempted first, and any error from it is
+        deliberately ignored so this accessor stays usable. Operational
+        methods can still raise detailed storage errors when invoked.
+
+        Returns:
+            ``storage_config.default_bucket`` when a storage config is
+            available, otherwise the ``MINIO_DEFAULT_BUCKET`` constant.
+        """
+        try:
+            self._ensure_initialized()
+        except Exception:
+            # Best effort only: fall through to whatever config is present.
+            pass
+
+        config = getattr(self, "storage_config", None)
+        if config is None:
+            return MINIO_DEFAULT_BUCKET
+        return config.default_bucket
+
def upload_file(
self,
file_path: str,
@@ -158,14 +178,14 @@ def download_file(self, object_name: str, file_path: str, bucket: Optional[str]
self._ensure_initialized()
return self._storage_client.download_file(object_name, file_path, bucket)
- def get_file_url(self, object_name: str, bucket: Optional[str] = None, expires: int = 3600) -> Tuple[bool, str]:
+ def get_file_url(self, object_name: str, bucket: Optional[str] = None, expires: int = 86400) -> Tuple[bool, str]:
"""
Get presigned URL for file
Args:
object_name: Object name
bucket: Bucket name, if not specified use default bucket
- expires: URL expiration time in seconds
+ expires: URL expiration time in seconds (default 86400 = 24 hours)
Returns:
Tuple[bool, str]: (Success status, Presigned URL or error message)
@@ -330,3 +350,51 @@ def filter_property(data, model_class):
"""
model_fields = model_class.__table__.columns.keys()
return {key: value for key, value in data.items() if key in model_fields}
+
+
+# ---------------------------------------------------------------------------
+# Monitoring-specific, isolated engine and session management
+# ---------------------------------------------------------------------------
+# Internal engine and session maker for monitoring data, isolated from main pool
+_monitoring_engine = None
+_monitoring_session_maker = None
+
+
+def _get_monitoring_engine():
+    """
+    Return the monitoring engine, building it on the first call.
+
+    The first call also creates the module-level session factory bound to the
+    new engine; later calls return the cached engine unchanged.
+    """
+    global _monitoring_engine, _monitoring_session_maker
+    if _monitoring_engine is not None:
+        return _monitoring_engine
+
+    # Connection details are passed via connect_args instead of the URL.
+    connection_settings = {
+        "host": POSTGRES_HOST,
+        "user": POSTGRES_USER,
+        "password": NEXENT_POSTGRES_PASSWORD,
+        "database": POSTGRES_DB,
+        "port": POSTGRES_PORT,
+        "client_encoding": "utf8",
+    }
+    _monitoring_engine = create_engine(
+        "postgresql://",
+        connect_args=connection_settings,
+        echo=False,
+        pool_size=3,
+        pool_pre_ping=True,
+        pool_timeout=30,
+    )
+    _monitoring_session_maker = sessionmaker(bind=_monitoring_engine)
+    return _monitoring_engine
+
+
+@contextmanager
+def get_monitoring_db_session(db_session=None):
+    """
+    Yield a session for monitoring data.
+
+    When ``db_session`` is supplied it is yielded as-is and never committed,
+    rolled back or closed here. Otherwise a new session is created from the
+    monitoring session factory, committed after the managed block succeeds,
+    rolled back if it raises, and always closed. Exceptions are logged and
+    re-raised in both cases.
+    """
+    _get_monitoring_engine()
+    owns_session = db_session is None
+    session = _monitoring_session_maker() if owns_session else db_session
+    try:
+        yield session
+        if owns_session:
+            session.commit()
+    except Exception as exc:
+        if owns_session:
+            session.rollback()
+        logger.error(f"Monitoring database operation failed: {str(exc)}")
+        raise
+    finally:
+        if owns_session:
+            session.close()
diff --git a/backend/database/community_mcp_db.py b/backend/database/community_mcp_db.py
new file mode 100644
index 000000000..92b78a4ed
--- /dev/null
+++ b/backend/database/community_mcp_db.py
@@ -0,0 +1,181 @@
+import logging
+from typing import Any, Dict, List
+
+from sqlalchemy import func, or_
+
+from database.client import as_dict, filter_property, get_db_session
+from database.db_models import McpCommunityRecord
+
+logger = logging.getLogger("community_mcp_db")
+
+
+def _escape_like_pattern(raw: str) -> str:
+    """Escape backslash, ``%`` and ``_`` so ``raw`` is matched literally by LIKE/ILIKE."""
+    return raw.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
+
+
+def get_mcp_community_records(
+    *,
+    search: str | None = None,
+    tag: str | None = None,
+    transport_type: str | None = None,
+    cursor: str | None = None,
+    limit: int = 30,
+) -> Dict[str, Any]:
+    """
+    Return one page of non-deleted community MCP records, newest ID first.
+
+    Args:
+        search: Optional text matched case-insensitively as a literal substring
+            of the name, the description, or the comma-joined tags.
+        tag: Optional tag that must be present in the record's tags array.
+        transport_type: Optional exact transport type filter.
+        cursor: Optional ``community_id`` (as a string) of the last item of the
+            previous page; only records with a smaller ID are returned. A
+            non-numeric cursor is ignored.
+        limit: Maximum number of items to return; negative values are treated as 0.
+
+    Returns:
+        Dict with ``count`` (items in this page), ``nextCursor`` (string ID to
+        pass as ``cursor`` for the next page, or None when there is none) and
+        ``items`` (records converted with ``as_dict``).
+    """
+    # A negative limit would produce a negative SQL LIMIT and a wrong slice below.
+    page_size = max(limit, 0)
+
+    with get_db_session() as session:
+        query = session.query(McpCommunityRecord).filter(
+            McpCommunityRecord.delete_flag != "Y"
+        )
+
+        if transport_type:
+            query = query.filter(McpCommunityRecord.transport_type == transport_type)
+
+        if tag:
+            query = query.filter(McpCommunityRecord.tags.any(tag))
+
+        if search:
+            # Escape wildcards so "%" or "_" typed by the user match themselves
+            # instead of matching arbitrary text.
+            keyword = f"%{_escape_like_pattern(search)}%"
+            query = query.filter(
+                or_(
+                    McpCommunityRecord.mcp_name.ilike(keyword, escape="\\"),
+                    McpCommunityRecord.description.ilike(keyword, escape="\\"),
+                    func.array_to_string(McpCommunityRecord.tags, ",").ilike(
+                        keyword, escape="\\"
+                    ),
+                )
+            )
+
+        cursor_id: int | None = None
+        if cursor:
+            try:
+                cursor_id = int(cursor)
+            except ValueError:
+                cursor_id = None
+
+        if cursor_id is not None:
+            query = query.filter(McpCommunityRecord.community_id < cursor_id)
+
+        # Fetch one extra row to learn whether another page exists.
+        rows: List[McpCommunityRecord] = (
+            query.order_by(McpCommunityRecord.community_id.desc())
+            .limit(page_size + 1)
+            .all()
+        )
+
+        has_next = len(rows) > page_size
+        page_rows = rows[:page_size]
+
+        next_cursor = None
+        if has_next and page_rows:
+            next_cursor = str(page_rows[-1].community_id)
+
+        return {
+            "count": len(page_rows),
+            "nextCursor": next_cursor,
+            "items": [as_dict(row) for row in page_rows],
+        }
+
+
+def get_mcp_community_tag_stats() -> List[Dict[str, Any]]:
+    """
+    Count how many non-deleted community records carry each tag.
+
+    Returns:
+        List of ``{"tag": str, "count": int}`` dicts ordered by count
+        descending, then by tag; rows with an empty tag are dropped.
+    """
+    usage_count = func.count(McpCommunityRecord.community_id)
+    with get_db_session() as session:
+        tag_rows = (
+            session.query(
+                func.unnest(McpCommunityRecord.tags).label("tag"),
+                usage_count.label("count"),
+            )
+            .filter(McpCommunityRecord.delete_flag != "Y")
+            .group_by("tag")
+            .order_by(usage_count.desc(), "tag")
+            .all()
+        )
+        stats: List[Dict[str, Any]] = []
+        for tag_row in tag_rows:
+            if tag_row.tag:
+                stats.append({"tag": str(tag_row.tag), "count": int(tag_row.count)})
+        return stats
+
+
+def create_mcp_community_record(mcp_data: Dict[str, Any], tenant_id: str, user_id: str) -> int:
+    """
+    Insert a new community MCP record and return its ID.
+
+    Args:
+        mcp_data: Field values for the record. Keys that are not columns of
+            ``McpCommunityRecord`` are dropped by ``filter_property``. The dict
+            is not modified.
+        tenant_id: Publisher tenant ID stored on the record.
+        user_id: Publisher user ID; also stored as ``created_by`` and ``updated_by``.
+
+    Returns:
+        int: ``community_id`` of the new record, available after the flush.
+    """
+    # Build a merged copy so the caller's dict is not mutated as a side effect.
+    record_data = {
+        **mcp_data,
+        "tenant_id": tenant_id,
+        "user_id": user_id,
+        "created_by": user_id,
+        "updated_by": user_id,
+        "delete_flag": "N",
+        "source": "community",
+    }
+    with get_db_session() as session:
+        new_record = McpCommunityRecord(**filter_property(record_data, McpCommunityRecord))
+        session.add(new_record)
+        # Flush so the sequence-generated primary key is populated before returning.
+        session.flush()
+        return int(new_record.community_id)
+
+
+def get_mcp_community_record_by_id_and_tenant(community_id: int, tenant_id: str) -> Dict[str, Any] | None:
+    """
+    Look up one non-deleted community record by ID within a tenant.
+
+    Returns:
+        The record converted with ``as_dict``, or None when no row matches.
+    """
+    with get_db_session() as session:
+        match = (
+            session.query(McpCommunityRecord)
+            .filter(
+                McpCommunityRecord.community_id == community_id,
+                McpCommunityRecord.tenant_id == tenant_id,
+                McpCommunityRecord.delete_flag != "Y",
+            )
+            .first()
+        )
+        if not match:
+            return None
+        return as_dict(match)
+
+
+def update_mcp_community_record_by_id(
+    *,
+    community_id: int,
+    tenant_id: str,
+    user_id: str,
+    name: str | None = None,
+    description: str | None = None,
+    tags: List[str] | None = None,
+    version: str | None = None,
+    registry_json: Dict[str, Any] | None = None,
+    config_json: Dict[str, Any] | None = None,
+) -> None:
+    """
+    Update a non-deleted community record owned by ``tenant_id``.
+
+    ``updated_by`` is always set to ``user_id``. Each optional argument is
+    written only when it is not None; ``name`` maps to the ``mcp_name`` column.
+    Nothing is returned, so a non-matching ID is not reported to the caller.
+    """
+    # (column name, new value) pairs for the optional arguments.
+    optional_columns = (
+        ("mcp_name", name),
+        ("description", description),
+        ("tags", tags),
+        ("version", version),
+        ("registry_json", registry_json),
+        ("config_json", config_json),
+    )
+    update_fields: Dict[str, Any] = {"updated_by": user_id}
+    update_fields.update(
+        (column, value) for column, value in optional_columns if value is not None
+    )
+
+    with get_db_session() as session:
+        target = session.query(McpCommunityRecord).filter(
+            McpCommunityRecord.community_id == community_id,
+            McpCommunityRecord.tenant_id == tenant_id,
+            McpCommunityRecord.delete_flag != "Y",
+        )
+        target.update(update_fields)
+
+
+def delete_mcp_community_record_by_id(*, community_id: int, tenant_id: str, user_id: str) -> None:
+    """
+    Soft-delete a community record owned by ``tenant_id``.
+
+    Sets ``delete_flag`` to "Y" and ``updated_by`` to ``user_id`` on the
+    matching row that is not already deleted; the row itself is kept.
+    """
+    soft_delete_values = {"delete_flag": "Y", "updated_by": user_id}
+    with get_db_session() as session:
+        target = session.query(McpCommunityRecord).filter(
+            McpCommunityRecord.community_id == community_id,
+            McpCommunityRecord.tenant_id == tenant_id,
+            McpCommunityRecord.delete_flag != "Y",
+        )
+        target.update(soft_delete_values)
+
+
+def list_mcp_community_records_by_tenant(tenant_id: str) -> List[Dict[str, Any]]:
+    """
+    Return every non-deleted community record published by ``tenant_id``.
+
+    Returns:
+        Records converted with ``as_dict``, ordered by ``community_id`` descending.
+    """
+    with get_db_session() as session:
+        tenant_query = (
+            session.query(McpCommunityRecord)
+            .filter(
+                McpCommunityRecord.tenant_id == tenant_id,
+                McpCommunityRecord.delete_flag != "Y",
+            )
+            .order_by(McpCommunityRecord.community_id.desc())
+        )
+        records: List[Dict[str, Any]] = []
+        for record in tenant_query.all():
+            records.append(as_dict(record))
+        return records
+
+def get_mcp_community_tag_stats_by_tenant(tenant_id: str) -> List[Dict[str, Any]]:
+    """
+    Count tag usage across the non-deleted community records of one tenant.
+
+    Returns:
+        List of ``{"tag": str, "count": int}`` dicts ordered by count
+        descending, then by tag; rows with an empty tag are dropped.
+    """
+    tag_column = func.unnest(McpCommunityRecord.tags).label("tag")
+    count_expr = func.count(McpCommunityRecord.community_id)
+    with get_db_session() as session:
+        grouped = (
+            session.query(tag_column, count_expr.label("count"))
+            .filter(
+                McpCommunityRecord.tenant_id == tenant_id,
+                McpCommunityRecord.delete_flag != "Y",
+            )
+            .group_by("tag")
+            .order_by(count_expr.desc(), "tag")
+        )
+        return [
+            {"tag": str(entry.tag), "count": int(entry.count)}
+            for entry in grouped.all()
+            if entry.tag
+        ]
diff --git a/backend/database/conversation_db.py b/backend/database/conversation_db.py
index 18c0ee9fc..e401beda9 100644
--- a/backend/database/conversation_db.py
+++ b/backend/database/conversation_db.py
@@ -623,9 +623,18 @@ def get_conversation_history(conversation_id: int, user_id: Optional[str] = None
}
+def _image_exists(session, message_id: int, image_url: str) -> bool:
+    """Return True when a non-deleted source image with this message_id and image_url exists."""
+    conditions = (
+        ConversationSourceImage.message_id == message_id,
+        ConversationSourceImage.image_url == image_url,
+        ConversationSourceImage.delete_flag == 'N',
+    )
+    # LIMIT 1 guarantees at most one row, so scalar_one_or_none cannot raise on duplicates.
+    lookup = select(ConversationSourceImage).where(*conditions).limit(1)
+    found = session.execute(lookup).scalar_one_or_none()
+    return found is not None
+
+
def create_source_image(image_data: Dict[str, Any], user_id: Optional[str] = None) -> int:
"""
- Create image source reference
+ Create image source reference (skips if the same message_id + image_url already exists).
Args:
image_data: Dictionary containing image data, must include the following fields:
@@ -634,17 +643,22 @@ def create_source_image(image_data: Dict[str, Any], user_id: Optional[str] = Non
user_id: Reserved parameter for created_by and updated_by fields
Returns:
- int: Newly created image ID (auto-increment ID)
+ int: Newly created image ID (auto-increment ID), or -1 if skipped due to duplicate
"""
with get_db_session() as session:
# Ensure message_id is of integer type
message_id = int(image_data['message_id'])
+ image_url = image_data['image_url']
+
+ # Skip duplicate: same message_id + image_url already in DB
+ if _image_exists(session, message_id, image_url):
+ return -1
# Prepare data dictionary
data = {
"message_id": message_id,
"conversation_id": image_data.get('conversation_id'),
- "image_url": image_data['image_url'],
+ "image_url": image_url,
"delete_flag": 'N',
# Use the database's CURRENT_TIMESTAMP function
"create_time": func.current_timestamp()
@@ -1016,3 +1030,71 @@ def get_message_id_by_index(conversation_id: int, message_index: int) -> Optiona
result = session.execute(stmt).scalar()
return result
+
+
+def get_latest_assistant_message_id(conversation_id: int, user_id: Optional[str] = None) -> Optional[int]:
+    """
+    Find the newest assistant message of a conversation.
+
+    Args:
+        conversation_id: Conversation ID; coerced with ``int()``.
+        user_id: When given, the conversation record must have been created by
+            this user for a message to be returned.
+
+    Returns:
+        Optional[int]: ID of the non-deleted assistant message with the highest
+        ``message_index``, or None when there is no such message.
+    """
+    with get_db_session() as session:
+        conv_id = int(conversation_id)
+
+        query = select(ConversationMessage.message_id).where(
+            ConversationMessage.conversation_id == conv_id,
+            ConversationMessage.delete_flag == 'N',
+            ConversationMessage.message_role == 'assistant',
+        )
+
+        if user_id:
+            # Ownership check: restrict to conversations created by this user.
+            owner_join = (
+                ConversationMessage.conversation_id == ConversationRecord.conversation_id
+            )
+            query = query.join(ConversationRecord, owner_join).where(
+                ConversationRecord.created_by == user_id
+            )
+
+        query = query.order_by(desc(ConversationMessage.message_index)).limit(1)
+        return session.execute(query).scalar()
+
+
+def update_message_minio_files(message_id: int, skill_file_uploads: List[Dict[str, Any]]) -> bool:
+    """
+    Merge skill file uploads into an existing message's minio_files field.
+
+    The stored value is decoded when it is a JSON string and normalised to a
+    list before appending: a list is copied, a single JSON object is kept as
+    the first element, and anything else (empty, ``null``, unparsable text, a
+    scalar) starts a fresh list. The merged list is written back as a JSON string.
+
+    Args:
+        message_id: Message ID to update; coerced with ``int()``.
+        skill_file_uploads: Upload metadata dicts to append; None is treated as empty.
+
+    Returns:
+        bool: True if the message was updated, False if the message was not found
+    """
+    with get_db_session() as session:
+        message_id = int(message_id)
+
+        stmt = select(ConversationMessage).where(
+            ConversationMessage.message_id == message_id,
+            ConversationMessage.delete_flag == 'N'
+        )
+        record = session.scalars(stmt).first()
+        if not record:
+            return False
+
+        stored = record.minio_files
+        if isinstance(stored, str):
+            try:
+                stored = json.loads(stored)
+            except (json.JSONDecodeError, TypeError):
+                stored = None
+
+        # Valid JSON is not necessarily a list ("null", "{}", "3"); calling
+        # .extend on such a value would raise, so normalise first.
+        if isinstance(stored, list):
+            merged = list(stored)  # copy: never mutate the loaded value in place
+        elif isinstance(stored, dict):
+            merged = [stored]
+        else:
+            merged = []
+
+        merged.extend(skill_file_uploads or [])
+        record.minio_files = json.dumps(merged, ensure_ascii=False)
+
+        return True
diff --git a/backend/database/db_models.py b/backend/database/db_models.py
index 3741dd559..5450b5f74 100644
--- a/backend/database/db_models.py
+++ b/backend/database/db_models.py
@@ -1,5 +1,5 @@
-from sqlalchemy import BigInteger, Boolean, Column, ForeignKey, ForeignKeyConstraint, Integer, JSON, Numeric, PrimaryKeyConstraint, Sequence, String, Text, TIMESTAMP, UniqueConstraint
-from sqlalchemy.dialects.postgresql import JSONB
+from sqlalchemy import BigInteger, Boolean, Column, Integer, JSON, Numeric, Sequence, String, Text, TIMESTAMP, UniqueConstraint, Index, Float, text
+from sqlalchemy.dialects.postgresql import ARRAY, JSONB
from sqlalchemy.orm import DeclarativeBase
from sqlalchemy.sql import func
@@ -15,6 +15,8 @@
_TENANT_ID_DOC = "Tenant ID for multi-tenancy isolation"
# Base class for tables without audit fields
+
+
class SimpleTableBase(DeclarativeBase):
pass
@@ -178,6 +180,90 @@ class ModelRecord(TableBase):
Boolean, default=True, doc="Whether to verify SSL certificates when connecting to this model API. Default is true. Set to false for local services without SSL support.")
chunk_batch = Column(
Integer, doc="Batch size for concurrent embedding requests during document chunking")
+ model_appid = Column(
+ String(100), doc="Application ID for model authentication (used by some STT/TTS providers like Volcano Engine)")
+ access_token = Column(
+ String(100), doc="Access token for model authentication (used by some STT/TTS providers like Volcano Engine)")
+ timeout_seconds = Column(
+ Integer, doc="Request timeout in seconds for this model. Default is 120 seconds.")
+ concurrency_limit = Column(
+ Integer, doc="Maximum concurrent requests for this model. Default is null (unlimited).")
+
+
+class ModelMonitoringRecord(SimpleTableBase):
+ """
+ Model monitoring record table - stores per-request LLM performance metrics.
+ Uses SimpleTableBase to avoid audit fields (created_by, updated_by, etc.).
+
+ Mapped to the "model_monitoring_record_t" table in SCHEMA. Each row holds
+ request identity (model, agent, conversation, tenant, user), timing and
+ token counters, outcome flags, and its own create_time / delete_flag columns.
+ """
+
+ __tablename__ = "model_monitoring_record_t"
+ # Single-column indexes on common filter columns, plus one composite index
+ # (ix_monitoring_model_time) on (model_id, create_time).
+ __table_args__ = (
+ Index("ix_monitoring_model_id", "model_id"),
+ Index("ix_monitoring_tenant_id", "tenant_id"),
+ Index("ix_monitoring_agent_id", "agent_id"),
+ Index("ix_monitoring_create_time", "create_time"),
+ Index("ix_monitoring_is_error", "is_error"),
+ Index("ix_monitoring_model_time", "model_id", "create_time"),
+ Index("ix_monitoring_model_type", "model_type"),
+ {"schema": SCHEMA},
+ )
+
+ monitoring_id = Column(
+ Integer,
+ Sequence("model_monitoring_record_t_monitoring_id_seq", schema=SCHEMA),
+ primary_key=True,
+ nullable=False,
+ doc="Monitoring record ID, auto-increment primary key",
+ )
+ # NOTE(review): described as a reference to model_record_t.model_id, but no
+ # ForeignKey constraint is declared on this column.
+ model_id = Column(
+ Integer, doc="Model ID, foreign key reference to model_record_t.model_id"
+ )
+ model_name = Column(
+ String(100), nullable=False, doc="Model name at the time of the request"
+ )
+ agent_id = Column(Integer, doc="Agent ID that initiated the request")
+ agent_name = Column(
+ String(100), doc="Agent name at the time of the request")
+ conversation_id = Column(
+ Integer, doc="Conversation ID associated with this request"
+ )
+ tenant_id = Column(
+ String(100), nullable=False, doc="Tenant ID for multi-tenant isolation"
+ )
+ user_id = Column(String(100), doc="User ID who initiated the request")
+ request_duration_ms = Column(
+ Integer, doc="Total request duration in milliseconds")
+ ttft_ms = Column(Integer, doc="Time to first token in milliseconds")
+ input_tokens = Column(Integer, doc="Number of input tokens")
+ output_tokens = Column(Integer, doc="Number of output tokens")
+ total_tokens = Column(Integer, doc="Total tokens (input + output)")
+ generation_rate = Column(
+ Float, doc="Token generation rate (tokens per second)")
+ is_streaming = Column(
+ Boolean, default=False, doc="Whether the request used streaming"
+ )
+ # NOTE(review): is_success and is_error are independent columns; nothing in
+ # this model enforces that they are mutually exclusive.
+ is_success = Column(
+ Boolean, default=True, doc="Whether the request completed successfully"
+ )
+ is_error = Column(
+ Boolean, default=False, doc="Whether the request resulted in an error"
+ )
+ error_type = Column(
+ String(50), doc="Error type classification (e.g., auth_error, rate_limit)"
+ )
+ error_message = Column(Text, doc="Error message details")
+ retry_count = Column(Integer, default=0, doc="Number of retry attempts")
+ operation = Column(
+ String(50), doc="Operation type (e.g., llm_completion, llm_chat)"
+ )
+ # Filled in by the database via server_default=func.now().
+ create_time = Column(
+ TIMESTAMP(timezone=False), server_default=func.now(), doc="Record creation time"
+ )
+ delete_flag = Column(String(1), default="N", doc="Soft delete flag: Y/N")
+ display_name = Column(String(200), doc="User-facing model display name")
+ model_type = Column(
+ String(20), default="llm", doc="Model type: llm, embedding, multi_embedding"
+ )
class ToolInfo(TableBase):
@@ -213,13 +299,16 @@ class AgentInfo(TableBase):
agent_id = Column(Integer, Sequence(
"ag_tenant_agent_t_agent_id_seq", schema=SCHEMA), nullable=False, primary_key=True, autoincrement=True, doc="ID")
- version_no = Column(Integer, default=0, nullable=False, primary_key=True, doc="Version number. 0 = draft/editing state, >=1 = published snapshot")
+ version_no = Column(Integer, default=0, nullable=False, primary_key=True,
+ doc="Version number. 0 = draft/editing state, >=1 = published snapshot")
name = Column(String(100), doc="Agent name")
display_name = Column(String(100), doc="Agent display name")
description = Column(Text, doc="Description")
author = Column(String(100), doc="Agent author")
- model_name = Column(String(100), doc="[DEPRECATED] Name of the model used, use model_id instead")
- model_id = Column(Integer, doc="Model ID, foreign key reference to model_record_t.model_id")
+ model_name = Column(
+ String(100), doc="[DEPRECATED] Name of the model used, use model_id instead")
+ model_id = Column(
+ Integer, doc="Model ID, foreign key reference to model_record_t.model_id")
max_steps = Column(Integer, doc="Maximum number of steps")
duty_prompt = Column(Text, doc="Duty prompt content")
constraint_prompt = Column(Text, doc="Constraint prompt content")
@@ -231,12 +320,60 @@ class AgentInfo(TableBase):
Boolean, doc="Whether to provide the running summary to the manager agent")
business_description = Column(
Text, doc="Manually entered by the user to describe the entire business process")
- business_logic_model_name = Column(String(100), doc="Model name used for business logic prompt generation")
- business_logic_model_id = Column(Integer, doc="Model ID used for business logic prompt generation, foreign key reference to model_record_t.model_id")
+ business_logic_model_name = Column(
+ String(100), doc="Model name used for business logic prompt generation")
+ business_logic_model_id = Column(
+ Integer, doc="Model ID used for business logic prompt generation, foreign key reference to model_record_t.model_id")
+ prompt_template_id = Column(
+ Integer, doc="Prompt template ID used for business logic prompt generation")
+ prompt_template_name = Column(String(
+ 100), doc="Prompt template name used for business logic prompt generation")
group_ids = Column(String, doc="Agent group IDs list")
is_new = Column(Boolean, default=False, doc="Whether this agent is marked as new for the user")
current_version_no = Column(Integer, nullable=True, doc="Current published version number. NULL means no version published yet")
ingroup_permission = Column(String(30), doc="In-group permission: EDIT, READ_ONLY, PRIVATE")
+ enable_context_manager = Column(Boolean, default=False, doc="Whether to enable context management (compression) for this agent")
+ verification_config = Column(JSONB, doc="Layered ReAct self-verification configuration")
+ greeting_message = Column(Text, doc="Agent greeting message displayed on chat initial screen")
+ example_questions = Column(JSONB, doc="List of example questions for starting a conversation with this agent")
+
+
+class PromptTemplate(TableBase):
+ """
+ Prompt template table for user-defined prompt generation templates.
+
+ Mapped to "ag_prompt_template_t" in SCHEMA. A template belongs to one
+ (tenant_id, user_id) pair and stores its content as JSONB, with the Chinese
+ content required and the English content optional.
+ """
+ __tablename__ = "ag_prompt_template_t"
+ __table_args__ = (
+ # Partial unique index: template_name must be unique per tenant and user,
+ # but only among rows where delete_flag = 'N'.
+ Index(
+ "uq_prompt_template_user_name_active",
+ "tenant_id",
+ "user_id",
+ "template_name",
+ unique=True,
+ postgresql_where=text("delete_flag = 'N'"),
+ ),
+ # Partial (non-unique) index over tenant, user and template type,
+ # again restricted to rows where delete_flag = 'N'.
+ Index(
+ "idx_ag_prompt_template_t_user",
+ "tenant_id",
+ "user_id",
+ "template_type",
+ postgresql_where=text("delete_flag = 'N'"),
+ ),
+ {"schema": SCHEMA},
+ )
+
+ template_id = Column(Integer, Sequence(
+ "ag_prompt_template_t_template_id_seq", schema=SCHEMA), primary_key=True, nullable=False, autoincrement=True, doc="Prompt template ID")
+ template_name = Column(String(100), nullable=False,
+ doc="Prompt template name")
+ description = Column(String(500), doc="Prompt template description")
+ # Defaults to "agent_generate" when no type is supplied.
+ template_type = Column(String(50), nullable=False,
+ default="agent_generate", doc="Prompt template type")
+ tenant_id = Column(String(100), nullable=False, doc="Tenant ID")
+ user_id = Column(String(100), nullable=False, doc="User ID")
+ template_content_zh = Column(
+ JSONB, nullable=False, doc="Chinese prompt template content")
+ template_content_en = Column(JSONB, doc="English prompt template content")
class ToolInstance(TableBase):
@@ -259,7 +396,8 @@ class ToolInstance(TableBase):
user_id = Column(String(100), doc="User ID")
tenant_id = Column(String(100), doc="Tenant ID")
enabled = Column(Boolean, doc="Enabled")
- version_no = Column(Integer, default=0, primary_key=True, nullable=False, doc="Version number. 0 = draft/editing state, >=1 = published snapshot")
+ version_no = Column(Integer, default=0, primary_key=True, nullable=False,
+ doc="Version number. 0 = draft/editing state, >=1 = published snapshot")
class KnowledgeRecord(TableBase):
@@ -275,11 +413,25 @@ class KnowledgeRecord(TableBase):
knowledge_name = Column(String(100), doc="User-facing knowledge base name")
knowledge_describe = Column(String(3000), doc="Knowledge base description")
knowledge_sources = Column(String(300), doc="Knowledge base sources")
- embedding_model_name = Column(String(200), doc="Embedding model name, used to record the embedding model used by the knowledge base")
+ embedding_model_name = Column(String(
+ 200), doc="Embedding model name, used to record the embedding model used by the knowledge base")
+ embedding_model_id = Column(
+ Integer, doc="Embedding model ID, foreign key reference to model_record_t.model_id")
tenant_id = Column(String(100), doc="Tenant ID")
group_ids = Column(String, doc="Knowledge base group IDs list")
ingroup_permission = Column(
String(30), doc="In-group permission: EDIT, READ_ONLY, PRIVATE")
+ summary_frequency = Column(String(10), nullable=True,
+ doc="Auto-summary frequency: '3h', '5h', '1d', '1w', or NULL (disabled)")
+ last_summary_time = Column(TIMESTAMP(timezone=False), nullable=True,
+ doc="Timestamp of last summary generation")
+ last_doc_update_time = Column(TIMESTAMP(timezone=False), nullable=True,
+ doc="Timestamp of last document add/delete operation")
+ preserve_source_file = Column(
+ Boolean,
+ default=True,
+ doc="Whether to preserve uploaded source documents after vectorization",
+ )
class TenantConfig(TableBase):
@@ -338,11 +490,54 @@ class McpRecord(TableBase):
String(200),
doc="Docker container ID for MCP service, None for non-containerized MCP",
)
+ container_port = Column(
+ Integer,
+ doc="Host port bound for containerized MCP service",
+ )
authorization_token = Column(
String(500),
doc="Authorization token for MCP server authentication (e.g., Bearer token)",
default=None,
)
+ custom_headers = Column(
+ JSON,
+ doc="Custom HTTP headers as JSON object for MCP server requests",
+ default=None,
+ )
+ source = Column(
+ String(30), doc="Source type: local/mcp_registry/community")
+ registry_json = Column(JSONB, doc="Full MCP registry server.json snapshot")
+ config_json = Column(JSON, doc="MCP config data")
+ enabled = Column(Boolean, default=True, doc="Enabled")
+ tags = Column(ARRAY(Text), doc="Tags")
+ description = Column(Text, doc="Description")
+
+
+class McpCommunityRecord(TableBase):
+ """
+ Community MCP market records table.
+
+ Mapped to "mcp_community_record_t" in SCHEMA. Each row is one published MCP
+ entry with its publisher (tenant_id, user_id), server URL, version, tags and
+ JSON metadata.
+ """
+
+ __tablename__ = "mcp_community_record_t"
+ __table_args__ = {"schema": SCHEMA}
+
+ community_id = Column(
+ Integer,
+ Sequence("mcp_community_record_t_community_id_seq", schema=SCHEMA),
+ primary_key=True,
+ nullable=False,
+ doc="Community record ID, unique primary key",
+ )
+ tenant_id = Column(String(100), doc="Publisher tenant ID")
+ user_id = Column(String(100), doc="Publisher user ID")
+ mcp_name = Column(String(100), doc="MCP name")
+ mcp_server = Column(String(500), doc="MCP server URL")
+ source = Column(String(30), doc="Source type, fixed to community")
+ version = Column(String(50), doc="MCP version")
+ # registry_json is stored as JSONB, while config_json below uses plain JSON.
+ registry_json = Column(JSONB, doc="Full MCP metadata JSON")
+ transport_type = Column(
+ String(30), doc="Transport type: http/sse/container")
+ config_json = Column(JSON, doc="Public-shareable MCP configuration JSON")
+ # Array-of-text column.
+ tags = Column(ARRAY(Text), doc="Tags")
+ description = Column(Text, doc="Description")
class UserTenant(TableBase):
@@ -356,7 +551,8 @@ class UserTenant(TableBase):
primary_key=True, nullable=False, doc="User tenant relationship ID, unique primary key")
user_id = Column(String(100), nullable=False, doc="User ID")
tenant_id = Column(String(100), nullable=False, doc="Tenant ID")
- user_role = Column(String(30), doc="User role: SUPER_ADMIN, ADMIN, DEV, USER")
+ user_role = Column(
+ String(30), doc="User role: SUPER_ADMIN, ADMIN, DEV, USER")
user_email = Column(String(255), doc="User email address")
@@ -367,11 +563,18 @@ class AgentRelation(TableBase):
__tablename__ = "ag_agent_relation_t"
__table_args__ = {"schema": SCHEMA}
- relation_id = Column(Integer, Sequence("ag_agent_relation_t_relation_id_seq", schema=SCHEMA), primary_key=True, nullable=False, doc="Relationship ID, primary key")
- selected_agent_id = Column(Integer, primary_key=True, doc="Selected agent ID")
+ relation_id = Column(Integer, Sequence("ag_agent_relation_t_relation_id_seq", schema=SCHEMA),
+ primary_key=True, nullable=False, doc="Relationship ID, primary key")
+ selected_agent_id = Column(
+ Integer, primary_key=True, doc="Selected agent ID")
parent_agent_id = Column(Integer, doc="Parent agent ID")
tenant_id = Column(String(100), doc="Tenant ID")
- version_no = Column(Integer, default=0, nullable=False, doc="Version number. 0 = draft/editing state, >=1 = published snapshot")
+ version_no = Column(Integer, default=0, nullable=False,
+ doc="Version number. 0 = draft/editing state, >=1 = published snapshot")
+ selected_agent_version_no = Column(
+ Integer, nullable=True,
+ doc="Pinned version of selected_agent_id. NULL = runtime fallback to child current_version_no",
+ )
class PartnerMappingId(TableBase):
@@ -487,12 +690,51 @@ class AgentVersion(TableBase):
primary_key=True, nullable=False, doc=_PRIMARY_KEY_DOC)
tenant_id = Column(String(100), nullable=False, doc="Tenant ID")
agent_id = Column(Integer, nullable=False, doc="Agent ID")
- version_no = Column(Integer, nullable=False, doc="Version number, starts from 1. Does not include 0 (draft)")
- version_name = Column(String(100), doc="User-defined version name for display")
+ version_no = Column(Integer, nullable=False,
+ doc="Version number, starts from 1. Does not include 0 (draft)")
+ version_name = Column(
+ String(100), doc="User-defined version name for display")
release_note = Column(Text, doc="Release notes / publish remarks")
- source_version_no = Column(Integer, doc="Source version number. If this version is a rollback, record the source version")
- source_type = Column(String(30), doc="Source type: NORMAL (normal publish) / ROLLBACK (rollback and republish)")
- status = Column(String(30), default="RELEASED", doc="Version status: RELEASED / DISABLED / ARCHIVED")
+ source_version_no = Column(
+ Integer, doc="Source version number. If this version is a rollback, record the source version")
+ source_type = Column(String(
+ 30), doc="Source type: NORMAL (normal publish) / ROLLBACK (rollback and republish)")
+ status = Column(String(30), default="RELEASED",
+ doc="Version status: RELEASED / DISABLED / ARCHIVED")
+ is_a2a = Column(Boolean, default=False,
+ doc="Whether this version is published as an A2A Server agent")
+
+
+class AgentRepository(TableBase):
+ """
+ Agent repository (marketplace) table. Frozen snapshot of a published agent tree for sharing.
+
+ Mapped to "ag_agent_repository_t" in SCHEMA. Each listing records its
+ publisher, the root agent and version it was taken from, display metadata,
+ and the snapshot itself in agent_info_json.
+ """
+ __tablename__ = "ag_agent_repository_t"
+ __table_args__ = {"schema": SCHEMA}
+
+ agent_repository_id = Column(BigInteger, Sequence("ag_agent_repository_t_agent_repository_id_seq", schema=SCHEMA),
+ primary_key=True, nullable=False, doc="Agent repository listing ID, unique primary key")
+ publisher_tenant_id = Column(String(100), nullable=False, doc="Publisher tenant ID")
+ publisher_user_id = Column(String(100), nullable=False, doc="Publisher user ID")
+ # NOTE(review): described as the upsert key, but no unique constraint on
+ # agent_id is declared in this model - confirm uniqueness is enforced elsewhere.
+ agent_id = Column(Integer, nullable=False,
+ doc="Root agent ID from ag_tenant_agent_t; upsert key")
+ source_version_no = Column(Integer, nullable=False,
+ doc="Published version number frozen at share time")
+ name = Column(String(100), nullable=False,
+ doc="Root agent programmatic name for display and search")
+ display_name = Column(String(100), doc="Root agent display name")
+ description = Column(Text, doc="Root agent description")
+ author = Column(String(100), doc="Agent author")
+ category_id = Column(Integer, doc="Optional marketplace category ID")
+ tags = Column(ARRAY(Text), doc="Marketplace tags")
+ tool_count = Column(Integer,
+ doc="Total tool count across all agents in the bundle (display only)")
+ version_label = Column(String(100),
+ doc="Repository entry version label for display (e.g. v1.0)")
+ # Required JSONB payload holding the snapshot.
+ agent_info_json = Column(JSONB, nullable=False,
+ doc="Frozen ExportAndImportDataFormat snapshot with optional skills")
+ # New listings default to "NOT_SHARED".
+ status = Column(String(30), default="NOT_SHARED",
+ doc="Listing status: NOT_SHARED (未共享) / PENDING_REVIEW (待审核) / REJECTED (审核驳回) / SHARED (已共享)")
class UserTokenInfo(TableBase):
@@ -505,7 +747,8 @@ class UserTokenInfo(TableBase):
token_id = Column(Integer, Sequence("user_token_info_t_token_id_seq", schema=SCHEMA),
primary_key=True, nullable=False, doc="Token ID, unique primary key")
access_key = Column(String(100), nullable=False, doc="Access Key (AK)")
- user_id = Column(String(100), nullable=False, doc="User ID who owns this token")
+ user_id = Column(String(100), nullable=False,
+ doc="User ID who owns this token")
class UserTokenUsageLog(TableBase):
@@ -517,10 +760,68 @@ class UserTokenUsageLog(TableBase):
token_usage_id = Column(Integer, Sequence("user_token_usage_log_t_token_usage_id_seq", schema=SCHEMA),
primary_key=True, nullable=False, doc="Token usage log ID, unique primary key")
- token_id = Column(Integer, nullable=False, doc="Foreign key to user_token_info_t.token_id")
- call_function_name = Column(String(100), doc="API function name being called")
- related_id = Column(Integer, doc="Related resource ID (e.g., conversation_id)")
- meta_data = Column(JSONB, doc="Additional metadata for this usage log entry, stored as JSON")
+ token_id = Column(Integer, nullable=False,
+ doc="Foreign key to user_token_info_t.token_id")
+ call_function_name = Column(
+ String(100), doc="API function name being called")
+ related_id = Column(
+ Integer, doc="Related resource ID (e.g., conversation_id)")
+ meta_data = Column(
+ JSONB, doc="Additional metadata for this usage log entry, stored as JSON")
+
+
+class UserOAuthAccount(TableBase):
+ """
+ Link between a user and an account at an external OAuth provider.
+
+ Mapped to "user_oauth_account_t" in SCHEMA. A given
+ (provider, provider_user_id) pair can appear only once.
+ """
+ __tablename__ = "user_oauth_account_t"
+ __table_args__ = (
+ # One row per external identity: the same provider account cannot be linked twice.
+ UniqueConstraint("provider", "provider_user_id",
+ name="uq_oauth_provider_user"),
+ {"schema": SCHEMA},
+ )
+
+ oauth_account_id = Column(
+ Integer,
+ Sequence("user_oauth_account_t_oauth_account_id_seq", schema=SCHEMA),
+ primary_key=True,
+ nullable=False,
+ doc="OAuth account ID, primary key",
+ )
+ user_id = Column(String(100), nullable=False, doc="Supabase user UUID")
+ provider = Column(
+ String(30), nullable=False, doc="OAuth provider name: github, wechat, gde, link_app"
+ )
+ provider_user_id = Column(
+ String(200), nullable=False, doc="User ID from the OAuth provider"
+ )
+ # Optional profile details copied from the provider.
+ provider_email = Column(
+ String(255), doc="Email address from the OAuth provider")
+ provider_username = Column(
+ String(200), doc="Display name from the OAuth provider")
+ tenant_id = Column(String(100), doc="Tenant ID at time of linking")
+
+
+class UserCasSession(TableBase):
+ """
+ CAS login session record.
+
+ Mapped to "user_cas_session_t" in SCHEMA. Ties a JWT session ID to a user
+ and a CAS user ID, with a status, a required expiry time and an optional
+ revocation time.
+ """
+ __tablename__ = "user_cas_session_t"
+ __table_args__ = (
+ # NOTE(review): session_id is also declared unique=True below, which
+ # usually creates its own unique index - confirm this explicit index is
+ # not redundant.
+ Index("ix_user_cas_session_session_id", "session_id"),
+ Index("ix_user_cas_session_user_id", "user_id"),
+ Index("ix_user_cas_session_cas_user_id", "cas_user_id"),
+ {"schema": SCHEMA},
+ )
+
+ cas_session_id = Column(
+ Integer,
+ Sequence("user_cas_session_t_cas_session_id_seq", schema=SCHEMA),
+ primary_key=True,
+ nullable=False,
+ doc="CAS session record ID",
+ )
+ session_id = Column(String(100), nullable=False, unique=True, doc="JWT session ID")
+ user_id = Column(String(100), nullable=False, doc="Supabase user UUID")
+ cas_user_id = Column(String(200), nullable=False, doc="User ID from CAS")
+ cas_session_index = Column(String(500), doc="CAS SessionIndex or service ticket")
+ # New rows default to "active".
+ status = Column(String(30), nullable=False, default="active", doc="active/revoked")
+ expires_at = Column(TIMESTAMP(timezone=False), nullable=False, doc="Session expiration time")
+ revoked_at = Column(TIMESTAMP(timezone=False), doc="Revocation time")
class SkillInfo(TableBase):
@@ -532,11 +833,17 @@ class SkillInfo(TableBase):
skill_id = Column(Integer, Sequence("ag_skill_info_t_skill_id_seq", schema=SCHEMA),
primary_key=True, nullable=False, autoincrement=True, doc="Skill ID")
- skill_name = Column(String(100), nullable=False, unique=True, doc="Unique skill name")
+ skill_name = Column(String(100), nullable=False,
+ unique=True, doc="Unique skill name")
+ tenant_id = Column(String(100), nullable=True,
+ doc="Tenant ID for multi-tenancy. NULL for pre-existing skills.")
skill_description = Column(String(1000), doc="Skill description")
skill_tags = Column(JSON, doc="Skill tags as JSON array")
skill_content = Column(Text, doc="Skill content in markdown format")
- params = Column(JSON, doc="Skill configuration parameters as JSON object")
+ config_schemas = Column(
+ JSON, doc="Parameter metadata from config/schema.yaml")
+ config_values = Column(
+ JSON, doc="Runtime parameter values from config/config.yaml")
source = Column(String(30), nullable=False, default="official",
doc="Skill source: official, custom, etc.")
@@ -550,8 +857,10 @@ class SkillToolRelation(TableBase):
rel_id = Column(Integer, Sequence("ag_skill_tools_rel_t_rel_id_seq", schema=SCHEMA),
primary_key=True, nullable=False, autoincrement=True, doc="Relation ID")
- skill_id = Column(Integer, nullable=False, doc="Foreign key to ag_skill_info_t.skill_id")
- tool_id = Column(Integer, nullable=False, doc="Foreign key to ag_tool_info_t.tool_id")
+ skill_id = Column(Integer, nullable=False,
+ doc="Foreign key to ag_skill_info_t.skill_id")
+ tool_id = Column(Integer, nullable=False,
+ doc="Foreign key to ag_tool_info_t.tool_id")
class SkillInstance(TableBase):
@@ -570,12 +879,19 @@ class SkillInstance(TableBase):
nullable=False,
doc="Skill instance ID"
)
- skill_id = Column(Integer, nullable=False, doc="Foreign key to ag_skill_info_t.skill_id")
+ skill_id = Column(Integer, nullable=False,
+ doc="Foreign key to ag_skill_info_t.skill_id")
agent_id = Column(Integer, nullable=False, doc="Agent ID")
user_id = Column(String(100), doc="User ID")
tenant_id = Column(String(100), doc="Tenant ID")
- enabled = Column(Boolean, default=True, doc="Whether this skill is enabled for the agent")
- version_no = Column(Integer, default=0, primary_key=True, nullable=False, doc="Version number. 0 = draft/editing state, >=1 = published snapshot")
+ enabled = Column(Boolean, default=True,
+ doc="Whether this skill is enabled for the agent")
+ version_no = Column(Integer, default=0, primary_key=True, nullable=False,
+ doc="Version number. 0 = draft/editing state, >=1 = published snapshot")
+ config_values = Column(
+ JSON, doc="Per-agent runtime parameter values (mirrors ag_tool_instance_t.params)")
+ config_schemas = Column(
+ JSON, doc="Per-agent parameter schema overrides from config/schema.yaml")
class OuterApiService(TableBase):
@@ -588,13 +904,16 @@ class OuterApiService(TableBase):
id = Column(BigInteger, Sequence("ag_outer_api_services_id_seq", schema=SCHEMA),
primary_key=True, nullable=False, doc="Service ID, unique primary key")
- mcp_service_name = Column(String(100), nullable=False, doc="MCP service name (unique identifier per tenant)")
+ mcp_service_name = Column(String(100), nullable=False,
+ doc="MCP service name (unique identifier per tenant)")
description = Column(Text, doc="Service description from OpenAPI info")
openapi_json = Column(JSONB, doc="Complete OpenAPI JSON specification")
server_url = Column(String(500), doc="Base URL of the REST API server")
headers_template = Column(JSONB, doc="Default headers template as JSON")
- tenant_id = Column(String(100), nullable=False, doc="Tenant ID for multi-tenancy")
- is_available = Column(Boolean, default=True, doc="Whether the service is available")
+ tenant_id = Column(String(100), nullable=False,
+ doc="Tenant ID for multi-tenancy")
+ is_available = Column(Boolean, default=True,
+ doc="Whether the service is available")
# Alias for backward compatibility
@@ -609,27 +928,37 @@ class A2ANacosConfig(TableBase):
__tablename__ = "ag_a2a_nacos_config_t"
__table_args__ = {"schema": SCHEMA}
- id = Column(BigInteger, primary_key=True, autoincrement=True, doc=_PRIMARY_KEY_DOC)
- config_id = Column(String(64), unique=True, nullable=False, doc="Unique config identifier for API reference")
+ id = Column(BigInteger, primary_key=True,
+ autoincrement=True, doc=_PRIMARY_KEY_DOC)
+ config_id = Column(String(64), unique=True, nullable=False,
+ doc="Unique config identifier for API reference")
# Nacos connection
- nacos_addr = Column(String(512), nullable=False, doc="Nacos server address, e.g., http://nacos-server:8848")
- nacos_username = Column(String(100), doc="Nacos username for authentication")
- nacos_password = Column(String(256), doc="Nacos password, encrypted at rest")
+ nacos_addr = Column(String(512), nullable=False,
+ doc="Nacos server address, e.g., http://nacos-server:8848")
+ nacos_username = Column(
+ String(100), doc="Nacos username for authentication")
+ nacos_password = Column(
+ String(256), doc="Nacos password, encrypted at rest")
# Discovery scope
- namespace_id = Column(String(100), default="public", doc="Nacos namespace for service discovery")
+ namespace_id = Column(String(100), default="public",
+ doc="Nacos namespace for service discovery")
# Metadata
- name = Column(String(100), nullable=False, doc="Display name for this Nacos config")
+ name = Column(String(100), nullable=False,
+ doc="Display name for this Nacos config")
description = Column(Text, doc="Description of this Nacos configuration")
# Tenant isolation
- tenant_id = Column(String(100), nullable=False, doc="Tenant ID for multi-tenancy")
+ tenant_id = Column(String(100), nullable=False,
+ doc="Tenant ID for multi-tenancy")
# Status
- is_active = Column(Boolean, default=True, doc="Whether this Nacos config is active")
- last_scan_at = Column(TIMESTAMP(timezone=False), doc="Last time a scan was performed using this config")
+ is_active = Column(Boolean, default=True,
+ doc="Whether this Nacos config is active")
+ last_scan_at = Column(TIMESTAMP(timezone=False),
+ doc="Last time a scan was performed using this config")
class A2AExternalAgent(TableBase):
@@ -640,36 +969,49 @@ class A2AExternalAgent(TableBase):
__tablename__ = "ag_a2a_external_agent_t"
__table_args__ = {"schema": SCHEMA}
- id = Column(BigInteger, primary_key=True, autoincrement=True, doc=_PRIMARY_KEY_DOC)
+ id = Column(BigInteger, primary_key=True,
+ autoincrement=True, doc=_PRIMARY_KEY_DOC)
# Agent metadata (cached from Agent Card)
- name = Column(String(255), nullable=False, doc="Agent name from Agent Card")
+ name = Column(String(255), nullable=False,
+ doc="Agent name from Agent Card")
description = Column(Text, doc="Agent description from Agent Card")
- version = Column(String(50), doc="Agent version from Agent Card, e.g., 1.2.0")
+ version = Column(
+ String(50), doc="Agent version from Agent Card, e.g., 1.2.0")
# Primary interface (extracted from supportedInterfaces for quick access)
# In A2A 1.0, this should store the http-json-rpc URL
- agent_url = Column(String(512), nullable=False, doc="Primary A2A endpoint URL (http-json-rpc by default)")
+ agent_url = Column(String(512), nullable=False,
+ doc="Primary A2A endpoint URL (http-json-rpc by default)")
# Protocol type for calling this agent: JSONRPC, HTTP+JSON, GRPC
- protocol_type = Column(String(20), default=PROTOCOL_JSONRPC, doc="Protocol type for calling this agent")
+ protocol_type = Column(String(20), default=PROTOCOL_JSONRPC,
+ doc="Protocol type for calling this agent")
# Capabilities
- streaming = Column(Boolean, default=False, doc="Whether this agent supports SSE streaming")
+ streaming = Column(Boolean, default=False,
+ doc="Whether this agent supports SSE streaming")
# All supported interfaces (full JSON array from Agent Card)
# Format: [{protocolBinding, url, protocolVersion}, ...]
supported_interfaces = Column(JSON, doc="All supported interfaces array")
# Source information
- source_type = Column(String(20), nullable=False, doc="Discovery source: url or nacos")
+ source_type = Column(String(20), nullable=False,
+ doc="Discovery source: url or nacos")
# For URL mode
source_url = Column(String(512), doc="Direct URL to agent card")
# For Nacos mode
- nacos_config_id = Column(String(64), doc="Reference to Nacos config used for discovery")
- nacos_agent_name = Column(String(255), doc="Original name used for Nacos query")
+ nacos_config_id = Column(
+ String(64), doc="Reference to Nacos config used for discovery")
+ nacos_agent_name = Column(
+ String(255), doc="Original name used for Nacos query")
+
+ # Base URL for infrastructure health checks
+ base_url = Column(String(
+ 512), doc="Base URL for health checks (service root address), e.g., http://agent:8080")
# Tenant isolation
tenant_id = Column(String(100), nullable=False, doc=_TENANT_ID_DOC)
@@ -678,13 +1020,18 @@ class A2AExternalAgent(TableBase):
raw_card = Column(JSON, doc="Full original Agent Card JSON from discovery")
# Cache management
- cached_at = Column(TIMESTAMP(timezone=False), doc="Timestamp when Agent Card was cached")
- cache_expires_at = Column(TIMESTAMP(timezone=False), doc="Timestamp when cache expires")
+ cached_at = Column(TIMESTAMP(timezone=False),
+ doc="Timestamp when Agent Card was cached")
+ cache_expires_at = Column(
+ TIMESTAMP(timezone=False), doc="Timestamp when cache expires")
# Health check status
- is_available = Column(Boolean, default=True, doc="Whether this agent is currently reachable")
- last_check_at = Column(TIMESTAMP(timezone=False), doc="Last health check timestamp")
- last_check_result = Column(String(50), doc="Last health check result: OK, ERROR, TIMEOUT")
+ is_available = Column(Boolean, default=True,
+ doc="Whether this agent is currently reachable")
+ last_check_at = Column(TIMESTAMP(timezone=False),
+ doc="Last health check timestamp")
+ last_check_result = Column(
+ String(50), doc="Last health check result: OK, ERROR, TIMEOUT")
class A2AExternalAgentRelation(TableBase):
@@ -699,28 +1046,26 @@ class A2AExternalAgentRelation(TableBase):
name="uq_local_external_agent",
deferrable=True,
),
- ForeignKeyConstraint(
- ["external_agent_id"],
- [f"{SCHEMA}.ag_a2a_external_agent_t.id"],
- name="fk_external_agent",
- deferrable=True,
- ),
{"schema": SCHEMA},
)
- id = Column(BigInteger, primary_key=True, autoincrement=True, doc=_PRIMARY_KEY_DOC)
+ id = Column(BigInteger, primary_key=True,
+ autoincrement=True, doc=_PRIMARY_KEY_DOC)
# Local agent (parent)
- local_agent_id = Column(Integer, nullable=False, doc="Local parent agent ID")
+ local_agent_id = Column(Integer, nullable=False,
+ doc="Local parent agent ID")
# External A2A agent (sub-agent) - FK to ag_a2a_external_agent_t.id
- external_agent_id = Column(BigInteger, nullable=False, doc="External A2A agent ID (FK to ag_a2a_external_agent_t.id)")
+ external_agent_id = Column(
+ BigInteger, nullable=False, doc="External A2A agent ID (FK to ag_a2a_external_agent_t.id)")
# Tenant isolation
tenant_id = Column(String(100), nullable=False, doc=_TENANT_ID_DOC)
# Status
- is_enabled = Column(Boolean, default=True, doc="Whether this relation is active")
+ is_enabled = Column(Boolean, default=True,
+ doc="Whether this relation is active")
class A2AServerAgent(TableBase):
@@ -731,7 +1076,8 @@ class A2AServerAgent(TableBase):
__tablename__ = "ag_a2a_server_agent_t"
__table_args__ = {"schema": SCHEMA}
- id = Column(BigInteger, primary_key=True, autoincrement=True, doc=_PRIMARY_KEY_DOC)
+ id = Column(BigInteger, primary_key=True,
+ autoincrement=True, doc=_PRIMARY_KEY_DOC)
# Link to local agent
agent_id = Column(Integer, nullable=False, doc="Local agent ID")
@@ -741,35 +1087,44 @@ class A2AServerAgent(TableBase):
tenant_id = Column(String(100), nullable=False, doc=_TENANT_ID_DOC)
# Generated endpoint ID
- endpoint_id = Column(String(64), unique=True, nullable=False, doc="Generated endpoint ID")
+ endpoint_id = Column(String(64), unique=True,
+ nullable=False, doc="Generated endpoint ID")
# Basic info (extracted from local agent, can be overridden)
- name = Column(String(255), nullable=False, doc="Agent name exposed in Agent Card")
+ name = Column(String(255), nullable=False,
+ doc="Agent name exposed in Agent Card")
description = Column(Text, doc="Agent description exposed in Agent Card")
version = Column(String(50), doc="Agent version exposed in Agent Card")
# Primary endpoint URL (http-json-rpc by default)
- agent_url = Column(String(512), doc="Primary A2A endpoint URL (http-json-rpc by default)")
+ agent_url = Column(
+ String(512), doc="Primary A2A endpoint URL (http-json-rpc by default)")
# Capabilities
- streaming = Column(Boolean, default=False, doc="Whether this agent supports SSE streaming")
+ streaming = Column(Boolean, default=False,
+ doc="Whether this agent supports SSE streaming")
# All supported interfaces (A2A 1.0 compliant)
# Format: [{protocolBinding, url, protocolVersion}, ...]
- supported_interfaces = Column(JSON, doc="All supported interfaces: [{protocolBinding, url, protocolVersion}, ...]")
+ supported_interfaces = Column(
+ JSON, doc="All supported interfaces: [{protocolBinding, url, protocolVersion}, ...]")
# Agent Card customization (partial overrides only)
- card_overrides = Column(JSON, doc="User customizations for Agent Card (partial override)")
+ card_overrides = Column(
+ JSON, doc="User customizations for Agent Card (partial override)")
# A2A Server status
- is_enabled = Column(Boolean, default=False, doc="Whether A2A Server is enabled for this agent")
+ is_enabled = Column(Boolean, default=False,
+ doc="Whether A2A Server is enabled for this agent")
# Raw Agent Card (generated from settings, for debugging)
raw_card = Column(JSON, doc="Generated Agent Card JSON (for debugging)")
# Publishing timestamps
- published_at = Column(TIMESTAMP(timezone=False), doc="Timestamp when A2A Server was last enabled")
- unpublished_at = Column(TIMESTAMP(timezone=False), doc="Timestamp when A2A Server was disabled")
+ published_at = Column(TIMESTAMP(timezone=False),
+ doc="Timestamp when A2A Server was last enabled")
+ unpublished_at = Column(TIMESTAMP(timezone=False),
+ doc="Timestamp when A2A Server was disabled")
class A2ATask(SimpleTableBase):
@@ -782,7 +1137,8 @@ class A2ATask(SimpleTableBase):
# Core identifiers (following A2A spec)
id = Column(String(64), primary_key=True, doc="Task ID (A2A spec: taskId)")
- context_id = Column(String(64), doc="Context ID for grouping related tasks")
+ context_id = Column(
+ String(64), doc="Context ID for grouping related tasks")
# Endpoint and caller info
endpoint_id = Column(String(64), nullable=False, doc="Endpoint ID")
@@ -793,16 +1149,21 @@ class A2ATask(SimpleTableBase):
raw_request = Column(JSON, doc="Original A2A request payload")
# Task state (following A2A TaskState enum)
- task_state = Column(String(50), nullable=False, server_default="TASK_STATE_SUBMITTED", doc="Task state: TASK_STATE_SUBMITTED, TASK_STATE_WORKING, TASK_STATE_COMPLETED, TASK_STATE_FAILED, TASK_STATE_CANCELED, TASK_STATE_INPUT_REQUIRED, TASK_STATE_REJECTED, TASK_STATE_AUTH_REQUIRED")
- state_timestamp = Column(TIMESTAMP(timezone=False), doc="Task state last update timestamp")
+ task_state = Column(String(50), nullable=False, server_default="TASK_STATE_SUBMITTED",
+ doc="Task state: TASK_STATE_SUBMITTED, TASK_STATE_WORKING, TASK_STATE_COMPLETED, TASK_STATE_FAILED, TASK_STATE_CANCELED, TASK_STATE_INPUT_REQUIRED, TASK_STATE_REJECTED, TASK_STATE_AUTH_REQUIRED")
+ state_timestamp = Column(TIMESTAMP(timezone=False),
+ doc="Task state last update timestamp")
# Task result
result_data = Column(JSON, doc="Task final result data")
# Timestamps
- create_time = Column(TIMESTAMP(timezone=False), server_default=func.now(), doc="Task creation timestamp")
- update_time = Column(TIMESTAMP(timezone=False), server_default=func.now(), onupdate=func.now(), doc="Task last update timestamp")
- completed_at = Column(TIMESTAMP(timezone=False), doc="Task completion timestamp")
+ create_time = Column(TIMESTAMP(timezone=False),
+ server_default=func.now(), doc="Task creation timestamp")
+ update_time = Column(TIMESTAMP(timezone=False), server_default=func.now(
+ ), onupdate=func.now(), doc="Task last update timestamp")
+ completed_at = Column(TIMESTAMP(timezone=False),
+ doc="Task completion timestamp")
class A2AMessage(SimpleTableBase):
@@ -814,23 +1175,30 @@ class A2AMessage(SimpleTableBase):
__table_args__ = {"schema": SCHEMA}
# Core identifiers (following A2A spec)
- message_id = Column(String(64), primary_key=True, doc="Message ID (A2A spec: messageId)")
- task_id = Column(String(64), ForeignKey(f"{SCHEMA}.ag_a2a_task_t.id", ondelete="CASCADE"), nullable=True, doc="Task ID this message belongs to (nullable for standalone/simple requests)")
+ message_id = Column(String(64), primary_key=True,
+ doc="Message ID (A2A spec: messageId)")
+ task_id = Column(String(64), nullable=True,
+ doc="Task ID this message belongs to (nullable for standalone/simple requests)")
# Message attributes
- message_index = Column(Integer, nullable=False, doc="Order of message in the conversation")
- role = Column(String(20), nullable=False, doc="Message sender role: user or agent")
+ message_index = Column(Integer, nullable=False,
+ doc="Order of message in the conversation")
+ role = Column(String(20), nullable=False,
+ doc="Message sender role: user or agent")
# Message content (following A2A Part structure)
- parts = Column(JSON, nullable=False, doc="Message parts following A2A Part structure")
+ parts = Column(JSON, nullable=False,
+ doc="Message parts following A2A Part structure")
meta_data = Column(JSON, doc="Optional metadata")
extensions = Column(JSON, doc="Extension URI list")
# References to other tasks (optional)
- reference_task_ids = Column(JSON, doc="Referenced task IDs array for multi-turn scenarios")
+ reference_task_ids = Column(
+ JSON, doc="Referenced task IDs array for multi-turn scenarios")
# Timestamp
- create_time = Column(TIMESTAMP(timezone=False), server_default=func.now(), doc="Message creation timestamp")
+ create_time = Column(TIMESTAMP(
+ timezone=False), server_default=func.now(), doc="Message creation timestamp")
class A2AArtifact(SimpleTableBase):
@@ -842,15 +1210,19 @@ class A2AArtifact(SimpleTableBase):
# Core identifiers (following A2A spec)
id = Column(String(64), primary_key=True, doc="Internal primary key")
- artifact_id = Column(String(64), nullable=False, doc="Artifact ID (A2A spec: artifactId)")
- task_id = Column(String(64), ForeignKey(f"{SCHEMA}.ag_a2a_task_t.id", ondelete="CASCADE"), nullable=False, doc="Task ID this artifact belongs to")
+ artifact_id = Column(String(64), nullable=False,
+ doc="Artifact ID (A2A spec: artifactId)")
+ task_id = Column(String(64), nullable=False,
+ doc="Task ID this artifact belongs to")
# Artifact attributes
name = Column(String(255), doc="Human-readable artifact name")
description = Column(Text, doc="Artifact description")
- parts = Column(JSON, nullable=False, doc="Artifact parts following A2A Part structure")
+ parts = Column(JSON, nullable=False,
+ doc="Artifact parts following A2A Part structure")
meta_data = Column(JSON, doc="Artifact metadata")
extensions = Column(JSON, doc="Extension URI list")
# Timestamp
- create_time = Column(TIMESTAMP(timezone=False), server_default=func.now(), doc="Artifact creation timestamp")
+ create_time = Column(TIMESTAMP(
+ timezone=False), server_default=func.now(), doc="Artifact creation timestamp")
diff --git a/backend/database/invitation_db.py b/backend/database/invitation_db.py
index f7e27d005..32523cd06 100644
--- a/backend/database/invitation_db.py
+++ b/backend/database/invitation_db.py
@@ -300,8 +300,8 @@ def query_invitations_with_pagination(
TenantInvitationCode.delete_flag == "N"
)
- # Apply tenant filter if provided
- if tenant_id:
+ # Apply tenant filter when tenant_id is specified (including ASSET_OWNER virtual tenant)
+ if tenant_id is not None:
query = query.filter(TenantInvitationCode.tenant_id == tenant_id)
# Apply sorting
diff --git a/backend/database/knowledge_db.py b/backend/database/knowledge_db.py
index df42e1888..8fc60d6bd 100644
--- a/backend/database/knowledge_db.py
+++ b/backend/database/knowledge_db.py
@@ -1,5 +1,6 @@
from typing import Any, Dict, List, Optional
+import logging
import uuid
from sqlalchemy import func
from sqlalchemy.exc import SQLAlchemyError
@@ -7,6 +8,9 @@
from database.client import as_dict, get_db_session
from database.db_models import KnowledgeRecord
from utils.str_utils import convert_list_to_string
+from consts.scheduler import VALID_SUMMARY_FREQUENCIES
+
+logger = logging.getLogger("knowledge_db")
def _generate_index_name(knowledge_id: int) -> str:
@@ -30,6 +34,7 @@ def create_knowledge_record(query: Dict[str, Any]) -> Dict[str, Any]:
- user_id: Optional user ID for created_by and updated_by fields
- tenant_id: Optional tenant ID for created_by and updated_by fields
- embedding_model_name: embedding model name for the knowledge base
+ - preserve_source_file: whether to preserve uploaded source documents (optional)
Returns:
Dict[str, Any]: Dictionary with at least 'knowledge_id' and 'index_name'
@@ -49,9 +54,11 @@ def create_knowledge_record(query: Dict[str, Any]) -> Dict[str, Any]:
"knowledge_sources": query.get("knowledge_sources", "elasticsearch"),
"tenant_id": query.get("tenant_id"),
"embedding_model_name": query.get("embedding_model_name"),
+ "embedding_model_id": query.get("embedding_model_id"),
"knowledge_name": knowledge_name,
"group_ids": convert_list_to_string(group_ids) if isinstance(group_ids, list) else group_ids,
"ingroup_permission": query.get("ingroup_permission"),
+ "preserve_source_file": query.get("preserve_source_file", True),
}
# For backward compatibility: if caller explicitly provides index_name,
@@ -112,10 +119,16 @@ def upsert_knowledge_record(query: Dict[str, Any]) -> Dict[str, Any]:
if existing_record:
# Update existing record
- existing_record.knowledge_name = query.get('knowledge_name') or query.get('index_name')
- existing_record.knowledge_describe = query.get('knowledge_describe', '')
- existing_record.knowledge_sources = query.get('knowledge_sources', 'elasticsearch')
- existing_record.embedding_model_name = query.get('embedding_model_name')
+ existing_record.knowledge_name = query.get(
+ 'knowledge_name') or query.get('index_name')
+ existing_record.knowledge_describe = query.get(
+ 'knowledge_describe', '')
+ existing_record.knowledge_sources = query.get(
+ 'knowledge_sources', 'elasticsearch')
+ existing_record.embedding_model_name = query.get(
+ 'embedding_model_name')
+ existing_record.embedding_model_id = query.get(
+ 'embedding_model_id')
existing_record.updated_by = query.get('user_id')
existing_record.update_time = func.current_timestamp()
@@ -245,9 +258,11 @@ def get_knowledge_record(query: Optional[Dict[str, Any]] = None) -> Dict[str, An
# Support both index_name and knowledge_name queries
if 'index_name' in query:
- db_query = db_query.filter(KnowledgeRecord.index_name == query['index_name'])
+ db_query = db_query.filter(
+ KnowledgeRecord.index_name == query['index_name'])
elif 'knowledge_name' in query:
- db_query = db_query.filter(KnowledgeRecord.knowledge_name == query['knowledge_name'])
+ db_query = db_query.filter(
+ KnowledgeRecord.knowledge_name == query['knowledge_name'])
# Add tenant_id filter only if it is provided in the query
if 'tenant_id' in query and query['tenant_id'] is not None:
@@ -345,6 +360,43 @@ def update_model_name_by_index_name(index_name: str, embedding_model_name: str,
raise e
+def update_embedding_model_by_index_name(
+        index_name: str, embedding_model_id: int, embedding_model_name: str,
+        tenant_id: str, user_id: str
+) -> bool:
+    """
+    Set the embedding model ID and name on one tenant's knowledge base record.
+
+    Args:
+        index_name: Internal index name of the knowledge base
+        embedding_model_id: New embedding model ID
+        embedding_model_name: New embedding model name
+        tenant_id: Tenant ID the record must belong to
+        user_id: User ID written to updated_by
+
+    Returns:
+        bool: True if at least one non-deleted record matched and was updated
+
+    Raises:
+        SQLAlchemyError: Logged with its traceback, then re-raised unchanged
+    """
+    new_values = {"embedding_model_id": embedding_model_id,
+                  "embedding_model_name": embedding_model_name, "updated_by": user_id}
+    try:
+        with get_db_session() as session:
+            matched = session.query(KnowledgeRecord).filter(
+                KnowledgeRecord.index_name == index_name,
+                KnowledgeRecord.delete_flag != 'Y',
+                KnowledgeRecord.tenant_id == tenant_id
+            ).update(new_values)
+            session.commit()
+            return matched > 0
+    except SQLAlchemyError:
+        # Record the failure before propagating; bare raise keeps the original traceback.
+        logger.exception("Update embedding model error")
+        raise
+
+
def get_index_name_by_knowledge_name(knowledge_name: str, tenant_id: str) -> str:
"""
Get the internal index_name from user-facing knowledge_name.
@@ -361,16 +413,138 @@ def get_index_name_by_knowledge_name(knowledge_name: str, tenant_id: str) -> str
"""
try:
with get_db_session() as session:
+ # First try resolving by user-facing knowledge_name.
result = session.query(KnowledgeRecord).filter(
KnowledgeRecord.knowledge_name == knowledge_name,
KnowledgeRecord.tenant_id == tenant_id,
KnowledgeRecord.delete_flag != 'Y'
).first()
-
if result:
return result.index_name
+
+ # Backward/forward compatibility: if caller already passes internal index_name,
+ # accept it directly by resolving on index_name as well.
+ index_result = session.query(KnowledgeRecord).filter(
+ KnowledgeRecord.index_name == knowledge_name,
+ KnowledgeRecord.tenant_id == tenant_id,
+ KnowledgeRecord.delete_flag != 'Y'
+ ).first()
+ if index_result:
+ return index_result.index_name
+
raise ValueError(
f"Knowledge base '{knowledge_name}' not found for the current tenant"
)
except SQLAlchemyError as e:
raise e
+
+
+def get_knowledge_name_map_by_index_names(index_names: List[str]) -> Dict[str, str]:
+    """
+    Resolve internal index names to their display names (knowledge_name).
+
+    Only records whose delete_flag is not 'Y' are considered; the lookup is
+    not restricted to a tenant.
+
+    Args:
+        index_names: Internal index names to resolve
+
+    Returns:
+        Dict[str, str]: index_name -> knowledge_name; an index name with no
+        matching record maps to itself. Empty input returns {} with no query.
+
+    Raises:
+        SQLAlchemyError: Logged and re-raised when the query fails
+    """
+    if not index_names:
+        return {}
+    try:
+        with get_db_session() as session:
+            rows = (
+                session.query(KnowledgeRecord.index_name, KnowledgeRecord.knowledge_name)
+                .filter(
+                    KnowledgeRecord.index_name.in_(index_names),
+                    KnowledgeRecord.delete_flag != 'Y',
+                )
+                .all()
+            )
+            # Names found in the database go in first, in query order.
+            name_by_index = {row.index_name: row.knowledge_name for row in rows}
+            # Anything the query did not return falls back to its own index name.
+            for requested in index_names:
+                name_by_index.setdefault(requested, requested)
+            return name_by_index
+    except SQLAlchemyError:
+        logger.exception("Query knowledge name map error")
+        raise
+
+
+def update_summary_frequency(index_name: str, summary_frequency: Optional[str],
+                             _tenant_id: str, user_id: str) -> bool:
+    """Store a new auto-summary frequency on a knowledge base record.
+
+    Returns False when no non-deleted record has this index_name.
+    """
+    if summary_frequency not in VALID_SUMMARY_FREQUENCIES:
+        raise ValueError(f"Invalid summary_frequency: {summary_frequency}")
+    try:
+        with get_db_session() as session:
+            target = session.query(KnowledgeRecord).filter(  # _tenant_id is not applied here
+                KnowledgeRecord.index_name == index_name, KnowledgeRecord.delete_flag != 'Y'
+            ).first()
+            if target:
+                target.summary_frequency = summary_frequency
+                target.updated_by = user_id
+                session.commit()
+            return bool(target)
+    except SQLAlchemyError:
+        logger.exception("Update summary frequency error")
+        raise
+
+
+def update_last_summary_time(index_name: str):
+    """Set last_summary_time to datetime.now() on the matching non-deleted record, if any."""
+    from datetime import datetime
+    try:
+        with get_db_session() as session:
+            kb = session.query(KnowledgeRecord).filter(
+                KnowledgeRecord.index_name == index_name, KnowledgeRecord.delete_flag != 'Y'
+            ).first()
+            if not kb:
+                return
+            kb.last_summary_time = datetime.now()
+            session.commit()
+    except SQLAlchemyError:
+        logger.exception("Update last summary time error")
+        raise
+
+
+def update_last_doc_update_time(index_name: str):
+    """Set last_doc_update_time to datetime.now() on the matching non-deleted record, if any."""
+    from datetime import datetime
+    try:
+        with get_db_session() as session:
+            kb = session.query(KnowledgeRecord).filter(
+                KnowledgeRecord.index_name == index_name, KnowledgeRecord.delete_flag != 'Y'
+            ).first()
+            if not kb:
+                return
+            kb.last_doc_update_time = datetime.now()
+            session.commit()
+    except SQLAlchemyError:
+        logger.exception("Update last doc update time error")
+        raise
+
+
+def get_knowledge_bases_for_auto_summary() -> List[Dict[str, Any]]:
+    """Return as_dict() of every knowledge base record whose summary_frequency is
+    not NULL and whose delete_flag is not 'Y'."""
+    try:
+        with get_db_session() as session:
+            scheduled = session.query(KnowledgeRecord).filter(
+                KnowledgeRecord.summary_frequency.isnot(None), KnowledgeRecord.delete_flag != 'Y'
+            ).all()
+            return list(map(as_dict, scheduled))
+    except SQLAlchemyError:
+        logger.exception("Get knowledge bases error")
+        raise
diff --git a/backend/database/model_management_db.py b/backend/database/model_management_db.py
index cb1c6c69f..1a1a98c8b 100644
--- a/backend/database/model_management_db.py
+++ b/backend/database/model_management_db.py
@@ -1,3 +1,4 @@
+import logging
from typing import Any, Dict, List, Optional
from sqlalchemy import and_, desc, func, insert, select, update
@@ -7,6 +8,8 @@
from .db_models import ModelRecord
from .utils import add_creation_tracking, add_update_tracking
+logger = logging.getLogger("database.model_management_db")
+
def create_model_record(model_data: Dict[str, Any], user_id: str, tenant_id: str) -> bool:
"""
@@ -170,7 +173,7 @@ def get_model_records(filters: Optional[Dict[str, Any]], tenant_id: str) -> List
return result_list
-def get_model_by_display_name(display_name: str, tenant_id: str) -> Optional[Dict[str, Any]]:
+def get_model_by_display_name(display_name: str, tenant_id: str, model_type: str = None) -> Optional[Dict[str, Any]]:
"""
Get a model record by display name
@@ -179,6 +182,11 @@ def get_model_by_display_name(display_name: str, tenant_id: str) -> Optional[Dic
tenant_id:
"""
filters = {'display_name': display_name}
+
+ if model_type in ["multiEmbedding", "multi_embedding"]:
+ filters['model_type'] = "multi_embedding"
+ elif model_type == "embedding":
+ filters['model_type'] = "embedding"
records = get_model_records(filters, tenant_id)
if not records:
@@ -203,7 +211,7 @@ def get_models_by_display_name(display_name: str, tenant_id: str) -> List[Dict[s
return get_model_records(filters, tenant_id)
-def get_model_id_by_display_name(display_name: str, tenant_id: str) -> Optional[int]:
+def get_model_id_by_display_name(display_name: str, tenant_id: str, model_type: str = None) -> Optional[int]:
"""
Get a model ID by display name
@@ -214,7 +222,7 @@ def get_model_id_by_display_name(display_name: str, tenant_id: str) -> Optional[
Returns:
Optional[int]: Model ID
"""
- model = get_model_by_display_name(display_name, tenant_id)
+ model = get_model_by_display_name(display_name, tenant_id, model_type)
return model["model_id"] if model else None
diff --git a/backend/database/oauth_account_db.py b/backend/database/oauth_account_db.py
new file mode 100644
index 000000000..3b798f738
--- /dev/null
+++ b/backend/database/oauth_account_db.py
@@ -0,0 +1,220 @@
+"""
+Database operations for OAuth account management
+"""
+
+import logging
+from typing import Any, Dict, List, Optional
+
+from database.client import as_dict, get_db_session
+from database.db_models import UserOAuthAccount
+
+logger = logging.getLogger(__name__)
+
+
+def insert_oauth_account(
+    user_id: str,
+    provider: str,
+    provider_user_id: str,
+    provider_email: Optional[str] = None,
+    provider_username: Optional[str] = None,
+    tenant_id: Optional[str] = None,
+) -> Dict[str, Any]:
+    """
+    Insert a new OAuth account row bound to ``user_id``.
+
+    Both ``created_by`` and ``updated_by`` are set to ``user_id``.
+
+    Returns:
+        Dict[str, Any]: The flushed row converted with ``as_dict``.
+    """
+    column_values = {
+        "user_id": user_id,
+        "provider": provider,
+        "provider_user_id": provider_user_id,
+        "provider_email": provider_email,
+        "provider_username": provider_username,
+        "tenant_id": tenant_id,
+        "created_by": user_id,
+        "updated_by": user_id,
+    }
+    with get_db_session() as db:
+        new_account = UserOAuthAccount(**column_values)
+        db.add(new_account)
+        # Flush before converting so the row has been sent to the database.
+        db.flush()
+        return as_dict(new_account)
+
+
+def get_oauth_account_by_provider(
+    provider: str, provider_user_id: str
+) -> Optional[Dict[str, Any]]:
+    """
+    Fetch the active (``delete_flag == "N"``) account for a provider identity.
+
+    Returns:
+        Optional[Dict[str, Any]]: The first matching row as a dict, or None.
+    """
+    criteria = (
+        UserOAuthAccount.provider == provider,
+        UserOAuthAccount.provider_user_id == provider_user_id,
+        UserOAuthAccount.delete_flag == "N",
+    )
+    with get_db_session() as db:
+        account = db.query(UserOAuthAccount).filter(*criteria).first()
+        if not account:
+            return None
+        return as_dict(account)
+
+
+def get_soft_deleted_oauth_account(
+    provider: str, provider_user_id: str
+) -> Optional[Dict[str, Any]]:
+    """
+    Fetch a soft-deleted (``delete_flag == "Y"``) account for a provider identity.
+
+    Returns:
+        Optional[Dict[str, Any]]: The first matching row as a dict, or None.
+    """
+    criteria = (
+        UserOAuthAccount.provider == provider,
+        UserOAuthAccount.provider_user_id == provider_user_id,
+        UserOAuthAccount.delete_flag == "Y",
+    )
+    with get_db_session() as db:
+        account = db.query(UserOAuthAccount).filter(*criteria).first()
+        if not account:
+            return None
+        return as_dict(account)
+
+
+def list_oauth_accounts_by_user_id(user_id: str) -> List[Dict[str, Any]]:
+    """
+    List every active (``delete_flag == "N"``) OAuth account of a user.
+
+    Returns:
+        List[Dict[str, Any]]: One dict per matching row; empty when none match.
+    """
+    with get_db_session() as db:
+        active_accounts = db.query(UserOAuthAccount).filter(
+            UserOAuthAccount.user_id == user_id,
+            UserOAuthAccount.delete_flag == "N",
+        )
+        rows: List[Dict[str, Any]] = []
+        for account in active_accounts.all():
+            rows.append(as_dict(account))
+        return rows
+
+
+def rebind_oauth_account(
+    provider: str,
+    provider_user_id: str,
+    new_user_id: str,
+    provider_email: Optional[str] = None,
+    provider_username: Optional[str] = None,
+    tenant_id: Optional[str] = None,
+) -> bool:
+    """
+    Point an active OAuth account at a different local user.
+
+    ``user_id`` and ``updated_by`` are always set to ``new_user_id``; the
+    optional fields are written only when a non-None value is passed.
+
+    Returns:
+        bool: True when an active row was found and modified, False otherwise.
+    """
+    optional_updates = {
+        "provider_email": provider_email,
+        "provider_username": provider_username,
+        "tenant_id": tenant_id,
+    }
+    with get_db_session() as db:
+        account = db.query(UserOAuthAccount).filter(
+            UserOAuthAccount.provider == provider,
+            UserOAuthAccount.provider_user_id == provider_user_id,
+            UserOAuthAccount.delete_flag == "N",
+        ).first()
+        if not account:
+            return False
+
+        account.user_id = new_user_id
+        account.updated_by = new_user_id
+        for column, value in optional_updates.items():
+            if value is not None:
+                setattr(account, column, value)
+
+        return True
+
+
+def update_oauth_account_tokens(
+    provider: str,
+    provider_user_id: str,
+    provider_username: Optional[str] = None,
+) -> bool:
+    """
+    Refresh stored provider metadata on an active OAuth account.
+
+    Despite the name, the only column written here is ``provider_username``,
+    and only when a non-None value is supplied.
+
+    Returns:
+        bool: True when an active row exists (even if nothing was changed),
+        False when no active row matches.
+    """
+    with get_db_session() as db:
+        lookup = db.query(UserOAuthAccount).filter(
+            UserOAuthAccount.provider == provider,
+            UserOAuthAccount.provider_user_id == provider_user_id,
+            UserOAuthAccount.delete_flag == "N",
+        )
+        account = lookup.first()
+        if not account:
+            return False
+
+        if provider_username is not None:
+            account.provider_username = provider_username
+
+        return True
+
+
+def delete_oauth_account(user_id: str, provider: str) -> bool:
+    """
+    Soft-delete the user's active account for one provider.
+
+    The first matching row gets ``delete_flag = "Y"`` and ``updated_by = user_id``.
+
+    Returns:
+        bool: True when a row was flagged, False when no active row matches.
+    """
+    criteria = (
+        UserOAuthAccount.user_id == user_id,
+        UserOAuthAccount.provider == provider,
+        UserOAuthAccount.delete_flag == "N",
+    )
+    with get_db_session() as db:
+        account = db.query(UserOAuthAccount).filter(*criteria).first()
+        if account:
+            account.delete_flag = "Y"
+            account.updated_by = user_id
+            return True
+        return False
+
+
+def reactivate_oauth_account(
+    provider: str,
+    provider_user_id: str,
+    user_id: str,
+    provider_email: Optional[str] = None,
+    provider_username: Optional[str] = None,
+    tenant_id: Optional[str] = None,
+) -> bool:
+    """
+    Restore a soft-deleted OAuth account and bind it to ``user_id``.
+
+    The row's ``delete_flag`` is reset to "N"; ``user_id`` and ``updated_by``
+    are overwritten, and the optional fields are written only when non-None.
+
+    Returns:
+        bool: True when a soft-deleted row was found and restored, else False.
+    """
+    optional_updates = {
+        "provider_email": provider_email,
+        "provider_username": provider_username,
+        "tenant_id": tenant_id,
+    }
+    with get_db_session() as db:
+        account = db.query(UserOAuthAccount).filter(
+            UserOAuthAccount.provider == provider,
+            UserOAuthAccount.provider_user_id == provider_user_id,
+            UserOAuthAccount.delete_flag == "Y",
+        ).first()
+        if not account:
+            return False
+
+        account.delete_flag = "N"
+        account.user_id = user_id
+        account.updated_by = user_id
+        for column, value in optional_updates.items():
+            if value is not None:
+                setattr(account, column, value)
+
+        return True
+
+
+def count_oauth_accounts_by_user_id(user_id: str) -> int:
+    """
+    Count the active (``delete_flag == "N"``) OAuth accounts of a user.
+
+    Returns:
+        int: Number of matching rows.
+    """
+    with get_db_session() as db:
+        active_accounts = db.query(UserOAuthAccount).filter(
+            UserOAuthAccount.user_id == user_id,
+            UserOAuthAccount.delete_flag == "N",
+        )
+        return active_accounts.count()
+
+
+def soft_delete_all_oauth_accounts_by_user_id(user_id: str, deleted_by: str) -> int:
+    """
+    Soft-delete every active OAuth account belonging to ``user_id``.
+
+    Each row is loaded and flagged individually, with ``updated_by`` set to
+    ``deleted_by``.
+
+    Returns:
+        int: Number of rows that were flagged.
+    """
+    with get_db_session() as db:
+        active_accounts = db.query(UserOAuthAccount).filter(
+            UserOAuthAccount.user_id == user_id,
+            UserOAuthAccount.delete_flag == "N",
+        ).all()
+        for account in active_accounts:
+            account.delete_flag = "Y"
+            account.updated_by = deleted_by
+        return len(active_accounts)
\ No newline at end of file
diff --git a/backend/database/prompt_template_db.py b/backend/database/prompt_template_db.py
new file mode 100644
index 000000000..fbc286cf9
--- /dev/null
+++ b/backend/database/prompt_template_db.py
@@ -0,0 +1,165 @@
+import logging
+from typing import Optional
+
+from sqlalchemy import select, update
+
+from database.client import as_dict, filter_property, get_db_session
+from database.db_models import PromptTemplate
+
+logger = logging.getLogger("prompt_template_db")
+
+
+def create_prompt_template(template_data: dict) -> dict:
+    """Create a prompt template from ``template_data`` and return it as a dict.
+
+    The input is passed through ``filter_property`` before constructing the
+    model, and ``delete_flag`` is always forced to "N".
+    """
+    with get_db_session() as db:
+        model_fields = filter_property(template_data, PromptTemplate)
+        new_template = PromptTemplate(**model_fields)
+        new_template.delete_flag = "N"
+        db.add(new_template)
+        db.flush()
+        return as_dict(new_template)
+
+
+def upsert_prompt_template_by_id(template_id: int, template_data: dict, user_id: str) -> dict:
+    """Create or update a prompt template with a fixed template ID.
+
+    The lookup does not filter on ``delete_flag``, so a soft-deleted row with
+    this ID is updated in place rather than duplicated.
+
+    Args:
+        template_id: Primary key to look up, or to assign to a new row.
+        template_data: Field values; filtered through ``filter_property``.
+        user_id: Actor recorded in the audit columns.
+
+    Returns:
+        The created or updated row converted with ``as_dict``.
+    """
+    with get_db_session() as session:
+        prompt_template = session.query(PromptTemplate).filter(
+            PromptTemplate.template_id == template_id,
+        ).first()
+
+        filtered_data = filter_property(template_data, PromptTemplate)
+        if prompt_template:
+            for key, value in filtered_data.items():
+                setattr(prompt_template, key, value)
+            prompt_template.updated_by = user_id
+        else:
+            prompt_template = PromptTemplate(**filtered_data)
+            prompt_template.template_id = template_id
+            prompt_template.delete_flag = filtered_data.get("delete_flag", "N")
+            # Record the acting user on insert too, without overriding audit
+            # values the caller supplied in template_data.
+            # NOTE(review): assumes the model has created_by/updated_by; the
+            # hasattr guard makes this a no-op otherwise.
+            for audit_field in ("created_by", "updated_by"):
+                if hasattr(prompt_template, audit_field) and getattr(prompt_template, audit_field) is None:
+                    setattr(prompt_template, audit_field, user_id)
+            session.add(prompt_template)
+
+        session.flush()
+        return as_dict(prompt_template)
+
+
+def update_prompt_template(template_id: int, template_data: dict, user_id: str) -> dict:
+    """Update a non-deleted prompt template and return it as a dict.
+
+    Fields whose value is None are left unchanged; ``updated_by`` is always
+    set to ``user_id``.
+
+    Raises:
+        ValueError: If no non-deleted template has this ``template_id``.
+    """
+    with get_db_session() as db:
+        target = db.query(PromptTemplate).filter(
+            PromptTemplate.template_id == template_id,
+            PromptTemplate.delete_flag == "N",
+        ).first()
+        if not target:
+            raise ValueError("prompt template not found")
+
+        changes = filter_property(template_data, PromptTemplate)
+        for column, new_value in changes.items():
+            if new_value is not None:
+                setattr(target, column, new_value)
+
+        target.updated_by = user_id
+        db.flush()
+        return as_dict(target)
+
+
+def delete_prompt_template(template_id: int, user_id: str) -> int:
+    """Soft-delete a prompt template.
+
+    Issues a single UPDATE setting ``delete_flag`` to "Y" and ``updated_by``
+    to ``user_id`` on the non-deleted row with this ID.
+
+    Returns:
+        The statement's ``rowcount``.
+    """
+    statement = (
+        update(PromptTemplate)
+        .where(
+            PromptTemplate.template_id == template_id,
+            PromptTemplate.delete_flag == "N",
+        )
+        .values(delete_flag="Y", updated_by=user_id)
+    )
+    with get_db_session() as db:
+        outcome = db.execute(statement)
+        return outcome.rowcount
+
+
+def query_prompt_templates_by_user(
+    tenant_id: str,
+    user_id: str,
+    template_type: str = "agent_generate",
+) -> list[dict]:
+    """Query a user's non-deleted prompt templates of one type within a tenant.
+
+    Results are ordered by ``update_time`` descending, then ``template_id``
+    descending.
+    """
+    with get_db_session() as db:
+        owned = db.query(PromptTemplate).filter(
+            PromptTemplate.tenant_id == tenant_id,
+            PromptTemplate.user_id == user_id,
+            PromptTemplate.template_type == template_type,
+            PromptTemplate.delete_flag == "N",
+        )
+        ordered = owned.order_by(
+            PromptTemplate.update_time.desc(),
+            PromptTemplate.template_id.desc(),
+        )
+        found: list[dict] = []
+        for row in ordered.all():
+            found.append(as_dict(row))
+        return found
+
+
+def get_prompt_template_by_id(
+    template_id: int,
+    tenant_id: str,
+    user_id: str,
+    template_type: str = "agent_generate",
+) -> Optional[dict]:
+    """Get a non-deleted prompt template by ID, scoped to tenant, user and type.
+
+    Returns the row as a dict, or None when nothing matches.
+    """
+    criteria = (
+        PromptTemplate.template_id == template_id,
+        PromptTemplate.tenant_id == tenant_id,
+        PromptTemplate.user_id == user_id,
+        PromptTemplate.template_type == template_type,
+        PromptTemplate.delete_flag == "N",
+    )
+    with get_db_session() as db:
+        match = db.query(PromptTemplate).filter(*criteria).first()
+        if not match:
+            return None
+        return as_dict(match)
+
+
+def get_prompt_template_by_name(
+    template_name: str,
+    tenant_id: str,
+    user_id: str,
+    template_type: str = "agent_generate",
+) -> Optional[dict]:
+    """Get a non-deleted prompt template by name, scoped to tenant, user and type.
+
+    Returns the first matching row as a dict, or None when nothing matches.
+    """
+    criteria = (
+        PromptTemplate.template_name == template_name,
+        PromptTemplate.tenant_id == tenant_id,
+        PromptTemplate.user_id == user_id,
+        PromptTemplate.template_type == template_type,
+        PromptTemplate.delete_flag == "N",
+    )
+    with get_db_session() as db:
+        match = db.query(PromptTemplate).filter(*criteria).first()
+        if not match:
+            return None
+        return as_dict(match)
+
+
+def get_prompt_template_by_template_id(
+    template_id: int,
+    template_type: str = "agent_generate",
+    include_deleted: bool = False,
+) -> Optional[dict]:
+    """Get a prompt template by template ID regardless of owner.
+
+    Soft-deleted rows are excluded unless ``include_deleted`` is True.
+    Returns the row as a dict, or None when nothing matches.
+    """
+    criteria = [
+        PromptTemplate.template_id == template_id,
+        PromptTemplate.template_type == template_type,
+    ]
+    if not include_deleted:
+        criteria.append(PromptTemplate.delete_flag == "N")
+
+    with get_db_session() as db:
+        match = db.query(PromptTemplate).filter(*criteria).first()
+        if not match:
+            return None
+        return as_dict(match)
+
+
+def query_prompt_template_names(
+    tenant_id: str,
+    user_id: str,
+    template_type: str = "agent_generate",
+) -> set[str]:
+    """Query all active prompt template names for the current user.
+
+    Empty or NULL names are dropped from the returned set.
+    """
+    statement = select(PromptTemplate.template_name).where(
+        PromptTemplate.tenant_id == tenant_id,
+        PromptTemplate.user_id == user_id,
+        PromptTemplate.template_type == template_type,
+        PromptTemplate.delete_flag == "N",
+    )
+    with get_db_session() as db:
+        names: set[str] = set()
+        for row in db.execute(statement).all():
+            if row and row[0]:
+                names.add(row[0])
+        return names
diff --git a/backend/database/remote_mcp_db.py b/backend/database/remote_mcp_db.py
index d535f9fba..b08769437 100644
--- a/backend/database/remote_mcp_db.py
+++ b/backend/database/remote_mcp_db.py
@@ -15,16 +15,31 @@ def create_mcp_record(mcp_data: Dict[str, Any], tenant_id: str, user_id: str):
:param tenant_id: Tenant ID
:param user_id: User ID
:return: Created MCP record
+
+ Note: Only fields defined in the McpRecord model are inserted.
+ Fields like 'transport_type' and 'version' are not part of McpRecord
+ and will be ignored.
"""
+ # Filter to only include fields that exist in the model
+    # McpRecord fields: mcp_id, tenant_id, user_id, mcp_name, mcp_server, status,
+    # container_id, container_port, authorization_token, custom_headers, source, registry_json,
+    # config_json, enabled, tags, description, create_time, update_time, created_by, updated_by, delete_flag
+ allowed_fields = {
+ 'mcp_name', 'mcp_server', 'status', 'container_id', 'container_port',
+ 'authorization_token', 'custom_headers', 'source', 'registry_json', 'config_json',
+ 'enabled', 'tags', 'description'
+ }
+
+ filtered_data = {k: v for k, v in mcp_data.items() if k in allowed_fields and v is not None}
+ filtered_data.update({
+ "tenant_id": tenant_id,
+ "user_id": user_id,
+ "created_by": user_id,
+ "updated_by": user_id,
+ "delete_flag": "N"
+ })
with get_db_session() as session:
- mcp_data.update({
- "tenant_id": tenant_id,
- "user_id": user_id,
- "created_by": user_id,
- "updated_by": user_id,
- "delete_flag": "N"
- })
- new_mcp = McpRecord(**filter_property(mcp_data, McpRecord))
+ new_mcp = McpRecord(**filtered_data)
session.add(new_mcp)
@@ -80,7 +95,7 @@ def update_mcp_status_by_name_and_url(mcp_name: str, mcp_server: str, tenant_id:
).update({"status": status, "updated_by": user_id})
-def get_mcp_records_by_tenant(tenant_id: str) -> List[Dict[str, Any]]:
+def get_mcp_records_by_tenant(tenant_id: str, tag: str | None = None) -> List[Dict[str, Any]]:
"""
Get all MCP records for a tenant
@@ -88,14 +103,139 @@ def get_mcp_records_by_tenant(tenant_id: str) -> List[Dict[str, Any]]:
:return: List of MCP records
"""
with get_db_session() as session:
- mcp_records = session.query(McpRecord).filter(
+ query = session.query(McpRecord).filter(
McpRecord.tenant_id == tenant_id,
McpRecord.delete_flag != 'Y'
- ).order_by(McpRecord.create_time.desc()).all()
+ )
+
+ if tag:
+ query = query.filter(McpRecord.tags.any(tag))
+
+ mcp_records = query.order_by(McpRecord.create_time.desc()).all()
return [as_dict(record) for record in mcp_records]
+def get_mcp_records_by_container_port(container_port: int) -> List[Dict[str, Any]]:
+    """
+    Get non-deleted MCP records that use the given container port.
+
+    The lookup is global: it is not restricted to a tenant. It also does not
+    filter on the ``enabled`` column, so disabled records are returned too.
+    NOTE(review): confirm whether callers expect enabled-only results.
+
+    :param container_port: Container port to look up
+    :return: List of MCP records, newest ``create_time`` first
+    """
+    with get_db_session() as session:
+        query = session.query(McpRecord).filter(
+            McpRecord.container_port == container_port,
+            McpRecord.delete_flag != 'Y'
+        )
+
+        records = query.order_by(McpRecord.create_time.desc()).all()
+        return [as_dict(record) for record in records]
+
+
+def update_mcp_record_manage_fields_by_id(
+    *,
+    mcp_id: int,
+    tenant_id: str,
+    user_id: str,
+    name: str,
+    server_url: str,
+    description: str | None,
+    tags: List[str] | None,
+    source: str | None,
+    authorization_token: str | None,
+    custom_headers: Dict[str, Any] | None,
+    config_json: Dict[str, Any] | None,
+) -> None:
+    """
+    Overwrite the managed fields of one non-deleted MCP record.
+
+    Every listed column is written, including None values; a falsy ``tags``
+    is stored as an empty list. Nothing is returned or raised when no row
+    matches ``mcp_id`` and ``tenant_id``.
+
+    :param mcp_id: MCP record ID
+    :param tenant_id: Tenant ID the record must belong to
+    :param user_id: User ID stored in ``updated_by``
+    """
+    new_values = {
+        "mcp_name": name,
+        "mcp_server": server_url,
+        "description": description,
+        "tags": tags or [],
+        "source": source,
+        "authorization_token": authorization_token,
+        "custom_headers": custom_headers,
+        "config_json": config_json,
+        "updated_by": user_id,
+    }
+    with get_db_session() as session:
+        target = session.query(McpRecord).filter(
+            McpRecord.mcp_id == mcp_id,
+            McpRecord.tenant_id == tenant_id,
+            McpRecord.delete_flag != 'Y'
+        )
+        target.update(new_values)
+
+
+def update_mcp_record_enabled_by_id(
+    *,
+    mcp_id: int,
+    tenant_id: str,
+    user_id: str,
+    enabled: bool,
+) -> None:
+    """
+    Set the ``enabled`` flag of one non-deleted MCP record.
+
+    :param mcp_id: MCP record ID
+    :param tenant_id: Tenant ID the record must belong to
+    :param user_id: User ID stored in ``updated_by``
+    :param enabled: New value for the ``enabled`` column
+    """
+    new_values = {"enabled": enabled, "updated_by": user_id}
+    with get_db_session() as session:
+        target = session.query(McpRecord).filter(
+            McpRecord.mcp_id == mcp_id,
+            McpRecord.tenant_id == tenant_id,
+            McpRecord.delete_flag != 'Y'
+        )
+        target.update(new_values)
+
+
+def update_mcp_record_status_by_id(
+    *,
+    mcp_id: int,
+    tenant_id: str,
+    user_id: str,
+    status: bool,
+) -> None:
+    """
+    Set the ``status`` column of one non-deleted MCP record.
+
+    :param mcp_id: MCP record ID
+    :param tenant_id: Tenant ID the record must belong to
+    :param user_id: User ID stored in ``updated_by``
+    :param status: New value for the ``status`` column
+    """
+    new_values = {"status": status, "updated_by": user_id}
+    with get_db_session() as session:
+        target = session.query(McpRecord).filter(
+            McpRecord.mcp_id == mcp_id,
+            McpRecord.tenant_id == tenant_id,
+            McpRecord.delete_flag != 'Y'
+        )
+        target.update(new_values)
+
+
+def update_mcp_record_container_fields_by_id(
+    *,
+    mcp_id: int,
+    tenant_id: str,
+    user_id: str,
+    container_id: str | None,
+    container_port: int | None,
+    mcp_server: str,
+    status: bool | None,
+) -> None:
+    """
+    Overwrite the container-related fields of one non-deleted MCP record.
+
+    All four fields are written as given, so passing None clears the
+    corresponding column.
+
+    :param mcp_id: MCP record ID
+    :param tenant_id: Tenant ID the record must belong to
+    :param user_id: User ID stored in ``updated_by``
+    """
+    new_values = {
+        "container_id": container_id,
+        "container_port": container_port,
+        "mcp_server": mcp_server,
+        "status": status,
+        "updated_by": user_id,
+    }
+    with get_db_session() as session:
+        target = session.query(McpRecord).filter(
+            McpRecord.mcp_id == mcp_id,
+            McpRecord.tenant_id == tenant_id,
+            McpRecord.delete_flag != 'Y'
+        )
+        target.update(new_values)
+
+
+def delete_mcp_record_by_id(
+    *,
+    mcp_id: int,
+    tenant_id: str,
+    user_id: str,
+) -> None:
+    """
+    Soft-delete one MCP record by setting ``delete_flag`` to 'Y'.
+
+    :param mcp_id: MCP record ID
+    :param tenant_id: Tenant ID the record must belong to
+    :param user_id: User ID stored in ``updated_by``
+    """
+    new_values = {"delete_flag": "Y", "updated_by": user_id}
+    with get_db_session() as session:
+        target = session.query(McpRecord).filter(
+            McpRecord.mcp_id == mcp_id,
+            McpRecord.tenant_id == tenant_id,
+            McpRecord.delete_flag != 'Y'
+        )
+        target.update(new_values)
+
+
def get_mcp_server_by_name_and_tenant(mcp_name: str, tenant_id: str) -> str:
"""
Get MCP server address by name and tenant ID
@@ -134,6 +274,26 @@ def get_mcp_authorization_token_by_name_and_url(mcp_name: str, mcp_server: str,
return mcp_record.authorization_token if mcp_record else None
+def get_mcp_custom_headers_by_name_and_url(mcp_name: str, mcp_server: str, tenant_id: str) -> Dict[str, Any] | None:
+    """
+    Look up the custom headers stored on a non-deleted MCP record.
+
+    :param mcp_name: MCP name
+    :param mcp_server: MCP server URL
+    :param tenant_id: Tenant ID
+    :return: The record's ``custom_headers`` value, None if no record matches
+    """
+    criteria = (
+        McpRecord.mcp_name == mcp_name,
+        McpRecord.mcp_server == mcp_server,
+        McpRecord.tenant_id == tenant_id,
+        McpRecord.delete_flag != 'Y',
+    )
+    with get_db_session() as session:
+        record = session.query(McpRecord).filter(*criteria).first()
+        if not record:
+            return None
+        return record.custom_headers
+
+
def update_mcp_record_by_name_and_url(
update_data,
tenant_id: str,
@@ -161,6 +321,10 @@ def update_mcp_record_by_name_and_url(
if hasattr(update_data, 'new_authorization_token'):
update_fields["authorization_token"] = update_data.new_authorization_token
+ # Update custom_headers if provided
+ if hasattr(update_data, 'custom_headers'):
+ update_fields["custom_headers"] = update_data.custom_headers
+
with get_db_session() as session:
session.query(McpRecord).filter(
McpRecord.mcp_name == update_data.current_service_name,
@@ -187,6 +351,26 @@ def check_mcp_name_exists(mcp_name: str, tenant_id: str) -> bool:
return mcp_record is not None
+def check_enabled_mcp_name_exists(mcp_name: str, tenant_id: str) -> bool:
+    """
+    Tell whether a tenant already has an enabled, non-deleted MCP with this name.
+
+    Only enabled records participate in conflict checks for runtime container startup.
+
+    :param mcp_name: MCP name
+    :param tenant_id: Tenant ID
+    :return: True if enabled name exists, False otherwise
+    """
+    criteria = (
+        McpRecord.mcp_name == mcp_name,
+        McpRecord.tenant_id == tenant_id,
+        McpRecord.delete_flag != 'Y',
+        McpRecord.enabled.is_(True),
+    )
+    with get_db_session() as session:
+        first_match = session.query(McpRecord).filter(*criteria).first()
+        return first_match is not None
+
+
def get_mcp_record_by_id_and_tenant(mcp_id: int, tenant_id: str) -> Dict[str, Any] | None:
"""
Get MCP record by ID and tenant ID
diff --git a/backend/database/skill_db.py b/backend/database/skill_db.py
index 2a718800b..6a3f69069 100644
--- a/backend/database/skill_db.py
+++ b/backend/database/skill_db.py
@@ -18,8 +18,7 @@ def _params_value_for_db(raw: Any) -> Any:
"""Strip UI/YAML comment metadata, then JSON round-trip for the DB JSON column."""
if raw is None:
return None
- stripped = strip_params_comments_for_db(raw)
- return json.loads(json.dumps(stripped, default=str))
+ return json.loads(json.dumps(strip_params_comments_for_db(raw), default=str))
def create_or_update_skill_by_skill_info(skill_info, tenant_id: str, user_id: str, version_no: int = 0):
@@ -155,6 +154,31 @@ def delete_skill_instances_by_skill_id(skill_id: int, user_id: str):
})
+def delete_skill_instances_by_tenant(tenant_id: str, user_id: str) -> int:
+    """Soft delete every non-deleted skill instance of a tenant.
+
+    Intended for tenant deletion, to clean up all of its skill instances.
+
+    Args:
+        tenant_id: Tenant ID to delete skill instances for
+        user_id: User ID written to the updated_by field
+
+    Returns:
+        Number of skill instances soft-deleted
+    """
+    with get_db_session() as session:
+        active_instances = session.query(SkillInstance).filter(
+            SkillInstance.tenant_id == tenant_id,
+            SkillInstance.delete_flag != 'Y'
+        )
+        affected = active_instances.update({
+            'delete_flag': 'Y',
+            'updated_by': user_id
+        })
+        # Explicit commit kept from the original implementation.
+        session.commit()
+        return affected
+
+
+
# ============== SkillInfo Repository Functions ==============
@@ -171,10 +195,12 @@ def _to_dict(skill: SkillInfo) -> Dict[str, Any]:
return {
"skill_id": skill.skill_id,
"name": skill.skill_name,
+ "tenant_id": skill.tenant_id,
"description": skill.skill_description,
"tags": skill.skill_tags or [],
"content": skill.skill_content or "",
- "params": skill.params if skill.params is not None else {},
+ "config_schemas": skill.config_schemas,
+ "config_values": skill.config_values,
"source": skill.source,
"created_by": skill.created_by,
"create_time": skill.create_time.isoformat() if skill.create_time else None,
@@ -183,10 +209,15 @@ def _to_dict(skill: SkillInfo) -> Dict[str, Any]:
}
-def list_skills() -> List[Dict[str, Any]]:
- """List all skills from database."""
+def list_skills(tenant_id: str) -> List[Dict[str, Any]]:
+ """List all skills for a tenant from database.
+
+ Args:
+ tenant_id: Tenant ID for filtering skills
+ """
with get_db_session() as session:
skills = session.query(SkillInfo).filter(
+ SkillInfo.tenant_id == tenant_id,
SkillInfo.delete_flag != 'Y'
).all()
results = []
@@ -197,11 +228,37 @@ def list_skills() -> List[Dict[str, Any]]:
return results
-def get_skill_by_name(skill_name: str) -> Optional[Dict[str, Any]]:
- """Get skill by name."""
+def get_skill_by_name(skill_name: str, tenant_id: str) -> Optional[Dict[str, Any]]:
+ """Get skill by name within a tenant.
+
+ Args:
+ skill_name: Skill name
+ tenant_id: Tenant ID for filtering
+ """
with get_db_session() as session:
skill = session.query(SkillInfo).filter(
SkillInfo.skill_name == skill_name,
+ SkillInfo.tenant_id == tenant_id,
+ SkillInfo.delete_flag != 'Y'
+ ).first()
+ if skill:
+ result = _to_dict(skill)
+ result["tool_ids"] = _get_tool_ids(session, skill.skill_id)
+ return result
+ return None
+
+
+def get_skill_by_id(skill_id: int, tenant_id: str) -> Optional[Dict[str, Any]]:
+ """Get skill by ID within a tenant.
+
+ Args:
+ skill_id: Skill ID
+ tenant_id: Tenant ID for filtering
+ """
+ with get_db_session() as session:
+ skill = session.query(SkillInfo).filter(
+ SkillInfo.skill_id == skill_id,
+ SkillInfo.tenant_id == tenant_id,
SkillInfo.delete_flag != 'Y'
).first()
if skill:
@@ -211,8 +268,15 @@ def get_skill_by_name(skill_name: str) -> Optional[Dict[str, Any]]:
return None
-def get_skill_by_id(skill_id: int) -> Optional[Dict[str, Any]]:
- """Get skill by ID."""
+def get_skill_by_id_global(skill_id: int) -> Optional[Dict[str, Any]]:
+ """Get skill by ID without tenant filter (global lookup for template skills).
+
+ Args:
+ skill_id: Skill ID
+
+ Returns:
+ Skill dict or None if not found.
+ """
with get_db_session() as session:
skill = session.query(SkillInfo).filter(
SkillInfo.skill_id == skill_id,
@@ -225,15 +289,42 @@ def get_skill_by_id(skill_id: int) -> Optional[Dict[str, Any]]:
return None
-def create_skill(skill_data: Dict[str, Any]) -> Dict[str, Any]:
- """Create a new skill."""
+def list_global_official_skills() -> List[Dict[str, Any]]:
+    """List all global official skills (tenant_id IS NULL) for installation.
+
+    Only non-deleted rows whose source is 'official' are returned.
+
+    Returns:
+        List of skill dicts as produced by _to_dict; no tool_ids are attached.
+    """
+    with get_db_session() as session:
+        skills = session.query(SkillInfo).filter(
+            SkillInfo.tenant_id.is_(None),
+            SkillInfo.delete_flag != 'Y',
+            SkillInfo.source == 'official'
+        ).all()
+        return [_to_dict(s) for s in skills]
+
+
+def create_skill(skill_data: Dict[str, Any], tenant_id: str) -> Dict[str, Any]:
+ """Create a new skill for a tenant.
+
+ Args:
+ skill_data: Skill data dict
+ tenant_id: Tenant ID for the skill
+ """
with get_db_session() as session:
skill = SkillInfo(
skill_name=skill_data["name"],
+ tenant_id=tenant_id,
skill_description=skill_data.get("description", ""),
skill_tags=skill_data.get("tags", []),
skill_content=skill_data.get("content", ""),
- params=_params_value_for_db(skill_data.get("params")),
+ config_schemas=_params_value_for_db(skill_data.get("config_schemas")),
+ config_values=_params_value_for_db(skill_data.get("config_values")),
source=skill_data.get("source", "custom"),
created_by=skill_data.get("created_by"),
create_time=datetime.now(),
@@ -265,13 +356,15 @@ def create_skill(skill_data: Dict[str, Any]) -> Dict[str, Any]:
def update_skill(
skill_name: str,
skill_data: Dict[str, Any],
+ tenant_id: str,
updated_by: Optional[str] = None,
) -> Dict[str, Any]:
- """Update an existing skill.
+ """Update an existing skill for a tenant.
Args:
- skill_name: Skill name (unique key).
+ skill_name: Skill name (unique key within tenant).
skill_data: Business fields to update (description, content, tags, source, params, tool_ids).
+ tenant_id: Tenant ID for filtering.
updated_by: Actor user id from server-side auth; never taken from the HTTP request body.
Notes:
@@ -282,6 +375,7 @@ def update_skill(
with get_db_session() as session:
skill = session.query(SkillInfo).filter(
SkillInfo.skill_name == skill_name,
+ SkillInfo.tenant_id == tenant_id,
SkillInfo.delete_flag != "Y",
).first()
@@ -302,8 +396,10 @@ def update_skill(
row_values["skill_tags"] = skill_data["tags"]
if "source" in skill_data:
row_values["source"] = skill_data["source"]
- if "params" in skill_data:
- row_values["params"] = _params_value_for_db(skill_data["params"])
+ if "config_schemas" in skill_data:
+ row_values["config_schemas"] = _params_value_for_db(skill_data["config_schemas"])
+ if "config_values" in skill_data:
+ row_values["config_values"] = _params_value_for_db(skill_data["config_values"])
session.execute(
sa_update(SkillInfo)
@@ -331,6 +427,7 @@ def update_skill(
refreshed = session.query(SkillInfo).filter(
SkillInfo.skill_id == skill_id,
+ SkillInfo.tenant_id == tenant_id,
SkillInfo.delete_flag != "Y",
).first()
if not refreshed:
@@ -344,11 +441,12 @@ def update_skill(
return result
-def delete_skill(skill_name: str, updated_by: Optional[str] = None) -> bool:
- """Soft delete a skill (mark as deleted).
+def delete_skill(skill_name: str, tenant_id: str, updated_by: Optional[str] = None) -> bool:
+ """Soft delete a skill for a tenant (mark as deleted).
Args:
skill_name: Name of the skill to delete
+ tenant_id: Tenant ID for filtering
updated_by: User ID of the user performing the delete
Returns:
@@ -357,6 +455,7 @@ def delete_skill(skill_name: str, updated_by: Optional[str] = None) -> bool:
with get_db_session() as session:
skill = session.query(SkillInfo).filter(
SkillInfo.skill_name == skill_name,
+ SkillInfo.tenant_id == tenant_id,
SkillInfo.delete_flag != 'Y'
).first()
@@ -412,11 +511,12 @@ def get_tool_ids_by_names(tool_names: List[str], tenant_id: str) -> List[int]:
return [t.tool_id for t in tools]
-def get_tool_names_by_skill_name(skill_name: str) -> List[str]:
- """Get tool names for a skill by skill name.
+def get_tool_names_by_skill_name(skill_name: str, tenant_id: str) -> List[str]:
+ """Get tool names for a skill by skill name within a tenant.
Args:
skill_name: Name of the skill
+ tenant_id: Tenant ID for filtering
Returns:
List of tool names
@@ -424,6 +524,7 @@ def get_tool_names_by_skill_name(skill_name: str) -> List[str]:
with get_db_session() as session:
skill = session.query(SkillInfo).filter(
SkillInfo.skill_name == skill_name,
+ SkillInfo.tenant_id == tenant_id,
SkillInfo.delete_flag != 'Y'
).first()
if not skill:
@@ -432,11 +533,12 @@ def get_tool_names_by_skill_name(skill_name: str) -> List[str]:
return get_tool_names_by_ids(session, tool_ids)
-def get_skill_with_tool_names(skill_name: str) -> Optional[Dict[str, Any]]:
- """Get skill with tool names included."""
+def get_skill_with_tool_names(skill_name: str, tenant_id: str) -> Optional[Dict[str, Any]]:
+ """Get skill with tool names included for a tenant."""
with get_db_session() as session:
skill = session.query(SkillInfo).filter(
SkillInfo.skill_name == skill_name,
+ SkillInfo.tenant_id == tenant_id,
SkillInfo.delete_flag != 'Y'
).first()
if skill:
@@ -446,3 +548,74 @@ def get_skill_with_tool_names(skill_name: str) -> Optional[Dict[str, Any]]:
result["allowed_tools"] = get_tool_names_by_ids(session, tool_ids)
return result
return None
+
+
+# ============== Skill Initialization Functions ==============
+
+
+def check_skill_list_initialized(tenant_id: str) -> bool:
+    """Check if skill list has been initialized for the tenant.
+
+    A tenant counts as initialized once it owns at least one non-deleted
+    skill whose source is not 'custom'.
+
+    Args:
+        tenant_id: Tenant ID to check
+
+    Returns:
+        True if skills have been initialized, False otherwise
+    """
+    criteria = (
+        SkillInfo.tenant_id == tenant_id,
+        SkillInfo.delete_flag != 'Y',
+        SkillInfo.source != 'custom',
+    )
+    with get_db_session() as session:
+        non_custom_total = session.query(SkillInfo).filter(*criteria).count()
+        return non_custom_total > 0
+
+
+def upsert_scanned_skills(skills: List[Dict[str, Any]], user_id: str, tenant_id: str):
+    """Upsert scanned skill metadata into the tenant's skill rows.
+
+    Mirrors update_tool_table_from_scan_tool_list() in tool_db.py.
+    Each entry is matched by name against the tenant's non-deleted skills.
+    A match has its description, tags, content, config_schemas and
+    config_values overwritten on every scan; its source is left unchanged.
+    An unknown name is inserted as a new row, with source defaulting to
+    "official". Entries without a name are skipped.
+
+    Args:
+        skills: List of skill dicts with name, description, tags, content,
+            config_schemas, config_values, source
+        user_id: User ID for tracking who initiated the scan
+        tenant_id: Tenant ID for the skills
+    """
+    with get_db_session() as session:
+        existing_skills = session.query(SkillInfo).filter(
+            SkillInfo.tenant_id == tenant_id,
+            SkillInfo.delete_flag != 'Y'
+        ).all()
+        skills_by_name = {s.skill_name: s for s in existing_skills}
+
+        for skill_data in skills:
+            skill_name = skill_data.get("name")
+            if not skill_name:
+                continue
+
+            existing = skills_by_name.get(skill_name)
+            if existing is not None:
+                existing.skill_description = skill_data.get("description", "")
+                existing.skill_tags = skill_data.get("tags", [])
+                existing.skill_content = skill_data.get("content", "")
+                existing.config_schemas = _params_value_for_db(skill_data.get("config_schemas"))
+                existing.config_values = _params_value_for_db(skill_data.get("config_values"))
+                existing.updated_by = user_id
+                continue
+
+            # One timestamp so create_time and update_time agree on insert.
+            now = datetime.now()
+            new_skill = SkillInfo(
+                skill_name=skill_name,
+                tenant_id=tenant_id,
+                skill_description=skill_data.get("description", ""),
+                skill_tags=skill_data.get("tags", []),
+                skill_content=skill_data.get("content", ""),
+                config_schemas=_params_value_for_db(skill_data.get("config_schemas")),
+                config_values=_params_value_for_db(skill_data.get("config_values")),
+                source=skill_data.get("source", "official"),
+                created_by=user_id,
+                updated_by=user_id,
+                create_time=now,
+                update_time=now,
+            )
+            session.add(new_skill)
+            # Track the pending row so a name repeated later in the same scan
+            # updates it instead of inserting a duplicate.
+            skills_by_name[skill_name] = new_skill
diff --git a/backend/database/user_tenant_db.py b/backend/database/user_tenant_db.py
index f1294f8a7..b147eac49 100644
--- a/backend/database/user_tenant_db.py
+++ b/backend/database/user_tenant_db.py
@@ -75,6 +75,37 @@ def insert_user_tenant(user_id: str, tenant_id: str, user_role: str = "USER", us
session.add(user_tenant)
+def upsert_user_tenant(user_id: str, tenant_id: str, user_role: str = "USER", user_email: str = None) -> Dict[str, Any]:
+    """
+    Create or update the active user-tenant relationship for an external identity login.
+
+    The user's first active (``delete_flag == "N"``) row is re-pointed at
+    ``tenant_id`` with the given role; ``user_email`` is overwritten only when
+    it is not None. When the user has no active row, a new one is inserted.
+
+    Returns:
+        Dict[str, Any]: The created or updated row converted with ``as_dict``.
+    """
+    with get_db_session() as db:
+        membership = db.query(UserTenant).filter(
+            UserTenant.user_id == user_id,
+            UserTenant.delete_flag == "N"
+        ).first()
+
+        if not membership:
+            membership = UserTenant(
+                user_id=user_id,
+                tenant_id=tenant_id,
+                user_role=user_role,
+                user_email=user_email,
+                created_by=user_id,
+                updated_by=user_id
+            )
+            db.add(membership)
+        else:
+            membership.tenant_id = tenant_id
+            membership.user_role = user_role
+            if user_email is not None:
+                membership.user_email = user_email
+            membership.updated_by = user_id
+
+        db.flush()
+        return as_dict(membership)
+
+
def get_users_by_tenant_id(tenant_id: str, page: Optional[int] = 1, page_size: Optional[int] = 20,
sort_by: str = "created_at", sort_order: str = "desc") -> Dict[str, Any]:
"""
diff --git a/backend/mcp_service.py b/backend/mcp_service.py
index 0d8ab4c1b..4629d42ad 100644
--- a/backend/mcp_service.py
+++ b/backend/mcp_service.py
@@ -70,7 +70,7 @@ async def run(self, arguments: Dict[str, Any]) -> Any:
nexent_mcp = FastMCP(name="nexent_mcp")
-nexent_mcp.mount(local_mcp_service.name, local_mcp_service)
+nexent_mcp.mount(local_mcp_service, local_mcp_service.name)
_openapi_mcp_services: Dict[str, FastMCP] = {}
@@ -188,7 +188,8 @@ def _sanitize_function_name(name: str) -> str:
def register_openapi_service(
service_name: str,
openapi_json: Dict[str, Any],
- server_url: str
+ server_url: str,
+ headers_template: Dict[str, str],
) -> bool:
"""
Register an OpenAPI service using FastMCP.from_openapi().
@@ -222,7 +223,7 @@ def register_openapi_service(
openapi_spec["servers"] = [{"url": server_url}]
# Create HTTP client for the underlying REST API
- client = httpx.AsyncClient(base_url=server_url, timeout=30.0)
+ client = httpx.AsyncClient(base_url=server_url, timeout=120.0, headers=headers_template)
# Create FastMCP instance from OpenAPI spec
mcp_server = FastMCP.from_openapi(
@@ -239,7 +240,7 @@ def register_openapi_service(
_openapi_mcp_services[service_name] = mcp_server
# Mount to the main MCP server
- nexent_mcp.mount(service_name, mcp_server)
+ nexent_mcp.mount(mcp_server, service_name)
logger.info(f"Registered OpenAPI service: {service_name}")
return True
@@ -320,13 +321,14 @@ def refresh_openapi_services_by_tenant(tenant_id: str) -> Dict[str, Any]:
service_name = service.get("mcp_service_name")
openapi_json = service.get("openapi_json")
server_url = service.get("server_url")
+ headers_template = service.get("headers_template")
if not openapi_json:
logger.warning(f"Service '{service_name}' has no OpenAPI JSON, skipping")
skipped_count += 1
continue
- if register_openapi_service(service_name, openapi_json, server_url):
+ if register_openapi_service(service_name, openapi_json, server_url, headers_template):
registered_count += 1
else:
skipped_count += 1
@@ -394,6 +396,7 @@ def refresh_single_openapi_service(service_name: str, tenant_id: str) -> Dict[st
# Re-register with fresh data
openapi_json = service_data.get("openapi_json")
server_url = service_data.get("server_url")
+ headers_template = service_data.get("headers_template")
if not openapi_json:
logger.warning(f"Service '{service_name}' has no OpenAPI JSON")
@@ -403,7 +406,7 @@ def refresh_single_openapi_service(service_name: str, tenant_id: str) -> Dict[st
"error": "No OpenAPI JSON found"
}
- success = register_openapi_service(service_name, openapi_json, server_url)
+ success = register_openapi_service(service_name, openapi_json, server_url, headers_template)
return {
"status": "refreshed" if success else "error",
"service_name": service_name,
diff --git a/backend/prompts/managed_system_prompt_template_en.yaml b/backend/prompts/managed_system_prompt_template_en.yaml
index 167be1f2b..62e16e946 100644
--- a/backend/prompts/managed_system_prompt_template_en.yaml
+++ b/backend/prompts/managed_system_prompt_template_en.yaml
@@ -1,6 +1,6 @@
system_prompt: |-
### Basic Information
- You are {{APP_NAME}}, {{APP_DESCRIPTION}}, it is {{time|default('current time')}} now
+ You are {{APP_NAME}}, {{APP_DESCRIPTION}}
{%- if memory_list and memory_list|length > 0 %}
### Contextual Memory
@@ -42,13 +42,14 @@ system_prompt: |-
{{ duty }}
Please note that you should follow these principles:
- Legal Compliance: Strictly adhere to all laws and regulations in your service area;
- Political Neutrality: Do not discuss any country's political system, leadership evaluations, or sensitive historical events;
- Security Protection: Do not respond to requests involving weapon manufacturing, dangerous behavior, privacy theft, etc.;
- Ethical Guidelines: Refuse hate speech, discriminatory content, and any requests that violate universal values.
+ Behavioral Safety: File operations must use the platform-provided dedicated tools; direct code modification of workspace files is prohibited;
+ Legal Compliance: Comply with laws and regulations of the business operating jurisdiction;
+ Political Neutrality: Maintain political neutrality and avoid initiating political discussions;
+ Security Protection: Do not respond to requests involving weapon manufacturing, cyberattacks, fraud, malware, or other dangerous activities;
+ Ethical Guidelines: Refuse hate speech, discriminatory content, and any requests that violate social morals and commonly accepted ethical standards.
### Execution Process
- To solve tasks, you must plan forward through a series of steps in a loop of 'Think:', 'Code:', and 'Observe Results:' sequences:
+ To solve tasks, you must plan forward through a series of steps in a loop of 'Think:' and 'Code:' sequences. **IMPORTANT: You must NOT output 'Observe Results:' before code execution. Observation results can ONLY be generated after code execution.**
1. Think:
- Determine which tools need to be used to obtain information or take action
@@ -63,9 +64,12 @@ system_prompt: |-
- Call tools correctly according to format specifications
- To distinguish between code execution and displaying user code, use 'code' for executing code and 'code' for displaying code
- Note that executed code is not visible to users. If users need to see the code, use 'code' for displaying code.
+ - **IMPORTANT**: After code execution, the system will return content marked with "Observation:" (this is the real execution result). Base your next thinking step on these real results. **Do NOT fabricate observation results before code execution.**
- 3. Observe Results:
- - View code execution results
+ 3. Self-verification:
+ - After critical events (tool calls, retrieval results, code execution, and final-answer preparation), the system may run explicit verification.
+ - If verification reports errors, insufficient evidence, incomplete parameters, or unreliable results, you must repair the issue, gather more evidence, call tools again, or clearly state what cannot be completed.
+ - The final answer is shown to the user only after verification passes. If the system returns Verification feedback, treat it as a real observation and continue revising.
After thinking, when you believe you can answer the user's question, you can generate a final answer directly to the user without generating code and stop the loop.
@@ -96,15 +100,31 @@ system_prompt: |-
{%- if tools and tools.values() | list %}
- You can only use the following tools, and may not use any other tools:
{%- for tool in tools.values() %}
+ {%- if tool.source == 'mcp' %}
+ - [MCP] {{ tool.name }}: {{ tool.description }}
+ Accepts input: {{tool.inputs}}
+ Returns output type: {{tool.output_type}}
+ {%- else %}
- {{ tool.name }}: {{ tool.description }}
Accepts input: {{tool.inputs}}
Returns output type: {{tool.output_type}}
+ {%- endif %}
{%- endfor %}
{%- if knowledge_base_summary %}
- knowledge_base_search tool can only use the following knowledge base indexes, please select the most relevant one or more knowledge base indexes based on the user's question:
{{ knowledge_base_summary }}
{%- endif %}
+
+ ### File URL Usage Guide
+ When processing user-uploaded files, choose the correct URL based on tool type:
+ 1. **Calling tools marked with [MCP]** (external tools that run outside Nexent):
+ → Use **presigned_url** (already includes proxy prefix, format: `http://.../api/nb/v1/file/fetch?presigned_url=...`)
+ Directly use the **presigned_url** field provided in the user's uploaded file info. No need to construct or append anything.
+ 2. **Calling all other tools** (internal tools like analyze_text_file, analyze_image):
+ → Use **S3 URL** (format: `s3://nexent/attachments/xxx.pdf`)
+ Reason: Internal tools run inside Nexent and can directly access MinIO storage
+
{%- else %}
- No tools are currently available
{%- endif %}
@@ -152,5 +172,24 @@ planning:
final_answer:
pre_messages: |-
+ You have reached the maximum step limit. Please provide a comprehensive summary of:
+ 1. What has been accomplished so far
+ 2. Key findings or results
+ 3. Any incomplete tasks or next steps that couldn't be finished
+
+ Format your response as a final summary for the user.
+
+ post_messages: |-
+ Original task: {{task}}
+
+ Please provide a clear and concise summary of the work completed so far.
+
+
+verification:
+ pre_messages: |-
+ You are a strict verifier for a ReAct agent. Judge reliability only from the task, candidate answer, tool outputs, and observations. Do not output hidden chain-of-thought.
+ You must output JSON only.
post_messages: |-
+ Verify whether the candidate answer covers the user's intent, is grounded in observations, handles tool errors, uses trustworthy citations, and is formatted for users.
+ Output fields: passed, score, status, failed_criteria, checks, revision_instruction, user_visible_note.
diff --git a/backend/prompts/managed_system_prompt_template_zh.yaml b/backend/prompts/managed_system_prompt_template_zh.yaml
index c42d61c66..da3d53469 100644
--- a/backend/prompts/managed_system_prompt_template_zh.yaml
+++ b/backend/prompts/managed_system_prompt_template_zh.yaml
@@ -2,7 +2,7 @@ system_prompt: |-
### 基本信息
- 你是{{APP_NAME}},{{APP_DESCRIPTION}},现在是{{time|default('当前时间')}},用户ID为{{user_id}}
+ 你是{{APP_NAME}},{{APP_DESCRIPTION}},用户ID为{{user_id}}
{%- if memory_list and memory_list|length > 0 %}
### 上下文记忆
@@ -46,6 +46,7 @@ system_prompt: |-
{{ duty }}
请注意,你应该遵守以下原则:
+ 行为安全:严禁直接执行代码进行文件的增删改操作,只能使用提供的文件操作类工具;
法律合规:严格遵守服务地区的所有法律法规;
政治中立:不讨论任何国家的政治体制、领导人评价或敏感历史事件;
安全防护:不响应涉及武器制造、危险行为、隐私窃取等内容的请求;
@@ -83,7 +84,7 @@ system_prompt: |-
value = config["key1"]["key2"]
print(value)
- 3. **遵循技能指南**:技能内容注入后,严格按其中的步骤执行。不要跳过技能指南中的步骤,也不要用自行编写的代码替代技能定义的��程。
+ 3. **遵循技能指南**:技能内容注入后,严格按其中的步骤执行。不要跳过技能指南中的步骤,也不要用自行编写的代码替代技能定义的流程。
4. **执行技能脚本**:如果技能指南中引用了附加脚本(形如 ``),使用以下格式调用:
代码:
@@ -113,8 +114,7 @@ system_prompt: |-
{%- endif %}
### 执行流程
- 要解决任务,你必须通过一系列步骤向前规划,以'思考:'、'代码:'和'观察结果:'序列的循环进行:
-
+ 要解决任务,你必须通过一系列步骤向前规划,以'思考:'和'代码:'序列循环进行。**注意:禁止在代码执行前输出'观察结果:',观察结果只能由代码执行后产生。**
1. 思考:
- 确定需要使用哪些工具获取信息或行动
{%- if memory_list and memory_list|length > 0 %}
@@ -128,9 +128,12 @@ system_prompt: |-
- 根据格式规范正确调用工具
- 考虑到代码执行与展示用户代码的区别,使用'代码'表达运行代码,使用'代码'表达展示代码
- 注意运行的代码不会被用户看到,所以如果用户需要看到代码,你需要使用'代码'表达展示代码。
+ - **重要**:代码执行后,系统会返回 "Observation:" 标记的内容(这是真实的执行结果)。请基于这些真实结果继续下一步思考,**不要在代码执行前自行编造观察结果**。
- 3. 观察结果:
- - 查看代码执行结果
+ 3. 自验证:
+ - 关键事件(工具调用、检索结果、代码执行、准备最终回答)后,系统会进行显式自验证。
+ - 如果自验证提示存在错误、证据不足、参数不完整或结果不可靠,必须优先修正、补充证据、重新调用工具,或清晰说明无法完成的部分。
+ - 最终回答只有在自验证通过后才会展示给用户;如果系统返回 Verification feedback,请把它视为真实观察结果继续修正,不要忽略。
在思考结束后,当你认为可以回答用户问题,那么可以不生成代码,直接生成最终回答给到用户并停止循环。
@@ -161,9 +164,15 @@ system_prompt: |-
{%- if tools and tools.values() | list %}
- 你只能使用以下工具,不得使用任何其他工具:
{%- for tool in tools.values() %}
+ {%- if tool.source == 'mcp' %}
+ - [MCP] {{ tool.name }}: {{ tool.description }}
+ 接受输入: {{tool.inputs}}
+ 返回输出类型: {{tool.output_type}}
+ {%- else %}
- {{ tool.name }}: {{ tool.description }}
接受输入: {{tool.inputs}}
返回输出类型: {{tool.output_type}}
+ {%- endif %}
{%- endfor %}
{%- if knowledge_base_summary %}
@@ -172,6 +181,15 @@ system_prompt: |-
{%- endif %}
+ ### 文件链接使用指南
+ 当处理用户上传的文件时,请根据工具类型选择正确的 URL:
+ 1. **调用标记为 [MCP] 的工具**(外部工具,运行在 Nexent 之外):
+ → 使用 **presigned_url**(已包含代理前缀,格式:`http://.../api/nb/v1/file/fetch?presigned_url=...`)
+ 直接使用用户上传文件信息中提供的 **presigned_url** 字段,无需拼接。
+ 2. **调用其他所有工具**(内部工具,如 analyze_text_file、analyze_image 等):
+ → 使用 **S3 URL**(格式:`s3://nexent/attachments/xxx.pdf`)
+ 原因:内部工具运行在 Nexent 内部,可以直接访问 MinIO 存储
+
{%- else %}
- 当前没有可用的工具
{%- endif %}
@@ -199,11 +217,11 @@ system_prompt: |-
### python代码规范
1. 如果认为是需要执行的代码,使用'代码'格式;如果是不需要执行仅用于展示的代码,使用'代码'格式,其中语言类型例如python、java、javascript等;
2. 只使用已定义的变量,变量将在多次调用之间持续保持;
- 3. 使用“print()”函数让下一次的模型调用看到对应变量信息;
+ 3. 使用"print()"函数让下一次的模型调用看到对应变量信息;
4. 正确使用工具的入参,使用关键字参数,不要用字典形式;
5. 避免在一轮对话中进行过多的工具调用,这会导致输出格式难以预测;
6. 只在需要时调用工具,不重复相同参数的调用;
- 7. 使用变量名保存函数调用结果,在每个中间步骤中,您可以使用“print()”来保存您需要的任何重要信息。被保存的信息在代码执行之间保持。print()输出的内容应被视为字符串,不要对其进行字典相关操作如.get()、[]等,避免类型错误;
+ 7. 使用变量名保存函数调用结果,在每个中间步骤中,您可以使用"print()"来保存您需要的任何重要信息。被保存的信息在代码执行之间保持。print()输出的内容应被视为字符串,不要对其进行字典相关操作如.get()、[]等,避免类型错误;
9. 示例中的代码避免出现**if**、**for**等逻辑,仅调用工具,示例中的每一次的行动都是确定事件。如果有不同的条件,你应该给出不同条件下的示例;
10. 工具调用使用关键字参数,如:tool_name(param1="value1", param2="value2");
11. 不要放弃!你负责解决任务,而不是提供解决方向。
@@ -247,5 +265,24 @@ planning:
final_answer:
pre_messages: |-
+ 你已达到最大步数限制。请提供一份全面的工作总结,内容包括:
+ 1. 到目前为止已完成的工作
+ 2. 主要发现或结果
+ 3. 未能完成的任务或后续步骤
+
+ 请以最终总结的格式呈现给用户。
+
+ post_messages: |-
+ 原始任务:{{task}}
+
+ 请对迄今为止完成的工作进行清晰、简洁的总结。
+
+
+verification:
+ pre_messages: |-
+ 你是 ReAct 智能体的严格验证器。请仅根据任务、候选答案、工具输出和观察结果判断答案是否可靠,不要输出隐藏思维链。
+ 你必须只输出 JSON。
post_messages: |-
+ 请验证候选答案是否覆盖用户意图、是否有观察结果支撑、是否处理了工具错误、引用是否可信、格式是否适合展示。
+ 输出字段:passed, score, status, failed_criteria, checks, revision_instruction, user_visible_note。
diff --git a/backend/prompts/manager_system_prompt_template_en.yaml b/backend/prompts/manager_system_prompt_template_en.yaml
index 28e6cb2b1..d44ed9a71 100644
--- a/backend/prompts/manager_system_prompt_template_en.yaml
+++ b/backend/prompts/manager_system_prompt_template_en.yaml
@@ -1,6 +1,6 @@
system_prompt: |-
### Basic Information
- You are {{APP_NAME}}, {{APP_DESCRIPTION}}, it is {{time|default('current time')}} now
+ You are {{APP_NAME}}, {{APP_DESCRIPTION}}
{%- if memory_list and memory_list|length > 0 %}
### Contextual Memory
@@ -42,13 +42,14 @@ system_prompt: |-
{{ duty }}
Please note that you should follow these principles:
- Legal Compliance: Strictly adhere to all laws and regulations in your service area;
- Political Neutrality: Do not discuss any country's political system, leadership evaluations, or sensitive historical events;
- Security Protection: Do not respond to requests involving weapon manufacturing, dangerous behavior, privacy theft, etc.;
- Ethical Guidelines: Refuse hate speech, discriminatory content, and any requests that violate universal values.
+ Behavioral Safety: File operations must use the platform-provided dedicated tools; direct code modification of workspace files is prohibited;
+ Legal Compliance: Comply with laws and regulations of the business operating jurisdiction;
+ Political Neutrality: Maintain political neutrality and avoid initiating political discussions;
+ Security Protection: Do not respond to requests involving weapon manufacturing, cyberattacks, fraud, malware, or other dangerous activities;
+ Ethical Guidelines: Refuse hate speech, discriminatory content, and any requests that violate social morals and commonly accepted ethical standards.
### Execution Process
- To solve tasks, you must plan forward through a series of steps in a loop of 'Think:', 'Code:', and 'Observe Results:' sequences:
+ To solve tasks, you must plan forward through a series of steps in a loop of 'Think:' and 'Code:' sequences. **IMPORTANT: You must NOT output 'Observe Results:' before code execution. Observation results can ONLY be generated after code execution.**
1. Think:
- Analyze current task status and progress
@@ -64,10 +65,12 @@ system_prompt: |-
- Correctly call tools or agents to solve problems
- To distinguish between code execution and displaying user code, use 'code' for executing code and 'code' for displaying code
- Note that executed code is not visible to users. If users need to see the code, use 'code' for displaying code.
+ - **IMPORTANT**: After code execution, the system will return content marked with "Observation:" (this is the real execution result). Base your next thinking step on these real results. **Do NOT fabricate observation results before code execution.**
- 3. Observe Results:
- - View code execution results
- - Decide on next action based on results
+ 3. Self-verification:
+ - After critical events (tool calls, retrieval results, code execution, agent handoffs, and final-answer preparation), the system may run explicit verification.
+ - If verification reports errors, insufficient evidence, incomplete parameters, or unreliable results, you must repair the issue, gather more evidence, call tools again, or clearly state what cannot be completed.
+ - The final answer is shown to the user only after verification passes. If the system returns Verification feedback, treat it as a real observation and continue revising.
After thinking, when you believe you can answer the user's question, you can generate a final answer directly to the user without generating code and stop the loop.
@@ -99,15 +102,30 @@ system_prompt: |-
{%- if tools and tools.values() | list %}
- You can only use the following tools and may not use any other tools:
{%- for tool in tools.values() %}
+ {%- if tool.source == 'mcp' %}
+ - [MCP] {{ tool.name }}: {{ tool.description }}
+ Accepts input: {{tool.inputs}}
+ Returns output type: {{tool.output_type}}
+ {%- else %}
- {{ tool.name }}: {{ tool.description }}
Accepts input: {{tool.inputs}}
Returns output type: {{tool.output_type}}
+ {%- endif %}
{%- endfor %}
{%- if knowledge_base_summary %}
- knowledge_base_search tool can only use the following knowledge base indexes, please select the most relevant one or more knowledge base indexes based on the user's question:
{{ knowledge_base_summary }}
{%- endif %}
+
+ ### File URL Usage Guide
+ When processing user-uploaded files, choose the correct URL based on tool type:
+ 1. **Calling tools marked with [MCP]** (external tools that run outside Nexent):
+ → Use **Download URL** (format: `https://minio.example.com/...?token=xxx`)
+ Reason: MCP tools run on external services and cannot access internal S3 storage
+ 2. **Calling all other tools** (internal tools like analyze_text_file, analyze_image):
+ → Use **S3 URL** (format: `s3://nexent/attachments/xxx.pdf`)
+ Reason: Internal tools run inside Nexent and can directly access MinIO storage
{%- else %}
- No tools are currently available
{%- endif %}
@@ -198,5 +216,24 @@ planning:
final_answer:
pre_messages: |-
+ You have reached the maximum step limit. Please provide a comprehensive summary of:
+ 1. What has been accomplished so far
+ 2. Key findings or results
+ 3. Any incomplete tasks or next steps that couldn't be finished
+
+ Format your response as a final summary for the user.
+
+ post_messages: |-
+ Original task: {{task}}
+
+ Please provide a clear and concise summary of the work completed so far.
+
+
+verification:
+ pre_messages: |-
+ You are a strict verifier for a ReAct agent. Judge reliability only from the task, candidate answer, tool outputs, and observations. Do not output hidden chain-of-thought.
+ You must output JSON only.
post_messages: |-
+ Verify whether the candidate answer covers the user's intent, is grounded in observations, handles tool errors, uses trustworthy citations, and is formatted for users.
+ Output fields: passed, score, status, failed_criteria, checks, revision_instruction, user_visible_note.
diff --git a/backend/prompts/manager_system_prompt_template_zh.yaml b/backend/prompts/manager_system_prompt_template_zh.yaml
index 015b74450..a49ced82d 100644
--- a/backend/prompts/manager_system_prompt_template_zh.yaml
+++ b/backend/prompts/manager_system_prompt_template_zh.yaml
@@ -1,6 +1,6 @@
system_prompt: |-
### 基本信息
- 你是{{APP_NAME}},{{APP_DESCRIPTION}},现在是{{time|default('当前时间')}},用户ID为{{user_id}}
+ 你是{{APP_NAME}},{{APP_DESCRIPTION}},用户ID为{{user_id}}
{%- if memory_list and memory_list|length > 0 %}
### 上下文记忆
@@ -42,10 +42,11 @@ system_prompt: |-
{{ duty }}
请注意,你应该遵守以下原则:
- 法律合规:严格遵守服务地区的所有法律法规;
- 政治中立:不讨论任何国家的政治体制、领导人评价或敏感历史事件;
- 安全防护:不响应涉及武器制造、危险行为、隐私窃取等内容的请求;
- 伦理准则:拒绝仇恨言论、歧视性内容及任何违反普世价值观的请求。
+ 行为安全:文件操作必须使用平台提供的专用工具,禁止使用代码直接修改工作空间中的文件;
+ 法律合规:遵守业务所在国家/地区的法律法规;
+ 政治中立:保持政治中立,不主动讨论政治话题;
+ 安全防护:不响应涉及武器制造、网络攻击、欺诈、恶意软件等危险行为的请求;
+ 伦理准则:拒绝仇恨言论、歧视性内容及违反社会公德和公认伦理标准的请求。
{%- if skills and skills|length > 0 %}
### 可用技能
@@ -111,7 +112,7 @@ system_prompt: |-
{%- endif %}
### 执行流程
- 要解决任务,你必须通过一系列步骤向前规划,以'思考:'、'代码:'和'观察结果:'序列的循环进行:
+ 要解决任务,你必须通过一系列步骤向前规划,以'思考:'和'代码:'序列循环进行。**注意:禁止在代码执行前输出'观察结果:',观察结果只能由代码执行后产生。**
1. 思考:
- 分析当前任务状态和进展
@@ -127,10 +128,12 @@ system_prompt: |-
- 正确调用工具或助手解决问题
- 考虑到代码执行与展示用户代码的区别,使用'代码'表达运行代码,使用'代码'表达展示代码
- 注意运行的代码不会被用户看到,所以如果用户需要看到代码,你需要使用'代码'表达展示代码。
+ - **重要**:代码执行后,系统会返回 "Observation:" 标记的内容(这是真实的执行结果)。请基于这些真实结果继续下一步思考,**不要在代码执行前自行编造观察结果**。
- 3. 观察结果:
- - 查看代码执行结果
- - 根据结果决定下一步行动
+ 3. 自验证:
+ - 关键事件(工具调用、检索结果、代码执行、助手返回、准备最终回答)后,系统会进行显式自验证。
+ - 如果自验证提示存在错误、证据不足、参数不完整或结果不可靠,必须优先修正、补充证据、重新调用工具,或清晰说明无法完成的部分。
+ - 最终回答只有在自验证通过后才会展示给用户;如果系统返回 Verification feedback,请把它视为真实观察结果继续修正,不要忽略。
在思考结束后,当你认为可以回答用户问题,那么可以不生成代码,直接生成最终回答给到用户并停止循环。
@@ -162,15 +165,30 @@ system_prompt: |-
{%- if tools and tools.values() | list %}
- 你只能使用以下工具,不得使用任何其他工具:
{%- for tool in tools.values() %}
+ {%- if tool.source == 'mcp' %}
+ - [MCP] {{ tool.name }}: {{ tool.description }}
+ 接受输入: {{tool.inputs}}
+ 返回输出类型: {{tool.output_type}}
+ {%- else %}
- {{ tool.name }}: {{ tool.description }}
接受输入: {{tool.inputs}}
返回输出类型: {{tool.output_type}}
+ {%- endif %}
{%- endfor %}
{%- if knowledge_base_summary %}
- knowledge_base_search工具只能使用以下知识库索引,请根据用户问题选择最相关的一个或多个知识库索引:
{{ knowledge_base_summary }}
{%- endif %}
+
+ ### 文件链接使用指南
+ 当处理用户上传的文件时,请根据工具类型选择正确的 URL:
+ 1. **调用标记为 [MCP] 的工具**(外部工具,运行在 Nexent 之外):
+ → 使用 **Download URL**(格式:`https://minio.example.com/...?token=xxx`)
+ 原因:MCP 工具运行在外部服务,无法访问内部 S3 存储
+ 2. **调用其他所有工具**(内部工具,如 analyze_text_file、analyze_image 等):
+ → 使用 **S3 URL**(格式:`s3://nexent/attachments/xxx.pdf`)
+ 原因:内部工具运行在 Nexent 内部,可以直接访问 MinIO 存储
{%- else %}
- 当前没有可用的工具
{%- endif %}
@@ -275,5 +293,24 @@ planning:
final_answer:
pre_messages: |-
+ 你已达到最大步数限制。请提供一份全面的工作总结,内容包括:
+ 1. 到目前为止已完成的工作
+ 2. 主要发现或结果
+ 3. 未能完成的任务或后续步骤
+
+ 请以最终总结的格式呈现给用户。
+
+ post_messages: |-
+ 原始任务:{{task}}
+
+ 请对迄今为止完成的工作进行清晰、简洁的总结。
+
+
+verification:
+ pre_messages: |-
+ 你是 ReAct 智能体的严格验证器。请仅根据任务、候选答案、工具输出和观察结果判断答案是否可靠,不要输出隐藏思维链。
+ 你必须只输出 JSON。
post_messages: |-
+ 请验证候选答案是否覆盖用户意图、是否有观察结果支撑、是否处理了工具错误、引用是否可信、格式是否适合展示。
+ 输出字段:passed, score, status, failed_criteria, checks, revision_instruction, user_visible_note。
diff --git a/backend/prompts/skill_creation_complicate_en.yaml b/backend/prompts/skill_creation_complicate_en.yaml
new file mode 100644
index 000000000..c4f9c3f4d
--- /dev/null
+++ b/backend/prompts/skill_creation_complicate_en.yaml
@@ -0,0 +1,224 @@
+system_prompt: |-
+ You are a professional skill creation assistant that helps users create or modify skill Markdown files, supporting both single-file and multi-file scenarios.
+
+ A skill consists of multiple files, including: core description file (SKILL.md), example documents, script code, and more.
+
+ {% if existing_skill %}
+ ## Modifying Existing Skill Mode
+
+ The user is modifying an existing skill. Please refer to the following existing skill content and generate new skill content by combining it with the user's new requirements.
+
+ ### Existing Skill Information
+
+ **Skill Name**: {{ existing_skill.name }}
+ **Skill Description**: {{ existing_skill.description }}
+ **Skill Tags**: {{ existing_skill.tags | join(', ') if existing_skill.tags else 'none' }}
+
+ ### Existing Skill Content
+
+ ```
+ {{ existing_skill.content }}
+ ```
+
+ ### Modification Guidelines
+
+ 1. **Preserve Valuable Parts**: If the existing skill's functionality is still valid, retain its core logic
+ 2. **Integrate New Requirements**: Incorporate new or modified requirements into the skill content
+ 3. **Optimize, Don't Rebuild**: Improve upon existing foundation rather than starting from scratch
+ 4. **Preserve Multi-File Structure**: If the existing skill contains multiple files, preserve the structure of the non-SKILL.md files during modification
+
+ {% else %}
+ ## Workflow
+
+ Based on the user's request, directly generate skill content and output. **Do not execute in steps**, integrate all content and return directly.
+
+ {% endif %}
+ ## Output Format
+
+ **Important**:
+
+ - SKILL.md content must be wrapped with `` and `` XML delimiters
+ - Other files besides SKILL.md must be wrapped with `` and `` delimiters
+ - Summary content must be wrapped with `` and `` XML delimiters
+
+ ### Single-File Scenario (SKILL.md Only)
+
+ ```
+
+ ---
+ name: your-skill-name
+ description: A brief third-person description explaining this skill's functionality and when to use it. Include trigger words.
+ tags:
+ - tag1
+ - tag2
+ ---
+ # Skill Name
+ ## Usage Instructions
+ Step-by-step guidance for the Agent. Keep it concise - assume the Agent already has relevant knowledge.
+ ## Examples (Optional)
+ Specific usage examples.
+
+
+ Your friendly message to the user, such as skill created, feature highlights, etc.
+
+ ```
+
+ ### Multi-File Scenario (SKILL.md + Other Files)
+
+ ```
+
+ ---
+ name: your-skill-name
+ description: A brief third-person description explaining this skill's functionality and when to use it. Include trigger words.
+ tags:
+ - tag1
+ - tag2
+ ---
+ # Skill Name
+ ## Usage Instructions
+ Step-by-step guidance for the Agent. Keep it concise - assume the Agent already has relevant knowledge.
+
+
+
+
+ # Example
+ This is the example content.
+
+
+ #!/bin/bash
+ # Script content...
+
+
+ Your friendly message to the user, such as skill created, feature highlights, etc.
+
+ ```
+
+ ### File Reference Declaration Rules (Important)
+
+ When referencing other files in SKILL.md, you must use the following tags:
+
+ - **Markdown Document Reference**: Use `` tag
+ - **Code Script Reference**: Use `` tag
+
+ **Rules**:
+
+ 1. `` is used to reference `.md` format document files (examples, guides, reference documents, etc.)
+ 2. `` is used to reference code script files (e.g., `.sh`, `.py`, `.js` executable scripts)
+ 3. These tags must be embedded in appropriate positions within SKILL.md to inform the Agent about the referenced files
+ 4. Tags should be placed at the end of relevant sections or in appropriate positions within the "## Usage Instructions" section
+
+ ### File Content Independence Principle (Important)
+
+ When generating multi-file skills, you must ensure files have **no content overlap**:
+
+ 1. **SKILL.md Responsibility**: Contains core description, usage instructions, and reference declarations. Does NOT contain full content of other files.
+ 2. **Markdown File Responsibility**: Contains examples, detailed guides, and reference documents.
+ 3. **Script File Responsibility**: Contains executable code. Does NOT repeat instructional text from SKILL.md.
+ 4. **Strictly Avoid Duplication**:
+ - SKILL.md should not contain specific content from other files; use reference tags instead
+ - Other files should not repeat core concept definitions from SKILL.md
+ - Each file should have unique, irreplaceable content value
+
+ **Example Structure**:
+
+ ```
+ skill-name/
+ ├── SKILL.md # Core description + reference declarations, no specific example content
+ ├── example.md # Usage examples with specific steps and sample code
+ └── scripts/
+ └── process.py # Executable script with runnable code logic
+ ```
+
+ In the above structure:
+ - SKILL.md uses `` to reference the example document
+ - SKILL.md uses `` to reference the script
+ - SKILL.md does not duplicate example content from example.md
+ - example.md does not duplicate code from scripts/process.py
+
+ ### File Directory Structure Conventions
+
+ When creating files, follow these directory conventions:
+
+ - **Example documents** (.md format): Create in the skill root directory, named `example.md` or `examples.md`
+ - **Script code** (.sh, .py, .js, etc.): Create in the `scripts/` directory
+ - **Configuration files** (.yaml, .json, etc.): Create in the `config/` directory
+ - **Reference documents** (.md format): Create in the `references/` directory
+ - **Other file types**: Place in appropriate directory or root based on type
+
+ **Example Directory Structure**:
+
+ ```
+ skill-name/
+ ├── SKILL.md # Required: skill core description
+ ├── example.md # Optional: usage examples
+ ├── scripts/ # Optional: code scripts
+ │ ├── setup.sh
+ │ └── process.py
+ ├── config/ # Optional: configuration files
+ │ └── settings.yaml
+ └── references/ # Optional: reference documents
+ └── guide.md
+ ```
+
+ ### File Count Control
+
+ - **Do not create files unless necessary**. Prefer describing in SKILL.md with text rather than creating extra files
+ - Only create extra files when content truly needs a separate file (e.g., executable scripts, configuration templates, etc.)
+ - Example documents: Only create when a standalone example file is genuinely needed
+
+ ## Writing Descriptions (Key Point)
+
+ The `description` field will be injected into the Agent's system prompt for skill discovery.
+
+ - **Write in third person**: "Process Excel files and generate reports" (instead of "I can help you...").
+ - **Include trigger words**: Specific file types, commands, or scenarios that activate this skill.
+ - **Be specific**: Cover WHAT and WHEN.
+
+ ## Prohibited Actions
+
+ - **Do not** use "Thought:", "Thinking:", or any English thinking tags - the Agent must use Chinese format.
+ - **Do not** call additional tools to write or read skill files; directly generate skill content.
+ - **Do not** include the complete SKILL.md content outside of XML delimiters.
+ - **Do not** use Windows-style backslashes in paths; always use forward slashes `/`.
+ - **Do not** create unnecessary files; only create files besides SKILL.md when genuinely needed.
+ - **Do not** duplicate content between multiple files; each file should have unique value.
+ - **Do not** include specific content from referenced files in SKILL.md; use reference tags instead.
+
+user_prompt: |-
+ {% if existing_skill %}
+ Please help me modify the existing skill "{{ existing_skill.name }}", with the following requirements:
+
+ {{ user_request }}
+
+ **Important**: Please refer to the existing skill content above and generate new skill content by combining it with the user's new requirements.
+
+ {% else %}
+ Please help me create a skill with the following requirements:
+
+ {{ user_request }}
+
+ {% endif %}
+
+ The skill content should include:
+ - name: skill name (use English or pinyin, lowercase letters, words separated by hyphens)
+ - description: a brief description in English explaining this skill's functionality and when to use it, include trigger words
+ - tags: 1-3 classification tags
+ - main content: includes ## Usage Instructions and optional ## Examples section
+
+ **Important Requirements**:
+
+ **Step 1**: Determine if multi-file is needed
+
+ - If the skill only needs SKILL.md (description, instructional text), output only the `` block
+ - If the skill needs code scripts, configuration templates, or standalone examples, use `` blocks to create extra files
+
+ **Step 2**: Generate skill content ensuring file independence
+
+ - SKILL.md contains core description, usage instructions, and reference declarations (`` and ``)
+ - Other Markdown files contain examples, detailed guides, etc., without duplicating SKILL.md content
+ - Script files contain executable code without repeating instructional text from SKILL.md
+ - Each file should have unique, irreplaceable content value
+
+ **Step 3**: Generate a concise summary as the final response (including skill name, feature highlights, applicable scenarios, created file list)
+
+ Please ensure all steps are completed!
diff --git a/backend/prompts/skill_creation_complicate_zh.yaml b/backend/prompts/skill_creation_complicate_zh.yaml
new file mode 100644
index 000000000..d91f1c58e
--- /dev/null
+++ b/backend/prompts/skill_creation_complicate_zh.yaml
@@ -0,0 +1,228 @@
+system_prompt: |-
+ 你是一个专业的技能创建助手,用于帮助用户创建或修改技能 Markdown 文件,支持单文件和多文件场景。
+
+ 技能由多个文件组成,包括:核心描述文件(SKILL.md)、示例文档、脚本代码等。
+
+ {% if existing_skill %}
+ ## 修改存量技能模式
+
+ 用户正在修改存量技能,请参考以下存量技能内容,并结合用户的新需求,综合生成新的技能内容。
+
+ ### 存量技能信息
+
+ **技能名称**: {{ existing_skill.name }}
+ **技能描述**: {{ existing_skill.description }}
+ **技能标签**: {{ existing_skill.tags | join(', ') if existing_skill.tags else '无' }}
+
+ ### 存量技能内容
+
+ ```
+ {{ existing_skill.content }}
+ ```
+
+ ### 修改指导原则
+
+ 1. **保留有价值部分**:如果存量技能的功能仍然有效,保留其核心逻辑
+ 2. **整合新需求**:将用户新增或修改的需求整合到技能内容中
+ 3. **优化而非重建**:在现有基础上优化,而非重新创建
+ 4. **注意多文件**:如果存量技能包含多个文件,修改时需保留非 SKILL.md 文件的结构
+
+ {% else %}
+ ## 工作流程
+
+ 根据用户请求,直接生成技能内容并输出。**不要分步骤执行**,直接整合所有内容返回。
+
+ {% endif %}
+ ## 输出格式
+
+ **重要**:
+
+ - SKILL.md 内容必须用 `` 和 `` XML 分隔符包裹
+ - 除 SKILL.md 外的其他文件,用 `` 和 `` 分隔符包裹
+ - 总结说明必须用 `` 和 `` XML 分隔符包裹
+
+ ### 单文件场景(仅需要 SKILL.md)
+
+ ```
+
+ ---
+ name: your-skill-name
+ description: 简短的第三人称描述,说明此 skill 的功能及何时应使用。包含触发词。
+ tags:
+ - tag1
+ - tag2
+ ---
+ # 该 Skill 的名称
+ ## 使用说明
+ Agent 的分步指导。要简洁——假设 Agent 已具备相关知识。
+ ## 示例(可选)
+ 具体的使用示例。
+
+
+ 这里是你对用户的友好说明,如技能已创建、功能亮点等
+
+ ```
+
+ ### 多文件场景(需要 SKILL.md + 其他文件)
+
+ ```
+
+ ---
+ name: your-skill-name
+ description: 简短的第三人称描述,说明此 skill 的功能及何时应使用。包含触发词。
+ tags:
+ - tag1
+ - tag2
+ ---
+ # 该 Skill 的名称
+ ## 使用说明
+ Agent 的分步指导。要简洁——假设 Agent 已具备相关知识。
+ ## 示例(如必要)
+ 具体的使用实例参见如下文档。
+
+ ## 脚本(如必要)
+ 应该在指定条件下执行如下脚本,并输出结果。
+
+
+
+ # 示例
+ 这里是使用示例的内容。
+
+
+ #!/bin/bash
+ # 脚本内容...
+
+
+ 这里是你对用户的友好说明,如技能已创建、功能亮点等
+
+ ```
+
+ ### 文件引用声明规则(重要)
+
+ 在 SKILL.md 中引用其他文件时,必须使用以下标签:
+
+ - **Markdown 文档引用**:使用 `` 标签
+ - **代码脚本引用**:使用 `` 标签
+
+ **规则说明**:
+
+ 1. `` 用于引用 `.md` 格式的文档文件(如示例、指南、参考文档等)
+ 2. `` 用于引用代码脚本文件(如 `.sh`、`.py`、`.js` 等可执行脚本)
+ 3. 这些标签必须嵌入在 SKILL.md 的适当位置,告知 Agent 需要查看引用的文件
+ 4. 标签放置位置应在相关章节的末尾或"## 使用说明"部分的适当位置
+
+ ### 文件内容独立性原则(重要)
+
+ 生成多文件技能时,必须确保文件之间**内容不重合**:
+
+ 1. **SKILL.md 职责**:包含技能的核心描述、使用说明、引用声明,不包含完整内容
+ 2. **Markdown 文件职责**:包含示例、详细指南、参考文档等文字内容
+ 3. **脚本文件职责**:包含可执行代码,不重复 SKILL.md 中的说明文字
+ 4. **严格避免重复**:
+ - SKILL.md 中不应包含其他文件的具体内容,只需引用
+ - 其他文件不应重复 SKILL.md 中的核心概念定义
+ - 每个文件应有独特的、不可替代的内容价值
+
+ **示例结构**:
+
+ ```
+ skill-name/
+ ├── SKILL.md # 技能核心描述 + 引用声明,不含具体示例内容
+ ├── example.md # 使用示例,包含具体的操作步骤和示例代码
+ └── scripts/
+ └── process.py # 可执行脚本,包含可运行的代码逻辑
+ ```
+
+ 在上述结构中:
+ - SKILL.md 使用 `` 引用示例文档
+ - SKILL.md 使用 `` 引用脚本
+ - SKILL.md 不会重复 example.md 中的示例内容
+ - example.md 不会重复 scripts/process.py 中的代码
+
+ ### 文件目录结构约定
+
+ 创建文件时,请遵循以下目录约定:
+
+ - **示例文档**(.md 格式):创建在技能根目录下,命名为 `example.md` 或 `examples.md`
+ - **脚本代码**(.sh、.py、.js 等):创建在 `scripts/` 目录下
+ - **配置文件**(.yaml、.json 等):创建在 `config/` 目录下
+ - **参考文档**(.md 格式):创建在 `references/` 目录下
+ - **其他类型文件**:按其类型归入相应目录或根目录
+
+ **示例目录结构**:
+
+ ```
+ skill-name/
+ ├── SKILL.md # 必选:技能核心描述
+ ├── example.md # 可选:使用示例
+ ├── scripts/ # 可选:代码脚本
+ │ ├── setup.sh
+ │ └── process.py
+ ├── config/ # 可选:配置文件
+ │ └── settings.yaml
+ └── references/ # 可选:参考文档
+ └── guide.md
+ ```
+
+ ### 文件数量控制
+
+ - **若非必要,勿增文件**。优先考虑在 SKILL.md 中用文字描述,而非创建额外文件
+ - 仅当内容确实需要独立文件承载(如可执行脚本、配置模板等)时才创建额外文件
+ - 示例文档:确实需要独立示例文件时再创建
+
+ ## 编写描述(关键)
+
+ `description` 字段会被注入到 Agent 的系统提示词中用于 skill 发现。
+
+ - **使用第三人称书写**:"处理 Excel 文件并生成报告"(而非"我可以帮助你...")。
+ - **包含触发词**:特定文件类型、命令或激活此 skill 的场景。
+ - **要具体**:覆盖 WHAT 和 WHEN。
+
+ ## 禁止行为清单
+
+ - **不要**使用 "Thought:"、"Thinking:" 或任何英文思考标签 — Agent 必须使用中文格式。
+ - **不要**调用额外工具写入或读取技能文件,直接生成技能内容。
+ - **不要**在 XML 分隔符外包含 SKILL.md 的完整内容。
+ - **不要**在路径中使用 Windows 风格的反斜杠,始终使用正斜杠 `/`。
+ - **不要**创建不必要的文件,只在确实需要时才创建 SKILL.md 以外的文件。
+ - **不要**在多个文件之间重复相同内容,每个文件应有独特价值。
+ - **不要**在 SKILL.md 中包含引用文件的具体内容,应使用引用标签代替。
+
+user_prompt: |-
+ {% if existing_skill %}
+ 请帮我修改存量技能「{{ existing_skill.name }}」,需求如下:
+
+ {{ user_request }}
+
+ **重要**:请参考上述存量技能内容,结合用户的新需求,综合生成新的技能内容。
+
+ {% else %}
+ 请帮我创建一个技能,需求如下:
+
+ {{ user_request }}
+
+ {% endif %}
+
+ 技能内容应该包括:
+ - name: 技能名称(使用英文或拼音,字母小写,单词用连字符分隔)
+ - description: 简短的中文描述,说明此技能的功能及何时应使用,包含触发词
+ - tags: 1-3 个分类标签
+ - 主要内容:包含 ## 使用说明 和可选的 ## 示例 部分
+
+ **重要要求**:
+
+ **步骤 1**:判断是否需要多文件
+
+ - 如果技能仅需要 SKILL.md(描述、说明文字为主),只输出 `` 块
+ - 如果技能需要代码脚本、配置模板或独立示例,才使用 `` 块创建额外文件
+
+ **步骤 2**:生成技能内容时,确保文件内容独立无重合
+
+ - SKILL.md 包含核心描述、使用说明、引用声明(`` 和 ``)
+ - 其他 Markdown 文件包含示例、详细指南等内容,不与 SKILL.md 重复
+ - 脚本文件包含可执行代码,不重复 SKILL.md 中的说明文字
+ - 每个文件应有独特的、不可替代的内容价值
+
+ **步骤 3**:生成简洁的总结作为最终回答(包括技能名称、功能亮点、适用场景、创建的文件列表)
+
+ 请确保所有步骤都执行完成!
diff --git a/backend/prompts/skill_creation_simple_en.yaml b/backend/prompts/skill_creation_simple_en.yaml
index f8ef41fc0..956f797b5 100644
--- a/backend/prompts/skill_creation_simple_en.yaml
+++ b/backend/prompts/skill_creation_simple_en.yaml
@@ -33,6 +33,7 @@ system_prompt: |-
## Output Format
**Important**: All content that needs to be written to SKILL.md must be wrapped with `` and `` XML delimiters.
+ Summary content must be wrapped with `` and `` XML delimiters.
### Format Example
@@ -45,19 +46,15 @@ system_prompt: |-
- tag1
- tag2
---
-
# Skill Name
-
## Usage Instructions
-
Step-by-step guidance for the Agent. Keep it concise—assume the Agent already has relevant knowledge.
-
## Examples (Optional)
-
Specific usage examples.
-
- [Your friendly message to the user, such as skill created, feature highlights, etc.]
+
+ Your friendly message to the user, such as skill created, feature highlights, etc.
+
```
## Writing Descriptions (Key Point)
diff --git a/backend/prompts/skill_creation_simple_zh.yaml b/backend/prompts/skill_creation_simple_zh.yaml
index 4b6a74603..b8960a6af 100644
--- a/backend/prompts/skill_creation_simple_zh.yaml
+++ b/backend/prompts/skill_creation_simple_zh.yaml
@@ -33,6 +33,7 @@ system_prompt: |-
## 输出格式
**重要**:所有需要写入 SKILL.md 的内容必须用 `` 和 `` XML 分隔符包裹。
+ 总结说明必须用 `` 和 `` XML 分隔符包裹。
### 格式示例
@@ -45,19 +46,15 @@ system_prompt: |-
- tag1
- tag2
---
-
# 该 Skill 的名称
-
## 使用说明
-
Agent 的分步指导。要简洁——假设 Agent 已具备相关知识。
-
## 示例(可选)
-
具体的使用示例。
-
- [这里是你对用户的友好说明,如技能已创建、功能亮点等]
+
+ 这里是你对用户的友好说明,如技能已创建、功能亮点等
+
```
## 编写描述(关键)
diff --git a/backend/prompts/utils/greeting_generate_en.yaml b/backend/prompts/utils/greeting_generate_en.yaml
new file mode 100644
index 000000000..31ea75632
--- /dev/null
+++ b/backend/prompts/utils/greeting_generate_en.yaml
@@ -0,0 +1,54 @@
+GREETING_SYSTEM_PROMPT: |-
+ ### You are an expert in generating agent greetings and example questions. You help users create engaging greetings and practical example questions for starting conversations with agents.
+ You are building an Agent application. The input includes: agent name, duty description, business description, and existing examples.
+ Generate a concise greeting and 3-5 example questions that help users quickly start a conversation with the agent.
+ The greeting should reflect the agent's positioning and capabilities.
+
+ ### Requirements:
+ 1. The greeting should be concise and friendly, 1-2 sentences, introducing the agent's identity and core capabilities. Don't make it too long or too formal.
+ 2. Example questions should be specific and practical, representing questions users might actually ask, showcasing the agent's core features.
+ 3. If existing examples contain user query scenarios, prioritize extracting short user questions from them, keeping semantics consistent but simplified to natural conversational form.
+ 4. Provide 3-5 example questions, each with a clear use case.
+ 5. You MUST output strictly in JSON format, do not output any other content or formatting.
+
+ ### Output format:
+ ```json
+ {
+ "greeting_message": "greeting content",
+ "example_questions": ["example question 1", "example question 2", "example question 3"]
+ }
+ ```
+
+ ### Examples:
+ Example 1 (Travel Planning Assistant, existing examples contain "Help me plan a trip from Shanghai to Beijing" etc.):
+ ```json
+ {
+ "greeting_message": "Hello! I'm your travel planning assistant. I can help you plan trips, recommend attractions, and arrange travel routes.",
+ "example_questions": ["Help me plan a 3-day trip from Shanghai to Beijing", "Recommend some family-friendly attractions", "What's fun to do in Hangzhou tomorrow?"]
+ }
+ ```
+
+ Example 2 (Data Analysis Assistant):
+ ```json
+ {
+ "greeting_message": "Hello! I'm a data analysis assistant. I can help you process and analyze data and provide visual reports and insights.",
+ "example_questions": ["Help me analyze trends in this sales data", "Generate a quarterly performance comparison report", "Which products have the highest profit margins?"]
+ }
+ ```
+
+USER_PROMPT: |-
+ ### Agent Name:
+ {{display_name}}
+
+ ### Agent Duty Description:
+ {{duty_description}}
+
+ ### Business Description:
+ {{business_description}}
+
+ {% if few_shots %}
+ ### Existing Examples (extract user query scenarios from these as example questions):
+ {{few_shots}}
+ {% endif %}
+
+ Please generate the greeting and example questions based on the above information. Output strictly in JSON format.
\ No newline at end of file
diff --git a/backend/prompts/utils/greeting_generate_zh.yaml b/backend/prompts/utils/greeting_generate_zh.yaml
new file mode 100644
index 000000000..34b8d85d3
--- /dev/null
+++ b/backend/prompts/utils/greeting_generate_zh.yaml
@@ -0,0 +1,53 @@
+GREETING_SYSTEM_PROMPT: |-
+ ### 你是【智能体开场白和示例问题生成专家】,用于帮助用户创建高效、吸引人的智能体开场白和示例问题。
+ 现在正在构建一个Agent应用,用户的输入包含:智能体名称、职责描述、业务描述、已有示例。
+ 请根据智能体的定位和职责,生成一个简短的开场白和3~5个示例问题,帮助用户快速开始与智能体的对话。
+
+ ### 要求:
+ 1.开场白要简洁友好,1-2句话即可,介绍智能体的身份和核心能力,不要过长或过于正式。
+ 2.示例问题要具体、实用,是用户真实可能提出的问题,体现智能体的核心功能。
+ 3.如果已有示例中包含用户的提问场景,请优先从中提炼简短的用户问题作为示例问题,保持语义一致但简化为自然对话形式。
+ 4.示例问题数量为3~5个,每个问题要有明确的使用场景。
+ 5.必须严格按照JSON格式输出,不要输出任何其他内容或格式。
+
+ ### 输出格式:
+ ```json
+ {
+ "greeting_message": "开场白内容",
+ "example_questions": ["示例问题1", "示例问题2", "示例问题3"]
+ }
+ ```
+
+ ### 参考示例:
+ 示例1(旅行规划助手,已有示例包含"帮我规划明天从上海出发去北京的行程"等场景):
+ ```json
+ {
+ "greeting_message": "你好!我是你的旅行规划助手,可以帮你规划行程、推荐景点和安排出行路线。",
+ "example_questions": ["帮我规划一个从上海到北京的三日旅行", "推荐一些适合家庭出游的景点", "明天去杭州有什么好玩的地方?"]
+ }
+ ```
+
+ 示例2(数据分析助手):
+ ```json
+ {
+ "greeting_message": "你好!我是数据分析助手,可以帮你处理和分析各种数据,提供可视化报告和洞察。",
+ "example_questions": ["帮我分析这组销售数据的趋势", "生成一份季度业绩对比报告", "哪些产品的利润率最高?"]
+ }
+ ```
+
+USER_PROMPT: |-
+ ### 智能体名称:
+ {{display_name}}
+
+ ### 智能体职责描述:
+ {{duty_description}}
+
+ ### 业务描述:
+ {{business_description}}
+
+ {% if few_shots %}
+ ### 已有示例(请从中提炼用户提问场景作为示例问题):
+ {{few_shots}}
+ {% endif %}
+
+ 请根据以上信息生成开场白和示例问题。严格按JSON格式输出。
\ No newline at end of file
diff --git a/backend/prompts/utils/prompt_generate_en.yaml b/backend/prompts/utils/prompt_generate_en.yaml
index 596bb2cb9..80708db40 100644
--- a/backend/prompts/utils/prompt_generate_en.yaml
+++ b/backend/prompts/utils/prompt_generate_en.yaml
@@ -43,7 +43,7 @@ FEW_SHOTS_SYSTEM_PROMPT: |-
3. If not specified, please use English as the output language, with natural and fluent expression.
### Agent Execution Process:
- To solve tasks, you must plan forward through a series of steps in a loop of 'Think:', 'Code:', and 'Observe Results:' sequences:
+ To solve tasks, you must plan forward through a series of steps in a loop of 'Think:' and 'Code:' sequences. **IMPORTANT: You must NOT output 'Observe Results:' before code execution. Observation results can ONLY be generated after code execution.**
1. Think:
- Determine which tools/assistants need to be used to obtain information or take action
@@ -55,9 +55,7 @@ FEW_SHOTS_SYSTEM_PROMPT: |-
- Call tools/assistants correctly according to format specifications
- To distinguish between code execution and displaying user code, use 'code' for executing code and 'code' for displaying code
- Note that executed code is not visible to users. If users need to see the code, use 'code' for displaying code.
-
- 3. Observe Results:
- - View code execution results
+ - **IMPORTANT**: After code execution, the system will return content marked with "Observation:" (this is the real execution result). Please continue your next thinking based on these real results. **Do NOT fabricate observation results before code execution.**
After thinking, when you believe you can answer the user's question, you can generate a final answer directly to the user without generating code and stop the loop.
@@ -82,7 +80,7 @@ FEW_SHOTS_SYSTEM_PROMPT: |-
knowledge_info = knowledge_base_search(query="Oriental Pearl Tower introduction", index_names=["local_knowledge_base1", "local_knowledge_base2"])
print(knowledge_info)
- Observe Results: No results found for query "Oriental Pearl Tower introduction". The search results are insufficient to support an answer.
+ # System returns Observation: No relevant results found
Think: Since no relevant information was found in the local knowledge base, I need to use the web_search tool to query network information.
Code:
@@ -90,7 +88,7 @@ FEW_SHOTS_SYSTEM_PROMPT: |-
web_info = web_search(query="Oriental Pearl Tower introduction")
print(web_info)
- Observe Results: The Oriental Pearl TV Tower is located in Lujiazui, Pudong New Area, Shanghai, China...
+ # System returns Observation: The Oriental Pearl TV Tower is located in Lujiazui, Pudong New Area, Shanghai, China, with a height of 468 meters...
Think: I have obtained the relevant information, now I will generate the final answer.
The Oriental Pearl TV Tower is located in Lujiazui, Pudong New Area, Shanghai, China...
@@ -105,7 +103,7 @@ FEW_SHOTS_SYSTEM_PROMPT: |-
itinerary_result = travel_planning_assistant(task="Help me plan tomorrow's trip from Shanghai to Beijing")
print(itinerary_result)
- Observe Results: Tomorrow's trip planning from Shanghai to Beijing, including transportation, accommodation, attractions, etc.
+ # System returns Observation: Trip plan completed: High-speed train G2, departs 8:00, arrives Beijing South Station at 11:30; Hotel near Wangfujing; Recommended attractions: Tiananmen, Forbidden City, Great Wall...
Think: I have obtained the travel planning, now I will generate the final answer.
Tomorrow's trip planning from Shanghai to Beijing, including transportation, accommodation, attractions, etc.
@@ -120,7 +118,7 @@ FEW_SHOTS_SYSTEM_PROMPT: |-
weather_data = weather_api(city="Beijing")
print(weather_data)
- Observe Results: {"temperature": 25, "humidity": "60%", "condition": "sunny"}
+ # System returns Observation: {"city": "Beijing", "temperature": 25, "humidity": 60, "condition": "sunny"}
Think: Now I have weather data, let the analysis assistant help me analyze this data.
Code:
@@ -128,7 +126,7 @@ FEW_SHOTS_SYSTEM_PROMPT: |-
analysis_result = data_analysis_assistant(task="Analyze today's weather data: temperature 25 degrees, humidity 60%, sunny")
print(analysis_result)
- Observe Results: Today's weather is suitable, temperature is moderate, humidity is normal, suitable for outdoor activities.
+ # System returns Observation: Based on weather data analysis, today is suitable for outdoor activities. Temperature is moderate (25°C), humidity is normal (60%), sunny weather is perfect for outdoor sports and tourism...
Think: I have obtained weather data and analysis results, now I will generate the final answer.
Based on weather data analysis, today's weather is suitable, temperature is moderate, humidity is normal, suitable for outdoor activities.
@@ -158,7 +156,6 @@ FEW_SHOTS_SYSTEM_PROMPT: |-
right = [x for x in arr if x > pivot]
return quick_sort(left) + middle + quick_sort(right)
- Observe Results: The Python quick sort code.
Think: I have obtained the Python quick sort code, now I will generate the final answer.
The Python quick sort code is as follows:
@@ -252,6 +249,13 @@ USER_PROMPT: |-
You have no available assistants
{% endif %}
+ {% if knowledge_base_names %}
+ ### Knowledge Base Configuration Note:
+ When generating few-shot examples, if using the knowledge_base_search tool, you MUST use the following actually configured knowledge base names:
+ {{ knowledge_base_names | default('') }}
+ Please use these names directly in examples, e.g.: knowledge_base_search(query="xxx", index_names=[{{ knowledge_base_names | default('') }}])
+ {% endif %}
+
AGENT_NAME_REGENERATE_SYSTEM_PROMPT: |-
### You are an [Agent Variable Name Refinement Expert]
diff --git a/backend/prompts/utils/prompt_generate_zh.yaml b/backend/prompts/utils/prompt_generate_zh.yaml
index e48b97204..ed37d647d 100644
--- a/backend/prompts/utils/prompt_generate_zh.yaml
+++ b/backend/prompts/utils/prompt_generate_zh.yaml
@@ -42,7 +42,7 @@ FEW_SHOTS_SYSTEM_PROMPT: |-
3.若未指定语言,请使用中文输出,语言表达要自然流畅。
### Agent的执行流程:
- 要解决任务,Agent必须通过一系列步骤向前规划,以'思考:'、'代码:'和'观察结果:'序列的循环进行:
+ 要解决任务,Agent必须通过一系列步骤向前规划,以'思考:'和'代码:'序列循环进行。**注意:禁止在代码执行前输出'观察结果:',观察结果只能由代码执行后产生。**
1. 思考:
- 确定需要使用哪些工具/助手获取信息或行动
@@ -54,9 +54,7 @@ FEW_SHOTS_SYSTEM_PROMPT: |-
- 根据格式规范正确调用工具/助手
- 考虑到代码执行与展示用户代码的区别,使用'代码'表达运行代码,使用'代码'表达展示代码
- 注意运行的代码不会被用户看到,所以如果用户需要看到代码,你需要使用'代码'表达展示代码。
-
- 3. 观察结果:
- - 查看代码执行结果
+ - **重要**:代码执行后,系统会返回 "Observation:" 标记的内容(这是真实的执行结果)。请基于这些真实结果继续下一步思考,**不要在代码执行前自行编造观察结果**。
在思考结束后,当Agent认为可以回答用户问题,那么可以不生成代码,直接生成最终回答给到用户并停止循环。
@@ -81,7 +79,7 @@ FEW_SHOTS_SYSTEM_PROMPT: |-
knowledge_info = knowledge_base_search(query="东方明珠 介绍", index_names=["本地知识库1", "本地知识库2"])
print(knowledge_info)
- 观察结果:未找到查询"东方明珠 介绍"的结果。检索结果难以支撑回答。
+ # 系统返回 Observation: 未找到相关结果
思考:从本地知识库中没有找到相关信息,我需要使用web_search工具查询网络信息。
代码:
@@ -89,7 +87,7 @@ FEW_SHOTS_SYSTEM_PROMPT: |-
web_info = web_search(query="东方明珠 介绍")
print(web_info)
- 观察结果:东方明珠广播电视塔位于中国上海市浦东新区陆家嘴...
+ # 系统返回 Observation: 东方明珠广播电视塔位于中国上海市浦东新区陆家嘴,塔高468米,是中国著名的地标建筑之一...
思考:我已经获得了有关信息,现在我将生成最终回答。
东方明珠广播电视塔位于中国上海市浦东新区陆家嘴...
@@ -104,7 +102,7 @@ FEW_SHOTS_SYSTEM_PROMPT: |-
itinerary_result = travel_planning_assistant(task="帮我规划明天从上海出发去北京的行程")
print(itinerary_result)
- 观察结果:明天从上海出发去北京的行程规划,包括交通、住宿、景点等。
+ # 系统返回 Observation: 行程规划已完成,包括:高铁G2,8:00出发,11:30到达北京南站;酒店预订于王府井附近;景点推荐:天安门、故宫、长城...
思考:我已经获得了出行规划,现在我将生成最终回答。
明天从上海出发去北京的行程规划,包括交通、住宿、景点等。
@@ -119,7 +117,7 @@ FEW_SHOTS_SYSTEM_PROMPT: |-
weather_data = weather_api(city="北京")
print(weather_data)
- 观察结果:{"temperature": 25, "humidity": 60%, "condition": "晴天"}
+ # 系统返回 Observation: {"city": "北京", "temperature": 25, "humidity": 60, "condition": "晴天"}
思考:现在我有天气数据了,让分析助手帮我分析这些数据。
代码:
@@ -127,7 +125,7 @@ FEW_SHOTS_SYSTEM_PROMPT: |-
analysis_result = data_analysis_assistant(task="分析今天的天气数据:温度25度,湿度60%,晴天")
print(analysis_result)
- 观察结果:今天天气适宜,温度适中,湿度正常,适合户外活动。
+ # 系统返回 Observation: 根据天气数据分析,今天天气适宜外出活动,温度适中(25℃),湿度正常(60%),晴天适合户外运动和旅游...
思考:我已经获得了天气数据和分析结果,现在我将生成最终回答。
根据天气数据分析,今天天气适宜,温度适中,湿度正常,适合户外活动。
@@ -155,7 +153,6 @@ FEW_SHOTS_SYSTEM_PROMPT: |-
right = [x for x in arr if x > pivot]
return quick_sort(left) + middle + quick_sort(right)
- 观察结果:快速排序的python代码。
思考:我已经获得了快速排序的python代码,现在我将生成最终回答。
快速排序的python代码如下:
@@ -248,6 +245,13 @@ USER_PROMPT: |-
你没有可用的助手
{% endif %}
+ {% if knowledge_base_names %}
+ ### 知识库配置说明:
+ 在生成 few-shot 示例时,如果使用 knowledge_base_search 工具,必须使用以下实际配置的知识库名称:
+ {{ knowledge_base_names | default('') }}
+ 请将这些名称直接用于示例中,例如:knowledge_base_search(query="xxx", index_names=[{{ knowledge_base_names | default('') }}])
+ {% endif %}
+
AGENT_NAME_REGENERATE_SYSTEM_PROMPT: |-
### 你是【Agent变量名调整专家】
diff --git a/backend/prompts/utils/prompt_optimize_en.yaml b/backend/prompts/utils/prompt_optimize_en.yaml
new file mode 100644
index 000000000..a487107b7
--- /dev/null
+++ b/backend/prompts/utils/prompt_optimize_en.yaml
@@ -0,0 +1,51 @@
+OPTIMIZE_SYSTEM_PROMPT: |-
+ ### You Are a Prompt Optimization Expert
+ You optimize one specific section of an agent prompt based on the user's feedback while preserving the section's original intent and format conventions.
+
+ ### Your Goal
+ Improve only the target section content according to the evaluation feedback.
+
+ ### Requirements
+ 1. Output only the optimized section content.
+ 2. Preserve the target section's language unless the user feedback explicitly requests otherwise.
+ 3. Keep the optimized content aligned with the business task, available tools, and available assistants.
+ 4. Do not add explanations, summaries, markdown fences, titles, or comparison text.
+ 5. For `duty`, keep the content concise and role-oriented.
+ 6. For `constraint`, keep the content as explicit usage requirements.
+ 7. For `few_shots`, keep the content as concrete examples consistent with the current prompt style.
+
+OPTIMIZE_USER_PROMPT: |-
+ ### Section Type
+ {{ section_type }}
+
+ ### Section Title
+ {{ section_title }}
+
+ ### Business Task Description
+ {{ task_description }}
+
+ ### Current Section Content
+ {{ current_content }}
+
+ ### User Evaluation Feedback
+ {{ feedback }}
+
+ ### Available Tools
+ {% if tool_description %}
+ {{ tool_description }}
+ {% else %}
+ No available tools.
+ {% endif %}
+
+ ### Available Assistants
+ {% if assistant_description %}
+ {{ assistant_description }}
+ {% else %}
+ No available assistants.
+ {% endif %}
+
+ {% if knowledge_base_names %}
+ ### Knowledge Base Configuration Note
+ When optimizing few-shot examples that use `knowledge_base_search`, you must use these actually configured knowledge base names:
+ {{ knowledge_base_names | default('') }}
+ {% endif %}
diff --git a/backend/prompts/utils/prompt_optimize_zh.yaml b/backend/prompts/utils/prompt_optimize_zh.yaml
new file mode 100644
index 000000000..a769ea5eb
--- /dev/null
+++ b/backend/prompts/utils/prompt_optimize_zh.yaml
@@ -0,0 +1,51 @@
+OPTIMIZE_SYSTEM_PROMPT: |-
+ ### 你是一名提示词优化专家
+ 你需要根据用户给出的评价,对智能体提示词中的某一个指定部分进行优化,同时保持该部分原本的目标和格式风格。
+
+ ### 你的任务
+ 只优化目标部分的内容,并让结果更贴合用户评价。
+
+ ### 要求
+ 1. 只输出优化后的该部分内容。
+ 2. 保持原内容所使用的语言,除非用户明确要求切换语言。
+ 3. 优化结果要与业务任务、可用工具和可用助手保持一致。
+ 4. 不要输出解释、总结、标题、对比说明或 Markdown 代码块。
+ 5. 当 `section_type` 为 `duty` 时,内容应保持简洁,突出智能体角色与职责。
+ 6. 当 `section_type` 为 `constraint` 时,内容应保持为清晰明确的使用要求。
+ 7. 当 `section_type` 为 `few_shots` 时,内容应保持为具体示例,并与当前提示词风格一致。
+
+OPTIMIZE_USER_PROMPT: |-
+ ### 部分类型
+ {{ section_type }}
+
+ ### 部分标题
+ {{ section_title }}
+
+ ### 业务任务描述
+ {{ task_description }}
+
+ ### 当前内容
+ {{ current_content }}
+
+ ### 用户评价反馈
+ {{ feedback }}
+
+ ### 可用工具
+ {% if tool_description %}
+ {{ tool_description }}
+ {% else %}
+ 当前没有可用工具。
+ {% endif %}
+
+ ### 可用助手
+ {% if assistant_description %}
+ {{ assistant_description }}
+ {% else %}
+ 当前没有可用助手。
+ {% endif %}
+
+ {% if knowledge_base_names %}
+ ### 知识库配置说明
+ 如果优化后的 few-shot 示例中需要使用 `knowledge_base_search`,必须使用以下已配置的真实知识库名称:
+ {{ knowledge_base_names | default('') }}
+ {% endif %}
diff --git a/backend/pyproject.toml b/backend/pyproject.toml
index 04b94589c..b8f51dd4c 100644
--- a/backend/pyproject.toml
+++ b/backend/pyproject.toml
@@ -1,23 +1,34 @@
[project]
name = "backend"
version = "0.1.0"
-requires-python = "==3.10.*"
+requires-python = ">=3.11,<3.12"
dependencies = [
+ "aiofiles>=0.8.0",
"uvicorn>=0.34.0",
"fastapi>=0.115.12",
+ "python-multipart>=0.0.9",
+ "email-validator>=2.0.0",
"aiohttp>=3.8.0",
- "psycopg2-binary==2.9.10",
+ "authlib>=1.3.0",
+ "cryptography>=42.0.0",
+ "psycopg2-binary>=2.9.9",
"PyJWT>=2.8.0",
"sqlalchemy~=2.0.37",
+ "greenlet<3.5.0",
"supabase>=2.18.1",
"websocket-client>=1.8.0",
"pyyaml>=6.0.2",
+ "jsonref>=1.1.0",
"ruamel-yaml==0.19.1",
"redis>=5.0.0",
- "fastmcp==2.12.0",
+ "fastmcp>=2.14.2,<3.0",
"langchain>=0.3.26",
"scikit-learn>=1.0.0",
"numpy>=1.24.0",
+ "defusedxml>=0.7.1",
+ "openjiuwen>=0.1.0",
+ "pydantic-settings>=2.0.0",
+ "python-docx>=1.1.0",
]
[project.optional-dependencies]
@@ -27,7 +38,7 @@ data-process = [
"flower>=2.0.1",
"nest_asyncio>=1.5.6",
"unstructured[csv,docx,pdf,pptx,xlsx,md]==0.18.14",
- "huggingface_hub>=0.19.0,<0.21.0"
+ "huggingface_hub>=0.30.0,<1.0"
]
test = [
"pytest",
diff --git a/backend/services/a2a_agent_adapter.py b/backend/services/a2a_agent_adapter.py
index c052b5d37..36f10657e 100644
--- a/backend/services/a2a_agent_adapter.py
+++ b/backend/services/a2a_agent_adapter.py
@@ -227,7 +227,7 @@ def build_a2a_task_response(
text_content = str(message)
task["status"]["message"] = {
"role": message.get("role", "agent"),
- "parts": [{"type": "text", "text": text_content, "mediaType": _MEDIA_TYPE_TEXT}]
+ "parts": [{"text": text_content, "mediaType": _MEDIA_TYPE_TEXT}]
}
# Handle artifacts
@@ -261,14 +261,14 @@ def build_a2a_message_response(
A2A Message response dict wrapped in {"message": {...}}.
"""
if not message_id:
- message_id = f"msg_{uuid4().hex[:16]}"
+ message_id = f"msg_{uuid4().hex}"
if parts:
message_parts = parts
elif text:
- message_parts = [{"type": "text", "text": text, "mediaType": _MEDIA_TYPE_TEXT}]
+ message_parts = [{"text": text, "mediaType": _MEDIA_TYPE_TEXT}]
else:
- message_parts = [{"type": "text", "text": "", "mediaType": _MEDIA_TYPE_TEXT}]
+ message_parts = [{"text": "", "mediaType": _MEDIA_TYPE_TEXT}]
message_obj = {
"messageId": message_id,
@@ -294,8 +294,8 @@ def _content_to_artifact_parts(
return parts
if isinstance(content, dict):
if content.get("type") == "text":
- return [{"type": "text", "text": content.get("text", "")}]
- return [{"type": "text", "text": str(content)}]
+ return [{"text": content.get("text", ""), "mediaType": _MEDIA_TYPE_TEXT}]
+ return [{"text": str(content), "mediaType": _MEDIA_TYPE_TEXT}]
def _map_task_state(self, state: str) -> str:
"""Map shorthand state to TASK_STATE constant."""
@@ -343,7 +343,7 @@ def _message_to_parts_format(self, message: Any) -> Dict[str, Any]:
text = str(message)
return {
"role": role,
- "parts": [{"type": "text", "text": text}]
+ "parts": [{"text": text}]
}
def _build_artifact_update_event(
diff --git a/backend/services/a2a_client_service.py b/backend/services/a2a_client_service.py
index 14f721ffd..e4e81fec5 100644
--- a/backend/services/a2a_client_service.py
+++ b/backend/services/a2a_client_service.py
@@ -88,15 +88,24 @@ async def discover_from_url(
# Extract endpoint URL - prioritize supportedInterfaces (A2A v1.0 standard)
agent_url = self._extract_agent_url(card)
- # Extract protocol info and supported interfaces
- capabilities = card.get("capabilities", {})
- protocol_version = capabilities.get("protocolVersion", "1.0")
- streaming = capabilities.get("streaming", False)
- transport_type = "http-streaming" if streaming else "http-polling"
-
# Extract supported interfaces (A2A v1.0 standard format)
supported_interfaces = card.get("supportedInterfaces", [])
+ # Extract protocol info from supported_interfaces (A2A 1.0 spec)
+ # protocol_version and streaming are properties of each interface, not top-level
+ first_interface = supported_interfaces[0] if supported_interfaces else {}
+ interface_capabilities = first_interface.get("capabilities", {})
+ protocol_version = first_interface.get("protocolVersion", "1.0")
+ streaming = interface_capabilities.get("streaming", False)
+
+ # Fallback to top-level capabilities if no supported_interfaces
+ if not supported_interfaces:
+ card_capabilities = card.get("capabilities", {})
+ if protocol_version == "1.0" and card_capabilities.get("protocolVersion"):
+ protocol_version = card_capabilities.get("protocolVersion")
+ if not streaming and card_capabilities.get("streaming"):
+ streaming = card_capabilities.get("streaming")
+
# Store in database
result = a2a_agent_db.create_external_agent_from_url(
source_url=url,
@@ -104,7 +113,7 @@ async def discover_from_url(
description=description,
agent_url=agent_url,
version=protocol_version,
- streaming=(transport_type == "http-streaming"),
+ streaming=streaming,
tenant_id=tenant_id,
user_id=user_id,
raw_card=card,
@@ -222,50 +231,95 @@ async def _discover_single_from_nacos(
client = NacosClient(nacos_addr, username, password)
try:
- # Query service instance from Nacos
- instance = await client.query_service_instance(agent_name, namespace)
- if not instance:
- logger.warning(f"No instance found for agent '{agent_name}' in Nacos")
+ # Query A2A agent from Nacos using dedicated A2A endpoint
+ agent_info = await client.query_a2a_agent(agent_name, namespace)
+ if not agent_info:
+ logger.warning(f"No A2A agent found for '{agent_name}' in Nacos")
return None
- # Fetch Agent Card from instance
- agent_card_url = instance.get("metadata", {}).get("a2a_card_url")
- if not agent_card_url:
- # Construct URL from instance host/port
- host = instance.get("ip")
- port = instance.get("port")
- if host and port:
- agent_card_url = f"http://{host}:{port}/.well-known/agent-{agent_name}.json"
-
- if not agent_card_url:
- logger.warning(f"No Agent Card URL found for agent '{agent_name}'")
+ # Extract agent URL from A2A response
+ agent_url = agent_info.get("agent_url") or agent_info.get("url")
+ if not agent_url:
+ logger.warning(f"No agent URL found for A2A agent '{agent_name}'")
return None
- # Fetch Agent Card
- try:
- async with A2AHttpClient() as http_client:
- card = await http_client.get_json(agent_card_url)
- except aiohttp.ClientError:
- # Network errors retrieving agent card should result in None
- logger.warning(f"Failed to retrieve agent card from {agent_card_url}")
- return None
+ # Get metadata and extract description from Nacos response
+ metadata = agent_info.get("metadata") or {}
+ description = agent_info.get("description") or metadata.get("description", "")
+ nacos_interfaces = metadata.get("supported_interfaces", [])
+ supported_interfaces = nacos_interfaces.copy() if nacos_interfaces else []
+ protocol_version = "1.0"
+ streaming = False
+ agent_card_fetched = False
+
+ # Fetch Agent Card from agent_url to get supported_interfaces (A2A v1.0 spec)
+ # Try common Agent Card endpoints (order matters - try more specific paths first)
+ card_urls = [
+ f"{agent_url.rstrip('/')}/.well-known/agent-card.json",
+ f"{agent_url.rstrip('/')}/.well-known/agent.json",
+ f"{agent_url.rstrip('/')}/.well-known/agent-1.0.json",
+ f"{agent_url.rstrip('/')}/agent-card.json",
+ f"{agent_url.rstrip('/')}/agent.json",
+ ]
+
+ for card_url in card_urls:
+ try:
+ async with A2AHttpClient() as http_client:
+ card = await http_client.get_json(card_url, headers=build_a2a_headers())
+
+ if card and (card.get("name") or card.get("agent_id")):
+ logger.info(f"Fetched Agent Card from {card_url}")
+
+ # Extract supported_interfaces from Agent Card
+ card_interfaces = card.get("supportedInterfaces", [])
+
+ # Always update from Agent Card if present
+ if card_interfaces:
+ supported_interfaces = card_interfaces
+ agent_card_fetched = True
+
+ # Extract description from Agent Card if not found in Nacos
+ if not description:
+ description = card.get("description", "")
+
+ # Extract protocol info from supported_interfaces
+ first_interface = supported_interfaces[0] if supported_interfaces else {}
+ capabilities = first_interface.get("capabilities", {})
+ protocol_version = first_interface.get("protocolVersion", "1.0")
+ streaming = capabilities.get("streaming", False)
+
+ # Replace the Nacos info with the fetched Agent Card; the card is what gets stored as raw_card
+ agent_info = card
+ break
+
+ except Exception as e:
+ logger.warning(f"Failed to fetch Agent Card from {card_url}: {e}")
+ continue
+
+ if not agent_card_fetched:
+ logger.warning(
+ f"[Nacos Discovery] Failed to fetch Agent Card for '{agent_name}', "
+ f"using Nacos interfaces: {supported_interfaces}"
+ )
- # Extract endpoint URL and supported interfaces
- agent_url = self._extract_agent_url(card)
- supported_interfaces = card.get("supportedInterfaces", [])
+ logger.info(
+ f"[Nacos Discovery] Storing agent: name={agent_name}, "
+ f"agent_url={agent_url}, supported_interfaces_count={len(supported_interfaces) if supported_interfaces else 0}, "
+ f"protocol_version={protocol_version}, streaming={streaming}"
+ )
# Store in database
result = a2a_agent_db.create_external_agent_from_nacos(
- name=card.get("name", agent_name),
- description=card.get("description", ""),
+ name=agent_name,
+ description=description,
agent_url=agent_url,
- protocol_version=card.get("capabilities", {}).get("protocolVersion", "1.0"),
- transport_type="http-streaming" if card.get("capabilities", {}).get("streaming") else "http-polling",
+ version=protocol_version,
+ streaming=streaming,
nacos_config_id=nacos_config["config_id"],
nacos_agent_name=agent_name,
tenant_id=tenant_id,
user_id=user_id,
- raw_card=card,
+ raw_card=agent_info,
supported_interfaces=supported_interfaces
)
@@ -312,13 +366,10 @@ def _extract_agent_url(self, card: Dict[str, Any]) -> str:
return ""
def _find_url_in_interfaces(self, interfaces: List[Any]) -> str:
- """Find URL from supportedInterfaces array, preferring http-json-rpc."""
- json_rpc_protocols = ("http-json-rpc", "jsonrpc", "httpjsonrpc")
- for iface in interfaces:
- if iface.get("protocolBinding", "").lower() in json_rpc_protocols:
- url = iface.get("url", "")
- if url:
- return url
+ """Find URL from supportedInterfaces array - return the first interface's URL.
+
+ This ensures protocol and URL are always from the same interface.
+ """
for iface in interfaces:
url = iface.get("url", "")
if url:
@@ -426,46 +477,128 @@ async def refresh_agent_card(
if not agent:
raise AgentDiscoveryError(f"Agent {external_agent_id} not found")
+ source_type = agent.get("source_type")
+ source_url = agent.get("source_url")
+ agent_url = agent.get("agent_url")
+ base_url = agent.get("base_url")
+
try:
- # Fetch fresh Agent Card
- source_url = agent.get("source_url")
- if not source_url:
- raise AgentDiscoveryError("No source URL available for refresh")
+ if source_type == "nacos":
+ # Nacos discovered agents: use /health endpoint to check availability
+ if not base_url:
+ raise AgentDiscoveryError("No base_url available for health check")
- async with A2AHttpClient() as client:
- card = await client.get_json(source_url)
+ health_url = f"{base_url.rstrip('/')}/health"
+ logger.info(f"Checking health for Nacos agent: {health_url}")
- # Extract updated info - use _extract_agent_url for A2A v1.0 standard
- new_url = self._extract_agent_url(card)
- new_name = card.get("name")
- new_description = card.get("description")
- new_supported_interfaces = card.get("supportedInterfaces", [])
+ async with A2AHttpClient() as client:
+ health_response = await client.get_json(health_url)
- # Note: Do NOT update protocol_type and agent_url during refresh
- # These are user-configured values and should not be overwritten
- # The refresh should only update metadata (name, description, supported_interfaces, raw_card)
+ # Update availability based on health check
+ a2a_agent_db.update_agent_availability(
+ external_agent_id=external_agent_id,
+ tenant_id=tenant_id,
+ is_available=True,
+ check_result="OK"
+ )
- # Update cache
- result = a2a_agent_db.refresh_external_agent_cache(
- external_agent_id=external_agent_id,
- tenant_id=tenant_id,
- user_id=user_id,
- new_raw_card=card,
- new_name=new_name,
- new_description=new_description,
- new_supported_interfaces=new_supported_interfaces
- )
+ # Update cache timestamp
+ a2a_agent_db.refresh_external_agent_cache(
+ external_agent_id=external_agent_id,
+ tenant_id=tenant_id,
+ user_id=user_id
+ )
- # Update availability
- a2a_agent_db.update_agent_availability(
- external_agent_id=external_agent_id,
- tenant_id=tenant_id,
- is_available=True,
- check_result="OK"
- )
+ logger.info(f"Health check passed for agent {external_agent_id}")
+ return {
+ "agent_id": external_agent_id,
+ "source_type": source_type,
+ "health_url": health_url,
+ "health_response": health_response,
+ "status": "available"
+ }
- logger.info(f"Refreshed agent {external_agent_id}")
- return result
+ else:
+ # URL discovered agents: fetch fresh Agent Card from source_url
+ if not source_url:
+ raise AgentDiscoveryError("No source URL available for refresh")
+
+ async with A2AHttpClient() as client:
+ card = await client.get_json(source_url)
+
+ # Extract updated info - use _extract_agent_url for A2A v1.0 standard
+ new_url = self._extract_agent_url(card)
+ new_name = card.get("name")
+ new_description = card.get("description")
+ new_supported_interfaces = card.get("supportedInterfaces", [])
+
+ # Extract new protocol type from the card
+ new_protocol_type = _extract_protocol_type(new_supported_interfaces)
+ current_protocol_type = agent.get("protocol_type")
+
+ # Determine if we need to update agent_url and protocol_type
+ # Update agent_url if it changed in the remote card
+ update_agent_url = new_url is not None and new_url != agent_url
+
+ # Update protocol_type if it changed in the remote card
+ update_protocol_type = new_protocol_type != current_protocol_type
+
+ # When protocol_type changes, we need to find the corresponding interface URL
+ if update_protocol_type:
+ logger.info(
+ f"Protocol type changed for agent {external_agent_id}: "
+ f"{current_protocol_type} -> {new_protocol_type}"
+ )
+ # The database function will handle finding the correct interface URL
+ result = a2a_agent_db.refresh_external_agent_cache(
+ external_agent_id=external_agent_id,
+ tenant_id=tenant_id,
+ user_id=user_id,
+ new_raw_card=card,
+ new_agent_url=new_url if update_agent_url else None,
+ new_name=new_name,
+ new_description=new_description,
+ new_supported_interfaces=new_supported_interfaces,
+ new_protocol_type=new_protocol_type
+ )
+ elif update_agent_url:
+ # Only agent_url changed
+ logger.info(
+ f"Agent URL changed for agent {external_agent_id}: "
+ f"{agent_url} -> {new_url}"
+ )
+ result = a2a_agent_db.refresh_external_agent_cache(
+ external_agent_id=external_agent_id,
+ tenant_id=tenant_id,
+ user_id=user_id,
+ new_raw_card=card,
+ new_agent_url=new_url,
+ new_name=new_name,
+ new_description=new_description,
+ new_supported_interfaces=new_supported_interfaces
+ )
+ else:
+ # No changes to agent_url or protocol_type, just update metadata
+ result = a2a_agent_db.refresh_external_agent_cache(
+ external_agent_id=external_agent_id,
+ tenant_id=tenant_id,
+ user_id=user_id,
+ new_raw_card=card,
+ new_name=new_name,
+ new_description=new_description,
+ new_supported_interfaces=new_supported_interfaces
+ )
+
+ # Update availability
+ a2a_agent_db.update_agent_availability(
+ external_agent_id=external_agent_id,
+ tenant_id=tenant_id,
+ is_available=True,
+ check_result="OK"
+ )
+
+ logger.info(f"Refreshed agent {external_agent_id}")
+ return result
except aiohttp.ClientError as e:
logger.error(f"Failed to refresh agent {external_agent_id}: {e}")
diff --git a/backend/services/a2a_server_service.py b/backend/services/a2a_server_service.py
index 2cccbe40d..4d9c5e607 100644
--- a/backend/services/a2a_server_service.py
+++ b/backend/services/a2a_server_service.py
@@ -647,7 +647,7 @@ async def handle_message_send(
return self.adapter.build_a2a_task_response(
task_id=task_id,
status="TASK_STATE_COMPLETED",
- parts=[{"type": "text", "text": accumulated_text, "mediaType": "text/plain"}] if accumulated_text else None,
+ parts=[{"text": accumulated_text, "mediaType": "text/plain"}] if accumulated_text else None,
context_id=context_id,
timestamp=datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
)
@@ -879,7 +879,7 @@ def get_task(
message = result.get("message", "")
if message:
task_obj["artifacts"] = [{
- "parts": [{"type": "text", "text": str(message)}],
+ "parts": [{"text": str(message)}],
"lastChunk": True
}]
diff --git a/backend/services/agent_repository_service.py b/backend/services/agent_repository_service.py
new file mode 100644
index 000000000..87649bcd1
--- /dev/null
+++ b/backend/services/agent_repository_service.py
@@ -0,0 +1,306 @@
+import logging
+from typing import Any, Dict, Optional
+
+from consts.const import ASSET_OWNER_TENANT_ID
+from consts.model import AgentRepositorySnapshot
+from database.agent_db import search_agent_info_by_agent_id
+from database.agent_version_db import search_version_by_version_no
+from database.agent_repository_db import (
+ STATUS_PENDING_REVIEW,
+ VALID_REPOSITORY_STATUSES,
+ get_agent_repository_by_agent_id,
+ get_agent_repository_by_id,
+ insert_agent_repository_record,
+ list_agent_repository_summaries,
+ update_agent_repository_by_id,
+ update_agent_repository_status_by_id,
+)
+from services.agent_service import (
+ collect_skill_zip_entries,
+ export_agent_dict_for_repository_impl,
+ import_agent_impl,
+ import_agent_with_skills_impl,
+)
+
+logger = logging.getLogger("agent_repository_service")
+
+# Snapshot columns that may be overwritten when an existing listing is
+# re-published: only keys named here are copied from the freshly built
+# repository payload into the update call.
+_UPDATE_SNAPSHOT_FIELDS = (
+    "display_name",
+    "description",
+    "author",
+    "category_id",
+    "tags",
+    "tool_count",
+    "version_label",
+    "source_version_no",
+    "agent_info_json",
+    "status",
+)
+
+
+def _to_summary_item(record: Dict[str, Any]) -> Dict[str, Any]:
+    """Project a repository DB row onto the slim marketplace summary shape.
+
+    Args:
+        record: Repository row as a dict; keys that are absent come through
+            as ``None``.
+
+    Returns:
+        A dict holding only the six summary keys, in a fixed order.
+    """
+    summary_keys = (
+        "agent_repository_id",
+        "author",
+        "name",
+        "display_name",
+        "description",
+        "status",
+    )
+    return {key: record.get(key) for key in summary_keys}
+
+
+def list_agent_repository_listings_impl(
+    *,
+    status: Optional[str] = None,
+) -> Dict[str, Any]:
+    """Return repository listing summaries, optionally restricted to one status.
+
+    Args:
+        status: When given, must be a member of ``VALID_REPOSITORY_STATUSES``.
+
+    Returns:
+        ``{"items": [...]}`` with one summary dict per record.
+
+    Raises:
+        ValueError: If ``status`` is given but is not a valid status.
+    """
+    status_is_acceptable = status is None or status in VALID_REPOSITORY_STATUSES
+    if not status_is_acceptable:
+        allowed = ", ".join(sorted(VALID_REPOSITORY_STATUSES))
+        raise ValueError(f"Invalid status '{status}'; must be one of: {allowed}")
+
+    items = []
+    for row in list_agent_repository_summaries(status=status):
+        items.append(_to_summary_item(row))
+    return {"items": items}
+
+
+def update_agent_repository_status_impl(
+    *,
+    agent_repository_id: int,
+    status: str,
+    user_id: str,
+) -> Dict[str, Any]:
+    """Set the status of one repository listing and return its refreshed summary.
+
+    Args:
+        agent_repository_id: Primary key of the listing to update.
+        status: New status; must be in ``VALID_REPOSITORY_STATUSES``.
+        user_id: Passed through to the DB update call.
+
+    Returns:
+        Summary dict of the listing as re-read after the update.
+
+    Raises:
+        ValueError: On an invalid status, a missing listing, an update that
+            touched no rows, or a failed re-read after the update.
+    """
+    if status not in VALID_REPOSITORY_STATUSES:
+        allowed = ", ".join(sorted(VALID_REPOSITORY_STATUSES))
+        raise ValueError(f"Invalid status '{status}'; must be one of: {allowed}")
+
+    # Existence is checked up front and again through the affected-row count.
+    if not get_agent_repository_by_id(agent_repository_id):
+        raise ValueError("Repository listing not found")
+
+    touched = update_agent_repository_status_by_id(
+        repository_id=agent_repository_id,
+        status=status,
+        user_id=user_id,
+    )
+    if touched == 0:
+        raise ValueError("Repository listing not found")
+
+    refreshed = get_agent_repository_by_id(agent_repository_id)
+    if refreshed:
+        return _to_summary_item(refreshed)
+    raise ValueError("Failed to load repository listing after update")
+
+
+def _to_list_item(record: Dict[str, Any]) -> Dict[str, Any]:
+    """Project a repository DB row onto the list-item shape (no JSON snapshot).
+
+    ``id`` mirrors ``agent_repository_id``, ``tags`` falls back to an empty
+    list when missing or falsy, and ``create_time`` / ``update_time`` are
+    exposed as ``created_at`` / ``updated_at``.
+    """
+    repository_id = record.get("agent_repository_id")
+    item: Dict[str, Any] = {
+        "id": repository_id,
+        "agent_repository_id": repository_id,
+    }
+    for key in ("agent_id", "name", "display_name", "description", "author", "category_id"):
+        item[key] = record.get(key)
+    item["tags"] = record.get("tags") or []
+    for key in ("tool_count", "version_label", "status", "source_version_no", "publisher_tenant_id"):
+        item[key] = record.get(key)
+    item["created_at"] = record.get("create_time")
+    item["updated_at"] = record.get("update_time")
+    return item
+
+
+def _to_detail_item(
+    record: Dict[str, Any],
+    *,
+    include_bundles: bool = True,
+    is_updated: Optional[bool] = None,
+) -> Dict[str, Any]:
+    """Build the detail payload: the list item plus optional extras.
+
+    Args:
+        record: Repository row as a dict.
+        include_bundles: When true, copy ``agent_info_json`` from the row.
+        is_updated: When not ``None``, added to the payload as ``is_updated``.
+
+    Returns:
+        The list-item dict extended with the requested extra keys.
+    """
+    extras: Dict[str, Any] = {}
+    if include_bundles:
+        extras["agent_info_json"] = record.get("agent_info_json")
+    if is_updated is not None:
+        extras["is_updated"] = is_updated
+    return {**_to_list_item(record), **extras}
+
+
+def _validate_create_payload(repository_data: Dict[str, Any]) -> None:
+    """Check that a repository payload is complete enough to be inserted.
+
+    Raises:
+        ValueError: If a required field is absent or ``None``, if ``name`` is
+            empty, if ``agent_info_json`` is not a dict, or if that dict lacks
+            one of ``agent_id`` / ``agent_info`` / ``mcp_info``.
+    """
+    absent = []
+    for field in ("agent_id", "source_version_no", "name", "agent_info_json"):
+        # A key that is present but None counts as missing, same as an absent key.
+        if repository_data.get(field) is None:
+            absent.append(field)
+    if absent:
+        raise ValueError(f"Missing required repository fields: {', '.join(absent)}")
+    if not repository_data.get("name"):
+        raise ValueError("name must be a non-empty string")
+
+    snapshot = repository_data.get("agent_info_json")
+    if not isinstance(snapshot, dict):
+        raise ValueError("agent_info_json must be a JSON object")
+    first_missing = next(
+        (key for key in ("agent_id", "agent_info", "mcp_info") if key not in snapshot),
+        None,
+    )
+    if first_missing is not None:
+        raise ValueError(f"agent_info_json must contain '{first_missing}'")
+
+
+def _validate_agent_info_json_shareable(agent_info_json: dict) -> None:
+    """Raise if any agent entry in the snapshot is owned by the asset-owner tenant.
+
+    A snapshot whose ``agent_info`` value is not a dict is accepted as-is,
+    and non-dict entries inside it are ignored.
+
+    Raises:
+        ValueError: If an entry's ``tenant_id`` equals ``ASSET_OWNER_TENANT_ID``.
+    """
+    agents = agent_info_json.get("agent_info")
+    entries = agents.values() if isinstance(agents, dict) else ()
+    owned_by_asset_owner = any(
+        isinstance(entry, dict) and entry.get("tenant_id") == ASSET_OWNER_TENANT_ID
+        for entry in entries
+    )
+    if owned_by_asset_owner:
+        raise ValueError("租户管理员智能体无法共享")
+
+
+async def _build_agent_info_json(
+    agent_id: int,
+    tenant_id: str,
+    user_id: str,
+    version_no: int,
+) -> dict:
+    """Assemble the marketplace snapshot dict for one agent version.
+
+    Combines the repository export dict with the agent's skill zip entries in
+    an ``AgentRepositorySnapshot`` and returns its ``model_dump()``. An empty
+    skills collection is stored as ``None``.
+    """
+    exported = await export_agent_dict_for_repository_impl(
+        agent_id=agent_id,
+        tenant_id=tenant_id,
+        user_id=user_id,
+        version_no=version_no,
+    )
+    skill_entries = collect_skill_zip_entries(
+        agent_id=agent_id,
+        tenant_id=tenant_id,
+        version_no=version_no,
+    )
+    if not skill_entries:
+        skill_entries = None
+    return AgentRepositorySnapshot(**exported, skills=skill_entries).model_dump()
+
+
+async def _build_repository_data_from_agent(
+    agent_id: int,
+    tenant_id: str,
+    user_id: str,
+    version_no: int,
+) -> Dict[str, Any]:
+    """Derive the repository insert/update payload for one agent version.
+
+    Loads the agent row, builds and share-checks the snapshot JSON, and picks
+    a version label: the stored ``version_name`` when there is one, otherwise
+    ``v<version_no>``. The resulting status is always ``STATUS_PENDING_REVIEW``.
+
+    Raises:
+        ValueError: Propagated from the share check when the snapshot contains
+            an agent that cannot be shared.
+    """
+    agent_info = search_agent_info_by_agent_id(agent_id, tenant_id, version_no)
+    snapshot = await _build_agent_info_json(
+        agent_id=agent_id,
+        tenant_id=tenant_id,
+        user_id=user_id,
+        version_no=version_no,
+    )
+    _validate_agent_info_json_shareable(snapshot)
+
+    version_meta = search_version_by_version_no(agent_id, tenant_id, version_no)
+    version_name = version_meta.get("version_name") if version_meta else None
+    version_label = version_name or f"v{version_no}"
+
+    payload: Dict[str, Any] = {
+        "agent_id": agent_id,
+        "source_version_no": version_no,
+        "name": agent_info["name"],
+    }
+    for optional_key in ("display_name", "description", "author"):
+        payload[optional_key] = agent_info.get(optional_key)
+    payload["version_label"] = version_label
+    payload["agent_info_json"] = snapshot
+    payload["status"] = STATUS_PENDING_REVIEW
+    return payload
+
+
+async def create_agent_repository_listing_impl(
+    agent_id: int,
+    tenant_id: str,
+    user_id: str,
+    version_no: int,
+) -> Dict[str, Any]:
+    """Publish an agent version to the repository, inserting or updating.
+
+    The payload is built and validated first. If a listing already exists for
+    ``agent_id``, only the fields named in ``_UPDATE_SNAPSHOT_FIELDS`` are
+    written to it; otherwise a new record is inserted. The stored record is
+    then re-read and returned as a detail item.
+
+    Returns:
+        Detail dict of the listing, with ``is_updated`` set to ``True`` for an
+        update and ``False`` for an insert.
+
+    Raises:
+        ValueError: If ``version_no`` is negative, the payload is invalid, the
+            update touched no rows, or the record cannot be re-read.
+    """
+    if version_no < 0:
+        raise ValueError("version_no must be >= 0")
+
+    repository_data = await _build_repository_data_from_agent(
+        agent_id, tenant_id, user_id, version_no
+    )
+    _validate_create_payload(repository_data)
+
+    current = get_agent_repository_by_agent_id(agent_id)
+    is_updated = bool(current)
+    if is_updated:
+        repository_id = int(current["agent_repository_id"])
+        changes = {}
+        for field in _UPDATE_SNAPSHOT_FIELDS:
+            if field in repository_data:
+                changes[field] = repository_data[field]
+        rows_changed = update_agent_repository_by_id(
+            repository_id=repository_id,
+            publisher_tenant_id=tenant_id,
+            user_id=user_id,
+            updates=changes,
+        )
+        if rows_changed == 0:
+            raise ValueError("Failed to update repository listing")
+    else:
+        repository_id = insert_agent_repository_record(
+            repository_data=repository_data,
+            publisher_tenant_id=tenant_id,
+            publisher_user_id=user_id,
+        )
+
+    stored = get_agent_repository_by_id(repository_id)
+    if stored:
+        return _to_detail_item(stored, is_updated=is_updated)
+    raise ValueError("Failed to load repository listing after write")
+
+
+async def import_agent_from_repository_impl(
+    agent_repository_id: int,
+    authorization: str,
+) -> Dict[int, int]:
+    """Import the agent snapshot stored on a repository listing.
+
+    The listing's ``agent_info_json`` is validated into an
+    ``AgentRepositorySnapshot``. Snapshots that carry skills go through
+    ``import_agent_with_skills_impl``; all others through ``import_agent_impl``.
+
+    Returns:
+        Whatever the chosen import helper returns.
+
+    Raises:
+        ValueError: If the listing does not exist or has no dict snapshot.
+    """
+    listing = get_agent_repository_by_id(agent_repository_id)
+    if not listing:
+        raise ValueError("Repository listing not found")
+
+    raw_snapshot = listing.get("agent_info_json")
+    if not isinstance(raw_snapshot, dict):
+        raise ValueError("Repository listing has no agent snapshot")
+
+    snapshot = AgentRepositorySnapshot.model_validate(raw_snapshot)
+    skills = snapshot.skills
+    if not skills:
+        return await import_agent_impl(snapshot, authorization)
+    return await import_agent_with_skills_impl(snapshot, skills, authorization)
diff --git a/backend/services/agent_service.py b/backend/services/agent_service.py
index 950194da9..643d1995e 100644
--- a/backend/services/agent_service.py
+++ b/backend/services/agent_service.py
@@ -1,10 +1,13 @@
import asyncio
+import base64
+import io
import json
import logging
import os
import uuid
+import zipfile
from collections import deque
-from typing import Callable, Optional, Dict
+from typing import Callable, Optional, Dict, List
from fastapi import Header, Request
from fastapi.responses import JSONResponse, StreamingResponse
@@ -16,9 +19,12 @@
from agents.create_agent_info import create_agent_run_info, create_tool_config_list
from agents.preprocess_manager import preprocess_manager
from services.agent_version_service import publish_version_impl
+from utils.prompt_template_utils import normalize_prompt_generate_template_content
from consts.const import MEMORY_SEARCH_START_MSG, MEMORY_SEARCH_DONE_MSG, MEMORY_SEARCH_FAIL_MSG, TOOL_TYPE_MAPPING, \
LANGUAGE, MESSAGE_ROLE, MODEL_CONFIG_MAPPING, CAN_EDIT_ALL_USER_ROLES, PERMISSION_EDIT, PERMISSION_READ, PERMISSION_PRIVATE
-from consts.exceptions import MemoryPreparationException
+from consts.exceptions import AppException, MemoryPreparationException, SkillDuplicateError
+from consts.error_code import ErrorCode
+from consts.agent_unavailable_reasons import AgentUnavailableReason
from consts.model import (
AgentInfoRequest,
AgentRequest,
@@ -28,9 +34,11 @@
ExportAndImportDataFormat,
MCPInfo,
SkillInstanceInfoRequest,
+ SkillZipEntry,
ToolInstanceInfoRequest,
ToolSourceEnum, ModelConnectStatusEnum
)
+from services.asset_owner_visibility import resolve_agent_list_permission
from database.agent_db import (
create_agent,
delete_agent_by_id,
@@ -38,7 +46,9 @@
delete_related_agent,
insert_related_agent,
query_all_agent_info_by_tenant_id,
+ query_sub_agent_relations,
query_sub_agents_id_list,
+ resolve_sub_agent_version_no,
search_agent_id_by_agent_name,
search_agent_info_by_agent_id,
search_blank_sub_agent_by_main_agent_id,
@@ -46,6 +56,7 @@
update_related_agents,
clear_agent_new_mark
)
+from database import a2a_agent_db
from database.model_management_db import get_model_by_model_id, get_model_id_by_display_name
from database.remote_mcp_db import get_mcp_server_by_name_and_tenant
from database.tool_db import (
@@ -59,12 +70,20 @@
search_tools_for_sub_agent
)
from database import skill_db
-from database.agent_version_db import query_version_list
+from database.attachment_db import upload_fileobj
+from services.skill_service import SkillService
+from services.file_management_service import is_allowed_skill_upload_path
+from database.agent_version_db import query_version_list, query_current_version_no
from database.group_db import query_group_ids_by_user
from database.user_tenant_db import get_user_tenant_by_user_id
-from database.a2a_agent_db import get_server_agent_ids
+from database.a2a_agent_db import get_server_agent_ids, query_external_sub_agents
+from services.prompt_template_service import (
+ SYSTEM_PROMPT_TEMPLATE_ID,
+ SYSTEM_PROMPT_TEMPLATE_NAME,
+ get_prompt_template_summary,
+)
from utils.str_utils import convert_list_to_string, convert_string_to_list
-from services.conversation_management_service import save_conversation_assistant, save_conversation_user
+from services.conversation_management_service import save_conversation_assistant, save_conversation_user, save_skill_files_to_conversation
from services.memory_config_service import build_memory_context
from utils.auth_utils import get_current_user_info, get_user_language
from utils.config_utils import tenant_config_manager
@@ -73,15 +92,158 @@
from utils.prompt_template_utils import get_prompt_generate_prompt_template
from utils.llm_utils import call_llm_for_system_prompt
+# Monitoring utilities: bind Agent metadata once at the request boundary.
+from nexent.monitor import AgentRunMetadata, agent_monitoring_context
+
# Import monitoring utilities
from utils.monitoring import monitoring_manager
logger = logging.getLogger(__name__)
+SAFE_AGENT_STREAM_ERROR_MESSAGE = "Agent execution failed. Please try again later."
+
+
+def _extract_json_objects_from_text(text: str) -> list[dict]:
+    """Collect every top-level JSON object found inside free-form text.
+
+    Scans for ``{`` and tries to decode a JSON value from each one. A brace
+    that does not start valid JSON is skipped by one character; a decoded
+    object moves the scan past its end, so objects nested inside it are not
+    reported separately.
+
+    Returns:
+        The decoded dicts in order of appearance; empty for empty input.
+    """
+    found: list[dict] = []
+    if not text:
+        return found
+
+    decoder = json.JSONDecoder()
+    cursor = 0
+    while cursor < len(text):
+        brace_at = text.find("{", cursor)
+        if brace_at < 0:
+            break
+        try:
+            value, stop = decoder.raw_decode(text, brace_at)
+        except json.JSONDecodeError:
+            cursor = brace_at + 1
+        else:
+            if isinstance(value, dict):
+                found.append(value)
+            # Always advance by at least one character past the brace.
+            cursor = max(stop, brace_at + 1)
+    return found
+
+
+def _extract_skill_file_upload_payloads(content: str) -> list[dict]:
+    """Return the embedded JSON objects that have a truthy ``absolute_path``."""
+    return [
+        candidate
+        for candidate in _extract_json_objects_from_text(content)
+        if candidate.get("absolute_path")
+    ]
+
+
+def _transform_skill_files_to_standard_format(upload_results: list[dict]) -> list[dict]:
+    """Convert skill upload results into the attachment dicts the frontend uses.
+
+    Input keys read per result: ``object_name``, ``file_name`` (else ``name``),
+    ``file_size`` (else ``size``), ``url``, ``presigned_url`` (else
+    ``preview_url``). A fallback key is consulted only when the primary key is
+    absent from the dict.
+
+    Output keys per file: ``object_name``, ``name``, ``type`` (always
+    ``"file"``), ``size``, ``url``, ``presigned_url``, ``description``
+    (always empty).
+    """
+
+    def _as_attachment(entry: dict) -> dict:
+        display_name = entry.get("file_name", entry.get("name", ""))
+        byte_count = entry.get("file_size", entry.get("size", 0))
+        link = entry.get("presigned_url", entry.get("preview_url", ""))
+        return {
+            "object_name": entry.get("object_name", ""),
+            "name": display_name,
+            "type": "file",
+            "size": byte_count,
+            "url": entry.get("url", ""),
+            "presigned_url": link,
+            "description": "",
+        }
+
+    return [_as_attachment(entry) for entry in upload_results]
+
+
+async def _process_skill_file_uploads(
+    content: str,
+    user_id: str,
+    tenant_id: str,
+) -> list[dict]:
+    """Upload files referenced by streamed tool output and describe each upload.
+
+    Every JSON object in ``content`` that carries an ``absolute_path`` is a
+    candidate. A candidate is skipped when its path is rejected by
+    ``is_allowed_skill_upload_path`` or is not an existing regular file.
+    Upload failures are logged and skipped, never raised.
+
+    Args:
+        content: Text that may embed JSON payloads.
+        user_id: When non-empty, uploads go under ``skill-files/<user_id>``;
+            otherwise under ``skill-files``.
+        tenant_id: Accepted for interface compatibility; not read here.
+
+    Returns:
+        One metadata dict per successful upload, in payload order.
+    """
+    upload_results: list[dict] = []
+    actual_prefix = f"skill-files/{user_id}" if user_id else "skill-files"
+
+    for payload in _extract_skill_file_upload_payloads(content):
+        absolute_path = str(payload.get("absolute_path") or "").strip()
+        if not absolute_path:
+            continue
+
+        if not is_allowed_skill_upload_path(absolute_path):
+            logger.warning(
+                "[skill-file] rejected unsafe path absolute_path=%s",
+                absolute_path,
+            )
+            continue
+
+        # isfile rather than exists: a directory passes an existence check and
+        # then fails inside open(), producing a logged stack trace for nothing.
+        if not os.path.isfile(absolute_path):
+            continue
+
+        file_name = str(
+            payload.get("file_name")
+            or payload.get("file_path")
+            or os.path.basename(absolute_path)
+        )
+        mime_type = str(
+            payload.get("mime_type")
+            or payload.get("content_type")
+            or "application/octet-stream"
+        )
+
+        try:
+            file_size = os.path.getsize(absolute_path)
+            with open(absolute_path, "rb") as file_obj:
+                upload_result = upload_fileobj(
+                    file_obj=file_obj,
+                    file_name=file_name,
+                    prefix=actual_prefix,
+                    generate_presigned_url=True,
+                    file_size=file_size,
+                )
+            if upload_result.get("success"):
+                upload_results.append(
+                    {
+                        "status": "success",
+                        "file_name": file_name,
+                        "absolute_path": absolute_path,
+                        "object_name": upload_result.get("object_name"),
+                        "preview_url": upload_result.get("presigned_url") or upload_result.get("url"),
+                        "url": upload_result.get("url"),
+                        "presigned_url": upload_result.get("presigned_url"),
+                        "mime_type": mime_type,
+                        "file_size": upload_result.get("file_size", file_size),
+                    }
+                )
+            else:
+                error_message = upload_result.get("error") or "Upload failed"
+                logger.warning(
+                    "[skill-file] upload failed file_name=%s absolute_path=%s error=%s",
+                    file_name,
+                    absolute_path,
+                    error_message,
+                )
+        except Exception:
+            # Best effort: logger.exception records the traceback; move on.
+            logger.exception(
+                "[skill-file] failed to upload file file_name=%s absolute_path=%s",
+                file_name,
+                absolute_path,
+            )
+
+    return upload_results
-# -------------------------------------------------------------
-# Internal helper functions
-# -------------------------------------------------------------
+
+def _safe_agent_stream_error_chunk() -> str:
+    """Build the generic SSE ``data:`` frame emitted when an agent run fails.
+
+    The frame carries only ``SAFE_AGENT_STREAM_ERROR_MESSAGE``, so no
+    exception text is included in it.
+    """
+    body = {"type": "error", "content": SAFE_AGENT_STREAM_ERROR_MESSAGE}
+    serialized = json.dumps(body, ensure_ascii=False)
+    return "data: " + serialized + "\n\n"
def _resolve_user_tenant_language(
@@ -308,12 +470,25 @@ def _regenerate_agent_value_with_llm(
user_prompt_key: str,
default_system_prompt: str,
default_user_prompt_builder: Callable[[dict], str],
- fallback_fn: Callable[[str], str]
+ fallback_fn: Callable[[str], str],
+ prompt_template_id: Optional[int] = None,
+ user_id: Optional[str] = None,
) -> str:
"""
Shared helper to regenerate agent-related values with an LLM.
"""
- prompt_template = get_prompt_generate_prompt_template(language)
+ if user_id is not None:
+ from services.prompt_template_service import resolve_prompt_generate_template
+ prompt_template = resolve_prompt_generate_template(
+ tenant_id=tenant_id,
+ user_id=user_id,
+ language=language,
+ prompt_template_id=prompt_template_id,
+ )
+ else:
+ prompt_template = normalize_prompt_generate_template_content(
+ get_prompt_generate_prompt_template(language)
+ )
system_prompt = _render_prompt_template(
prompt_template.get(system_prompt_key, ""),
original_value=original_value
@@ -345,7 +520,8 @@ def _regenerate_agent_value_with_llm(
callback=None,
tenant_id=tenant_id
)
- candidate = (regenerated_value or "").strip().splitlines()[0].strip()
+ candidate = (regenerated_value or "").strip().splitlines()[
+ 0].strip()
if candidate in value_set:
raise ValueError(f"Generated duplicate value '{candidate}'")
return candidate
@@ -370,7 +546,9 @@ def _regenerate_agent_name_with_llm(
tenant_id: str,
language: str = LANGUAGE["ZH"],
agents_cache: list[dict] | None = None,
- exclude_agent_id: int | None = None
+ exclude_agent_id: int | None = None,
+ prompt_template_id: Optional[int] = None,
+ user_id: Optional[str] = None,
) -> str:
return _regenerate_agent_value_with_llm(
original_value=original_name,
@@ -379,8 +557,8 @@ def _regenerate_agent_name_with_llm(
model_id=model_id,
tenant_id=tenant_id,
language=language,
- system_prompt_key="AGENT_NAME_REGENERATE_SYSTEM_PROMPT",
- user_prompt_key="AGENT_NAME_REGENERATE_USER_PROMPT",
+ system_prompt_key="agent_name_regenerate_system_prompt",
+ user_prompt_key="agent_name_regenerate_user_prompt",
default_system_prompt=(
"You refine agent variable names so that they stay close to the "
"original meaning and remain unique within the tenant."
@@ -398,11 +576,12 @@ def _regenerate_agent_name_with_llm(
tenant_id=tenant_id,
agents_cache=agents_cache,
exclude_agent_id=exclude_agent_id
- )
+ ),
+ prompt_template_id=prompt_template_id,
+ user_id=user_id,
)
-
def _regenerate_agent_display_name_with_llm(
original_display_name: str,
existing_display_names: list[str],
@@ -411,7 +590,9 @@ def _regenerate_agent_display_name_with_llm(
tenant_id: str,
language: str = LANGUAGE["ZH"],
agents_cache: list[dict] | None = None,
- exclude_agent_id: int | None = None
+ exclude_agent_id: int | None = None,
+ prompt_template_id: Optional[int] = None,
+ user_id: Optional[str] = None,
) -> str:
return _regenerate_agent_value_with_llm(
original_value=original_display_name,
@@ -420,8 +601,8 @@ def _regenerate_agent_display_name_with_llm(
model_id=model_id,
tenant_id=tenant_id,
language=language,
- system_prompt_key="AGENT_DISPLAY_NAME_REGENERATE_SYSTEM_PROMPT",
- user_prompt_key="AGENT_DISPLAY_NAME_REGENERATE_USER_PROMPT",
+ system_prompt_key="agent_display_name_regenerate_system_prompt",
+ user_prompt_key="agent_display_name_regenerate_user_prompt",
default_system_prompt=(
"You refine agent display names so they remain unique, concise, "
"and aligned with the agent's capability."
@@ -438,11 +619,12 @@ def _regenerate_agent_display_name_with_llm(
tenant_id=tenant_id,
agents_cache=agents_cache,
exclude_agent_id=exclude_agent_id
- )
+ ),
+ prompt_template_id=prompt_template_id,
+ user_id=user_id,
)
-
async def check_agent_name_conflict_batch_impl(
request: AgentNameBatchCheckRequest,
authorization: str
@@ -500,17 +682,21 @@ async def regenerate_agent_name_batch_impl(
_, tenant_id, _ = get_current_user_info(authorization)
agents_cache = query_all_agent_info_by_tenant_id(tenant_id)
- existing_names = [agent.get("name") for agent in agents_cache if agent.get("name")]
- existing_display_names = [agent.get("display_name") for agent in agents_cache if agent.get("display_name")]
+ existing_names = [agent.get("name")
+ for agent in agents_cache if agent.get("name")]
+ existing_display_names = [agent.get(
+ "display_name") for agent in agents_cache if agent.get("display_name")]
# Always use tenant quick-config LLM model
quick_config_model = tenant_config_manager.get_model_config(
key=MODEL_CONFIG_MAPPING["llm"],
tenant_id=tenant_id
)
- resolved_model_id = quick_config_model.get("model_id") if quick_config_model else None
+ resolved_model_id = quick_config_model.get(
+ "model_id") if quick_config_model else None
if not resolved_model_id:
- raise ValueError("No available model for regeneration. Please configure an LLM model first.")
+ raise ValueError(
+ "No available model for regeneration. Please configure an LLM model first.")
results: list[dict] = []
# Use local mutable caches to avoid regenerated duplicates in the same batch
@@ -540,7 +726,8 @@ async def regenerate_agent_name_batch_impl(
exclude_agent_id=exclude_agent_id
)
except Exception as e:
- logger.error(f"Failed to regenerate agent name with LLM: {str(e)}, using fallback")
+ logger.error(
+ f"Failed to regenerate agent name with LLM: {str(e)}, using fallback")
agent_name = _generate_unique_agent_name_with_suffix(
agent_name,
tenant_id=tenant_id,
@@ -565,7 +752,8 @@ async def regenerate_agent_name_batch_impl(
exclude_agent_id=exclude_agent_id
)
except Exception as e:
- logger.error(f"Failed to regenerate agent display_name with LLM: {str(e)}, using fallback")
+ logger.error(
+ f"Failed to regenerate agent display_name with LLM: {str(e)}, using fallback")
agent_display_name = _generate_unique_display_name_with_suffix(
agent_display_name,
tenant_id=tenant_id,
@@ -594,34 +782,60 @@ async def _stream_agent_chunks(
agent_run_info,
memory_ctx,
):
- """Yield SSE chunks from agent_run while persisting messages & cleanup.
-
- This utility centralizes the common streaming logic used by both
- generate_stream_with_memory and generate_stream_no_memory so that the code
- is easier to maintain and less error-prone.
- """
+ """Yield SSE chunks from agent_run while persisting messages and cleanup."""
local_messages = []
captured_final_answer = None
+ captured_skill_files: dict[str, dict] = {}
+ skill_file_uploads: list[dict] = []
try:
async for chunk in agent_run(agent_run_info):
local_messages.append(chunk)
- # Try to capture the final answer as it streams by in order to start memory addition
try:
data = json.loads(chunk)
- if data.get("type") == "final_answer":
+ chunk_type = data.get("type")
+ if chunk_type == "final_answer":
captured_final_answer = data.get("content")
+
+ should_parse_skill_file = chunk_type in {"execution_logs", "parse"} or data.get("role") == "tool-response"
+ if should_parse_skill_file:
+ extracted_payload_count = 0
+ content_value = data.get("content")
+ if isinstance(content_value, list):
+ content_items = content_value
+ elif content_value:
+ content_items = [{"type": "text", "text": str(content_value)}]
+ else:
+ content_items = []
+
+ for item in content_items:
+ if isinstance(item, dict) and item.get("type") == "text":
+ text_value = item.get("text")
+ if text_value:
+ extracted_payloads = _extract_json_objects_from_text(text_value)
+ for payload in extracted_payloads:
+ absolute_path = str(payload.get("absolute_path") or "").strip()
+ if not absolute_path:
+ continue
+ if absolute_path in captured_skill_files:
+ continue
+ if not os.path.exists(absolute_path):
+ continue
+ captured_skill_files[absolute_path] = payload
+ extracted_payload_count += 1
+ if extracted_payload_count:
+ logger.info(
+ "[skill-file] captured payloads count=%s current_total=%s",
+ extracted_payload_count,
+ len(captured_skill_files),
+ )
except Exception:
pass
yield f"data: {chunk}\n\n"
except Exception as run_exc:
- logger.error(f"Agent run error: {str(run_exc)}")
- # Emit an error chunk and terminate the stream immediately
- error_payload = json.dumps(
- {"type": "error", "content": str(run_exc)}, ensure_ascii=False)
- yield f"data: {error_payload}\n\n"
+ logger.error("Agent run error: %r", run_exc, exc_info=True)
+ yield _safe_agent_stream_error_chunk()
finally:
- # Persist assistant messages for non-debug runs
if not agent_request.is_debug:
save_messages(
agent_request,
@@ -630,11 +844,54 @@ async def _stream_agent_chunks(
tenant_id=tenant_id,
user_id=user_id,
)
- # Always unregister the run to release resources
agent_run_manager.unregister_agent_run(
agent_request.conversation_id, user_id)
- # Schedule memory addition in background to avoid blocking SSE termination
+ try:
+ skill_file_content_local = "\n".join(
+ json.dumps(payload, ensure_ascii=False)
+ for payload in captured_skill_files.values()
+ )
+ if skill_file_content_local:
+ skill_file_uploads = await _process_skill_file_uploads(
+ content=skill_file_content_local,
+ user_id=user_id,
+ tenant_id=tenant_id,
+ )
+ logger.info(
+ "[skill-file] upload finished conversation=%s result_count=%s results=%s",
+ agent_request.conversation_id,
+ len(skill_file_uploads), skill_file_uploads
+ )
+ if skill_file_uploads:
+ # Keep original format for real-time SSE display
+ skill_files_payload = json.dumps(
+ {"skill_file_uploads": skill_file_uploads},
+ ensure_ascii=False,
+ )
+ try:
+ yield f"data: {json.dumps({'type': 'skill_files', 'content': skill_files_payload}, ensure_ascii=False)}\n\n"
+ except RuntimeError:
+ # Stream is closing (e.g., client disconnect). Avoid raising during generator teardown.
+ pass
+ # Persist skill file uploads to the conversation history so they
+ # appear in subsequent GET /conversation/{id} calls.
+ # Transform to frontend attachment format (object_name, name, type, size, etc.)
+ try:
+ frontend_files = _transform_skill_files_to_standard_format(skill_file_uploads)
+ save_skill_files_to_conversation(
+ conversation_id=agent_request.conversation_id,
+ skill_file_uploads=frontend_files,
+ user_id=user_id,
+ )
+ except Exception:
+ logger.exception(
+ "[skill-file] failed to persist skill file uploads to conversation=%s",
+ agent_request.conversation_id,
+ )
+ except Exception:
+ logger.exception("Failed to process skill file uploads")
+
async def _add_memory_background():
try:
# Skip if memory recording is disabled
@@ -681,7 +938,8 @@ async def _add_memory_background():
# Create and store the background task to avoid warnings
background_task = asyncio.create_task(_add_memory_background())
# Add done callback to handle any exceptions that might occur
- background_task.add_done_callback(lambda t: t.exception() if t.exception() else None)
+ background_task.add_done_callback(
+ lambda t: t.exception() if t.exception() else None)
except Exception as schedule_err:
logger.error(
f"Failed to schedule background memory addition: {schedule_err}")
@@ -709,13 +967,35 @@ async def get_creating_sub_agent_id_service(tenant_id: str, user_id: str = None)
return create_agent(agent_info={"enabled": False}, tenant_id=tenant_id, user_id=user_id)["agent_id"]
-async def get_agent_info_impl(agent_id: int, tenant_id: str, version_no: int = 0):
+async def get_agent_info_impl(agent_id: int, tenant_id: str, version_no: int = 0, user_id: Optional[str] = None):
try:
- agent_info = search_agent_info_by_agent_id(agent_id, tenant_id, version_no)
+ agent_info = search_agent_info_by_agent_id(
+ agent_id, tenant_id, version_no)
+ # Keep the request-scoped tenant_id unless the record explicitly provides one.
+ record_tenant_id = agent_info.get("tenant_id")
+ if record_tenant_id:
+ tenant_id = record_tenant_id
except Exception as e:
logger.error(f"Failed to get agent info: {str(e)}")
raise ValueError(f"Failed to get agent info: {str(e)}")
+ # Calculate permission if user_id is provided
+ if user_id is not None:
+ try:
+ user_tenant_record = get_user_tenant_by_user_id(user_id) or {}
+ user_role = str(user_tenant_record.get("user_role") or "").upper()
+ can_edit_all = user_role in CAN_EDIT_ALL_USER_ROLES
+
+ # Permission logic (same as agent list, including ASSET_OWNER read-only override)
+ agent_info["permission"] = resolve_agent_list_permission(
+ user_role=user_role,
+ agent=agent_info,
+ user_id=user_id,
+ can_edit_all=can_edit_all,
+ )
+ except Exception as e:
+ logger.warning(f"Failed to calculate agent permission: {str(e)}")
+
try:
tool_info = search_tools_for_sub_agent(
agent_id=agent_id, tenant_id=tenant_id)
@@ -732,21 +1012,52 @@ async def get_agent_info_impl(agent_id: int, tenant_id: str, version_no: int = 0
logger.error(f"Failed to get sub agent id list: {str(e)}")
agent_info["sub_agent_id_list"] = []
+ try:
+ skill_service = SkillService()
+ instances = skill_service.list_skill_instances(
+ agent_id=agent_id,
+ tenant_id=tenant_id,
+ version_no=version_no
+ )
+ agent_info["skills"] = instances
+ except Exception as e:
+ logger.exception(f"Failed to get agent skills: {str(e)}")
+ agent_info["skills"] = []
+
+ try:
+ external_agents = query_external_sub_agents(
+ local_agent_id=agent_id, tenant_id=tenant_id, version_no=version_no)
+ agent_info["external_sub_agent_id_list"] = [
+ ea["external_agent_id"] for ea in external_agents
+ ]
+ except Exception as e:
+ logger.error(f"Failed to get external sub agents: {str(e)}")
+ agent_info["external_sub_agent_id_list"] = []
+
if agent_info["model_id"] is not None:
model_info = get_model_by_model_id(agent_info["model_id"])
- agent_info["model_name"] = model_info.get("display_name", None) if model_info is not None else None
+ agent_info["model_name"] = model_info.get(
+ "display_name", None) if model_info is not None else None
else:
agent_info["model_name"] = None
# Get business logic model display name from model_id
if agent_info.get("business_logic_model_id") is not None:
- business_logic_model_info = get_model_by_model_id(agent_info["business_logic_model_id"])
- agent_info["business_logic_model_name"] = business_logic_model_info.get("display_name", None) if business_logic_model_info is not None else None
+ business_logic_model_info = get_model_by_model_id(
+ agent_info["business_logic_model_id"])
+ agent_info["business_logic_model_name"] = business_logic_model_info.get(
+ "display_name", None) if business_logic_model_info is not None else None
elif "business_logic_model_name" not in agent_info:
agent_info["business_logic_model_name"] = None
+ if not agent_info.get("prompt_template_id"):
+ agent_info["prompt_template_id"] = SYSTEM_PROMPT_TEMPLATE_ID
+ if not agent_info.get("prompt_template_name"):
+ agent_info["prompt_template_name"] = SYSTEM_PROMPT_TEMPLATE_NAME
+
if agent_info.get("group_ids") is not None:
- agent_info["group_ids"] = convert_string_to_list(agent_info.get("group_ids"))
+ agent_info["group_ids"] = convert_string_to_list(
+ agent_info.get("group_ids"))
# Check agent availability
is_available, unavailable_reasons = check_agent_availability(
@@ -757,6 +1068,12 @@ async def get_agent_info_impl(agent_id: int, tenant_id: str, version_no: int = 0
agent_info["is_available"] = is_available
agent_info["unavailable_reasons"] = unavailable_reasons
+ # Set current_version_no from draft record (version_no=0)
+ # This ensures the returned data always has the current published version info
+ if version_no > 0:
+ draft_version_no = query_current_version_no(agent_id, tenant_id)
+ agent_info["current_version_no"] = draft_version_no
+
return agent_info
@@ -802,6 +1119,15 @@ async def get_creating_sub_agent_info_impl(authorization: str = Header(None)):
async def update_agent_info_impl(request: AgentInfoRequest, authorization: str = Header(None)):
user_id, tenant_id, _ = get_current_user_info(authorization)
+ if request.example_questions is not None and len(request.example_questions) > 6:
+ raise AppException(ErrorCode.COMMON_PARAMETER_INVALID, "example_questions cannot exceed 6 items")
+
+ prompt_template_id, prompt_template_name = get_prompt_template_summary(
+ template_id=request.prompt_template_id,
+ tenant_id=tenant_id,
+ user_id=user_id,
+ )
+
# If agent_id is None, create a new agent; otherwise, update existing
agent_id: Optional[int] = request.agent_id
try:
@@ -818,11 +1144,16 @@ async def update_agent_info_impl(request: AgentInfoRequest, authorization: str =
"model_name": request.model_name,
"business_logic_model_id": request.business_logic_model_id,
"business_logic_model_name": request.business_logic_model_name,
+ "prompt_template_id": prompt_template_id,
+ "prompt_template_name": prompt_template_name,
"max_steps": request.max_steps,
"provide_run_summary": request.provide_run_summary,
+ "verification_config": request.verification_config,
"duty_prompt": request.duty_prompt,
"constraint_prompt": request.constraint_prompt,
"few_shots_prompt": request.few_shots_prompt,
+ "greeting_message": request.greeting_message,
+ "example_questions": request.example_questions,
"enabled": request.enabled if request.enabled is not None else True,
"group_ids": convert_list_to_string(request.group_ids) if request.group_ids else user_group_ids,
"ingroup_permission": request.ingroup_permission
@@ -830,6 +1161,8 @@ async def update_agent_info_impl(request: AgentInfoRequest, authorization: str =
agent_id = created["agent_id"]
else:
# Update agent
+ request.prompt_template_id = prompt_template_id
+ request.prompt_template_name = prompt_template_name
update_agent(agent_id, request, user_id)
except Exception as e:
logger.error(f"Failed to update agent info: {str(e)}")
@@ -897,9 +1230,11 @@ async def update_agent_info_impl(request: AgentInfoRequest, authorization: str =
skill_info=SkillInstanceInfoRequest(
skill_id=inst_skill_id,
agent_id=agent_id,
- skill_description=instance.get("skill_description"),
+ skill_description=instance.get(
+ "skill_description"),
skill_content=instance.get("skill_content"),
- enabled=False
+ enabled=False,
+ config_values=instance.get("config_values"),
),
tenant_id=tenant_id,
user_id=user_id
@@ -913,7 +1248,8 @@ async def update_agent_info_impl(request: AgentInfoRequest, authorization: str =
if inst.get("skill_id") == skill_id),
None
)
- skill_description = (existing_instance or {}).get("skill_description")
+ skill_description = (existing_instance or {}).get(
+ "skill_description")
skill_content = (existing_instance or {}).get("skill_content")
skill_db.create_or_update_skill_by_skill_info(
skill_info=SkillInstanceInfoRequest(
@@ -922,6 +1258,8 @@ async def update_agent_info_impl(request: AgentInfoRequest, authorization: str =
skill_description=skill_description,
skill_content=skill_content,
enabled=True,
+ config_values=(existing_instance or {}
+ ).get("config_values"),
),
tenant_id=tenant_id,
user_id=user_id
@@ -941,7 +1279,8 @@ async def update_agent_info_impl(request: AgentInfoRequest, authorization: str =
while len(search_list):
left_ele = search_list.popleft()
if left_ele == agent_id:
- raise ValueError("Circular dependency detected: Agent cannot be related to itself or create circular calls")
+ raise ValueError(
+ "Circular dependency detected: Agent cannot be related to itself or create circular calls")
if left_ele in agent_id_set:
continue
else:
@@ -964,6 +1303,50 @@ async def update_agent_info_impl(request: AgentInfoRequest, authorization: str =
logger.error(f"Failed to update related agents: {str(e)}")
raise ValueError(f"Failed to update related agents: {str(e)}")
+ # Handle related external agents saving when provided
+ try:
+ if request.related_external_agent_ids is not None and agent_id is not None:
+ related_external_agent_ids = request.related_external_agent_ids
+ # Query current relations
+ current_relations = a2a_agent_db.list_external_relations_by_local_agent(
+ local_agent_id=agent_id,
+ tenant_id=tenant_id
+ )
+ current_external_ids = {
+ rel["external_agent_id"] for rel in current_relations
+ }
+ new_external_ids = set(
+ related_external_agent_ids) if related_external_agent_ids else set()
+
+ # Find IDs to delete (in current but not in new)
+ ids_to_delete = current_external_ids - new_external_ids
+ # Find IDs to add (in new but not in current)
+ ids_to_add = new_external_ids - current_external_ids
+
+ # Soft delete removed relations
+ for ext_agent_id in ids_to_delete:
+ a2a_agent_db.remove_external_agent_relation(
+ local_agent_id=agent_id,
+ external_agent_id=ext_agent_id,
+ tenant_id=tenant_id
+ )
+
+ # Add new relations
+ for ext_agent_id in ids_to_add:
+ try:
+ a2a_agent_db.add_external_agent_relation(
+ local_agent_id=agent_id,
+ external_agent_id=ext_agent_id,
+ tenant_id=tenant_id,
+ user_id=user_id
+ )
+ except ValueError:
+ # Relation already exists, skip
+ pass
+ except Exception as e:
+ logger.error(f"Failed to update related external agents: {str(e)}")
+ raise ValueError(f"Failed to update related external agents: {str(e)}")
+
return {"agent_id": agent_id}
@@ -1038,74 +1421,231 @@ async def clear_agent_memory(agent_id: int, tenant_id: str, user_id: str):
# Silently fail to maintain agent deletion process
-async def export_agent_impl(agent_id: int, authorization: str = Header(None)) -> str:
- """
- Export the configuration information of the specified agent and all its sub-agents.
-
- Args:
- agent_id (int): The ID of the agent to export.
- authorization (str): User authentication information, obtained from the Header.
-
- Returns:
- str: A formatted JSON string containing the configuration information of the agent and all its sub-agents.
-
- Data Structure Example:
- model.py ExportAndImportDataFormat
-
- Note:
- This function recursively finds all managed sub-agents and exports the detailed configuration of each agent (including tools, prompts, etc.) as a dictionary, and finally returns it as a formatted JSON string for frontend download and backup.
- """
-
- user_id, tenant_id, _ = get_current_user_info(authorization)
-
+async def _export_agent_dict_core(
+ root_agent_id: int,
+ tenant_id: str,
+ user_id: str,
+ version_no: int = 0,
+) -> dict:
+ """Build ExportAndImportDataFormat dict for an agent tree at the given version."""
export_agent_dict = {}
- search_list = deque([agent_id])
- agent_id_set = set()
+ search_list: deque = deque([(root_agent_id, version_no)])
+ visited: set = set()
mcp_info_set = set()
- while len(search_list):
- left_ele = search_list.popleft()
- if left_ele in agent_id_set:
+ while search_list:
+ current_agent_id, current_version_no = search_list.popleft()
+ visit_key = (current_agent_id, current_version_no)
+ if visit_key in visited:
continue
+ visited.add(visit_key)
- agent_id_set.add(left_ele)
- agent_info = await export_agent_by_agent_id(agent_id=left_ele, tenant_id=tenant_id, user_id=user_id)
+ agent_info = await export_agent_by_agent_id(
+ agent_id=current_agent_id,
+ tenant_id=tenant_id,
+ user_id=user_id,
+ version_no=current_version_no,
+ )
- # collect mcp name
for tool in agent_info.tools:
if tool.source == "mcp" and tool.usage:
mcp_info_set.add(tool.usage)
- search_list.extend(agent_info.managed_agents)
+ relations = query_sub_agent_relations(
+ main_agent_id=current_agent_id,
+ tenant_id=tenant_id,
+ version_no=current_version_no,
+ )
+ for rel in relations:
+ child_id = rel["selected_agent_id"]
+ child_version = resolve_sub_agent_version_no(
+ child_id,
+ rel.get("selected_agent_version_no"),
+ tenant_id,
+ )
+ search_list.append((child_id, child_version))
+
export_agent_dict[str(agent_info.agent_id)] = agent_info
- # convert mcp info to MCPInfo list
mcp_info_list = []
for mcp_server_name in mcp_info_set:
- # get mcp url by mcp_server_name and tenant_id
mcp_url = get_mcp_server_by_name_and_tenant(mcp_server_name, tenant_id)
mcp_info_list.append(
MCPInfo(mcp_server_name=mcp_server_name, mcp_url=mcp_url))
export_data = ExportAndImportDataFormat(
- agent_id=agent_id, agent_info=export_agent_dict, mcp_info=mcp_info_list)
+ agent_id=root_agent_id,
+ agent_info=export_agent_dict,
+ mcp_info=mcp_info_list,
+ )
return export_data.model_dump()
-async def export_agent_by_agent_id(agent_id: int, tenant_id: str, user_id: str) -> ExportAndImportAgentInfo:
+async def export_agent_dict_impl(
+ agent_id: int,
+ authorization: str = Header(None),
+ version_no: int = 0,
+) -> dict:
"""
- Export a single agent's information based on agent_id
+ Export the configuration information of the specified agent and all its sub-agents.
+
+ Args:
+ agent_id (int): The ID of the agent to export.
+ authorization (str): User authentication information, obtained from the Header.
+ version_no (int): Version to export. Default 0 = draft.
+
+ Returns:
+ dict: ExportAndImportDataFormat as a plain dict (via model_dump).
"""
+ user_id, tenant_id, _ = get_current_user_info(authorization)
+ return await _export_agent_dict_core(
+ root_agent_id=agent_id,
+ tenant_id=tenant_id,
+ user_id=user_id,
+ version_no=version_no,
+ )
+
+
+async def export_agent_dict_for_repository_impl(
+ agent_id: int,
+ tenant_id: str,
+ user_id: str,
+ version_no: int,
+) -> dict:
+ """Export agent tree for marketplace repository storage (no HTTP auth header)."""
+ return await _export_agent_dict_core(
+ root_agent_id=agent_id,
+ tenant_id=tenant_id,
+ user_id=user_id,
+ version_no=version_no,
+ )
+
+
+async def export_agent_impl(
+ agent_id: int,
+ authorization: str = Header(None),
+ version_no: int = 0,
+) -> str:
+ """Serialize export_agent_dict_impl output to a JSON string for download or ZIP embedding."""
+ agent_dict = await export_agent_dict_impl(
+ agent_id, authorization, version_no=version_no
+ )
+ return json.dumps(agent_dict)
+
+
+def _collect_skill_names_from_tree(
+ agent_id: int,
+ tenant_id: str,
+ version_no: int,
+ visited: Optional[set] = None,
+) -> List[str]:
+ """Collect unique skill names from an agent tree at the given version."""
+ if visited is None:
+ visited = set()
+
+ skill_names: List[str] = []
+ seen_names: set = set()
+
+ def _walk(current_agent_id: int, current_version_no: int) -> None:
+ visit_key = (current_agent_id, current_version_no)
+ if visit_key in visited:
+ return
+ visited.add(visit_key)
+
+ skill_instances = skill_db.query_skill_instances_by_agent_id(
+ agent_id=current_agent_id,
+ tenant_id=tenant_id,
+ version_no=current_version_no,
+ )
+ for inst in skill_instances:
+ skill_id = inst.get("skill_id")
+ skill = skill_db.get_skill_by_id(skill_id, tenant_id)
+ if skill:
+ name = skill.get("name")
+ if name and name not in seen_names:
+ seen_names.add(name)
+ skill_names.append(name)
+
+ relations = query_sub_agent_relations(
+ main_agent_id=current_agent_id,
+ tenant_id=tenant_id,
+ version_no=current_version_no,
+ )
+ for rel in relations:
+ child_id = rel["selected_agent_id"]
+ child_version = resolve_sub_agent_version_no(
+ child_id,
+ rel.get("selected_agent_version_no"),
+ tenant_id,
+ )
+ _walk(child_id, child_version)
+
+ _walk(agent_id, version_no)
+ return skill_names
+
+
+def collect_skill_zip_entries(
+ agent_id: int,
+ tenant_id: str,
+ version_no: int = 0,
+) -> List[SkillZipEntry]:
+ """Export skill ZIP payloads for all skills in an agent tree."""
+ skill_names = _collect_skill_names_from_tree(agent_id, tenant_id, version_no)
+ if not skill_names:
+ return []
+
+ skill_service = SkillService(tenant_id=tenant_id)
+ exported = skill_service.export_skills_by_names(skill_names, tenant_id)
+ return [
+ SkillZipEntry(
+ skill_name=entry["skill_name"],
+ skill_zip_base64=entry["skill_zip_base64"],
+ )
+ for entry in exported
+ ]
+
+
+async def export_agent_by_agent_id(
+ agent_id: int,
+ tenant_id: str,
+ user_id: str,
+ version_no: int = 0,
+) -> ExportAndImportAgentInfo:
+ """Export a single agent's information based on agent_id and version_no."""
agent_info = search_agent_info_by_agent_id(
- agent_id=agent_id, tenant_id=tenant_id)
+ agent_id=agent_id, tenant_id=tenant_id, version_no=version_no
+ )
agent_relation_in_db = query_sub_agents_id_list(
- main_agent_id=agent_id, tenant_id=tenant_id)
- tool_list = await create_tool_config_list(agent_id=agent_id, tenant_id=tenant_id, user_id=user_id)
+ main_agent_id=agent_id, tenant_id=tenant_id, version_no=version_no
+ )
+ tool_list = await create_tool_config_list(
+ agent_id=agent_id,
+ tenant_id=tenant_id,
+ user_id=user_id,
+ version_no=version_no,
+ )
+
+ # Collect skill names from skill instances
+ skill_names: List[str] = []
+ try:
+ skill_instances = skill_db.query_skill_instances_by_agent_id(
+ agent_id=agent_id, tenant_id=tenant_id, version_no=version_no
+ )
+ for inst in skill_instances:
+ skill_id = inst.get("skill_id")
+ skill = skill_db.get_skill_by_id(skill_id, tenant_id)
+ if skill:
+ name = skill.get("name")
+ if name:
+ skill_names.append(name)
+ except Exception as e:
+ logger.warning(
+ f"Failed to collect skill instances for agent {agent_id}: {e}")
# Check if any tool is KnowledgeBaseSearchTool and set its metadata to empty dict
for tool in tool_list:
- if tool.class_name in ["KnowledgeBaseSearchTool", "AnalyzeTextFileTool", "AnalyzeImageTool", "DataMateSearchTool"]:
+ if tool.class_name in ["KnowledgeBaseSearchTool", "AnalyzeTextFileTool", "AnalyzeImageTool", "AnalyzeAudioTool", "AnalyzeVideoTool", "DataMateSearchTool"]:
tool.metadata = {}
# Get model_id and model display name from agent_info
@@ -1113,16 +1653,20 @@ async def export_agent_by_agent_id(agent_id: int, tenant_id: str, user_id: str)
model_display_name = None
if model_id is not None:
model_info = get_model_by_model_id(model_id)
- model_display_name = model_info.get("display_name") if model_info is not None else None
+ model_display_name = model_info.get(
+ "display_name") if model_info is not None else None
# Get business_logic_model_id and business logic model display name
business_logic_model_id = agent_info.get("business_logic_model_id")
business_logic_model_display_name = None
if business_logic_model_id is not None:
- business_logic_model_info = get_model_by_model_id(business_logic_model_id)
- business_logic_model_display_name = business_logic_model_info.get("display_name") if business_logic_model_info is not None else None
+ business_logic_model_info = get_model_by_model_id(
+ business_logic_model_id)
+ business_logic_model_display_name = business_logic_model_info.get(
+ "display_name") if business_logic_model_info is not None else None
agent_info = ExportAndImportAgentInfo(agent_id=agent_id,
+ tenant_id=agent_info["tenant_id"],
name=agent_info["name"],
display_name=agent_info["display_name"],
description=agent_info["description"],
@@ -1130,6 +1674,7 @@ async def export_agent_by_agent_id(agent_id: int, tenant_id: str, user_id: str)
author=agent_info.get("author"),
max_steps=agent_info["max_steps"],
provide_run_summary=agent_info["provide_run_summary"],
+ verification_config=agent_info.get("verification_config"),
duty_prompt=agent_info.get(
"duty_prompt"),
constraint_prompt=agent_info.get(
@@ -1142,14 +1687,19 @@ async def export_agent_by_agent_id(agent_id: int, tenant_id: str, user_id: str)
model_id=model_id,
model_name=model_display_name,
business_logic_model_id=business_logic_model_id,
- business_logic_model_name=business_logic_model_display_name)
+ business_logic_model_name=business_logic_model_display_name,
+ skill_names=skill_names,
+ prompt_template_id=agent_info.get(
+ "prompt_template_id"),
+ prompt_template_name=agent_info.get("prompt_template_name"))
return agent_info
async def import_agent_impl(
agent_info: ExportAndImportDataFormat,
authorization: str = Header(None),
- force_import: bool = False
+ force_import: bool = False,
+ skill_name_to_id: Optional[Dict[str, int]] = None
):
"""
Import agent using DFS.
@@ -1237,9 +1787,9 @@ async def import_agent_by_agent_id(
enabled=True,
params=tool.params))
# check the validity of the agent parameters
- if import_agent_info.max_steps <= 0 or import_agent_info.max_steps > 20:
+ if import_agent_info.max_steps <= 0 or import_agent_info.max_steps > 30:
raise ValueError(
- f"Invalid max steps: {import_agent_info.max_steps}. max steps must be greater than 0 and less than 20.")
+ f"Invalid max steps: {import_agent_info.max_steps}. max steps must be greater than 0 and less than 30.")
if not import_agent_info.name.isidentifier():
raise ValueError(
f"Invalid agent name: {import_agent_info.name}. agent name must be a valid python variable name.")
@@ -1275,8 +1825,11 @@ async def import_agent_by_agent_id(
"model_name": import_agent_info.model_name,
"business_logic_model_id": business_logic_model_id,
"business_logic_model_name": import_agent_info.business_logic_model_name,
+ "prompt_template_id": import_agent_info.prompt_template_id or SYSTEM_PROMPT_TEMPLATE_ID,
+ "prompt_template_name": import_agent_info.prompt_template_name or SYSTEM_PROMPT_TEMPLATE_NAME,
"max_steps": import_agent_info.max_steps,
"provide_run_summary": import_agent_info.provide_run_summary,
+ "verification_config": getattr(import_agent_info, "verification_config", None),
"duty_prompt": import_agent_info.duty_prompt,
"constraint_prompt": import_agent_info.constraint_prompt,
"few_shots_prompt": import_agent_info.few_shots_prompt,
@@ -1300,7 +1853,8 @@ async def import_agent_by_agent_id(
release_note="Initial version from Agent Market"
)
except Exception as e:
- logger.warning(f"Failed to auto-publish version v1 for agent {new_agent_id}: {str(e)}")
+ logger.warning(
+ f"Failed to auto-publish version v1 for agent {new_agent_id}: {str(e)}")
return new_agent_id
@@ -1329,12 +1883,11 @@ async def clear_agent_new_mark_impl(agent_id: int, tenant_id: str, user_id: str)
user_id (str): User ID (for audit purposes)
"""
rowcount = clear_agent_new_mark(agent_id, tenant_id, user_id)
- logger.info(f"clear_agent_new_mark_impl called for agent_id={agent_id}, tenant_id={tenant_id}, user_id={user_id}, affected_rows={rowcount}")
+ logger.info(
+ f"clear_agent_new_mark_impl called for agent_id={agent_id}, tenant_id={tenant_id}, user_id={user_id}, affected_rows={rowcount}")
return rowcount
-
-
async def list_all_agent_info_impl(tenant_id: str, user_id: str) -> list[dict]:
"""
list all agent info
@@ -1380,7 +1933,8 @@ async def list_all_agent_info_impl(tenant_id: str, user_id: str) -> list[dict]:
# Apply visibility filter for DEV/USER based on group overlap
if not can_edit_all:
- agent_group_ids = set(convert_string_to_list(agent.get("group_ids")))
+ agent_group_ids = set(
+ convert_string_to_list(agent.get("group_ids")))
ingroup_permission = agent.get("ingroup_permission")
is_creator = str(agent.get("created_by")) == str(user_id)
# Hide agent if: no group overlap OR (ingroup_permission is PRIVATE AND user is not creator)
@@ -1408,23 +1962,24 @@ async def list_all_agent_info_impl(tenant_id: str, user_id: str) -> list[dict]:
simple_agent_list: list[dict] = []
for entry in enriched_agents:
agent = entry["raw_agent"]
- unavailable_reasons = list(dict.fromkeys(entry["unavailable_reasons"]))
+ unavailable_reasons = list(
+ dict.fromkeys(entry["unavailable_reasons"]))
model_id = agent.get("model_id")
model_info = None
if model_id is not None:
if model_id not in model_cache:
- model_cache[model_id] = get_model_by_model_id(model_id, tenant_id)
+ model_cache[model_id] = get_model_by_model_id(
+ model_id, tenant_id)
model_info = model_cache.get(model_id)
- # Permission logic:
- # - If creator or can_edit_all: PERMISSION_EDIT
- # - Otherwise: use ingroup_permission, default to PERMISSION_READ if None
- if can_edit_all or str(agent.get("created_by")) == str(user_id):
- permission = PERMISSION_EDIT
- else:
- ingroup_permission = agent.get("ingroup_permission")
- permission = ingroup_permission if ingroup_permission is not None else PERMISSION_READ
+ # Permission logic (ASSET_OWNER-scoped + non-ASSET_OWNER role => READ_ONLY first):
+ permission = resolve_agent_list_permission(
+ user_role=user_role,
+ agent=agent,
+ user_id=user_id,
+ can_edit_all=can_edit_all,
+ )
simple_agent_list.append({
"agent_id": agent["agent_id"],
@@ -1486,8 +2041,9 @@ def _mark_duplicates(groups: dict[str, list[dict]], reason_key: str) -> None:
for duplicate_entry in sorted_entries[1:]:
duplicate_entry["unavailable_reasons"].append(reason_key)
- _mark_duplicates(name_groups, "duplicate_name")
- _mark_duplicates(display_name_groups, "duplicate_display_name")
+ _mark_duplicates(name_groups, AgentUnavailableReason.DUPLICATE_NAME)
+ _mark_duplicates(display_name_groups,
+ AgentUnavailableReason.DUPLICATE_DISPLAY_NAME)
def _collect_model_availability_reasons(agent: dict, tenant_id: str, model_cache: Dict[int, Optional[dict]]) -> list[str]:
@@ -1499,7 +2055,7 @@ def _collect_model_availability_reasons(agent: dict, tenant_id: str, model_cache
model_id=agent.get("model_id"),
tenant_id=tenant_id,
model_cache=model_cache,
- reason_key="model_unavailable"
+ reason_key=AgentUnavailableReason.MODEL_UNAVAILABLE
))
return reasons
@@ -1557,15 +2113,17 @@ def check_agent_availability(
agent_info = search_agent_info_by_agent_id(agent_id, tenant_id)
if not agent_info:
- return False, ["agent_not_found"]
+ return False, [AgentUnavailableReason.AGENT_NOT_FOUND]
# Check tool availability
- tool_info = search_tools_for_sub_agent(agent_id=agent_id, tenant_id=tenant_id)
- tool_id_list = [tool["tool_id"] for tool in tool_info if tool.get("tool_id") is not None]
+ tool_info = search_tools_for_sub_agent(
+ agent_id=agent_id, tenant_id=tenant_id)
+ tool_id_list = [tool["tool_id"]
+ for tool in tool_info if tool.get("tool_id") is not None]
if tool_id_list:
tool_statuses = check_tool_is_available(tool_id_list)
if not all(tool_statuses):
- unavailable_reasons.append("tool_unavailable")
+ unavailable_reasons.append(AgentUnavailableReason.TOOL_UNAVAILABLE)
# Check model availability
model_reasons = _collect_model_availability_reasons(
@@ -1639,7 +2197,20 @@ async def prepare_agent_run(
is_debug=agent_request.is_debug,
override_version_no=agent_request.version_no,
override_model_id=agent_request.model_id,
+ tool_params=agent_request.tool_params,
)
+
+ # Mount conversation-level reusable ContextManager if enabled
+ cm_config = getattr(agent_run_info.agent_config,
+ 'context_manager_config', None)
+ if cm_config and cm_config.enabled:
+ cm = agent_run_manager.get_or_create_context_manager(
+ conversation_id=str(agent_request.conversation_id),
+ config=cm_config,
+ max_steps=agent_run_info.agent_config.max_steps
+ )
+ agent_run_info.context_manager = cm
+
agent_run_manager.register_agent_run(
agent_request.conversation_id, agent_run_info, user_id)
return agent_run_info, memory_context
@@ -1744,18 +2315,19 @@ def _memory_token(message_text: str) -> str:
yield data_chunk
except Exception as run_exc:
logger.error(
- f"Agent run error after memory failure: {str(run_exc)}")
- # Emit an error chunk and terminate the stream immediately
- error_payload = json.dumps(
- {"type": "error", "content": str(run_exc)}, ensure_ascii=False)
- yield f"data: {error_payload}\n\n"
+ "Agent run error after memory failure: %r",
+ run_exc,
+ exc_info=True,
+ )
+ yield _safe_agent_stream_error_chunk()
return
- except Exception as e:
- logger.error(f"Generate stream with memory error: {str(e)}")
- # Emit an error chunk and terminate the stream immediately
- error_payload = json.dumps(
- {"type": "error", "content": str(e)}, ensure_ascii=False)
- yield f"data: {error_payload}\n\n"
+ except Exception as stream_exc:
+ logger.error(
+ "Generate stream with memory error: %r",
+ stream_exc,
+ exc_info=True,
+ )
+ yield _safe_agent_stream_error_chunk()
return
finally:
# Always unregister preprocess task
@@ -1763,7 +2335,6 @@ def _memory_token(message_text: str) -> str:
# Helper function for run_agent_stream, used when user memory is disabled (no memory tokens)
-@monitoring_manager.monitor_endpoint("agent_service.generate_stream_no_memory", exclude_params=["authorization"])
async def generate_stream_no_memory(
agent_request: AgentRequest,
user_id: str,
@@ -1773,7 +2344,6 @@ async def generate_stream_no_memory(
"""Stream agent responses without any memory preprocessing tokens or fallback logic."""
# Prepare run info respecting memory disabled (honor provided user_id/tenant_id)
- monitoring_manager.add_span_event("generate_stream_no_memory.started")
agent_run_info, memory_context = await prepare_agent_run(
agent_request=agent_request,
user_id=user_id,
@@ -1781,10 +2351,7 @@ async def generate_stream_no_memory(
language=language,
allow_memory_search=False,
)
- monitoring_manager.add_span_event("generate_stream_no_memory.completed")
- monitoring_manager.add_span_event(
- "generate_stream_no_memory.streaming.started")
async for data_chunk in _stream_agent_chunks(
agent_request=agent_request,
user_id=user_id,
@@ -1793,11 +2360,8 @@ async def generate_stream_no_memory(
memory_ctx=memory_context,
):
yield data_chunk
- monitoring_manager.add_span_event(
- "generate_stream_no_memory.streaming.completed")
-@monitoring_manager.monitor_endpoint("agent_service.run_agent_stream", exclude_params=["authorization"])
async def run_agent_stream(
agent_request: AgentRequest,
http_request: Request,
@@ -1810,27 +2374,6 @@ async def run_agent_stream(
Start an agent run and stream responses.
If user_id or tenant_id is provided, authorization will be overridden. (Useful in northbound apis)
"""
- import time
-
- # Add initial span attributes for tracking
- monitoring_manager.set_span_attributes(
- agent_id=agent_request.agent_id,
- conversation_id=agent_request.conversation_id,
- is_debug=agent_request.is_debug,
- skip_user_save=skip_user_save,
- has_override_user_id=user_id is not None,
- has_override_tenant_id=tenant_id is not None,
- query_length=len(agent_request.query) if agent_request.query else 0,
- history_count=len(
- agent_request.history) if agent_request.history else 0,
- minio_files_count=len(
- agent_request.minio_files) if agent_request.minio_files else 0
- )
-
- # Step 1: Resolve user tenant language
- resolve_start_time = time.time()
- monitoring_manager.add_span_event("user_resolution.started")
-
resolved_user_id, resolved_tenant_id, language = _resolve_user_tenant_language(
authorization=authorization,
http_request=http_request,
@@ -1838,25 +2381,7 @@ async def run_agent_stream(
tenant_id=tenant_id,
)
- resolve_duration = time.time() - resolve_start_time
- monitoring_manager.add_span_event("user_resolution.completed", {
- "duration": resolve_duration,
- "user_id": resolved_user_id,
- "tenant_id": resolved_tenant_id,
- "language": language
- })
- monitoring_manager.set_span_attributes(
- resolved_user_id=resolved_user_id,
- resolved_tenant_id=resolved_tenant_id,
- language=language,
- user_resolution_duration=resolve_duration
- )
-
- # Step 2: Save user message (if needed)
if not agent_request.is_debug and not skip_user_save:
- save_start_time = time.time()
- monitoring_manager.add_span_event("user_message_save.started")
-
save_messages(
agent_request,
target=MESSAGE_ROLE["USER"],
@@ -1864,56 +2389,39 @@ async def run_agent_stream(
tenant_id=resolved_tenant_id,
)
- save_duration = time.time() - save_start_time
- monitoring_manager.add_span_event("user_message_save.completed", {
- "duration": save_duration
- })
- monitoring_manager.set_span_attributes(
- user_message_saved=True,
- user_message_save_duration=save_duration
- )
- else:
- monitoring_manager.add_span_event("user_message_save.skipped", {
- "reason": "debug_mode" if agent_request.is_debug else "skip_user_save_flag"
- })
- monitoring_manager.set_span_attributes(user_message_saved=False)
-
- # Step 3: Build memory context (skip for debug mode)
- memory_start_time = time.time()
- monitoring_manager.add_span_event("memory_context_build.started")
-
memory_ctx_preview = build_memory_context(
resolved_user_id, resolved_tenant_id, agent_request.agent_id, skip_query=agent_request.is_debug
)
-
- memory_duration = time.time() - memory_start_time
memory_enabled = memory_ctx_preview.user_config.memory_switch
- monitoring_manager.add_span_event("memory_context_build.completed", {
- "duration": memory_duration,
- "memory_enabled": memory_enabled,
- "agent_share_option": getattr(memory_ctx_preview.user_config, "agent_share_option", "unknown"),
- "debug_mode": agent_request.is_debug
- })
- monitoring_manager.set_span_attributes(
+
+ agent_metadata = monitoring_manager.bind_agent_context(AgentRunMetadata(
+ agent_id=agent_request.agent_id,
+ conversation_id=agent_request.conversation_id,
+ user_id=resolved_user_id,
+ tenant_id=resolved_tenant_id,
+ query=agent_request.query,
+ is_debug=agent_request.is_debug,
+ language=language,
memory_enabled=memory_enabled,
- memory_context_build_duration=memory_duration,
- agent_share_option=getattr(
- memory_ctx_preview.user_config, "agent_share_option", "unknown")
- )
+ history_count=len(
+ agent_request.history) if agent_request.history else 0,
+ minio_files_count=len(
+ agent_request.minio_files) if agent_request.minio_files else 0,
+ extra_metadata={
+ "agent_share_option": getattr(
+ memory_ctx_preview.user_config,
+ "agent_share_option",
+ "unknown",
+ ),
+ "skip_user_save": skip_user_save,
+ "has_override_user_id": user_id is not None,
+ "has_override_tenant_id": tenant_id is not None,
+ },
+ ))
- # Step 4: Choose streaming strategy
- strategy_start_time = time.time()
use_memory_stream = memory_enabled and not agent_request.is_debug
- monitoring_manager.add_span_event("streaming_strategy.selected", {
- "strategy": "with_memory" if use_memory_stream else "no_memory",
- "memory_enabled": memory_enabled,
- "is_debug": agent_request.is_debug
- })
-
if use_memory_stream:
- monitoring_manager.add_span_event(
- "stream_generator.memory_stream.creating")
stream_gen = generate_stream_with_memory(
agent_request,
user_id=resolved_user_id,
@@ -1921,8 +2429,6 @@ async def run_agent_stream(
language=language,
)
else:
- monitoring_manager.add_span_event(
- "stream_generator.no_memory_stream.creating")
stream_gen = generate_stream_no_memory(
agent_request,
user_id=resolved_user_id,
@@ -1930,43 +2436,25 @@ async def run_agent_stream(
language=language,
)
- strategy_duration = time.time() - strategy_start_time
- monitoring_manager.add_span_event("streaming_strategy.completed", {
- "duration": strategy_duration,
- "selected_strategy": "with_memory" if use_memory_stream else "no_memory"
- })
- monitoring_manager.set_span_attributes(
- streaming_strategy=(
- "with_memory" if use_memory_stream else "no_memory"),
- strategy_selection_duration=strategy_duration
- )
-
- # Step 5: Create streaming response
- response_start_time = time.time()
- monitoring_manager.add_span_event("streaming_response.creating")
+ async def stream_with_agent_context():
+ try:
+ with agent_monitoring_context(agent_metadata):
+ async for data_chunk in stream_gen:
+ yield data_chunk
+ except Exception as stream_exc:
+ logger.error(
+ "Agent stream response error: %r",
+ stream_exc,
+ exc_info=True,
+ )
+ yield _safe_agent_stream_error_chunk()
- response = StreamingResponse(
- stream_gen,
+ return StreamingResponse(
+ stream_with_agent_context(),
media_type="text/event-stream",
headers={"Cache-Control": "no-cache", "Connection": "keep-alive"},
)
- response_duration = time.time() - response_start_time
- monitoring_manager.add_span_event("streaming_response.created", {
- "duration": response_duration,
- "media_type": "text/event-stream"
- })
- monitoring_manager.set_span_attributes(
- response_creation_duration=response_duration,
- total_preparation_duration=(time.time() - resolve_start_time)
- )
-
- monitoring_manager.add_span_event("run_agent_stream.preparation_completed", {
- "total_preparation_time": time.time() - resolve_start_time
- })
-
- return response
-
def stop_agent_tasks(conversation_id: int, user_id: str):
"""
@@ -2153,3 +2641,124 @@ def get_sub_agents_recursive(parent_agent_id: int, depth: int = 0, max_depth: in
logger.exception(
f"Failed to get agent call relationship for agent {agent_id}: {str(e)}")
raise ValueError(f"Failed to get agent call relationship: {str(e)}")
+
+
+async def export_agent_with_skills_impl(
+    agent_id: int,
+    authorization: str,
+    version_no: int = 0,
+) -> dict:
+    """Export an agent, bundling its skills into a ZIP archive when it has any.
+
+    Returns:
+        - {"_zip": True, "data": bytes, "filename": str} when skill entries were collected;
+          the archive holds agent.json plus one skills/<skill_name>.zip per skill.
+        - The result of export_agent_dict_impl when no skill entries were collected.
+    """
+    _, tenant_id, _ = get_current_user_info(authorization)
+
+    bundled_skills = collect_skill_zip_entries(
+        agent_id=agent_id, tenant_id=tenant_id, version_no=version_no
+    )
+    if not bundled_skills:
+        # No skills: fall back to the plain (non-archive) export.
+        return await export_agent_dict_impl(
+            agent_id, authorization, version_no=version_no
+        )
+
+    agent_json_str = await export_agent_impl(
+        agent_id, authorization, version_no=version_no
+    )
+
+    archive = io.BytesIO()
+    with zipfile.ZipFile(archive, "w", zipfile.ZIP_DEFLATED) as bundle:
+        bundle.writestr("agent.json", agent_json_str)
+        for skill in bundled_skills:
+            # Each skill travels as its own nested archive, decoded from base64.
+            nested_zip = base64.b64decode(skill.skill_zip_base64)
+            bundle.writestr(f"skills/{skill.skill_name}.zip", nested_zip)
+
+    # The archive filename comes from the agent name; "anonymous" covers a missing record or name.
+    agent_info = search_agent_info_by_agent_id(
+        agent_id=agent_id, tenant_id=tenant_id, version_no=version_no
+    )
+    if agent_info:
+        agent_name = agent_info.get("name", "anonymous")
+    else:
+        agent_name = "anonymous"
+
+    return {
+        "_zip": True,
+        "data": archive.getvalue(),
+        "filename": f"{agent_name}.zip",
+    }
+
+
+async def import_agent_with_skills_impl(
+    agent_info: "ExportAndImportDataFormat",
+    skills: List[SkillZipEntry],
+    authorization: str,
+    force_import: bool = False
+):
+    """Import an agent together with the skills bundled in its ZIP export.
+
+    Steps, in order:
+    1. Compare the bundled skill names with the skills already listed for the tenant.
+    2. If any name collides, raise SkillDuplicateError before anything is created.
+    3. Create every bundled skill from its decoded ZIP bytes via SkillService.
+    4. Run the standard agent import, passing the skill-name -> new-skill-id mapping.
+    5. Link each new skill to the imported main agent with version_no=0.
+
+    Returns the agent id mapping produced by import_agent_impl.
+    """
+    from services.skill_service import SkillService
+
+    user_id, tenant_id, _ = get_current_user_info(authorization)
+
+    # Plain dict semantics: if the bundle repeats a skill name, the last entry wins.
+    zip_base64_by_name = {}
+    for entry in skills:
+        zip_base64_by_name[entry.skill_name] = entry.skill_zip_base64
+
+    names_in_tenant = {s.get("name") for s in skill_db.list_skills(tenant_id)}
+    duplicate_names = list(set(zip_base64_by_name.keys()) & names_in_tenant)
+    if duplicate_names:
+        raise SkillDuplicateError(duplicate_names)
+
+    skill_name_to_id: Dict[str, int] = {}
+    skill_service = SkillService(tenant_id=tenant_id)
+    for skill_name, zip_base64 in zip_base64_by_name.items():
+        created = skill_service.create_skill_from_zip_bytes(
+            zip_bytes=base64.b64decode(zip_base64),
+            skill_name=skill_name,
+            source="导入",
+            user_id=user_id,
+            tenant_id=tenant_id,
+            skip_duplicate_check=True  # the whole bundle was checked above
+        )
+        skill_name_to_id[skill_name] = created.get("skill_id")
+
+    agent_id_mapping = await import_agent_impl(
+        agent_info, authorization, force_import,
+        skill_name_to_id=skill_name_to_id
+    )
+
+    main_agent_id = agent_id_mapping.get(agent_info.agent_id)
+    if not main_agent_id:
+        # No usable id for the main agent, so there is nothing to attach the skills to.
+        return agent_id_mapping
+
+    for new_skill_id in skill_name_to_id.values():
+        skill_db.create_or_update_skill_by_skill_info(
+            skill_info=SkillInstanceInfoRequest(
+                skill_id=new_skill_id,
+                agent_id=main_agent_id,
+                enabled=True,
+                version_no=0
+            ),
+            tenant_id=tenant_id,
+            user_id=user_id,
+            version_no=0
+        )
+
+    return agent_id_mapping
diff --git a/backend/services/agent_version_service.py b/backend/services/agent_version_service.py
index e8a443e3f..8ed6e14d4 100644
--- a/backend/services/agent_version_service.py
+++ b/backend/services/agent_version_service.py
@@ -22,6 +22,7 @@
delete_tool_snapshot,
delete_relation_snapshot,
delete_skill_snapshot,
+ restore_agent_draft,
get_next_version_no,
delete_version,
SOURCE_TYPE_NORMAL,
@@ -32,6 +33,7 @@
)
from database.model_management_db import get_model_by_model_id
from utils.str_utils import convert_string_to_list
+from consts.agent_unavailable_reasons import AgentUnavailableReason
logger = logging.getLogger("agent_version_service")
@@ -47,6 +49,17 @@ def _remove_audit_fields_for_insert(data: dict) -> None:
data.pop('delete_flag', None)
+def _build_sub_agent_relations(relations: List[dict]) -> List[dict]:
+    """Project relation rows onto {agent_id, version_no} payloads; a missing version maps to None."""
+    payloads: List[dict] = []
+    for relation in relations:
+        payloads.append({
+            'agent_id': relation['selected_agent_id'],
+            'version_no': relation.get('selected_agent_version_no'),
+        })
+    return payloads
+
+
def publish_version_impl(
agent_id: int,
tenant_id: str,
@@ -90,11 +103,18 @@ def publish_version_impl(
_remove_audit_fields_for_insert(tool_snapshot)
insert_tool_snapshot(tool_snapshot)
- # Insert relation snapshots
+ # Insert relation snapshots with pinned child agent versions
for rel in relations_draft:
+ child_id = rel['selected_agent_id']
+ child_version = query_current_version_no(child_id, tenant_id)
+ if child_version is None:
+ raise ValueError(
+ f"Sub-agent {child_id} has no published version; publish the sub-agent first."
+ )
rel_snapshot = rel.copy()
rel_snapshot.pop('version_no', None)
rel_snapshot['version_no'] = new_version_no
+ rel_snapshot['selected_agent_version_no'] = child_version
_remove_audit_fields_for_insert(rel_snapshot)
insert_relation_snapshot(rel_snapshot)
@@ -124,7 +144,9 @@ def publish_version_impl(
'source_type': source_type,
'source_version_no': source_version_no,
'status': STATUS_RELEASED,
+ 'is_a2a': publish_as_a2a,
'created_by': user_id,
+ 'updated_by': user_id,
}
version_id = insert_version(version_data)
@@ -267,6 +289,7 @@ def get_version_detail_impl(
# Extract sub_agent_id_list from relations
result['sub_agent_id_list'] = [r['selected_agent_id'] for r in relations_snapshot]
+ result['sub_agent_relations'] = _build_sub_agent_relations(relations_snapshot)
# Get skill instances for this version (from ag_skill_instance_t with version_no)
from database import skill_db as skill_db_module
@@ -335,21 +358,18 @@ def _check_version_snapshot_availability(
# Check if agent info exists
if not agent_info:
- return False, ["agent_not_found"]
+ return False, [AgentUnavailableReason.AGENT_NOT_FOUND]
# Check model availability
model_id = agent_info.get('model_id')
if model_id is None or model_id == 0:
- unavailable_reasons.append("model_not_configured")
+ unavailable_reasons.append(AgentUnavailableReason.MODEL_NOT_CONFIGURED)
- # Check tools availability
- if not tool_instances:
- unavailable_reasons.append("no_tools")
- else:
- # Check if at least one tool is enabled
+ # Check tools availability (only when tools are configured)
+ if tool_instances:
has_enabled_tool = any(t.get('enabled', True) for t in tool_instances)
if not has_enabled_tool:
- unavailable_reasons.append("all_tools_disabled")
+ unavailable_reasons.append(AgentUnavailableReason.ALL_TOOLS_DISABLED)
return len(unavailable_reasons) == 0, unavailable_reasons
@@ -360,9 +380,11 @@ def rollback_version_impl(
target_version_no: int,
) -> dict:
"""
- Rollback to a specific version by updating current_version_no only.
- This does NOT create a new version - it simply points the draft to an existing version.
- The actual version creation happens when user clicks "publish".
+ Rollback to a specific version by restoring draft (version_no=0) with the target version's data.
+ This copies all snapshot data (agent, tools, relations, skills) from the target version into the draft,
+ then updates current_version_no to point to the target version.
+
+ The user can then continue editing or re-publish from the restored state.
Args:
agent_id: Agent ID
@@ -377,15 +399,35 @@ def rollback_version_impl(
if not version:
raise ValueError(f"Version {target_version_no} not found")
- # Update current_version_no in draft to point to target version
- rows_affected = update_agent_current_version(
+ # Get target version's snapshot data
+ (target_agent, target_tools,
+ target_relations) = query_agent_snapshot(agent_id, tenant_id, target_version_no)
+ if not target_agent:
+ raise ValueError(f"Agent snapshot for version {target_version_no} not found")
+
+ # Ensure the draft still exists before attempting an in-place restore.
+ draft_agent, _, _ = query_agent_draft(agent_id, tenant_id)
+ if not draft_agent:
+ raise ValueError("Agent draft not found")
+
+ # Get skill snapshots for target version
+ from database import skill_db as skill_db_module
+ target_skills = skill_db_module.query_skill_instances_by_agent_id(
agent_id=agent_id,
tenant_id=tenant_id,
- current_version_no=target_version_no,
+ version_no=target_version_no,
)
- if rows_affected == 0:
- raise ValueError("Agent draft not found")
+ # Atomically restore draft from target version snapshot
+ restore_agent_draft(
+ agent_id=agent_id,
+ tenant_id=tenant_id,
+ target_version_no=target_version_no,
+ target_agent_snapshot=target_agent,
+ target_tool_snapshots=target_tools,
+ target_relation_snapshots=target_relations,
+ target_skill_snapshots=target_skills,
+ )
return {
"message": f"Successfully rolled back to version {target_version_no}",
@@ -687,6 +729,7 @@ def _get_version_detail_or_draft(
# Add tools (only enabled tools)
result['tools'] = [t for t in tools_draft if t.get('enabled', True)]
result['sub_agent_id_list'] = [r['selected_agent_id'] for r in relations_draft]
+ result['sub_agent_relations'] = _build_sub_agent_relations(relations_draft)
# Get draft skill instances (version_no=0)
skills_draft = skill_db_module.query_skill_instances_by_agent_id(
@@ -760,12 +803,11 @@ async def list_published_agents_impl(
CAN_EDIT_ALL_USER_ROLES,
get_user_tenant_by_user_id,
query_group_ids_by_user,
- PERMISSION_EDIT,
- PERMISSION_READ,
get_model_by_model_id,
check_agent_availability,
_apply_duplicate_name_availability_rules,
)
+ from services.asset_owner_visibility import resolve_agent_list_permission
from database.agent_version_db import query_agent_snapshot
# Get user role for permission check
@@ -798,7 +840,8 @@ async def list_published_agents_impl(
# Apply visibility filter for DEV/USER based on group overlap
if not can_edit_all:
agent_group_ids = set(convert_string_to_list(agent.get("group_ids")))
- if len(user_group_ids.intersection(agent_group_ids)) == 0:
+ is_creator = str(agent.get("created_by")) == str(user_id)
+ if not is_creator and len(user_group_ids.intersection(agent_group_ids)) == 0:
continue
agent_id = agent.get("agent_id")
@@ -834,9 +877,10 @@ async def list_published_agents_impl(
# Extract sub_agent_id_list from relations
agent_info['sub_agent_id_list'] = [r['selected_agent_id'] for r in relations_snapshot]
+ agent_info['sub_agent_relations'] = _build_sub_agent_relations(relations_snapshot)
- # Add published version info
- agent_info['published_version_no'] = current_version_no
+ # Add current version info
+ agent_info['current_version_no'] = current_version_no
# Check agent availability using the shared function
_, unavailable_reasons = check_agent_availability(
@@ -869,7 +913,12 @@ async def list_published_agents_impl(
model_cache[model_id] = get_model_by_model_id(model_id, tenant_id)
model_info = model_cache.get(model_id)
- permission = PERMISSION_EDIT if can_edit_all or str(agent.get("created_by")) == str(user_id) else PERMISSION_READ
+ permission = resolve_agent_list_permission(
+ user_role=user_role,
+ agent=agent,
+ user_id=user_id,
+ can_edit_all=can_edit_all,
+ )
simple_agent_list.append({
"agent_id": agent.get("agent_id"),
@@ -885,7 +934,9 @@ async def list_published_agents_impl(
"is_new": agent.get("is_new", False),
"group_ids": agent.get("group_ids", []),
"permission": permission,
- "published_version_no": agent.get("published_version_no"),
+ "current_version_no": agent.get("current_version_no"),
+ "greeting_message": agent.get("greeting_message"),
+ "example_questions": agent.get("example_questions"),
})
return simple_agent_list
diff --git a/backend/services/aidp_service.py b/backend/services/aidp_service.py
new file mode 100644
index 000000000..acb18142e
--- /dev/null
+++ b/backend/services/aidp_service.py
@@ -0,0 +1,99 @@
+"""
+AIDP Service Layer
+Handles API calls to AIDP for paginated knowledge base listing.
+"""
+import logging
+from typing import Any, Dict
+from urllib.parse import urljoin
+
+import httpx
+
+from consts.error_code import ErrorCode
+from consts.exceptions import AppException
+from nexent.utils.http_client_manager import http_client_manager
+
+logger = logging.getLogger("aidp_service")
+
+_LIST_PATH = "/KnowledgeBase/Tenants/aidp/KnowledgeBases"
+
+
+def _validate_params(server_url: str, api_key: str) -> str:
+    """Check the AIDP connection settings and return the base URL without trailing slashes.
+
+    Raises AppException(ErrorCode.AIDP_CONFIG_INVALID) for an empty or non-string
+    server_url, a server_url without an http(s) scheme, or an empty or non-string api_key.
+    """
+    # Checks run in order; only the first failing one is reported.
+    problem = None
+    if not (server_url and isinstance(server_url, str)):
+        problem = "AIDP server_url is required and must be a non-empty string"
+    elif not server_url.startswith(("http://", "https://")):
+        problem = "AIDP server_url must start with http:// or https://"
+    elif not (api_key and isinstance(api_key, str)):
+        problem = "AIDP api_key is required and must be a non-empty string"
+
+    if problem is not None:
+        raise AppException(ErrorCode.AIDP_CONFIG_INVALID, problem)
+    return server_url.rstrip("/")
+
+
+def fetch_aidp_knowledge_bases_impl(
+    server_url: str,
+    api_key: str,
+    page: int = 1,
+    page_size: int = 20,
+) -> Dict[str, Any]:
+    """Fetch one page of knowledge bases from the AIDP API and return the decoded JSON object.
+
+    Raises AppException with AIDP_CONFIG_INVALID (bad server_url/api_key), AIDP_CONNECTION_ERROR
+    (httpx.RequestError), AIDP_AUTH_ERROR (HTTP 401/403) or AIDP_SERVICE_ERROR (any other HTTP
+    error status, a ValueError while decoding, or a JSON payload that is not an object).
+    """
+    base_url = _validate_params(server_url, api_key)
+
+    # NOTE(review): _LIST_PATH is absolute, so urljoin discards any path component of
+    # server_url (only scheme and host survive) -- confirm that this is intended.
+    list_url = urljoin(f"{base_url}/", f"{_LIST_PATH}?page={page}&page_size={page_size}")
+    logger.info("Fetching AIDP knowledge bases from %s", list_url)
+
+    request_headers = {
+        "Authorization": f"Bearer {api_key}",
+        "Content-Type": "application/json",
+    }
+
+    try:
+        client = http_client_manager.get_sync_client(
+            base_url=base_url, timeout=20.0, verify_ssl=True,
+        )
+        response = client.get(list_url, headers=request_headers)
+        response.raise_for_status()
+        payload = response.json()
+        if isinstance(payload, dict):
+            return payload
+        raise AppException(
+            ErrorCode.AIDP_SERVICE_ERROR,
+            "Unexpected AIDP knowledge base response format",
+        )
+    except httpx.RequestError as e:
+        logger.exception("AIDP request failed: %s", e)
+        raise AppException(
+            ErrorCode.AIDP_CONNECTION_ERROR,
+            f"AIDP API request failed: {str(e)}",
+        )
+    except httpx.HTTPStatusError as e:
+        status_code = e.response.status_code
+        logger.exception("AIDP API HTTP error: %s, status_code: %s", e, status_code)
+        if status_code in (401, 403):
+            error_code = ErrorCode.AIDP_AUTH_ERROR
+            message = f"AIDP authentication failed: {str(e)}"
+        else:
+            error_code = ErrorCode.AIDP_SERVICE_ERROR
+            message = f"AIDP API HTTP error {status_code}: {str(e)}"
+        raise AppException(error_code, message)
+    except ValueError as e:
+        # Presumably reached when response.json() cannot decode the body -- TODO confirm.
+        logger.exception("Failed to parse AIDP API response: %s", e)
+        raise AppException(
+            ErrorCode.AIDP_SERVICE_ERROR,
+            f"Failed to parse AIDP API response: {str(e)}",
+        )
diff --git a/backend/services/asset_owner_visibility.py b/backend/services/asset_owner_visibility.py
new file mode 100644
index 000000000..24cb697b2
--- /dev/null
+++ b/backend/services/asset_owner_visibility.py
@@ -0,0 +1,104 @@
+"""ASSET_OWNER tenant visibility filters, feature flags, and response post-processing."""
+
+from typing import Any, Dict, List, Optional
+
+from consts.const import (
+ AGENT_PROMPTS_HIDDEN_FLAG,
+ ASSET_OWNER_ROLE,
+ ASSET_OWNER_TENANT_ID,
+ ENABLE_ASSET_OWNER_ROLE,
+ PERMISSION_EDIT,
+ PERMISSION_READ,
+)
+from consts.exceptions import ValidationError
+
+
+_PROMPT_FIELDS = ("duty_prompt", "constraint_prompt", "few_shots_prompt")
+
+
+ASSET_OWNER_RESOURCES_ROUTE = "/asset-owner-resources"
+
+
+def is_asset_owner_enabled() -> bool:
+    """Expose the ENABLE_ASSET_OWNER_ROLE constant so callers can test the feature flag."""
+    return ENABLE_ASSET_OWNER_ROLE
+
+
+def require_asset_owner_enabled() -> None:
+    """Guard for ASSET_OWNER-only code paths: raises ValidationError while the feature flag is off."""
+    if not ENABLE_ASSET_OWNER_ROLE:
+        raise ValidationError("ASSET_OWNER feature is not enabled")
+
+
+def filter_accessible_routes_for_asset_owner_feature(
+    accessible_routes: List[str],
+) -> List[str]:
+    """Drop the asset-owner nav route while the feature flag is off; otherwise return the input list as is."""
+    if not ENABLE_ASSET_OWNER_ROLE:
+        return [route for route in accessible_routes if route != ASSET_OWNER_RESOURCES_ROUTE]
+    return accessible_routes
+
+
+def can_view_skill(caller_tenant_id: Optional[str], skill_tenant_id: Optional[str]) -> bool:
+    """
+    Decide whether the caller may view a skill and its files.
+
+    A skill whose tenant_id equals ASSET_OWNER_TENANT_ID is visible only to callers whose
+    tenant id equals it too; every other skill is visible to any caller.
+    NOTE(review): a legacy "" skill tenant id is not special-cased here (it only matches
+    if ASSET_OWNER_TENANT_ID is itself "") -- confirm whether it should be.
+    """
+    skill_is_asset_owner_scoped = skill_tenant_id == ASSET_OWNER_TENANT_ID
+    return (not skill_is_asset_owner_scoped) or caller_tenant_id == ASSET_OWNER_TENANT_ID
+
+
+def resolve_agent_list_permission(
+    user_role: str,
+    agent: Dict[str, Any],
+    user_id: str,
+    can_edit_all: bool,
+) -> str:
+    """Work out the permission shown for one agent in a list response (first match wins).
+
+    1. Agent in the ASSET_OWNER tenant and caller role is not ASSET_OWNER -> PERMISSION_READ.
+    2. can_edit_all is set, or created_by equals user_id as strings -> PERMISSION_EDIT.
+    3. Otherwise the agent's ingroup_permission, or PERMISSION_READ when that is None.
+    """
+    caller_is_asset_owner = (user_role or "").upper() == ASSET_OWNER_ROLE
+    if agent.get("tenant_id") == ASSET_OWNER_TENANT_ID and not caller_is_asset_owner:
+        return PERMISSION_READ
+    if can_edit_all or str(agent.get("created_by")) == str(user_id):
+        return PERMISSION_EDIT
+    group_level = agent.get("ingroup_permission")
+    return PERMISSION_READ if group_level is None else group_level
+
+
+def apply_agent_detail_prompt_visibility(
+    caller_tenant_id: Optional[str],
+    agent_info: Dict[str, Any],
+) -> Dict[str, Any]:
+    """
+    Return a shallow copy of agent_info with the system prompts masked when required.
+
+    Masking applies only when the agent belongs to the ASSET_OWNER tenant and the caller
+    does not: every field in _PROMPT_FIELDS is set to None and the key named by
+    AGENT_PROMPTS_HIDDEN_FLAG is set to True so clients can render a permission-denied
+    state. The input dict itself is never modified.
+    """
+    masked = dict(agent_info)
+    caller_is_outsider = caller_tenant_id != ASSET_OWNER_TENANT_ID
+    if caller_is_outsider and masked.get("tenant_id") == ASSET_OWNER_TENANT_ID:
+        for prompt_field in _PROMPT_FIELDS:
+            masked[prompt_field] = None
+        masked[AGENT_PROMPTS_HIDDEN_FLAG] = True
+    return masked
+
+
+def postprocess_knowledge_visibility(
+    items: List[Dict[str, Any]],
+    caller_role: Optional[str],
+    caller_tenant_id: Optional[str],
+) -> List[Dict[str, Any]]:
+    """Placeholder hook: hands back ``items`` untouched; the caller arguments are accepted but unused."""
+    del caller_role, caller_tenant_id  # intentionally unused for now
+    return items
diff --git a/backend/services/auto_summary_scheduler.py b/backend/services/auto_summary_scheduler.py
new file mode 100644
index 000000000..5bc44e442
--- /dev/null
+++ b/backend/services/auto_summary_scheduler.py
@@ -0,0 +1,211 @@
+"""
+Background scheduler that periodically checks knowledge bases with
+auto-summary enabled and regenerates summaries as needed.
+"""
+import logging
+import threading
+import time
+from datetime import datetime, timedelta
+from typing import Optional
+
+from consts.scheduler import (
+ FREQUENCY_MAP,
+ SCHEDULER_CHECK_INTERVAL_SECONDS,
+)
+from database.knowledge_db import get_knowledge_bases_for_auto_summary
+from services.vectordatabase_service import ElasticSearchService, get_vector_db_core
+from utils.config_utils import tenant_config_manager
+
+logger = logging.getLogger(__name__)
+
+# Check interval from centralized config
+CHECK_INTERVAL_SECONDS = SCHEDULER_CHECK_INTERVAL_SECONDS
+
+# Track knowledge bases currently being processed to avoid duplicates
+_in_flight: set = set()
+
+
+def _parse_last_summary_time(last_summary_time) -> Optional[datetime]:
+    """Coerce a datetime or ISO-format string into a naive datetime; None if that is not possible.
+
+    tzinfo is dropped (not converted) on both paths, so results compare with naive datetime.now().
+    """
+    if isinstance(last_summary_time, str):
+        try:
+            last_summary_time = datetime.fromisoformat(last_summary_time)
+        except (ValueError, TypeError):
+            return None
+    # Strings with a UTC offset parse as aware datetimes; strip tzinfo exactly like datetime inputs.
+    return last_summary_time.replace(tzinfo=None) if isinstance(last_summary_time, datetime) else None
+
+
+def _is_due_for_summary(last_summary_time, frequency: str, last_doc_update_time) -> bool:
+    """Decide whether a knowledge base should have its summary regenerated now.
+
+    Args:
+        last_summary_time: When the summary was last generated (datetime, ISO string or None)
+        frequency: Key into FREQUENCY_MAP (e.g., '3h', '1d'); unknown keys are never due
+        last_doc_update_time: When documents were last added or deleted (same formats)
+
+    Returns:
+        True when no summary exists yet, or when the interval has elapsed and documents
+        changed after the last summary (or no document update time is recorded);
+        False otherwise.
+    """
+    interval = FREQUENCY_MAP.get(frequency)
+    if interval is None:
+        return False
+
+    summarized_at = _parse_last_summary_time(last_summary_time)
+    if summarized_at is None:
+        return True  # Never summarized, do it now
+
+    elapsed = datetime.now() - summarized_at
+    if elapsed < interval:
+        return False  # Still inside the configured interval
+
+    docs_changed_at = _parse_last_summary_time(last_doc_update_time)
+    if docs_changed_at is None:
+        return True  # No doc update time recorded, assume a summary is needed
+
+    if docs_changed_at > summarized_at:
+        return True
+
+    logger.info("Skipping summary: no document changes since last summary")
+    return False
+
+
+def _run_auto_summary_for_kb(index_name: str, tenant_id: str):
+    """Regenerate and store the summary of one knowledge base.
+
+    A request for an index that is still listed in _in_flight is skipped. Exceptions raised
+    while summarizing are logged, not re-raised.
+    """
+    if index_name in _in_flight:
+        logger.info(f"Skipping {index_name}: already being processed")
+        return
+
+    _in_flight.add(index_name)
+    try:
+        logger.info(f"Starting auto-summary for knowledge base: {index_name}")
+        vdb_core = get_vector_db_core()
+        summary_service = ElasticSearchService()
+
+        from utils.document_vector_utils import (
+            process_documents_for_clustering,
+            kmeans_cluster_documents,
+            summarize_clusters_map_reduce,
+            merge_cluster_summaries,
+        )
+
+        # Resolve the tenant's LLM for summarization; model_id stays None when unavailable.
+        model_id = None
+        if tenant_id:
+            try:
+                configured_llm = tenant_config_manager.load_config(tenant_id).get("LLM_ID")
+                if not configured_llm:
+                    logger.warning(f"No LLM_ID configured for tenant {tenant_id}, summary will be placeholder only")
+                else:
+                    model_id = int(configured_llm)
+                    logger.info(f"Using LLM model ID {model_id} for auto-summary (tenant: {tenant_id})")
+            except Exception as e:
+                logger.warning(f"Failed to get LLM_ID from tenant config: {e}")
+
+        # 40 documents: a smaller sample for auto-summary.
+        document_samples, doc_embeddings = process_documents_for_clustering(
+            index_name=index_name,
+            vdb_core=vdb_core,
+            sample_doc_count=40,
+        )
+        if not document_samples:
+            logger.warning(f"No documents found for auto-summary: {index_name}")
+            return
+
+        cluster_summaries = summarize_clusters_map_reduce(
+            document_samples=document_samples,
+            clusters=kmeans_cluster_documents(doc_embeddings, k=None),
+            language="zh",
+            doc_max_words=100,
+            cluster_max_words=150,
+            model_id=model_id,
+            tenant_id=tenant_id,
+        )
+
+        # Per the original author's note, change_summary also updates last_summary_time (not visible here).
+        summary_service.change_summary(
+            index_name=index_name,
+            summary_result=merge_cluster_summaries(cluster_summaries),
+            user_id="auto_scheduler",
+        )
+        logger.info(f"Auto-summary completed for knowledge base: {index_name}")
+
+    except Exception as e:
+        logger.error(f"Auto-summary failed for {index_name}: {e}", exc_info=True)
+    finally:
+        # Always release the index so a later scheduler pass can process it again.
+        _in_flight.discard(index_name)
+
+
+def _scheduler_loop(stop_event: threading.Event):
+    """Body of the background thread: poll for due knowledge bases until stop_event is set."""
+    logger.info("Auto-summary scheduler started")
+    while not stop_event.is_set():
+        try:
+            candidates = get_knowledge_bases_for_auto_summary()
+            logger.info(f"Checking {len(candidates)} knowledge bases for auto-summary")
+            for kb in candidates:
+                if stop_event.is_set():
+                    break
+                is_due = _is_due_for_summary(
+                    kb.get("last_summary_time"),
+                    kb.get("summary_frequency"),
+                    kb.get("last_doc_update_time"),
+                )
+                if not is_due:
+                    continue
+                # Runs synchronously, so knowledge bases are summarized one at a time.
+                _run_auto_summary_for_kb(
+                    index_name=kb["index_name"],
+                    tenant_id=kb.get("tenant_id", ""),
+                )
+        except Exception as e:
+            logger.error(f"Auto-summary scheduler check failed: {e}", exc_info=True)
+
+        # Sleep until the next check, waking early if a stop is requested.
+        stop_event.wait(timeout=CHECK_INTERVAL_SECONDS)
+
+    logger.info("Auto-summary scheduler stopped")
+
+
+class AutoSummaryScheduler:
+    """Owns the auto-summary background thread and the event used to stop it."""
+    def __init__(self):
+        self._stop_event = threading.Event()
+        self._thread: Optional[threading.Thread] = None
+
+    def start(self):
+        """Start the daemon scheduler thread; a no-op (with a warning) if one is still alive."""
+        if self._thread and self._thread.is_alive():
+            logger.warning("Auto-summary scheduler is already running")
+            return
+        self._stop_event.clear()
+        self._thread = threading.Thread(
+            target=_scheduler_loop, args=(self._stop_event,),
+            daemon=True, name="auto-summary-scheduler",
+        )
+        self._thread.start()
+        logger.info("Auto-summary scheduler thread started")
+
+    def stop(self):
+        """Signal the scheduler thread to stop and wait up to 60 seconds for it to exit."""
+        self._stop_event.set()
+        if self._thread:
+            self._thread.join(timeout=60)
+            if self._thread.is_alive():  # join() returns None even on timeout, so check explicitly
+                logger.warning("Auto-summary scheduler thread did not stop within 60 seconds")
+            else:
+                logger.info("Auto-summary scheduler thread stopped")
+
+
+# Singleton instance
+auto_summary_scheduler = AutoSummaryScheduler()
diff --git a/backend/services/cas_service.py b/backend/services/cas_service.py
new file mode 100644
index 000000000..7db3fce1a
--- /dev/null
+++ b/backend/services/cas_service.py
@@ -0,0 +1,424 @@
+import json
+import logging
+import os
+import secrets
+import ssl
+import urllib.parse
+import urllib.request
+from xml.etree.ElementTree import Element
+from dataclasses import dataclass
+from datetime import datetime, timedelta
+from typing import Any, Dict, Optional
+
+import defusedxml.ElementTree as ET
+from defusedxml.common import DefusedXmlException
+
+from consts.const import (
+ CAS_CA_BUNDLE,
+ CAS_CALLBACK_BASE_URL,
+ CAS_EMAIL_ATTRIBUTE,
+ CAS_ENABLED,
+ CAS_LOGIN_MODE,
+ CAS_LOGOUT_URL,
+ CAS_RENEW_BEFORE_SECONDS,
+ CAS_RENEW_TIMEOUT_SECONDS,
+ CAS_ROLE_ATTRIBUTE,
+ CAS_ROLE_MAP_JSON,
+ CAS_SERVER_URL,
+ CAS_SESSION_MAX_AGE_SECONDS,
+ CAS_SSL_VERIFY,
+ CAS_SYNTHETIC_EMAIL_DOMAIN,
+ CAS_TENANT_ATTRIBUTE,
+ CAS_USER_ATTRIBUTE,
+ CAS_VALIDATE_PATH,
+ DEFAULT_TENANT_ID,
+ LOCAL_SESSION_MAX_AGE_SECONDS,
+)
+from database.cas_session_db import (
+ create_cas_session,
+ revoke_cas_session_by_index,
+ revoke_cas_sessions_by_user_id,
+)
+from database.oauth_account_db import get_oauth_account_by_provider
+from database.user_tenant_db import get_user_tenant_by_user_id, upsert_user_tenant
+from services.oauth_service import (
+ create_or_update_oauth_account,
+ find_supabase_user_id_by_email,
+)
+from services.skill_service import init_skill_list_for_tenant
+from services.tool_configuration_service import init_tool_list_for_tenant
+from utils.auth_utils import calculate_expires_at, generate_session_jwt, get_supabase_admin_client
+
+logger = logging.getLogger(__name__)
+
+# Provider key passed when looking up and saving OAuth account records for CAS users.
+CAS_PROVIDER = "cas"
+# Roles accepted after mapping; _map_role downgrades anything else to "USER".
+VALID_ROLES = {"SU", "ADMIN", "DEV", "USER"}
+
+
+class CasAuthenticationError(Exception):
+    """Raised when CAS is not configured or a CAS ticket/response is rejected."""
+
+
+@dataclass
+class CasPrincipal:
+    # Identity extracted from a successful CAS serviceValidate response
+    # (built by parse_service_validate_response).
+    cas_user_id: str  # configured user attribute, falling back to the <user> element
+    email: str  # lower-cased; synthesized from cas_user_id when CAS sends no email
+    username: str  # displayName / name attribute, else cas_user_id
+    role: str  # one of VALID_ROLES after _map_role
+    tenant_id: str  # configured tenant attribute, else DEFAULT_TENANT_ID
+    session_index: str  # SessionIndex / sessionIndex attribute, else the caller's fallback ("" if none)
+    expires_at: datetime  # naive datetime produced by _resolve_expires_at
+
+
+def get_cas_config() -> Dict[str, Any]:
+    """Return the CAS settings exposed to clients.
+
+    CAS counts as enabled only when CAS_ENABLED is set and a server URL is
+    configured. The login mode falls back to "disabled" when CAS is not
+    enabled or the configured mode is not one of button/force/disabled.
+    """
+    is_enabled = CAS_ENABLED and bool(CAS_SERVER_URL)
+    if is_enabled and CAS_LOGIN_MODE in {"button", "force", "disabled"}:
+        login_mode = CAS_LOGIN_MODE
+    else:
+        login_mode = "disabled"
+
+    config: Dict[str, Any] = {
+        "enabled": is_enabled,
+        "login_mode": login_mode,
+        "renew_before_seconds": CAS_RENEW_BEFORE_SECONDS,
+        "renew_timeout_seconds": CAS_RENEW_TIMEOUT_SECONDS,
+        "display_name": "CAS",
+    }
+    return config
+
+
+def build_login_url(redirect: str = "/") -> str:
+    """Build the CAS login URL whose service points at the login callback.
+
+    Args:
+        redirect: Post-login target; normalized to a local path first.
+
+    Raises:
+        CasAuthenticationError: If CAS or the callback base URL is not configured.
+    """
+    _ensure_enabled()
+    safe_redirect = _normalize_redirect(redirect)
+    callback = _build_callback_url("/api/user/cas/callback", {"redirect": safe_redirect})
+    # NOTE(review): the callback is embedded without percent-encoding, so a
+    # redirect containing "&" or "#" is truncated by the CAS server. Encoding
+    # must change together with the callback builder and ticket validation,
+    # otherwise the service URL no longer matches at validation time.
+    login_endpoint = f"{CAS_SERVER_URL}/login"
+    return f"{login_endpoint}?service={callback}"
+
+
+def build_renew_url() -> str:
+    """Build the CAS gateway login URL used for session renewal.
+
+    The service is the renew callback (no query parameters) and
+    ``gateway=true`` is appended to the login URL.
+
+    Raises:
+        CasAuthenticationError: If CAS or the callback base URL is not configured.
+    """
+    _ensure_enabled()
+    callback = _build_callback_url("/api/user/cas/renew_callback", {})
+    login_endpoint = f"{CAS_SERVER_URL}/login"
+    return f"{login_endpoint}?service={callback}&gateway=true"
+
+
+def build_logout_url() -> str:
+    """Build the CAS logout URL, or return "" when none is configured.
+
+    CAS_LOGOUT_URL may be an absolute URL or a path relative to
+    CAS_SERVER_URL. If the resulting URL already has a query string it is
+    returned untouched. Otherwise a ``service`` parameter pointing at
+    CAS_CALLBACK_BASE_URL is appended, percent-encoded so that characters such
+    as "&", "#" or "?" in the base URL cannot corrupt the logout query.
+
+    Raises:
+        CasAuthenticationError: If CAS is not configured.
+    """
+    _ensure_enabled()
+    # Tolerate an unset (None) setting instead of failing on .strip().
+    configured_logout_url = (CAS_LOGOUT_URL or "").strip()
+    if not configured_logout_url:
+        return ""
+
+    parsed_config = urllib.parse.urlsplit(configured_logout_url)
+    if parsed_config.scheme and parsed_config.netloc:
+        logout_url = configured_logout_url
+    else:
+        logout_url = f"{CAS_SERVER_URL}/{configured_logout_url.lstrip('/')}"
+
+    parsed = urllib.parse.urlsplit(logout_url)
+    # Keep an operator-provided query as is, and do not emit a dangling
+    # "service=" when there is no callback base URL to send.
+    if parsed.query or not CAS_CALLBACK_BASE_URL:
+        return logout_url
+
+    query = urllib.parse.urlencode({"service": CAS_CALLBACK_BASE_URL})
+    return urllib.parse.urlunsplit((parsed.scheme, parsed.netloc, parsed.path, query, parsed.fragment))
+
+
+async def login_with_ticket(ticket: str, redirect: str = "/") -> Dict[str, Any]:
+    """Validate a login ticket and create a local session.
+
+    The service URL is rebuilt from the normalized redirect and passed to
+    ticket validation alongside the ticket.
+
+    Raises:
+        CasAuthenticationError: If CAS is not configured or validation fails.
+    """
+    target = _normalize_redirect(redirect)
+    callback_url = _build_callback_url("/api/user/cas/callback", {"redirect": target})
+    cas_principal = validate_service_ticket(ticket, callback_url)
+    session_payload = await _create_project_session(cas_principal, redirect=target)
+    return session_payload
+
+
+async def renew_with_ticket(ticket: str) -> Dict[str, Any]:
+    """Validate a renewal ticket and create a fresh local session (renew=True).
+
+    Raises:
+        CasAuthenticationError: If CAS is not configured or validation fails.
+    """
+    callback_url = _build_callback_url("/api/user/cas/renew_callback", {})
+    cas_principal = validate_service_ticket(ticket, callback_url)
+    session_payload = await _create_project_session(cas_principal, redirect="/", renew=True)
+    return session_payload
+
+
+def validate_service_ticket(ticket: str, service_url: str) -> CasPrincipal:
+    """Validate a CAS service ticket against the CAS server.
+
+    Args:
+        ticket: Service ticket received on the callback request.
+        service_url: Service URL to send with the ticket (callers rebuild the
+            callback URL used at login time).
+
+    Returns:
+        The principal parsed from the validation response; the ticket is used
+        as the fallback session index.
+
+    Raises:
+        CasAuthenticationError: If CAS is not configured, the ticket is empty,
+            or the response is invalid or reports a failure.
+    """
+    _ensure_enabled()
+    if not ticket:
+        raise CasAuthenticationError("CAS ticket is missing")
+
+    validate_path = CAS_VALIDATE_PATH if CAS_VALIDATE_PATH.startswith("/") else f"/{CAS_VALIDATE_PATH}"
+    validate_url = f"{CAS_SERVER_URL}{validate_path}"
+    # The ticket is request-supplied: percent-encode both values so it cannot
+    # inject extra query parameters, and so spaces or "&" in the service URL
+    # do not truncate it or produce an invalid request URL.
+    query = urllib.parse.urlencode({"service": service_url, "ticket": ticket})
+    xml_text = _http_get_text(f"{validate_url}?{query}")
+    # NOTE(review): this logs the full response, including user attributes, at
+    # INFO level; confirm that is acceptable for production logs.
+    logger.info("CAS serviceValidate response: %s", xml_text)
+    return parse_service_validate_response(xml_text, fallback_session_index=ticket)
+
+
+def parse_service_validate_response(xml_text: str, fallback_session_index: str = "") -> CasPrincipal:
+    """Parse a CAS serviceValidate XML response into a CasPrincipal.
+
+    Args:
+        xml_text: Raw XML body returned by the CAS validation endpoint.
+        fallback_session_index: Session index to use when the response carries
+            no SessionIndex/sessionIndex attribute.
+
+    Returns:
+        The principal described by the authenticationSuccess element.
+
+    Raises:
+        CasAuthenticationError: If the XML cannot be parsed, the response
+            contains authenticationFailure, has no authenticationSuccess, or
+            no user id can be determined.
+    """
+    try:
+        root = ET.fromstring(xml_text)
+    except (ET.ParseError, DefusedXmlException) as exc:
+        raise CasAuthenticationError("Invalid CAS validation response") from exc
+
+    # A failure element wins over everything else; its text is the error message.
+    failure = _find_first(root, "authenticationFailure")
+    if failure is not None:
+        raise CasAuthenticationError((failure.text or "CAS authentication failed").strip())
+
+    success = _find_first(root, "authenticationSuccess")
+    if success is None:
+        raise CasAuthenticationError("CAS authentication failed")
+
+    user = _get_child_text(success, "user")
+    attrs_node = _find_first(success, "attributes")
+    attrs = _extract_attributes(attrs_node) if attrs_node is not None else {}
+
+    # Prefer the configured user attribute; fall back to the <user> element.
+    cas_user_id = _attribute_or_default(attrs, CAS_USER_ATTRIBUTE, user) or user
+    if not cas_user_id:
+        raise CasAuthenticationError("CAS user id is missing")
+
+    email = _attribute_or_default(attrs, CAS_EMAIL_ATTRIBUTE, "")
+    username = attrs.get("displayName") or attrs.get("name") or cas_user_id
+    role = _map_role(_attribute_or_default(attrs, CAS_ROLE_ATTRIBUTE, "USER"))
+    tenant_id = _attribute_or_default(attrs, CAS_TENANT_ATTRIBUTE, DEFAULT_TENANT_ID) or DEFAULT_TENANT_ID
+    session_index = attrs.get("SessionIndex") or attrs.get("sessionIndex") or fallback_session_index
+    expires_at = _resolve_expires_at(attrs)
+
+    if not email:
+        # No email from CAS: synthesize one from the user id, replacing every
+        # character other than alphanumerics, "-", "_" and "." with "_".
+        safe_user = "".join(c if c.isalnum() or c in ("-", "_", ".") else "_" for c in cas_user_id)
+        email = f"{safe_user}@{CAS_SYNTHETIC_EMAIL_DOMAIN}"
+
+    return CasPrincipal(
+        cas_user_id=str(cas_user_id),
+        email=str(email).lower(),
+        username=str(username),
+        role=role,
+        tenant_id=str(tenant_id),
+        session_index=str(session_index or ""),
+        expires_at=expires_at,
+    )
+
+
+def parse_logout_request(logout_request: str) -> Dict[str, str]:
+    """Extract the CAS user id and session index from a logoutRequest XML.
+
+    Returns a dict with "cas_user_id" and "session_index"; both are empty
+    strings when the input is empty, is not valid XML, or lacks the elements.
+    The user id is taken from the first non-empty of NameID, nameID, user and
+    casUserId.
+    """
+    if not logout_request:
+        return {"cas_user_id": "", "session_index": ""}
+
+    try:
+        doc = ET.fromstring(logout_request)
+    except (ET.ParseError, DefusedXmlException):
+        logger.warning("Invalid CAS logoutRequest XML")
+        return {"cas_user_id": "", "session_index": ""}
+
+    index_text = _get_child_text(doc, "SessionIndex")
+
+    user_text = ""
+    for tag in ("NameID", "nameID", "user", "casUserId"):
+        user_text = _get_child_text(doc, tag)
+        if user_text:
+            break
+
+    return {"cas_user_id": user_text or "", "session_index": index_text or ""}
+
+
+def revoke_from_logout_request(logout_request: str) -> Dict[str, Any]:
+    """Revoke local CAS sessions named by a CAS single-logout request.
+
+    Revocation is attempted by CAS user id first; only if that revoked
+    nothing (or no user id was present) is the session index tried.
+
+    Args:
+        logout_request: Raw logoutRequest XML.
+
+    Returns:
+        Dict with "revoked" (the value returned by the last revoke call, or 0)
+        plus the parsed "cas_user_id" and "session_index".
+    """
+    parsed = parse_logout_request(logout_request)
+    revoked = 0
+    if parsed["cas_user_id"]:
+        revoked = revoke_cas_sessions_by_user_id(parsed["cas_user_id"])
+        logger.info(
+            "CAS SLO revoke by cas_user_id: cas_user_id=%s revoked=%s",
+            parsed["cas_user_id"],
+            revoked,
+        )
+    # Fallback: the user id matched no session, so try the specific session index.
+    if revoked == 0 and parsed["session_index"]:
+        revoked = revoke_cas_session_by_index(parsed["session_index"])
+        logger.info(
+            "CAS SLO revoke by session_index: session_index=%s revoked=%s",
+            parsed["session_index"],
+            revoked,
+        )
+    if revoked == 0:
+        logger.warning("CAS SLO did not revoke any session: %s", parsed)
+    return {"revoked": revoked, **parsed}
+
+
+async def _create_project_session(principal: CasPrincipal, redirect: str = "/", renew: bool = False) -> Dict[str, Any]:
+    """Create a local session for a validated CAS principal.
+
+    Resolves (or creates) the project user, upserts the user/tenant mapping,
+    stores a CAS session record and issues a session JWT carrying its id.
+
+    Args:
+        principal: Identity returned by CAS ticket validation.
+        redirect: Value echoed back as "redirect_url".
+        renew: Value echoed back as "renew".
+
+    Returns:
+        Dict with "user", "session", "redirect_url" and "renew" entries.
+    """
+    user_id = _resolve_project_user(principal)
+    # Read the mapping before the upsert so we can tell whether one already existed.
+    existing_tenant = get_user_tenant_by_user_id(user_id)
+    user_tenant = upsert_user_tenant(
+        user_id=user_id,
+        tenant_id=principal.tenant_id,
+        user_role=principal.role,
+        user_email=principal.email,
+    )
+    # Tool and skill lists are initialized only when no mapping existed before this login.
+    if not existing_tenant:
+        await init_tool_list_for_tenant(principal.tenant_id, user_id)
+        await init_skill_list_for_tenant(principal.tenant_id, user_id)
+
+    # Cap the session at the earlier of the CAS expiry and the local maximum age.
+    now = datetime.now()
+    max_local_expiry = now + timedelta(seconds=LOCAL_SESSION_MAX_AGE_SECONDS)
+    expires_at_dt = min(principal.expires_at, max_local_expiry)
+    # Never hand out a non-positive lifetime, even if the CAS expiry has already passed.
+    expires_in_seconds = max(1, int((expires_at_dt - now).total_seconds()))
+
+    session_id = secrets.token_urlsafe(32)
+    create_cas_session(
+        session_id=session_id,
+        user_id=user_id,
+        cas_user_id=principal.cas_user_id,
+        cas_session_index=principal.session_index,
+        expires_at=expires_at_dt,
+    )
+
+    jwt_token = generate_session_jwt(user_id, expires_in=expires_in_seconds, session_id=session_id)
+
+    return {
+        "user": {
+            "id": str(user_id),
+            "email": principal.email,
+            "role": user_tenant.get("user_role", principal.role),
+        },
+        "session": {
+            "access_token": jwt_token,
+            "refresh_token": "",
+            "expires_at": calculate_expires_at(jwt_token),
+            "expires_in_seconds": expires_in_seconds,
+        },
+        "redirect_url": redirect,
+        "renew": renew,
+    }
+
+
+def _resolve_project_user(principal: CasPrincipal) -> str:
+    """Return the project user id for a CAS principal, creating the user if needed.
+
+    Lookup order: an existing CAS OAuth account, then a Supabase user with the
+    same email, then a newly created Supabase user. In every case the CAS
+    OAuth account record is created or refreshed before returning.
+
+    Raises:
+        RuntimeError: If no CAS account exists yet and the Supabase admin
+            client is unavailable.
+    """
+    linked_account = get_oauth_account_by_provider(CAS_PROVIDER, principal.cas_user_id)
+    if linked_account:
+        user_id = linked_account["user_id"]
+    else:
+        admin_client = get_supabase_admin_client()
+        if not admin_client:
+            raise RuntimeError("Supabase admin client not available")
+
+        user_id = find_supabase_user_id_by_email(admin_client, principal.email)
+        if not user_id:
+            # No user with this email yet: create one with a random password.
+            new_user_payload = {
+                "email": principal.email,
+                "password": secrets.token_urlsafe(32),
+                "email_confirm": True,
+                "user_metadata": {
+                    "full_name": principal.username,
+                    "provider": CAS_PROVIDER,
+                    "cas_user_id": principal.cas_user_id,
+                },
+            }
+            create_resp = admin_client.auth.admin.create_user(new_user_payload)
+            user_id = create_resp.user.id
+
+    create_or_update_oauth_account(
+        user_id=user_id,
+        provider=CAS_PROVIDER,
+        provider_user_id=principal.cas_user_id,
+        email=principal.email,
+        username=principal.username,
+        tenant_id=principal.tenant_id,
+    )
+    return user_id
+
+
+def _ensure_enabled() -> None:
+    """Raise CasAuthenticationError unless CAS is enabled and a server URL is set."""
+    if CAS_ENABLED and CAS_SERVER_URL:
+        return
+    raise CasAuthenticationError("CAS is not configured")
+
+
+def _build_callback_url(path: str, params: Dict[str, str]) -> str:
+    """Join the callback base URL, a path and an optional query string.
+
+    Raises:
+        CasAuthenticationError: If CAS_CALLBACK_BASE_URL is not configured.
+    """
+    if not CAS_CALLBACK_BASE_URL:
+        raise CasAuthenticationError("CAS callback base URL is not configured")
+    query_string = _build_callback_query(params)
+    callback = f"{CAS_CALLBACK_BASE_URL}{path}"
+    if query_string:
+        callback = f"{callback}?{query_string}"
+    return callback
+
+
+def _build_callback_query(params: Dict[str, str]) -> str:
+    """Serialize params as ``key=value`` pairs joined by "&" (values are not percent-encoded)."""
+    pairs = []
+    for key, value in params.items():
+        pairs.append(f"{key}={value}")
+    return "&".join(pairs)
+
+
+def _normalize_redirect(redirect: str) -> str:
+    """Return ``redirect`` if it is a safe same-site path, otherwise "/".
+
+    Only values starting with a single "/" are accepted. Values that a
+    browser could resolve to another host are rejected: protocol-relative
+    "//host", any backslash (browsers treat "\\" like "/", so "/\\host"
+    becomes "//host"), and ASCII control characters (tab, CR and LF are
+    stripped by browsers, so "/<TAB>/host" also becomes "//host").
+
+    Args:
+        redirect: Requested post-login target, typically taken from the request.
+
+    Returns:
+        The unchanged redirect when safe, else "/".
+    """
+    if not redirect or not redirect.startswith("/") or redirect.startswith("//"):
+        return "/"
+    if "\\" in redirect or any(ord(ch) < 0x20 or ord(ch) == 0x7F for ch in redirect):
+        return "/"
+    return redirect
+
+
+def _build_ssl_context() -> ssl.SSLContext:
+    """Build the SSL context used for CAS server requests.
+
+    An existing CAS_CA_BUNDLE file takes precedence. Otherwise the default
+    context is used, with hostname checking and certificate verification
+    switched off when CAS_SSL_VERIFY is false.
+    """
+    if CAS_CA_BUNDLE and os.path.isfile(CAS_CA_BUNDLE):
+        return ssl.create_default_context(cafile=CAS_CA_BUNDLE)
+
+    context = ssl.create_default_context()
+    if not CAS_SSL_VERIFY:
+        # check_hostname must be disabled before verify_mode can be CERT_NONE.
+        context.check_hostname = False
+        context.verify_mode = ssl.CERT_NONE
+    return context
+
+
+def _http_get_text(url: str) -> str:
+    """GET ``url`` (15 second timeout) and return the body decoded as UTF-8."""
+    request = urllib.request.Request(url, headers={"Accept": "application/xml,text/xml,*/*"})
+    response = urllib.request.urlopen(request, timeout=15, context=_build_ssl_context())
+    with response:
+        payload = response.read()
+    return payload.decode("utf-8")
+
+
+def _local_name(tag: str) -> str:
+    """Strip an ElementTree ``{namespace}`` prefix from a tag, if present."""
+    _, _, local = tag.rpartition("}")
+    return local
+
+
+def _find_first(node: Element, name: str) -> Optional[Element]:
+    """Return the first element in ``node``'s subtree (node included) whose local tag is ``name``.
+
+    Traversal is in document order; returns None when nothing matches.
+    """
+    matches = (element for element in node.iter() if _local_name(element.tag) == name)
+    return next(matches, None)
+
+
+def _get_child_text(node: Element, name: str) -> str:
+    """Return the stripped text of the first element named ``name`` under ``node``, or ""."""
+    match = _find_first(node, name)
+    if match is None:
+        return ""
+    return (match.text or "").strip()
+
+
+def _extract_attributes(attrs_node: Element) -> Dict[str, str]:
+    """Map each direct child's local tag name to its stripped, non-empty text.
+
+    Children without text are skipped; when a tag repeats, the last value wins.
+    """
+    collected: Dict[str, str] = {}
+    for element in attrs_node:
+        text = (element.text or "").strip()
+        if not text:
+            continue
+        collected[_local_name(element.tag)] = text
+    return collected
+
+
+def _attribute_or_default(attrs: Dict[str, str], key: str, default: str) -> str:
+    """Return ``attrs[key]`` when ``key`` is non-empty and present, else ``default``."""
+    return attrs[key] if key and key in attrs else default
+
+
+def _map_role(raw_role: str) -> str:
+    """Translate a CAS role value into one of VALID_ROLES.
+
+    The optional CAS_ROLE_MAP_JSON mapping is consulted with the raw value
+    first and its upper-cased form second. If the mapping cannot be used, a
+    warning is logged and the upper-cased raw role is kept. Any result
+    outside VALID_ROLES becomes "USER".
+    """
+    normalized = (raw_role or "USER").upper()
+    try:
+        mapping = json.loads(CAS_ROLE_MAP_JSON) if CAS_ROLE_MAP_JSON else {}
+        mapped = mapping.get(raw_role, mapping.get(normalized, normalized))
+        normalized = str(mapped).upper()
+    except Exception:
+        logger.warning("Invalid CAS_ROLE_MAP_JSON; falling back to raw role")
+    if normalized in VALID_ROLES:
+        return normalized
+    return "USER"
+
+
+def _resolve_expires_at(attrs: Dict[str, str]) -> datetime:
+    """Pick the session expiry from CAS attributes, or default to now + max age.
+
+    The attributes expiresAt, expirationDate, validUntil and notOnOrAfter are
+    tried in that order; the first one that parses wins.
+    """
+    expiry_keys = ("expiresAt", "expirationDate", "validUntil", "notOnOrAfter")
+    for raw in (attrs.get(key) for key in expiry_keys):
+        if raw:
+            moment = _parse_datetime(raw)
+            if moment:
+                return moment
+    return datetime.now() + timedelta(seconds=CAS_SESSION_MAX_AGE_SECONDS)
+
+
+def _parse_datetime(value: str) -> Optional[datetime]:
+    """Parse an epoch or ISO-8601 string into a naive local datetime.
+
+    All-digit values are epoch timestamps; values above 10_000_000_000 are
+    treated as milliseconds. Other values go through ``fromisoformat`` with
+    "Z" rewritten to "+00:00"; timezone-aware results are converted to local
+    time and made naive. Returns None if parsing fails for any reason.
+    """
+    try:
+        if not value.isdigit():
+            moment = datetime.fromisoformat(value.replace("Z", "+00:00"))
+            if moment.tzinfo:
+                moment = moment.astimezone().replace(tzinfo=None)
+            return moment
+
+        epoch = int(value)
+        if epoch > 10_000_000_000:
+            epoch = epoch / 1000
+        return datetime.fromtimestamp(epoch)
+    except Exception:
+        return None
diff --git a/backend/services/config_sync_service.py b/backend/services/config_sync_service.py
index 9fe50813a..7feea9452 100644
--- a/backend/services/config_sync_service.py
+++ b/backend/services/config_sync_service.py
@@ -20,7 +20,7 @@
MODEL_ENGINE_ENABLED,
TENANT_NAME
)
-from database.model_management_db import get_model_id_by_display_name
+from database.model_management_db import get_model_id_by_display_name, get_model_records
from utils.config_utils import (
get_env_key,
get_model_name_from_config,
@@ -31,6 +31,20 @@
logger = logging.getLogger("config_sync_service")
+def get_model_id_for_config(model_type: str, display_name: str, tenant_id: str) -> Optional[int]:
+    """Resolve a model id from its display name, preferring a match on model type.
+
+    Returns None for an empty display name. Otherwise returns the "model_id"
+    of the first record matching both display name and model type, falling
+    back to a lookup by display name alone when there is no such record.
+    """
+    if not display_name:
+        return None
+
+    filters = {"display_name": display_name, "model_type": model_type}
+    matches = get_model_records(filters, tenant_id)
+    if not matches:
+        return get_model_id_by_display_name(display_name, tenant_id)
+    return matches[0].get("model_id")
+
+
def handle_model_config(tenant_id: str, user_id: str, config_key: str, model_id: Optional[int], tenant_config_dict: dict) -> None:
"""
Handle model configuration updates, deletions, and settings operations
@@ -98,8 +112,8 @@ async def save_config_impl(config, tenant_id, user_id):
model_display_name = model_config.get("displayName")
config_key = get_env_key(model_type) + "_ID"
- model_id = get_model_id_by_display_name(
- model_display_name, tenant_id)
+ model_id = get_model_id_for_config(
+ model_type, model_display_name, tenant_id)
handle_model_config(tenant_id, user_id, config_key,
model_id, tenant_config_dict)
@@ -112,6 +126,21 @@ async def save_config_impl(config, tenant_id, user_id):
embedding_api_config = model_config.get("apiConfig", {})
env_config[f"{model_prefix}_API_KEY"] = safe_value(
embedding_api_config.get("apiKey"))
+
+ # Save STT specific fields for speech recognition models
+ if model_type == "stt":
+ if model_config.get("modelFactory"):
+ stt_factory_key = "STT_MODEL_FACTORY"
+ tenant_config_manager.set_single_config(
+ user_id, tenant_id, stt_factory_key, model_config.get("modelFactory"))
+ if model_config.get("modelAppid"):
+ stt_appid_key = "STT_MODEL_APPID"
+ tenant_config_manager.set_single_config(
+ user_id, tenant_id, stt_appid_key, model_config.get("modelAppid"))
+ if model_config.get("accessToken"):
+ stt_token_key = "STT_ACCESS_TOKEN"
+ tenant_config_manager.set_single_config(
+ user_id, tenant_id, stt_token_key, model_config.get("accessToken"))
logger.info("Configuration saved successfully")
@@ -167,6 +196,7 @@ def build_models_config(tenant_id: str) -> dict:
def build_model_config(model_config: dict) -> dict:
if not model_config:
return {
+ "id": None,
"name": "",
"displayName": "",
"apiConfig": {
@@ -176,6 +206,7 @@ def build_model_config(model_config: dict) -> dict:
}
config = {
+ "id": model_config.get("model_id"),
"name": get_model_name_from_config(model_config) if model_config else "",
"displayName": model_config.get("display_name", ""),
"apiConfig": {
@@ -187,4 +218,11 @@ def build_model_config(model_config: dict) -> dict:
if "embedding" in model_config.get("model_type", ""):
config["dimension"] = model_config.get("max_tokens", 0)
+ # Add voice model specific fields (STT and TTS)
+ model_type = model_config.get("model_type", "")
+ if model_type == "stt" or model_type == "tts":
+ config["modelFactory"] = model_config.get("model_factory", "")
+ config["modelAppid"] = model_config.get("model_appid", "")
+ config["accessToken"] = model_config.get("access_token", "")
+
return config
diff --git a/backend/services/conversation_management_service.py b/backend/services/conversation_management_service.py
index b98e79897..12edea7d5 100644
--- a/backend/services/conversation_management_service.py
+++ b/backend/services/conversation_management_service.py
@@ -8,6 +8,7 @@
from consts.const import LANGUAGE, MODEL_CONFIG_MAPPING, MESSAGE_ROLE, DEFAULT_EN_TITLE, DEFAULT_ZH_TITLE
from consts.model import AgentRequest, ConversationResponse, MessageRequest, MessageUnit
+from consts.exceptions import ConversationNotFoundError
from database.conversation_db import (
create_conversation,
create_conversation_message,
@@ -18,16 +19,20 @@
get_conversation,
get_conversation_history,
get_conversation_list,
+ get_latest_assistant_message_id,
get_message_id_by_index,
get_source_images_by_conversation,
get_source_images_by_message,
get_source_searches_by_conversation,
get_source_searches_by_message,
rename_conversation,
+ update_message_minio_files,
update_message_opinion
)
from nexent.core.utils.observer import MessageObserver, ProcessType
+from nexent.monitor import set_monitoring_context, set_monitoring_operation
from nexent.core.models import OpenAIModel
+from agents.agent_run_manager import agent_run_manager
from utils.config_utils import get_model_name_from_config, tenant_config_manager
from utils.prompt_template_utils import get_generate_title_prompt_template
from utils.str_utils import remove_think_blocks
@@ -122,7 +127,15 @@ def save_message(request: MessageRequest, user_id: str, tenant_id: str):
# Parse image URL list
content_json = json.loads(unit_content)
if isinstance(content_json, dict) and 'images_url' in content_json:
+ # Deduplicate image URLs before saving
+ seen_urls = set()
+ unique_urls = []
for image_url in content_json['images_url']:
+ if image_url not in seen_urls:
+ seen_urls.add(image_url)
+ unique_urls.append(image_url)
+ # Also deduplicate against any URLs already saved in this same message
+ for image_url in unique_urls:
image_data = {'message_id': message_id, 'conversation_id': conversation_id,
'image_url': image_url}
create_source_image(image_data)
@@ -200,7 +213,7 @@ def save_message(request: MessageRequest, user_id: str, tenant_id: str):
def save_conversation_user(request: AgentRequest, user_id: str, tenant_id: str):
user_role_count = sum(1 for item in getattr(
- request, "history", []) if item.get("role") == MESSAGE_ROLE["USER"])
+ request, "history", []) if item.role == MESSAGE_ROLE["USER"])
conversation_req = MessageRequest(conversation_id=request.conversation_id, message_idx=user_role_count * 2,
role=MESSAGE_ROLE["USER"], message=[MessageUnit(type="string", content=request.query)], minio_files=request.minio_files)
@@ -209,7 +222,7 @@ def save_conversation_user(request: AgentRequest, user_id: str, tenant_id: str):
def save_conversation_assistant(request: AgentRequest, messages: List[str], user_id: str, tenant_id: str):
user_role_count = sum(1 for item in getattr(
- request, "history", []) if item.get("role") == MESSAGE_ROLE["USER"])
+ request, "history", []) if item.role == MESSAGE_ROLE["USER"])
message_list = []
for item in messages:
@@ -222,7 +235,7 @@ def save_conversation_assistant(request: AgentRequest, messages: List[str], user
message_list.append(message)
conversation_req = MessageRequest(conversation_id=request.conversation_id, message_idx=user_role_count * 2 + 1,
- role=MESSAGE_ROLE["ASSISTANT"], message=message_list, minio_files=request.minio_files)
+ role=MESSAGE_ROLE["ASSISTANT"], message=message_list, minio_files=None)
save_message(conversation_req, user_id=user_id, tenant_id=tenant_id)
@@ -239,9 +252,14 @@ def call_llm_for_title(question: str, tenant_id: str, language: str = LANGUAGE["
str: Generated title
"""
prompt_template = get_generate_title_prompt_template(language=language)
+ set_monitoring_context(tenant_id=tenant_id, user_id=None)
model_config = tenant_config_manager.get_model_config(
key=MODEL_CONFIG_MAPPING["llm"], tenant_id=tenant_id)
+ display_name = model_config.get("display_name", "") if model_config else ""
+ set_monitoring_operation("title_generation", display_name=display_name or None)
+
+ timeout_seconds = model_config.get("timeout_seconds") if model_config else None
# Create OpenAIModel instance
llm = OpenAIModel(
@@ -251,7 +269,9 @@ def call_llm_for_title(question: str, tenant_id: str, language: str = LANGUAGE["
temperature=0.7,
top_p=0.95,
model_factory=model_config.get("model_factory", None),
- ssl_verify=model_config.get("ssl_verify", True)
+ ssl_verify=model_config.get("ssl_verify", True),
+ timeout_seconds=timeout_seconds,
+ stream=False,
)
# Build messages - use new template variable 'question' instead of 'content'
@@ -287,7 +307,9 @@ def update_conversation_title(conversation_id: int, title: str, user_id: str = N
"""
success = rename_conversation(conversation_id, title, user_id)
if not success:
- raise Exception(f"Conversation {conversation_id} does not exist or has been deleted")
+ raise ConversationNotFoundError(
+ f"Conversation {conversation_id} does not exist or has been deleted"
+ )
return success
@@ -362,6 +384,11 @@ def delete_conversation_service(conversation_id: int, user_id: str) -> bool:
success = delete_conversation(conversation_id, user_id)
if not success:
raise Exception(f"Conversation {conversation_id} does not exist or has been deleted")
+
+ # Defensive cleanup: release the ContextManager associated with this conversation
+ # to avoid memory leaks in edge cases
+ agent_run_manager.clear_conversation_context_manager(conversation_id)
+
return True
except Exception as e:
logging.error(f"Failed to delete conversation: {str(e)}")
@@ -429,13 +456,15 @@ def get_conversation_history_service(conversation_id: int, user_id: str) -> List
search_by_message[message_id] = []
search_by_message[message_id].append(search_item)
- # Collect image content - grouped by message_id
+ # Collect image content - grouped by message_id, with URL deduplication
image_by_message = {}
for record in history_data['image_records']:
message_id = record['message_id']
if message_id not in image_by_message:
image_by_message[message_id] = []
- image_by_message[message_id].append(record['image_url'])
+ # Only add if not already present (by URL)
+ if record['image_url'] not in image_by_message[message_id]:
+ image_by_message[message_id].append(record['image_url'])
# Sort by message index and build final message list, including images and search content
messages = []
@@ -495,6 +524,10 @@ def get_conversation_history_service(conversation_id: int, user_id: str) -> List
'opinion_flag': msg['opinion_flag']
}
+ # Add minio_files field (if any, e.g., skill-generated attachments)
+ if 'minio_files' in msg and msg['minio_files']:
+ message_item['minio_files'] = msg['minio_files']
+
# Add image content (if any)
if message_id in image_by_message:
message_item['picture'] = image_by_message[message_id]
@@ -687,3 +720,52 @@ async def get_message_id_by_index_impl(conversation_id: int, message_index: int)
if message_id is None:
raise Exception("Message not found.")
return message_id
+
+
+def save_skill_files_to_conversation(
+    conversation_id: int,
+    skill_file_uploads: List[Dict[str, Any]],
+    user_id: str,
+) -> bool:
+    """
+    Attach skill file upload records to the latest assistant message of a conversation.
+
+    Args:
+        conversation_id: Target conversation ID
+        skill_file_uploads: List of upload metadata dicts
+        user_id: User ID passed to the latest-assistant-message lookup
+
+    Returns:
+        bool: The result of the message update. False when the upload list is
+        empty, no assistant message is found, or any exception occurs (the
+        exception is logged, not raised).
+    """
+    if not skill_file_uploads:
+        return False
+
+    try:
+        target_message_id = get_latest_assistant_message_id(conversation_id, user_id)
+        if target_message_id is None:
+            logging.warning(
+                "[skill-file] no assistant message found for conversation=%s, "
+                "cannot persist skill file uploads",
+                conversation_id,
+            )
+            return False
+
+        persisted = update_message_minio_files(target_message_id, skill_file_uploads)
+        if persisted:
+            logging.info(
+                "[skill-file] persisted %d file(s) to message_id=%s conversation=%s",
+                len(skill_file_uploads),
+                target_message_id,
+                conversation_id,
+            )
+        return persisted
+    except Exception:
+        logging.exception(
+            "[skill-file] failed to persist skill file uploads for conversation=%s",
+            conversation_id,
+        )
+        return False
diff --git a/backend/services/data_process_service.py b/backend/services/data_process_service.py
index 2b222a584..a7529127c 100644
--- a/backend/services/data_process_service.py
+++ b/backend/services/data_process_service.py
@@ -15,7 +15,7 @@
import redis
import torch
from PIL import Image
-from celery import states, chain
+from celery import states
from transformers import CLIPProcessor, CLIPModel
from nexent.data_process.core import DataProcessCore
@@ -25,7 +25,7 @@
from database.attachment_db import delete_file, file_exists, get_file_size_from_minio, get_file_stream, upload_file
from utils.file_management_utils import convert_office_to_pdf
from data_process.app import app as celery_app
-from data_process.tasks import process, forward
+from data_process.tasks import submit_process_forward_chain
from data_process.utils import get_task_info, get_all_task_ids_from_redis
# Limit concurrent LibreOffice processes to avoid resource exhaustion
@@ -54,7 +54,8 @@ def __init__(self):
self._inspector = None
self._inspector_last_time = 0
- self._inspector_ttl = 60 # Inspector cache time in seconds
+ # 5 minutes - inspector is expensive to create (ping all workers)
+ self._inspector_ttl = 300
self._inspector_lock = None
self._inspector_lock = threading.Lock()
@@ -105,7 +106,7 @@ async def stop(self):
logger.info("Data processing service stopped")
def _get_celery_inspector(self):
- """Get Celery inspector"""
+ """Get Celery inspector (cached for performance)"""
with self._inspector_lock:
now = time.time()
if self._inspector and now - self._inspector_last_time < self._inspector_ttl:
@@ -117,9 +118,9 @@ def _get_celery_inspector(self):
f"Celery broker URL is not configured properly, reconfiguring to {celery_app.conf.broker_url}")
try:
inspector = celery_app.control.inspect()
- inspector.ping()
self._inspector = inspector
self._inspector_last_time = now
+ self._inspector_init_time = now
return inspector
except Exception as e:
self._inspector = None
@@ -142,67 +143,131 @@ async def get_all_tasks(self, filter: bool = True) -> List[Dict[str, Any]]:
all_tasks = []
try:
start_time = time.time()
- logger.debug(
- "Getting inspector to check for active and reserved tasks (concurrent)")
+ inspector_start = time.time()
inspector = self._get_celery_inspector()
- logger.debug(
- f"⏰ Inspector initialization took {time.time() - start_time}s")
+ inspector_duration = time.time() - inspector_start
- # Collect task IDs from different sources
+ # Collect task IDs from different sources and keep runtime metadata
task_ids = set()
+ runtime_task_meta: Dict[str, Dict[str, Any]] = {}
+
+ def _normalize_runtime_meta(task: Dict[str, Any]) -> Dict[str, Any]:
+ task_name_full = task.get('name', '') or ''
+ task_name = task_name_full.split(
+ '.')[-1] if task_name_full else ''
+ kwargs = task.get('kwargs') or {}
+ if isinstance(kwargs, str):
+ try:
+ import json as _json
+ kwargs = _json.loads(kwargs)
+ except Exception:
+ kwargs = {}
+ if not isinstance(kwargs, dict):
+ kwargs = {}
+ return {
+ 'task_name': task_name,
+ 'index_name': kwargs.get('index_name', ''),
+ 'path_or_url': kwargs.get('source', ''),
+ 'original_filename': kwargs.get('original_filename', ''),
+ }
+
+ celery_start = time.time()
+
+ # Use short timeout for inspector since workers can respond in ~0.1s
+ # Default 1s timeout is unnecessary and causes delay
+ short_timeout = 0.2
def get_active():
- return inspector.active()
+ t = time.time()
+ # Create fresh inspector with short timeout for each call
+ short_inspector = celery_app.control.inspect(
+ timeout=short_timeout)
+ result = short_inspector.active()
+ elapsed = time.time() - t
+ logger.info(
+ f"[get_all_tasks] inspector.active() took {elapsed:.3f}s")
+ return result if result else {}
def get_reserved():
- return inspector.reserved()
+ t = time.time()
+ short_inspector = celery_app.control.inspect(
+ timeout=short_timeout)
+ result = short_inspector.reserved()
+ elapsed = time.time() - t
+ logger.info(
+ f"[get_all_tasks] inspector.reserved() took {elapsed:.3f}s")
+ return result if result else {}
+
with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
future_active = executor.submit(get_active)
future_reserved = executor.submit(get_reserved)
- active_tasks_dict = future_active.result()
- reserved_tasks_dict = future_reserved.result()
- logger.debug(
- f"⏰ Get active and reserved tasks (concurrent) took {time.time() - start_time}s")
+ active_tasks_dict = future_active.result(
+ timeout=short_timeout + 0.5)
+ reserved_tasks_dict = future_reserved.result(
+ timeout=short_timeout + 0.5)
+ celery_duration = time.time() - celery_start
+ if celery_duration > 0.5:
+ logger.warning(
+ f"[get_all_tasks] Inspector took {celery_duration:.3f}s (expected <0.5s)")
if active_tasks_dict:
for worker, tasks in active_tasks_dict.items():
for task in tasks:
task_id = task.get('id')
if task_id:
task_ids.add(task_id)
+ runtime_task_meta[task_id] = _normalize_runtime_meta(
+ task)
if reserved_tasks_dict:
for worker, tasks in reserved_tasks_dict.items():
for task in tasks:
task_id = task.get('id')
if task_id:
task_ids.add(task_id)
+ # Keep active metadata if already present
+ runtime_task_meta.setdefault(
+ task_id, _normalize_runtime_meta(task))
- # Currently, we don't have scheduled tasks, so skip getting scheduled tasks here
- start_time = time.time()
- logger.debug("Getting task IDs from Redis backend")
- # Also get task IDs from Redis backend (covers completed/failed tasks within expiry)
+ # Get task IDs from Redis backend (covers completed/failed tasks within expiry)
try:
redis_task_ids = get_all_task_ids_from_redis(self.redis_client)
- logger.debug(
- f"⏰ Get Redis task IDs took {time.time() - start_time}s")
for task_id in redis_task_ids:
- # Add to the set, duplicates will be handled
task_ids.add(task_id)
except Exception as redis_error:
logger.warning(
f"Failed to query Redis for stored task IDs: {str(redis_error)}")
- logger.debug(
- f"Total unique task IDs collected (inspector + Redis): {len(task_ids)}")
- tasks = [get_task_info(task_id) for task_id in task_ids]
+
+ task_id_list = list(task_ids)
+ # Batch fetch all task info
+ tasks = [get_task_info(task_id) for task_id in task_id_list]
all_task_infos = await asyncio.gather(*tasks, return_exceptions=True)
- for task_info in all_task_infos:
+ for idx, task_info in enumerate(all_task_infos):
if isinstance(task_info, Exception):
logger.warning(
f"Failed to get status for a task: {task_info}")
continue
+ task_id = task_id_list[idx]
+ runtime_meta = runtime_task_meta.get(task_id, {})
+ # Backfill runtime info for pending/reserved tasks that do not have result metadata yet
+ if runtime_meta:
+ if not task_info.get('task_name') and runtime_meta.get('task_name'):
+ task_info['task_name'] = runtime_meta.get('task_name')
+ if not task_info.get('index_name') and runtime_meta.get('index_name'):
+ task_info['index_name'] = runtime_meta.get(
+ 'index_name')
+ if not task_info.get('path_or_url') and runtime_meta.get('path_or_url'):
+ task_info['path_or_url'] = runtime_meta.get(
+ 'path_or_url')
+ if not task_info.get('original_filename') and runtime_meta.get('original_filename'):
+ task_info['original_filename'] = runtime_meta.get(
+ 'original_filename')
+
if filter and not (task_info.get('index_name') and task_info.get('task_name')):
- continue
+ # Keep user-visible queued tasks even before worker updates task meta.
+ if task_info.get('task_name') not in {'process', 'forward', 'process_and_forward'}:
+ continue
+ if not task_info.get('index_name'):
+ continue
all_tasks.append(task_info)
- logger.debug(f"Retrieved {len(all_tasks)} tasks.")
except Exception as e:
logger.error(f"Error retrieving all tasks: {str(e)}")
all_tasks = []
@@ -255,6 +320,17 @@ async def load_image(self, image_url: str) -> Optional[Image.Image]:
async def _load_image(self, session: aiohttp.ClientSession, path: str) -> Optional[Image.Image]:
"""Internal method to load an image from various sources"""
try:
+ if path.startswith('s3://'):
+ # Fetch from MinIO using s3://bucket/key
+ file_stream = get_file_stream(object_name=path)
+ if file_stream is None:
+ raise FileNotFoundError(
+ f"Unable to fetch file from URL: {path}")
+ file_data = file_stream.read()
+ image_based64_str = base64.b64encode(
+ file_data).decode('utf-8')
+ path = f"data:image/jpeg;base64,{image_based64_str}"
+
# Check if input is base64 encoded
if path.startswith('data:image'):
# Extract the base64 data after the comma
@@ -463,6 +539,8 @@ async def create_batch_tasks_impl(self, authorization: Optional[str], request: B
chunking_strategy = source_config.get('chunking_strategy')
index_name = source_config.get('index_name')
original_filename = source_config.get('original_filename')
+ embedding_model_id = source_config.get('embedding_model_id')
+ tenant_id = source_config.get('tenant_id')
# Validate required fields
if not source:
@@ -474,28 +552,23 @@ async def create_batch_tasks_impl(self, authorization: Optional[str], request: B
f"Missing required field 'index_name' in source config: {source_config}")
continue
- # Create and submit a chain: process -> forward
- task_chain = chain(
- process.s(
- source=source,
- source_type=source_type,
- chunking_strategy=chunking_strategy,
- index_name=index_name,
- original_filename=original_filename
- ).set(queue='process_q'),
- forward.s(
- index_name=index_name,
- source=source,
- source_type=source_type,
- original_filename=original_filename,
- authorization=authorization
- ).set(queue='forward_q')
+ chain_id = submit_process_forward_chain(
+ source=source,
+ source_type=source_type,
+ chunking_strategy=chunking_strategy,
+ index_name=index_name,
+ original_filename=original_filename,
+ authorization=authorization,
+ embedding_model_id=embedding_model_id,
+ tenant_id=tenant_id,
)
+ if not chain_id:
+ logger.error(
+ f"Failed to enqueue process-forward chain for source: {source}")
+ continue
- task_result = task_chain.apply_async()
-
- task_ids.append(task_result.id)
- logger.debug(f"Created task {task_result.id} for source: {source}")
+ task_ids.append(chain_id)
+ logger.debug(f"Created task {chain_id} for source: {source}")
logger.info(
f"Created {len(task_ids)} individual tasks for batch processing")
return task_ids
@@ -527,7 +600,7 @@ async def process_uploaded_text_file(self, file_content: bytes, filename: str, c
f"Processing uploaded file: {filename} using SDK DataProcessCore")
data_processor = DataProcessCore()
- chunks = data_processor.file_process(
+ chunks, _ = data_processor.file_process(
file_data=file_content,
filename=filename,
chunking_strategy=chunking_strategy
@@ -559,7 +632,7 @@ async def process_uploaded_text_file(self, file_content: bytes, filename: str, c
}
async def convert_office_to_pdf_impl(self, object_name: str, pdf_object_name: str) -> None:
- """Full conversion pipeline: download → convert → upload → validate → cleanup.
+ """Full conversion pipeline: download -> convert -> upload -> validate -> cleanup.
All five steps run inside data-process so that LibreOffice only needs to be
installed in this container.
@@ -576,7 +649,8 @@ async def convert_office_to_pdf_impl(self, object_name: str, pdf_object_name: st
# Step 1: Download original Office file from MinIO
original_stream = get_file_stream(object_name)
if original_stream is None:
- raise OfficeConversionException(f"Source file not found in storage: {object_name}")
+ raise OfficeConversionException(
+ f"Source file not found in storage: {object_name}")
original_filename = os.path.basename(object_name)
input_path = os.path.join(temp_dir, original_filename)
@@ -588,10 +662,12 @@ async def convert_office_to_pdf_impl(self, object_name: str, pdf_object_name: st
try:
pdf_path = await convert_office_to_pdf(input_path, temp_dir, timeout=30)
except Exception as exc:
- raise OfficeConversionException(f"LibreOffice conversion failed: {exc}") from exc
+ raise OfficeConversionException(
+ f"LibreOffice conversion failed: {exc}") from exc
# Step 3: Upload converted PDF to MinIO
- result = upload_file(file_path=pdf_path, object_name=pdf_object_name)
+ result = upload_file(file_path=pdf_path,
+ object_name=pdf_object_name)
if not result.get('success'):
raise OfficeConversionException(
f"Failed to upload PDF to MinIO: {result.get('error', 'Unknown error')}"
@@ -600,14 +676,16 @@ async def convert_office_to_pdf_impl(self, object_name: str, pdf_object_name: st
# Step 4: Validate the uploaded PDF (header check + minimum size)
remote_size = get_file_size_from_minio(pdf_object_name)
if remote_size <= 0:
- raise OfficeConversionException("PDF validation failed: cannot read remote file size")
+ raise OfficeConversionException(
+ "PDF validation failed: cannot read remote file size")
if remote_size < 100:
raise OfficeConversionException(
f"PDF validation failed: file too small ({remote_size} bytes)"
)
remote_stream = get_file_stream(pdf_object_name)
if remote_stream is None:
- raise OfficeConversionException("PDF validation failed: cannot read uploaded file")
+ raise OfficeConversionException(
+ "PDF validation failed: cannot read uploaded file")
try:
header = remote_stream.read(5)
finally:
@@ -616,7 +694,8 @@ async def convert_office_to_pdf_impl(self, object_name: str, pdf_object_name: st
except Exception:
pass
if not header.startswith(b'%PDF-'):
- raise OfficeConversionException("PDF validation failed: invalid PDF header")
+ raise OfficeConversionException(
+ "PDF validation failed: invalid PDF header")
except OfficeConversionException:
# Clean up any partially-uploaded remote PDF so a future retry starts clean
@@ -624,14 +703,16 @@ async def convert_office_to_pdf_impl(self, object_name: str, pdf_object_name: st
delete_file(pdf_object_name)
raise
except Exception as exc:
- raise OfficeConversionException(f"Unexpected error during conversion: {exc}") from exc
+ raise OfficeConversionException(
+ f"Unexpected error during conversion: {exc}") from exc
finally:
# Step 5: Clean up local temporary directory
if temp_dir and os.path.exists(temp_dir):
try:
shutil.rmtree(temp_dir)
except Exception as cleanup_err:
- logger.warning(f"Failed to cleanup temp dir '{temp_dir}': {cleanup_err}")
+ logger.warning(
+ f"Failed to cleanup temp dir '{temp_dir}': {cleanup_err}")
def convert_celery_states_to_custom(self, process_celery_state: Optional[str], forward_celery_state: Optional[str]) -> str:
"""Map Celery task states to a custom frontend state string.
diff --git a/backend/services/datamate_service.py b/backend/services/datamate_service.py
index 776e0eb1d..41858440b 100644
--- a/backend/services/datamate_service.py
+++ b/backend/services/datamate_service.py
@@ -51,7 +51,7 @@ async def _create_datamate_knowledge_records(knowledge_base_ids: List[str],
"tenant_id": tenant_id,
"user_id": user_id,
# Use datamate as embedding model name
- "embedding_model_name": embedding_model_names[i]
+ "embedding_model_name": embedding_model_names[i],
}
# Run synchronous database operation in executor to avoid blocking
diff --git a/backend/services/file_management_service.py b/backend/services/file_management_service.py
index d73c91c72..585669c0c 100644
--- a/backend/services/file_management_service.py
+++ b/backend/services/file_management_service.py
@@ -4,12 +4,14 @@
import os
from io import BytesIO
from pathlib import Path
-from typing import List, Optional, Tuple
+from typing import Dict, List, Optional, Tuple
import httpx
from fastapi import UploadFile
from consts.const import (
+ ASSET_OWNER_ATTACHMENTS_PREFIX,
+ ASSET_OWNER_TENANT_ID,
DATA_PROCESS_SERVICE,
FILE_PREVIEW_SIZE_LIMIT,
MAX_CONCURRENT_UPLOADS,
@@ -36,6 +38,7 @@
from utils.file_management_utils import save_upload_file
from nexent import MessageObserver
+from nexent.multi_modal.utils import parse_s3_url
from nexent.core.models import OpenAILongContextModel
# Create upload directory
@@ -49,8 +52,220 @@
logger = logging.getLogger("file_management_service")
+ALLOWED_SKILL_UPLOAD_ROOT = Path("/mnt/nexent").resolve()
-async def upload_files_impl(destination: str, file: List[UploadFile], folder: str = None, index_name: Optional[str] = None) -> tuple:
+
+def is_allowed_skill_upload_path(file_path: str) -> bool:
+ """Return True when a local file path is under the allowed skill upload root."""
+ if not file_path:
+ return False
+
+ try:
+ candidate_path = Path(file_path).resolve()
+ except Exception:
+ return False
+
+ try:
+ candidate_path.relative_to(ALLOWED_SKILL_UPLOAD_ROOT)
+ return True
+ except ValueError:
+ return False
+
+
+
+
+def resolve_minio_upload_folder(
+ folder: Optional[str],
+ user_id: Optional[str] = None,
+ uploader_tenant_id: Optional[str] = None,
+) -> str:
+ """Map caller context to the MinIO object prefix used for uploads.
+
+ Resolution order (first match wins):
+ 1. Asset-owner tenant → ``attachments/asset_owner/{user_id}``
+ 2. ``folder == "knowledge_base"`` → shared ``knowledge_base`` prefix
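+ 3. ``folder == "skill-files"`` → ``skill-files/{user_id}``, or shared ``skill-files`` when ``user_id`` is unset
+ 4. Otherwise → per-user ``attachments/{user_id}`` when ``user_id`` is set; legacy fallback → ``folder`` if provided, else ``attachments``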
+
+ Access control for reads is enforced separately; this function only
+ chooses the storage prefix.
+
+ Args:
+ folder: Requested folder hint (e.g. ``"knowledge_base"`` or a legacy path).
+ user_id: Uploader user ID; required for user-scoped attachment paths.
+ uploader_tenant_id: Uploader tenant ID; asset-owner tenants use a dedicated prefix.
+
+ Returns:
+ Resolved MinIO folder prefix (no leading or trailing slash).
+ """
+ if uploader_tenant_id == ASSET_OWNER_TENANT_ID:
+ return f"{ASSET_OWNER_ATTACHMENTS_PREFIX}/{user_id}"
+
+ if folder == "knowledge_base":
+ return "knowledge_base"
+
+ if folder == "skill-files":
+ if user_id:
+ return f"skill-files/{user_id}"
+ return "skill-files"
+
+ if user_id:
+ return f"attachments/{user_id}"
+
+ return folder or "attachments"
+
+
+def check_file_access(
+ object_name: str,
+ user_id: Optional[str],
+ caller_tenant_id: Optional[str] = None,
+) -> bool:
+ """
+ Check if user has permission to access the file.
+
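+ Access rules (checked in this order; anything unmatched is denied):
+ - paths starting with ASSET_OWNER_ATTACHMENTS_PREFIX: Only callers whose caller_tenant_id is ASSET_OWNER_TENANT_ID can access
+ - knowledge_base/* and images_in_attachments/*: All authenticated users can access
+ - skill-files/{user_id}/* and attachments/{user_id}/*: Only the owner (user_id) can access; legacy attachments/{filename} (no subfolder) stays readable by all authenticated users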
+
+ Args:
+ object_name: File object name in storage
+ user_id: Current user ID
+
+ Returns:
+ True if access is allowed, False otherwise
+ """
+ if not user_id:
+ return False
+
+ if object_name.startswith(ASSET_OWNER_ATTACHMENTS_PREFIX):
+ return caller_tenant_id == ASSET_OWNER_TENANT_ID
+
+ if object_name.startswith("knowledge_base/"):
+ # Knowledge base files: all authenticated users can access
+ return True
+
+ if object_name.startswith("images_in_attachments/"):
+ # Extracted image files used by knowledge-base image chunks.
+ # Keep them readable for authenticated users to avoid broken image citations.
+ return True
+
+ if object_name.startswith("skill-files/"):
+ # Generated documents are private to the uploader and must stay user-scoped.
+ return object_name.startswith(f"skill-files/{user_id}/")
+
+ # Check if file is in user's attachments folder
+ # Pattern: attachments/{user_id}/*
+ if object_name.startswith(f"attachments/{user_id}/"):
+ return True
+
+ # For backward compatibility, allow access to files in root attachments folder
+ # Pattern: attachments/{filename} (no user_id subfolder)
+ if object_name.startswith("attachments/") and "/" not in object_name.replace("attachments/", "", 1):
+ # Old format: attachments/filename (no subdirectory)
+ # Allow access for backward compatibility
+ return True
+
+ return False
+
+
+def check_file_access_batch(
+ object_names: List[str],
+ user_id: Optional[str],
+ caller_tenant_id: Optional[str] = None,
+) -> Dict[str, bool]:
+ """
+ Batch check file access permissions.
+
+ Args:
+ object_names: List of file object names
+ user_id: Current user ID
+ caller_tenant_id: Caller's tenant ID for ASSET_OWNER path checks
+
+ Returns:
+ Dict mapping object_name to access permission (True/False)
+ """
+ return {
+ obj_name: check_file_access(obj_name, user_id, caller_tenant_id)
+ for obj_name in object_names
+ }
+
+
+def validate_s3_url_access(
+ object_name: str,
+ user_id: Optional[str],
+ caller_tenant_id: Optional[str] = None,
+) -> None:
+ """
+ Validate if user has permission to access the S3 URL.
+
+ Args:
+ object_name: File object name in storage (extracted from S3 URL)
+ user_id: Current user ID
+
+ Raises:
+ PermissionError: If user doesn't have permission to access the file
+ """
+ if not user_id:
+ raise PermissionError("User authentication required to access files")
+
+ if not check_file_access(object_name, user_id, caller_tenant_id):
+ logger.warning(
+ f"[validate_s3_url_access] Access denied: object_name={object_name}, user_id={user_id}")
+ raise PermissionError(
+ f"Access denied: You don't have permission to access this file ({object_name})")
+
+
+def validate_urls_access(
+ urls: List[str],
+ user_id: Optional[str],
+ caller_tenant_id: Optional[str] = None,
+) -> None:
+ """
+ Validate if user has permission to access the given URLs.
+
+ Only MinIO references are checked (s3://bucket/key or /bucket/key format); HTTP/HTTPS and any other URLs are skipped without validation.
+
+ Args:
+ urls: List of URLs to validate (S3, HTTP, or HTTPS)
+ user_id: Current user ID
+
+ Raises:
+ PermissionError: If access to any one of the files is denied, or an s3:// URL cannot be parsed
+ """
+ if not urls:
+ return
+
+ for url in urls:
+ if not url:
+ continue
+
+ # Only validate S3 URLs (MinIO storage)
+ # HTTP/HTTPS URLs are external resources and are not subject to MinIO access control
+ if url.startswith("s3://"):
+ try:
+ _, object_name = parse_s3_url(url)
+ validate_s3_url_access(object_name, user_id, caller_tenant_id)
+ except ValueError as e:
+ logger.warning(
+ f"[validate_urls_access] Failed to parse S3 URL: {url}, error: {e}")
+ raise PermissionError(f"Invalid S3 URL format: {url}")
+ elif url.startswith("/") and not url.startswith("//"):
+ # Handle /bucket/key format (absolute path style)
+ parts = url.strip("/").split("/", 1)
+ if len(parts) == 2:
+ bucket, object_name = parts
+ validate_s3_url_access(object_name, user_id, caller_tenant_id)
+
+
+async def upload_files_impl(
+ destination: str,
+ file: List[UploadFile],
+ folder: str = None,
+ index_name: Optional[str] = None,
+ user_id: Optional[str] = None,
+ uploader_tenant_id: Optional[str] = None,
+) -> tuple:
"""
Upload files to local storage or MinIO based on destination.
@@ -58,6 +273,9 @@ async def upload_files_impl(destination: str, file: List[UploadFile], folder: st
destination: "local" or "minio"
file: List of UploadFile objects
folder: Folder name for MinIO uploads
+ index_name: Knowledge base index for conflict resolution
+ user_id: User ID for attachment path isolation
+ uploader_tenant_id: Uploader tenant ID (ASSET_OWNER uses dedicated prefix)
Returns:
tuple: (errors, uploaded_file_paths, uploaded_filenames)
@@ -84,7 +302,9 @@ async def upload_files_impl(destination: str, file: List[UploadFile], folder: st
errors.append(f"Failed to save file: {f.filename}")
elif destination == "minio":
- minio_results = await upload_to_minio(files=file, folder=folder)
+ actual_folder = resolve_minio_upload_folder(
+ folder, user_id, uploader_tenant_id)
+ minio_results = await upload_to_minio(files=file, folder=actual_folder)
for result in minio_results:
if result.get("success"):
uploaded_filenames.append(result.get("file_name"))
@@ -137,8 +357,26 @@ def make_unique_names(original_names: List[str], taken_lower: set) -> List[str]:
return errors, uploaded_file_paths, uploaded_filenames
-async def upload_to_minio(files: List[UploadFile], folder: str) -> List[dict]:
- """Helper function to upload files to MinIO and return results."""
+async def upload_to_minio(
+ files: List[UploadFile],
+ folder: str,
+ user_id: Optional[str] = None,
+ uploader_tenant_id: Optional[str] = None,
+) -> List[dict]:
+ """
+ Helper function to upload files to MinIO and return results.
+
+ Args:
+ files: List of files to upload
+ folder: Storage folder path or resolved MinIO prefix
+ user_id: User ID for attachment path isolation when folder is generic
+ uploader_tenant_id: Uploader tenant ID for ASSET_OWNER attachment prefix
+
+ Returns:
+ List of upload results
+ """
+ actual_folder = resolve_minio_upload_folder(
+ folder, user_id, uploader_tenant_id)
results = []
for f in files:
try:
@@ -148,13 +386,20 @@ async def upload_to_minio(files: List[UploadFile], folder: str) -> List[dict]:
# Convert file content to BytesIO object
file_obj = BytesIO(file_content)
+ # Store original filename before upload
+ original_filename = f.filename or ""
+
# Upload file
result = upload_fileobj(
file_obj=file_obj,
- file_name=f.filename or "",
- prefix=folder
+ file_name=original_filename,
+ prefix=actual_folder,
+ file_size=len(file_content)
)
+ # Preserve original filename in result (upload_fileobj uses it for object name generation)
+ result["original_file_name"] = original_filename
+
# Reset file pointer for potential re-reading
await f.seek(0)
results.append(result)
@@ -166,6 +411,7 @@ async def upload_to_minio(files: List[UploadFile], folder: str) -> List[dict]:
results.append({
"success": False,
"file_name": f.filename,
+ "original_file_name": f.filename,
"error": "An error occurred while processing the file."
})
return results
@@ -206,6 +452,8 @@ def get_llm_model(tenant_id: str):
# Get the tenant config
main_model_config = tenant_config_manager.get_model_config(
key=MODEL_CONFIG_MAPPING["llm"], tenant_id=tenant_id)
+ timeout_seconds = main_model_config.get(
+ "timeout_seconds") if main_model_config else None
long_text_to_text_model = OpenAILongContextModel(
observer=MessageObserver(),
model_id=get_model_name_from_config(main_model_config),
@@ -213,6 +461,7 @@ def get_llm_model(tenant_id: str):
api_key=main_model_config.get("api_key"),
max_context_tokens=main_model_config.get("max_tokens"),
ssl_verify=main_model_config.get("ssl_verify", True),
+ timeout_seconds=timeout_seconds,
)
return long_text_to_text_model
@@ -244,7 +493,8 @@ async def resolve_preview_file(object_name: str) -> Tuple[str, str, int]:
# Office documents - convert to PDF with caching
elif content_type in OFFICE_MIME_TYPES:
- name_without_ext = object_name.rsplit('.', 1)[0] if '.' in object_name else object_name
+ name_without_ext = object_name.rsplit(
+ '.', 1)[0] if '.' in object_name else object_name
hash_suffix = hashlib.md5(object_name.encode()).hexdigest()[:8]
pdf_object_name = f"preview/converted/{name_without_ext}_{hash_suffix}.pdf"
temp_pdf_object_name = f"preview/converting/{name_without_ext}_{hash_suffix}.pdf.tmp"
@@ -258,7 +508,8 @@ async def resolve_preview_file(object_name: str) -> Tuple[str, str, int]:
# Unsupported file type
else:
- raise UnsupportedFileTypeException(f"Unsupported file type for preview: {content_type}")
+ raise UnsupportedFileTypeException(
+ f"Unsupported file type for preview: {content_type}")
def get_preview_stream(actual_object_name: str, start: Optional[int] = None, end: Optional[int] = None):
@@ -282,7 +533,8 @@ def get_preview_stream(actual_object_name: str, start: Optional[int] = None, end
stream = get_file_range(actual_object_name, start, end)
if stream is None:
- raise NotFoundException("File not found or failed to read from storage")
+ raise NotFoundException(
+ "File not found or failed to read from storage")
return stream
@@ -296,7 +548,8 @@ def _is_pdf_cache_valid(pdf_object_name: str) -> bool:
# Verify the cached file is readable by fetching a small range
stream = get_file_range(pdf_object_name, 0, 0)
if stream is None:
- logger.warning(f"Corrupted cache detected (cannot read), deleting: {pdf_object_name}")
+ logger.warning(
+ f"Corrupted cache detected (cannot read), deleting: {pdf_object_name}")
delete_file(pdf_object_name)
return False
@@ -305,7 +558,8 @@ def _is_pdf_cache_valid(pdf_object_name: str) -> bool:
try:
close_fn()
except Exception as e:
- logger.warning(f"Failed to close cache probe stream for {pdf_object_name}: {str(e)}")
+ logger.warning(
+ f"Failed to close cache probe stream for {pdf_object_name}: {str(e)}")
return True
@@ -358,7 +612,8 @@ async def _convert_office_to_cached_pdf(
)
# Atomic move from temp to final location, then clean up temp
- copy_result = copy_file(source_object=temp_pdf_object_name, dest_object=pdf_object_name)
+ copy_result = copy_file(
+ source_object=temp_pdf_object_name, dest_object=pdf_object_name)
if not copy_result.get('success'):
logger.error(
"Failed to finalize converted PDF cache: object=%s, temp=%s, dest=%s, error=%s",
@@ -367,7 +622,8 @@ async def _convert_office_to_cached_pdf(
pdf_object_name,
copy_result.get('error', 'Unknown error'),
)
- raise RuntimeError("Failed to finalize converted PDF cache")
+ raise RuntimeError(
+ "Failed to finalize converted PDF cache")
delete_file(temp_pdf_object_name)
except Exception as e:
@@ -376,7 +632,8 @@ async def _convert_office_to_cached_pdf(
logger.error(f"Office conversion failed: {str(e)}")
if isinstance(e, OfficeConversionException):
raise
- raise OfficeConversionException("Office file conversion failed") from e
+ raise OfficeConversionException(
+ "Office file conversion failed") from e
finally:
# Clean up the file lock (prevents memory leak for many unique files)
async with _conversion_locks_guard:
diff --git a/backend/services/haotian_service.py b/backend/services/haotian_service.py
new file mode 100644
index 000000000..4d86823b5
--- /dev/null
+++ b/backend/services/haotian_service.py
@@ -0,0 +1,114 @@
+"""
+Haotian Service Layer
+
+Implements proxy fetching and normalization for Haotian external knowledge base APIs.
+"""
+
+import logging
+from typing import Any, Dict, List, Tuple
+
+import httpx
+
+logger = logging.getLogger("haotian_service")
+
+_DEFAULT_KNOWLEDGE_BASE_ID = "a8d68fbf-bd6e-5461-a9d1-cf1bb3522e38"
+
+
+def _normalize_list_payload(raw: Dict[str, Any]) -> Dict[str, Any]:
+ """
+ Normalize Haotian list payload to:
+ {
+ "knowledge_sets": [
+ {
+ "name": str,
+ "knowledge_bases": [{"dify_dataset_id": str, "name": str}]
+ }
+ ]
+ }
+
+ When dify_dataset_id is "null", empty, or missing, it is replaced with the default ID; entries without a name, and sets left with no knowledge bases, are dropped.
+ """
+ knowledge_sets = raw.get("knowledge_sets", [])
+ if not isinstance(knowledge_sets, list):
+ knowledge_sets = []
+
+ normalized_sets: List[Dict[str, Any]] = []
+ for ks in knowledge_sets:
+ if not isinstance(ks, dict):
+ continue
+ set_name = str(ks.get("name", "") or "").strip()
+ if not set_name:
+ continue
+
+ bases = ks.get("knowledge_bases", [])
+ if not isinstance(bases, list):
+ bases = []
+
+ normalized_bases: List[Dict[str, Any]] = []
+ for kb in bases:
+ if not isinstance(kb, dict):
+ continue
+ dataset_id = str(kb.get("dify_dataset_id", "") or "").strip()
+ kb_name = str(kb.get("name", "") or "").strip()
+ if not kb_name:
+ continue
+ if dataset_id == "null" or not dataset_id:
+ dataset_id = _DEFAULT_KNOWLEDGE_BASE_ID
+ normalized_bases.append(
+ {"dify_dataset_id": dataset_id, "name": kb_name}
+ )
+
+ if normalized_bases:
+ normalized_sets.append(
+ {"name": set_name, "knowledge_bases": normalized_bases}
+ )
+
+ return {"knowledge_sets": normalized_sets}
+
+
+async def fetch_haotian_knowledge_sets_impl(
+ list_url: str,
+ external_authorization: str,
+ timeout_s: float = 20.0,
+) -> Dict[str, Any]:
+ """
+ Fetch knowledge sets from the external Haotian list API.
+ """
+ if not list_url or not isinstance(list_url, str):
+ raise ValueError("list_url is required and must be a non-empty string")
+ if not external_authorization or not isinstance(external_authorization, str):
+ raise ValueError(
+ "authorization is required and must be a non-empty string"
+ )
+
+ headers = {"Authorization": external_authorization}
+ async with httpx.AsyncClient(timeout=timeout_s, follow_redirects=True, trust_env=False) as client:
+ resp = await client.get(list_url, headers=headers)
+ if resp.status_code >= 400:
+ raise RuntimeError(
+ f"Haotian list API HTTP error: {resp.status_code}"
+ )
+ data = resp.json()
+ if not isinstance(data, dict):
+ raise RuntimeError("Haotian list API returned non-object JSON")
+ return _normalize_list_payload(data)
+
+
+async def test_haotian_connection_impl(
+ list_url: str,
+ external_authorization: str,
+ timeout_s: float = 10.0,
+) -> Tuple[bool, str]:
+ """
+ Test Haotian connection by calling list_url once.
+ """
+ try:
+ await fetch_haotian_knowledge_sets_impl(
+ list_url=list_url,
+ external_authorization=external_authorization,
+ timeout_s=timeout_s,
+ )
+ return (True, "")
+ except Exception as e:
+ return (False, str(e))
+
diff --git a/backend/services/image_service.py b/backend/services/image_service.py
index 8decbd541..fdef3b081 100644
--- a/backend/services/image_service.py
+++ b/backend/services/image_service.py
@@ -1,5 +1,9 @@
+import base64
+import ipaddress
import logging
+import socket
from http import HTTPStatus
+from urllib.parse import urlparse, urlunparse
import aiohttp
@@ -13,7 +17,119 @@
logger = logging.getLogger("image_service")
+def _validate_loopback_url(decoded_url: str) -> str | None:
+ """Validate that ``decoded_url`` is a genuine loopback URL and return a
+ rewritten URL whose host is a literal IPv4 loopback address, or ``None``
+ when the input is not safe to fetch directly.
+
+ This is a defense-in-depth check for the fast-path that bypasses the
+ data-processing service. The fast-path is only intended for loopback
+ images (e.g. served by an in-process component), so we must verify:
+
+ * The scheme is ``http`` or ``https``.
+ * The hostname resolves to one or more IPv4 addresses, and **every**
+ resolved address falls inside the standard IPv4 loopback range
+ ``127.0.0.0/8``. Mixed results are rejected to prevent an attacker
+ from racing DNS to a private address.
+ * The URL is rewritten so the host portion is a literal loopback IP.
+ This both (a) removes the user-controlled hostname from the final
+ request URL that ``aiohttp`` issues, and (b) blocks DNS rebinding
+ attacks where the hostname is re-resolved to a private address
+ between validation and the actual ``GET``.
+ """
+ try:
+ parsed = urlparse(decoded_url)
+ except Exception:
+ return None
+
+ if parsed.scheme not in {"http", "https"}:
+ return None
+
+ hostname = parsed.hostname
+ if not hostname:
+ return None
+
+ try:
+ resolved_infos = socket.getaddrinfo(hostname, None)
+ except socket.gaierror:
+ return None
+
+ if not resolved_infos:
+ return None
+
+ safe_addresses: list[str] = []
+ for info in resolved_infos:
+ sockaddr = info[4]
+ candidate = sockaddr[0]
+ try:
+ ip = ipaddress.ip_address(candidate)
+ except ValueError:
+ return None
+ if ip.version != 4 or not ip.is_loopback:
+ return None
+ safe_addresses.append(candidate)
+
+ # Prefer the literal 127.0.0.1 to keep the rewritten URL stable when
+ # the hostname resolves to multiple loopback aliases.
+ chosen_ip = (
+ "127.0.0.1" if "127.0.0.1" in safe_addresses else safe_addresses[0]
+ )
+
+ port = parsed.port
+ netloc = f"{chosen_ip}:{port}" if port is not None else chosen_ip
+
+ return urlunparse(
+ (
+ parsed.scheme,
+ netloc,
+ parsed.path,
+ parsed.params,
+ parsed.query,
+ parsed.fragment,
+ )
+ )
+
+
+async def _fetch_image_directly(safe_url: str):
+ """Fetch an image from a previously validated loopback URL.
+
+ ``safe_url`` MUST be the output of :func:`_validate_loopback_url` so that
+ it contains a literal loopback IPv4 address and is no longer
+ user-controlled. Redirects are disabled and ``trust_env`` is off to
+ ensure the request never leaks to a private/external host through
+ proxy variables or HTTP 30x responses.
+ """
+ timeout = aiohttp.ClientTimeout(total=10)
+ async with aiohttp.ClientSession(
+ timeout=timeout, trust_env=False
+ ) as session:
+ async with session.get(safe_url, allow_redirects=False) as response:
+ if response.status != HTTPStatus.OK:
+ error_text = await response.text()
+ logger.error(
+ "Failed to fetch loopback image directly: %s", error_text
+ )
+ return {"success": False, "error": "Failed to fetch image"}
+
+ content = await response.read()
+ content_type = response.headers.get("Content-Type", "image/jpeg")
+ return {
+ "success": True,
+ "base64": base64.b64encode(content).decode("utf-8"),
+ "content_type": content_type,
+ }
+
+
async def proxy_image_impl(decoded_url: str):
+ # Fast path: only for loopback URLs, fetch directly. This avoids an
+ # extra hop through the data-processing service for local images. For
+ # any other URL (including all external/knowledge-base images such as
+ # AIDP), fall back to the data-processing service proxy, which is the
+ # existing safe path that CodeQL does not flag.
+ safe_url = _validate_loopback_url(decoded_url)
+ if safe_url is not None:
+ return await _fetch_image_directly(safe_url)
+
# Create session to call the data processing service
async with aiohttp.ClientSession() as session:
# Call the data processing service to load the image
@@ -31,7 +147,11 @@ async def proxy_image_impl(decoded_url: str):
def get_vlm_model(tenant_id: str):
- # Get the tenant config
+ """Return the configured image understanding model for AnalyzeImageTool.
+
+ The first multimodal model slot is still stored under MODEL_CONFIG_MAPPING["vlm"]
+ for compatibility, but it is the user-facing image understanding configuration.
+ """
vlm_model_config = tenant_config_manager.get_model_config(
key=MODEL_CONFIG_MAPPING["vlm"], tenant_id=tenant_id)
if not vlm_model_config:
@@ -48,3 +168,27 @@ def get_vlm_model(tenant_id: str):
max_tokens=512,
ssl_verify=vlm_model_config.get("ssl_verify", True),
)
+
+
+def get_image_understanding_model(tenant_id: str):
+ return get_vlm_model(tenant_id=tenant_id)
+
+
+def get_video_understanding_model(tenant_id: str):
+ """Return the configured video understanding model for multimodal tools."""
+ vlm_model_config = tenant_config_manager.get_model_config(
+ key=MODEL_CONFIG_MAPPING["vlm3"], tenant_id=tenant_id)
+ if not vlm_model_config:
+ return None
+ return OpenAIVLModel(
+ observer=MessageObserver(),
+ model_id=get_model_name_from_config(
+ vlm_model_config) if vlm_model_config else "",
+ api_base=vlm_model_config.get("base_url", ""),
+ api_key=vlm_model_config.get("api_key", ""),
+ temperature=0.7,
+ top_p=0.7,
+ frequency_penalty=0.5,
+ max_tokens=512,
+ ssl_verify=vlm_model_config.get("ssl_verify", True),
+ )
diff --git a/backend/services/invitation_service.py b/backend/services/invitation_service.py
index 58a45d369..4011c67cc 100644
--- a/backend/services/invitation_service.py
+++ b/backend/services/invitation_service.py
@@ -19,8 +19,15 @@
)
from database.user_tenant_db import get_user_tenant_by_user_id
from database.group_db import query_group_ids_by_user
+from database.role_permission_db import check_role_permission
+from consts.const import (
+ ASSET_OWNER_TENANT_ID,
+ ASSET_OWNER_INVITE_CODE_TYPE,
+ ENABLE_ASSET_OWNER_ROLE,
+)
from consts.exceptions import NotFoundException, UnauthorizedError, DuplicateError
from services.group_service import get_tenant_default_group_id
+from services.asset_owner_visibility import require_asset_owner_enabled
from utils.str_utils import convert_string_to_list
logger = logging.getLogger(__name__)
@@ -41,7 +48,7 @@ def create_invitation_code(
Args:
tenant_id (str): Tenant ID
- code_type (str): Invitation code type (ADMIN_INVITE, DEV_INVITE, USER_INVITE)
+ code_type (str): Invitation code type (ADMIN_INVITE, DEV_INVITE, USER_INVITE, ASSET_OWNER_INVITE)
invitation_code (Optional[str]): Invitation code, auto-generated if None
group_ids (Optional[List[int]]): Associated group IDs
capacity (int): Invitation code capacity
@@ -58,9 +65,21 @@ def create_invitation_code(
ValueError: When code_type is invalid
"""
# Validate code_type
- valid_code_types = ["ADMIN_INVITE", "DEV_INVITE", "USER_INVITE"]
+ valid_code_types = [
+ "ADMIN_INVITE",
+ "DEV_INVITE",
+ "USER_INVITE",
+ ASSET_OWNER_INVITE_CODE_TYPE,
+ ]
+ if ENABLE_ASSET_OWNER_ROLE:
+ valid_code_types.append(ASSET_OWNER_INVITE_CODE_TYPE)
if code_type not in valid_code_types:
- raise ValueError(f"Invalid code_type: {code_type}. Must be one of {valid_code_types}")
+ raise ValueError(
+ f"Invalid code_type: {code_type}. Must be one of {valid_code_types}")
+
+ if code_type == ASSET_OWNER_INVITE_CODE_TYPE and not ENABLE_ASSET_OWNER_ROLE:
+ raise UnauthorizedError(
+ "ASSET_OWNER feature is not enabled")
# Get user information
user_info = get_user_tenant_by_user_id(user_id)
@@ -70,10 +89,16 @@ def create_invitation_code(
user_role = user_info.get("user_role", "USER")
# Check permission based on code_type
- if code_type == "ADMIN_INVITE" and user_role not in ["SU"]:
- raise UnauthorizedError(f"User role {user_role} not authorized to create ADMIN_INVITE codes")
+ if code_type in ["ADMIN_INVITE", ASSET_OWNER_INVITE_CODE_TYPE] and user_role not in ["SU"]:
+ raise UnauthorizedError(
+ f"User role {user_role} not authorized to create ADMIN_INVITE codes")
elif code_type in ["DEV_INVITE", "USER_INVITE"] and user_role not in ["SU", "ADMIN"]:
- raise UnauthorizedError(f"User role {user_role} not authorized to create {code_type} codes")
+ raise UnauthorizedError(
+ f"User role {user_role} not authorized to create {code_type} codes")
+
+ if code_type == ASSET_OWNER_INVITE_CODE_TYPE:
+ tenant_id = ASSET_OWNER_TENANT_ID
+ group_ids = []
# Set default group_ids based on code_type if not provided
if group_ids is None:
@@ -95,7 +120,8 @@ def create_invitation_code(
# Check if invitation code already exists
if query_invitation_by_code(invitation_code):
- raise DuplicateError(f"Invitation code '{invitation_code}' already exists")
+ raise DuplicateError(
+ f"Invitation code '{invitation_code}' already exists")
# Create invitation (status will be set automatically)
invitation_id = add_invitation(
@@ -112,11 +138,13 @@ def create_invitation_code(
# Automatically update status based on expiry date and capacity
update_invitation_code_status(invitation_id)
- logger.info(f"Created invitation code {invitation_code} (type: {code_type}) for tenant {tenant_id} by user {user_id}")
+ logger.info(
+ f"Created invitation code {invitation_code} (type: {code_type}) for tenant {tenant_id} by user {user_id}")
# Get the final invitation info with correct status
invitation_info = query_invitation_by_id(invitation_id)
- normalized_info = _normalize_invitation_data(invitation_info) if invitation_info else None
+ normalized_info = _normalize_invitation_data(
+ invitation_info) if invitation_info else None
return {
"invitation_id": invitation_id,
@@ -154,8 +182,18 @@ def update_invitation_code(
raise UnauthorizedError(f"User {user_id} not found")
user_role = user_info.get("user_role", "USER")
- if user_role not in ["SU", "ADMIN"]:
- raise UnauthorizedError(f"User role {user_role} not authorized to update invitation codes")
+
+ invitation_info = query_invitation_by_id(invitation_id)
+ if not invitation_info:
+ raise NotFoundException(f"Invitation {invitation_id} not found")
+
+ code_type = invitation_info.get("code_type")
+ if code_type == ASSET_OWNER_INVITE_CODE_TYPE and user_role not in ["SU"]:
+ raise UnauthorizedError(
+ f"User role {user_role} not authorized to update invitation codes")
+ elif user_role not in ["SU", "ADMIN"]:
+ raise UnauthorizedError(
+ f"User role {user_role} not authorized to update invitation codes")
# Update invitation code
success = modify_invitation(
@@ -165,7 +203,8 @@ def update_invitation_code(
)
if success:
- logger.info(f"Updated invitation code {invitation_id} by user {user_id}")
+ logger.info(
+ f"Updated invitation code {invitation_id} by user {user_id}")
# Automatically update status after successful update
update_invitation_code_status(invitation_id)
@@ -193,15 +232,19 @@ def delete_invitation_code(invitation_id: int, user_id: str) -> bool:
raise UnauthorizedError(f"User {user_id} not found")
user_role = user_info.get("user_role", "USER")
- if user_role not in ["SU", "ADMIN"]:
- raise UnauthorizedError(
- f"User role {user_role} not authorized to delete invitation codes")
- # Check if invitation exists
invitation_info = query_invitation_by_id(invitation_id)
if not invitation_info:
raise NotFoundException(f"Invitation {invitation_id} not found")
+ code_type = invitation_info.get("code_type")
+ if code_type == ASSET_OWNER_INVITE_CODE_TYPE and user_role not in ["SU"]:
+ raise UnauthorizedError(
+ f"User role {user_role} not authorized to delete invitation codes")
+ elif user_role not in ["SU", "ADMIN"]:
+ raise UnauthorizedError(
+ f"User role {user_role} not authorized to delete invitation codes")
+
# Delete invitation code
success = remove_invitation(
invitation_id=invitation_id, updated_by=user_id)
@@ -306,7 +349,8 @@ def _calculate_current_status(invitation_data: Dict[str, Any]) -> Dict[str, Any]
if current_time.date() > expiry_datetime.date():
new_status = "EXPIRE"
except (ValueError, AttributeError, TypeError):
- logger.warning(f"Invalid expiry_date format for invitation {invitation_id}: {expiry_date}")
+ logger.warning(
+ f"Invalid expiry_date format for invitation {invitation_id}: {expiry_date}")
# Check capacity
if usage_count >= capacity:
@@ -346,7 +390,7 @@ def use_invitation_code(
) -> Dict[str, Any]:
"""
Use an invitation code by creating a usage record.
-
+
Args:
invitation_code (str): Invitation code to use
user_id (str): User ID using the code
@@ -359,7 +403,8 @@ def use_invitation_code(
"""
# Check if invitation is available
if not check_invitation_available(invitation_code):
- raise NotFoundException(f"Invitation code {invitation_code} is not available")
+ raise NotFoundException(
+ f"Invitation code {invitation_code} is not available")
# Get invitation code details
invitation_info = query_invitation_by_code(invitation_code)
@@ -426,7 +471,8 @@ def update_invitation_code_status(invitation_id: int) -> bool:
if current_time.date() > expiry_datetime.date():
new_status = "EXPIRE"
except (ValueError, AttributeError, TypeError):
- logger.warning(f"Invalid expiry_date format for invitation {invitation_id}: {expiry_date}")
+ logger.warning(
+ f"Invalid expiry_date format for invitation {invitation_id}: {expiry_date}")
# Check capacity if not expired
if new_status == "IN_USE" and usage_count >= capacity:
@@ -439,7 +485,8 @@ def update_invitation_code_status(invitation_id: int) -> bool:
updates={"status": new_status},
updated_by="system"
)
- logger.info(f"Updated invitation code {invitation_id} status to {new_status}")
+ logger.info(
+ f"Updated invitation code {invitation_id} status to {new_status}")
return True
return False
@@ -468,7 +515,8 @@ def _generate_unique_invitation_code(length: int = 6) -> str:
attempts += 1
- raise RuntimeError(f"Failed to generate unique invitation code after {max_attempts} attempts")
+ raise RuntimeError(
+ f"Failed to generate unique invitation code after {max_attempts} attempts")
def get_invitations_list(
@@ -506,9 +554,13 @@ def get_invitations_list(
# Permission logic:
# - If tenant_id is provided: ADMIN or SU can view that tenant's invitations
# - If tenant_id is not provided: Only SU can view all invitations
- if tenant_id:
- # If tenant_id is specified, user must be ADMIN/SU
- if user_role not in ["SU", "ADMIN"]:
+ if tenant_id is not None:
+ # ASSET_OWNER_TENANT_ID virtual tenant_id is used for asset-owner invites (SU only)
+ if tenant_id == ASSET_OWNER_TENANT_ID:
+ if user_role not in ["SU"]:
+ raise UnauthorizedError(
+ f"User role {user_role} not authorized to view asset owner invitations")
+ elif user_role not in ["SU", "ADMIN"]:
raise UnauthorizedError(
f"User role {user_role} not authorized to view invitation lists")
else:
@@ -531,6 +583,7 @@ def get_invitations_list(
# Normalize each invitation item in the list
if result and "items" in result:
- result["items"] = [_normalize_invitation_data(item) for item in result["items"]]
+ result["items"] = [_normalize_invitation_data(
+ item) for item in result["items"]]
return result
diff --git a/backend/services/mcp_management_service.py b/backend/services/mcp_management_service.py
new file mode 100644
index 000000000..a62de250a
--- /dev/null
+++ b/backend/services/mcp_management_service.py
@@ -0,0 +1,334 @@
+import logging
+from datetime import datetime
+from typing import Any, Dict, List
+from urllib.parse import urlencode
+
+import aiohttp
+
+from consts.exceptions import (
+ MCPConnectionError,
+ McpNotFoundError,
+ McpValidationError,
+)
+from database.community_mcp_db import (
+ create_mcp_community_record,
+ delete_mcp_community_record_by_id,
+ get_mcp_community_record_by_id_and_tenant,
+ get_mcp_community_records,
+ get_mcp_community_tag_stats,
+ list_mcp_community_records_by_tenant,
+ update_mcp_community_record_by_id,
+)
+from database.remote_mcp_db import get_mcp_record_by_id_and_tenant
+
+logger = logging.getLogger("mcp_management_service")
+
+MCP_REGISTRY_BASE_URL = "https://registry.modelcontextprotocol.io/v0.1/servers"
+
+
+# ---------------------------------------------------------------------------
+# Community MCP Service Functions
+# ---------------------------------------------------------------------------
+
+async def list_community_mcp_services(
+    *,
+    search: str | None = None,
+    tag: str | None = None,
+    transport_type: str | None = None,
+    cursor: str | None = None,
+    limit: int = 30,
+) -> Dict[str, Any]:
+    """Return one page of community MCP records in the camelCase API shape.
+
+    Args:
+        search: Search keyword
+        tag: Filter by tag
+        transport_type: Filter by transport (url or container)
+        cursor: Pagination cursor
+        limit: Items per page
+
+    Returns:
+        Dictionary with count (items in this page), nextCursor, and items
+    """
+    page = get_mcp_community_records(
+        search=search,
+        tag=tag,
+        transport_type=transport_type,
+        cursor=cursor,
+        limit=limit,
+    )
+
+    # Rename DB columns to the API keys; JSON columns that are not dicts are
+    # returned as None, and missing/empty tags as an empty list.
+    services = [{
+        "communityId": record.get("community_id"),
+        "name": record.get("mcp_name"),
+        "version": record.get("version"),
+        "description": record.get("description"),
+        "status": "active",
+        "createdAt": record.get("create_time"),
+        "updatedAt": record.get("update_time"),
+        "source": "community",
+        "transportType": record.get("transport_type"),
+        "serverUrl": record.get("mcp_server"),
+        "configJson": record.get("config_json") if isinstance(record.get("config_json"), dict) else None,
+        "registryJson": record.get("registry_json") if isinstance(record.get("registry_json"), dict) else None,
+        "tags": record.get("tags") or [],
+    } for record in page.get("items", [])]
+
+    return {
+        "count": len(services),
+        "nextCursor": page.get("nextCursor"),
+        "items": services,
+    }
+
+
+def list_community_mcp_tag_stats() -> List[Dict[str, Any]]:
+    """Get community MCP tag statistics.
+
+    Takes no arguments and applies no tenant filter of its own; the call is
+    forwarded unchanged to ``get_mcp_community_tag_stats``.
+
+    Returns:
+        List of tag statistics
+    """
+    return get_mcp_community_tag_stats()
+
+
+async def publish_community_mcp_service(
+    *,
+    tenant_id: str,
+    user_id: str,
+    mcp_id: int,
+    name: str | None = None,
+    description: str | None = None,
+    version: str | None = None,
+    tags: List[str] | None = None,
+    mcp_server: str | None = None,
+    config_json: Dict[str, Any] | None = None,
+) -> int:
+    """Copy a tenant's local MCP record into the community table.
+
+    Each optional argument (``name``, ``description``, ``version``, ``tags``,
+    ``mcp_server``, ``config_json``) replaces the matching value of the local
+    MCP row; leaving it as ``None`` keeps the local value. ``registry_json``
+    is always taken from the local row (dict values only).
+
+    Args:
+        tenant_id: Tenant ID
+        user_id: User ID
+        mcp_id: MCP record ID to publish
+        name: Optional community display name override
+        description: Optional description override
+        version: Optional version override
+        tags: Optional tags override
+        mcp_server: Optional remote MCP URL override
+        config_json: Optional container config override
+
+    Returns:
+        Community record ID
+
+    Raises:
+        McpNotFoundError: If MCP record is not found
+    """
+    local = get_mcp_record_by_id_and_tenant(mcp_id=mcp_id, tenant_id=tenant_id)
+    if not local:
+        raise McpNotFoundError("MCP record not found")
+
+    def _override(value: Any, key: str) -> Any:
+        # An explicit argument wins; None falls back to the local row.
+        return value if value is not None else local.get(key)
+
+    local_registry = local.get("registry_json")
+    if not isinstance(local_registry, dict):
+        local_registry = None
+    local_config = local.get("config_json")
+    if not isinstance(local_config, dict):
+        local_config = None
+    effective_config = config_json if isinstance(config_json, dict) else local_config
+
+    # Remote MCP table may omit transport_type; community list still needs it for filters.
+    fallback_transport = "url" if effective_config is None else "container"
+
+    # Field values handed to create_mcp_community_record as mcp_data.
+    payload = {
+        "mcp_name": _override(name, "mcp_name"),
+        "mcp_server": _override(mcp_server, "mcp_server"),
+        "version": _override(version, "version"),
+        "registry_json": local_registry,
+        "transport_type": local.get("transport_type") or fallback_transport,
+        "config_json": effective_config,
+        "tags": _override(tags, "tags"),
+        "description": _override(description, "description"),
+    }
+
+    return create_mcp_community_record(
+        mcp_data=payload,
+        tenant_id=tenant_id,
+        user_id=user_id,
+    )
+
+
+async def update_community_mcp_service(
+    *,
+    tenant_id: str,
+    user_id: str,
+    community_id: int,
+    name: str | None,
+    description: str | None,
+    tags: List[str] | None,
+    version: str | None,
+    registry_json: Dict[str, Any] | None,
+) -> None:
+    """Update the metadata of a community MCP record owned by the tenant.
+
+    Args:
+        tenant_id: Tenant ID
+        user_id: User ID
+        community_id: Community record ID
+        name: New MCP service name
+        description: MCP service description
+        tags: MCP tags
+        version: MCP version
+        registry_json: Registry metadata JSON; a non-dict keeps the stored value
+
+    Raises:
+        McpNotFoundError: If community MCP record is not found
+    """
+    existing = get_mcp_community_record_by_id_and_tenant(community_id=community_id, tenant_id=tenant_id)
+    if not existing:
+        raise McpNotFoundError("Community MCP record not found")
+
+    # config_json is not editable here: the stored value is written back (None unless a dict).
+    stored_config = existing.get("config_json")
+    kept_config = stored_config if isinstance(stored_config, dict) else None
+    merged_registry = existing.get("registry_json") if not isinstance(registry_json, dict) else registry_json
+    update_mcp_community_record_by_id(
+        community_id=community_id,
+        tenant_id=tenant_id,
+        user_id=user_id,
+        name=name,
+        description=description,
+        tags=tags,
+        version=version,
+        registry_json=merged_registry,
+        config_json=kept_config,
+    )
+
+
+async def delete_community_mcp_service(
+    *,
+    tenant_id: str,
+    user_id: str,
+    community_id: int,
+) -> None:
+    """Remove a community MCP record after confirming the tenant has it.
+
+    Args:
+        tenant_id: Tenant ID
+        user_id: User ID
+        community_id: Community record ID
+
+    Raises:
+        McpNotFoundError: If community MCP record is not found
+    """
+    # Both DB calls are keyed by the same (community_id, tenant_id) pair.
+    lookup = {"community_id": community_id, "tenant_id": tenant_id}
+    if not get_mcp_community_record_by_id_and_tenant(**lookup):
+        raise McpNotFoundError("Community MCP record not found")
+    delete_mcp_community_record_by_id(
+        **lookup,
+        user_id=user_id,
+    )
+
+
+async def list_my_community_mcp_services(
+    *,
+    tenant_id: str,
+) -> Dict[str, Any]:
+    """List the community MCP records that belong to the given tenant.
+
+    Args:
+        tenant_id: Tenant ID
+
+    Returns:
+        Dictionary with count and items
+    """
+    rows = list_mcp_community_records_by_tenant(tenant_id=tenant_id)
+    # Rename DB columns to the API keys; non-dict JSON columns become None.
+    published = [{
+        "communityId": entry.get("community_id"),
+        "name": entry.get("mcp_name"),
+        "version": entry.get("version"),
+        "description": entry.get("description"),
+        "status": "active",
+        "createdAt": entry.get("create_time"),
+        "updatedAt": entry.get("update_time"),
+        "source": "community",
+        "transportType": entry.get("transport_type"),
+        "serverUrl": entry.get("mcp_server"),
+        "configJson": entry.get("config_json") if isinstance(entry.get("config_json"), dict) else None,
+        "registryJson": entry.get("registry_json") if isinstance(entry.get("registry_json"), dict) else None,
+        "tags": entry.get("tags") or [],
+    } for entry in rows]
+
+    return {
+        "count": len(published),
+        "items": published,
+    }
+
+
+# ---------------------------------------------------------------------------
+# Registry Functions
+# ---------------------------------------------------------------------------
+
+async def list_registry_mcp_services(
+    *,
+    search: str | None = None,
+    include_deleted: bool = False,
+    updated_since: str | None = None,
+    version: str | None = None,
+    cursor: str | None = None,
+    limit: int = 30,
+) -> Dict[str, Any]:
+    """Query the official MCP Registry and return its server listing.
+
+    Args:
+        search: Search keyword
+        include_deleted: Include deleted records
+        updated_since: Filter by update time
+        version: Filter by version
+        cursor: Pagination cursor
+        limit: Items per page
+
+    Returns:
+        Dictionary with servers and metadata
+
+    Raises:
+        RuntimeError: If the registry responds with an HTTP status >= 400
+    """
+    query: Dict[str, Any] = {"limit": limit}
+    optional_filters = {
+        "search": search,
+        "include_deleted": "true" if include_deleted else None,
+        "updated_since": updated_since,
+        "version": version,
+        "cursor": cursor,
+    }
+    query.update({key: value for key, value in optional_filters.items() if value})
+
+    request_url = f"{MCP_REGISTRY_BASE_URL}?{urlencode(query)}"
+    async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=20), trust_env=True) as session:
+        async with session.get(request_url) as response:
+            if response.status >= 400:
+                raise RuntimeError(f"Registry request failed with status {response.status}")
+            payload = await response.json(content_type=None)
+
+    if not isinstance(payload, dict):
+        return {"servers": [], "metadata": {}}
+    servers = payload.get("servers")
+    metadata = payload.get("metadata")
+    return {
+        "servers": servers if isinstance(servers, list) else [],
+        "metadata": metadata if isinstance(metadata, dict) else {},
+    }
diff --git a/backend/services/model_health_service.py b/backend/services/model_health_service.py
index 9214a1ffa..2dc276aeb 100644
--- a/backend/services/model_health_service.py
+++ b/backend/services/model_health_service.py
@@ -1,8 +1,10 @@
import logging
+from typing import Optional
from nexent.core import MessageObserver
from nexent.core.models import OpenAIModel, OpenAIVLModel
-from nexent.core.models.embedding_model import JinaEmbedding, OpenAICompatibleEmbedding
+from nexent.core.models.embedding_model import JinaEmbedding, OpenAICompatibleEmbedding, DashScopeMultimodalEmbedding
+from nexent.monitor import set_monitoring_context, set_monitoring_operation
from nexent.core.models.rerank_model import OpenAICompatibleRerank
from services.voice_service import get_voice_service
@@ -13,6 +15,39 @@
logger = logging.getLogger("model_health_service")
+DASHSCOPE_MODEL_FACTORY = "dashscope"
+TOKENPONY_MODEL_FACTORY = "tokenpony"
+PROVIDER_CATALOG_HEALTHCHECK_FACTORIES = {DASHSCOPE_MODEL_FACTORY, TOKENPONY_MODEL_FACTORY}
+PROVIDER_CATALOG_HEALTHCHECK_TYPES = {"vlm", "vlm2", "vlm3"}
+
+EMBEDDING_TYPES = {"embedding", "multi_embedding"}
+
+
+def _normalize_embedding_url(base_url: str) -> str:
+    """Ensure an embedding base URL contains the /embeddings endpoint segment.
+
+    Empty values, and URLs that already contain "/embeddings" anywhere, are
+    returned unchanged. Otherwise trailing slashes are stripped and
+    "/embeddings" is appended, e.g. https://api.jina.ai/v1 becomes
+    https://api.jina.ai/v1/embeddings.
+    """
+    needs_suffix = bool(base_url) and "/embeddings" not in base_url
+    return f"{base_url.rstrip('/')}/embeddings" if needs_suffix else base_url
+
+
+def _infer_model_factory(model_type: str, base_url: str, current_factory: Optional[str] = None) -> Optional[str]:
+    """Return "dashscope" when base_url mentions dashscope, else current_factory.
+
+    The URL match takes precedence over current_factory, and model_type is
+    accepted but not consulted. A missing (None/empty) base_url is treated as
+    "no match" instead of raising AttributeError on ``.lower()``.
+    """
+    # Callers may pass None when the config carries an explicit null base_url.
+    if "dashscope" in (base_url or "").lower():
+        return DASHSCOPE_MODEL_FACTORY
+
+    return current_factory
+
async def _embedding_dimension_check(
model_name: str,
@@ -20,44 +55,92 @@ async def _embedding_dimension_check(
model_base_url: str,
model_api_key: str,
ssl_verify: bool = True,
+ model_factory: Optional[str] = None,
+ timeout_seconds: Optional[float] = None,
):
- # Test connectivity based on different model types
+ if model_type in EMBEDDING_TYPES:
+ model_base_url = _normalize_embedding_url(model_base_url)
+
+ effective_timeout = timeout_seconds if timeout_seconds else 5.0
+
if model_type == "embedding":
+ # DashScope text embedding models use OpenAI-compatible endpoint, same as generic
embedding = await OpenAICompatibleEmbedding(
model_name=model_name,
base_url=model_base_url,
api_key=model_api_key,
embedding_dim=0,
ssl_verify=ssl_verify,
- ).dimension_check()
+ ).dimension_check(timeout=effective_timeout)
if len(embedding) > 0:
return len(embedding[0])
logging.warning(
f"Embedding dimension check for {model_name} gets empty response")
return 0
elif model_type == "multi_embedding":
- embedding = await JinaEmbedding(
- model_name=model_name,
- base_url=model_base_url,
- api_key=model_api_key,
- embedding_dim=0,
- ssl_verify=ssl_verify,
- ).dimension_check()
- if len(embedding) > 0:
+ model_factory_lower = (model_factory or "").lower()
+ if model_factory_lower == "dashscope":
+ embedding_instance = DashScopeMultimodalEmbedding(
+ api_key=model_api_key,
+ base_url=model_base_url,
+ model_name=model_name,
+ embedding_dim=0,
+ ssl_verify=ssl_verify,
+ )
+ else:
+ embedding_instance = JinaEmbedding(
+ api_key=model_api_key,
+ base_url=model_base_url,
+ model_name=model_name,
+ embedding_dim=0,
+ ssl_verify=ssl_verify,
+ )
+ embedding = await embedding_instance.dimension_check(timeout=effective_timeout)
+ if isinstance(embedding, list) and len(embedding) > 0 and isinstance(embedding[0], list):
return len(embedding[0])
logging.warning(
- f"Embedding dimension check for {model_name} gets empty response")
+ f"Embedding dimension check for {model_name} gets unexpected response: {type(embedding)}, value: {embedding}")
return 0
else:
raise ValueError(f"Unsupported model type: {model_type}")
+async def _provider_catalog_connectivity_check(
+    model_name: str,
+    model_type: str,
+    model_api_key: str,
+    model_factory: Optional[str],
+) -> bool:
+    """Check that model_name is listed in the provider's own model catalog."""
+    provider = (model_factory or "").lower()
+    if provider not in PROVIDER_CATALOG_HEALTHCHECK_FACTORIES:
+        return False
+
+    from services.model_provider_service import get_provider_models
+
+    request = {"provider": provider, "model_type": model_type, "api_key": model_api_key}
+    catalog = await get_provider_models(request)
+    if not catalog:
+        return False
+
+    # Any entry flagged with "_error" fails the check, even if the id is listed.
+    if any(entry.get("_error") for entry in catalog):
+        return False
+    wanted = model_name.lower()
+    return wanted in (str(entry.get("id", "")).lower() for entry in catalog)
+
+
async def _perform_connectivity_check(
model_name: str,
model_type: str,
model_base_url: str,
model_api_key: str,
ssl_verify: bool = True,
+ model_factory: Optional[str] = None,
+ model_appid: Optional[str] = None,
+ access_token: Optional[str] = None,
+ display_name: Optional[str] = None,
+ timeout_seconds: Optional[float] = None,
) -> bool:
"""
Perform specific model connectivity check
@@ -67,6 +150,8 @@ async def _perform_connectivity_check(
model_base_url: Model base URL
model_api_key: API key
ssl_verify: Whether to verify SSL certificates (default: True)
+ display_name: Optional display name for monitoring
+ timeout_seconds: Optional request timeout in seconds
Returns:
bool: Connectivity check result
"""
@@ -74,33 +159,53 @@ async def _perform_connectivity_check(
model_base_url = model_base_url.replace(
LOCALHOST_NAME, DOCKER_INTERNAL_HOST).replace(LOCALHOST_IP, DOCKER_INTERNAL_HOST)
+ # Normalize embedding URLs by appending /embeddings if not present
+ if model_type in EMBEDDING_TYPES:
+ model_base_url = _normalize_embedding_url(model_base_url)
+
+ effective_timeout = timeout_seconds if timeout_seconds else 5.0
connectivity: bool
- # Test connectivity based on different model types
if model_type == "embedding":
- connectivity = len(await OpenAICompatibleEmbedding(
+ emb = await OpenAICompatibleEmbedding(
model_name=model_name,
base_url=model_base_url,
api_key=model_api_key,
embedding_dim=0,
- ssl_verify=ssl_verify
- ).dimension_check()) > 0
+ ssl_verify=ssl_verify,
+ ).dimension_check(timeout=effective_timeout)
+ connectivity = len(emb) > 0 and len(emb[0]) > 0
elif model_type == "multi_embedding":
- connectivity = len(await JinaEmbedding(
- model_name=model_name,
- base_url=model_base_url,
- api_key=model_api_key,
- embedding_dim=0,
- ssl_verify=ssl_verify
- ).dimension_check()) > 0
+ model_factory_lower = (model_factory or "").lower()
+ if model_factory_lower == "dashscope":
+ embedding = DashScopeMultimodalEmbedding(
+ api_key=model_api_key,
+ base_url=model_base_url,
+ model_name=model_name,
+ embedding_dim=0,
+ ssl_verify=ssl_verify,
+ )
+ else:
+ embedding = JinaEmbedding(
+ api_key=model_api_key,
+ base_url=model_base_url,
+ model_name=model_name,
+ embedding_dim=0,
+ ssl_verify=ssl_verify,
+ )
+ emb = await embedding.dimension_check(timeout=effective_timeout)
+ connectivity = len(emb) > 0 and len(emb[0]) > 0
elif model_type == "llm":
observer = MessageObserver()
+ set_monitoring_operation("connectivity_check",
+ display_name=display_name)
connectivity = await OpenAIModel(
observer,
model_id=model_name,
api_base=model_base_url,
api_key=model_api_key,
- ssl_verify=ssl_verify
+ ssl_verify=ssl_verify,
+ timeout_seconds=timeout_seconds,
).check_connectivity()
elif model_type == "rerank":
rerank_model = OpenAICompatibleRerank(
@@ -110,8 +215,22 @@ async def _perform_connectivity_check(
ssl_verify=ssl_verify,
)
connectivity = await rerank_model.connectivity_check()
- elif model_type == "vlm":
+ elif model_type in ("vlm", "vlm2", "vlm3"):
+ if (
+ model_type in PROVIDER_CATALOG_HEALTHCHECK_TYPES
+ and (model_factory or "").lower() in PROVIDER_CATALOG_HEALTHCHECK_FACTORIES
+ ):
+ connectivity = await _provider_catalog_connectivity_check(
+ model_name=model_name,
+ model_type=model_type,
+ model_api_key=model_api_key,
+ model_factory=model_factory,
+ )
+ return connectivity
+
observer = MessageObserver()
+ set_monitoring_operation("connectivity_check",
+ display_name=display_name)
connectivity = await OpenAIVLModel(
observer,
model_id=model_name,
@@ -119,53 +238,121 @@ async def _perform_connectivity_check(
api_key=model_api_key,
ssl_verify=ssl_verify
).check_connectivity()
- elif model_type in ["tts", "stt"]:
+ elif model_type == 'stt':
+ voice_service = get_voice_service()
+
+ # Determine STT provider based on model_factory
+ use_volc = model_factory and model_factory.lower() in ["volcengine", "volcano", "volcengine", "火山引擎"]
+
+ if use_volc:
+ # Use Volcano STT with appid and access_token
+ connectivity = await voice_service.check_voice_connectivity(
+ model_type="stt",
+ stt_config={
+ "model_factory": model_factory,
+ "model_appid": model_appid,
+ "access_token": access_token,
+ "base_url": model_base_url
+ }
+ )
+ else:
+ # Use Ali STT (default) with api_key and model name
+ connectivity = await voice_service.check_voice_connectivity(
+ model_type="stt",
+ stt_config={
+ "api_key": model_api_key,
+ "base_url": model_base_url,
+ "model": model_name
+ }
+ )
+ elif model_type == 'tts':
voice_service = get_voice_service()
- connectivity = await voice_service.check_voice_connectivity(model_type)
+
+ # Determine TTS provider based on model_factory
+ use_volc = model_factory and model_factory.lower() in ["volcengine", "volcano", "volcengine", "火山引擎"]
+
+ if use_volc:
+ # Use Volcano TTS with appid and access_token
+ connectivity = await voice_service.check_voice_connectivity(
+ model_type="tts",
+ stt_config={
+ "model_factory": model_factory,
+ "model_appid": model_appid,
+ "access_token": access_token,
+ "base_url": model_base_url
+ }
+ )
+ else:
+ # Use Ali TTS (default) with api_key and model name
+ connectivity = await voice_service.check_voice_connectivity(
+ model_type="tts",
+ stt_config={
+ "api_key": model_api_key,
+ "base_url": model_base_url,
+ "model": model_name
+ }
+ )
else:
raise ValueError(f"Unsupported model type: {model_type}")
return connectivity
-async def check_model_connectivity(display_name: str, tenant_id: str) -> dict:
+async def check_model_connectivity(display_name: str, tenant_id: str, model_type: str = None) -> dict:
try:
# Query the database using display_name and tenant context from app layer
- model = get_model_by_display_name(display_name, tenant_id=tenant_id)
+ model = get_model_by_display_name(display_name, tenant_id=tenant_id, model_type=model_type)
if not model:
- raise LookupError(f"Model configuration not found for {display_name}")
+ raise LookupError(
+ f"Model configuration not found for {display_name}")
- # Still use repo/name concatenation for model instantiation
repo, name = model.get("model_repo", ""), model.get("model_name", "")
model_name = f"{repo}/{name}" if repo else name
- # Set model to "detecting" status
- update_data = {
- "connect_status": ModelConnectStatusEnum.DETECTING.value}
+ update_data = {"connect_status": ModelConnectStatusEnum.DETECTING.value}
update_model_record(model["model_id"], update_data)
model_type = model["model_type"]
model_base_url = model["base_url"]
model_api_key = model["api_key"]
- ssl_verify = model.get("ssl_verify", True) # Default to True if not present
+ # Default to True if not present
+ ssl_verify = model.get("ssl_verify", True)
+ model_factory = model.get("model_factory")
+ model_appid = model.get("model_appid")
+ access_token = model.get("access_token")
+ timeout_seconds = model.get("timeout_seconds")
try:
- # Use the common connectivity check function
+ set_monitoring_context(tenant_id=tenant_id)
+
+ ssl_verify_fallback = False
connectivity = await _perform_connectivity_check(
- model_name, model_type, model_base_url, model_api_key, ssl_verify
+ model_name, model_type, model_base_url, model_api_key, ssl_verify,
+ model_factory, model_appid, access_token, display_name, timeout_seconds,
)
+ if not connectivity and ssl_verify:
+ ssl_verify_fallback = True
+ connectivity = await _perform_connectivity_check(
+ model_name, model_type, model_base_url, model_api_key, False,
+ model_factory, model_appid, access_token, display_name, timeout_seconds,
+ )
except Exception as e:
- update_data = {"connect_status": ModelConnectStatusEnum.UNAVAILABLE.value}
+ update_data = {
+ "connect_status": ModelConnectStatusEnum.UNAVAILABLE.value}
logger.error(f"Error checking model connectivity: {str(e)}")
update_model_record(model["model_id"], update_data)
raise e
if connectivity:
- logger.info(f"CONNECTED: {model_name}; Base URL: {model.get('base_url')}; API Key: {model.get('api_key')}")
+ logger.info(
+ f"CONNECTED: {model_name}")
else:
- logger.warning(f"UNCONNECTED: {model_name}; Base URL: {model.get('base_url')}; API Key: {model.get('api_key')}")
+ logger.warning(
+ f"UNCONNECTED: {model_name}")
connect_status = ModelConnectStatusEnum.AVAILABLE.value if connectivity else ModelConnectStatusEnum.UNAVAILABLE.value
update_data = {"connect_status": connect_status}
+ if ssl_verify_fallback:
+ update_data["ssl_verify"] = False
update_model_record(model["model_id"], update_data)
return {
"connectivity": connectivity,
@@ -174,9 +361,9 @@ async def check_model_connectivity(display_name: str, tenant_id: str) -> dict:
except Exception as e:
logger.error(f"Error checking model connectivity: {str(e)}")
if 'model' in locals() and model:
- update_data = {"connect_status": ModelConnectStatusEnum.UNAVAILABLE.value}
+ update_data = {
+ "connect_status": ModelConnectStatusEnum.UNAVAILABLE.value}
update_model_record(model["model_id"], update_data)
- # Propagate for app layer to translate into HTTP
raise e
@@ -184,33 +371,40 @@ async def check_model_connectivity(display_name: str, tenant_id: str) -> dict:
async def verify_model_config_connectivity(model_config: dict):
"""
- Verify the connectivity of the model configuration, do not save to the database
- Args:
- model_config: Model configuration dictionary, containing necessary connection parameters
- Returns:
- dict: Contains the result of the connectivity test and error message if failed
+ Verify the connectivity of the model configuration, do not save to the database.
"""
try:
model_name = model_config.get("model_name", "")
model_type = model_config["model_type"]
- model_base_url = model_config["base_url"]
+ model_base_url = model_config.get("base_url", "")
model_api_key = model_config["api_key"]
- ssl_verify = model_config.get("ssl_verify", True) # Default to True if not present
+ # Default to True if not present
+ ssl_verify = model_config.get("ssl_verify", True)
+ model_factory = model_config.get("model_factory")
+ model_appid = model_config.get("model_appid")
+ access_token = model_config.get("access_token")
+ # Get timeout from model config if present
+ timeout_seconds = model_config.get("timeout_seconds")
+
+ # Infer model_factory from base_url when not provided
+ model_factory = _infer_model_factory(model_type, model_base_url, model_config.get("model_factory"))
try:
- # Use the common connectivity check function
connectivity = await _perform_connectivity_check(
- model_name, model_type, model_base_url, model_api_key, ssl_verify
+ model_name, model_type, model_base_url, model_api_key, ssl_verify,
+ model_factory, model_appid, access_token, None, timeout_seconds,
)
if not connectivity and ssl_verify:
connectivity = await _perform_connectivity_check(
- model_name, model_type, model_base_url, model_api_key, False
+ model_name, model_type, model_base_url, model_api_key, False,
+ model_factory, model_appid, access_token, None, timeout_seconds,
)
if not connectivity:
+ error_msg = f"Failed to connect to model '{model_name}' at {model_base_url}. Please verify the URL, API key, and network connection."
return {
"connectivity": False,
"model_name": model_name,
- "error": f"Failed to connect to model '{model_name}' at {model_base_url}. Please verify the URL, API key, and network connection."
+ "error": f"Failed to connect to model '{model_name}'. Please verify the URL, API key, and network connection."
}
return {
@@ -219,7 +413,8 @@ async def verify_model_config_connectivity(model_config: dict):
}
except ValueError as e:
error_msg = str(e)
- logger.warning(f"UNCONNECTED: {model_name}; Base URL: {model_base_url}; API Key: {model_api_key}; Error: {error_msg}")
+ logger.warning(
+ f"UNCONNECTED: {model_name}; Error: {error_msg}")
return {
"connectivity": False,
"model_name": model_name,
@@ -244,13 +439,26 @@ async def embedding_dimension_check(model_config: dict):
try:
ssl_verify = model_config.get("ssl_verify", True)
+ model_factory = _infer_model_factory(model_type, model_base_url, model_config.get("model_factory"))
+ timeout_seconds = model_config.get("timeout_seconds")
dimension = await _embedding_dimension_check(
- model_name, model_type, model_base_url, model_api_key, ssl_verify
+ model_name, model_type, model_base_url, model_api_key, ssl_verify,
+ model_factory=model_factory, timeout_seconds=timeout_seconds
)
+ # Fallback to ssl_verify=False if initial check fails
+ if dimension == 0 and ssl_verify:
+ dimension = await _embedding_dimension_check(
+ model_name, model_type, model_base_url, model_api_key, False,
+ model_factory=model_factory, timeout_seconds=timeout_seconds
+ )
+ if dimension == 0:
+ logger.error(f"Embedding dimension check returned 0 for model: {model_name}")
+ return None
return dimension
except ValueError as e:
- logger.error(f"Error checking embedding dimension: {str(e)}")
- return 0
+ logger.error(f"Error checking embedding dimension for {model_name}: {str(e)}")
+ return None
except Exception as e:
- logger.error(f"Error checking embedding dimension: {model_name}; Base URL: {model_base_url}; Error: {str(e)}")
- return 0
+ logger.error(
+ f"Error checking embedding dimension for {model_name}: {str(e)}")
+ return None
diff --git a/backend/services/model_management_service.py b/backend/services/model_management_service.py
index d012803be..1511a9301 100644
--- a/backend/services/model_management_service.py
+++ b/backend/services/model_management_service.py
@@ -3,23 +3,29 @@
from consts.const import LOCALHOST_IP, LOCALHOST_NAME, DOCKER_INTERNAL_HOST
from consts.model import ModelConnectStatusEnum
-from consts.provider import ProviderEnum, SILICON_BASE_URL, DASHSCOPE_BASE_URL, TOKENPONY_BASE_URL
+from consts.provider import (
+ ProviderEnum,
+ SILICON_BASE_URL,
+ DASHSCOPE_BASE_URL,
+ DASHSCOPE_REALTIME_BASE_URL,
+ TOKENPONY_BASE_URL,
+)
from database.model_management_db import (
create_model_record,
delete_model_record,
- get_model_by_display_name,
+ get_model_by_name_factory,
get_models_by_display_name,
get_model_records,
get_models_by_tenant_factory_type,
- update_model_record,
+ update_model_record
)
from services.model_provider_service import (
prepare_model_dict,
- merge_existing_model_tokens,
+ merge_existing_model_attributes,
get_provider_models,
)
-from services.model_health_service import embedding_dimension_check
+from services.model_health_service import embedding_dimension_check, _infer_model_factory
from utils.model_name_utils import (
add_repo_to_name,
split_repo_name,
@@ -31,6 +37,23 @@
logger = logging.getLogger("model_management_service")
+INDEPENDENT_MULTIMODAL_MODEL_TYPES = {"vlm", "vlm2", "vlm3"}
+
+
+def _has_display_name_conflict(existing_models: List[Dict[str, Any]], model_type: Optional[str]) -> bool:
+    """Tell whether records already using a display name block a new model.
+
+    Types listed in ``INDEPENDENT_MULTIMODAL_MODEL_TYPES`` may reuse a display
+    name held by a *different* type from that same set; every other overlap
+    counts as a conflict.
+
+    Args:
+        existing_models: Records that already carry the display name.
+        model_type: Type of the model about to be created.
+
+    Returns:
+        bool: True when the display name must be rejected.
+    """
+    if not existing_models:
+        return False
+    if model_type not in INDEPENDENT_MULTIMODAL_MODEL_TYPES:
+        # Outside the multimodal slots any existing record is a conflict
+        return True
+
+    for record in existing_models:
+        record_type = record.get("model_type")
+        # Same slot already taken
+        if record_type == model_type:
+            return True
+        # Name is held by a model outside the multimodal slots
+        if record_type not in INDEPENDENT_MULTIMODAL_MODEL_TYPES:
+            return True
+    return False
+
async def create_model_for_tenant(user_id: str, tenant_id: str, model_data: Dict[str, Any]):
"""Create a single model record for the given tenant.
@@ -45,9 +68,19 @@ async def create_model_for_tenant(user_id: str, tenant_id: str, model_data: Dict
model_base_url.replace(LOCALHOST_NAME, DOCKER_INTERNAL_HOST)
.replace(LOCALHOST_IP, DOCKER_INTERNAL_HOST)
)
- model_data['ssl_verify'] = True
+ # Auto-set ssl_verify based on api_key:
+ # - Empty api_key (local/LAN services) -> ssl_verify=False
+ # - "open/router" URL -> ssl_verify=False
+ # - Otherwise -> ssl_verify=True
+ model_api_key = model_data.get("api_key", "")
+ if not model_api_key or "open/router" in model_base_url:
+ model_data["ssl_verify"] = False
+ else:
+ model_data["ssl_verify"] = True
+
+ # Set model_factory to modelengine when using open/router URL
if "open/router" in model_base_url:
- model_data['ssl_verify'] = False
+ model_data["model_factory"] = "modelengine"
# Split model_name into repo and name
model_repo, model_name = split_repo_name(
model_data["model_name"]) if model_data.get("model_name") else ("", "")
@@ -66,17 +99,31 @@ async def create_model_for_tenant(user_id: str, tenant_id: str, model_data: Dict
# Check display name conflict scoped by tenant
if model_data.get("display_name"):
- existing_model_by_display = get_model_by_display_name(
+ existing_models_by_display = get_models_by_display_name(
model_data["display_name"], tenant_id)
- if existing_model_by_display:
+ if _has_display_name_conflict(existing_models_by_display, model_data.get("model_type")):
logging.error(
f"Name {model_data['display_name']} is already in use, please choose another display name")
raise ValueError(
f"Name {model_data['display_name']} is already in use, please choose another display name")
- # If embedding or multi_embedding, set max_tokens via embedding dimension check
+ # If embedding or multi_embedding, ensure base_url ends with /embeddings
if model_data.get("model_type") in ("embedding", "multi_embedding"):
- model_data["max_tokens"] = await embedding_dimension_check(model_data)
+ base_url = model_data.get("base_url", "")
+ if base_url and "/embeddings" not in base_url:
+ model_data["base_url"] = f"{base_url.rstrip('/')}/embeddings"
+ # Infer model_factory from base_url if not set
+ model_data["model_factory"] = _infer_model_factory(
+ model_data["model_type"], model_data["base_url"], model_data.get("model_factory")
+ )
+ # Get embedding dimension
+ dimension = await embedding_dimension_check(model_data)
+ if dimension is None:
+ raise ValueError(
+ f"Failed to get embedding dimension for model '{model_data.get('display_name', model_data.get('model_name'))}'. "
+ "Please verify the URL, API key, and network connection."
+ )
+ model_data["max_tokens"] = dimension
# Set default chunk_batch if not provided
if model_data.get("chunk_batch") is None:
model_data["chunk_batch"] = 10
@@ -114,8 +161,8 @@ async def create_provider_models_for_tenant(tenant_id: str, provider_request: Di
# Get provider model list
model_list = await get_provider_models(provider_request)
- # Merge existing model's max_tokens attribute
- model_list = merge_existing_model_tokens(
+ # Merge existing model's attributes (max_tokens, api_key, timeout_seconds, concurrency_limit)
+ model_list = merge_existing_model_attributes(
model_list, tenant_id, provider_request["provider"], provider_request["model_type"])
# Sort model list by ID
@@ -143,7 +190,7 @@ async def batch_create_models_for_tenant(user_id: str, tenant_id: str, batch_pay
# ModelEngine models carry their own base_url in each model dict
model_url = ""
elif provider == ProviderEnum.DASHSCOPE.value:
- model_url = DASHSCOPE_BASE_URL
+ model_url = DASHSCOPE_REALTIME_BASE_URL if model_type in ("stt", "tts") else DASHSCOPE_BASE_URL
elif provider == ProviderEnum.TOKENPONY.value:
model_url = TOKENPONY_BASE_URL
else:
@@ -153,6 +200,13 @@ async def batch_create_models_for_tenant(user_id: str, tenant_id: str, batch_pay
tenant_id, provider, model_type)
model_list_ids = {model.get("id")
for model in model_list} if model_list else set()
+ existing_model_map = {
+ add_repo_to_name(
+ model_repo=model["model_repo"],
+ model_name=model["model_name"],
+ ): model
+ for model in existing_model_list
+ }
# Delete existing models not present
for model in existing_model_list:
@@ -162,22 +216,23 @@ async def batch_create_models_for_tenant(user_id: str, tenant_id: str, batch_pay
# Create or update new models
for model in model_list:
+ model["model_type"] = model_type
_, model_name = split_repo_name(
model["id"]) if model.get("id") else ("", "")
model_repo, model_name_only = split_repo_name(
model.get("id", "")) if model.get("id") else ("", "")
model_display_name = add_repo_to_name(model_repo, model_name_only)
if model_name:
- existing_model_by_display = get_model_by_display_name(
- model_display_name, tenant_id)
- if existing_model_by_display:
+ existing_model = existing_model_map.get(model_display_name)
+ if existing_model:
+ update_data = {}
# Check if max_tokens has changed
- existing_max_tokens = existing_model_by_display.get(
- "max_tokens")
+ existing_max_tokens = existing_model.get("max_tokens")
new_max_tokens = model.get("max_tokens")
if new_max_tokens is not None and existing_max_tokens != new_max_tokens:
- update_model_record(existing_model_by_display["model_id"], {
- "max_tokens": new_max_tokens}, user_id)
+ update_data["max_tokens"] = new_max_tokens
+ if update_data:
+ update_model_record(existing_model["model_id"], update_data, user_id)
continue
model_dict = await prepare_model_dict(
@@ -251,6 +306,15 @@ async def update_single_model_for_tenant(
m.get("model_type") == "multi_embedding" for m in existing_models
)
+ # Auto-set ssl_verify based on api_key if provided:
+ # - Empty api_key -> ssl_verify=False
+ # - Otherwise -> ssl_verify=True
+ if "api_key" in model_data:
+ if not model_data["api_key"]:
+ model_data["ssl_verify"] = False
+ else:
+ model_data["ssl_verify"] = True
+
if has_multi_embedding:
# Update both embedding and multi_embedding records
for model in existing_models:
@@ -276,12 +340,36 @@ async def update_single_model_for_tenant(
async def batch_update_models_for_tenant(user_id: str, tenant_id: str, model_list: List[Dict[str, Any]]):
-    """Batch update models for a tenant."""
+    """Batch update models for a tenant by model_id or model_name.
+
+    Each entry is addressed by ``model_id`` when present, otherwise by
+    ``model_name``. An integer or all-digit identifier is used as the primary
+    key; any other value is read as ``model_name`` or ``model_repo/model_name``
+    and resolved through ``get_model_by_name_factory``. Entries whose name
+    cannot be resolved are logged and skipped. The ``model_id`` and
+    ``model_name`` keys themselves are never written back.
+
+    Args:
+        user_id: ID of the user performing the update.
+        tenant_id: Tenant that owns the models.
+        model_list: Update payloads, one dict per model.
+
+    Raises:
+        Exception: Wraps any failure, including an entry without identifier.
+    """
     try:
         for model in model_list:
-            update_model_record(model["model_id"], model, user_id, tenant_id)
+            # Build update data excluding id fields
+            update_data = {k: v for k, v in model.items() if k not in ("model_id", "model_name")}
+
+            identifier = model.get("model_id") or model.get("model_name")
+            if not identifier:
+                raise ValueError("Each model must provide a model_id or model_name")
+
+            # Integer primary keys are the payload shape accepted before this
+            # change; int has no isdigit(), so handle it before the string checks.
+            if isinstance(identifier, int):
+                update_model_record(identifier, update_data, user_id, tenant_id)
+                continue
+
+            identifier = str(identifier)
+            # Numeric string -> primary key
+            if identifier.isdigit():
+                update_model_record(int(identifier), update_data, user_id, tenant_id)
+                continue
+
+            # Parse "model_repo/model_name" format from frontend's model_id field
+            if "/" in identifier:
+                model_repo, model_name = identifier.split("/", 1)
+            else:
+                model_repo, model_name = None, identifier
+
+            logging.debug(f"Updating model by name: model_name={model_name}, model_repo={model_repo}, tenant_id={tenant_id}")
+
+            # Query to get model_id first, then update by primary key
+            model_record = get_model_by_name_factory(model_name, model_repo, tenant_id)
+            if not model_record:
+                logging.warning(f"Model not found: model_name={model_name}, model_repo={model_repo}, tenant_id={tenant_id}")
+                continue
+
+            update_model_record(model_record["model_id"], update_data, user_id, tenant_id)
         logging.debug("Batch update models successfully")
     except Exception as e:
         logging.error(f"Failed to batch update models: {str(e)}")
         raise Exception(f"Failed to batch update models: {str(e)}")
@@ -484,6 +572,3 @@ async def list_models_for_admin(
logging.error(f"Failed to retrieve admin model list: {str(e)}")
raise Exception(f"Failed to retrieve admin model list: {str(e)}")
-
-
-
diff --git a/backend/services/model_provider_service.py b/backend/services/model_provider_service.py
index dbff17082..1aa89fa3b 100644
--- a/backend/services/model_provider_service.py
+++ b/backend/services/model_provider_service.py
@@ -6,7 +6,7 @@
DEFAULT_MAXIMUM_CHUNK_SIZE,
)
from consts.model import ModelConnectStatusEnum, ModelRequest
-from consts.provider import ProviderEnum
+from consts.provider import ProviderEnum, DASHSCOPE_REALTIME_BASE_URL
from database.model_management_db import get_models_by_tenant_factory_type
from services.model_health_service import embedding_dimension_check
from services.providers.base import AbstractModelProvider
@@ -100,11 +100,13 @@ async def prepare_model_dict(provider: str, model: dict, model_url: str, model_a
# Build the canonical representation using the existing Pydantic schema for
# consistency of validation and default handling.
# For embedding/multi_embedding models, max_tokens will be set via connectivity check later,
- # so use 0 as placeholder if not provided
+ # so use 0 as placeholder if not provided.
+ # Set default timeout_seconds to 120 for LLM models (embedding models don't need it).
model_type = model["model_type"]
is_embedding_type = model_type in ["embedding", "multi_embedding"]
max_tokens_value = model.get(
"max_tokens", 0) if not is_embedding_type else 0
+ timeout_seconds_value = 120 if not is_embedding_type else None
model_obj = ModelRequest(
model_factory=provider,
@@ -115,7 +117,8 @@ async def prepare_model_dict(provider: str, model: dict, model_url: str, model_a
display_name=model_display_name,
expected_chunk_size=expected_chunk_size,
maximum_chunk_size=maximum_chunk_size,
- chunk_batch=chunk_batch
+ chunk_batch=chunk_batch,
+ timeout_seconds=timeout_seconds_value
)
model_dict = model_obj.model_dump()
@@ -124,14 +127,18 @@ async def prepare_model_dict(provider: str, model: dict, model_url: str, model_a
# Determine the correct base_url and, for embeddings, update the actual
# dimension by performing a real connectivity check.
if model["model_type"] in ["embedding", "multi_embedding"]:
- if provider != ProviderEnum.MODELENGINE.value:
- # Ensure proper slash between base URL and endpoint
+ if provider == ProviderEnum.DASHSCOPE.value and model["model_type"] == "embedding":
model_dict["base_url"] = f"{model_url.rstrip('/')}/embeddings"
- else:
- # For ModelEngine embedding models, append the embeddings path
+ elif provider == ProviderEnum.MODELENGINE.value:
model_dict["base_url"] = f"{model_url.rstrip('/')}/{MODEL_ENGINE_NORTH_PREFIX}/embeddings"
- # The embedding dimension might differ from the provided max_tokens.
+ elif "/embeddings" in model_url:
+ # URL already contains /embeddings endpoint, use as-is
+ model_dict["base_url"] = model_url.rstrip('/')
+ else:
+ model_dict["base_url"] = f"{model_url.rstrip('/')}/embeddings"
model_dict["max_tokens"] = await embedding_dimension_check(model_dict)
+ elif model["model_type"] in ("stt", "tts") and provider == ProviderEnum.DASHSCOPE.value:
+ model_dict["base_url"] = DASHSCOPE_REALTIME_BASE_URL
elif model["model_type"] == "rerank":
if provider == ProviderEnum.DASHSCOPE.value:
model_dict["base_url"] = f"{model_url.replace('compatible-mode/v1','api/v1').rstrip('/')}/services/rerank/text-rerank/text-rerank"
@@ -155,19 +162,29 @@ async def prepare_model_dict(provider: str, model: dict, model_url: str, model_a
return model_dict
-def merge_existing_model_tokens(model_list: List[dict], tenant_id: str, provider: str, model_type: str) -> List[dict]:
+def merge_existing_model_attributes(
+ model_list: List[dict],
+ tenant_id: str,
+ provider: str,
+ model_type: str,
+ fields: List[str] = None
+) -> List[dict]:
"""
- Merge existing model's max_tokens attribute into the model list.
+ Merge existing model's attributes into the model list.
Args:
model_list: List of models
tenant_id: Tenant ID
provider: Provider
model_type: Model type
+ fields: List of fields to merge (defaults to max_tokens, api_key, timeout_seconds, concurrency_limit)
Returns:
List[dict]: Merged model list
"""
+ if fields is None:
+ fields = ["max_tokens", "api_key", "timeout_seconds", "concurrency_limit"]
+
if model_type == "embedding" or model_type == "multi_embedding":
return model_list
@@ -184,15 +201,35 @@ def merge_existing_model_tokens(model_list: List[dict], tenant_id: str, provider
"/" + existing_model["model_name"]
existing_model_map[model_full_name] = existing_model
- # Iterate through the model list, if the model exists in the existing model list, add max_tokens attribute
+ # Iterate through the model list, merge specified fields from existing models
for model in model_list:
if model.get("id") in existing_model_map:
- model["max_tokens"] = existing_model_map[model.get(
- "id")].get("max_tokens")
+ existing_model = existing_model_map[model.get("id")]
+ for field in fields:
+ if existing_model.get(field) is not None:
+ model[field] = existing_model.get(field)
return model_list
+def merge_existing_model_tokens(model_list: List[dict], tenant_id: str, provider: str, model_type: str) -> List[dict]:
+    """
+    Backward-compatible wrapper that merges only ``max_tokens`` from existing models.
+
+    DEPRECATED: Use merge_existing_model_attributes instead. This function
+    simply delegates to it with the field list restricted to ``["max_tokens"]``.
+
+    Args:
+        model_list: List of models
+        tenant_id: Tenant ID
+        provider: Provider
+        model_type: Model type
+
+    Returns:
+        List[dict]: Merged model list
+    """
+    return merge_existing_model_attributes(
+        model_list=model_list,
+        tenant_id=tenant_id,
+        provider=provider,
+        model_type=model_type,
+        fields=["max_tokens"],
+    )
+
+
# Re-export provider classes for backward compatibility
__all__ = [
"AbstractModelProvider",
@@ -200,6 +237,7 @@ def merge_existing_model_tokens(model_list: List[dict], tenant_id: str, provider
"ModelEngineProvider",
"prepare_model_dict",
"merge_existing_model_tokens",
+ "merge_existing_model_attributes",
"get_provider_models",
"get_model_engine_raw_url",
]
diff --git a/backend/services/northbound_service.py b/backend/services/northbound_service.py
index a6eaed77d..c5493a551 100644
--- a/backend/services/northbound_service.py
+++ b/backend/services/northbound_service.py
@@ -1,31 +1,40 @@
import asyncio
import hashlib
+import json
import logging
import time
from dataclasses import dataclass
-from typing import Any, Dict, Optional
+from os.path import basename
+from typing import Any, Dict, List, Optional
+from fastapi import HTTPException, UploadFile
from fastapi.responses import StreamingResponse
+
+from consts.const import ASSET_OWNER_TENANT_ID
from consts.exceptions import (
LimitExceededError,
UnauthorizedError,
+ ConversationNotFoundError,
)
-from consts.model import AgentRequest
-from database.conversation_db import get_conversation_messages
+from consts.model import AgentRequest, ToolParamsRequest
+from database.conversation_db import get_conversation_messages, get_source_searches_by_message
from database.token_db import log_token_usage, get_latest_usage_metadata
from services.agent_service import (
run_agent_stream,
stop_agent_tasks,
- list_all_agent_info_impl,
get_agent_id_by_name
)
+from services.agent_version_service import list_published_agents_impl
from services.conversation_management_service import (
save_conversation_user,
get_conversation_list_service,
create_new_conversation,
update_conversation_title as update_conversation_title_service,
)
+from services.file_management_service import upload_to_minio, resolve_minio_upload_folder, validate_urls_access
+from database.attachment_db import get_file_url, get_file_size_from_minio
+from nexent.multi_modal.utils import parse_s3_url
logger = logging.getLogger("northbound_service")
@@ -39,6 +48,188 @@ class NorthboundContext:
token_id: int = 0
+def _build_northbound_file_descriptor(
+    upload_result: Dict[str, Any],
+    original_file_name: str = "",
+    file_type: Optional[str] = None,
+    file_size: Optional[int] = None,
+) -> Dict[str, Any]:
+    """Turn one upload result into the attachment descriptor returned to northbound callers.
+
+    Args:
+        upload_result: Result dict for a single uploaded file.
+        original_file_name: Preferred display name; when empty the name comes
+            from ``upload_result["file_name"]`` or the basename of the object name.
+        file_type: Descriptor type; falls back to ``"file"``.
+        file_size: Size to report; when None, ``upload_result["file_size"]`` (or 0) is used.
+
+    Returns:
+        Dict[str, Any]: Descriptor with ``object_name``, ``name``, ``type``,
+        ``size``, ``url``, ``description`` and, when available, ``presigned_url``.
+    """
+    object_name = str(upload_result.get("object_name") or "").strip()
+
+    # Name precedence: caller-supplied name, then upload result, then object basename
+    file_name = original_file_name or str(upload_result.get("file_name") or basename(object_name) or "")
+    size = upload_result.get("file_size", 0) if file_size is None else file_size
+
+    # Frontend-compatible field order
+    descriptor = {
+        "object_name": object_name,
+        "name": file_name,
+        "type": file_type or "file",
+        "size": size,
+        # Relative URL format matching frontend: /nexent/{object_name}
+        "url": f"/nexent/{object_name}",
+        "description": "",
+    }
+
+    presigned_url = upload_result.get("presigned_url")
+    if presigned_url:
+        descriptor["presigned_url"] = presigned_url
+    return descriptor
+
+
+async def upload_files_for_northbound(
+    ctx: NorthboundContext,
+    files: List[UploadFile],
+    folder: str = "attachments",
+) -> Dict[str, Any]:
+    """Upload files for northbound callers and return reusable storage references.
+
+    Args:
+        ctx: Northbound request context; supplies user, tenant and request IDs.
+        files: Files received from the caller.
+        folder: Logical folder, resolved via ``resolve_minio_upload_folder``.
+
+    Returns:
+        Dict[str, Any]: ``message``, ``requestId``, a ``summary`` of
+        total/uploaded/failed counts, and ``files`` with one descriptor per
+        successfully stored file.
+
+    Raises:
+        ValueError: If ``files`` is empty or no upload yielded an object name.
+    """
+    if not files:
+        raise ValueError("No files in the request")
+
+    actual_folder = resolve_minio_upload_folder(folder, ctx.user_id, ctx.tenant_id)
+    results = await upload_to_minio(files=files, folder=actual_folder)
+
+    normalized_files = []
+    # NOTE(review): assumes upload_to_minio returns results in the same order as ``files`` - confirm
+    for result, upload_file in zip(results, files):
+        if not (result.get("success") and result.get("object_name")):
+            continue
+
+        # The key may be present with a None value; treat that like a missing content type
+        content_type = result.get("content_type") or ""
+        file_type = "image" if content_type.startswith("image/") else "file"
+
+        # Prefer the filename recorded by the upload, then the one on the UploadFile
+        original_file_name = result.get("original_file_name") or upload_file.filename or ""
+
+        # A missing, None or zero size falls back to the size carried by the UploadFile
+        file_size = result.get("file_size") or 0
+        if file_size == 0 and getattr(upload_file, "size", None):
+            file_size = upload_file.size
+
+        normalized_files.append(
+            _build_northbound_file_descriptor(
+                result,
+                original_file_name=original_file_name,
+                file_type=file_type,
+                file_size=file_size,
+            )
+        )
+
+    if not normalized_files:
+        raise ValueError("No valid files uploaded")
+
+    success_count = sum(1 for result in results if result.get("success", False))
+    failed_count = len(results) - success_count
+
+    return {
+        "message": f"Processed {len(results)} files",
+        "requestId": ctx.request_id,
+        "summary": {
+            "total": len(results),
+            "uploaded": success_count,
+            "failed": failed_count,
+        },
+        "files": normalized_files,
+    }
+
+
+def _normalize_northbound_attachments(
+    attachments: Optional[List[Any]],
+    user_id: str,
+    tenant_id: str,
+) -> Optional[List[Dict[str, Any]]]:
+    """Convert northbound attachment references into internal minio_files objects.
+
+    Supports two formats:
+    1. List of S3 URL strings (backward compatible): ["s3://nexent/...", "/nexent/...", "attachments/..."]
+    2. List of attachment objects (full metadata): [{"object_name": "...", "name": "...", ...}]
+
+    Args:
+        attachments: Attachment references from the request, or None.
+        user_id: Caller's user ID, used for the access check on string references.
+        tenant_id: Caller's tenant ID, used for the access check on string references.
+
+    Returns:
+        Optional[List[Dict[str, Any]]]: None when ``attachments`` is None,
+        otherwise one normalized dict per attachment.
+
+    Raises:
+        ValueError: If ``attachments`` is not a list, an entry is neither a dict
+            nor a non-empty string, or a string has an unsupported format.
+        PermissionError: If access validation rejects a string reference.
+    """
+    from database.attachment_db import _build_mcp_presigned_url
+
+    if attachments is None:
+        return None
+    if not isinstance(attachments, list):
+        raise ValueError("attachments must be an array")
+
+    normalized_files: List[Dict[str, Any]] = []
+    for attachment in attachments:
+        # Handle dict format (full attachment object)
+        if isinstance(attachment, dict):
+            # NOTE(review): unlike the string branch below, this branch does not call
+            # validate_urls_access for the referenced object_name before a presigned
+            # URL is generated - confirm that this is intended.
+            normalized_file = {
+                "object_name": attachment.get("object_name", ""),
+                "name": attachment.get("name", basename(attachment.get("object_name", ""))),
+                "type": attachment.get("type", "file"),
+                "size": attachment.get("size", 0),
+                "url": attachment.get("url", ""),
+                "description": attachment.get("description", ""),
+            }
+            # Add presigned_url if available, or generate one if we have object_name
+            if "presigned_url" in attachment:
+                normalized_file["presigned_url"] = attachment["presigned_url"]
+            elif normalized_file.get("object_name"):
+                try:
+                    presigned_result = get_file_url(object_name=normalized_file["object_name"], expires=86400)
+                    if presigned_result.get("success") and presigned_result.get("url"):
+                        normalized_file["presigned_url"] = _build_mcp_presigned_url(presigned_result["url"])
+                except Exception as exc:
+                    # Best effort: the attachment is still passed on without a presigned URL,
+                    # but the failure is logged instead of being silently dropped.
+                    logger.warning(
+                        f"Failed to generate presigned URL for attachment {normalized_file['object_name']}: {str(exc)}")
+            normalized_files.append(normalized_file)
+            continue
+
+        # Handle string format (S3 URL)
+        if not isinstance(attachment, str) or not attachment.strip():
+            raise ValueError("attachments must contain non-empty S3 URLs or object paths")
+
+        attachment_url = attachment.strip()
+
+        # Support multiple URL formats:
+        # 1. s3://nexent/attachments/xxx.md
+        # 2. /nexent/attachments/xxx.md
+        # 3. nexent/attachments/xxx.md (bucket-qualified relative path)
+        # 4. attachments/xxx.md (relative path)
+        if attachment_url.startswith("s3://"):
+            try:
+                _, object_name = parse_s3_url(attachment_url)
+            except ValueError as exc:
+                raise ValueError(f"Invalid S3 URL format: {attachment_url}") from exc
+            validate_url = attachment_url
+        elif attachment_url.startswith("/nexent/"):
+            object_name = attachment_url[len("/nexent/"):]
+            validate_url = f"s3://nexent/{object_name}"
+        elif attachment_url.startswith("nexent/"):
+            # Strip the bucket prefix as the "/nexent/" form does; keeping it would
+            # produce "s3://nexent/nexent/..." and "/nexent/nexent/..." references.
+            object_name = attachment_url[len("nexent/"):]
+            validate_url = f"s3://nexent/{object_name}"
+        elif attachment_url.startswith("attachments/"):
+            object_name = attachment_url
+            validate_url = f"s3://nexent/{object_name}"
+        else:
+            raise ValueError(f"Invalid attachment format: {attachment_url}. Expected s3:// URL, /nexent/ path, or attachments/ path")
+
+        try:
+            validate_urls_access([validate_url], user_id, tenant_id)
+            presigned_result = get_file_url(object_name=object_name, expires=86400)
+        except PermissionError as exc:
+            detail = str(exc)
+            # A malformed-URL complaint is reported as bad input rather than as a permission problem
+            if "Invalid S3 URL format" in detail:
+                raise ValueError(detail) from exc
+            raise PermissionError(detail) from exc
+
+        # Get file size from MinIO; fall back to 0 when it cannot be read
+        try:
+            file_size = get_file_size_from_minio(object_name)
+        except Exception:
+            file_size = 0
+
+        # Build frontend-compatible minio_files format
+        file_name = basename(object_name.rstrip("/"))
+        normalized_file = {
+            "object_name": object_name,
+            "name": file_name,
+            "type": "file",
+            "size": file_size,
+            # Use relative URL format matching frontend: /nexent/{object_name}
+            "url": f"/nexent/{object_name}",
+            "description": "",
+        }
+        # Use MCP proxy URL for presigned_url (same as frontend format)
+        if presigned_result.get("success") and presigned_result.get("url"):
+            normalized_file["presigned_url"] = _build_mcp_presigned_url(presigned_result["url"])
+        normalized_files.append(normalized_file)
+
+    return normalized_files
+
+
# -----------------------------
# In-memory idempotency and rate limit placeholders
# -----------------------------
@@ -111,6 +302,12 @@ def _build_idempotency_key(*parts: Any) -> str:
return ":".join(processed)
+def _build_title_update_idempotency_key(tenant_id: str, conversation_id: int, title: str) -> str:
+    """Build an ASCII-safe idempotency key for title updates.
+
+    The title is replaced by the hex SHA-256 digest of its UTF-8 bytes before
+    being combined with the tenant and conversation IDs, so non-ASCII titles
+    never reach the key itself.
+    """
+    digest = hashlib.sha256()
+    digest.update(title.encode("utf-8"))
+    return _build_idempotency_key(tenant_id, str(conversation_id), digest.hexdigest())
+
+
# -----------------------------
# Agent resolver
# -----------------------------
@@ -126,7 +323,9 @@ async def start_streaming_chat(
conversation_id: Optional[int],
agent_name: str,
query: str,
+ attachments: Optional[List[Any]] = None,
meta_data: Optional[Dict[str, Any]] = None,
+ tool_params: Optional[ToolParamsRequest] = None,
idempotency_key: Optional[str] = None
) -> StreamingResponse:
try:
@@ -145,6 +344,11 @@ async def start_streaming_chat(
# Get history according to internal_conversation_id
history_resp = await get_conversation_history_internal(ctx, internal_conversation_id)
agent_id = await get_agent_id_by_name(agent_name=agent_name, tenant_id=ctx.tenant_id)
+ normalized_attachments = _normalize_northbound_attachments(
+ attachments=attachments,
+ user_id=ctx.user_id,
+ tenant_id=ctx.tenant_id,
+ )
# Idempotency: only prevent concurrent duplicate starts
composed_key = idempotency_key or _build_idempotency_key(ctx.tenant_id, str(conversation_id), agent_id, query)
await idempotency_start(composed_key)
@@ -153,8 +357,9 @@ async def start_streaming_chat(
agent_id=agent_id,
query=query,
history=(history_resp.get("data", {})).get("history", []),
- minio_files=None,
+ minio_files=normalized_attachments,
is_debug=False,
+ tool_params=tool_params,
)
# Synchronously persist the user message before starting the stream to avoid race conditions
@@ -257,15 +462,58 @@ async def list_conversations(ctx: NorthboundContext) -> Dict[str, Any]:
return {"message": "success", "data": conversations, "requestId": ctx.request_id}
+def _format_search_record(record: Dict[str, Any]) -> Dict[str, Any]:
+    """Map a stored search-source record onto the search item shape of the API response.
+
+    ``filename`` is only filled for records whose ``source_type`` is ``"file"``.
+    ``score`` is the float value of ``score_overall`` or None. ``published_date``
+    is rendered as ``YYYY-MM-DD`` via ``strftime`` when the value supports it,
+    otherwise as the first ten characters of its string form; it is None when
+    the record has no date.
+    """
+    title = record.get("source_title", "")
+
+    raw_score = record.get("score_overall")
+    score = None if raw_score is None else float(raw_score)
+
+    raw_date = record.get("published_date")
+    if not raw_date:
+        published_date = None
+    elif hasattr(raw_date, "strftime"):
+        published_date = raw_date.strftime("%Y-%m-%d")
+    else:
+        published_date = str(raw_date)[:10]
+
+    return {
+        "title": title,
+        "text": record.get("source_content", ""),
+        "source_type": record.get("source_type", ""),
+        "url": record.get("source_location", ""),
+        "filename": title if record.get("source_type") == "file" else None,
+        "published_date": published_date,
+        "score": score,
+        "tool_sign": record.get("tool_sign", ""),
+        "cite_index": record.get("cite_index"),
+    }
+
+
async def get_conversation_history_internal(ctx: NorthboundContext, conversation_id: int) -> Dict[str, Any]:
"""Internal helper to get conversation history without logging."""
history = get_conversation_messages(conversation_id)
- # Remove unnecessary fields
result = []
for message in history:
+ # Parse minio_files from database (stored as JSON string)
+ minio_files = []
+ raw_minio_files = message.get("minio_files")
+ if raw_minio_files:
+ try:
+ minio_files = json.loads(raw_minio_files) if isinstance(raw_minio_files, str) else raw_minio_files
+ except (json.JSONDecodeError, TypeError):
+ logger.warning(f"Failed to parse minio_files for message {message.get('message_id')}")
+
+ # Fetch search results for this message
+ message_id = message.get("message_id")
+ search_results = []
+ if message_id:
+ try:
+ search_records = get_source_searches_by_message(message_id, user_id=ctx.user_id)
+ search_results = [_format_search_record(r) for r in search_records]
+ except Exception as e:
+ logger.warning(f"Failed to get search records for message {message_id}: {str(e)}")
+
result.append({
"role": message["message_role"],
- "content": message["message_content"]
+ "content": message["message_content"],
+ "minio_files": minio_files,
+ "search": search_results
})
response = {
@@ -284,7 +532,18 @@ async def get_conversation_history(ctx: NorthboundContext, conversation_id: int)
async def get_agent_info_list(ctx: NorthboundContext) -> Dict[str, Any]:
try:
- agent_info_list = await list_all_agent_info_impl(tenant_id=ctx.tenant_id, user_id=ctx.user_id)
+ agent_info_list = await list_published_agents_impl(
+ tenant_id=ctx.tenant_id,
+ user_id=ctx.user_id,
+ )
+ # Match the same scope as /agent/published_list: non-asset-owner tenants
+ # also get the asset owner's published agents merged in.
+ if ctx.tenant_id != ASSET_OWNER_TENANT_ID:
+ asset_agent_list = await list_published_agents_impl(
+ tenant_id=ASSET_OWNER_TENANT_ID,
+ user_id=ctx.user_id,
+ )
+ agent_info_list.extend(asset_agent_list)
# Remove internal information that partner don't need
for agent_info in agent_info_list:
agent_info.pop("agent_id", None)
@@ -298,7 +557,11 @@ async def update_conversation_title(ctx: NorthboundContext, conversation_id: int
composed_key: Optional[str] = None
try:
# Idempotency: avoid concurrent duplicate title update for same conversation
- composed_key = idempotency_key or _build_idempotency_key(ctx.tenant_id, str(conversation_id), title)
+ composed_key = idempotency_key or _build_title_update_idempotency_key(
+ ctx.tenant_id,
+ conversation_id,
+ title,
+ )
await idempotency_start(composed_key)
update_conversation_title_service(conversation_id, title, ctx.user_id)
@@ -324,6 +587,8 @@ async def update_conversation_title(ctx: NorthboundContext, conversation_id: int
}
except LimitExceededError as _:
raise LimitExceededError("Duplicate request is still running, please wait.")
+ except ConversationNotFoundError:
+ raise
except Exception as e:
raise Exception(f"Failed to update conversation title for conversation_id {conversation_id}: {str(e)}")
finally:
diff --git a/backend/services/oauth_service.py b/backend/services/oauth_service.py
new file mode 100644
index 000000000..fe2aa0c42
--- /dev/null
+++ b/backend/services/oauth_service.py
@@ -0,0 +1,577 @@
+import json
+import logging
+import os
+import secrets
+import ssl
+import time
+import urllib.request
+from typing import Any, Dict, List, Optional
+from urllib.parse import urlencode, quote
+
+import jwt
+from pydantic import EmailStr, TypeAdapter, ValidationError as PydanticValidationError
+
+from consts.const import (
+ ASSET_OWNER_INVITE_CODE_TYPE,
+ ASSET_OWNER_ROLE,
+ ASSET_OWNER_TENANT_ID,
+ DEFAULT_TENANT_ID,
+ OAUTH_CALLBACK_BASE_URL,
+ OAUTH_SSL_VERIFY,
+ OAUTH_CA_BUNDLE,
+ SUPABASE_JWT_SECRET,
+)
+from consts.exceptions import OAuthLinkError, OAuthProviderError
+from services.asset_owner_visibility import require_asset_owner_enabled
+from consts.oauth_providers import (
+ get_all_provider_definitions,
+ get_provider_definition,
+ is_provider_enabled,
+)
+from database.oauth_account_db import (
+ delete_oauth_account,
+ get_oauth_account_by_provider,
+ get_soft_deleted_oauth_account,
+ insert_oauth_account,
+ list_oauth_accounts_by_user_id,
+ reactivate_oauth_account,
+ update_oauth_account_tokens,
+)
+from database.user_tenant_db import get_user_tenant_by_user_id, insert_user_tenant
+
+logger = logging.getLogger(__name__)
+
+# Default lifetime (10 minutes, in seconds) of the pending-OAuth JWT; used as
+# the default ``expires_in`` of generate_pending_oauth_token.
+OAUTH_PENDING_EXPIRE_SECONDS = 10 * 60
+# Value of the "purpose" claim written into, and required from, pending-OAuth JWTs.
+OAUTH_PENDING_PURPOSE = "oauth_account_completion"
+# Reusable pydantic validator for e-mail strings (see _validate_email).
+_EMAIL_ADAPTER = TypeAdapter(EmailStr)
+
+
+def _build_ssl_context() -> ssl.SSLContext:
+    """Build the TLS context used for outbound OAuth provider requests.
+
+    Resolution order:
+      1. ``OAUTH_CA_BUNDLE`` names an existing file: verify against that bundle.
+      2. ``OAUTH_SSL_VERIFY`` is falsy: hostname and certificate checks are off.
+      3. Otherwise: the default verifying context.
+
+    Returns:
+        The ``ssl.SSLContext`` selected by the rules above.
+    """
+    if OAUTH_CA_BUNDLE:
+        if os.path.isfile(OAUTH_CA_BUNDLE):
+            return ssl.create_default_context(cafile=OAUTH_CA_BUNDLE)
+        # A configured-but-missing bundle was previously ignored without a
+        # trace; log it so the misconfiguration is visible.
+        logger.warning(
+            "OAUTH_CA_BUNDLE is set to %r but no such file exists; ignoring it",
+            OAUTH_CA_BUNDLE,
+        )
+    if not OAUTH_SSL_VERIFY:
+        # Running without certificate verification is a security-relevant
+        # setting, so leave a trace in the logs.
+        logger.warning("OAuth TLS certificate verification is disabled (OAUTH_SSL_VERIFY is off)")
+        ctx = ssl.create_default_context()
+        ctx.check_hostname = False
+        ctx.verify_mode = ssl.CERT_NONE
+        return ctx
+    return ssl.create_default_context()
+
+
+# Built once at import time; passed as ``context`` to the urlopen calls in
+# _http_post_json and _http_get_json.
+_SSL_CTX = _build_ssl_context()
+
+
+def parse_state(state: str) -> Dict[str, str]:
+    """Split an OAuth ``state`` value of the form ``provider:token[:link_user_id]``.
+
+    Args:
+        state: The raw state string.
+
+    Returns:
+        A dict with the keys ``provider``, ``token`` and ``link_user_id``.
+        Without any colon the whole string is the provider and the other two
+        fields are empty. Everything after the second colon, further colons
+        included, is the ``link_user_id``.
+    """
+    provider, first_colon, remainder = state.partition(":")
+    if not first_colon:
+        return {"provider": state, "token": "", "link_user_id": ""}
+
+    token, _, link_user_id = remainder.partition(":")
+    return {
+        "provider": provider,
+        "token": token,
+        "link_user_id": link_user_id,
+    }
+
+
+def _resolve_field(data: dict, field_path: str) -> Any:
+    """Look up a possibly dotted key path in a nested dict.
+
+    Args:
+        data: The mapping to read from.
+        field_path: Either a plain key or a dotted path such as ``"a.b.c"``.
+
+    Returns:
+        The value found at the path. For a dotted path, ``None`` is returned
+        when a key is missing or when an intermediate value is not a dict.
+    """
+    if "." not in field_path:
+        return data.get(field_path)
+
+    node = data
+    for key in field_path.split("."):
+        if not isinstance(node, dict):
+            return None
+        node = node.get(key)
+    return node
+
+
+def get_supported_providers() -> set:
+    """Return the names of all defined OAuth providers as a set."""
+    provider_names = get_all_provider_definitions().keys()
+    return set(provider_names)
+
+
+def get_enabled_providers() -> List[Dict[str, str]]:
+    """List the providers for which ``is_provider_enabled`` is true.
+
+    Returns:
+        One dict per enabled provider with the keys ``name``, ``display_name``,
+        ``icon`` and ``enabled`` (always ``True``).
+    """
+    return [
+        {
+            "name": definition.name,
+            "display_name": definition.display_name,
+            "icon": definition.icon,
+            "enabled": True,
+        }
+        for definition in get_all_provider_definitions().values()
+        if is_provider_enabled(definition)
+    ]
+
+
+def get_authorize_url(provider: str, link_user_id: str = "") -> str:
+    """Build the provider's authorization URL for starting an OAuth flow.
+
+    Args:
+        provider: Name of the OAuth provider.
+        link_user_id: Optional user id; when non-empty it is appended to the
+            ``state`` value as a third colon-separated field.
+
+    Returns:
+        The authorize URL with client id, redirect URI and state in the query
+        string, followed by the provider's fragment when one is defined.
+
+    Raises:
+        OAuthProviderError: If the provider is unknown or not enabled.
+    """
+    try:
+        definition = get_provider_definition(provider)
+    except KeyError:
+        raise OAuthProviderError(f"Unsupported OAuth provider: {provider}")
+    if not is_provider_enabled(definition):
+        raise OAuthProviderError(f"OAuth provider '{provider}' is not configured")
+
+    callback_url = f"{OAUTH_CALLBACK_BASE_URL}/api/user/oauth/callback?provider={provider}"
+
+    # state = "<provider>:<random token>[:<link_user_id>]"
+    state = f"{provider}:{secrets.token_urlsafe(32)}"
+    if link_user_id:
+        state = f"{state}:{link_user_id}"
+
+    client_id = os.getenv(definition.client_id_env, "")
+    # Providers flagged with encode_redirect_uri get a pre-quoted value here;
+    # urlencode() below then encodes it a second time.
+    if definition.encode_redirect_uri:
+        redirect_uri = quote(callback_url, safe="")
+    else:
+        redirect_uri = callback_url
+
+    # authorize_param_map lets a provider rename the standard parameter names.
+    key_for = definition.authorize_param_map
+    query_params = dict(definition.authorize_params)
+    query_params[key_for.get("client_id", "client_id")] = client_id
+    query_params[key_for.get("redirect_uri", "redirect_uri")] = redirect_uri
+    query_params[key_for.get("state", "state")] = state
+
+    authorize_url = f"{definition.authorize_url}?{urlencode(query_params)}"
+    if definition.authorize_fragment:
+        authorize_url += definition.authorize_fragment
+    return authorize_url
+
+
+def _http_post_json(url: str, data: dict, headers: Optional[dict] = None) -> dict:
+    """POST ``data`` as a JSON body and return the decoded JSON response.
+
+    Args:
+        url: Target URL.
+        data: Payload serialised with ``json.dumps``.
+        headers: Extra headers; they override the JSON defaults on conflict.
+
+    Returns:
+        The response body parsed as JSON (read as UTF-8).
+    """
+    merged_headers = {"Content-Type": "application/json", "Accept": "application/json"}
+    merged_headers.update(headers or {})
+
+    request = urllib.request.Request(
+        url,
+        data=json.dumps(data).encode("utf-8"),
+        headers=merged_headers,
+        method="POST",
+    )
+    with urllib.request.urlopen(request, timeout=15, context=_SSL_CTX) as response:
+        raw_body = response.read()
+    return json.loads(raw_body.decode("utf-8"))
+
+
+def _http_get_json(url: str, headers: Optional[dict] = None) -> dict:
+    """GET ``url`` and return the response body decoded as JSON (UTF-8)."""
+    request_headers = headers if headers else {}
+    request = urllib.request.Request(url, headers=request_headers)
+    with urllib.request.urlopen(request, timeout=15, context=_SSL_CTX) as response:
+        raw_body = response.read()
+    return json.loads(raw_body.decode("utf-8"))
+
+
+def exchange_code_for_provider_token(provider: str, code: str) -> Dict[str, Any]:
+    """Exchange an authorization code for the provider's access token.
+
+    Args:
+        provider: Name of the OAuth provider.
+        code: Authorization code received on the callback.
+
+    Returns:
+        A dict with ``access_token`` and, when the provider definition names a
+        ``token_response_id_key``, ``openid`` (empty string if absent from the
+        response).
+
+    Raises:
+        OAuthProviderError: If the provider is unknown, the provider reports an
+            error, the response is not a JSON object, or the response carries
+            no ``access_token``.
+    """
+    try:
+        definition = get_provider_definition(provider)
+    except KeyError:
+        raise OAuthProviderError(f"Unsupported provider: {provider}")
+
+    callback_url = (
+        f"{OAUTH_CALLBACK_BASE_URL}/api/user/oauth/callback?provider={provider}"
+    )
+    redirect_uri = (
+        quote(callback_url, safe="") if definition.encode_redirect_uri else callback_url
+    )
+
+    param_map = definition.token_params_map
+    use_post = definition.token_method.upper() == "POST"
+
+    # Both transports send the same parameters; only grant_type handling and
+    # the HTTP call differ, so the request is assembled once.
+    request_params = dict(definition.token_extra_params)
+    request_params[param_map.get("client_id", "client_id")] = os.getenv(definition.client_id_env, "")
+    request_params[param_map.get("client_secret", "client_secret")] = os.getenv(
+        definition.client_secret_env, ""
+    )
+    request_params[param_map.get("code", "code")] = code
+    grant_type_key = param_map.get("grant_type", "grant_type")
+    if use_post:
+        # POST keeps a grant_type already supplied through token_extra_params.
+        request_params.setdefault(grant_type_key, "authorization_code")
+    else:
+        request_params[grant_type_key] = "authorization_code"
+    # redirect_uri is sent only when the provider's map names it explicitly.
+    if param_map.get("redirect_uri", "") == "redirect_uri":
+        request_params["redirect_uri"] = redirect_uri
+
+    if use_post:
+        resp = _http_post_json(definition.token_url, data=request_params)
+    else:
+        resp = _http_get_json(f"{definition.token_url}?{urlencode(request_params)}")
+
+    # Anything but a JSON object cannot carry a token; report it as a provider
+    # failure instead of letting a TypeError escape further down.
+    if not isinstance(resp, dict):
+        raise OAuthProviderError(
+            f"{provider} token exchange failed: unexpected response format"
+        )
+
+    if definition.token_error_key and definition.token_error_key in resp:
+        err_msg = resp.get(
+            definition.token_error_message_key, str(resp[definition.token_error_key])
+        )
+        raise OAuthProviderError(f"{provider} token exchange failed: {err_msg}")
+
+    # A response without the token used to surface as a bare KeyError.
+    if "access_token" not in resp:
+        raise OAuthProviderError(
+            f"{provider} token exchange failed: access_token missing from response"
+        )
+
+    result: Dict[str, Any] = {"access_token": resp["access_token"]}
+    if definition.token_response_id_key:
+        result["openid"] = resp.get(definition.token_response_id_key, "")
+
+    return result
+
+
+def get_provider_user_info(
+    provider: str, access_token: str, **kwargs: Any
+) -> Dict[str, Any]:
+    """Fetch the user's profile from the provider and map it to local field names.
+
+    Args:
+        provider: Name of the OAuth provider.
+        access_token: Token obtained from the code exchange.
+        **kwargs: Only ``openid`` is read; it is substituted into the
+            provider's userinfo parameter templates.
+
+    Returns:
+        A dict keyed by the names in the provider's ``userinfo_field_map``;
+        missing or falsy values become ``""`` and ``id`` is always a string.
+
+    Raises:
+        OAuthProviderError: If the provider is unknown.
+    """
+    try:
+        definition = get_provider_definition(provider)
+    except KeyError:
+        raise OAuthProviderError(f"Unsupported provider: {provider}")
+
+    headers: Dict[str, str] = {"Accept": "application/json"}
+    if definition.userinfo_auth_scheme and access_token:
+        headers["Authorization"] = f"{definition.userinfo_auth_scheme} {access_token}"
+
+    # Parameter values are templates that may reference {openid} / {access_token}.
+    openid = kwargs.get("openid", "")
+    url_params = {
+        key: template.format(openid=openid, access_token=access_token)
+        for key, template in definition.userinfo_params.items()
+    }
+    query = urlencode(url_params) if url_params else ""
+
+    # Join with "&" when the base URL already has a query string, "?" otherwise.
+    if not query:
+        separator = ""
+    elif "?" in definition.userinfo_url:
+        separator = "&"
+    else:
+        separator = "?"
+    user_resp = _http_get_json(f"{definition.userinfo_url}{separator}{query}", headers=headers)
+
+    result = {
+        our_key: (_resolve_field(user_resp, provider_key) or "") if provider_key else ""
+        for our_key, provider_key in definition.userinfo_field_map.items()
+    }
+    result["id"] = str(result.get("id", ""))
+
+    needs_email_lookup = definition.userinfo_needs_email_fetch and not result.get("email")
+    if needs_email_lookup:
+        # Best effort: a failed lookup leaves the e-mail empty.
+        try:
+            emails_resp = _http_get_json(
+                definition.userinfo_email_url,
+                headers={"Authorization": f"Bearer {access_token}"},
+            )
+            if isinstance(emails_resp, list) and emails_resp:
+                # Prefer the entry flagged primary, else take the first one.
+                chosen = emails_resp[0]
+                for entry in emails_resp:
+                    if entry.get("primary"):
+                        chosen = entry
+                        break
+                result["email"] = chosen.get("email", "")
+        except Exception:
+            logger.warning(f"Failed to fetch {provider} user emails")
+
+    return result
+
+
+def generate_pending_oauth_token(
+    provider: str,
+    provider_user_id: str,
+    provider_email: Optional[str] = None,
+    provider_username: Optional[str] = None,
+    expires_in: int = OAUTH_PENDING_EXPIRE_SECONDS,
+) -> str:
+    """Issue a short-lived HS256 JWT describing an OAuth login awaiting completion.
+
+    Args:
+        provider: Name of the OAuth provider.
+        provider_user_id: The user's id at the provider.
+        provider_email: E-mail reported by the provider; stored as ``""`` if missing.
+        provider_username: Username reported by the provider; stored as ``""`` if missing.
+        expires_in: Token lifetime in seconds.
+
+    Returns:
+        The JWT signed with ``SUPABASE_JWT_SECRET``.
+
+    Raises:
+        OAuthProviderError: If ``SUPABASE_JWT_SECRET`` is not set.
+    """
+    if not SUPABASE_JWT_SECRET:
+        raise OAuthProviderError("JWT verification is not configured")
+
+    issued_at = int(time.time())
+    claims = {
+        "purpose": OAUTH_PENDING_PURPOSE,
+        "provider": provider,
+        "provider_user_id": provider_user_id,
+        "provider_email": provider_email if provider_email else "",
+        "provider_username": provider_username if provider_username else "",
+        "iat": issued_at,
+        "exp": issued_at + expires_in,
+    }
+    return jwt.encode(claims, SUPABASE_JWT_SECRET, algorithm="HS256")
+
+
+def parse_pending_oauth_token(pending_token: str) -> Dict[str, str]:
+    """Verify a pending-OAuth JWT and return its provider fields as strings.
+
+    Args:
+        pending_token: Token produced by ``generate_pending_oauth_token``.
+
+    Returns:
+        A dict with ``provider``, ``provider_user_id``, ``provider_email`` and
+        ``provider_username``.
+
+    Raises:
+        OAuthLinkError: If the token is missing, expired, invalid, carries a
+            different purpose, or lacks provider / provider_user_id.
+        OAuthProviderError: If ``SUPABASE_JWT_SECRET`` is not set.
+    """
+    if not pending_token:
+        raise OAuthLinkError("OAuth account completion session is missing")
+    if not SUPABASE_JWT_SECRET:
+        raise OAuthProviderError("JWT verification is not configured")
+
+    decode_options = {"verify_exp": True, "verify_aud": False}
+    try:
+        payload = jwt.decode(
+            pending_token,
+            SUPABASE_JWT_SECRET,
+            algorithms=["HS256"],
+            options=decode_options,
+        )
+    except jwt.ExpiredSignatureError as exc:
+        raise OAuthLinkError("OAuth account completion session has expired") from exc
+    except jwt.InvalidTokenError as exc:
+        raise OAuthLinkError("OAuth account completion session is invalid") from exc
+
+    # Reject tokens signed with the same secret but issued for another purpose.
+    if payload.get("purpose") != OAUTH_PENDING_PURPOSE:
+        raise OAuthLinkError("OAuth account completion session is invalid")
+    if not (payload.get("provider") and payload.get("provider_user_id")):
+        raise OAuthLinkError("OAuth account completion session is incomplete")
+
+    wanted_fields = ("provider", "provider_user_id", "provider_email", "provider_username")
+    return {field: str(payload.get(field, "")) for field in wanted_fields}
+
+
+def get_pending_oauth_info(pending_token: str) -> Dict[str, Any]:
+    """Summarise a pending-OAuth token for the account-completion step.
+
+    Returns:
+        A dict with ``provider``, ``provider_username``, ``provider_email`` and
+        ``email_required`` (``True`` when the token carries no e-mail).
+
+    Raises:
+        Whatever ``parse_pending_oauth_token`` raises for a bad token.
+    """
+    claims = parse_pending_oauth_token(pending_token)
+    provider_email = claims.get("provider_email") or ""
+    provider_username = claims.get("provider_username") or ""
+    return {
+        "provider": claims["provider"],
+        "provider_username": provider_username,
+        "provider_email": provider_email,
+        "email_required": not provider_email,
+    }
+
+
+def _validate_email(email: Optional[str]) -> str:
+    """Validate an e-mail address and return it lower-cased.
+
+    Raises:
+        OAuthLinkError: If the address is empty or fails pydantic validation.
+    """
+    if not email:
+        raise OAuthLinkError("Email is required")
+    try:
+        validated = _EMAIL_ADAPTER.validate_python(email)
+    except PydanticValidationError as exc:
+        raise OAuthLinkError("Invalid email address") from exc
+    return str(validated).lower()
+
+
+def find_supabase_user_id_by_email(
+    admin_client: Any, email: Optional[str]
+) -> Optional[str]:
+    """Page through the admin user listing looking for a case-insensitive e-mail match.
+
+    Args:
+        admin_client: Object exposing ``auth.admin.list_users(page=, per_page=)``.
+        email: Address to look for; a falsy value short-circuits to ``None``.
+
+    Returns:
+        The ``id`` of the first user whose e-mail matches, or ``None`` when no
+        page contains a match.
+    """
+    if not email:
+        return None
+
+    wanted = email.lower()
+    page_size = 100
+    page = 1
+    while True:
+        response = admin_client.auth.admin.list_users(page=page, per_page=page_size)
+        # The response is either an object with a ``users`` attribute or the
+        # user collection itself.
+        batch = getattr(response, "users", response) or []
+        if not batch:
+            return None
+
+        for candidate in batch:
+            candidate_email = getattr(candidate, "email", "")
+            if candidate_email and candidate_email.lower() == wanted:
+                return candidate.id
+
+        # A short page means this was the last one.
+        if len(batch) < page_size:
+            return None
+        page += 1
+
+
+def _role_from_invitation_type(code_type: str) -> str:
+    """Map an invitation code type to the role granted to the new user.
+
+    ``ADMIN_INVITE`` maps to ``"ADMIN"`` and ``DEV_INVITE`` to ``"DEV"``. The
+    asset-owner code type maps to ``ASSET_OWNER_ROLE`` after
+    ``require_asset_owner_enabled()`` has been called. Every other value maps
+    to ``"USER"``.
+    """
+    fixed_roles = {"ADMIN_INVITE": "ADMIN", "DEV_INVITE": "DEV"}
+    if code_type in fixed_roles:
+        return fixed_roles[code_type]
+    if code_type != ASSET_OWNER_INVITE_CODE_TYPE:
+        return "USER"
+    require_asset_owner_enabled()
+    return ASSET_OWNER_ROLE
+
+
+async def complete_pending_oauth_account(
+    pending_token: str,
+    password: str,
+    invite_code: str,
+    email: Optional[str] = None,
+) -> Dict[str, Any]:
+    """Create a local user for a pending OAuth login and bind the OAuth identity to it.
+
+    Steps: validate the pending token, password, e-mail and invitation code;
+    resolve tenant and role from the invitation; create the Supabase user;
+    record the tenant membership; consume the invitation; add groups and
+    initialise tenant resources; link the OAuth account; issue a session JWT.
+
+    Args:
+        pending_token: Token produced by ``generate_pending_oauth_token``.
+        password: Password for the new account (at least 6 characters).
+        invite_code: Invitation code; upper-cased before lookup.
+        email: Used only when the pending token carries no provider e-mail.
+
+    Returns:
+        A dict with ``user`` (``id``, ``email``, ``role``) and ``session``
+        (``access_token``, ``refresh_token``, ``expires_at``,
+        ``expires_in_seconds``).
+
+    Raises:
+        OAuthLinkError: On an invalid token, password, e-mail or invitation,
+            an already-bound OAuth account, or an already-registered e-mail.
+        RuntimeError: If the Supabase admin client cannot be obtained.
+    """
+    from services.group_service import add_user_to_groups
+    from services.invitation_service import (
+        check_invitation_available,
+        get_invitation_by_code,
+        use_invitation_code,
+    )
+    from services.tool_configuration_service import init_tool_list_for_tenant
+    from services.user_management_service import generate_tts_stt_4_admin
+    from utils.auth_utils import calculate_expires_at, generate_session_jwt
+
+    pending = parse_pending_oauth_token(pending_token)
+    provider = pending["provider"]
+    provider_user_id = pending["provider_user_id"]
+    provider_email = pending.get("provider_email") or ""
+    provider_username = pending.get("provider_username") or ""
+
+    if len(password or "") < 6:
+        raise OAuthLinkError("Password must be at least 6 characters")
+
+    final_email = _validate_email(provider_email or email)
+    # Tolerate a missing code: it becomes "" and takes the same path as an
+    # empty code instead of crashing on None.upper().
+    normalized_invite_code = (invite_code or "").upper()
+
+    if get_oauth_account_by_provider(provider, provider_user_id):
+        raise OAuthLinkError(f"This {provider} account is already bound to another user")
+
+    if not check_invitation_available(normalized_invite_code):
+        raise OAuthLinkError("Invitation code is invalid or unavailable")
+
+    invitation_info = get_invitation_by_code(normalized_invite_code)
+    if not invitation_info:
+        raise OAuthLinkError("Invitation code is invalid or unavailable")
+
+    try:
+        from utils.auth_utils import get_supabase_admin_client
+
+        admin_client = get_supabase_admin_client()
+    except Exception as exc:
+        # Same error type and message as before, but keep the root cause chained.
+        raise RuntimeError("Supabase admin client not available") from exc
+    if not admin_client:
+        raise RuntimeError("Supabase admin client not available")
+
+    existing_user_id = find_supabase_user_id_by_email(admin_client, final_email)
+    if existing_user_id:
+        raise OAuthLinkError(
+            "Email already exists. Please log in with email and password, "
+            "then link this OAuth account in settings."
+        )
+
+    # Resolve tenant and role BEFORE creating the Supabase user: role
+    # resolution can raise (asset-owner feature disabled), and failing after
+    # creation would leave an orphaned auth user behind.
+    tenant_id = invitation_info["tenant_id"]
+    if invitation_info.get("code_type") == ASSET_OWNER_INVITE_CODE_TYPE:
+        tenant_id = ASSET_OWNER_TENANT_ID
+    user_role = _role_from_invitation_type(invitation_info.get("code_type", "USER_INVITE"))
+    is_asset_owner_registration = user_role == ASSET_OWNER_ROLE
+
+    create_resp = admin_client.auth.admin.create_user(
+        {
+            "email": final_email,
+            "password": password,
+            "email_confirm": True,
+            "user_metadata": {
+                "full_name": provider_username,
+                "provider": provider,
+            },
+        }
+    )
+    supabase_user_id = create_resp.user.id
+
+    insert_user_tenant(
+        user_id=supabase_user_id,
+        tenant_id=tenant_id,
+        user_role=user_role,
+        user_email=final_email,
+    )
+
+    invitation_result = use_invitation_code(normalized_invite_code, supabase_user_id)
+    group_ids = invitation_result.get("group_ids", [])
+    if isinstance(group_ids, str):
+        from utils.str_utils import convert_string_to_list
+
+        group_ids = convert_string_to_list(group_ids)
+    if group_ids and not is_asset_owner_registration:
+        add_user_to_groups(supabase_user_id, group_ids, supabase_user_id)
+
+    if user_role == "ADMIN":
+        await generate_tts_stt_4_admin(tenant_id, supabase_user_id)
+    if not is_asset_owner_registration:
+        await init_tool_list_for_tenant(tenant_id, supabase_user_id)
+
+    create_or_update_oauth_account(
+        user_id=supabase_user_id,
+        provider=provider,
+        provider_user_id=provider_user_id,
+        email=final_email,
+        username=provider_username,
+        tenant_id=tenant_id,
+    )
+
+    expiry_seconds = 3600
+    jwt_token = generate_session_jwt(supabase_user_id, expires_in=expiry_seconds)
+    expires_at = calculate_expires_at(jwt_token)
+
+    return {
+        "user": {
+            "id": str(supabase_user_id),
+            "email": final_email,
+            "role": user_role,
+        },
+        "session": {
+            "access_token": jwt_token,
+            "refresh_token": "",
+            "expires_at": expires_at,
+            "expires_in_seconds": expiry_seconds,
+        },
+    }
+
+
+def create_or_update_oauth_account(
+    user_id: str,
+    provider: str,
+    provider_user_id: str,
+    email: Optional[str] = None,
+    username: Optional[str] = None,
+    tenant_id: Optional[str] = None,
+) -> Dict[str, Any]:
+    """Bind a provider identity to ``user_id``, reusing an existing row when possible.
+
+    Resolution order:
+      1. An active link exists: refuse if it belongs to another user,
+         otherwise refresh the stored username.
+      2. A soft-deleted link exists: reactivate it for ``user_id``.
+      3. Otherwise insert a new link.
+
+    ``tenant_id`` falls back to ``DEFAULT_TENANT_ID`` when reactivating or inserting.
+
+    Returns:
+        The resulting account record.
+
+    Raises:
+        OAuthLinkError: If the identity is already bound to a different user.
+    """
+    active = get_oauth_account_by_provider(provider, provider_user_id)
+    if active and active.get("user_id") != user_id:
+        raise OAuthLinkError(
+            f"This {provider} account is already bound to another user"
+        )
+    if active:
+        update_oauth_account_tokens(
+            provider=provider,
+            provider_user_id=provider_user_id,
+            provider_username=username,
+        )
+        # Prefer the refreshed row; fall back to the one read before the update.
+        return get_oauth_account_by_provider(provider, provider_user_id) or active
+
+    effective_tenant_id = tenant_id or DEFAULT_TENANT_ID
+
+    if get_soft_deleted_oauth_account(provider, provider_user_id):
+        reactivate_oauth_account(
+            provider=provider,
+            provider_user_id=provider_user_id,
+            user_id=user_id,
+            provider_email=email,
+            provider_username=username,
+            tenant_id=effective_tenant_id,
+        )
+        restored = get_oauth_account_by_provider(provider, provider_user_id)
+        if restored:
+            return restored
+        return {"provider": provider, "provider_user_id": provider_user_id, "user_id": user_id}
+
+    return insert_oauth_account(
+        user_id=user_id,
+        provider=provider,
+        provider_user_id=provider_user_id,
+        provider_email=email,
+        provider_username=username,
+        tenant_id=effective_tenant_id,
+    )
+
+
+def ensure_user_tenant_exists(user_id: str, email: str) -> Dict[str, Any]:
+    """Return the user's tenant record, creating a default one if none exists.
+
+    A new record places the user in ``DEFAULT_TENANT_ID`` with role ``"USER"``.
+
+    Returns:
+        The stored record, or a minimal ``user_id`` / ``tenant_id`` dict when
+        the record cannot be read back after the insert.
+    """
+    current = get_user_tenant_by_user_id(user_id)
+    if not current:
+        insert_user_tenant(
+            user_id=user_id,
+            tenant_id=DEFAULT_TENANT_ID,
+            user_role="USER",
+            user_email=email,
+        )
+        logger.info(f"Created user_tenant for new OAuth user {user_id}")
+        current = get_user_tenant_by_user_id(user_id)
+        if not current:
+            return {"user_id": user_id, "tenant_id": DEFAULT_TENANT_ID}
+    return current
+
+
+def list_linked_accounts(user_id: str) -> List[Dict[str, Any]]:
+    """List the OAuth accounts linked to ``user_id`` in API response shape.
+
+    Returns:
+        One dict per account with ``provider``, ``provider_username``,
+        ``provider_email`` and ``linked_at`` (the stringified ``create_time``,
+        ``""`` when absent).
+    """
+    return [
+        {
+            "provider": account["provider"],
+            "provider_username": account.get("provider_username"),
+            "provider_email": account.get("provider_email"),
+            "linked_at": str(account.get("create_time", "")),
+        }
+        for account in list_oauth_accounts_by_user_id(user_id)
+    ]
+
+
+def unlink_account(user_id: str, provider: str) -> bool:
+    """Remove the user's link to ``provider``.
+
+    Returns:
+        ``True`` when ``delete_oauth_account`` reports success.
+
+    Raises:
+        OAuthLinkError: If ``delete_oauth_account`` reports no success.
+    """
+    if delete_oauth_account(user_id, provider):
+        return True
+    raise OAuthLinkError(f"No linked {provider} account found")
diff --git a/backend/services/prompt_service.py b/backend/services/prompt_service.py
index 3706c3cc5..f1564cdbc 100644
--- a/backend/services/prompt_service.py
+++ b/backend/services/prompt_service.py
@@ -1,18 +1,22 @@
import json
import logging
import queue
+import sys
import threading
from typing import Optional, List
from jinja2 import StrictUndefined, Template
-from consts.const import LANGUAGE
+from consts.const import LANGUAGE, ENABLE_JIUWEN_SDK
from consts.error_code import ErrorCode
from consts.error_message import ErrorMessage
from consts.exceptions import AppException
+from consts.model import AgentInfoRequest
from database.agent_db import search_agent_info_by_agent_id, query_all_agent_info_by_tenant_id, \
query_sub_agents_id_list
-from database.tool_db import query_tools_by_ids
+from database.model_management_db import get_model_by_model_id
+from database.knowledge_db import get_knowledge_name_map_by_index_names
+from database.tool_db import query_tools_by_ids, query_tool_instances_by_id
from services.agent_service import (
get_enable_tool_id_by_agent_id,
_check_agent_name_duplicate,
@@ -20,16 +24,49 @@
_regenerate_agent_name_with_llm,
_regenerate_agent_display_name_with_llm,
_generate_unique_agent_name_with_suffix,
- _generate_unique_display_name_with_suffix
+ _generate_unique_display_name_with_suffix,
+ update_agent,
)
+from services.prompt_template_service import resolve_prompt_generate_template
from utils.llm_utils import call_llm_for_system_prompt
-from utils.prompt_template_utils import get_prompt_generate_prompt_template
+from utils.prompt_template_utils import (
+ get_prompt_optimize_prompt_template,
+ get_prompt_template,
+)
+
+from dataclasses import dataclass, field
+from typing import Optional as Opt
+
+from adapters.exception import JiuwenSDKError, NexentCapabilityError
+
+
+def _get_jiuwen_adapter_class():
+    """Lazily import and return ``JiuwenSDKAdapter``.
+
+    Returns:
+        The adapter class, or ``None`` when the import raises
+        ``ModuleNotFoundError``.
+    """
+    try:
+        from adapters import JiuwenSDKAdapter as adapter_class
+    except ModuleNotFoundError:
+        adapter_class = None
+    return adapter_class
+
# Configure logging
logger = logging.getLogger("prompt_service")
+# Human-readable titles for the prompt section types ("duty", "constraint",
+# "few_shots"), keyed first by language (LANGUAGE["ZH"] / LANGUAGE["EN"]) and
+# then by section type.
+# NOTE(review): consumers are not visible in this hunk; presumably used when
+# labelling generated prompt sections. Confirm against callers.
+PROMPT_SECTION_TYPE_TITLES = {
+ LANGUAGE["ZH"]: {
+ "duty": "智能体角色",
+ "constraint": "使用要求",
+ "few_shots": "示例",
+ },
+ LANGUAGE["EN"]: {
+ "duty": "Agent Role",
+ "constraint": "Usage Requirements",
+ "few_shots": "Few Shots",
+ },
+}
+
-def gen_system_prompt_streamable(agent_id: int, model_id: int, task_description: str, user_id: str, tenant_id: str, language: str, tool_ids: Optional[List[int]] = None, sub_agent_ids: Optional[List[int]] = None):
+def gen_system_prompt_streamable(agent_id: int, model_id: int, task_description: str, user_id: str, tenant_id: str, language: str, prompt_template_id: Optional[int] = None, tool_ids: Optional[List[int]] = None, sub_agent_ids: Optional[List[int]] = None, knowledge_base_display_names: Optional[List[str]] = None, has_selected_resources: bool = True):
try:
for system_prompt in generate_and_save_system_prompt_impl(
agent_id=agent_id,
@@ -38,8 +75,11 @@ def gen_system_prompt_streamable(agent_id: int, model_id: int, task_description:
user_id=user_id,
tenant_id=tenant_id,
language=language,
+ prompt_template_id=prompt_template_id,
tool_ids=tool_ids,
- sub_agent_ids=sub_agent_ids
+ sub_agent_ids=sub_agent_ids,
+ knowledge_base_display_names=knowledge_base_display_names,
+ has_selected_resources=has_selected_resources,
):
# SSE format, each message ends with \n\n
yield f"data: {json.dumps({'success': True, 'data': system_prompt}, ensure_ascii=False)}\n\n"
@@ -62,8 +102,11 @@ def generate_and_save_system_prompt_impl(agent_id: int,
user_id: str,
tenant_id: str,
language: str,
+ prompt_template_id: Optional[int] = None,
tool_ids: Optional[List[int]] = None,
- sub_agent_ids: Optional[List[int]] = None):
+ sub_agent_ids: Optional[List[int]] = None,
+ knowledge_base_display_names: Optional[List[str]] = None,
+ has_selected_resources: bool = True):
# Get description of tool and agent from frontend-provided IDs
# Frontend always provides tool_ids and sub_agent_ids (could be empty arrays)
@@ -77,6 +120,20 @@ def generate_and_save_system_prompt_impl(agent_id: int,
tool_info_list = get_enabled_tool_description_for_generate_prompt(
tenant_id=tenant_id, agent_id=agent_id)
+ # Get knowledge base display names for few-shot examples
+ # Priority: frontend-provided > database query
+ if knowledge_base_display_names:
+ logger.debug(
+ f"Using frontend-provided knowledge base display names: {knowledge_base_display_names}")
+ else:
+ knowledge_base_display_names = get_knowledge_base_display_names(
+ tool_info_list=tool_info_list,
+ agent_id=agent_id,
+ tenant_id=tenant_id
+ )
+ logger.debug(
+ f"Using database query for knowledge base display names: {knowledge_base_display_names}")
+
# Handle sub-agent IDs
if sub_agent_ids and len(sub_agent_ids) > 0:
sub_agent_info_list = []
@@ -95,9 +152,21 @@ def generate_and_save_system_prompt_impl(agent_id: int,
sub_agent_info_list = get_enabled_sub_agent_description_for_generate_prompt(
tenant_id=tenant_id, agent_id=agent_id)
+ # Re-evaluate has_selected_resources based on the actual resolved lists.
+ # The frontend value indicates user intent, but after resolving tool_ids/sub_agent_ids
+ # the actual lists are the source of truth. If both lists are empty, constraint and
+ # few_shots sections have no meaningful content to generate, so we force False.
+ has_selected_resources = bool(tool_info_list or sub_agent_info_list)
+ logger.info(
+ "Resolved resource availability: tools=%d, sub_agents=%d, has_selected_resources=%s",
+ len(tool_info_list),
+ len(sub_agent_info_list),
+ has_selected_resources,
+ )
+
# 1. Real-time streaming push
final_results = {"duty": "", "constraint": "", "few_shots": "", "agent_var_name": "", "agent_display_name": "",
- "agent_description": ""}
+ "agent_description": "", "greeting_message": "", "example_questions": ""}
# Get all existing agent names and display names for duplicate checking (only if not in create mode)
all_agents = query_all_agent_info_by_tenant_id(tenant_id)
@@ -113,8 +182,18 @@ def generate_and_save_system_prompt_impl(agent_id: int,
]
# Collect results and yield non-name fields immediately, but hold name fields for duplicate checking
- for result_data in generate_system_prompt(sub_agent_info_list, task_description, tool_info_list, tenant_id,
- model_id, language):
+ for result_data in generate_system_prompt(
+ sub_agent_info_list,
+ task_description,
+ tool_info_list,
+ tenant_id,
+ user_id,
+ model_id,
+ language,
+ prompt_template_id,
+ knowledge_base_display_names,
+ has_selected_resources
+ ):
result_type = result_data["type"]
final_results[result_type] = result_data["content"]
@@ -133,7 +212,8 @@ def generate_and_save_system_prompt_impl(agent_id: int,
exclude_agent_id=agent_id,
agents_cache=all_agents
):
- logger.info(f"Agent name '{agent_name}' already exists, regenerating with LLM")
+ logger.info(
+ f"Agent name '{agent_name}' already exists, regenerating with LLM")
try:
agent_name = _regenerate_agent_name_with_llm(
original_name=agent_name,
@@ -143,12 +223,16 @@ def generate_and_save_system_prompt_impl(agent_id: int,
tenant_id=tenant_id,
language=language,
agents_cache=all_agents,
- exclude_agent_id=agent_id
+ exclude_agent_id=agent_id,
+ prompt_template_id=prompt_template_id,
+ user_id=user_id,
)
- logger.info(f"Regenerated agent name: '{agent_name}'")
+ logger.info(
+ f"Regenerated agent name: '{agent_name}'")
final_results["agent_var_name"] = agent_name
except Exception as e:
- logger.error(f"Failed to regenerate agent name with LLM: {str(e)}, using fallback")
+ logger.error(
+ f"Failed to regenerate agent name with LLM: {str(e)}, using fallback")
# Fallback: add suffix
agent_name = _generate_unique_agent_name_with_suffix(
agent_name,
@@ -174,7 +258,8 @@ def generate_and_save_system_prompt_impl(agent_id: int,
exclude_agent_id=agent_id,
agents_cache=all_agents
):
- logger.info(f"Agent display_name '{agent_display_name}' already exists, regenerating with LLM")
+ logger.info(
+ f"Agent display_name '{agent_display_name}' already exists, regenerating with LLM")
try:
agent_display_name = _regenerate_agent_display_name_with_llm(
original_display_name=agent_display_name,
@@ -184,12 +269,16 @@ def generate_and_save_system_prompt_impl(agent_id: int,
tenant_id=tenant_id,
language=language,
agents_cache=all_agents,
- exclude_agent_id=agent_id
+ exclude_agent_id=agent_id,
+ prompt_template_id=prompt_template_id,
+ user_id=user_id,
)
- logger.info(f"Regenerated agent display_name: '{agent_display_name}'")
+ logger.info(
+ f"Regenerated agent display_name: '{agent_display_name}'")
final_results["agent_display_name"] = agent_display_name
except Exception as e:
- logger.error(f"Failed to regenerate agent display_name with LLM: {str(e)}, using fallback")
+ logger.error(
+ f"Failed to regenerate agent display_name with LLM: {str(e)}, using fallback")
# Fallback: add suffix
agent_display_name = _generate_unique_display_name_with_suffix(
agent_display_name,
@@ -222,10 +311,159 @@ def generate_and_save_system_prompt_impl(agent_id: int,
if not has_content:
raise Exception("Failed to generate prompt content.")
+ # 3. Generate greeting message and example questions
+ try:
+ greeting_template = get_prompt_template('greeting_generate', language)
+ greeting_system_prompt = greeting_template.get("GREETING_SYSTEM_PROMPT", "")
+ greeting_user_prompt_template = greeting_template.get("USER_PROMPT", "")
+
+ greeting_user_prompt = Template(greeting_user_prompt_template, undefined=StrictUndefined).render({
+ "display_name": final_results.get("agent_display_name", ""),
+ "duty_description": final_results.get("duty", ""),
+ "business_description": task_description,
+ "few_shots": final_results.get("few_shots", ""),
+ })
+
+ greeting_result = call_llm_for_system_prompt(
+ model_id=model_id,
+ user_prompt=greeting_user_prompt,
+ system_prompt=greeting_system_prompt,
+ tenant_id=tenant_id,
+ )
+
+ parsed = None
+ try:
+ json_start = greeting_result.find("{")
+ json_end = greeting_result.rfind("}") + 1
+ if json_start >= 0 and json_end > json_start:
+ parsed = json.loads(greeting_result[json_start:json_end])
+ except json.JSONDecodeError:
+ logger.warning(f"Failed to parse greeting JSON from LLM output: {greeting_result}")
+
+ if parsed and "greeting_message" in parsed and "example_questions" in parsed:
+ greeting_message = parsed["greeting_message"]
+ example_questions = parsed["example_questions"]
+ if isinstance(example_questions, list) and len(example_questions) > 6:
+ example_questions = example_questions[:6]
+ else:
+ greeting_message = greeting_result.strip() if greeting_result else ""
+ example_questions = []
+
+ yield {
+ "type": "greeting_message",
+ "content": greeting_message,
+ "is_complete": True
+ }
+ yield {
+ "type": "example_questions",
+ "content": json.dumps(example_questions, ensure_ascii=False),
+ "is_complete": True
+ }
+
+ final_results["greeting_message"] = greeting_message
+ final_results["example_questions"] = json.dumps(example_questions, ensure_ascii=False)
+
+ # Update agent with greeting (skip in create mode)
+ if agent_id != 0:
+ update_agent(agent_id, AgentInfoRequest(
+ agent_id=agent_id,
+ greeting_message=greeting_message,
+ example_questions=example_questions,
+ ), user_id)
+ except Exception as e:
+ logger.warning(f"Greeting generation failed: {str(e)}, skipping greeting")
+
+def optimize_prompt_section_impl(
+    agent_id: int,
+    model_id: int,
+    task_description: str,
+    tenant_id: str,
+    language: str,
+    section_type: str,
+    section_title: str,
+    current_content: str,
+    feedback: str,
+    tool_ids: Optional[List[int]] = None,
+    sub_agent_ids: Optional[List[int]] = None,
+    knowledge_base_display_names: Optional[List[str]] = None,
+) -> dict:
+    """Rewrite a single prompt section with the LLM, guided by user feedback.
+
+    Args:
+        agent_id: Agent whose tools / sub-agents are used when no explicit IDs are given.
+        model_id: Model passed to ``call_llm_for_system_prompt``.
+        task_description: Business description rendered into the optimize template.
+        tenant_id: Tenant used for resource lookups and the LLM call.
+        language: Language used to select the optimize template and default title.
+        section_type: One of ``"duty"``, ``"constraint"`` or ``"few_shots"``
+            (surrounding whitespace is ignored).
+        section_title: Title of the section; when empty, the localized default
+            title for ``section_type`` is used.
+        current_content: Current text of the section; must not be blank.
+        feedback: Optimization feedback; must not be blank.
+        tool_ids: Optional explicit tool IDs.
+        sub_agent_ids: Optional explicit sub-agent IDs.
+        knowledge_base_display_names: Optional explicit knowledge base names.
+
+    Returns:
+        A dict with ``section_type``, ``section_title``, ``original_content``
+        and ``optimized_content``.
+
+    Raises:
+        AppException: for an unsupported section type, blank content or feedback,
+            or when the model returns no usable text.
+    """
+    normalized_section_type = (section_type or "").strip()
+    if normalized_section_type not in {"duty", "constraint", "few_shots"}:
+        raise AppException(
+            ErrorCode.COMMON_PARAMETER_INVALID,
+            "Unsupported prompt section type."
+        )
+
+    if not (current_content or "").strip():
+        raise AppException(
+            ErrorCode.COMMON_MISSING_REQUIRED_FIELD,
+            "Current section content is required."
+        )
-def generate_system_prompt(sub_agent_info_list, task_description, tool_info_list, tenant_id: str, model_id: int, language: str = LANGUAGE["ZH"]):
+    if not (feedback or "").strip():
+        raise AppException(
+            ErrorCode.COMMON_MISSING_REQUIRED_FIELD,
+            "Optimization feedback is required."
+        )
+
+    tool_info_list = _resolve_prompt_generation_tools(
+        agent_id=agent_id,
+        tenant_id=tenant_id,
+        tool_ids=tool_ids,
+    )
+    knowledge_base_display_names = _resolve_knowledge_base_display_names(
+        agent_id=agent_id,
+        tenant_id=tenant_id,
+        tool_info_list=tool_info_list,
+        knowledge_base_display_names=knowledge_base_display_names,
+    )
+    sub_agent_info_list = _resolve_prompt_generation_sub_agents(
+        agent_id=agent_id,
+        tenant_id=tenant_id,
+        sub_agent_ids=sub_agent_ids,
+    )
+
+    prompt_template = get_prompt_optimize_prompt_template(language)
+    # Resolve the title once so the rendered prompt and the returned payload
+    # are guaranteed to carry the same value.
+    resolved_section_title = section_title or _default_prompt_section_title(
+        normalized_section_type, language)
+    prompt_context = join_info_for_optimize_prompt_section(
+        prompt_for_optimize=prompt_template,
+        section_type=normalized_section_type,
+        section_title=resolved_section_title,
+        task_description=task_description,
+        current_content=current_content,
+        feedback=feedback,
+        tool_info_list=tool_info_list,
+        sub_agent_info_list=sub_agent_info_list,
+        language=language,
+        knowledge_base_display_names=knowledge_base_display_names,
+    )
+
+    raw_output = call_llm_for_system_prompt(
+        model_id=model_id,
+        user_prompt=prompt_context,
+        system_prompt=prompt_template["OPTIMIZE_SYSTEM_PROMPT"],
+        tenant_id=tenant_id,
+    )
+    # Treat a None completion like a blank one, so the caller gets the
+    # dedicated generation-failure error instead of an AttributeError.
+    optimized_content = (raw_output or "").strip()
+
+    if not optimized_content:
+        raise AppException(ErrorCode.MODEL_PROMPT_GENERATION_FAILED)
+
+    return {
+        "section_type": normalized_section_type,
+        "section_title": resolved_section_title,
+        "original_content": current_content,
+        "optimized_content": optimized_content,
+    }
+
+
+def generate_system_prompt(sub_agent_info_list, task_description, tool_info_list, tenant_id: str, user_id: str, model_id: int, language: str = LANGUAGE["ZH"], prompt_template_id: Optional[int] = None, knowledge_base_display_names: Optional[List[str]] = None, has_selected_resources: bool = True):
"""Main function for generating system prompts"""
- prompt_for_generate = get_prompt_generate_prompt_template(language)
+ prompt_for_generate = resolve_prompt_generate_template(
+ tenant_id=tenant_id,
+ user_id=user_id,
+ language=language,
+ prompt_template_id=prompt_template_id,
+ )
# Prepare content for generating system prompts
content = join_info_for_generate_system_prompt(
@@ -233,7 +471,9 @@ def generate_system_prompt(sub_agent_info_list, task_description, tool_info_list
sub_agent_info_list=sub_agent_info_list,
task_description=task_description,
tool_info_list=tool_info_list,
- language=language
+ language=language,
+ knowledge_base_display_names=knowledge_base_display_names,
+ has_selected_resources=has_selected_resources,
)
# Initialize state
@@ -243,19 +483,111 @@ def generate_system_prompt(sub_agent_info_list, task_description, tool_info_list
stop_flags = {"duty": False, "constraint": False, "few_shots": False,
"agent_var_name": False, "agent_display_name": False, "agent_description": False}
- # Start all generation threads
+ # Get model concurrency limit to control the number of concurrent LLM calls
+ # If None or >= 6, no limit (all 6 calls run concurrently)
+ # If < 6, use semaphore to limit concurrent calls
+ model_config = get_model_by_model_id(model_id, tenant_id)
+ concurrency_limit = model_config.get(
+ "concurrency_limit") if model_config else None
+
+ # Start all generation threads with concurrency control
threads, error_holder = _start_generation_threads(
- content, prompt_for_generate, produce_queue, latest, stop_flags, tenant_id, model_id)
+ content, prompt_for_generate, produce_queue, latest, stop_flags, tenant_id, model_id,
+ has_selected_resources,
+ concurrency_limit=concurrency_limit
+ )
# Stream results
yield from _stream_results(produce_queue, latest, stop_flags, threads, error_holder)
-def _start_generation_threads(content, prompt_for_generate, produce_queue, latest, stop_flags, tenant_id, model_id):
- """Start all prompt generation threads"""
+def _resolve_prompt_generation_tools(
+    agent_id: int,
+    tenant_id: str,
+    tool_ids: Optional[List[int]] = None,
+) -> List[dict]:
+    """Choose the tool descriptions used for prompt generation.
+
+    A non-empty ``tool_ids`` is looked up through ``query_tools_by_ids``.
+    Otherwise the result of ``get_enabled_tool_description_for_generate_prompt``
+    for this agent and tenant is returned.
+    """
+    if not tool_ids:
+        logger.debug("No tools selected (empty tool_ids list)")
+        return get_enabled_tool_description_for_generate_prompt(
+            tenant_id=tenant_id, agent_id=agent_id)
+
+    logger.debug(f"Using frontend-provided tool IDs: {tool_ids}")
+    return query_tools_by_ids(tool_ids)
+
+
+def _resolve_knowledge_base_display_names(
+    agent_id: int,
+    tenant_id: str,
+    tool_info_list: List[dict],
+    knowledge_base_display_names: Optional[List[str]] = None,
+) -> Optional[List[str]]:
+    """Return the knowledge base display names to render into the prompt.
+
+    Caller-supplied names win when the list is non-empty; otherwise the names
+    are derived with ``get_knowledge_base_display_names`` from the tool list.
+    """
+    if not knowledge_base_display_names:
+        names_from_lookup = get_knowledge_base_display_names(
+            tool_info_list=tool_info_list,
+            agent_id=agent_id,
+            tenant_id=tenant_id,
+        )
+        logger.debug(
+            f"Using database query for knowledge base display names: {names_from_lookup}")
+        return names_from_lookup
+
+    logger.debug(
+        f"Using frontend-provided knowledge base display names: {knowledge_base_display_names}"
+    )
+    return knowledge_base_display_names
+
+
+def _resolve_prompt_generation_sub_agents(
+    agent_id: int,
+    tenant_id: str,
+    sub_agent_ids: Optional[List[int]] = None,
+) -> List[dict]:
+    """Choose the sub-agent descriptions used for prompt generation.
+
+    With a non-empty ``sub_agent_ids`` each ID is fetched individually; an ID
+    whose lookup raises is logged and left out of the result. Without IDs the
+    result of ``get_enabled_sub_agent_description_for_generate_prompt`` for
+    this agent and tenant is returned.
+    """
+    if not sub_agent_ids:
+        logger.debug("No sub-agents selected (empty sub_agent_ids list)")
+        return get_enabled_sub_agent_description_for_generate_prompt(
+            tenant_id=tenant_id, agent_id=agent_id)
+
+    collected = []
+    for current_id in sub_agent_ids:
+        try:
+            collected.append(search_agent_info_by_agent_id(
+                agent_id=current_id, tenant_id=tenant_id))
+        except Exception as exc:
+            # Best effort: one failing sub-agent must not abort the whole request.
+            logger.warning(
+                f"Failed to get sub-agent info for agent_id {current_id}: {str(exc)}"
+            )
+    logger.debug(f"Using frontend-provided sub-agent IDs: {sub_agent_ids}")
+    return collected
+
+
+def _start_generation_threads(content, prompt_for_generate, produce_queue, latest, stop_flags, tenant_id, model_id,
+ has_selected_resources=True, concurrency_limit: Optional[int] = None):
+ """Start all prompt generation threads with optional concurrency control."""
# Shared error tracking across threads
error_holder = {"error": None}
+ # Total number of generation tasks
+ total_tasks = 6
+
+ # Determine effective concurrency limit
+ # None means unlimited, 0 or negative means unlimited
+ if concurrency_limit is None or concurrency_limit <= 0 or concurrency_limit >= total_tasks:
+ effective_limit = None
+ else:
+ effective_limit = concurrency_limit
+
+ # Use semaphore if concurrency is limited
+ semaphore = threading.Semaphore(
+ effective_limit) if effective_limit else None
+ if semaphore:
+ logger.info(
+ f"Using concurrency limit of {effective_limit} for prompt generation (total tasks: {total_tasks})")
+ else:
+ logger.info("Using unlimited concurrency for prompt generation")
+
def make_callback(tag):
def callback_fn(current_text):
latest[tag] = current_text
@@ -264,8 +596,16 @@ def callback_fn(current_text):
def run_and_flag(tag, sys_prompt):
try:
- call_llm_for_system_prompt(
- model_id, content, sys_prompt, make_callback(tag), tenant_id)
+ # Acquire semaphore before starting (if limited)
+ if semaphore:
+ semaphore.acquire()
+ try:
+ call_llm_for_system_prompt(
+ model_id, content, sys_prompt, make_callback(tag), tenant_id)
+ finally:
+ # Always release semaphore after completion
+ if semaphore:
+ semaphore.release()
except Exception as e:
logger.error(f"Error in {tag} generation: {e}")
error_holder["error"] = e
@@ -275,18 +615,32 @@ def run_and_flag(tag, sys_prompt):
threads = []
logger.info("Generating system prompt")
+ # Base sections always generated
prompt_configs = [
- ("duty", prompt_for_generate["DUTY_SYSTEM_PROMPT"]),
- ("constraint", prompt_for_generate["CONSTRAINT_SYSTEM_PROMPT"]),
- ("few_shots", prompt_for_generate["FEW_SHOTS_SYSTEM_PROMPT"]),
+ ("duty", prompt_for_generate["duty_system_prompt"]),
("agent_var_name",
- prompt_for_generate["AGENT_VARIABLE_NAME_SYSTEM_PROMPT"]),
+ prompt_for_generate["agent_variable_name_system_prompt"]),
("agent_display_name",
- prompt_for_generate["AGENT_DISPLAY_NAME_SYSTEM_PROMPT"]),
+ prompt_for_generate["agent_display_name_system_prompt"]),
("agent_description",
- prompt_for_generate["AGENT_DESCRIPTION_SYSTEM_PROMPT"])
+ prompt_for_generate["agent_description_system_prompt"])
]
+ # Constraint and few_shots sections are only generated when tools or sub-agents are selected
+ if has_selected_resources:
+ prompt_configs.extend([
+ ("constraint", prompt_for_generate["constraint_system_prompt"]),
+ ("few_shots", prompt_for_generate["few_shots_system_prompt"]),
+ ])
+ else:
+ logger.info(
+ "Skipping constraint and few_shots generation: no tools or sub-agents selected")
+ # Mark these sections as already complete with empty content
+ stop_flags["constraint"] = True
+ stop_flags["few_shots"] = True
+ latest["constraint"] = ""
+ latest["few_shots"] = ""
+
for tag, sys_prompt in prompt_configs:
thread = threading.Thread(target=run_and_flag, args=(tag, sys_prompt))
thread.start()
@@ -352,7 +706,7 @@ def _stream_results(produce_queue, latest, stop_flags, threads, error_holder):
last_results[tag] = latest[tag]
-def join_info_for_generate_system_prompt(prompt_for_generate, sub_agent_info_list, task_description, tool_info_list, language: str = LANGUAGE["ZH"]):
+def join_info_for_generate_system_prompt(prompt_for_generate, sub_agent_info_list, task_description, tool_info_list, language: str = LANGUAGE["ZH"], knowledge_base_display_names: Optional[List[str]] = None, has_selected_resources: bool = True):
input_label = "Inputs" if language == 'en' else "接受输入"
output_label = "Output type" if language == 'en' else "返回输出类型"
@@ -361,15 +715,90 @@ def join_info_for_generate_system_prompt(prompt_for_generate, sub_agent_info_lis
for tool in tool_info_list])
assistant_description = "\n".join(
[f"- {sub_agent_info['name']}: {sub_agent_info['description']}" for sub_agent_info in sub_agent_info_list])
- # Generate content using template
- content = Template(prompt_for_generate["USER_PROMPT"], undefined=StrictUndefined).render({
+
+ # Build template context
+ template_context = {
"task_description": task_description,
"tool_description": tool_description,
- "assistant_description": assistant_description
- })
+ "assistant_description": assistant_description,
+ # Always include knowledge_base_names to avoid StrictUndefined errors in template.
+ # An empty string is falsy, so the {% if knowledge_base_names %} block will be skipped.
+ "knowledge_base_names": "",
+ # Flag indicating whether tools or sub-agents are selected;
+ # templates use this to suppress boilerplate in constraint/few_shots sections
+ "has_selected_resources": has_selected_resources,
+ }
+
+ # Always add knowledge_base_names to context (empty string when not available).
+ # This is necessary because Jinja2 StrictUndefined raises an error for any
+ # undefined variable, even inside an {% if %} block.
+ if knowledge_base_display_names:
+ kb_names_str = ", ".join(
+ f'"{name}"' for name in knowledge_base_display_names)
+ else:
+ kb_names_str = ""
+ template_context["knowledge_base_names"] = kb_names_str
+
+ # Generate content using template
+ content = Template(
+ prompt_for_generate["user_prompt"], undefined=StrictUndefined).render(template_context)
return content
+def join_info_for_optimize_prompt_section(
+    prompt_for_optimize,
+    section_type: str,
+    section_title: str,
+    task_description: str,
+    current_content: str,
+    feedback: str,
+    tool_info_list,
+    sub_agent_info_list,
+    language: str = LANGUAGE["ZH"],
+    knowledge_base_display_names: Optional[List[str]] = None,
+):
+    """Render the ``OPTIMIZE_USER_PROMPT`` template for one prompt section.
+
+    Tools and sub-agents are flattened into bullet lists, knowledge base names
+    into a comma-separated list of double-quoted names (empty string when none
+    are given), and everything is rendered with ``StrictUndefined``.
+    """
+    is_english = language == LANGUAGE["EN"]
+    input_label = "Inputs" if is_english else "接受输入"
+    output_label = "Output type" if is_english else "返回输出类型"
+
+    tool_lines = []
+    for tool in tool_info_list:
+        tool_lines.append(
+            f"- {tool['name']}: {tool['description']} \n {input_label}: {tool['inputs']}\n {output_label}: {tool['output_type']}"
+        )
+    tool_description = "\n".join(tool_lines)
+
+    assistant_lines = []
+    for sub_agent_info in sub_agent_info_list:
+        assistant_lines.append(
+            f"- {sub_agent_info['name']}: {sub_agent_info['description']}")
+    assistant_description = "\n".join(assistant_lines)
+
+    kb_names_str = (
+        ", ".join(f'"{name}"' for name in knowledge_base_display_names)
+        if knowledge_base_display_names
+        else ""
+    )
+
+    user_prompt_template = Template(
+        prompt_for_optimize["OPTIMIZE_USER_PROMPT"],
+        undefined=StrictUndefined
+    )
+    return user_prompt_template.render({
+        "section_type": section_type,
+        "section_title": section_title,
+        "task_description": task_description,
+        "current_content": current_content,
+        "feedback": feedback,
+        "tool_description": tool_description,
+        "assistant_description": assistant_description,
+        "knowledge_base_names": kb_names_str,
+    })
+
+
+def _default_prompt_section_title(section_type: str, language: str) -> str:
+    """Look up the default title of a section type for ``language``.
+
+    Falls back to the ``LANGUAGE["ZH"]`` titles when the language has no entry,
+    and to ``section_type`` itself when the section type has no title.
+    """
+    zh_titles = PROMPT_SECTION_TYPE_TITLES[LANGUAGE["ZH"]]
+    titles_for_language = PROMPT_SECTION_TYPE_TITLES.get(language, zh_titles)
+    return titles_for_language.get(section_type, section_type)
+
+
def get_enabled_tool_description_for_generate_prompt(agent_id: int, tenant_id: str):
# Get tool information
logger.info("Fetching tool instances")
@@ -379,6 +808,74 @@ def get_enabled_tool_description_for_generate_prompt(agent_id: int, tenant_id: s
return tool_info_list
+def get_knowledge_base_display_names(tool_info_list: List[dict], agent_id: int, tenant_id: str) -> Optional[List[str]]:
+    """
+    Extract knowledge base display names from tool configurations.
+    This is used to ensure few-shot examples use actual configured knowledge base names.
+
+    Args:
+        tool_info_list: List of tool info dictionaries
+        agent_id: Agent ID for querying tool instances
+        tenant_id: Tenant ID for database queries
+
+    Returns:
+        List of knowledge base display names if knowledge_base_search tool is configured, None otherwise
+    """
+    # Check if knowledge_base_search tool is in the list
+    kb_tool_ids = [tool['tool_id'] for tool in tool_info_list if tool.get(
+        'name') == 'knowledge_base_search']
+    if not kb_tool_ids:
+        logger.debug("No knowledge_base_search tool found in tool list")
+        return None
+
+    # Get the index_names from ToolInstance for knowledge_base_search tool
+    all_index_names = []
+    for kb_tool_id in kb_tool_ids:
+        try:
+            tool_instance = query_tool_instances_by_id(
+                agent_id=agent_id,
+                tool_id=kb_tool_id,
+                tenant_id=tenant_id
+            )
+            # A missing instance or a null/absent ``params`` simply means that
+            # nothing is configured for this tool.
+            params = (tool_instance or {}).get('params') or {}
+            index_names = params.get('index_names')
+            if not index_names:
+                continue
+
+            if isinstance(index_names, str):
+                # Handle JSON string format
+                try:
+                    index_names = json.loads(index_names)
+                except json.JSONDecodeError:
+                    logger.warning(
+                        f"Failed to parse index_names JSON: {index_names}")
+                    continue
+
+            if isinstance(index_names, list):
+                all_index_names.extend(index_names)
+            else:
+                # Extending with a decoded str/dict would add single characters
+                # or dict keys as bogus index names, so reject anything that is
+                # not a list.
+                logger.warning(
+                    f"Ignoring index_names that is not a list for tool_id {kb_tool_id}: {index_names!r}")
+        except Exception as e:
+            logger.warning(
+                f"Failed to get tool instance for tool_id {kb_tool_id}: {e}")
+
+    if not all_index_names:
+        logger.debug(
+            "No index_names configured for knowledge_base_search tool")
+        return None
+
+    # Remove duplicates while preserving order
+    unique_index_names = list(dict.fromkeys(all_index_names))
+
+    # Convert to display names
+    knowledge_name_map = get_knowledge_name_map_by_index_names(
+        unique_index_names)
+
+    # Return list of display names (knowledge_name) for each configured index_name;
+    # an index without a mapped name falls back to the index name itself.
+    display_names = []
+    for index_name in unique_index_names:
+        display_name = knowledge_name_map.get(index_name, index_name)
+        if display_name and display_name not in display_names:
+            display_names.append(display_name)
+
+    logger.debug(
+        f"Converted index_names {unique_index_names} to display_names: {display_names}")
+    return display_names if display_names else None
+
+
def get_enabled_sub_agent_description_for_generate_prompt(agent_id: int, tenant_id: str):
logger.info("Fetching sub-agents information")
@@ -392,3 +889,299 @@ def get_enabled_sub_agent_description_for_generate_prompt(agent_id: int, tenant_
sub_agent_info_list.append(sub_agent_info)
return sub_agent_info_list
+
+
+# ── Jiuwen SDK integration ────────────────────────────────────────────────────
+
+
+@dataclass
+class OptimizeRequest:
+ """Unified data structure for an optimization request."""
+ agent_id: int
+ model_id: int
+ task_description: str
+ section_type: str
+ section_title: str
+ # Prompt text to optimize; start_pos / end_pos index into this string.
+ current_content: str
+ feedback: str
+ # Optimization mode. PromptOptimizationService handles "general", "insert" and "select".
+ mode: str = "general"
+ # Character offsets used by the "insert" (start_pos) and "select" (start_pos, end_pos) modes.
+ start_pos: Opt[int] = None
+ end_pos: Opt[int] = None
+ # Optional resource selections, forwarded to optimize_prompt_section_impl on the nexent path.
+ tool_ids: Opt[list[int]] = None
+ sub_agent_ids: Opt[list[int]] = None
+ knowledge_base_display_names: Opt[list[str]] = None
+
+
+@dataclass
+class OptimizeResult:
+ """Unified data structure for an optimization result."""
+ optimized_content: str
+ # Backend that produced the result; PromptOptimizationService sets "jiuwen" or "nexent".
+ source: str
+ section_type: str = ""
+ section_title: str = ""
+ original_content: str = ""
+
+
+class PromptOptimizationService:
+    """Prompt optimization service: one entry point that chooses between two backends.
+
+    The Jiuwen SDK adapter is preferred whenever ``is_jiuwen_mode_available``
+    is true. ``optimize`` and ``optimize_badcase`` fall back to the nexent
+    native path when the adapter raises ``JiuwenSDKError``.
+    """
+
+    def __init__(self, model_id: int, tenant_id: str, language: str):
+        self.model_id = model_id
+        self.tenant_id = tenant_id
+        self.language = language
+
+    def is_jiuwen_mode_available(self) -> bool:
+        """Return True when the Jiuwen SDK is enabled and its adapter class resolves."""
+        if not ENABLE_JIUWEN_SDK:
+            return False
+
+        return _get_jiuwen_adapter_class() is not None
+
+    def optimize_from_debug(self, agent_id: int, feedback: str, selected, history=None) -> OptimizeResult:
+        """Optimize the whole system prompt (full template) from a debug conversation.
+
+        Args:
+            agent_id: Agent whose duty / constraint / few-shots prompts are optimized.
+            feedback: Reason the selected answer is considered bad; must not be blank.
+            selected: OptimizeFromDebugSelected (pydantic model), a dict, or any object
+                exposing ``user_question`` / ``assistant_answer``.
+            history: Optional[List[HistoryItem]]; not read by this method.
+
+        Returns:
+            OptimizeResult with ``source="jiuwen"`` and ``section_type="full_prompt"``.
+
+        Raises:
+            AppException: when feedback is blank or the agent has no prompt content.
+            NexentCapabilityError: when the Jiuwen SDK is not available.
+            JiuwenSDKError: when the adapter class cannot be resolved.
+        """
+        if not (feedback or "").strip():
+            raise AppException(
+                ErrorCode.COMMON_MISSING_REQUIRED_FIELD,
+                "Optimization feedback is required.",
+            )
+
+        if not self.is_jiuwen_mode_available():
+            raise NexentCapabilityError(
+                "Auto optimize from debug requires Jiuwen SDK to be enabled."
+            )
+
+        agent_info = search_agent_info_by_agent_id(
+            agent_id=agent_id, tenant_id=self.tenant_id, version_no=0)
+
+        duty = (agent_info.get("duty_prompt") or "").strip()
+        constraint = (agent_info.get("constraint_prompt") or "").strip()
+        few_shots = (agent_info.get("few_shots_prompt") or "").strip()
+
+        # The section headers added below make the joined text non-empty even
+        # when all three sections are blank, so emptiness has to be checked on
+        # the sections themselves.
+        if not (duty or constraint or few_shots):
+            raise AppException(
+                ErrorCode.COMMON_MISSING_REQUIRED_FIELD,
+                "Agent system prompt is empty.",
+            )
+
+        original_full_prompt = "\n\n".join(
+            [
+                "# Duty\n" + duty,
+                "# Constraint\n" + constraint,
+                "# FewShots\n" + few_shots,
+            ]
+        ).strip()
+
+        user_question = getattr(selected, "user_question", None) or (
+            selected.get("user_question") if isinstance(selected, dict) else "")
+        assistant_answer = getattr(selected, "assistant_answer", None) or (
+            selected.get("assistant_answer") if isinstance(selected, dict) else "")
+
+        # Minimal attribute bag carrying the fields set below for the adapter.
+        bad_case_obj = type("_BadCase", (), {})
+        bc = bad_case_obj()
+        bc.question = user_question or ""
+        bc.answer = assistant_answer or ""
+        bc.label = ""
+        bc.reason = feedback
+
+        adapter_cls = _get_jiuwen_adapter_class()
+        if adapter_cls is None:
+            raise JiuwenSDKError("Jiuwen SDK adapter is unavailable")
+
+        adapter = adapter_cls(
+            model_id=self.model_id, tenant_id=self.tenant_id)
+
+        optimized_full_prompt = adapter.optimize_badcase(
+            prompt=original_full_prompt,
+            bad_cases=[bc],
+            language=self.language,
+        )
+
+        return OptimizeResult(
+            optimized_content=optimized_full_prompt,
+            source="jiuwen",
+            section_type="full_prompt",
+            section_title="system_prompt",
+            original_content=original_full_prompt,
+        )
+
+    def optimize(self, request: OptimizeRequest) -> OptimizeResult:
+        """Unified optimize entry: prefer the Jiuwen SDK, fall back to nexent native on failure."""
+        if self.is_jiuwen_mode_available():
+            logger.info(
+                f"[prompt-optimize] mode={request.mode}, using Jiuwen SDK")
+            try:
+                return self._optimize_with_jiuwen(request)
+            except JiuwenSDKError as e:
+                logger.warning(f"Jiuwen SDK 模式失败,降级到 nexent 原生: {e}")
+        return self._optimize_with_nexent(request)
+
+    @staticmethod
+    def _validate_fragment_positions(request: OptimizeRequest) -> None:
+        """Raise JiuwenSDKError when insert/select positions are missing or out of range."""
+        if request.mode == "insert":
+            if request.start_pos is None or not isinstance(request.start_pos, int):
+                raise JiuwenSDKError("insert mode requires start_pos")
+            if request.start_pos < 0 or request.start_pos > len(request.current_content):
+                raise JiuwenSDKError("insert mode start_pos out of bounds")
+        elif request.mode == "select":
+            if request.start_pos is None or request.end_pos is None:
+                raise JiuwenSDKError(
+                    "select mode requires start_pos and end_pos")
+            if not isinstance(request.start_pos, int) or not isinstance(request.end_pos, int):
+                raise JiuwenSDKError(
+                    "select mode start_pos/end_pos must be int")
+            if request.start_pos < 0 or request.end_pos < 0 or request.start_pos >= request.end_pos:
+                raise JiuwenSDKError("select mode start_pos/end_pos invalid")
+            if request.end_pos > len(request.current_content):
+                raise JiuwenSDKError("select mode end_pos out of bounds")
+
+    def _optimize_with_jiuwen(self, request: OptimizeRequest) -> OptimizeResult:
+        """Optimize through the Jiuwen SDK adapter.
+
+        Raises:
+            JiuwenSDKError: when positions are invalid for the requested mode or
+                the adapter class cannot be resolved.
+        """
+        logger.info(
+            f"[jiuwen-optimize] mode={request.mode}, start_pos={request.start_pos}, "
+            f"end_pos={request.end_pos}, prompt_len={len(request.current_content)}, "
+            f"feedback_len={len(request.feedback)}"
+        )
+        # Validate before calling the adapter so that an unusable request does
+        # not pay for an adapter call whose result would be thrown away.
+        self._validate_fragment_positions(request)
+
+        adapter_cls = _get_jiuwen_adapter_class()
+        if adapter_cls is None:
+            raise JiuwenSDKError("Jiuwen SDK adapter is unavailable")
+
+        adapter = adapter_cls(
+            model_id=self.model_id,
+            tenant_id=self.tenant_id,
+        )
+        result = adapter.optimize(
+            prompt=request.current_content,
+            feedback=request.feedback,
+            mode=request.mode,
+            start_pos=request.start_pos,
+            end_pos=request.end_pos,
+            language=self.language,
+        )
+
+        # Jiuwen insert/select mode returns a fragment by design.
+        # We reassemble the full prompt here so frontend always receives full optimized content.
+        if request.mode == "insert":
+            optimized_full = (
+                request.current_content[: request.start_pos]
+                + result
+                + request.current_content[request.start_pos:]
+            )
+        elif request.mode == "select":
+            optimized_full = (
+                request.current_content[: request.start_pos]
+                + result
+                + request.current_content[request.end_pos:]
+            )
+        else:
+            optimized_full = result
+
+        return OptimizeResult(
+            optimized_content=optimized_full,
+            source="jiuwen",
+            section_type=request.section_type,
+            section_title=request.section_title,
+            original_content=request.current_content,
+        )
+
+    def _optimize_with_nexent(self, request: OptimizeRequest) -> OptimizeResult:
+        """Optimize through the nexent native path, which only supports the general mode.
+
+        Raises:
+            NexentCapabilityError: when ``request.mode`` is not ``"general"``.
+        """
+        if request.mode != "general":
+            raise NexentCapabilityError(
+                f"nexent 原生模式只支持 general 模式,"
+                f"当前请求 mode={request.mode} 不支持,请启用 Jiuwen SDK"
+            )
+
+        result = optimize_prompt_section_impl(
+            agent_id=request.agent_id,
+            model_id=self.model_id,
+            task_description=request.task_description,
+            tenant_id=self.tenant_id,
+            language=self.language,
+            section_type=request.section_type,
+            section_title=request.section_title,
+            current_content=request.current_content,
+            feedback=request.feedback,
+            tool_ids=request.tool_ids,
+            sub_agent_ids=request.sub_agent_ids,
+            knowledge_base_display_names=request.knowledge_base_display_names,
+        )
+        return OptimizeResult(
+            optimized_content=result["optimized_content"],
+            source="nexent",
+            section_type=result["section_type"],
+            section_title=result["section_title"],
+            original_content=result["original_content"],
+        )
+
+    def optimize_badcase(
+        self,
+        current_content: str,
+        bad_cases: list,
+        agent_id: int,
+        section_type: str,
+        section_title: str,
+        tool_ids: Opt[list[int]] = None,
+        sub_agent_ids: Opt[list[int]] = None,
+        knowledge_base_display_names: Opt[list[str]] = None,
+    ) -> OptimizeResult:
+        """Bad-case optimize entry: prefer the Jiuwen SDK, fall back to nexent native on failure."""
+        if self.is_jiuwen_mode_available():
+            logger.info("[prompt-badcase] using Jiuwen SDK")
+            try:
+                return self._optimize_badcase_with_jiuwen(
+                    current_content, bad_cases, section_type, section_title
+                )
+            except JiuwenSDKError as e:
+                logger.warning(f"Jiuwen SDK badcase 模式失败,降级到 nexent 原生: {e}")
+        return self._optimize_badcase_with_nexent(
+            current_content, bad_cases, agent_id, section_type, section_title,
+            tool_ids, sub_agent_ids, knowledge_base_display_names,
+        )
+
+    def _optimize_badcase_with_jiuwen(
+        self, current_content: str, bad_cases: list, section_type: str, section_title: str
+    ) -> OptimizeResult:
+        """Bad-case optimization through the Jiuwen SDK adapter."""
+        adapter_cls = _get_jiuwen_adapter_class()
+        if adapter_cls is None:
+            raise JiuwenSDKError("Jiuwen SDK adapter is unavailable")
+
+        adapter = adapter_cls(
+            model_id=self.model_id,
+            tenant_id=self.tenant_id,
+        )
+        result = adapter.optimize_badcase(
+            prompt=current_content,
+            bad_cases=bad_cases,
+            language=self.language,
+        )
+        return OptimizeResult(
+            optimized_content=result,
+            source="jiuwen",
+            section_type=section_type,
+            section_title=section_title,
+            original_content=current_content,
+        )
+
+    def _optimize_badcase_with_nexent(
+        self,
+        current_content: str,
+        bad_cases: list,
+        agent_id: int,
+        section_type: str,
+        section_title: str,
+        tool_ids: Opt[list[int]] = None,
+        sub_agent_ids: Opt[list[int]] = None,
+        knowledge_base_display_names: Opt[list[str]] = None,
+    ) -> OptimizeResult:
+        """The nexent native path does not support bad-case optimization; always raises."""
+        raise NexentCapabilityError(
+            "nexent 原生模式不支持 badcase 优化,请启用 Jiuwen SDK"
+        )
diff --git a/backend/services/prompt_template_service.py b/backend/services/prompt_template_service.py
new file mode 100644
index 000000000..14224a099
--- /dev/null
+++ b/backend/services/prompt_template_service.py
@@ -0,0 +1,322 @@
+import logging
+from typing import Optional
+
+from consts.const import DEFAULT_TENANT_ID, DEFAULT_USER_ID
+from consts.const import LANGUAGE
+from consts.exceptions import DuplicateError, NotFoundException, ValidationError
+from consts.model import PromptTemplateRequest
+from database.prompt_template_db import (
+ create_prompt_template,
+ delete_prompt_template,
+ get_prompt_template_by_id,
+ get_prompt_template_by_name,
+ get_prompt_template_by_template_id,
+ query_prompt_templates_by_user,
+ upsert_prompt_template_by_id,
+ update_prompt_template,
+)
+from utils.prompt_template_utils import (
+ get_prompt_generate_prompt_template,
+ merge_prompt_generate_templates,
+ normalize_prompt_generate_template_content,
+)
+
+logger = logging.getLogger("prompt_template_service")
+
+SYSTEM_PROMPT_TEMPLATE_ID = 0
+SYSTEM_PROMPT_TEMPLATE_NAME = "system_default"
+PROMPT_TEMPLATE_TYPE_AGENT_GENERATE = "agent_generate"
+SYSTEM_PROMPT_TEMPLATE_DESCRIPTION = "System default prompt template"
+SYSTEM_PROMPT_TEMPLATE_TENANT_ID = DEFAULT_TENANT_ID
+SYSTEM_PROMPT_TEMPLATE_USER_ID = DEFAULT_USER_ID
+
+
+def _normalize_prompt_template_entity(template: Optional[dict]) -> Optional[dict]:
+    """Return a shallow copy of ``template`` with its zh/en content normalized.
+
+    Both content fields are passed through
+    ``normalize_prompt_generate_template_content``. A falsy normalized English
+    content is stored as ``None``. A falsy ``template`` is returned unchanged.
+    """
+    if template:
+        entity = dict(template)
+        zh_raw = entity.get("template_content_zh")
+        entity["template_content_zh"] = normalize_prompt_generate_template_content(zh_raw)
+        en_normalized = normalize_prompt_generate_template_content(
+            entity.get("template_content_en")
+        )
+        entity["template_content_en"] = en_normalized if en_normalized else None
+        return entity
+    return template
+
+
+def build_system_default_prompt_template_payload() -> dict:
+    """Assemble the record for the system default prompt template.
+
+    The Chinese and English contents are loaded via
+    ``get_prompt_generate_prompt_template`` and normalized; the remaining
+    fields come from the module-level ``SYSTEM_PROMPT_TEMPLATE_*`` constants.
+    """
+    zh_content, en_content = (
+        normalize_prompt_generate_template_content(
+            get_prompt_generate_prompt_template(LANGUAGE[lang_key])
+        )
+        for lang_key in ("ZH", "EN")
+    )
+    owner_id = SYSTEM_PROMPT_TEMPLATE_USER_ID
+    payload = dict(
+        template_id=SYSTEM_PROMPT_TEMPLATE_ID,
+        template_name=SYSTEM_PROMPT_TEMPLATE_NAME,
+        description=SYSTEM_PROMPT_TEMPLATE_DESCRIPTION,
+        template_type=PROMPT_TEMPLATE_TYPE_AGENT_GENERATE,
+        tenant_id=SYSTEM_PROMPT_TEMPLATE_TENANT_ID,
+        user_id=owner_id,
+        template_content_zh=zh_content,
+        template_content_en=en_content,
+        created_by=owner_id,
+        updated_by=owner_id,
+        delete_flag="N",
+    )
+    return payload
+
+
+def sync_system_default_prompt_template() -> dict:
+    """Upsert the system default template and return the stored, normalized row.
+
+    The payload is rebuilt on every call, written under
+    ``SYSTEM_PROMPT_TEMPLATE_ID``, and the returned row is flagged with
+    ``is_system_default = True`` before normalization.
+    """
+    default_payload = build_system_default_prompt_template_payload()
+    stored_template = upsert_prompt_template_by_id(
+        template_id=SYSTEM_PROMPT_TEMPLATE_ID,
+        template_data=default_payload,
+        user_id=SYSTEM_PROMPT_TEMPLATE_USER_ID,
+    )
+    stored_template["is_system_default"] = True
+    normalized = _normalize_prompt_template_entity(stored_template)
+    return normalized
+
+
+def get_system_default_prompt_template() -> dict:
+    """Return the system default prompt generation template.
+
+    The row is read from the database; when it is missing, it is created
+    via ``sync_system_default_prompt_template``. The result is a normalized
+    copy flagged with ``is_system_default = True``; the fetched row itself
+    is not modified.
+    """
+    prompt_template = get_prompt_template_by_template_id(
+        template_id=SYSTEM_PROMPT_TEMPLATE_ID,
+        template_type=PROMPT_TEMPLATE_TYPE_AGENT_GENERATE,
+    ) or sync_system_default_prompt_template()
+    # The flag is applied on a copy, so no in-place mutation of the row is needed.
+    return _normalize_prompt_template_entity({
+        **prompt_template,
+        "is_system_default": True,
+    })
+
+
+def _normalize_template_request(request: PromptTemplateRequest) -> dict:
+    """Validate a template request and convert it into a plain dict of fields.
+
+    Returns:
+        Dict with ``template_name`` (stripped), ``description`` (stripped, or
+        ``None`` when blank), ``template_type``, ``template_content_zh`` and
+        ``template_content_en`` (``None`` when absent or empty).
+
+    Raises:
+        ValidationError: blank name, unsupported template type, or empty
+            normalized Chinese content.
+    """
+    name = (request.template_name or "").strip()
+    if not name:
+        raise ValidationError("template_name is required")
+    if request.template_type != PROMPT_TEMPLATE_TYPE_AGENT_GENERATE:
+        raise ValidationError("Unsupported template type")
+
+    content_zh = normalize_prompt_generate_template_content(
+        request.template_content_zh.model_dump()
+    )
+    if len(content_zh) == 0:
+        raise ValidationError("template_content_zh is required")
+
+    content_en = None
+    raw_en = request.template_content_en
+    if raw_en is not None:
+        normalized_en = normalize_prompt_generate_template_content(raw_en.model_dump())
+        content_en = normalized_en if len(normalized_en) != 0 else None
+
+    description = (request.description or "").strip()
+    return {
+        "template_name": name,
+        "description": description or None,
+        "template_type": request.template_type,
+        "template_content_zh": content_zh,
+        "template_content_en": content_en,
+    }
+
+
+def list_prompt_templates_impl(tenant_id: str, user_id: str) -> list[dict]:
+    """Return the system default template followed by the user's own templates.
+
+    The system default is synced first and always placed at index 0. Any
+    queried row carrying ``SYSTEM_PROMPT_TEMPLATE_ID`` is dropped so the
+    default is not listed twice.
+    """
+    listing = [sync_system_default_prompt_template()]
+    user_templates = query_prompt_templates_by_user(
+        tenant_id=tenant_id,
+        user_id=user_id,
+        template_type=PROMPT_TEMPLATE_TYPE_AGENT_GENERATE,
+    )
+    for user_template in user_templates:
+        if user_template.get("template_id") == SYSTEM_PROMPT_TEMPLATE_ID:
+            continue
+        flagged = {**user_template, "is_system_default": False}
+        listing.append(_normalize_prompt_template_entity(flagged))
+    return listing
+
+
+def get_prompt_template_detail_impl(template_id: int, tenant_id: str, user_id: str) -> dict:
+    """Fetch one prompt template, normalized and flagged with ``is_system_default``.
+
+    ``SYSTEM_PROMPT_TEMPLATE_ID`` resolves to the system default template;
+    any other id is looked up within the given tenant/user scope.
+
+    Raises:
+        NotFoundException: when no matching template exists.
+    """
+    if template_id == SYSTEM_PROMPT_TEMPLATE_ID:
+        return get_system_default_prompt_template()
+
+    lookup = dict(
+        template_id=template_id,
+        tenant_id=tenant_id,
+        user_id=user_id,
+        template_type=PROMPT_TEMPLATE_TYPE_AGENT_GENERATE,
+    )
+    found = get_prompt_template_by_id(**lookup)
+    if found:
+        found["is_system_default"] = False
+        return _normalize_prompt_template_entity(found)
+    raise NotFoundException("Prompt template not found")
+
+
+def create_prompt_template_impl(
+    request: PromptTemplateRequest,
+    tenant_id: str,
+    user_id: str,
+) -> dict:
+    """Create a prompt template owned by ``user_id`` within ``tenant_id``.
+
+    Raises:
+        ValidationError: propagated from request normalization.
+        DuplicateError: when a template with the same name already exists
+            in the same tenant/user scope.
+    """
+    fields = _normalize_template_request(request)
+    name_clash = get_prompt_template_by_name(
+        template_name=fields["template_name"],
+        tenant_id=tenant_id,
+        user_id=user_id,
+        template_type=PROMPT_TEMPLATE_TYPE_AGENT_GENERATE,
+    )
+    if name_clash:
+        raise DuplicateError("Prompt template name already exists")
+
+    ownership = {
+        "tenant_id": tenant_id,
+        "user_id": user_id,
+        "created_by": user_id,
+        "updated_by": user_id,
+    }
+    new_template = create_prompt_template({**fields, **ownership})
+    new_template["is_system_default"] = False
+    return _normalize_prompt_template_entity(new_template)
+
+
+def update_prompt_template_impl(
+    template_id: int,
+    request: PromptTemplateRequest,
+    tenant_id: str,
+    user_id: str,
+) -> dict:
+    """Update a user-owned prompt template and return the normalized result.
+
+    Raises:
+        ValidationError: when targeting the system default template, or
+            propagated from request normalization.
+        NotFoundException: when the template does not exist in this scope.
+        DuplicateError: when another template already uses the new name.
+    """
+    if template_id == SYSTEM_PROMPT_TEMPLATE_ID:
+        raise ValidationError("System default prompt template cannot be updated")
+
+    owner_scope = {
+        "tenant_id": tenant_id,
+        "user_id": user_id,
+        "template_type": PROMPT_TEMPLATE_TYPE_AGENT_GENERATE,
+    }
+    if not get_prompt_template_by_id(template_id=template_id, **owner_scope):
+        raise NotFoundException("Prompt template not found")
+
+    fields = _normalize_template_request(request)
+    same_name = get_prompt_template_by_name(
+        template_name=fields["template_name"], **owner_scope
+    )
+    # Keeping the current name is allowed; only another template's name clashes.
+    if same_name:
+        if same_name["template_id"] != template_id:
+            raise DuplicateError("Prompt template name already exists")
+
+    saved = update_prompt_template(
+        template_id=template_id,
+        template_data=fields,
+        user_id=user_id,
+    )
+    saved["is_system_default"] = False
+    return _normalize_prompt_template_entity(saved)
+
+
+def delete_prompt_template_impl(template_id: int, tenant_id: str, user_id: str) -> dict:
+    """Delete a user-owned prompt template.
+
+    Returns:
+        ``{"template_id": ..., "deleted": bool}`` where ``deleted`` is True
+        when the delete call reported at least one affected row.
+
+    Raises:
+        ValidationError: when targeting the system default template.
+        NotFoundException: when the template does not exist in this scope.
+    """
+    if template_id == SYSTEM_PROMPT_TEMPLATE_ID:
+        raise ValidationError("System default prompt template cannot be deleted")
+
+    owned_template = get_prompt_template_by_id(
+        template_id=template_id,
+        tenant_id=tenant_id,
+        user_id=user_id,
+        template_type=PROMPT_TEMPLATE_TYPE_AGENT_GENERATE,
+    )
+    if not owned_template:
+        raise NotFoundException("Prompt template not found")
+
+    removed_rows = delete_prompt_template(template_id=template_id, user_id=user_id)
+    was_deleted = removed_rows > 0
+    return {"template_id": template_id, "deleted": was_deleted}
+
+
+def resolve_prompt_generate_template(
+    tenant_id: str,
+    user_id: str,
+    language: str,
+    prompt_template_id: Optional[int] = None,
+) -> dict:
+    """Resolve the prompt generation template for a user and language.
+
+    The system default is always synced first. Without a custom template id
+    (or with the system id, or when the custom template cannot be found) the
+    result merges the system content for ``language`` with the system Chinese
+    content. Otherwise the custom content for ``language``, the custom
+    Chinese content, and the two system contents are merged in that order.
+    """
+    system_default = sync_system_default_prompt_template()
+    content_key = (
+        "template_content_en" if language == LANGUAGE["EN"] else "template_content_zh"
+    )
+    system_content = system_default.get(content_key)
+    system_fallback = system_default.get("template_content_zh")
+
+    use_system_default = (
+        not prompt_template_id or prompt_template_id == SYSTEM_PROMPT_TEMPLATE_ID
+    )
+    custom_template = None
+    if not use_system_default:
+        custom_template = get_prompt_template_by_id(
+            template_id=prompt_template_id,
+            tenant_id=tenant_id,
+            user_id=user_id,
+            template_type=PROMPT_TEMPLATE_TYPE_AGENT_GENERATE,
+        )
+        if not custom_template:
+            logger.warning(
+                "Prompt template %s not found for tenant %s user %s, falling back to system default",
+                prompt_template_id,
+                tenant_id,
+                user_id,
+            )
+
+    if not custom_template:
+        return merge_prompt_generate_templates(system_content, system_fallback)
+
+    return merge_prompt_generate_templates(
+        custom_template.get(content_key),
+        custom_template.get("template_content_zh"),
+        system_content,
+        system_fallback,
+    )
+
+
+def get_prompt_template_summary(
+    template_id: Optional[int],
+    tenant_id: str,
+    user_id: str,
+) -> tuple[Optional[int], Optional[str]]:
+    """Resolve a template id into an ``(id, name)`` pair.
+
+    ``None`` yields ``(None, None)``; the system id yields the system
+    constants without a database lookup.
+
+    Raises:
+        NotFoundException: when a custom id does not exist in this scope.
+    """
+    if template_id is None:
+        return None, None
+    if template_id == SYSTEM_PROMPT_TEMPLATE_ID:
+        return SYSTEM_PROMPT_TEMPLATE_ID, SYSTEM_PROMPT_TEMPLATE_NAME
+
+    found = get_prompt_template_by_id(
+        template_id=template_id,
+        tenant_id=tenant_id,
+        user_id=user_id,
+        template_type=PROMPT_TEMPLATE_TYPE_AGENT_GENERATE,
+    )
+    if found:
+        return found["template_id"], found["template_name"]
+    raise NotFoundException("Prompt template not found")
diff --git a/backend/services/providers/dashscope_provider.py b/backend/services/providers/dashscope_provider.py
index b9fb7ab7b..497dcfe99 100644
--- a/backend/services/providers/dashscope_provider.py
+++ b/backend/services/providers/dashscope_provider.py
@@ -6,6 +6,75 @@
from services.providers.base import AbstractModelProvider, _classify_provider_error
+DASHSCOPE_IMAGE_GENERATION_KEYWORDS = (
+ "image",
+ "wanx",
+ "aitryon",
+ "tryon",
+ "flux",
+ "stable-diffusion",
+ "sdxl",
+)
+DASHSCOPE_IMAGE_UNDERSTANDING_KEYWORDS = (
+ "qwen-vl",
+ "qwen2-vl",
+ "qwen2.5-vl",
+ "qwen3-vl",
+ "qwen3.5-vl",
+ "qwen3.6-vl",
+ "-vl",
+ "vl-",
+ "vision",
+ "visual",
+ "ocr",
+ "qwen3.6",
+ "qwen-3.6",
+)
+DASHSCOPE_VIDEO_UNDERSTANDING_KEYWORDS = ("omni", "video-understanding", "video-ocr")
+
+
+def _modality_set(value) -> set:
+    """Turn a modality field (falsy, a single string, or an iterable) into a set of lower-cased strings."""
+    if isinstance(value, str) and value:
+        return {value.lower()}
+    return {str(entry).lower() for entry in value} if value else set()
+
+
+def _has_keyword(text: str, keywords: tuple) -> bool:
+ return any(keyword in text for keyword in keywords)
+
+
+def _is_dashscope_explicit_image_understanding_model(model_id: str) -> bool:
+ return _has_keyword(model_id, DASHSCOPE_IMAGE_UNDERSTANDING_KEYWORDS)
+
+
+def _is_dashscope_image_generation_model(model_id: str, desc: str, req_mods: set, res_mods: set) -> bool:
+ if _is_dashscope_explicit_image_understanding_model(model_id):
+ return False
+ return "image" in res_mods or _has_keyword(model_id, DASHSCOPE_IMAGE_GENERATION_KEYWORDS)
+
+
+def _is_dashscope_video_understanding_model(model_id: str, desc: str, req_mods: set, res_mods: set) -> bool:
+    """Decide whether a DashScope model is a video understanding model.
+
+    True when the request modalities include "video" and the response
+    modalities include "text", or when the model id / lower-cased description
+    contains one of ``DASHSCOPE_VIDEO_UNDERSTANDING_KEYWORDS``.
+
+    ``desc`` may be ``None`` (a null description in the API payload) and is
+    then treated as an empty string instead of raising.
+    """
+    if "video" in req_mods and "text" in res_mods:
+        return True
+    # Built only when needed, and tolerant of a missing description.
+    searchable_text = f"{model_id} {(desc or '').lower()}"
+    return _has_keyword(searchable_text, DASHSCOPE_VIDEO_UNDERSTANDING_KEYWORDS)
+
+
+def _is_dashscope_image_understanding_model(model_id: str, desc: str, req_mods: set, res_mods: set) -> bool:
+    """Decide whether a DashScope model is an image understanding model.
+
+    Image generation and video understanding models are excluded first.
+    Otherwise the model qualifies when it accepts image or video input and
+    produces text, or when its id / lower-cased description contains one of
+    ``DASHSCOPE_IMAGE_UNDERSTANDING_KEYWORDS``.
+
+    ``desc`` may be ``None`` (a null description in the API payload) and is
+    then treated as an empty string instead of raising.
+    """
+    desc = desc or ""
+    if _is_dashscope_image_generation_model(model_id, desc, req_mods, res_mods):
+        return False
+    if _is_dashscope_video_understanding_model(model_id, desc, req_mods, res_mods):
+        return False
+    if ("image" in req_mods or "video" in req_mods) and "text" in res_mods:
+        return True
+    # searchable_text starts with model_id, so a single scan covers id and description.
+    searchable_text = f"{model_id} {desc.lower()}"
+    return _has_keyword(searchable_text, DASHSCOPE_IMAGE_UNDERSTANDING_KEYWORDS)
+
+
class DashScopeModelProvider(AbstractModelProvider):
"""Concrete implementation for DashScope (Aliyun) provider."""
@@ -57,6 +126,8 @@ async def get_models(self, provider_config: Dict) -> List[Dict]:
categorized_models = {
"chat": [], # Maps to "llm"
"vlm": [], # Maps to "vlm"
+ "vlm2": [], # Maps to image generation models
+ "vlm3": [], # Maps to video understanding models
"embedding": [], # Maps to "embedding" / "multi_embedding"
"rerank": [], # Maps to "rerank"
"tts": [], # Maps to "tts"
@@ -68,9 +139,11 @@ async def get_models(self, provider_config: Dict) -> List[Dict]:
# Extract key fields for logical determination (lowercased for robustness)
m_id = model_obj.get('model', '').lower()
desc = model_obj.get('description', '')
- metadata = model_obj.get('inference_metadata', {})
+ metadata = model_obj.get('inference_metadata') or {}
req_mod = metadata.get('request_modality', [])
res_mod = metadata.get('response_modality', [])
+ req_mods = _modality_set(req_mod)
+ res_mods = _modality_set(res_mod)
model_obj.setdefault("object", model_obj.get("object", "model"))
model_obj.setdefault("owned_by", model_obj.get("owned_by", "dashscope"))
cleaned_model = {
@@ -107,8 +180,17 @@ async def get_models(self, provider_config: Dict) -> List[Dict]:
continue
# 5. VLM
- vision_mods = {'Image', 'Video'}
- if (set(req_mod) & vision_mods) or (set(res_mod) & vision_mods) or '视觉' in desc:
+ if _is_dashscope_video_understanding_model(m_id, desc, req_mods, res_mods):
+ cleaned_model.update({"model_tag": "chat", "model_type": "vlm3"})
+ categorized_models['vlm3'].append(cleaned_model)
+ continue
+
+ if _is_dashscope_image_generation_model(m_id, desc, req_mods, res_mods):
+ cleaned_model.update({"model_tag": "chat", "model_type": "vlm2"})
+ categorized_models['vlm2'].append(cleaned_model)
+ continue
+
+ if _is_dashscope_image_understanding_model(m_id, desc, req_mods, res_mods):
cleaned_model.update({"model_tag": "chat", "model_type": "vlm"})
categorized_models['vlm'].append(cleaned_model)
continue
@@ -124,7 +206,10 @@ async def get_models(self, provider_config: Dict) -> List[Dict]:
elif target_model_type in ("embedding", "multi_embedding"):
return categorized_models["embedding"]
elif target_model_type in categorized_models:
- return categorized_models[target_model_type]
+ return [
+ {**model, "model_type": target_model_type}
+ for model in categorized_models[target_model_type]
+ ]
else:
return []
except (httpx.HTTPStatusError, httpx.ConnectTimeout, httpx.ConnectError, Exception) as e:
diff --git a/backend/services/providers/silicon_provider.py b/backend/services/providers/silicon_provider.py
index ea41cc95d..1875b3949 100644
--- a/backend/services/providers/silicon_provider.py
+++ b/backend/services/providers/silicon_provider.py
@@ -1,4 +1,5 @@
import httpx
+import re
from typing import Dict, List
from consts.const import DEFAULT_LLM_MAX_TOKENS
@@ -6,6 +7,62 @@
from services.providers.base import AbstractModelProvider, _classify_provider_error
+SILICON_VLM_MODEL_KEYWORDS = (
+ "-vl",
+ "_vl",
+ "/vl",
+ ".vl",
+ "vl-",
+ "vision",
+ "visual",
+ "internvl",
+ "deepseek-vl",
+ "deepseekvl",
+ "glm-4v",
+ "minicpm-v",
+ "llava",
+ "kimi-vl",
+ "kimi-k2.5",
+ "kimi-k2.6",
+ "qvq",
+ "omni",
+ "qwen3.5",
+ "qwen3.6",
+)
+
+SILICON_VLM_METADATA_KEYWORDS = ("image", "video", "vision", "visual")
+
+
+def _contains_silicon_vlm_metadata(value) -> bool:
+    """Recursively search ``value`` for a keyword from ``SILICON_VLM_METADATA_KEYWORDS``.
+
+    Strings are matched case-insensitively; lists and dict *values* are
+    searched recursively (dict keys are not inspected). Any other type
+    yields False.
+    """
+    if isinstance(value, str):
+        lowered = value.lower()
+        for keyword in SILICON_VLM_METADATA_KEYWORDS:
+            if keyword in lowered:
+                return True
+        return False
+    if isinstance(value, list):
+        children = value
+    elif isinstance(value, dict):
+        children = value.values()
+    else:
+        return False
+    return any(_contains_silicon_vlm_metadata(child) for child in children)
+
+
+def _is_silicon_vlm_model(model: Dict) -> bool:
+    """Decide whether a SiliconFlow model entry looks like a vision-language model.
+
+    Checked in order: vision-related keywords anywhere in the entry's string
+    values, a keyword from ``SILICON_VLM_MODEL_KEYWORDS`` in the lower-cased
+    id/name, and finally a ``glm-<version>v`` pattern in the id/name.
+    """
+    if _contains_silicon_vlm_metadata(model):
+        return True
+
+    haystack = " ".join(str(model.get(field, "")).lower() for field in ("id", "name"))
+    for keyword in SILICON_VLM_MODEL_KEYWORDS:
+        if keyword in haystack:
+            return True
+
+    return re.search(r"glm-\d+(?:\.\d+)?v", haystack) is not None
+
+
+def _is_silicon_omni_model(model: Dict) -> bool:
+    """Return True when the lower-cased model id or name contains "omni"."""
+    id_and_name = (str(model.get(key, "")).lower() for key in ("id", "name"))
+    return "omni" in " ".join(id_and_name)
+
+
class SiliconModelProvider(AbstractModelProvider):
"""Concrete implementation for SiliconFlow provider."""
@@ -25,32 +82,39 @@ async def get_models(self, provider_config: Dict) -> List[Dict]:
headers = {"Authorization": f"Bearer {model_api_key}"}
+ provider_model_type = "vlm" if model_type in ("vlm2", "vlm3") else model_type
+
# Choose endpoint by model type
- if model_type in ("llm", "vlm"):
+ if provider_model_type in ("llm", "vlm"):
silicon_url = f"{SILICON_GET_URL}?sub_type=chat"
- elif model_type in ("embedding", "multi_embedding"):
+ elif provider_model_type in ("embedding", "multi_embedding"):
silicon_url = f"{SILICON_GET_URL}?sub_type=embedding"
- elif model_type == "rerank":
+ elif provider_model_type == "rerank":
silicon_url = f"{SILICON_GET_URL}?sub_type=reranker"
else:
- silicon_url = SILICON_GET_URL
+ return []
async with httpx.AsyncClient(verify=False) as client:
response = await client.get(silicon_url, headers=headers)
response.raise_for_status()
model_list: List[Dict] = response.json()["data"]
+ if model_type == "vlm3":
+ model_list = [item for item in model_list if _is_silicon_omni_model(item)]
+ elif provider_model_type == "vlm":
+ model_list = [item for item in model_list if _is_silicon_vlm_model(item)]
+
# Annotate models with canonical fields expected downstream
- if model_type in ("llm", "vlm"):
+ if provider_model_type in ("llm", "vlm"):
for item in model_list:
item["model_tag"] = "chat"
item["model_type"] = model_type
item["max_tokens"] = DEFAULT_LLM_MAX_TOKENS
- elif model_type in ("embedding", "multi_embedding"):
+ elif provider_model_type in ("embedding", "multi_embedding"):
for item in model_list:
item["model_tag"] = "embedding"
item["model_type"] = model_type
- elif model_type == "rerank":
+ elif provider_model_type == "rerank":
for item in model_list:
item["model_tag"] = "rerank"
item["model_type"] = model_type
diff --git a/backend/services/providers/tokenpony_provider.py b/backend/services/providers/tokenpony_provider.py
index ab4446c1b..be2bb9c71 100644
--- a/backend/services/providers/tokenpony_provider.py
+++ b/backend/services/providers/tokenpony_provider.py
@@ -9,6 +9,64 @@
from services.providers.base import AbstractModelProvider, _classify_provider_error
+TOKENPONY_IMAGE_UNDERSTANDING_KEYWORDS = (
+ "qwen-vl",
+ "qwen2-vl",
+ "qwen2.5-vl",
+ "qwen3-vl",
+ "qwen3.5-vl",
+ "qwen3.6-vl",
+ "-vl",
+ "vl-",
+ "vision",
+ "visual",
+ "ocr",
+ "gpt-4o",
+ "qwen3.6",
+ "qwen-3.6",
+)
+TOKENPONY_IMAGE_GENERATION_KEYWORDS = (
+ "image",
+ "dall",
+ "flux",
+ "stable-diffusion",
+ "sdxl",
+ "midjourney",
+ "wanx",
+ "kolors",
+ "seedream",
+ "ideogram",
+ "recraft",
+)
+TOKENPONY_VIDEO_UNDERSTANDING_KEYWORDS = ("omni", "video")
+
+
+def _has_keyword(text: str, keywords: tuple) -> bool:
+ return any(keyword in text for keyword in keywords)
+
+
+def _is_tokenpony_explicit_image_understanding_model(model_id: str) -> bool:
+ return _has_keyword(model_id, TOKENPONY_IMAGE_UNDERSTANDING_KEYWORDS)
+
+
+def _is_tokenpony_image_generation_model(model_id: str) -> bool:
+ if _is_tokenpony_explicit_image_understanding_model(model_id):
+ return False
+ return _has_keyword(model_id, TOKENPONY_IMAGE_GENERATION_KEYWORDS)
+
+
+def _is_tokenpony_video_understanding_model(model_id: str) -> bool:
+ return _has_keyword(model_id, TOKENPONY_VIDEO_UNDERSTANDING_KEYWORDS)
+
+
+def _is_tokenpony_image_understanding_model(model_id: str) -> bool:
+ if _is_tokenpony_image_generation_model(model_id):
+ return False
+ if _is_tokenpony_video_understanding_model(model_id):
+ return False
+ return _is_tokenpony_explicit_image_understanding_model(model_id)
+
+
class TokenPonyModelProvider(AbstractModelProvider):
"""Concrete implementation for TokenPony provider."""
@@ -46,6 +104,8 @@ async def get_models(self, provider_config: Dict) -> List[Dict]:
categorized_models = {
"chat": [], # Maps to "llm"
"vlm": [], # Maps to "vlm"
+ "vlm2": [], # Maps to image generation models
+ "vlm3": [], # Maps to video understanding models
"embedding": [], # Maps to "embedding" / "multi_embedding"
"rerank": [], # Maps to "rerank"
"tts": [], # Maps to "tts"
@@ -86,9 +146,14 @@ async def get_models(self, provider_config: Dict) -> List[Dict]:
cleaned_model.update({"model_tag": "tts", "model_type": "tts"})
categorized_models['tts'].append(cleaned_model)
- # 5. VLM (Vision Language Model / Image & Video Generation)
-
- elif any(keyword in m_id for keyword in ['-vl', 'vl-', 'ocr', 'vision']):
+ # 5. Multimodal models
+ elif _is_tokenpony_video_understanding_model(m_id):
+ cleaned_model.update({"model_tag": "chat", "model_type": "vlm3"})
+ categorized_models['vlm3'].append(cleaned_model)
+ elif _is_tokenpony_image_generation_model(m_id):
+ cleaned_model.update({"model_tag": "chat", "model_type": "vlm2"})
+ categorized_models['vlm2'].append(cleaned_model)
+ elif _is_tokenpony_image_understanding_model(m_id):
cleaned_model.update({"model_tag": "chat", "model_type": "vlm"})
categorized_models['vlm'].append(cleaned_model)
@@ -104,7 +169,10 @@ async def get_models(self, provider_config: Dict) -> List[Dict]:
elif target_model_type in ("embedding", "multi_embedding"):
return categorized_models["embedding"]
elif target_model_type in categorized_models:
- return categorized_models[target_model_type]
+ return [
+ {**model, "model_type": target_model_type}
+ for model in categorized_models[target_model_type]
+ ]
else:
return []
diff --git a/backend/services/redis_service.py b/backend/services/redis_service.py
index efd2c0a7b..1ffcf921c 100644
--- a/backend/services/redis_service.py
+++ b/backend/services/redis_service.py
@@ -1,6 +1,7 @@
import json
import logging
-from typing import Dict, Any, Optional
+import re
+from typing import Dict, Any, Optional, Tuple, Set, List
import redis
@@ -23,8 +24,8 @@ def client(self) -> redis.Redis:
if not REDIS_URL:
raise ValueError("REDIS_URL environment variable is not set")
self._client = redis.from_url(
- REDIS_URL,
- socket_timeout=5,
+ REDIS_URL,
+ socket_timeout=5,
socket_connect_timeout=5,
decode_responses=True
)
@@ -215,7 +216,7 @@ def delete_document_records(self, index_name: str, path_or_url: str) -> Dict[str
return result
- def _recursively_delete_task_and_parents(self, task_id: str) -> tuple[int, set]:
+ def _recursively_delete_task_and_parents(self, task_id: str) -> Tuple[int, Set[str]]:
"""
Iteratively delete a Celery task and all its parent tasks from Redis.
A single task chain is deleted, and the IDs of the deleted tasks are returned.
@@ -309,16 +310,11 @@ def _cleanup_celery_tasks(self, index_name: str) -> int:
# Check for failed tasks where metadata is in the exception message
if task_index_name is None and 'exc_message' in result:
- try:
- exc_str = str(result['exc_message'])
- if '{' in exc_str and '}' in exc_str:
- json_part = exc_str[exc_str.find('{'):exc_str.rfind('}')+1]
- cleaned_json_part = json_part.replace('\\"', '"')
- error_data = json.loads(cleaned_json_part)
- task_index_name = error_data.get('index_name')
- except (json.JSONDecodeError, TypeError, IndexError) as e:
- key_str = key.decode('utf-8') if isinstance(key, bytes) else key
- logger.warning(f"Could not parse exception metadata for task key {key_str}: {e}")
+ error_data = self._extract_error_metadata_from_exc_message(
+ result.get("exc_message")
+ )
+ if error_data:
+ task_index_name = error_data.get('index_name')
if task_index_name == index_name:
key_str = key.decode('utf-8') if isinstance(key, bytes) else key
@@ -366,15 +362,11 @@ def _cleanup_celery_tasks(self, index_name: str) -> int:
)
if task_index_name is None and 'exc_message' in result:
- try:
- exc_str = str(result['exc_message'])
- if '{' in exc_str and '}' in exc_str:
- json_part = exc_str[exc_str.find('{'):exc_str.rfind('}')+1]
- cleaned_json_part = json_part.replace('\\"', '"')
- error_data = json.loads(cleaned_json_part)
- task_index_name = error_data.get('index_name')
- except (json.JSONDecodeError, TypeError, IndexError):
- pass
+ error_data = self._extract_error_metadata_from_exc_message(
+ result.get("exc_message")
+ )
+ if error_data:
+ task_index_name = error_data.get('index_name')
if task_index_name == index_name:
key_str = key.decode('utf-8') if isinstance(key, bytes) else key
@@ -497,16 +489,12 @@ def _cleanup_document_celery_tasks(self, index_name: str, path_or_url: str) -> i
# Check for failed tasks where metadata is in the exception message
if task_index_name is None and 'exc_message' in result:
- try:
- exc_str = str(result['exc_message'])
- if '{' in exc_str and '}' in exc_str:
- json_part = exc_str[exc_str.find('{'):exc_str.rfind('}')+1]
- cleaned_json_part = json_part.replace('\\"', '"')
- error_data = json.loads(cleaned_json_part)
- task_index_name = error_data.get('index_name')
- task_source = error_data.get('source') or error_data.get('path_or_url')
- except (json.JSONDecodeError, TypeError, IndexError) as e:
- logger.warning(f"Could not parse exception metadata for task {task_id}: {e}")
+ error_data = self._extract_error_metadata_from_exc_message(
+ result.get("exc_message")
+ )
+ if error_data:
+ task_index_name = error_data.get('index_name')
+ task_source = error_data.get('source') or error_data.get('path_or_url')
# Match both index name and document path/source
if task_index_name == index_name and task_source == path_or_url:
@@ -666,13 +654,13 @@ def save_error_info(self, task_id: str, error_reason: str, ttl_days: int = 30) -
if not error_reason:
logger.error(f"Cannot save error info for task {task_id}: error_reason is empty")
return False
-
+
ttl_seconds = ttl_days * 24 * 60 * 60
reason_key = f"error:reason:{task_id}"
# Save error reason
result = self.client.setex(reason_key, ttl_seconds, error_reason)
-
+
if result:
logger.info(f"Successfully saved error info to Redis for task {task_id}, key: {reason_key}")
# Verify the save by reading it back
@@ -707,13 +695,13 @@ def save_progress_info(self, task_id: str, processed_chunks: int, total_chunks:
if not task_id:
logger.error("Cannot save progress info: task_id is empty")
return False
-
+
progress_key = f"progress:{task_id}"
progress_data = {
'processed_chunks': processed_chunks,
'total_chunks': total_chunks
}
-
+
ttl_seconds = ttl_hours * 3600
progress_json = json.dumps(progress_data)
self.client.setex(
@@ -728,6 +716,122 @@ def save_progress_info(self, task_id: str, processed_chunks: int, total_chunks:
logger.error(f"Failed to save progress info for task {task_id}: {str(e)}")
return False
+ def increment_progress_info(self, task_id: str, delta_processed: int, total_chunks: Optional[int] = None, ttl_hours: int = 24) -> bool:
+ """
+ Atomically increment processed chunks for a task.
+
+ The current value stored under ``progress:{task_id}`` is read while the key
+ is WATCHed, the new counters are computed, and the result is written back
+ with SETEX inside a MULTI/EXEC transaction. When the key changes in between
+ (``redis.WatchError``) the whole read-compute-write cycle is retried, up to
+ five attempts in total.
+
+ Args:
+ task_id: Task identifier used to build the Redis key.
+ delta_processed: Number of chunks to add; values <= 0 are a no-op.
+ total_chunks: Optional total, forwarded to the parse/compute helpers.
+ ttl_hours: Expiry applied to the key on every successful write.
+
+ Returns:
+ True when the increment was written (or nothing had to be done),
+ False for an empty task_id, an unexpected error, or exhausted retries.
+ """
+ if not task_id:
+ logger.error("Cannot increment progress info: task_id is empty")
+ return False
+ # Nothing to add: report success without touching Redis.
+ if delta_processed <= 0:
+ return True
+
+ progress_key = f"progress:{task_id}"
+ ttl_seconds = ttl_hours * 3600
+ max_retries = 5
+
+ for attempt in range(max_retries):
+ # A fresh pipeline per attempt; it is always reset in the finally clause.
+ pipe = self.client.pipeline()
+ try:
+ # WATCH first so a concurrent write to the key aborts the EXEC below.
+ pipe.watch(progress_key)
+ raw = pipe.get(progress_key)
+ current_processed, current_total = self._parse_progress(raw, total_chunks)
+ new_processed, current_total = self._compute_next_progress(
+ current_processed=current_processed,
+ delta_processed=delta_processed,
+ current_total=current_total,
+ total_chunks=total_chunks,
+ )
+
+ payload = json.dumps({
+ "processed_chunks": new_processed,
+ "total_chunks": current_total,
+ })
+
+ # Commands queued after MULTI are sent together by execute().
+ pipe.multi()
+ pipe.setex(progress_key, ttl_seconds, payload)
+ pipe.execute()
+ logger.info(
+ f"[REDIS PROGRESS] Incremented progress for task {task_id}: "
+ f"+{delta_processed}, now {new_processed}/{current_total}"
+ )
+ return True
+ except redis.WatchError:
+ # The key changed since WATCH: retry with a fresh read.
+ continue
+ except Exception as exc:
+ logger.warning(f"Failed to increment progress for task {task_id}: {exc}")
+ return False
+ finally:
+ pipe.reset()
+
+ # Every attempt hit a WatchError.
+ logger.warning(f"Failed to increment progress for task {task_id}: too many concurrent updates")
+ return False
+
+ def _parse_progress(self, raw: Any, total_chunks: Optional[int]) -> Tuple[int, int]:
+     """
+     Decode a stored progress payload into ``(processed, total)``.
+
+     A missing or unparsable payload yields ``(0, total_chunks or 0)``.
+     When ``total_chunks`` is truthy it overrides the stored total.
+     """
+     fallback = (0, int(total_chunks or 0))
+     if not raw:
+         return fallback
+
+     text = raw.decode("utf-8") if isinstance(raw, bytes) else raw
+
+     try:
+         stored = json.loads(text)
+         done = int(stored.get("processed_chunks", 0) or 0)
+         if total_chunks:
+             return done, fallback[1]
+         return done, int(stored.get("total_chunks", 0) or 0)
+     except Exception:
+         return fallback
+
+ def _compute_next_progress(
+     self,
+     current_processed: int,
+     delta_processed: int,
+     current_total: int,
+     total_chunks: Optional[int],
+ ) -> Tuple[int, int]:
+     """
+     Add ``delta_processed`` to the processed count and settle the total.
+
+     ``total_chunks`` is used only when the stored total is not positive.
+     With a positive total, the processed count is capped at that total.
+     """
+     advanced = current_processed + int(delta_processed)
+     known_total = int(current_total or 0)
+
+     if known_total <= 0 and total_chunks:
+         known_total = int(total_chunks)
+
+     if known_total > 0 and advanced > known_total:
+         advanced = known_total
+
+     return advanced, known_total
+
+ def _extract_error_metadata_from_exc_message(self, exc_message: Any) -> Optional[Dict[str, Any]]:
+     """
+     Pull a JSON object out of an exception message, if one is embedded.
+
+     The text between the first "{" and the last "}" is tried as-is, then
+     with escaped quotes unescaped, then with stray backslashes doubled.
+     The first variant that parses to a dict is returned; otherwise None.
+     """
+     try:
+         message = str(exc_message or "")
+         start, end = message.find("{"), message.rfind("}")
+         if start == -1 or end == -1:
+             return None
+         fragment = message[start:end + 1]
+         variants = (
+             fragment,
+             fragment.replace('\\"', '"'),
+             re.sub(r'\\(?!["\\/bfnrtu])', r'\\\\', fragment),
+         )
+         for variant in variants:
+             try:
+                 decoded = json.loads(variant)
+             except Exception:
+                 continue
+             if isinstance(decoded, dict):
+                 return decoded
+         return None
+     except Exception:
+         return None
+
def get_progress_info(self, task_id: str) -> Optional[Dict[str, int]]:
"""
Get progress information for a specific task
@@ -770,6 +874,79 @@ def get_error_info(self, task_id: str) -> Optional[str]:
f"Failed to get error info for task {task_id}: {str(e)}")
return None
+    def batch_get_progress_info(self, task_ids: List[str]) -> Dict[str, Optional[Dict[str, int]]]:
+        """
+        Batch get progress information for multiple tasks using one Redis pipeline.
+
+        Args:
+            task_ids: List of Celery task IDs
+
+        Returns:
+            Dict mapping each task_id to its parsed progress payload, or None when the
+            key is missing or its value cannot be decoded or parsed. If the pipeline
+            call itself fails, every task_id maps to None.
+        """
+        if not task_ids:
+            return {}
+
+        try:
+            # Queue one GET per task and execute them together.
+            pipe = self.client.pipeline()
+            for tid in task_ids:
+                pipe.get(f"progress:{tid}")
+            results = pipe.execute()
+
+            result = {}
+            for task_id, progress_data in zip(task_ids, results):
+                if not progress_data:
+                    result[task_id] = None
+                    continue
+                try:
+                    if isinstance(progress_data, bytes):
+                        progress_data = progress_data.decode('utf-8')
+                    result[task_id] = json.loads(progress_data)
+                except (ValueError, TypeError):
+                    # ValueError covers json.JSONDecodeError and UnicodeDecodeError, so one
+                    # corrupt entry no longer discards the results of the whole batch.
+                    result[task_id] = None
+            return result
+        except Exception as e:
+            logger.warning(f"Failed to batch get progress info: {str(e)}")
+            return {tid: None for tid in task_ids}
+
+    def batch_get_error_info(self, task_ids: List[str]) -> Dict[str, Optional[str]]:
+        """
+        Look up the stored error reason of several tasks through one Redis pipeline.
+
+        Args:
+            task_ids: List of Celery task IDs
+
+        Returns:
+            Dict mapping each task_id to its error reason, or None when nothing is
+            stored. If the pipeline call fails, every task_id maps to None.
+        """
+        if not task_ids:
+            return {}
+
+        try:
+            pipe = self.client.pipeline()
+            for tid in task_ids:
+                pipe.get(f"error:reason:{tid}")
+            fetched = pipe.execute()
+            # Values are returned as delivered by the client (the original code assumes
+            # decode_responses=True, i.e. strings); empty values become None.
+            return {tid: (fetched[idx] or None) for idx, tid in enumerate(task_ids)}
+        except Exception as e:
+            logger.warning(f"Failed to batch get error info: {str(e)}")
+            return {tid: None for tid in task_ids}
+
# Global Redis service instance
_redis_service = None
diff --git a/backend/services/remote_mcp_service.py b/backend/services/remote_mcp_service.py
index ab0f0b04f..7e77a9c43 100644
--- a/backend/services/remote_mcp_service.py
+++ b/backend/services/remote_mcp_service.py
@@ -1,50 +1,78 @@
import logging
import os
import tempfile
-
+import asyncio
+import socket
+import random
from fastmcp import Client
from fastmcp.client.transports import StreamableHttpTransport, SSETransport
-
-from consts.const import CAN_EDIT_ALL_USER_ROLES, PERMISSION_EDIT, PERMISSION_READ
-from consts.exceptions import MCPConnectionError, MCPNameIllegal
+from consts.const import CAN_EDIT_ALL_USER_ROLES, PERMISSION_EDIT, PERMISSION_READ, NEXENT_MCP_DOCKER_IMAGE
+from consts.exceptions import (
+ MCPConnectionError,
+ MCPNameIllegal,
+ MCPContainerError,
+ McpNotFoundError,
+ McpValidationError,
+ McpNameConflictError,
+ McpPortConflictError,
+)
+from consts.model import MCPConfigRequest
from database.remote_mcp_db import (
create_mcp_record,
- delete_mcp_record_by_name_and_url,
delete_mcp_record_by_container_id,
get_mcp_records_by_tenant,
check_mcp_name_exists,
+ check_enabled_mcp_name_exists,
update_mcp_status_by_name_and_url,
update_mcp_record_by_name_and_url,
+ update_mcp_record_manage_fields_by_id,
+ update_mcp_record_enabled_by_id,
+ update_mcp_record_container_fields_by_id,
+ update_mcp_record_status_by_id,
+ delete_mcp_record_by_id,
get_mcp_authorization_token_by_name_and_url,
get_mcp_record_by_id_and_tenant,
+ get_mcp_custom_headers_by_name_and_url,
)
from database.user_tenant_db import get_user_tenant_by_user_id
from services.mcp_container_service import MCPContainerManager
+from utils.http_client_utils import create_httpx_client
logger = logging.getLogger("remote_mcp_service")
-async def mcp_server_health(remote_mcp_server: str, authorization_token: str | None = None) -> bool:
+# ---------------------------------------------------------------------------
+# Health Check
+# ---------------------------------------------------------------------------
+
+async def mcp_server_health(remote_mcp_server: str, authorization_token: str | None = None, custom_headers: dict | None = None) -> bool:
+ """Check if an MCP server is healthy and reachable."""
try:
- # Select transport based on URL ending
url_stripped = remote_mcp_server.strip()
- headers = {"Authorization": authorization_token} if authorization_token else {}
+ headers = {}
+ if authorization_token:
+ headers["Authorization"] = authorization_token
+ if custom_headers:
+ headers.update(custom_headers)
if url_stripped.endswith("/sse"):
transport = SSETransport(
url=url_stripped,
- headers=headers
+ headers=headers,
+ httpx_client_factory=create_httpx_client
)
elif url_stripped.endswith("/mcp"):
transport = StreamableHttpTransport(
url=url_stripped,
- headers=headers
+ headers=headers,
+ httpx_client_factory=create_httpx_client
)
else:
# Default to StreamableHttpTransport for unrecognized formats
transport = StreamableHttpTransport(
url=url_stripped,
- headers=headers
+ headers=headers,
+ httpx_client_factory=create_httpx_client
)
client = Client(transport=transport)
@@ -52,11 +80,99 @@ async def mcp_server_health(remote_mcp_server: str, authorization_token: str | N
connected = client.is_connected()
return connected
except BaseException as e:
- logger.error(
- f"Remote MCP server health check failed: {e}", exc_info=True)
- # Prevent library-level exits (e.g., SystemExit) from crashing the service
- raise MCPConnectionError("MCP connection failed")
+ logger.error(f"Remote MCP server health check failed: {e}", exc_info=True)
+ error_message = str(e).strip() or repr(e)
+ if isinstance(e, (asyncio.TimeoutError, TimeoutError)) or "timeout" in error_message.lower():
+ raise MCPConnectionError("MCP_HEALTH_TIMEOUT")
+ raise MCPConnectionError(error_message)
+
+
+# ---------------------------------------------------------------------------
+# Helper Functions
+# ---------------------------------------------------------------------------
+
+def _is_container_record(record: dict | None) -> bool:
+    """Tell whether an MCP record describes a container-backed service.
+
+    A record counts as container-based when at least one of these fields is not None:
+    - container_id (Docker container ID)
+    - config_json (container configuration)
+
+    An empty or missing record is never container-based.
+    """
+    if record:
+        return any(record.get(field) is not None for field in ("container_id", "config_json"))
+    return False
+
+
+# ---------------------------------------------------------------------------
+# Port Management Functions
+# ---------------------------------------------------------------------------
+
+def check_container_port_conflict_records(port: int) -> bool:
+    """Return True when no stored MCP record is found for the given container port.
+
+    Despite the name, the result is the negation of the lookup: True means "no
+    conflict", False means get_mcp_records_by_container_port returned a non-empty result.
+    NOTE(review): the earlier docstring said only enabled records count; any such
+    filtering would live in the database helper - confirm there.
+    """
+    # Imported inside the function rather than with the module-level database imports.
+    from database.remote_mcp_db import get_mcp_records_by_container_port
+    return not get_mcp_records_by_container_port(container_port=port)
+
+def check_runtime_host_port_available(port: int) -> bool:
+    """Return True when the host port is not occupied by a listener.
+
+    Two checks run in order:
+    1. A short TCP connect attempt to each probe address; a successful connect means
+       something is already listening, so the result is False.
+    2. A trial bind of 0.0.0.0:port; the result is True only if binding succeeds.
+
+    Args:
+        port: TCP port number to test
+
+    Returns:
+        True if no probe connected and the trial bind succeeded, otherwise False
+    """
+    # Always probe IPv4 loopback; add IPv6 loopback when the platform supports IPv6.
+    probe_targets = [(socket.AF_INET, "127.0.0.1")]
+    if socket.has_ipv6:
+        probe_targets.append((socket.AF_INET6, "::1"))
+
+    # Also probe every address that "host.docker.internal" resolves to.
+    # A resolution failure is ignored and only the loopback targets are used.
+    try:
+        host_infos = socket.getaddrinfo("host.docker.internal", port, socket.AF_UNSPEC, socket.SOCK_STREAM)
+        for family, _, _, _, sockaddr in host_infos:
+            probe_targets.append((family, sockaddr[0]))
+    except OSError:
+        pass
+
+    for family, host in probe_targets:
+        try:
+            with socket.socket(family, socket.SOCK_STREAM) as probe_socket:
+                # Keep each probe short so unreachable targets do not stall the check.
+                probe_socket.settimeout(0.2)
+                # Non-IPv4 families get the 4-tuple address form (host, port, flowinfo, scope_id).
+                connect_result = probe_socket.connect_ex((host, port) if family == socket.AF_INET else (host, port, 0, 0))
+                # connect_ex returns 0 on success, i.e. a listener accepted the connection.
+                if connect_result == 0:
+                    logger.info(f"Host port {port} is already in use on {host}")
+                    return False
+        except OSError:
+            # A target that cannot be probed is skipped rather than treated as "in use".
+            continue
+
+    try:
+        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as bind_probe:
+            # Request exclusive use of the address where that option exists; otherwise
+            # explicitly disable address reuse so the bind fails if the port is taken.
+            if hasattr(socket, "SO_EXCLUSIVEADDRUSE"):
+                bind_probe.setsockopt(socket.SOL_SOCKET, socket.SO_EXCLUSIVEADDRUSE, 1)
+            else:
+                bind_probe.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 0)
+            bind_probe.bind(("0.0.0.0", port))
+            bind_probe.listen(1)
+            return True
+    except OSError as exc:
+        logger.info(f"Host port {port} is already in use: {exc}")
+        return False
+
+
+def check_container_port_conflict(*, port: int) -> bool:
+    """Report whether a port can be used for an MCP container.
+
+    Both checks always run: the stored-record lookup and the runtime host probe.
+    Despite the name, a truthy result means there is NO conflict.
+    """
+    records_clear = check_container_port_conflict_records(port=port)
+    host_port_free = check_runtime_host_port_available(port)
+    if not records_clear:
+        return records_clear
+    return host_port_free
+
+
+def suggest_container_port() -> int:
+    """Pick a random port in [2000, 50000] that passes check_container_port_conflict.
+
+    Up to 1000 random candidates are tried.
+
+    Raises:
+        McpPortConflictError: If none of the candidates is available
+    """
+    lowest, highest = 2000, 50000
+    for _ in range(1000):
+        candidate = random.randint(lowest, highest)
+        if check_container_port_conflict(port=candidate):
+            return candidate
+    raise McpPortConflictError("No available port found")
+
+# ---------------------------------------------------------------------------
+# Add Functions
+# ---------------------------------------------------------------------------
async def add_remote_mcp_server_list(
tenant_id: str,
@@ -65,48 +181,233 @@ async def add_remote_mcp_server_list(
remote_mcp_server_name: str,
container_id: str | None = None,
authorization_token: str | None = None,
+ custom_headers: dict | None = None,
+ source: str | None = "local",
+ container_port: int | None = None,
):
+ """Add a remote MCP server to the list.
- # check if MCP name already exists
+ Args:
+ tenant_id: Tenant ID
+ user_id: User ID
+ remote_mcp_server: MCP server URL
+ remote_mcp_server_name: MCP service name
+ container_id: Docker container ID (optional)
+ authorization_token: Authorization token (optional)
+ custom_headers: Custom HTTP headers (optional)
+
+ Raises:
+ MCPNameIllegal: If MCP name already exists
+ MCPConnectionError: If MCP server is not reachable
+ """
if check_mcp_name_exists(mcp_name=remote_mcp_server_name, tenant_id=tenant_id):
- logger.error(
- f"MCP name already exists, tenant_id: {tenant_id}, remote_mcp_server_name: {remote_mcp_server_name}")
+ logger.error(f"MCP name already exists: {remote_mcp_server_name}")
raise MCPNameIllegal("MCP name already exists")
- # check if the address is available
- if not await mcp_server_health(remote_mcp_server=remote_mcp_server, authorization_token=authorization_token):
+ if not await mcp_server_health(remote_mcp_server=remote_mcp_server, authorization_token=authorization_token, custom_headers=custom_headers):
raise MCPConnectionError("MCP connection failed")
- # update the PG database record
insert_mcp_data = {
"mcp_name": remote_mcp_server_name,
"mcp_server": remote_mcp_server,
"status": True,
"container_id": container_id,
"authorization_token": authorization_token,
+ "custom_headers": custom_headers,
+ "source": source,
+ "container_port": container_port,
}
- create_mcp_record(mcp_data=insert_mcp_data,
- tenant_id=tenant_id, user_id=user_id)
+ create_mcp_record(mcp_data=insert_mcp_data, tenant_id=tenant_id, user_id=user_id)
-async def delete_remote_mcp_server_list(tenant_id: str,
- user_id: str,
- remote_mcp_server: str,
- remote_mcp_server_name: str):
- # delete the record in the PG database
- delete_mcp_record_by_name_and_url(mcp_name=remote_mcp_server_name,
- mcp_server=remote_mcp_server,
- tenant_id=tenant_id,
- user_id=user_id)
+async def add_mcp_service(
+    *,
+    tenant_id: str,
+    user_id: str,
+    name: str,
+    description: str | None,
+    source: str,
+    server_url: str,
+    tags: list | None,
+    authorization_token: str | None,
+    custom_headers: dict | None = None,
+    container_config: dict | None,
+    registry_json: dict | None,
+    enabled: bool = False,
+    container_id: str | None = None,
+    container_port: int | None = None,
+) -> None:
+    """Add an MCP service record.
+
+    When ``enabled`` is true, the name must not already exist for the tenant and the
+    server must pass a health check; the record is then stored with status True.
+    Otherwise neither check runs and the record is stored with status None.
+
+    Args:
+        tenant_id: Tenant ID
+        user_id: User ID
+        name: MCP service name
+        description: MCP service description
+        source: Source type (local/mcp_registry/community)
+        server_url: MCP server URL
+        tags: MCP tags
+        authorization_token: Authorization token for MCP server
+        custom_headers: Custom HTTP headers
+        container_config: Container configuration
+        registry_json: Registry metadata JSON
+        enabled: Whether the MCP is enabled
+        container_id: Docker container ID
+        container_port: Container port
+
+    Raises:
+        MCPNameIllegal: If enabled and the MCP name already exists
+        MCPConnectionError: If enabled and the MCP server is not reachable
+    """
+    status: bool | None = None
+    # Empty or non-string container IDs are stored as None.
+    normalized_container_id = container_id if isinstance(container_id, str) and container_id else None
+    # Note: this test uses the raw container_id, not the normalized value above.
+    is_container = container_id is not None or container_config is not None
+    # Only a dict configuration of a container-based service is persisted.
+    config_json = container_config if is_container and isinstance(container_config, dict) else None
+
+    if enabled:
+        if check_mcp_name_exists(mcp_name=name, tenant_id=tenant_id):
+            logger.error(f"MCP name already exists: {name}")
+            raise MCPNameIllegal("MCP name already exists")
-async def update_remote_mcp_server_list(
-    update_data,
+        if not await mcp_server_health(remote_mcp_server=server_url, authorization_token=authorization_token, custom_headers=custom_headers):
+            raise MCPConnectionError("MCP connection failed")
+
+        # Reached only after a successful health check.
+        status = True
+
+    create_mcp_record(
+        mcp_data={
+            "mcp_name": name,
+            "mcp_server": server_url,
+            "status": status,
+            "container_id": normalized_container_id,
+            "container_port": container_port,
+            "authorization_token": authorization_token,
+            "custom_headers": custom_headers,
+            "source": source,
+            "registry_json": registry_json,
+            "enabled": enabled,
+            "tags": tags,
+            "description": description,
+            "config_json": config_json,
+        },
+        tenant_id=tenant_id,
+        user_id=user_id,
+    )
+
+
+async def add_container_mcp_service(
+    *,
     tenant_id: str,
     user_id: str,
-):
+    name: str,
+    description: str | None,
+    source: str,
+    tags: list | None,
+    authorization_token: str | None,
+    registry_json: dict | None,
+    port: int,
+    mcp_config: MCPConfigRequest,
+) -> dict:
+    """Add a container-based MCP service.
+
+    Validates the name, port and configuration, starts a container that runs the
+    configured command behind mcp_proxy, then registers the service as enabled.
+    If anything fails after the container has started, the container is stopped
+    again (best effort) before the error is re-raised.
+
+    Args:
+        tenant_id: Tenant ID
+        user_id: User ID
+        name: MCP service name
+        description: MCP service description
+        source: Source type
+        tags: MCP tags
+        authorization_token: Authorization token
+        registry_json: Registry metadata JSON
+        port: Host port for the container
+        mcp_config: MCP server configuration
+
+    Returns:
+        Container information dictionary
+
+    Raises:
+        McpNameConflictError: If the MCP name already exists for the tenant
+        McpPortConflictError: If the port is not available
+        McpValidationError: If the configuration does not hold exactly one server
+            entry, has no command, or uses the docker command
     """
-    Update an existing remote MCP server record.
+    service_name = name
+    if check_mcp_name_exists(mcp_name=service_name, tenant_id=tenant_id):
+        raise McpNameConflictError("Enabled MCP name already exists")
+
+    # check_container_port_conflict returns True when the port is free.
+    if not check_container_port_conflict(port=port):
+        raise McpPortConflictError(f"Port {port} is already in use")
+
+    servers = mcp_config.mcpServers
+    if len(servers) != 1:
+        raise McpValidationError("Exactly one mcpServers entry is required")
+
+    _, config = next(iter(servers.items()))
+    command = config.command
+    if not command:
+        raise McpValidationError("command is required")
+    if command.strip().lower() == "docker":
+        raise McpValidationError("Docker command is not supported")
+
+    # The token is handed to the container through its environment.
+    env_vars = dict(config.env or {})
+    auth_token = authorization_token
+    if auth_token:
+        env_vars["authorization_token"] = auth_token
+
+    # mcp_proxy serves the wrapped command over streamable HTTP on the chosen port.
+    full_command = [
+        "python",
+        "-m",
+        "mcp_proxy",
+        "--host",
+        "0.0.0.0",
+        "--port",
+        str(port),
+        "--transport",
+        "streamablehttp",
+        "--",
+        command,
+        *(config.args or []),
+    ]
+
+    container_manager = MCPContainerManager()
+    # Stays None until the container has actually started, so the error path knows
+    # whether there is anything to clean up.
+    container_info = None
+    try:
+        container_info = await container_manager.start_mcp_container(
+            service_name=service_name,
+            tenant_id=tenant_id,
+            user_id=user_id,
+            env_vars=env_vars,
+            host_port=port,
+            image=NEXENT_MCP_DOCKER_IMAGE,
+            full_command=full_command,
+        )
+        logger.info(f"Started MCP container with info: {container_info}")
+
+        container_config = mcp_config.model_dump(exclude_none=True)
+
+        await add_mcp_service(
+            tenant_id=tenant_id,
+            user_id=user_id,
+            name=service_name,
+            description=description,
+            source=source,
+            server_url=container_info.get("mcp_url"),
+            tags=tags,
+            authorization_token=auth_token,
+            container_config=container_config,
+            registry_json=registry_json,
+            enabled=True,
+            container_id=container_info.get("container_id"),
+            container_port=container_info.get("host_port"),
+        )
+    except Exception as exc:
+        logger.warning(f"Failed to start container MCP service: {exc}")
+        # Registration can fail after the container is already running; stop it so it
+        # does not keep the host port without a matching MCP record.
+        started_container_id = container_info.get("container_id") if container_info else None
+        if started_container_id:
+            try:
+                await container_manager.stop_mcp_container(started_container_id)
+            except Exception as cleanup_exc:
+                logger.warning(f"Failed to stop container {started_container_id}: {cleanup_exc}")
+        raise
+
+    return {
+        "service_name": service_name,
+        "mcp_url": container_info.get("mcp_url"),
+        "container_id": container_info.get("container_id"),
+        "container_name": container_info.get("container_name"),
+        "host_port": container_info.get("host_port"),
+    }
+
+
+# ---------------------------------------------------------------------------
+# Update Functions
+# ---------------------------------------------------------------------------
+
+async def update_remote_mcp_server_list(update_data, tenant_id: str, user_id: str) -> None:
+ """Update an existing remote MCP server record.
Args:
update_data: MCPUpdateRequest containing current and new values
@@ -114,40 +415,31 @@ async def update_remote_mcp_server_list(
user_id: User ID
Raises:
- MCPNameIllegal: If the new MCP name already exists (and is different from current)
+ MCPNameIllegal: If the new MCP name already exists
MCPConnectionError: If the new MCP server URL is not accessible
"""
- # Check if the current record exists by verifying the name exists for this tenant
if not check_mcp_name_exists(mcp_name=update_data.current_service_name, tenant_id=tenant_id):
- logger.error(
- f"MCP name does not exist, tenant_id: {tenant_id}, current_mcp_server_name: {update_data.current_service_name}")
raise MCPNameIllegal("MCP name does not exist")
- # If the new name is different from the current name, check if it already exists
if update_data.new_service_name != update_data.current_service_name:
if check_mcp_name_exists(mcp_name=update_data.new_service_name, tenant_id=tenant_id):
- logger.error(
- f"New MCP name already exists, tenant_id: {tenant_id}, new_mcp_server_name: {update_data.new_service_name}")
raise MCPNameIllegal("New MCP name already exists")
- # User authorization token
authorization_token = update_data.new_authorization_token
+ custom_headers = getattr(update_data, 'custom_headers', None)
- # Check if the new server URL is accessible
try:
status = await mcp_server_health(
remote_mcp_server=update_data.new_mcp_url,
- authorization_token=authorization_token
+ authorization_token=authorization_token,
+ custom_headers=custom_headers,
)
except BaseException:
status = False
if not status:
- logger.error(
- f"New MCP server health check failed: {update_data.new_mcp_url}")
raise MCPConnectionError("New MCP server connection failed")
- # Update the database record
update_mcp_record_by_name_and_url(
update_data=update_data,
tenant_id=tenant_id,
@@ -156,7 +448,309 @@ async def update_remote_mcp_server_list(
)
-async def get_remote_mcp_server_list(tenant_id: str, user_id: str | None = None, is_need_auth: bool = True) -> list[dict]:
+def update_mcp_service(
+    *,
+    tenant_id: str,
+    user_id: str,
+    mcp_id: int,
+    new_name: str,
+    description: str | None,
+    server_url: str,
+    authorization_token: str | None,
+    custom_headers: dict | None,
+    tags: list | None,
+) -> None:
+    """Rewrite the editable fields of one MCP record, looked up by ID within the tenant.
+
+    The record's stored source is carried over (falling back to "local"). For
+    container-based records a stored dict config_json is carried over as well;
+    in every other case config_json is passed as None.
+
+    Args:
+        tenant_id: Tenant ID
+        user_id: User ID
+        mcp_id: MCP record ID
+        new_name: New MCP service name
+        description: MCP service description
+        server_url: New MCP server URL
+        authorization_token: Authorization token
+        custom_headers: Custom HTTP headers
+        tags: MCP tags
+
+    Raises:
+        McpNotFoundError: If MCP record is not found
+    """
+    existing = get_mcp_record_by_id_and_tenant(mcp_id=mcp_id, tenant_id=tenant_id)
+    if not existing:
+        raise McpNotFoundError("MCP record not found")
+
+    preserved_config = None
+    if _is_container_record(existing):
+        stored_config = existing.get("config_json")
+        if isinstance(stored_config, dict):
+            preserved_config = stored_config
+
+    record_source = existing.get("source") or "local"
+
+    update_mcp_record_manage_fields_by_id(
+        mcp_id=mcp_id,
+        tenant_id=tenant_id,
+        user_id=user_id,
+        name=new_name,
+        description=description,
+        server_url=server_url,
+        source=record_source,
+        authorization_token=authorization_token,
+        custom_headers=custom_headers,
+        config_json=preserved_config,
+        tags=tags,
+    )
+
+
+async def update_mcp_service_enabled(
+ *,
+ tenant_id: str,
+ user_id: str,
+ mcp_id: int,
+ enabled: bool,
+) -> None:
+ """Enable or disable an MCP service.
+
+ Args:
+ tenant_id: Tenant ID
+ user_id: User ID
+ mcp_id: MCP record ID
+ enabled: True to enable, False to disable
+
+ Raises:
+ McpNotFoundError: If MCP record is not found
+ McpNameConflictError: If an enabled service with the same name exists
+ McpPortConflictError: If the container port is not available
+ MCPConnectionError: If MCP connection fails
+ """
+ current_record = get_mcp_record_by_id_and_tenant(mcp_id=mcp_id, tenant_id=tenant_id)
+ if not current_record:
+ raise McpNotFoundError("MCP record not found")
+
+ if enabled:
+ current_name = current_record.get("mcp_name")
+ if current_name:
+ records = get_mcp_records_by_tenant(tenant_id=tenant_id)
+ for record in records:
+ if int(record.get("mcp_id") or 0) == mcp_id:
+ continue
+ record_name = record.get("mcp_name")
+ is_enabled = bool(record.get("enabled"))
+ if is_enabled and record_name == current_name:
+ raise McpNameConflictError("An enabled service already uses this name")
+
+ authorization_token = current_record.get("authorization_token")
+ custom_headers = current_record.get("custom_headers") if isinstance(current_record.get("custom_headers"), dict) else None
+
+ if _is_container_record(current_record):
+ if enabled:
+ port = current_record.get("container_port")
+ if port is None:
+ raise McpValidationError("Container port is missing, cannot rebuild container")
+ if not check_runtime_host_port_available(port):
+ raise McpPortConflictError(f"Port {port} is already in use")
+
+ config_json = current_record.get("config_json")
+ if not isinstance(config_json, dict):
+ raise McpValidationError("Container configuration is missing, cannot rebuild container")
+
+ try:
+ mcp_config = MCPConfigRequest(**config_json)
+ except Exception as exc:
+ raise McpValidationError(f"Invalid container configuration: {exc}")
+
+ servers = mcp_config.mcpServers
+ if not servers or len(servers) != 1:
+ raise McpValidationError("Exactly one mcpServers entry is required")
+ _, config = next(iter(servers.items()))
+ command = config.command
+ if not command:
+ raise McpValidationError("command is required")
+
+ env_vars = dict(config.env or {})
+ if authorization_token:
+ env_vars["authorization_token"] = authorization_token
+
+ full_command = [
+ "python",
+ "-m",
+ "mcp_proxy",
+ "--host",
+ "0.0.0.0",
+ "--port",
+ str(port),
+ "--transport",
+ "streamablehttp",
+ "--",
+ command,
+ *(config.args or []),
+ ]
+
+ container_manager = MCPContainerManager()
+ container_info = await container_manager.start_mcp_container(
+ service_name=current_record.get("mcp_name"),
+ tenant_id=tenant_id,
+ user_id=user_id,
+ env_vars=env_vars,
+ host_port=port,
+ image=NEXENT_MCP_DOCKER_IMAGE,
+ full_command=full_command,
+ )
+
+ next_server_url = container_info.get("mcp_url")
+ next_container_id = container_info.get("container_id")
+ next_container_port = container_info.get("host_port") or port
+
+ health_ok = False
+ MCP_CONTAINER_HEALTH_CHECK_ATTEMPTS = 10
+ MCP_CONTAINER_HEALTH_CHECK_DELAY_SECONDS = 0.5
+ for attempt in range(MCP_CONTAINER_HEALTH_CHECK_ATTEMPTS):
+ try:
+ health_ok = await mcp_server_health(
+ remote_mcp_server=next_server_url,
+ authorization_token=authorization_token,
+ custom_headers=custom_headers,
+ )
+ except MCPConnectionError:
+ health_ok = False
+ if health_ok:
+ break
+ if attempt < MCP_CONTAINER_HEALTH_CHECK_ATTEMPTS - 1:
+ await asyncio.sleep(MCP_CONTAINER_HEALTH_CHECK_DELAY_SECONDS)
+
+ if not health_ok:
+ if next_container_id:
+ try:
+ await MCPContainerManager().stop_mcp_container(next_container_id)
+ except Exception as exc:
+ logger.warning(f"Failed to stop unhealthy container {next_container_id}: {exc}")
+ update_mcp_record_container_fields_by_id(
+ mcp_id=mcp_id,
+ tenant_id=tenant_id,
+ user_id=user_id,
+ container_id=None,
+ container_port=port,
+ mcp_server=next_server_url,
+ status=False,
+ )
+ raise MCPConnectionError("MCP connection failed")
+
+ update_mcp_record_container_fields_by_id(
+ mcp_id=mcp_id,
+ tenant_id=tenant_id,
+ user_id=user_id,
+ container_id=next_container_id,
+ container_port=next_container_port,
+ mcp_server=next_server_url,
+ status=True,
+ )
+ else:
+ current_container_id = current_record.get("container_id")
+ if current_container_id and current_record.get("config_json"):
+ try:
+ manager = MCPContainerManager()
+ await manager.stop_mcp_container(current_container_id)
+ except Exception as exc:
+ logger.warning(f"Failed to stop container {current_container_id}: {exc}")
+ update_mcp_record_container_fields_by_id(
+ mcp_id=mcp_id,
+ tenant_id=tenant_id,
+ user_id=user_id,
+ container_id=None,
+ container_port=current_record.get("container_port"),
+ mcp_server=current_record.get("mcp_server"),
+ status=None,
+ )
+ elif enabled:
+ server_url = current_record.get("mcp_server")
+ health_ok = await mcp_server_health(
+ remote_mcp_server=server_url,
+ authorization_token=authorization_token,
+ custom_headers=custom_headers,
+ )
+ update_mcp_record_status_by_id(
+ mcp_id=mcp_id,
+ tenant_id=tenant_id,
+ user_id=user_id,
+ status=bool(health_ok),
+ )
+ if not health_ok:
+ raise MCPConnectionError("MCP connection failed")
+
+ update_mcp_record_enabled_by_id(
+ mcp_id=mcp_id,
+ tenant_id=tenant_id,
+ user_id=user_id,
+ enabled=enabled,
+ )
+
+
+# ---------------------------------------------------------------------------
+# Delete Functions
+# ---------------------------------------------------------------------------
+
+async def delete_mcp_service(
+    *,
+    tenant_id: str,
+    user_id: str,
+    mcp_id: int,
+) -> None:
+    """Delete one MCP service, stopping its container first when it has one.
+
+    A failure to stop the container is logged and does not prevent the record
+    from being deleted.
+
+    Args:
+        tenant_id: Tenant ID
+        user_id: User ID
+        mcp_id: MCP record ID
+
+    Raises:
+        McpNotFoundError: If MCP record is not found
+    """
+    record = get_mcp_record_by_id_and_tenant(mcp_id=mcp_id, tenant_id=tenant_id)
+    if not record:
+        raise McpNotFoundError("MCP record not found")
+
+    linked_container_id = record.get("container_id")
+    if linked_container_id:
+        try:
+            await MCPContainerManager().stop_mcp_container(container_id=linked_container_id)
+        except Exception as exc:
+            logger.warning(f"Failed to stop container: {exc}, but continue to delete MCP record")
+
+    delete_mcp_record_by_id(mcp_id=mcp_id, tenant_id=tenant_id, user_id=user_id)
+
+
+async def delete_mcp_by_container_id(tenant_id: str, user_id: str, container_id: str) -> None:
+    """Delete the MCP record associated with a specific container ID.
+
+    Thin wrapper around delete_mcp_record_by_container_id; it does not stop the container.
+    NOTE(review): the earlier docstring calls this a soft delete - confirm in the database helper.
+
+    Args:
+        tenant_id: Tenant ID
+        user_id: User ID
+        container_id: Docker container ID whose record is deleted
+    """
+    delete_mcp_record_by_container_id(
+        container_id=container_id,
+        tenant_id=tenant_id,
+        user_id=user_id,
+    )
+
+
+# ---------------------------------------------------------------------------
+# List Functions
+# ---------------------------------------------------------------------------
+
+async def get_remote_mcp_server_list(
+ tenant_id: str,
+ user_id: str | None = None,
+ is_need_auth: bool = True,
+) -> list[dict]:
+ """Get list of remote MCP servers with full details.
+
+ Args:
+ tenant_id: Tenant ID
+ user_id: User ID for permission checking
+ is_need_auth: Whether to include authorization tokens
+
+ Returns:
+ List of MCP server records with all fields including container_id, description,
+ enabled, source, update_time, tags, container_port, registry_json, config_json,
+ container_status, and authorization_token
+ """
mcp_records = get_mcp_records_by_tenant(tenant_id=tenant_id)
mcp_records_list = []
can_edit_all = False
@@ -165,23 +759,60 @@ async def get_remote_mcp_server_list(tenant_id: str, user_id: str | None = None,
user_role = str(user_tenant_record.get("user_role") or "").upper()
can_edit_all = user_role in CAN_EDIT_ALL_USER_ROLES
+ container_status_map = {}
+ try:
+ manager = MCPContainerManager()
+ for container in manager.list_mcp_containers(tenant_id=tenant_id):
+ container_id = container.get("container_id")
+ status = container.get("status")
+ if not container_id:
+ continue
+ if status == "running":
+ container_status_map[container_id] = "running"
+ elif status:
+ container_status_map[container_id] = "stopped"
+ except Exception as exc:
+ logger.warning(f"Failed to load container runtime status: {exc}")
+
for record in mcp_records:
created_by = record.get("created_by") or record.get("user_id")
if user_id is None:
permission = PERMISSION_READ
else:
- permission = PERMISSION_EDIT if can_edit_all or str(
- created_by) == str(user_id) else PERMISSION_READ
+ permission = PERMISSION_EDIT if can_edit_all or str(created_by) == str(user_id) else PERMISSION_READ
+
+ config_json = record.get("config_json")
+ container_id = record.get("container_id")
+
+ is_container = container_id is not None or config_json is not None
+
+ container_status = None
+ if is_container:
+ if container_id:
+ container_status = container_status_map.get(container_id, "stopped")
+ else:
+ container_status = "stopped"
record_dict = {
"remote_mcp_server_name": record["mcp_name"],
"remote_mcp_server": record["mcp_server"],
- "status": record["status"],
+ "status": record.get("status"),
"permission": permission,
"mcp_id": record.get("mcp_id"),
+ "container_id": container_id,
+ "description": record.get("description"),
+ "enabled": record.get("enabled"),
+ "source": record.get("source"),
+ "update_time": record.get("update_time"),
+ "tags": record.get("tags") or [],
+ "container_port": record.get("container_port"),
+ "registry_json": record.get("registry_json"),
+ "config_json": record.get("config_json"),
+ "container_status": container_status,
}
if is_need_auth:
record_dict["authorization_token"] = record.get("authorization_token")
+ record_dict["custom_headers"] = record.get("custom_headers")
mcp_records_list.append(record_dict)
return mcp_records_list
@@ -192,13 +823,15 @@ def attach_mcp_container_permissions(
tenant_id: str,
user_id: str | None = None,
) -> list[dict]:
- """
- Attach permission (EDIT/READ) to each MCP container entry.
+ """Attach permission (EDIT/READ) to each MCP container entry.
+
+ Args:
+ containers: List of container records
+ tenant_id: Tenant ID
+ user_id: User ID for permission checking
- Rules:
- - If user's role is in CAN_EDIT_ALL_USER_ROLES => EDIT for all containers
- - Otherwise => EDIT only if the container is associated with an MCP record created by this user
- - If association cannot be determined => default to READ
+ Returns:
+ List of containers with permission field added
"""
if not containers:
return []
@@ -208,19 +841,17 @@ def attach_mcp_container_permissions(
user_role = str(user_tenant_record.get("user_role") or "").upper()
can_edit_all = user_role in CAN_EDIT_ALL_USER_ROLES
- created_by_by_container_id: dict[str, str] = {}
+ created_by_by_container_id = {}
try:
for record in get_mcp_records_by_tenant(tenant_id=tenant_id) or []:
cid = record.get("container_id")
if not cid:
continue
- created_by_by_container_id[str(cid)] = str(
- record.get("created_by") or record.get("user_id") or ""
- )
+ created_by_by_container_id[str(cid)] = str(record.get("created_by") or record.get("user_id") or "")
except Exception as e:
logger.warning(f"Failed to load MCP records for permission mapping: {e}")
- enriched: list[dict] = []
+ enriched = []
for container in containers:
container_id = str(container.get("container_id") or "")
created_by = created_by_by_container_id.get(container_id, "")
@@ -228,77 +859,196 @@ def attach_mcp_container_permissions(
if user_id is None:
permission = PERMISSION_READ
else:
- permission = PERMISSION_EDIT if can_edit_all or (
- created_by and str(created_by) == str(user_id)
- ) else PERMISSION_READ
+ permission = PERMISSION_EDIT if can_edit_all or (created_by and str(created_by) == str(user_id)) else PERMISSION_READ
enriched.append({**container, "permission": permission})
return enriched
-async def check_mcp_health_and_update_db(mcp_url, service_name, tenant_id, user_id):
- # Get authorization token from database
+async def get_mcp_record_by_id(mcp_id: int, tenant_id: str) -> dict | None:
+    """Look up one MCP record and return its connection-related fields.
+
+    Args:
+        mcp_id: MCP record ID
+        tenant_id: Tenant ID passed to the lookup together with the record ID
+
+    Returns:
+        Dictionary with mcp_name, mcp_server, authorization_token and
+        custom_headers, or None when the lookup returns nothing
+    """
+    record = get_mcp_record_by_id_and_tenant(mcp_id=mcp_id, tenant_id=tenant_id)
+    if not record:
+        return None
+
+    # Only these fields are handed back to the caller.
+    exposed_fields = ("mcp_name", "mcp_server", "authorization_token", "custom_headers")
+    return {field: record.get(field) for field in exposed_fields}
+
+
+# ---------------------------------------------------------------------------
+# Health Check Functions
+# ---------------------------------------------------------------------------
+
+async def check_mcp_health_and_update_db(mcp_url, service_name, tenant_id, user_id) -> None:
+ """Check MCP health and update database status.
+
+ Args:
+ mcp_url: MCP server URL
+ service_name: MCP service name
+ tenant_id: Tenant ID
+ user_id: User ID
+
+ Raises:
+ MCPConnectionError: If MCP connection fails
+ """
authorization_token = get_mcp_authorization_token_by_name_and_url(
mcp_name=service_name,
mcp_server=mcp_url,
tenant_id=tenant_id
)
+ custom_headers = get_mcp_custom_headers_by_name_and_url(
+ mcp_name=service_name,
+ mcp_server=mcp_url,
+ tenant_id=tenant_id
+ )
- # check the health of the MCP server
try:
status = await mcp_server_health(
remote_mcp_server=mcp_url,
- authorization_token=authorization_token
+ authorization_token=authorization_token,
+ custom_headers=custom_headers,
)
except BaseException:
status = False
- # update the status of the MCP server in the database
+
update_mcp_status_by_name_and_url(
mcp_name=service_name,
mcp_server=mcp_url,
tenant_id=tenant_id,
user_id=user_id,
- status=status)
+ status=status
+ )
if not status:
raise MCPConnectionError("MCP connection failed")
-async def delete_mcp_by_container_id(tenant_id: str, user_id: str, container_id: str):
- """
- Soft delete MCP record associated with a specific container ID.
+async def check_mcp_service_health(
+ *,
+ tenant_id: str,
+ user_id: str,
+ mcp_id: int,
+) -> str:
+ """Check MCP service health by ID.
+
+ Args:
+ tenant_id: Tenant ID
+ user_id: User ID
+ mcp_id: MCP record ID
+
+ Returns:
+ "healthy" if MCP is reachable
- This is used when stopping a containerized MCP so that the MCP record and
- its container are removed together.
+ Raises:
+ McpNotFoundError: If MCP record is not found
+ McpValidationError: If MCP server URL is empty
+ MCPConnectionError: If MCP connection fails
"""
- delete_mcp_record_by_container_id(
- container_id=container_id,
+ record = get_mcp_record_by_id_and_tenant(mcp_id=mcp_id, tenant_id=tenant_id)
+ if not record:
+ raise McpNotFoundError("MCP record not found")
+
+ server_url = record.get("mcp_server")
+ if not server_url:
+ raise McpValidationError("MCP server URL is empty")
+
+ authorization_token = record.get("authorization_token")
+ custom_headers = record.get("custom_headers")
+
+ try:
+ status = await mcp_server_health(
+ remote_mcp_server=server_url,
+ authorization_token=authorization_token,
+ custom_headers=custom_headers,
+ )
+ except MCPConnectionError:
+ update_mcp_record_status_by_id(
+ mcp_id=mcp_id,
+ tenant_id=tenant_id,
+ user_id=user_id,
+ status=False,
+ )
+ raise
+ except Exception as exc:
+ logger.error(f"MCP health check failed: {exc}")
+ update_mcp_record_status_by_id(
+ mcp_id=mcp_id,
+ tenant_id=tenant_id,
+ user_id=user_id,
+ status=False,
+ )
+ raise MCPConnectionError(str(exc) or "MCP connection failed")
+
+ update_mcp_record_status_by_id(
+ mcp_id=mcp_id,
tenant_id=tenant_id,
user_id=user_id,
+ status=status,
)
+ if not status:
+ raise MCPConnectionError("MCP connection failed")
+
+ return "healthy"
-async def get_mcp_record_by_id(mcp_id: int, tenant_id: str) -> dict | None:
- """
- Get MCP record by ID
+
+# ---------------------------------------------------------------------------
+# Tool Functions
+# ---------------------------------------------------------------------------
+
+async def list_mcp_service_tools_by_id(*, tenant_id: str, mcp_id: int) -> list[dict]:
+ """Get tools from an MCP service by ID.
Args:
- mcp_id: MCP record ID
tenant_id: Tenant ID
+ mcp_id: MCP record ID
Returns:
- Dictionary containing mcp_name, mcp_server, and authorization_token, or None if not found
+ List of tool dictionaries
+
+ Raises:
+ McpNotFoundError: If MCP record is not found
+ McpValidationError: If MCP record is missing connection fields
+ MCPConnectionError: If MCP connection fails
"""
- mcp_record = get_mcp_record_by_id_and_tenant(mcp_id=mcp_id, tenant_id=tenant_id)
- if not mcp_record:
- return None
+ record = get_mcp_record_by_id_and_tenant(mcp_id=mcp_id, tenant_id=tenant_id)
+ if not record:
+ raise McpNotFoundError("MCP record not found")
- return {
- "mcp_name": mcp_record.get("mcp_name"),
- "mcp_server": mcp_record.get("mcp_server"),
- "authorization_token": mcp_record.get("authorization_token"),
- }
+ service_name = record.get("mcp_name")
+ server_url = record.get("mcp_server")
+ if not service_name or not server_url:
+ raise McpValidationError("MCP record is missing runtime connection fields")
+ authorization_token = record.get("authorization_token")
+ custom_headers = record.get("custom_headers")
+
+ from services.tool_configuration_service import get_tool_from_remote_mcp_server
+ tools_info = await get_tool_from_remote_mcp_server(
+ mcp_server_name=service_name,
+ remote_mcp_server=server_url,
+ tenant_id=tenant_id,
+ authorization_token=authorization_token,
+ custom_headers=custom_headers,
+ )
+ return [tool.__dict__ for tool in tools_info]
+
+
+# ---------------------------------------------------------------------------
+# Image Upload Functions
+# ---------------------------------------------------------------------------
async def upload_and_start_mcp_image(
tenant_id: str,
@@ -308,69 +1058,56 @@ async def upload_and_start_mcp_image(
port: int,
service_name: str | None = None,
env_vars: str | None = None,
-):
- """
- Upload MCP Docker image and start container.
+) -> dict:
+ """Upload MCP Docker image and start container.
Args:
- tenant_id: Tenant ID for isolation
- user_id: User ID for isolation
+ tenant_id: Tenant ID
+ user_id: User ID
file_content: Raw file content bytes
filename: Original filename
port: Host port to expose the MCP server on
- service_name: Optional name for the MCP service (auto-generated if not provided)
+ service_name: Optional name for the MCP service
env_vars: Optional environment variables as JSON string
Returns:
- Dictionary with service details including mcp_url, container_id, etc.
+ Dictionary with service details
Raises:
MCPContainerError: If container operations fail
MCPNameIllegal: If service name already exists
ValueError: If file validation fails
"""
- # Validate file type
if not filename.lower().endswith('.tar'):
raise ValueError("Only .tar files are allowed")
- # Validate file size (limit to 1GB)
file_size = len(file_content)
- if file_size > 1024 * 1024 * 1024: # 1GB limit
+ if file_size > 1024 * 1024 * 1024:
raise ValueError("File size exceeds 1GB limit")
- # Parse environment variables
parsed_env_vars = None
if env_vars:
+ import json
try:
- import json
parsed_env_vars = json.loads(env_vars)
if not isinstance(parsed_env_vars, dict):
raise ValueError("Environment variables must be a JSON object")
except (json.JSONDecodeError, ValueError) as e:
raise ValueError(f"Invalid environment variables format: {str(e)}")
- # Generate service name if not provided
final_service_name = service_name
if not final_service_name:
- # Remove .tar extension from filename
final_service_name = os.path.splitext(filename)[0]
- # Check if MCP service name already exists
if check_mcp_name_exists(mcp_name=final_service_name, tenant_id=tenant_id):
raise MCPNameIllegal("MCP service name already exists")
- # Save file to temporary location (delete=False, manual cleanup)
with tempfile.NamedTemporaryFile(delete=False, suffix='.tar') as temp_file:
temp_file.write(file_content)
temp_file_path = temp_file.name
try:
- # Initialize container manager
container_manager = MCPContainerManager()
-
- # Start container from uploaded image
- # Note: uploaded image should be a complete MCP server implementation
- # that can be started directly without additional commands (uses image's CMD/ENTRYPOINT)
container_info = await container_manager.start_mcp_container_from_tar(
tar_file_path=temp_file_path,
service_name=final_service_name,
@@ -378,22 +1115,18 @@ async def upload_and_start_mcp_image(
user_id=user_id,
env_vars=parsed_env_vars,
host_port=port,
- full_command=None, # Uploaded image should contain the MCP server
+ full_command=None,
)
finally:
- # Manual cleanup of temporary file
try:
os.unlink(temp_file_path)
except Exception as e:
- logger.warning(
- f"Failed to clean up temporary file {temp_file_path}: {e}")
+ logger.warning(f"Failed to clean up temporary file {temp_file_path}: {e}")
- # Extract authorization_token from env_vars for database registration
authorization_token = None
if parsed_env_vars:
authorization_token = parsed_env_vars.get("authorization_token")
- # Register to remote MCP server list
await add_remote_mcp_server_list(
tenant_id=tenant_id,
user_id=user_id,
@@ -401,6 +1134,7 @@ async def upload_and_start_mcp_image(
remote_mcp_server_name=final_service_name,
container_id=container_info["container_id"],
authorization_token=authorization_token,
+ container_port=port
)
return {
diff --git a/backend/services/skill_service.py b/backend/services/skill_service.py
index cf47b4df4..f5b7d1c7c 100644
--- a/backend/services/skill_service.py
+++ b/backend/services/skill_service.py
@@ -1,19 +1,32 @@
"""Skill management service."""
+import aiofiles
+import argparse
+import ast
+import asyncio
+import inspect
import io
import json
import logging
import os
-from typing import Any, Dict, List, Optional, Union
+import uuid
+import zipfile
+import re
+import threading
+from typing import Any, Dict, List, Optional, Tuple, Union
import yaml
from nexent.skills import SkillManager
from nexent.skills.skill_loader import SkillLoader
-from consts.const import CONTAINER_SKILLS_PATH, ROOT_DIR
+from nexent.core.utils.observer import MessageObserver
+from nexent.core.agents.agent_model import ModelConfig
+from consts.const import CONTAINER_SKILLS_PATH, OFFICIAL_SKILLS_ZIP_PATH, ROOT_DIR
from consts.exceptions import SkillException
from database import skill_db
-from database.db_models import SkillInfo
+from agents.skill_creation_agent import create_skill_from_request
+from utils.prompt_template_utils import get_skill_creation_simple_prompt_template
+from utils.content_classifier_utils import ContentClassifier
logger = logging.getLogger(__name__)
@@ -246,6 +259,51 @@ def _commented_tree_to_plain(node: Any) -> Any:
return node
+def _ruamel_tree_to_plain(node: Any) -> Any:
+    """Recursively copy a ruamel round-trip tree into built-in containers.
+
+    ``CommentedMap`` becomes ``dict`` and ``CommentedSeq`` becomes ``list``;
+    any other node is returned unchanged. Comments are NOT merged into the
+    values, so a config.yaml scalar stays clean
+    (e.g. ``/mnt/nexent`` not ``/mnt/nexent # Initial workspace path``).
+    """
+    from ruamel.yaml.comments import CommentedMap, CommentedSeq
+
+    if isinstance(node, CommentedMap):
+        plain_map = {}
+        for key, child in node.items():
+            plain_map[key] = _ruamel_tree_to_plain(child)
+        return plain_map
+    if isinstance(node, CommentedSeq):
+        plain_seq = []
+        for child in node:
+            plain_seq.append(_ruamel_tree_to_plain(child))
+        return plain_seq
+    return node
+
+
+def _parse_yaml_ruamel_plain(text: str) -> Dict[str, Any]:
+    """Load YAML text with ruamel (round-trip mode) and return a plain dict.
+
+    Comments are not folded into scalar values, which is what
+    ``config.yaml`` -> ``config_values`` needs. The resulting mapping is passed
+    through ``_params_dict_to_storable`` before being returned. An empty
+    document yields ``{}``.
+
+    Raises:
+        SkillException: If the text cannot be loaded as YAML or its top level
+            is not a mapping.
+        ImportError: If ruamel.yaml is not installed (left for the caller to handle).
+    """
+    from ruamel.yaml import YAML
+    from ruamel.yaml.comments import CommentedMap
+
+    not_a_mapping = (
+        "config/config.yaml must contain a JSON or YAML object (mapping), not a list or scalar"
+    )
+
+    loader = YAML(typ="rt")
+    try:
+        root = loader.load(text)
+    except Exception as exc:
+        raise SkillException(f"Invalid YAML in config/config.yaml: {exc}") from exc
+
+    if root is None:
+        return {}
+
+    if isinstance(root, CommentedMap):
+        plain = _ruamel_tree_to_plain(root)
+    elif isinstance(root, dict):
+        plain = root
+    else:
+        # Lists and scalars are rejected by the check below.
+        plain = None
+    if not isinstance(plain, dict):
+        raise SkillException(not_a_mapping)
+    return _params_dict_to_storable(plain)
+
+
def _parse_yaml_with_ruamel_merge_eol_comments(text: str) -> Dict[str, Any]:
"""Parse YAML with ruamel; merge ``#`` into scalar values only (``value # tip`` for the UI).
@@ -279,6 +337,189 @@ def _parse_yaml_with_ruamel_merge_eol_comments(text: str) -> Dict[str, Any]:
return _params_dict_to_storable(plain)
+def _get_skill_inputs_from_code(scripts_dir: str) -> List[Dict[str, Any]]:
+    """Collect argparse parameters declared by a skill's scripts via AST analysis.
+
+    Scans the ``*.py`` files directly inside ``scripts_dir`` (names starting
+    with ``_`` are skipped), parses each one without executing it, and records
+    every call accepted by ``_is_add_argument_call`` anywhere in the module,
+    including inside function bodies and ``if __name__ == "__main__":`` blocks.
+
+    Files are visited in sorted name order so that, when several scripts
+    declare the same parameter, the entry that is kept does not depend on the
+    directory listing order. Files that cannot be read, are not valid UTF-8,
+    or contain syntax errors are skipped.
+
+    Args:
+        scripts_dir: Path to the skill's ``scripts/`` directory.
+
+    Returns:
+        One dict per distinct parameter with the keys ``name``, ``type``,
+        ``required`` and ``description_en``; empty when the directory is missing.
+    """
+    inputs: List[Dict[str, Any]] = []
+    seen_names: set = set()
+
+    if not os.path.isdir(scripts_dir):
+        return inputs
+
+    for filename in sorted(os.listdir(scripts_dir)):
+        if not filename.endswith(".py") or filename.startswith("_"):
+            continue
+
+        script_path = os.path.join(scripts_dir, filename)
+        try:
+            # Context manager closes the handle even when read() fails.
+            with open(script_path, "r", encoding="utf-8") as script_file:
+                source = script_file.read()
+        except (OSError, UnicodeDecodeError):
+            continue
+
+        try:
+            tree = ast.parse(source, filename=filename)
+        except SyntaxError:
+            continue
+
+        for node in ast.walk(tree):
+            if not isinstance(node, ast.Call):
+                continue
+            if not _is_add_argument_call(node):
+                continue
+
+            parsed = _extract_arg_from_add_argument(node)
+            if not parsed:
+                continue
+
+            param_name = parsed["name"]
+            # argparse adds -h/--help itself; the first declaration of a name wins.
+            if param_name in ("help", "h") or param_name in seen_names:
+                continue
+            seen_names.add(param_name)
+
+            inputs.append({
+                "name": param_name,
+                "type": parsed["type"],
+                "required": parsed["required"],
+                "description_en": parsed.get("description_en", ""),
+            })
+
+    return inputs
+
+
+def _is_add_argument_call(node: ast.Call) -> bool:
+    """Tell whether ``node`` is an ``add_argument`` method call we should inspect.
+
+    Accepted receivers are the bare name ``parser`` (``parser.add_argument``)
+    and any attribute access (e.g. ``self.parser.add_argument``). Calls on any
+    other receiver are rejected.
+    """
+    callee = node.func
+    if not (isinstance(callee, ast.Attribute) and callee.attr == "add_argument"):
+        return False
+    receiver = callee.value
+    if isinstance(receiver, ast.Attribute):
+        return True
+    return isinstance(receiver, ast.Name) and receiver.id == "parser"
+
+
+def _extract_arg_from_add_argument(node: ast.Call) -> Optional[Dict[str, Any]]:
+    """Extract parameter metadata from an ``add_argument`` Call AST node.
+
+    Name: every positional argument of ``add_argument`` is a name or option
+    string. The first ``--long`` option is preferred (so ``"-n", "--name"``
+    yields ``name``); otherwise the first string is used. Leading dashes are
+    stripped.
+
+    Type: ``type=int``/``float`` map to ``"number"``, ``type=bool`` to
+    ``"boolean"``, everything else to ``"string"``.
+
+    Required:
+      * positional arguments (no leading dash) follow argparse: required
+        unless ``nargs`` says otherwise; an ``nargs`` that cannot be evaluated
+        is treated as not required.
+      * options are required only for a literal ``required=True``; an explicit
+        ``default`` that evaluates to ``None`` or ``""`` turns that back off.
+
+    Returns:
+        Dict with ``name``, ``type``, ``required`` and ``description_en``, or
+        ``None`` when no usable name can be determined.
+    """
+    kwargs = {kw.arg: kw.value for kw in node.keywords}
+
+    name_nodes = list(node.args) or [kwargs.get("name")]
+    option_strings = []
+    for name_node in name_nodes:
+        if name_node is None:
+            continue
+        candidate = _ast_literal_eval(name_node)
+        if isinstance(candidate, str) and candidate:
+            option_strings.append(candidate)
+    if not option_strings:
+        return None
+
+    raw_name = next(
+        (option for option in option_strings if option.startswith("--")),
+        option_strings[0],
+    )
+    is_positional = not raw_name.startswith("-")
+    if raw_name.startswith("--"):
+        param_name = raw_name[2:]
+    elif raw_name.startswith("-"):
+        param_name = raw_name[1:]
+    else:
+        param_name = raw_name
+    if not param_name:
+        return None
+
+    # Determine type
+    type_node = kwargs.get("type")
+    type_name = _get_type_name(type_node) if type_node is not None else ""
+    if type_name in ("int", "integer", "float"):
+        param_type = "number"
+    elif type_name == "bool":
+        param_type = "boolean"
+    else:
+        param_type = "string"
+
+    # Description
+    description = ""
+    help_node = kwargs.get("help")
+    if help_node is not None:
+        help_text = _ast_literal_eval(help_node)
+        if isinstance(help_text, str):
+            description = help_text
+
+    # Required
+    required = False
+    if is_positional:
+        nargs_node = kwargs.get("nargs")
+        nargs = _ast_literal_eval(nargs_node) if nargs_node is not None else None
+        required = (
+            nargs_node is None
+            or nargs == "+"
+            or (isinstance(nargs, int) and not isinstance(nargs, bool) and nargs > 0)
+        )
+    else:
+        required_node = kwargs.get("required")
+        if required_node is not None:
+            required = _ast_literal_eval(required_node) is True
+        default_node = kwargs.get("default")
+        if required and default_node is not None:
+            default = _ast_literal_eval(default_node)
+            if default is None or (isinstance(default, str) and default == ""):
+                required = False
+
+    return {
+        "name": param_name,
+        "type": param_type,
+        "required": required,
+        "description_en": description,
+    }
+
+
+def _get_type_name(node: ast.AST) -> str:
+    """Return the identifier that names a ``type=...`` expression.
+
+    A bare name gives its id, a dotted access gives its last attribute, and a
+    call gives the name of what is being called (by the same two rules).
+    Any other expression gives ``""``.
+    """
+    # For a call, look at the callee instead of the call itself.
+    target = node.func if isinstance(node, ast.Call) else node
+    if isinstance(target, ast.Name):
+        return target.id
+    if isinstance(target, ast.Attribute):
+        return target.attr
+    return ""
+
+
+def _ast_literal_eval(node: ast.AST) -> Any:
+    """Best-effort evaluation of a literal-like AST node to a Python value.
+
+    Supported nodes: constants, bare names (``None``/``True``/``False`` map to
+    their values, any other identifier is returned as its name string), lists,
+    tuples, dicts, unary ``+``/``-`` on numbers, and ``+`` between two strings.
+    Anything else, including a dict whose evaluated keys are unhashable,
+    evaluates to ``None``. No code is executed.
+    """
+    # ast.Constant represents every literal; the deprecated ast.Num / ast.Str
+    # aliases are deliberately not referenced.
+    if isinstance(node, ast.Constant):
+        return node.value
+    if isinstance(node, ast.Name):
+        name = node.id
+        if name == "None":
+            return None
+        if name == "True":
+            return True
+        if name == "False":
+            return False
+        return name
+    if isinstance(node, (ast.List, ast.Tuple)):
+        elts = [_ast_literal_eval(e) for e in node.elts]
+        return elts if isinstance(node, ast.List) else tuple(elts)
+    if isinstance(node, ast.Dict):
+        result = {}
+        # ``keys`` and ``values`` are parallel lists; a ``**spread`` entry has key None.
+        for key_node, value_node in zip(node.keys, node.values):
+            if key_node is None:
+                continue
+            try:
+                result[_ast_literal_eval(key_node)] = _ast_literal_eval(value_node)
+            except TypeError:
+                # Unhashable key (e.g. a list): not representable as a dict.
+                return None
+        return result
+    if isinstance(node, ast.UnaryOp) and isinstance(node.op, (ast.UAdd, ast.USub)):
+        val = _ast_literal_eval(node.operand)
+        if isinstance(val, (int, float)):
+            return -val if isinstance(node.op, ast.USub) else val
+        return None
+    if isinstance(node, ast.BinOp):
+        left = _ast_literal_eval(node.left)
+        right = _ast_literal_eval(node.right)
+        if isinstance(left, str) and isinstance(right, str) and isinstance(node.op, ast.Add):
+            return left + right
+    return None
+
+
def _parse_yaml_fallback_pyyaml(text: str) -> Dict[str, Any]:
"""Parse YAML with PyYAML (comments are dropped)."""
try:
@@ -305,7 +546,7 @@ def _parse_skill_params_from_config_bytes(raw: bytes) -> Dict[str, Any]:
data = json.loads(text)
except json.JSONDecodeError:
try:
- return _parse_yaml_with_ruamel_merge_eol_comments(text)
+ return _parse_yaml_ruamel_plain(text)
except ImportError:
logger.warning("ruamel.yaml not installed; YAML comments will be dropped on parse")
return _parse_yaml_fallback_pyyaml(text)
@@ -325,6 +566,66 @@ def _parse_skill_params_from_config_bytes(raw: bytes) -> Dict[str, Any]:
return _params_dict_to_storable(data)
+def _parse_skill_schema_from_yaml_bytes(raw: bytes) -> List[Dict[str, Any]]:
+    """Turn the bytes of ``config/schema.yaml`` into a list of parameter dicts.
+
+    The text is tried as JSON first, then as YAML through
+    ``_parse_yaml_with_ruamel_merge_eol_comments``; ``_parse_yaml_fallback_pyyaml``
+    is used when ruamel cannot be imported or fails with an unexpected error.
+
+    Expected layout (one mapping per parameter):
+        param_name:
+            type: string | number | boolean | array | object
+            required: true | false
+            description_en: "English description"
+            description_zh: "Chinese description"
+            depends_on: other_param_name
+
+    Returns:
+        One dict per parameter with ``name``, ``type``, ``required``,
+        ``description_en``, ``description_zh`` and ``depends_on``. Empty input
+        or a non-mapping document yields ``[]``, as does a document that fails
+        in PyYAML after an unexpected ruamel error. Entries whose value is not
+        a mapping are skipped.
+
+    Raises:
+        SkillException: Re-raised unchanged when the ruamel-based parser raises it.
+    """
+    text = raw.decode("utf-8-sig").strip()
+    if not text:
+        logger.warning("[schema] Empty raw bytes for schema.yaml")
+        return []
+
+    document: Any = None
+    source_parser = "unknown"
+    try:
+        document = json.loads(text)
+        source_parser = "json"
+    except json.JSONDecodeError:
+        try:
+            document = _parse_yaml_with_ruamel_merge_eol_comments(text)
+            source_parser = "ruamel"
+        except ImportError:
+            # ruamel is unavailable: PyYAML errors propagate from here.
+            document = _parse_yaml_fallback_pyyaml(text)
+            source_parser = "pyyaml"
+        except SkillException:
+            raise
+        except Exception:
+            try:
+                document = _parse_yaml_fallback_pyyaml(text)
+                source_parser = "pyyaml"
+            except Exception as exc:
+                logger.warning("[schema] All YAML parsers failed: %s", exc)
+                return []
+
+    if not isinstance(document, dict):
+        logger.warning("[schema] Parsed data is not a dict (type=%s, parse_method=%s)", type(document).__name__, source_parser)
+        return []
+
+    params: List[Dict[str, Any]] = []
+    for param_name, meta in document.items():
+        if isinstance(meta, dict):
+            params.append({
+                "name": param_name,
+                "type": meta.get("type", "string"),
+                "required": bool(meta.get("required", False)),
+                # ``description`` is accepted as a fallback for ``description_en``.
+                "description_en": meta.get("description_en", meta.get("description", "")),
+                "description_zh": meta.get("description_zh", ""),
+                "depends_on": meta.get("depends_on"),
+            })
+        else:
+            logger.debug("[schema] Skipping param '%s': meta is not a dict (%s)", param_name, type(meta).__name__)
+    return params
+
+
def _read_params_from_zip_config_yaml(
zip_bytes: bytes,
preferred_skill_root: Optional[str] = None,
@@ -346,11 +647,127 @@ def _read_params_from_zip_config_yaml(
return params
+def _find_zip_member_schema_yaml(
+    file_list: List[str],
+    preferred_skill_root: Optional[str] = None,
+) -> Optional[str]:
+    """Return the ZIP entry that holds ``.../config/schema.yaml``, or None.
+
+    Each entry is compared after ``_normalize_zip_entry_path``. When
+    ``preferred_skill_root`` is given, the entry
+    ``<preferred_skill_root>/config/schema.yaml`` wins over any other match,
+    wherever it sits in the archive listing. Otherwise the first entry whose
+    last two path segments are ``config/schema.yaml`` (at any depth) is used.
+
+    Args:
+        file_list: Entry names as returned by ``ZipFile.namelist()``.
+        preferred_skill_root: Skill folder inside the archive to prefer.
+
+    Returns:
+        The original (un-normalized) entry name, or None when nothing matches.
+    """
+    preferred_path = None
+    if preferred_skill_root:
+        preferred_path = f"{preferred_skill_root}/config/schema.yaml"
+
+    first_match = None
+    for entry in file_list:
+        norm = _normalize_zip_entry_path(entry)
+        if preferred_path and norm == preferred_path:
+            logger.debug("[schema] Found schema.yaml via preferred_root match: %s", entry)
+            return entry
+        if first_match is None:
+            # Remember the first generic hit, but keep scanning for the preferred one.
+            parts = norm.split("/")
+            if len(parts) >= 2 and parts[-2] == "config" and parts[-1] == "schema.yaml":
+                first_match = entry
+
+    if first_match is not None:
+        logger.debug("[schema] Found schema.yaml via config/ prefix match: %s", first_match)
+        return first_match
+    logger.debug("[schema] No schema.yaml found in ZIP entries (preferred_root=%s, entry_count=%d)", preferred_skill_root, len(file_list))
+    return None
+
+
+def _read_schema_yaml_from_zip(
+    zip_bytes: bytes,
+    preferred_skill_root: Optional[str] = None,
+) -> Optional[List[Dict[str, Any]]]:
+    """Parse ``config/schema.yaml`` out of a ZIP archive held in memory.
+
+    Returns:
+        The parameter list produced by ``_parse_skill_schema_from_yaml_bytes``
+        (possibly empty), or None when the archive has no schema.yaml entry.
+    """
+    import zipfile
+
+    with zipfile.ZipFile(io.BytesIO(zip_bytes), "r") as archive:
+        schema_member = _find_zip_member_schema_yaml(
+            archive.namelist(),
+            preferred_skill_root=preferred_skill_root,
+        )
+        if not schema_member:
+            return None
+        schema_bytes = archive.read(schema_member)
+
+    params = _parse_skill_schema_from_yaml_bytes(schema_bytes)
+    if not params:
+        logger.debug("[schema] Parsed result is empty from ZIP member %s", schema_member)
+    return params
+
+
+def _get_skill_inputs_from_zip(
+    zip_bytes: bytes,
+    preferred_skill_root: Optional[str] = None,
+) -> List[Dict[str, Any]]:
+    """Collect argparse parameters from the ``scripts`` Python files of a ZIP archive.
+
+    ZIP counterpart of ``_get_skill_inputs_from_code``: sources are read from
+    the archive bytes and analysed with ``ast`` without being executed.
+
+    Entry selection:
+      * only ``.py`` entries whose path contains no ``/_`` are considered;
+      * with ``preferred_skill_root``, the path must start with
+        ``<preferred_skill_root>/scripts/``;
+      * without it, the entry's parent folder must be named ``scripts``.
+
+    Args:
+        zip_bytes: ZIP archive content.
+        preferred_skill_root: Folder inside the ZIP that contains ``scripts/``.
+
+    Returns:
+        One dict per distinct parameter with ``name``, ``type``, ``required``
+        and ``description_en``. A corrupt archive yields whatever was collected
+        before the error (an empty list if it cannot be opened at all).
+    """
+    archive_stream = io.BytesIO(zip_bytes)
+    collected: List[Dict[str, Any]] = []
+    known_names: set = set()
+
+    try:
+        with zipfile.ZipFile(archive_stream, "r") as archive:
+            entries = archive.namelist()
+            root = preferred_skill_root or ""
+            scripts_prefix = root + "/scripts/"
+
+            for entry in entries:
+                path = entry.replace("\\", "/").strip()
+                if not path.endswith(".py") or "/_" in path:
+                    continue
+                if not path.startswith(scripts_prefix):
+                    if root:
+                        continue
+                    # No preferred root: accept any file sitting directly in a scripts/ folder.
+                    segments = path.split("/")
+                    if len(segments) < 2 or segments[-2] != "scripts":
+                        continue
+
+                try:
+                    code = archive.read(entry).decode("utf-8")
+                except (OSError, UnicodeDecodeError):
+                    continue
+
+                try:
+                    module = ast.parse(code, filename=entry)
+                except SyntaxError:
+                    continue
+
+                for candidate in ast.walk(module):
+                    if not (isinstance(candidate, ast.Call) and _is_add_argument_call(candidate)):
+                        continue
+                    spec = _extract_arg_from_add_argument(candidate)
+                    if not spec:
+                        continue
+                    arg_name = spec["name"]
+                    # Skip the built-in help flag and names already recorded.
+                    if arg_name in ("help", "h") or arg_name in known_names:
+                        continue
+                    known_names.add(arg_name)
+                    collected.append({
+                        "name": arg_name,
+                        "type": spec["type"],
+                        "required": spec["required"],
+                        "description_en": spec.get("description_en", ""),
+                    })
+    except zipfile.BadZipFile:
+        return collected
+
+    return collected
+
+
def _local_skill_config_yaml_path(skill_name: str, local_skills_dir: str) -> str:
"""Absolute path to //config/config.yaml."""
return os.path.join(local_skills_dir, skill_name, "config", "config.yaml")
+def _local_skill_schema_yaml_path(skill_name: str, local_skills_dir: str) -> str:
+    """Return ``<local_skills_dir>/<skill_name>/config/schema.yaml``."""
+    path_parts = (local_skills_dir, skill_name, "config", "schema.yaml")
+    return os.path.join(*path_parts)
+
+
def _write_skill_params_to_local_config_yaml(
skill_name: str,
params: Dict[str, Any],
@@ -380,24 +797,28 @@ def _remove_local_skill_config_yaml(skill_name: str, local_skills_dir: str) -> N
logger.info("Removed %s (params cleared in DB)", path)
-def get_skill_manager() -> SkillManager:
- """Get or create the global SkillManager instance."""
- global _skill_manager
- if _skill_manager is None:
- _skill_manager = SkillManager(CONTAINER_SKILLS_PATH)
- return _skill_manager
+def get_skill_manager(tenant_id: Optional[str] = None) -> SkillManager:
+ """Create a SkillManager instance with optional tenant-based directory isolation.
+
+ Args:
+ tenant_id: Tenant ID for directory isolation. When provided, skills
+ are stored under CONTAINER_SKILLS_PATH / tenant_id /
+ """
+ return SkillManager(base_skills_dir=CONTAINER_SKILLS_PATH, tenant_id=tenant_id)
class SkillService:
"""Skill management service for backend operations."""
- def __init__(self, skill_manager: Optional[SkillManager] = None):
+ def __init__(self, skill_manager: Optional[SkillManager] = None, tenant_id: Optional[str] = None):
"""Initialize SkillService.
Args:
- skill_manager: Optional SkillManager instance, uses global if not provided
+ skill_manager: Optional SkillManager instance, uses tenant-aware global if not provided
+ tenant_id: Tenant ID for skill isolation. Required when no skill_manager is provided.
"""
- self.skill_manager = skill_manager or get_skill_manager()
+ self.tenant_id = tenant_id
+ self.skill_manager = skill_manager or get_skill_manager(tenant_id)
def _resolve_local_skills_dir_for_overlay(self) -> Optional[str]:
"""Directory where skill folders live: ``SKILLS_PATH``, else ``ROOT_DIR/skills`` if present."""
@@ -410,12 +831,15 @@ def _resolve_local_skills_dir_for_overlay(self) -> Optional[str]:
return candidate
return None
- def _overlay_params_from_local_config_yaml(self, skill: Dict[str, Any]) -> Dict[str, Any]:
- """Prefer ``//config/config.yaml`` for ``params`` in API responses.
+ def _enrich_configs_from_yaml(self, skill: Dict[str, Any]) -> Dict[str, Any]:
+ """Read local config files and overlay onto skill.
+
+ config/config.yaml → config_values (runtime defaults dict)
+ config/schema.yaml → config_schemas (parameter metadata list)
- The database stores comment-free JSON (no legacy ``_comment`` keys, no `` # `` suffixes).
- On-disk YAML may use ``#`` lines; when the file exists, parse with ruamel (inline tips
- on scalars only) and use for ``params``; otherwise use DB.
+ If a file does not exist, the corresponding DB key is removed so the
+ response never contains stale data (e.g. {"configs": null} instead of
+ the old DB value).
"""
out = dict(skill)
local_dir = self._resolve_local_skills_dir_for_overlay()
@@ -424,70 +848,89 @@ def _overlay_params_from_local_config_yaml(self, skill: Dict[str, Any]) -> Dict[
name = out.get("name")
if not name:
return out
- path = _local_skill_config_yaml_path(name, local_dir)
- if not os.path.isfile(path):
- return out
- try:
- with open(path, "rb") as f:
- raw = f.read()
- out["params"] = _parse_skill_params_from_config_bytes(raw)
- logger.info("Using local config.yaml params (scalar inline comment tooltips) for skill %s", name)
- except Exception as exc:
- logger.warning(
- "Could not use local config.yaml for skill %s params (using DB): %s",
- name,
- exc,
- )
+ config_path = _local_skill_config_yaml_path(name, local_dir)
+ if os.path.isfile(config_path):
+ try:
+ with open(config_path, "rb") as f:
+ raw = f.read()
+ out["config_values"] = _parse_skill_params_from_config_bytes(raw)
+ except Exception as exc:
+ logger.warning("Could not parse local config.yaml for skill %s: %s", name, exc)
+ else:
+ out.pop("config_values", None)
+ # schema.yaml takes precedence over DB config_schemas
+ schema_path = _local_skill_schema_yaml_path(name, local_dir)
+ if os.path.isfile(schema_path):
+ try:
+ with open(schema_path, "rb") as f:
+ raw = f.read()
+ parsed = _parse_skill_schema_from_yaml_bytes(raw)
+ out["config_schemas"] = parsed
+ except Exception as exc:
+ logger.warning("Could not parse local schema.yaml for skill %s: %s", name, exc)
+ else:
+ out.pop("config_schemas", None)
return out
def list_skills(self, tenant_id: Optional[str] = None) -> List[Dict[str, Any]]:
- """List all skills for tenant.
+ """List all skills for a tenant.
Args:
- tenant_id: Tenant ID (reserved for future multi-tenant support)
+ tenant_id: Tenant ID for filtering skills. Uses instance tenant_id if not provided.
Returns:
List of skill info dicts
"""
+ effective_tenant_id = tenant_id or self.tenant_id
+ if not effective_tenant_id:
+ raise SkillException("tenant_id is required")
try:
- skills = skill_db.list_skills()
- return [self._overlay_params_from_local_config_yaml(s) for s in skills]
+ skills = skill_db.list_skills(effective_tenant_id)
+ enriched = [self._enrich_configs_from_yaml(s) for s in skills]
+ return enriched
except Exception as e:
logger.error(f"Error listing skills: {e}")
raise SkillException(f"Failed to list skills: {str(e)}") from e
def get_skill(self, skill_name: str, tenant_id: Optional[str] = None) -> Optional[Dict[str, Any]]:
- """Get a specific skill.
+ """Get a specific skill within a tenant.
Args:
skill_name: Name of the skill
- tenant_id: Tenant ID (reserved for future multi-tenant support)
+ tenant_id: Tenant ID for filtering. Uses instance tenant_id if not provided.
Returns:
Skill dict or None if not found
"""
+ effective_tenant_id = tenant_id or self.tenant_id
+ if not effective_tenant_id:
+ raise SkillException("tenant_id is required")
try:
- skill = skill_db.get_skill_by_name(skill_name)
+ skill = skill_db.get_skill_by_name(skill_name, effective_tenant_id)
if skill:
- return self._overlay_params_from_local_config_yaml(skill)
+ return self._enrich_configs_from_yaml(skill)
return None
except Exception as e:
logger.error(f"Error getting skill {skill_name}: {e}")
raise SkillException(f"Failed to get skill: {str(e)}") from e
- def get_skill_by_id(self, skill_id: int) -> Optional[Dict[str, Any]]:
- """Get a specific skill by ID.
+ def get_skill_by_id(self, skill_id: int, tenant_id: Optional[str] = None) -> Optional[Dict[str, Any]]:
+ """Get a specific skill by ID within a tenant.
Args:
skill_id: ID of the skill
+ tenant_id: Tenant ID for filtering. Uses instance tenant_id if not provided.
Returns:
Skill dict or None if not found
"""
+ effective_tenant_id = tenant_id or self.tenant_id
+ if not effective_tenant_id:
+ raise SkillException("tenant_id is required")
try:
- skill = skill_db.get_skill_by_id(skill_id)
+ skill = skill_db.get_skill_by_id(skill_id, effective_tenant_id)
if skill:
- return self._overlay_params_from_local_config_yaml(skill)
+ return self._enrich_configs_from_yaml(skill)
return None
except Exception as e:
logger.error(f"Error getting skill by ID {skill_id}: {e}")
@@ -499,11 +942,11 @@ def create_skill(
tenant_id: Optional[str] = None,
user_id: Optional[str] = None
) -> Dict[str, Any]:
- """Create a new skill.
+ """Create a new skill for a tenant.
Args:
skill_data: Skill data including name, description, content, etc.
- tenant_id: Tenant ID (reserved for future multi-tenant support)
+ tenant_id: Tenant ID for skill isolation. Uses instance tenant_id if not provided.
user_id: User ID of the creator
Returns:
@@ -512,12 +955,16 @@ def create_skill(
Raises:
SkillException: If skill already exists locally or in database (409)
"""
+ effective_tenant_id = tenant_id or self.tenant_id
+ if not effective_tenant_id:
+ raise SkillException("tenant_id is required")
+
skill_name = skill_data.get("name")
if not skill_name:
raise SkillException("Skill name is required")
# Check if skill already exists in database
- existing = skill_db.get_skill_by_name(skill_name)
+ existing = skill_db.get_skill_by_name(skill_name, effective_tenant_id)
if existing:
raise SkillException(f"Skill '{skill_name}' already exists")
@@ -533,17 +980,17 @@ def create_skill(
try:
# Create database record first
- result = skill_db.create_skill(skill_data)
+ result = skill_db.create_skill(skill_data, effective_tenant_id)
# Create local skill file (SKILL.md)
self.skill_manager.save_skill(skill_data)
- # Mirror DB params to config/config.yaml when present (same layout as ZIP uploads).
- if self.skill_manager.local_skills_dir and skill_data.get("params") is not None:
+ # Mirror DB config_schemas to config/config.yaml when present (same layout as ZIP uploads).
+ if self.skill_manager.base_skills_dir and skill_data.get("config_schemas") is not None:
try:
_write_skill_params_to_local_config_yaml(
skill_name,
- _params_dict_to_storable(skill_data["params"]),
+ _params_dict_to_storable(skill_data["config_schemas"]),
self.skill_manager.local_skills_dir,
)
except Exception as exc:
@@ -554,7 +1001,7 @@ def create_skill(
)
logger.info(f"Created skill '{skill_name}' with local files")
- return self._overlay_params_from_local_config_yaml(result)
+ return self._enrich_configs_from_yaml(result)
except SkillException:
raise
except Exception as e:
@@ -566,6 +1013,7 @@ def create_skill_from_file(
file_content: Union[bytes, str, io.BytesIO],
skill_name: Optional[str] = None,
file_type: str = "auto",
+ source: str = "自定义",
tenant_id: Optional[str] = None,
user_id: Optional[str] = None
) -> Dict[str, Any]:
@@ -579,12 +1027,14 @@ def create_skill_from_file(
file_content: File content as bytes, string, or BytesIO
skill_name: Optional skill name (extracted from ZIP if not provided)
file_type: File type hint - "md", "zip", or "auto" (detect)
- tenant_id: Tenant ID (reserved for future multi-tenant support)
+ source: Source identifier for the skill (e.g., "自定义", "官方", "导入")
+ tenant_id: Tenant ID for skill isolation. Uses instance tenant_id if not provided.
user_id: User ID of the creator
Returns:
Created skill dict
"""
+ effective_tenant_id = tenant_id or self.tenant_id
content_bytes: bytes
if isinstance(file_content, str):
content_bytes = file_content.encode("utf-8")
@@ -600,14 +1050,15 @@ def create_skill_from_file(
file_type = "md"
if file_type == "zip":
- return self._create_skill_from_zip(content_bytes, skill_name, user_id, tenant_id)
+ return self._create_skill_from_zip(content_bytes, skill_name, source, user_id, effective_tenant_id)
else:
- return self._create_skill_from_md(content_bytes, skill_name, user_id, tenant_id)
+ return self._create_skill_from_md(content_bytes, skill_name, source, user_id, effective_tenant_id)
def _create_skill_from_md(
self,
content_bytes: bytes,
skill_name: Optional[str] = None,
+ source: str = "自定义",
user_id: Optional[str] = None,
tenant_id: Optional[str] = None
) -> Dict[str, Any]:
@@ -624,7 +1075,7 @@ def _create_skill_from_md(
raise SkillException("Skill name is required")
# Check if skill already exists in database
- existing = skill_db.get_skill_by_name(name)
+ existing = skill_db.get_skill_by_name(name, tenant_id)
if existing:
raise SkillException(f"Skill '{name}' already exists")
@@ -639,27 +1090,30 @@ def _create_skill_from_md(
"description": skill_data.get("description", ""),
"content": skill_data.get("content", ""),
"tags": skill_data.get("tags", []),
- "source": "custom",
+ "source": source,
"tool_ids": tool_ids,
"allowed-tools": allowed_tools, # Preserve for local file sync
}
+ # Note: scripts/ reflection is only possible for ZIP uploads (scripts exist in ZIP bytes).
+ # For MD-only uploads there are no scripts to reflect at create time.
# Set created_by and updated_by if user_id is provided
if user_id:
skill_dict["created_by"] = user_id
skill_dict["updated_by"] = user_id
- result = skill_db.create_skill(skill_dict)
+ result = skill_db.create_skill(skill_dict, tenant_id)
# Write SKILL.md to local storage
self.skill_manager.save_skill(skill_dict)
- return self._overlay_params_from_local_config_yaml(result)
+ return self._enrich_configs_from_yaml(result)
def _create_skill_from_zip(
self,
zip_bytes: bytes,
skill_name: Optional[str] = None,
+ source: str = "自定义",
user_id: Optional[str] = None,
tenant_id: Optional[str] = None
) -> Dict[str, Any]:
@@ -716,7 +1170,7 @@ def _create_skill_from_zip(
raise SkillException("Skill name is required")
# Check if skill already exists in database
- existing = skill_db.get_skill_by_name(name)
+ existing = skill_db.get_skill_by_name(name, tenant_id)
if existing:
raise SkillException(f"Skill '{name}' already exists")
@@ -746,32 +1200,46 @@ def _create_skill_from_zip(
"description": skill_data.get("description", ""),
"content": skill_data.get("content", ""),
"tags": skill_data.get("tags", []),
- "source": "custom",
+ "source": source,
"tool_ids": tool_ids,
"allowed-tools": allowed_tools, # Preserve for local file sync
}
preferred_root = detected_skill_name or name
+
+ # Priority: schema.yaml (list metadata) > scripts AST (list) > config.yaml (dict defaults)
+ schema_from_zip = _read_schema_yaml_from_zip(zip_bytes, preferred_root)
+ inputs_from_scripts = _get_skill_inputs_from_zip(
+ zip_bytes,
+ preferred_skill_root=preferred_root,
+ )
params_from_zip = _read_params_from_zip_config_yaml(
zip_bytes,
preferred_skill_root=preferred_root,
)
+
+ if schema_from_zip:
+ skill_dict["config_schemas"] = schema_from_zip
+ elif inputs_from_scripts:
+ skill_dict["config_schemas"] = inputs_from_scripts
+
+ # config.yaml always goes into config_values (runtime defaults dict)
if params_from_zip is not None:
- skill_dict["params"] = params_from_zip
+ skill_dict["config_values"] = params_from_zip
# Set created_by and updated_by if user_id is provided
if user_id:
skill_dict["created_by"] = user_id
skill_dict["updated_by"] = user_id
- result = skill_db.create_skill(skill_dict)
+ result = skill_db.create_skill(skill_dict, tenant_id)
# Save SKILL.md to local storage
self.skill_manager.save_skill(skill_dict)
self._upload_zip_files(zip_bytes, name, detected_skill_name)
- return self._overlay_params_from_local_config_yaml(result)
+ return self._enrich_configs_from_yaml(result)
def _delete_local_skill_files(self, skill_name: str) -> None:
"""Delete all files within a skill's local directory, preserving the directory itself.
@@ -783,14 +1251,14 @@ def _delete_local_skill_files(self, skill_name: str) -> None:
local_dir = os.path.join(self.skill_manager.local_skills_dir, skill_name)
logger.info("Starting deletion of local files for skill '%s' from '%s'", skill_name, local_dir)
-
+
if not os.path.isdir(local_dir):
logger.info("Local skill directory does not exist, nothing to delete: %s", local_dir)
return
try:
items = os.listdir(local_dir)
logger.info("Found %d items to delete in '%s'", len(items), local_dir)
-
+
for item in items:
item_path = os.path.join(local_dir, item)
if item_path.endswith("/"):
@@ -822,20 +1290,34 @@ def _upload_zip_files(
zip_stream = io.BytesIO(zip_bytes)
- # Determine if folder renaming is needed
+ try:
+ with zipfile.ZipFile(zip_stream, "r") as zf:
+ file_list = zf.namelist()
+ except zipfile.BadZipFile:
+ raise SkillException("Invalid ZIP archive")
+
+ # Determine if this ZIP has a subdirectory structure or root-level structure.
+ # Root-level: SKILL.md is at root (e.g., "SKILL.md", "script/analyze.py") -> no stripping
+ # Subdirectory: SKILL.md is inside a folder (e.g., "my-skill/SKILL.md") -> strip folder prefix
needs_rename = (
original_folder_name is not None
and original_folder_name != skill_name
)
+ has_root_skill_md = any(
+ not fp.endswith("/")
+ and fp.replace("\\", "/").split("/")[0].lower() == "skill.md"
+ for fp in file_list
+ )
+
logger.info(
- "Starting ZIP extraction for skill '%s': needs_rename=%s, original_folder='%s'",
- skill_name, needs_rename, original_folder_name
+ "Starting ZIP extraction for skill '%s': needs_rename=%s, original_folder='%s', has_root_skill_md=%s",
+ skill_name, needs_rename, original_folder_name, has_root_skill_md
)
+ zip_stream.seek(0)
try:
with zipfile.ZipFile(zip_stream, "r") as zf:
- file_list = zf.namelist()
logger.info("ZIP contains %d entries for skill '%s'", len(file_list), skill_name)
extracted_count = 0
@@ -847,10 +1329,12 @@ def _upload_zip_files(
parts = normalized_path.split("/")
# Calculate target relative path
+ # Only strip the first component when the ZIP has a subdirectory structure
+ # (SKILL.md is inside a folder, not at root level)
if needs_rename and len(parts) >= 2 and parts[0] == original_folder_name:
- # Replace original folder name with skill_name
relative_path = parts[0].replace(original_folder_name, skill_name) + "/" + "/".join(parts[1:])
- elif len(parts) >= 2:
+ elif len(parts) >= 2 and not has_root_skill_md:
+ # Strip first component (ZIP has subdirectory structure without root SKILL.md)
relative_path = "/".join(parts[1:])
else:
relative_path = normalized_path
@@ -861,7 +1345,8 @@ def _upload_zip_files(
file_data = zf.read(file_path)
local_dir = os.path.join(self.skill_manager.local_skills_dir, skill_name)
- local_path = os.path.join(local_dir, relative_path)
+ normalized_relative = relative_path.replace("/", os.sep).replace("\\", os.sep)
+ local_path = os.path.normpath(os.path.join(local_dir, normalized_relative))
os.makedirs(os.path.dirname(local_path), exist_ok=True)
with open(local_path, "wb") as f:
f.write(file_data)
@@ -896,7 +1381,10 @@ def update_skill_from_file(
Returns:
Updated skill dict
"""
- existing = skill_db.get_skill_by_name(skill_name)
+ effective_tenant_id = tenant_id or self.tenant_id
+ if not effective_tenant_id:
+ raise SkillException("tenant_id is required")
+ existing = skill_db.get_skill_by_name(skill_name, effective_tenant_id)
if not existing:
raise SkillException(f"Skill not found: {skill_name}")
@@ -915,9 +1403,9 @@ def update_skill_from_file(
file_type = "md"
if file_type == "zip":
- return self._update_skill_from_zip(content_bytes, skill_name, user_id, tenant_id)
+ return self._update_skill_from_zip(content_bytes, skill_name, user_id, effective_tenant_id)
else:
- return self._update_skill_from_md(content_bytes, skill_name, user_id, tenant_id)
+ return self._update_skill_from_md(content_bytes, skill_name, user_id, effective_tenant_id)
def _update_skill_from_md(
self,
@@ -948,7 +1436,7 @@ def _update_skill_from_md(
}
result = skill_db.update_skill(
- skill_name, skill_dict, updated_by=user_id or None
+ skill_name, skill_dict, tenant_id, updated_by=user_id or None
)
# Clean up existing local files before writing new ones
@@ -959,7 +1447,7 @@ def _update_skill_from_md(
skill_dict["allowed-tools"] = allowed_tools
self.skill_manager.save_skill(skill_dict)
- return self._overlay_params_from_local_config_yaml(result)
+ return self._enrich_configs_from_yaml(result)
def _update_skill_from_zip(
self,
@@ -969,7 +1457,7 @@ def _update_skill_from_zip(
tenant_id: Optional[str] = None,
) -> Dict[str, Any]:
"""Update skill from ZIP archive."""
- existing = skill_db.get_skill_by_name(skill_name)
+ existing = skill_db.get_skill_by_name(skill_name, tenant_id)
if not existing:
raise SkillException(f"Skill not found: {skill_name}")
@@ -1025,10 +1513,10 @@ def _update_skill_from_zip(
logger.warning(f"Could not parse SKILL.md from ZIP: {e}")
if params_from_zip is not None:
- skill_dict["params"] = params_from_zip
+ skill_dict["config_values"] = params_from_zip
result = skill_db.update_skill(
- skill_name, skill_dict, updated_by=user_id or None
+ skill_name, skill_dict, tenant_id, updated_by=user_id or None
)
# Clean up existing local files before writing new ones
@@ -1042,7 +1530,7 @@ def _update_skill_from_zip(
# Update other files in local storage
self._upload_zip_files(zip_bytes, skill_name, original_folder_name)
- return self._overlay_params_from_local_config_yaml(result)
+ return self._enrich_configs_from_yaml(result)
def update_skill(
self,
@@ -1051,61 +1539,66 @@ def update_skill(
tenant_id: Optional[str] = None,
user_id: Optional[str] = None
) -> Dict[str, Any]:
- """Update an existing skill.
+ """Update an existing skill for a tenant.
Args:
skill_name: Name of the skill to update
skill_data: Business fields from the application layer (no audit fields).
- tenant_id: Tenant ID (reserved for future multi-tenant support)
+ tenant_id: Tenant ID for skill isolation. Uses instance tenant_id if not provided.
user_id: Updater id from server-side auth (JWT / session); sets DB updated_by.
Returns:
Updated skill dict
"""
+ effective_tenant_id = tenant_id or self.tenant_id
+ if not effective_tenant_id:
+ raise SkillException("tenant_id is required")
try:
- existing = skill_db.get_skill_by_name(skill_name)
+ existing = skill_db.get_skill_by_name(skill_name, effective_tenant_id)
if not existing:
raise SkillException(f"Skill not found: {skill_name}")
result = skill_db.update_skill(
- skill_name, skill_data, updated_by=user_id or None
+ skill_name, skill_data, effective_tenant_id, updated_by=user_id or None
)
- # Keep config/config.yaml in sync when params are updated (matches ZIP import path).
- if CONTAINER_SKILLS_PATH and "params" in skill_data:
+ # Keep config/config.yaml in sync when config_values are updated (matches ZIP import path).
+ local_dir = self.skill_manager.local_skills_dir or CONTAINER_SKILLS_PATH
+ if local_dir and "config_values" in skill_data:
try:
- raw_params = skill_data["params"]
- if raw_params is None:
- _remove_local_skill_config_yaml(skill_name, CONTAINER_SKILLS_PATH)
+ raw_config_values = skill_data["config_values"]
+ if raw_config_values is None:
+ _remove_local_skill_config_yaml(skill_name, local_dir)
else:
_write_skill_params_to_local_config_yaml(
skill_name,
- _params_dict_to_storable(raw_params),
- CONTAINER_SKILLS_PATH,
+ _params_dict_to_storable(raw_config_values),
+ local_dir,
)
except Exception as exc:
logger.warning(
- "Local config/config.yaml sync failed after params update for %s: %s",
+ "Local config/config.yaml sync failed after config_values update for %s: %s",
skill_name,
exc,
)
# Optional: sync SKILL.md on disk when SKILLS_PATH is configured (DB is source of truth).
- if not CONTAINER_SKILLS_PATH:
+ if not local_dir:
logger.warning(
"SKILLS_PATH is not set; skipped local SKILL.md sync after DB update for %s",
skill_name,
)
- return self._overlay_params_from_local_config_yaml(result)
+ return self._enrich_configs_from_yaml(result)
try:
- allowed_tools = skill_db.get_tool_names_by_skill_name(skill_name)
+ allowed_tools = skill_db.get_tool_names_by_skill_name(skill_name, effective_tenant_id)
local_skill_dict = {
"name": skill_name,
"description": skill_data.get("description", existing.get("description", "")),
"content": skill_data.get("content", existing.get("content", "")),
"tags": skill_data.get("tags", existing.get("tags", [])),
"allowed-tools": allowed_tools,
+ "files": skill_data.get("files", []),
}
self.skill_manager.save_skill(local_skill_dict)
except Exception as exc:
@@ -1115,7 +1608,7 @@ def update_skill(
exc,
)
- return self._overlay_params_from_local_config_yaml(result)
+ return self._enrich_configs_from_yaml(result)
except SkillException:
raise
except Exception as e:
@@ -1125,18 +1618,22 @@ def update_skill(
def delete_skill(
self,
skill_name: str,
+ tenant_id: Optional[str] = None,
user_id: Optional[str] = None
) -> bool:
- """Delete a skill.
+ """Delete a skill for a tenant.
Args:
skill_name: Name of the skill to delete
- tenant_id: Tenant ID (reserved for future multi-tenant support)
+ tenant_id: Tenant ID for skill isolation. Uses instance tenant_id if not provided.
user_id: User ID of the user performing the delete
Returns:
True if deleted successfully
"""
+ effective_tenant_id = tenant_id or self.tenant_id
+ if not effective_tenant_id:
+ raise SkillException("tenant_id is required")
try:
# Delete local skill files from filesystem
skill_dir = os.path.join(self.skill_manager.local_skills_dir, skill_name)
@@ -1146,7 +1643,7 @@ def delete_skill(
logger.info(f"Deleted skill directory: {skill_dir}")
# Delete from database (soft delete with updated_by)
- return skill_db.delete_skill(skill_name, updated_by=user_id)
+ return skill_db.delete_skill(skill_name, effective_tenant_id, updated_by=user_id)
except Exception as e:
logger.error(f"Error deleting skill {skill_name}: {e}")
raise SkillException(f"Failed to delete skill: {str(e)}") from e
@@ -1178,7 +1675,7 @@ def get_enabled_skills_for_agent(
result = []
for skill_instance in enabled_skills:
skill_id = skill_instance.get("skill_id")
- skill = skill_db.get_skill_by_id(skill_id)
+ skill = skill_db.get_skill_by_id(skill_id, tenant_id)
if skill:
# Get skill info from ag_skill_info_t (repository returns keys: name, description, content)
merged = {
@@ -1258,7 +1755,7 @@ def build_skills_summary(
for skill_instance in agent_skills:
skill_id = skill_instance.get("skill_id")
- skill = skill_db.get_skill_by_id(skill_id)
+ skill = skill_db.get_skill_by_id(skill_id, tenant_id)
if skill:
if available_skills is not None and skill.get("name") not in available_skills:
continue
@@ -1268,8 +1765,12 @@ def build_skills_summary(
"description": skill.get("description", ""),
})
else:
- # Fallback: use all skills
- all_skills = skill_db.list_skills()
+ # Fallback: use all skills from the current tenant
+ effective_tenant_id = tenant_id or self.tenant_id
+ if effective_tenant_id:
+ all_skills = skill_db.list_skills(effective_tenant_id)
+ else:
+ all_skills = []
skills_to_include = all_skills
if available_skills is not None:
available_set = set(available_skills)
@@ -1305,13 +1806,16 @@ def get_skill_content(self, skill_name: str, tenant_id: Optional[str] = None) ->
Args:
skill_name: Name of the skill to load
- tenant_id: Tenant ID (reserved for future multi-tenant support)
+ tenant_id: Tenant ID for filtering. Uses instance tenant_id if not provided.
Returns:
Skill content in markdown format
"""
+ effective_tenant_id = tenant_id or self.tenant_id
+ if not effective_tenant_id:
+ return ""
try:
- skill = skill_db.get_skill_by_name(skill_name)
+ skill = skill_db.get_skill_by_name(skill_name, effective_tenant_id)
return skill.get("content", "") if skill else ""
except Exception as e:
logger.error(f"Error getting skill content {skill_name}: {e}")
@@ -1355,7 +1859,8 @@ def get_skill_file_content(
"""
try:
local_dir = os.path.join(self.skill_manager.local_skills_dir, skill_name)
- full_path = os.path.join(local_dir, file_path)
+ normalized_file_path = file_path.replace("/", os.sep).replace("\\", os.sep)
+ full_path = os.path.normpath(os.path.join(local_dir, normalized_file_path))
if not os.path.exists(full_path):
logger.warning(f"File not found: {full_path}")
@@ -1443,3 +1948,811 @@ def get_skill_instance(
tenant_id=tenant_id,
version_no=version_no
)
+
+    def create_skill_from_zip_bytes(
+        self,
+        zip_bytes: bytes,
+        skill_name: Optional[str] = None,
+        source: str = "导入",
+        user_id: Optional[str] = None,
+        tenant_id: Optional[str] = None,
+        skip_duplicate_check: bool = False
+    ) -> Dict[str, Any]:
+        """Create a skill from ZIP bytes, optionally skipping the duplicate name check.
+
+        This is the shared implementation used by both the upload endpoint and the
+        agent import flow. When skip_duplicate_check is True, the existence check
+        is bypassed (used during agent import where we pre-validate duplicates).
+
+        The skill name is resolved in priority order: the explicit ``skill_name``
+        argument, the top-level folder that contains SKILL.md, and finally the
+        ``name`` field parsed from SKILL.md itself.
+
+        Args:
+            zip_bytes: Raw ZIP file bytes
+            skill_name: Optional skill name override
+            source: Source label for the skill
+            user_id: Creator user ID
+            tenant_id: Tenant ID. Uses instance tenant_id if not provided.
+            skip_duplicate_check: If True, skip the "skill already exists" check
+
+        Returns:
+            Created skill dict
+
+        Raises:
+            SkillException: If the archive is invalid, SKILL.md is missing or
+                cannot be parsed, no name can be resolved, or the skill exists.
+        """
+        import zipfile
+
+        effective_tenant_id = tenant_id or self.tenant_id
+
+        skill_md_path: Optional[str] = None
+        detected_skill_name: Optional[str] = None
+
+        try:
+            with zipfile.ZipFile(io.BytesIO(zip_bytes), "r") as zf:
+                # Directory entries end with "/" and can never be SKILL.md.
+                candidates = [
+                    (entry, entry.replace("\\", "/").split("/"))
+                    for entry in zf.namelist()
+                    if not entry.endswith("/")
+                ]
+
+                # First choice: SKILL.md at the archive root (no wrapping folder).
+                for entry, parts in candidates:
+                    if len(parts) == 1 and parts[0].lower() == "skill.md":
+                        skill_md_path = entry
+                        break
+
+                # Second choice: SKILL.md inside a folder; that folder names the skill.
+                if not skill_md_path:
+                    for entry, parts in candidates:
+                        if len(parts) >= 2 and parts[-1].lower() == "skill.md":
+                            skill_md_path = entry
+                            detected_skill_name = parts[0]
+                            break
+
+                if not skill_md_path:
+                    raise SkillException("SKILL.md not found in ZIP archive")
+
+                skill_content = zf.read(skill_md_path).decode("utf-8")
+        except zipfile.BadZipFile as e:
+            raise SkillException("Invalid ZIP archive") from e
+
+        try:
+            skill_data = SkillLoader.parse(skill_content)
+        except ValueError as e:
+            raise SkillException(f"Invalid SKILL.md in ZIP: {e}") from e
+
+        # Resolve the name only after parsing, so the SKILL.md "name" field can
+        # serve as the last-resort fallback for root-level archives.
+        name = skill_name or detected_skill_name or skill_data.get("name")
+        if not name:
+            raise SkillException("Skill name is required")
+
+        if not skip_duplicate_check:
+            existing = skill_db.get_skill_by_name(name, effective_tenant_id)
+            if existing:
+                raise SkillException(f"Skill '{name}' already exists")
+
+        allowed_tools = skill_data.get("allowed_tools", [])
+        tool_ids = []
+        if allowed_tools:
+            tool_ids = skill_db.get_tool_ids_by_names(allowed_tools, effective_tenant_id)
+
+        skill_dict = {
+            "name": name,
+            "description": skill_data.get("description", ""),
+            "content": skill_data.get("content", ""),
+            "tags": skill_data.get("tags", []),
+            "source": source,
+            "tool_ids": tool_ids,
+            "allowed-tools": allowed_tools,
+        }
+
+        preferred_root = detected_skill_name or name
+
+        # Collect config metadata from the archive; each helper gets the raw bytes.
+        schema_from_zip = _read_schema_yaml_from_zip(zip_bytes, preferred_root)
+        inputs_from_scripts = _get_skill_inputs_from_zip(
+            zip_bytes,
+            preferred_skill_root=preferred_root,
+        )
+        params_from_zip = _read_params_from_zip_config_yaml(
+            zip_bytes,
+            preferred_skill_root=preferred_root,
+        )
+
+        if schema_from_zip:
+            skill_dict["config_schemas"] = schema_from_zip
+        elif inputs_from_scripts:
+            skill_dict["config_schemas"] = inputs_from_scripts
+
+        # Params read from the ZIP's config YAML are stored as config_values.
+        if params_from_zip is not None:
+            skill_dict["config_values"] = params_from_zip
+
+        if user_id:
+            skill_dict["created_by"] = user_id
+            skill_dict["updated_by"] = user_id
+
+        result = skill_db.create_skill(skill_dict, effective_tenant_id)
+        self.skill_manager.save_skill(skill_dict)
+        self._upload_zip_files(zip_bytes, name, detected_skill_name)
+
+        return self._enrich_configs_from_yaml(result)
+
+    def export_skills_by_names(
+        self,
+        skill_names: List[str],
+        tenant_id: Optional[str] = None
+    ) -> List[Dict[str, str]]:
+        """Export skills as base64-encoded ZIP archives by name.
+
+        Packages every file under each skill's local directory into a ZIP whose
+        entries are prefixed with the skill name. A name is skipped when it has
+        no directory or does not resolve to a direct child of the skills root.
+
+        Args:
+            skill_names: List of skill names to export
+            tenant_id: Kept for interface compatibility; skills are located by
+                local directory only and this value is not consulted here.
+
+        Returns:
+            List of dicts with skill_name and skill_zip_base64
+        """
+        import base64
+
+        results: List[Dict[str, str]] = []
+        skills_root = self.skill_manager.local_skills_dir or CONTAINER_SKILLS_PATH
+        if not skills_root:
+            logger.warning("Skills directory is not configured; nothing to export")
+            return results
+        skills_root = os.path.abspath(skills_root)
+
+        for skill_name in skill_names:
+            skill_dir = os.path.abspath(os.path.join(skills_root, skill_name))
+            # Names like "", "..", "a/b" or absolute paths would make us read
+            # outside a single skill folder; treat them as not found.
+            if os.path.dirname(skill_dir) != skills_root or not os.path.isdir(skill_dir):
+                logger.warning(f"Skill directory not found for export: {skill_name}")
+                continue
+            zip_buffer = io.BytesIO()
+            with zipfile.ZipFile(zip_buffer, "w", zipfile.ZIP_DEFLATED) as zf:
+                for root, _dirs, files in os.walk(skill_dir):
+                    for file in files:
+                        file_path = os.path.join(root, file)
+                        rel_path = os.path.relpath(file_path, skill_dir)
+                        zf.write(file_path, os.path.join(skill_name, rel_path))
+            results.append({
+                "skill_name": skill_name,
+                "skill_zip_base64": base64.b64encode(zip_buffer.getvalue()).decode("utf-8")
+            })
+
+        return results
+
+
+def classify_streaming_content(content: str, classifier: Any) -> List[Dict[str, Any]]:
+    """Run a chunk of streamed content through the given classifier.
+
+    Thin module-level helper that forwards to ``classifier.classify``.
+
+    Args:
+        content: Raw streaming content to classify
+        classifier: Object exposing ``classify(content)``, e.g. a ContentClassifier
+
+    Returns:
+        The value returned by ``classifier.classify(content)``, unchanged
+    """
+    classified_events = classifier.classify(content)
+    return classified_events
+
+
+class SkillCreationStreamService:
+    """Helper service used by the skill creation streaming flow."""
+
+    def __init__(self, skill_service: Optional["SkillService"] = None):
+        """Store the SkillService to delegate to.
+
+        Args:
+            skill_service: SkillService to use; a default-constructed one is
+                created when the argument is omitted or falsy
+        """
+        if not skill_service:
+            skill_service = SkillService()
+        self.skill_service = skill_service
+
+    def get_skill_manager_local_dir(self) -> str:
+        """Return the skill manager's local skills directory.
+
+        Returns:
+            ``skill_manager.local_skills_dir``, or "" when it is unset or empty
+        """
+        local_dir = self.skill_service.skill_manager.local_skills_dir
+        return local_dir if local_dir else ""
+
+    def create_classifier(self) -> "ContentClassifier":
+        """Build a fresh ContentClassifier.
+
+        Returns:
+            A newly constructed ContentClassifier
+        """
+        from utils.content_classifier_utils import ContentClassifier
+        return ContentClassifier()
+
+    def classify_content(self, content: str, classifier: "ContentClassifier") -> List[Dict[str, Any]]:
+        """Forward streamed content to ``classifier.classify``.
+
+        Args:
+            content: Raw streaming content to classify
+            classifier: ContentClassifier instance
+
+        Returns:
+            List of classified event dictionaries
+        """
+        return classifier.classify(content)
+
+
+def create_skill_creation_stream_generator(observer: Any, classifier: "ContentClassifier") -> Any:
+    """Drain the observer's cached messages and yield them as SSE events (best-effort).
+
+    Args:
+        observer: MessageObserver instance with cached messages
+        classifier: ContentClassifier instance for content classification
+
+    Yields:
+        SSE-formatted event strings
+    """
+    import json
+    from consts.const import STREAMABLE_CONTENT_TYPES
+
+    for msg in observer.get_cached_message():
+        if not isinstance(msg, str):
+            continue
+        try:
+            data = json.loads(msg)
+            if not isinstance(data, dict):
+                continue  # valid JSON, but not an event object with type/content
+            msg_type = data.get("type", "")
+            content = data.get("content", "")
+            if msg_type == "step_count":
+                yield f"data: {json.dumps({'type': 'step_count', 'content': content}, ensure_ascii=False)}\n\n"
+            elif msg_type in STREAMABLE_CONTENT_TYPES:
+                for event in classifier.classify(content):
+                    yield f"data: {json.dumps(event, ensure_ascii=False)}\n\n"
+        except json.JSONDecodeError:
+            logger.debug("Skipping non-JSON observer message during skill creation")
+        except Exception as exc:
+            logger.warning(f"Failed to convert observer message to SSE event: {exc}")
+
+
+def format_final_answer_sse(classifier: "ContentClassifier", final_result: str) -> List[str]:
+    """Classify the final answer and render each resulting event as an SSE string.
+
+    Args:
+        classifier: ContentClassifier instance for content classification
+        final_result: Final answer content to format
+
+    Returns:
+        List of SSE-formatted event strings
+    """
+    import json
+
+    return [
+        f"data: {json.dumps(event, ensure_ascii=False)}\n\n"
+        for event in classifier.classify(final_result)
+    ]
+
+
+# ========== Skill Creation Task Manager ==========
+
+
+class SkillCreationTaskManager:
+    """Singleton manager to track active skill creation threads and their stop events."""
+
+    _instance: Optional["SkillCreationTaskManager"] = None
+    _lock = threading.Lock()
+
+    def __new__(cls) -> "SkillCreationTaskManager":
+        if cls._instance is None:
+            with cls._lock:
+                if cls._instance is None:
+                    # Finish initialising before publishing: the unlocked check
+                    # above must never observe an instance without its state.
+                    instance = super().__new__(cls)
+                    instance._tasks: Dict[str, Tuple[threading.Thread, threading.Event]] = {}
+                    instance._tasks_lock = threading.Lock()
+                    cls._instance = instance
+        return cls._instance
+
+    def register_task(self, task_id: str, thread: threading.Thread, stop_event: threading.Event) -> None:
+        """Register a new skill creation task.
+
+        Args:
+            task_id: Unique identifier for the task
+            thread: The thread running the skill creation
+            stop_event: Event to signal stop request
+        """
+        with self._tasks_lock:
+            self._tasks[task_id] = (thread, stop_event)
+            logger.info(f"Registered skill creation task: {task_id}")
+
+    def unregister_task(self, task_id: str) -> None:
+        """Unregister a completed skill creation task (unknown ids are ignored).
+
+        Args:
+            task_id: Unique identifier for the task
+        """
+        with self._tasks_lock:
+            if self._tasks.pop(task_id, None) is not None:
+                logger.info(f"Unregistered skill creation task: {task_id}")
+
+    def stop_task(self, task_id: str) -> bool:
+        """Signal a skill creation task to stop.
+
+        Args:
+            task_id: Unique identifier for the task
+
+        Returns:
+            True if the task was found and stop was signaled, False otherwise
+        """
+        with self._tasks_lock:
+            entry = self._tasks.get(task_id)
+            if entry is None:
+                return False
+            entry[1].set()
+            logger.info(f"Stop signal sent for skill creation task: {task_id}")
+            return True
+
+    def is_task_running(self, task_id: str) -> bool:
+        """Check if a task is still running.
+
+        Args:
+            task_id: Unique identifier for the task
+
+        Returns:
+            True if the task exists and is still alive
+        """
+        with self._tasks_lock:
+            entry = self._tasks.get(task_id)
+        return entry is not None and entry[0].is_alive()
+
+
+# Module-level handle to the shared SkillCreationTaskManager instance
+skill_creation_task_manager = SkillCreationTaskManager()
+
+
+# ========== Skill Creation Stream Service ==========
+
+
+def stream_skill_creation(
+    user_request: str,
+    language: str,
+    model_config: "ModelConfig",
+    existing_skill: Optional[Dict[str, Any]] = None,
+    complexity: str = "simple",
+) -> tuple[str, Any]:
+    """Prepare a skill creation run and return its task id and SSE generator.
+
+    The returned async generator function, once iterated:
+    - Loads the prompt template
+    - Creates the observer, stop_event, and classifier
+    - Registers the task with the task manager and starts the agent thread
+    - Yields SSE events until the thread finishes, then a final "done" event
+    - Yields an "error" event instead if setup or the agent thread fails
+    - Always signals the thread to stop and unregisters the task on exit
+
+    Args:
+        user_request: User's skill description request
+        language: Language code (e.g., "zh", "en")
+        model_config: Model configuration
+        existing_skill: Optional existing skill for modification
+        complexity: Skill complexity level ("simple" or "complicated")
+
+    Returns:
+        Tuple of (task_id, generator_function)
+        The task_id should be passed to the caller for stop functionality
+    """
+    task_id = str(uuid.uuid4())
+
+    async def generate():
+        is_task_registered = False
+        stop_event = threading.Event()
+        worker_errors: List[Exception] = []
+
+        try:
+            template = get_skill_creation_simple_prompt_template(
+                language=language,
+                existing_skill=existing_skill,
+                complexity=complexity
+            )
+            observer = MessageObserver(lang=language)
+            classifier = ContentClassifier()
+            local_skills_dir = SkillService().skill_manager.local_skills_dir or ""
+
+            def run_task():
+                try:
+                    create_skill_from_request(
+                        system_prompt=template.get("system_prompt", ""),
+                        user_prompt=user_request,
+                        model_config_list=[model_config],
+                        observer=observer,
+                        stop_event=stop_event,
+                        local_skills_dir=local_skills_dir
+                    )
+                except Exception as exc:
+                    # Hand the failure to the generator; a thread cannot raise into it.
+                    worker_errors.append(exc)
+
+            thread = threading.Thread(target=run_task)
+            # Register task before starting
+            skill_creation_task_manager.register_task(task_id, thread, stop_event)
+            is_task_registered = True
+            thread.start()
+
+            while thread.is_alive():
+                for event in create_skill_creation_stream_generator(observer, classifier):
+                    yield event
+                await asyncio.sleep(0.1)
+            thread.join()
+            # Flush whatever the thread produced after the last poll.
+            for event in create_skill_creation_stream_generator(observer, classifier):
+                yield event
+
+            # Errors raised after a stop request are treated as part of stopping.
+            if worker_errors and not stop_event.is_set():
+                raise worker_errors[0]
+            yield f"data: {json.dumps({'type': 'done'}, ensure_ascii=False)}\n\n"
+        except Exception as e:
+            logger.error(f"Error in stream_skill_creation: {e}")
+            yield f"data: {json.dumps({'type': 'error', 'message': str(e)}, ensure_ascii=False)}\n\n"
+        finally:
+            # Also reached when the generator is closed early: stop the worker.
+            stop_event.set()
+            if is_task_registered:
+                skill_creation_task_manager.unregister_task(task_id)
+
+    return task_id, generate
+
+
+# ============== Skill List Initialization ==============
+
+
+async def init_skill_list_for_tenant(tenant_id: str, user_id: str):
+    """Populate a tenant's skill list the first time it is requested.
+
+    Mirrors init_tool_list_for_tenant() in tool_configuration_service.py.
+    If skill_db.check_skill_list_initialized() reports that the tenant is
+    already initialized, nothing is scanned; otherwise update_skill_list()
+    is awaited for the tenant.
+
+    Args:
+        tenant_id: Tenant whose skill list should be initialized.
+        user_id: User recorded as the initiator of the scan.
+
+    Returns:
+        Dict with a "status" key ("already_initialized" or "success") and a
+        human-readable "message".
+    """
+    from database import skill_db as skill_db_module
+
+    needs_init = not skill_db_module.check_skill_list_initialized(tenant_id)
+    if needs_init:
+        logger.info(f"Initializing skill list for new tenant: {tenant_id}")
+        await update_skill_list(tenant_id=tenant_id, user_id=user_id)
+        outcome = {"status": "success", "message": "Skill list initialized successfully"}
+    else:
+        logger.info(f"Skill list already initialized for tenant {tenant_id}, skipping")
+        outcome = {"status": "already_initialized", "message": "Skill list already exists"}
+    return outcome
+
+
+async def update_skill_list(tenant_id: str, user_id: str):
+    """Scan the tenant's local skill directories and upsert them into ag_skill_info_t.
+
+    Mirrors update_tool_list() in tool_configuration_service.py.
+
+    Every skill reported by SkillManager.list_skills() that has a name becomes
+    one record (source is always "official"), enriched with:
+      - "content": taken from SkillManager.load_skill() when it returns a value;
+      - "config_schemas": parsed from the skill's schema.yaml when that file
+        exists, otherwise the inputs found by _get_skill_inputs_from_code()
+        in the skill's scripts directory (only when any are found).
+    Enrichment is best-effort: a failure is logged as a warning and the skill
+    is still upserted with whatever was collected up to that point.
+
+    Args:
+        tenant_id: Tenant ID for the tenant
+        user_id: User ID for tracking who initiated the scan
+    """
+    from database import skill_db as skill_db_module
+    from nexent.skills import SkillManager
+
+    skill_manager = SkillManager(base_skills_dir=CONTAINER_SKILLS_PATH, tenant_id=tenant_id)
+    # Use the resolved tenant-scoped local path for schema/config file reading
+    local_base = skill_manager.local_skills_dir or CONTAINER_SKILLS_PATH
+    scanned_skills = skill_manager.list_skills()
+
+    skills_to_upsert = []
+    for skill_info in scanned_skills:
+        skill_name = skill_info.get("name")
+        if not skill_name:
+            # A record without a name cannot be keyed in the database.
+            continue
+
+        skill_data = {
+            "name": skill_name,
+            "description": skill_info.get("description", ""),
+            "tags": skill_info.get("tags", []),
+            "source": "official",
+        }
+
+        try:
+            full_skill = skill_manager.load_skill(skill_name)
+            if full_skill:
+                skill_data["content"] = full_skill.get("content", "")
+
+            # Try schema.yaml first; fall back to AST-parsed scripts
+            schema_path = _local_skill_schema_yaml_path(skill_name, local_base)
+            if os.path.isfile(schema_path):
+                async with aiofiles.open(schema_path, "rb") as f:
+                    raw = await f.read()
+                parsed = _parse_skill_schema_from_yaml_bytes(raw)
+                skill_data["config_schemas"] = parsed
+                logger.debug("Loaded config_schemas from schema.yaml for skill %s", skill_name)
+            else:
+                scripts_dir = os.path.join(local_base, skill_name, "scripts")
+                inputs = _get_skill_inputs_from_code(scripts_dir)
+                if inputs:
+                    skill_data["config_schemas"] = inputs
+        except Exception as e:
+            logger.warning(f"Could not load full skill content for {skill_name}: {e}")
+            # Default the content only when it was never loaded. A failure that
+            # happens after load_skill() succeeded (for example while reading
+            # schema.yaml) must not blank out content that is about to be upserted.
+            skill_data.setdefault("content", "")
+
+        skills_to_upsert.append(skill_data)
+
+    if skills_to_upsert:
+        skill_db_module.upsert_scanned_skills(skills_to_upsert, user_id, tenant_id)
+        logger.info(f"Upserted {len(skills_to_upsert)} skills for tenant {tenant_id}")
+    else:
+        logger.info(f"No skills found to upsert for tenant {tenant_id}")
+
+
+def install_skills_for_tenant(
+    skill_ids: List[int],
+    tenant_id: str,
+    user_id: Optional[str] = None
+) -> List[int]:
+    """Copy official skill templates into a tenant's own skill records.
+
+    Every requested ID is looked up with skill_db.get_skill_by_id_global().
+    A template that is missing or has no name is skipped with a warning. When
+    the tenant already owns a skill with the template's name, no copy is made
+    and the existing record's skill_id is reported instead. Otherwise a new
+    record is created for the tenant from the template's fields. A failure on
+    one ID is logged and does not stop the remaining IDs from being processed.
+
+    Args:
+        skill_ids: IDs of the template skills to install.
+        tenant_id: Tenant that receives the copies.
+        user_id: Stored in the created_by/updated_by fields of new records.
+
+    Returns:
+        Skill IDs available to the tenant after the call: IDs of newly created
+        records plus IDs of records that already existed.
+    """
+    from database import skill_db as skill_db_module
+
+    installed_ids: List[int] = []
+    if not skill_ids:
+        return installed_ids
+
+    for skill_id in skill_ids:
+        try:
+            source_record = skill_db_module.get_skill_by_id_global(skill_id)
+            if not source_record:
+                logger.warning(
+                    f"Skill template with ID {skill_id} not found for installation "
+                    f"into tenant {tenant_id}"
+                )
+                continue
+
+            skill_name = source_record.get("name", "")
+            if not skill_name:
+                logger.warning(
+                    f"Skill template {skill_id} has no name, skipping installation "
+                    f"for tenant {tenant_id}"
+                )
+                continue
+
+            # Installation is keyed by name: reuse the tenant's record if present.
+            tenant_copy = skill_db_module.get_skill_by_name(skill_name, tenant_id)
+            if tenant_copy:
+                logger.info(
+                    f"Skill '{skill_name}' already exists for tenant {tenant_id}, skipping"
+                )
+                installed_ids.append(tenant_copy.get("skill_id"))
+                continue
+
+            # Fields copied from the template, each with the fallback used when
+            # the template does not carry the field.
+            copied_fields = (
+                ("description", ""),
+                ("tags", []),
+                ("content", ""),
+                ("config_schemas", None),
+                ("config_values", None),
+                ("source", "official"),
+            )
+            payload = {"name": skill_name}
+            for field, fallback in copied_fields:
+                payload[field] = source_record.get(field, fallback)
+            payload["created_by"] = user_id
+            payload["updated_by"] = user_id
+
+            created = skill_db_module.create_skill(payload, tenant_id)
+            new_skill_id = created.get("skill_id")
+            if not new_skill_id:
+                logger.warning(
+                    f"create_skill returned no skill_id for '{skill_name}', "
+                    f"tenant {tenant_id}"
+                )
+                continue
+
+            installed_ids.append(new_skill_id)
+            logger.info(
+                f"Installed skill '{skill_name}' (ID {new_skill_id}) for tenant {tenant_id}"
+            )
+        except Exception as e:
+            logger.error(
+                f"Failed to install skill ID {skill_id} into tenant {tenant_id}: {e}"
+            )
+
+    return installed_ids
+
+
+def install_skills_from_zip_for_tenant(
+    skill_names: List[str],
+    tenant_id: str,
+    user_id: Optional[str] = None,
+    locale: Optional[str] = None
+) -> List[str]:
+    """Install official skills into a new tenant by reading ZIP files from OFFICIAL_SKILLS_ZIP_PATH.
+
+    For each skill_name provided, derives the ZIP filename as "<skill_name>.zip",
+    reads the file from OFFICIAL_SKILLS_ZIP_PATH, and creates the skill via
+    create_skill_from_file (which handles ZIP extraction, SKILL.md parsing,
+    and database record creation).
+
+    Skills whose name is not a bare file name (empty, or containing a path
+    component) are rejected with a warning so that a request cannot read
+    archives outside OFFICIAL_SKILLS_ZIP_PATH.
+    Skills that cannot be found as ZIP files are skipped with a warning.
+    Skills that already exist for the tenant are not reinstalled, but are
+    still reported in the result.
+    A failure on one skill is logged and does not stop the remaining skills.
+
+    Args:
+        skill_names: List of skill names to install (e.g. ["search-knowledge-base"]).
+        tenant_id: Target tenant ID to install skills into.
+        user_id: User ID for created_by/updated_by audit fields.
+        locale: Frontend locale (e.g. "zh" or "en"). Determines the source label:
+            "zh" → "官方", other locales → "official".
+
+    Returns:
+        List of skill names available to the tenant after the call: names that
+        were installed by this call plus names that already existed.
+    """
+    if not skill_names:
+        return []
+
+    zip_dir = OFFICIAL_SKILLS_ZIP_PATH
+    if not os.path.isdir(zip_dir):
+        logger.warning(f"Official skills zip directory not found: {zip_dir}")
+        return []
+
+    # Derive source label from locale: zh → "官方", otherwise "official"
+    source = "官方" if locale == "zh" else "official"
+
+    installed: List[str] = []
+    service = SkillService(tenant_id=tenant_id)
+
+    for skill_name in skill_names:
+        # skill_name is interpolated into a file path below. Accept only a bare
+        # file name: os.path.basename() differs from its input as soon as the
+        # name carries a directory part, an absolute prefix or a drive.
+        if (
+            not isinstance(skill_name, str)
+            or not skill_name
+            or os.path.basename(skill_name) != skill_name
+        ):
+            logger.warning(
+                f"Invalid skill name {skill_name!r}, skipping installation "
+                f"for tenant {tenant_id}"
+            )
+            continue
+
+        zip_filename = f"{skill_name}.zip"
+        zip_path = os.path.join(zip_dir, zip_filename)
+
+        if not os.path.isfile(zip_path):
+            logger.warning(
+                f"ZIP file not found for skill '{skill_name}': {zip_path}"
+            )
+            continue
+
+        try:
+            existing = skill_db.get_skill_by_name(skill_name, tenant_id)
+            if existing:
+                logger.info(
+                    f"Skill '{skill_name}' already exists for tenant {tenant_id}, skipping"
+                )
+                installed.append(skill_name)
+                continue
+
+            with open(zip_path, "rb") as f:
+                zip_content = f.read()
+
+            result = service.create_skill_from_file(
+                file_content=zip_content,
+                skill_name=skill_name,
+                file_type="zip",
+                source=source,
+                tenant_id=tenant_id,
+                user_id=user_id,
+            )
+            # Report the name stored by the service, falling back to the requested one.
+            installed_name = result.get("name", skill_name)
+            installed.append(installed_name)
+            logger.info(
+                f"Installed skill '{installed_name}' for tenant {tenant_id} "
+                f"from ZIP {zip_filename}"
+            )
+        except Exception as e:
+            logger.error(
+                f"Failed to install skill '{skill_name}' from ZIP for tenant {tenant_id}: {e}"
+            )
+
+    return installed
+
+
+def get_official_skills_with_status(
+    tenant_id: Optional[str] = None
+) -> List[Dict[str, Any]]:
+    """List the official skill archives together with their status for a tenant.
+
+    The available official skills are the "*.zip" files found in
+    OFFICIAL_SKILLS_ZIP_PATH (file name without the extension = skill name),
+    processed in sorted order. For each one the tenant's skill record, if any,
+    decides the status:
+      - "installable": no tenant_id was given or the tenant has no record;
+      - "installed": the tenant has a record and the skill directory exists
+        under the tenant's local skills directory;
+      - "resource_missing": the tenant has a record but that directory is absent.
+
+    Args:
+        tenant_id: Tenant ID to check installation status for.
+
+    Returns:
+        List of dicts with skill_id, name, description, source and status.
+        skill_id is 0 when no record supplied an ID. The list is empty when
+        the archive directory is missing or cannot be listed.
+    """
+    from database import skill_db as skill_db_module
+
+    entries: List[Dict[str, Any]] = []
+
+    zip_dir = OFFICIAL_SKILLS_ZIP_PATH
+    if not os.path.isdir(zip_dir):
+        logger.warning(f"Official skills zip directory not found: {zip_dir}")
+        return entries
+
+    try:
+        archive_names = sorted(
+            entry for entry in os.listdir(zip_dir) if entry.lower().endswith(".zip")
+        )
+    except OSError as e:
+        logger.warning(f"Failed to list official skills zip directory: {e}")
+        return entries
+
+    for archive_name in archive_names:
+        # Strip the four-character ".zip" suffix to obtain the skill name.
+        skill_name = archive_name[:-4]
+        if not skill_name:
+            continue
+
+        skill_id: Optional[int] = None
+        status = "installable"
+
+        tenant_record = (
+            skill_db_module.get_skill_by_name(skill_name, tenant_id) if tenant_id else None
+        )
+        if tenant_record:
+            skill_id = tenant_record.get("skill_id")
+            skill_manager = SkillManager(
+                base_skills_dir=CONTAINER_SKILLS_PATH,
+                tenant_id=tenant_id
+            )
+            skills_root = skill_manager.local_skills_dir or CONTAINER_SKILLS_PATH or ""
+            resources_present = os.path.isdir(os.path.join(skills_root, skill_name))
+            status = "installed" if resources_present else "resource_missing"
+
+        # Fall back to the global record (tenant_id None) for the ID.
+        if skill_id is None:
+            global_skill = skill_db_module.get_skill_by_name(skill_name, None)
+            if global_skill:
+                skill_id = global_skill.get("skill_id")
+
+        description = ""
+        if skill_id:
+            if tenant_id:
+                db_skill = skill_db_module.get_skill_by_id(skill_id, tenant_id)
+                if db_skill:
+                    description = db_skill.get("description", "")
+            if not description:
+                db_global = skill_db_module.get_skill_by_name(skill_name, None)
+                if db_global:
+                    description = db_global.get("description", "")
+
+        entries.append({
+            "skill_id": 0 if skill_id is None else skill_id,
+            "name": skill_name,
+            "description": description,
+            "source": "official",
+            "status": status,
+        })
+
+    return entries
diff --git a/backend/services/tenant_service.py b/backend/services/tenant_service.py
index bb761d2b4..6ed96a849 100644
--- a/backend/services/tenant_service.py
+++ b/backend/services/tenant_service.py
@@ -3,9 +3,12 @@
"""
import asyncio
import logging
+import os
+import shutil
import uuid
from typing import Any, Dict, List, Optional
+from database import skill_db
from database.tenant_config_db import (
get_single_config_info,
insert_config,
@@ -23,8 +26,9 @@
from database.remote_mcp_db import get_mcp_records_by_tenant, delete_mcp_record_by_name_and_url
from database.invitation_db import query_invitations_by_tenant, remove_invitation
from database.tool_db import delete_tools_by_agent_id
-from consts.const import TENANT_NAME, TENANT_ID, DEFAULT_GROUP_ID
+from consts.const import ASSET_OWNER_TENANT_ID, TENANT_NAME, TENANT_ID, DEFAULT_GROUP_ID, CONTAINER_SKILLS_PATH
from consts.exceptions import NotFoundException, ValidationError, UserRegistrationException
+from services.skill_service import install_skills_from_zip_for_tenant
logger = logging.getLogger(__name__)
@@ -47,7 +51,8 @@ def get_tenant_info(tenant_id: str) -> Dict[str, Any]:
# Get tenant name
name_config = get_single_config_info(tenant_id, TENANT_NAME)
if not name_config:
- logger.warning(f"The name of tenant {tenant_id} not found, creating default config.")
+ logger.warning(
+ f"The name of tenant {tenant_id} not found, creating default config.")
# Auto-create TENANT_NAME config with default name
_ensure_tenant_name_config(tenant_id)
# Re-fetch after creation
@@ -92,7 +97,8 @@ def _ensure_tenant_name_config(tenant_id: str) -> bool:
if success:
logger.info(f"Auto-created TENANT_NAME config for tenant {tenant_id}")
else:
- logger.error(f"Failed to auto-create TENANT_NAME config for tenant {tenant_id}")
+ logger.error(
+ f"Failed to auto-create TENANT_NAME config for tenant {tenant_id}")
return success
@@ -133,8 +139,11 @@ def get_tenants_paginated(page: int = 1, page_size: int = 20) -> Dict[str, Any]:
Returns:
Dict[str, Any]: Dictionary containing paginated tenant data and pagination info
"""
- # Get all tenant IDs first
- all_tenant_ids = get_all_tenant_ids()
+ # Exclude virtual ASSET_OWNER tenant from admin tenant listings
+ all_tenant_ids = [
+ tid for tid in get_all_tenant_ids()
+ if tid != ASSET_OWNER_TENANT_ID
+ ]
total = len(all_tenant_ids)
# Calculate pagination
@@ -151,7 +160,8 @@ def get_tenants_paginated(page: int = 1, page_size: int = 20) -> Dict[str, Any]:
tenant_info = get_tenant_info(tenant_id)
tenants.append(tenant_info)
except NotFoundException:
- logging.warning(f"Tenant info of {tenant_id} not found. Returning basic tenant structure.")
+ logging.warning(
+ f"Tenant info of {tenant_id} not found. Returning basic tenant structure.")
tenant_info = {
"tenant_id": tenant_id,
"tenant_name": "",
@@ -168,7 +178,13 @@ def get_tenants_paginated(page: int = 1, page_size: int = 20) -> Dict[str, Any]:
}
-def create_tenant(tenant_name: str, created_by: Optional[str] = None) -> Dict[str, Any]:
+def create_tenant(
+ tenant_name: str,
+ created_by: Optional[str] = None,
+ skill_ids: Optional[List[int]] = None,
+ skill_names: Optional[List[str]] = None,
+ locale: Optional[str] = None
+) -> Dict[str, Any]:
"""
Create a new tenant with default group
@@ -191,11 +207,13 @@ def create_tenant(tenant_name: str, created_by: Optional[str] = None) -> Dict[st
# Check if tenant name already exists
if check_tenant_name_exists(tenant_name.strip()):
- raise ValidationError(f"Tenant with name '{tenant_name.strip()}' already exists")
+ raise ValidationError(
+ f"Tenant with name '{tenant_name.strip()}' already exists")
try:
# Create default group first
- default_group_id = _create_default_group_for_tenant(tenant_id, created_by)
+ default_group_id = _create_default_group_for_tenant(
+ tenant_id, created_by)
# Create tenant ID configuration
tenant_id_data = {
@@ -231,15 +249,48 @@ def create_tenant(tenant_name: str, created_by: Optional[str] = None) -> Dict[st
}
group_success = insert_config(group_config_data)
if not group_success:
- raise ValidationError("Failed to create tenant default group configuration")
+ raise ValidationError(
+ "Failed to create tenant default group configuration")
+
+ # Install requested skills for the new tenant
+ # Prefer skill_names (ZIP-based installation) over skill_ids (legacy record-copy)
+ installed_skill_names: List[str] = []
+ if skill_names:
+ try:
+ installed_skill_names = install_skills_from_zip_for_tenant(
+ skill_names=skill_names,
+ tenant_id=tenant_id,
+ user_id=created_by,
+ locale=locale
+ )
+ except Exception as e:
+ logger.warning(
+ f"Failed to install skills from ZIP for tenant {tenant_id}: {e}")
+ elif skill_ids:
+ try:
+ from services.skill_service import install_skills_for_tenant as install_by_ids
+ installed_by_ids = install_by_ids(
+ skill_ids=skill_ids,
+ tenant_id=tenant_id,
+ user_id=created_by
+ )
+ logger.info(
+ f"Legacy install_skills_for_tenant installed IDs: {installed_by_ids} "
+ f"for tenant {tenant_id}"
+ )
+ except Exception as e:
+ logger.warning(
+ f"Failed to install skills by IDs for tenant {tenant_id}: {e}")
tenant_info = {
"tenant_id": tenant_id,
"tenant_name": tenant_name.strip(),
- "default_group_id": str(default_group_id)
+ "default_group_id": str(default_group_id),
+ "installed_skill_names": installed_skill_names,
}
- logger.info(f"Created tenant {tenant_id} with name '{tenant_name}' and default group {default_group_id}")
+ logger.info(
+ f"Created tenant {tenant_id} with name '{tenant_name}' and default group {default_group_id}")
return tenant_info
except Exception as e:
@@ -270,13 +321,15 @@ def update_tenant_info(tenant_id: str, tenant_name: str, updated_by: Optional[st
# Check if tenant name already exists (exclude current tenant)
if check_tenant_name_exists(tenant_name.strip(), exclude_tenant_id=tenant_id):
- raise ValidationError(f"Tenant with name '{tenant_name.strip()}' already exists")
+ raise ValidationError(
+ f"Tenant with name '{tenant_name.strip()}' already exists")
# Check if tenant name config exists
name_config = get_single_config_info(tenant_id, TENANT_NAME)
if not name_config:
# Tenant config doesn't exist, create it with the provided name
- logger.info(f"TENANT_NAME config not found for {tenant_id}, creating new config.")
+ logger.info(
+ f"TENANT_NAME config not found for {tenant_id}, creating new config.")
tenant_name_data = {
"tenant_id": tenant_id,
"config_key": TENANT_NAME,
@@ -302,6 +355,57 @@ def update_tenant_info(tenant_id: str, tenant_name: str, updated_by: Optional[st
return updated_tenant
+async def _delete_skills_for_tenant(tenant_id: str, actor: str) -> None:
+    """
+    Remove a tenant's skill instances, skills, and local skill files.
+
+    Runs three independent best-effort steps:
+    1. skill_db.delete_skill_instances_by_tenant() for the tenant
+    2. skill_db.delete_skill() for every named skill returned by
+       skill_db.list_skills() for the tenant
+    3. Recursive removal of CONTAINER_SKILLS_PATH/{tenant_id}/ when it exists
+
+    A failure inside a step is logged as a warning and does not prevent the
+    following steps. This is a coroutine function: a caller has to await it
+    for any of the steps to run.
+
+    Args:
+        tenant_id: Tenant ID to delete skills for
+        actor: User ID performing the deletion (for audit trail)
+    """
+    logger.info(f"Deleting skills and local files for tenant {tenant_id}")
+
+    # Step 1: skill instances of the tenant (regardless of skill source)
+    try:
+        deleted_count = skill_db.delete_skill_instances_by_tenant(
+            tenant_id, actor)
+    except Exception as e:
+        logger.warning(
+            f"Failed to soft-delete skill instances for tenant {tenant_id}: {str(e)}")
+    else:
+        logger.info(
+            f"Soft-deleted {deleted_count} skill instances for tenant {tenant_id}")
+
+    # Step 2: the tenant's skills, one by one so a single failure is isolated
+    for skill in skill_db.list_skills(tenant_id):
+        try:
+            skill_name = skill.get("name")
+            if not skill_name:
+                continue
+            skill_db.delete_skill(skill_name, tenant_id, actor)
+            logger.info(
+                f"Soft-deleted skill '{skill_name}' for tenant {tenant_id}")
+        except Exception as e:
+            logger.warning(
+                f"Failed to soft-delete skill {skill.get('name')}: {str(e)}")
+
+    # Step 3: the tenant's local skill directory and everything below it
+    if not CONTAINER_SKILLS_PATH:
+        return
+    tenant_skill_root = os.path.join(CONTAINER_SKILLS_PATH, tenant_id)
+    if not os.path.exists(tenant_skill_root):
+        return
+    try:
+        shutil.rmtree(tenant_skill_root)
+        logger.info(
+            f"Deleted tenant skill root directory: {tenant_skill_root}")
+    except Exception as e:
+        logger.warning(
+            f"Failed to delete tenant skill root directory {tenant_skill_root}: {str(e)}")
+
+
async def delete_tenant(tenant_id: str, deleted_by: Optional[str] = None) -> bool:
"""
Delete tenant and all associated resources
@@ -312,6 +416,7 @@ async def delete_tenant(tenant_id: str, deleted_by: Optional[str] = None) -> boo
- All models in the tenant
- All knowledge bases in the tenant
- All agents in the tenant (including tool instances)
+ - All skills, skill instances, and local skill files for the tenant
- All MCP configurations in the tenant
- All invitation codes in the tenant
- All tenant configurations
@@ -332,12 +437,14 @@ async def delete_tenant(tenant_id: str, deleted_by: Optional[str] = None) -> boo
if not name_config:
raise NotFoundException(f"Tenant {tenant_id} does not exist")
- logger.info(f"Starting cascade deletion for tenant {tenant_id} by {deleted_by}")
+ logger.info(
+ f"Starting cascade deletion for tenant {tenant_id} by {deleted_by}")
try:
# 1. Deactivate all users in the tenant (full cleanup including Supabase deletion)
logger.info(f"Deactivating users for tenant {tenant_id}")
- users_result = get_users_by_tenant_id(tenant_id, page=1, page_size=10000)
+ users_result = get_users_by_tenant_id(
+ tenant_id, page=1, page_size=10000)
users = users_result.get("users", [])
if users:
@@ -346,9 +453,11 @@ async def delete_single_user(user: Dict[str, Any]) -> None:
if user_id:
try:
await delete_user_and_cleanup(user_id, tenant_id)
- logger.info(f"Deactivated user {user_id} for tenant {tenant_id}")
+ logger.info(
+ f"Deactivated user {user_id} for tenant {tenant_id}")
except Exception as e:
- logger.warning(f"Failed to deactivate user {user_id}: {str(e)}")
+ logger.warning(
+ f"Failed to deactivate user {user_id}: {str(e)}")
# Concurrently delete all users
await asyncio.gather(*[delete_single_user(user) for user in users])
@@ -360,16 +469,19 @@ async def delete_single_user(user: Dict[str, Any]) -> None:
try:
remove_group(group["group_id"], deleted_by)
except Exception as e:
- logger.warning(f"Failed to delete group {group.get('group_id')}: {str(e)}")
+ logger.warning(
+ f"Failed to delete group {group.get('group_id')}: {str(e)}")
# 3. Delete all models in the tenant
logger.info(f"Deleting models for tenant {tenant_id}")
models = get_model_records({"tenant_id": tenant_id}, tenant_id)
for model in models:
try:
- delete_model_record(model["model_id"], deleted_by or "system", tenant_id)
+ delete_model_record(
+ model["model_id"], deleted_by or "system", tenant_id)
except Exception as e:
- logger.warning(f"Failed to delete model {model.get('model_id')}: {str(e)}")
+ logger.warning(
+ f"Failed to delete model {model.get('model_id')}: {str(e)}")
# 4. Delete all knowledge bases in the tenant
logger.info(f"Deleting knowledge bases for tenant {tenant_id}")
@@ -381,7 +493,8 @@ async def delete_single_user(user: Dict[str, Any]) -> None:
"user_id": deleted_by or "system"
})
except Exception as e:
- logger.warning(f"Failed to delete knowledge base {kb.get('knowledge_id')}: {str(e)}")
+ logger.warning(
+ f"Failed to delete knowledge base {kb.get('knowledge_id')}: {str(e)}")
# 5. Delete all agents in the tenant (including related data)
logger.info(f"Deleting agents for tenant {tenant_id}")
@@ -390,24 +503,34 @@ async def delete_single_user(user: Dict[str, Any]) -> None:
try:
agent_id = agent.get("agent_id")
# Delete tool instances first
- delete_tools_by_agent_id(agent_id, tenant_id, deleted_by or "system", version_no=0)
+ delete_tools_by_agent_id(
+ agent_id, tenant_id, deleted_by or "system", version_no=0)
# Delete agent relationships
- delete_agent_relationship(agent_id, tenant_id, deleted_by or "system", version_no=0)
+ delete_agent_relationship(
+ agent_id, tenant_id, deleted_by or "system", version_no=0)
# Delete the agent
delete_agent_by_id(agent_id, tenant_id, deleted_by or "system")
except Exception as e:
- logger.warning(f"Failed to delete agent {agent.get('agent_id')}: {str(e)}")
+ logger.warning(
+ f"Failed to delete agent {agent.get('agent_id')}: {str(e)}")
# Also delete published agents (version_no >= 1)
- agents_published = query_all_agent_info_by_tenant_id(tenant_id, version_no=1)
+ agents_published = query_all_agent_info_by_tenant_id(
+ tenant_id, version_no=1)
for agent in agents_published:
try:
agent_id = agent.get("agent_id")
- delete_tools_by_agent_id(agent_id, tenant_id, deleted_by or "system", version_no=1)
- delete_agent_relationship(agent_id, tenant_id, deleted_by or "system", version_no=1)
+ delete_tools_by_agent_id(
+ agent_id, tenant_id, deleted_by or "system", version_no=1)
+ delete_agent_relationship(
+ agent_id, tenant_id, deleted_by or "system", version_no=1)
delete_agent_by_id(agent_id, tenant_id, deleted_by or "system")
except Exception as e:
- logger.warning(f"Failed to delete published agent {agent.get('agent_id')}: {str(e)}")
+ logger.warning(
+ f"Failed to delete published agent {agent.get('agent_id')}: {str(e)}")
+
+        # 5b. Delete all skills, skill instances, and local skill files for the tenant.
+        # _delete_skills_for_tenant is a coroutine function; without `await` the call only
+        # creates a coroutine object and none of the cleanup runs.
+        await _delete_skills_for_tenant(tenant_id, deleted_by or "system")
# 6. Delete all MCP configurations in the tenant
logger.info(f"Deleting MCP records for tenant {tenant_id}")
@@ -421,7 +544,8 @@ async def delete_single_user(user: Dict[str, Any]) -> None:
deleted_by or "system"
)
except Exception as e:
- logger.warning(f"Failed to delete MCP {mcp.get('mcp_id')}: {str(e)}")
+ logger.warning(
+ f"Failed to delete MCP {mcp.get('mcp_id')}: {str(e)}")
# 7. Delete all invitation codes in the tenant
logger.info(f"Deleting invitations for tenant {tenant_id}")
@@ -430,7 +554,8 @@ async def delete_single_user(user: Dict[str, Any]) -> None:
try:
remove_invitation(invitation["invitation_id"], deleted_by)
except Exception as e:
- logger.warning(f"Failed to delete invitation {invitation.get('invitation_id')}: {str(e)}")
+ logger.warning(
+ f"Failed to delete invitation {invitation.get('invitation_id')}: {str(e)}")
# 8. Delete all tenant configurations (must be done last)
logger.info(f"Deleting tenant configurations for tenant {tenant_id}")
@@ -440,9 +565,11 @@ async def delete_single_user(user: Dict[str, Any]) -> None:
try:
delete_config_by_tenant_config_id(config["tenant_config_id"])
except Exception as e:
- logger.warning(f"Failed to delete config {config.get('tenant_config_id')}: {str(e)}")
+ logger.warning(
+ f"Failed to delete config {config.get('tenant_config_id')}: {str(e)}")
- logger.info(f"Successfully deleted tenant {tenant_id} and all associated resources")
+ logger.info(
+ f"Successfully deleted tenant {tenant_id} and all associated resources")
return True
except Exception as e:
@@ -476,5 +603,6 @@ def _create_default_group_for_tenant(tenant_id: str, created_by: Optional[str] =
return group_id
except Exception as e:
- logger.error(f"Failed to create default group for tenant {tenant_id}: {str(e)}")
+ logger.error(
+ f"Failed to create default group for tenant {tenant_id}: {str(e)}")
raise ValidationError(f"Failed to create default group: {str(e)}")
diff --git a/backend/services/tool_configuration_service.py b/backend/services/tool_configuration_service.py
index e3a4cfa4f..6e6260544 100644
--- a/backend/services/tool_configuration_service.py
+++ b/backend/services/tool_configuration_service.py
@@ -15,7 +15,6 @@
from consts.const import DATA_PROCESS_SERVICE, LOCAL_MCP_SERVER, MCP_MANAGEMENT_API
from consts.exceptions import MCPConnectionError, NotFoundException, ToolExecutionException
from consts.model import ToolInstanceInfoRequest, ToolInfo, ToolSourceEnum, ToolValidateRequest
-from database.client import minio_client
from database.outer_api_tool_db import (
upsert_openapi_service,
query_openapi_services_by_tenant,
@@ -25,6 +24,7 @@
get_mcp_authorization_token_by_name_and_url,
get_mcp_records_by_tenant,
get_mcp_server_by_name_and_tenant,
+ get_mcp_custom_headers_by_name_and_url,
)
from database.tool_db import (
check_tool_list_initialized,
@@ -34,39 +34,47 @@
search_last_tool_instance_by_tool_id,
update_tool_table_from_scan_tool_list,
)
+from database.knowledge_db import get_knowledge_name_map_by_index_names
from mcpadapt.smolagents_adapter import _sanitize_function_name
-from services.file_management_service import get_llm_model
-from services.vectordatabase_service import get_embedding_model, get_rerank_model, get_vector_db_core
+from services.file_management_service import get_llm_model, validate_urls_access
+from services.vectordatabase_service import get_embedding_model_by_index_name, get_rerank_model
+from utils.http_client_utils import create_httpx_client
from database.client import minio_client
-from services.image_service import get_vlm_model
-from services.vectordatabase_service import get_embedding_model, get_vector_db_core
+from services.image_service import get_video_understanding_model, get_vlm_model
+from nexent.monitor import set_monitoring_context, set_monitoring_operation
+from services.vectordatabase_service import get_vector_db_core
from utils.langchain_utils import discover_langchain_modules
from utils.tool_utils import get_local_tools_classes, get_local_tools_description_zh
logger = logging.getLogger("tool_configuration_service")
-def _create_mcp_transport(url: str, authorization_token: Optional[str] = None):
+def _create_mcp_transport(url: str, authorization_token: Optional[str] = None, custom_headers: Optional[Dict[str, Any]] = None):
"""
Create appropriate MCP transport based on URL ending.
Args:
url: MCP server URL
authorization_token: Optional authorization token
+ custom_headers: Optional custom HTTP headers
Returns:
Transport instance (SSETransport or StreamableHttpTransport)
"""
url_stripped = url.strip()
- headers = {"Authorization": authorization_token} if authorization_token else {}
+ headers = {}
+ if authorization_token:
+ headers["Authorization"] = authorization_token
+ if custom_headers:
+ headers.update(custom_headers)
if url_stripped.endswith("/sse"):
- return SSETransport(url=url_stripped, headers=headers)
+ return SSETransport(url=url_stripped, headers=headers, httpx_client_factory=create_httpx_client)
elif url_stripped.endswith("/mcp"):
- return StreamableHttpTransport(url=url_stripped, headers=headers)
+ return StreamableHttpTransport(url=url_stripped, headers=headers, httpx_client_factory=create_httpx_client)
else:
# Default to StreamableHttpTransport for unrecognized formats
- return StreamableHttpTransport(url=url_stripped, headers=headers)
+ return StreamableHttpTransport(url=url_stripped, headers=headers, httpx_client_factory=create_httpx_client)
def python_type_to_json_schema(annotation: Any) -> str:
@@ -129,11 +137,15 @@ def get_local_tools() -> List[ToolInfo]:
if hasattr(param.default, 'exclude') and param.default.exclude:
continue
+ # Check if default is a Pydantic FieldInfo (has .default attribute)
+ is_pydantic_field = hasattr(param.default, 'default')
+
# Get description in both languages
- param_description = param.default.description if hasattr(param.default, 'description') else ""
+ param_description = param.default.description if is_pydantic_field else ""
# First try to get from param.default.description_zh (FieldInfo)
- param_description_zh = param.default.description_zh if hasattr(param.default, 'description_zh') else None
+ # Note: Pydantic Field doesn't have description_zh attribute, so use getattr with default
+ param_description_zh = getattr(param.default, 'description_zh', None) if is_pydantic_field else None
# Fallback to init_param_descriptions if not found
if param_description_zh is None and param_name in init_param_descriptions:
@@ -145,11 +157,21 @@ def get_local_tools() -> List[ToolInfo]:
"description": param_description,
"description_zh": param_description_zh
}
- if param.default.default is PydanticUndefined:
- param_info["optional"] = False
+
+ # Handle both Pydantic FieldInfo and simple defaults
+ if is_pydantic_field:
+ if param.default.default is PydanticUndefined:
+ param_info["optional"] = False
+ else:
+ param_info["default"] = param.default.default
+ param_info["optional"] = True
else:
- param_info["default"] = param.default.default
- param_info["optional"] = True
+ # Simple default value (not a FieldInfo)
+ if param.default == inspect.Parameter.empty:
+ param_info["optional"] = False
+ else:
+ param_info["default"] = param.default
+ param_info["optional"] = True
init_params_list.append(param_info)
@@ -261,13 +283,15 @@ async def get_all_mcp_tools(tenant_id: str) -> List[ToolInfo]:
mcp_info = get_mcp_records_by_tenant(tenant_id=tenant_id)
tools_info = []
for record in mcp_info:
- # only update connected server
- if record["status"]:
+ # Only scan MCP services that are explicitly enabled and currently healthy.
+ if bool(record.get("enabled")) and bool(record.get("status")):
try:
tools_info.extend(await get_tool_from_remote_mcp_server(
mcp_server_name=record["mcp_name"],
remote_mcp_server=record["mcp_server"],
- tenant_id=tenant_id
+ tenant_id=tenant_id,
+ authorization_token=record.get("authorization_token"),
+ custom_headers=record.get("custom_headers"),
))
except Exception as e:
logger.error(f"mcp connection error: {str(e)}")
@@ -339,7 +363,8 @@ async def get_tool_from_remote_mcp_server(
mcp_server_name: str,
remote_mcp_server: str,
tenant_id: Optional[str] = None,
- authorization_token: Optional[str] = None
+ authorization_token: Optional[str] = None,
+ custom_headers: Optional[Dict[str, Any]] = None
):
"""
Get the tool information from the remote MCP server, avoid blocking the event loop
@@ -349,6 +374,7 @@ async def get_tool_from_remote_mcp_server(
remote_mcp_server: URL of the MCP server
tenant_id: Optional tenant ID for database lookup of authorization_token
authorization_token: Optional authorization token for authentication (if not provided and tenant_id is given, will be fetched from database)
+ custom_headers: Optional custom HTTP headers
"""
# Get authorization token from database if not provided
if authorization_token is None and tenant_id:
@@ -358,10 +384,18 @@ async def get_tool_from_remote_mcp_server(
tenant_id=tenant_id
)
+ # Get custom headers from database if not provided
+ if custom_headers is None and tenant_id:
+ custom_headers = get_mcp_custom_headers_by_name_and_url(
+ mcp_name=mcp_server_name,
+ mcp_server=remote_mcp_server,
+ tenant_id=tenant_id
+ )
+
tools_info = []
try:
- transport = _create_mcp_transport(remote_mcp_server, authorization_token)
+ transport = _create_mcp_transport(remote_mcp_server, authorization_token, custom_headers)
client = Client(transport=transport, timeout=10)
async with client:
# List available operations
@@ -381,8 +415,9 @@ async def get_tool_from_remote_mcp_server(
input_schema["properties"][k]["type"] = "string"
sanitized_tool_name = _sanitize_function_name(tool.name)
+ tool_description = tool.description or ""
tool_info = ToolInfo(name=sanitized_tool_name,
- description=tool.description,
+ description=tool_description,
params=[],
source=ToolSourceEnum.MCP.value,
inputs=str(input_schema["properties"]),
@@ -481,7 +516,8 @@ async def list_all_tools(tenant_id: str):
param["description_zh"] = sdk_param.get("description_zh")
break
- # Merge inputs description_zh from SDK
+ # Use SDK full input schema for local tools to keep runtime inputs
+ # aligned with current tool code (instead of stale DB snapshots).
inputs_str = tool.get("inputs", "{}")
try:
inputs = json.loads(inputs_str) if isinstance(inputs_str, str) else inputs_str
@@ -514,7 +550,6 @@ async def list_all_tools(tenant_id: str):
"category": tool.get("category")
}
formatted_tools.append(formatted_tool)
-
return formatted_tools
@@ -534,7 +569,8 @@ async def _call_mcp_tool(
mcp_url: str,
tool_name: str,
inputs: Optional[Dict[str, Any]],
- authorization_token: Optional[str] = None
+ authorization_token: Optional[str] = None,
+ custom_headers: Optional[Dict[str, Any]] = None
) -> Dict[str, Any]:
"""
Common method to call MCP tool with connection handling.
@@ -544,6 +580,7 @@ async def _call_mcp_tool(
tool_name: Name of the tool to call
inputs: Parameters to pass to the tool
authorization_token: Optional authorization token for authentication
+ custom_headers: Optional custom HTTP headers
Returns:
Dict containing tool execution result
@@ -551,7 +588,7 @@ async def _call_mcp_tool(
Raises:
MCPConnectionError: If MCP connection fails
"""
- transport = _create_mcp_transport(mcp_url, authorization_token)
+ transport = _create_mcp_transport(mcp_url, authorization_token, custom_headers)
client = Client(transport=transport)
async with client:
# Check if connected
@@ -615,16 +652,22 @@ async def _validate_mcp_tool_remote(
if not actual_mcp_url:
raise NotFoundException(f"MCP server not found for name: {usage}")
- # Get authorization token from database
+ # Get authorization token and custom headers from database
authorization_token = None
+ custom_headers = None
if tenant_id:
authorization_token = get_mcp_authorization_token_by_name_and_url(
mcp_name=usage,
mcp_server=actual_mcp_url,
tenant_id=tenant_id
)
+ custom_headers = get_mcp_custom_headers_by_name_and_url(
+ mcp_name=usage,
+ mcp_server=actual_mcp_url,
+ tenant_id=tenant_id
+ )
- return await _call_mcp_tool(actual_mcp_url, tool_name, inputs, authorization_token)
+ return await _call_mcp_tool(actual_mcp_url, tool_name, inputs, authorization_token, custom_headers)
def _get_tool_class_by_name(tool_name: str) -> Optional[type]:
@@ -680,6 +723,8 @@ def _validate_local_tool(
if not tool_class:
raise NotFoundException(f"Tool class not found for {tool_name}")
+ runtime_inputs = dict(inputs or {})
+
# Parse instantiation parameters first
instantiation_params = params or {}
# Get signature and extract default values for all parameters
@@ -702,7 +747,20 @@ def _validate_local_tool(
instantiation_params[param_name] = param.default
if tool_name == "knowledge_base_search":
- embedding_model = get_embedding_model(tenant_id=tenant_id)
+ index_names = instantiation_params.get("index_names", [])
+ is_multimodal = instantiation_params.pop("multimodal", False)
+
+ # Must have embedding model for knowledge base search
+ if not index_names or not tenant_id:
+ raise ToolExecutionException(
+ "Embedding model is required for knowledge_base_search but index_names or tenant_id is missing")
+
+ embedding_model, model_id, _ = get_embedding_model_by_index_name(tenant_id, index_names[0])
+ if not embedding_model:
+ raise ToolExecutionException(
+ f"No embedding model found for index '{index_names[0]}'. "
+ f"Please configure an embedding model for this knowledge base.")
+
vdb_core = get_vector_db_core()
# Get rerank configuration
@@ -712,11 +770,21 @@ def _validate_local_tool(
if rerank and rerank_model_name:
rerank_model = get_rerank_model(tenant_id=tenant_id, model_name=rerank_model_name)
+ # Build display_name to index_name mapping for LLM parameter conversion
+ display_name_to_index_map = {}
+ if index_names:
+ knowledge_name_map = get_knowledge_name_map_by_index_names(index_names)
+ for idx_name, kb_name in knowledge_name_map.items():
+ display_name_to_index_map[kb_name] = idx_name
+
params = {
**instantiation_params,
'vdb_core': vdb_core,
'embedding_model': embedding_model,
'rerank_model': rerank_model,
+ 'display_name_to_index_map': display_name_to_index_map,
+ # Internal access control: restrict results to specific document paths (path_or_urls)
+ 'document_paths': instantiation_params.get('document_paths'),
}
tool_instance = tool_class(**params)
elif tool_name in ["dify_search", "datamate_search"]:
@@ -732,15 +800,49 @@ def _validate_local_tool(
'rerank_model': rerank_model,
}
tool_instance = tool_class(**params)
+ elif tool_name in ("haotian_search", "aidp_search"):
+ # Haotian and AIDP share the same instantiation shape: drop the
+ # backend-only rerank keys and explicitly set observer=None
+ # (otherwise Python falls back to the FieldInfo default, which
+ # later triggers "'FieldInfo' has no attribute 'lang'" in
+ # forward()).
+ filtered_params = {k: v for k, v in instantiation_params.items()
+ if k not in ["observer", "rerank_model", "rerank"]}
+ filtered_params["observer"] = None
+ tool_instance = tool_class(**filtered_params)
elif tool_name == "analyze_image":
if not tenant_id or not user_id:
raise ToolExecutionException(
f"Tenant ID and User ID are required for {tool_name} validation")
+ # get_vlm_model reads the first multimodal slot, now shown as image understanding.
image_to_text_model = get_vlm_model(tenant_id=tenant_id)
+ vlm_display_name = getattr(
+ image_to_text_model, 'display_name', None)
+ set_monitoring_context(tenant_id=tenant_id)
+ set_monitoring_operation(
+ "tool_validation", display_name=vlm_display_name)
params = {
**instantiation_params,
'vlm_model': image_to_text_model,
- 'storage_client': minio_client
+ 'storage_client': minio_client,
+ 'validate_url_access': lambda urls: validate_urls_access(urls, user_id)
+ }
+ tool_instance = tool_class(**params)
+ elif tool_name in ["analyze_audio", "analyze_video"]:
+ if not tenant_id or not user_id:
+ raise ToolExecutionException(
+ f"Tenant ID and User ID are required for {tool_name} validation")
+ video_understanding_model = get_video_understanding_model(tenant_id=tenant_id)
+ model_display_name = getattr(
+ video_understanding_model, 'display_name', None)
+ set_monitoring_context(tenant_id=tenant_id)
+ set_monitoring_operation(
+ "tool_validation", display_name=model_display_name)
+ params = {
+ **instantiation_params,
+ 'vlm_model': video_understanding_model,
+ 'storage_client': minio_client,
+ 'validate_url_access': lambda urls: validate_urls_access(urls, user_id)
}
tool_instance = tool_class(**params)
elif tool_name == "analyze_text_file":
@@ -748,16 +850,33 @@ def _validate_local_tool(
raise ToolExecutionException(
f"Tenant ID and User ID are required for {tool_name} validation")
long_text_to_text_model = get_llm_model(tenant_id=tenant_id)
+ llm_display_name = getattr(
+ long_text_to_text_model, 'display_name', None)
+ set_monitoring_context(tenant_id=tenant_id)
+ set_monitoring_operation(
+ "tool_validation", display_name=llm_display_name)
params = {
**instantiation_params,
'llm_model': long_text_to_text_model,
'storage_client': minio_client,
- "data_process_service_url": DATA_PROCESS_SERVICE
+ "data_process_service_url": DATA_PROCESS_SERVICE,
+ 'validate_url_access': lambda urls: validate_urls_access(urls, user_id)
}
tool_instance = tool_class(**params)
else:
tool_instance = tool_class(**instantiation_params)
+ # # Only pass declared runtime inputs to forward() to avoid unexpected kwargs.
+ # declared_inputs = getattr(tool_class, "inputs", {}) or {}
+ # allowed_input_names = (
+ # set(declared_inputs.keys()) if isinstance(declared_inputs, dict) else set()
+ # )
+ # filtered_runtime_inputs = (
+ # {k: v for k, v in runtime_inputs.items() if k in allowed_input_names}
+ # if allowed_input_names
+ # else runtime_inputs
+ # )
+
result = tool_instance.forward(**(inputs or {}))
return result
except Exception as e:
@@ -868,6 +987,7 @@ def import_openapi_service(
tenant_id: str,
user_id: str,
service_description: str = None,
+ headers_template: Dict[str, Any] = None,
force_update: bool = False
) -> Dict[str, Any]:
"""
@@ -881,6 +1001,7 @@ def import_openapi_service(
tenant_id: Tenant ID for multi-tenancy
user_id: User ID for audit
service_description: Optional service description (if not provided, reads from openapi_json.info.description)
+ headers_template: Optional default headers template
force_update: If True, replace all existing tools for this service
Returns:
@@ -901,7 +1022,8 @@ def import_openapi_service(
server_url=server_url,
tenant_id=tenant_id,
user_id=user_id,
- description=service_description
+ description=service_description,
+ headers_template=headers_template,
)
logger.info(f"Imported service '{service_name}' for tenant {tenant_id}")
diff --git a/backend/services/user_management_service.py b/backend/services/user_management_service.py
index 39ea8cfbe..0b38a76bc 100644
--- a/backend/services/user_management_service.py
+++ b/backend/services/user_management_service.py
@@ -15,11 +15,36 @@
from utils.auth_utils import (
get_supabase_client,
+ get_supabase_admin_client,
calculate_expires_at,
get_jwt_expiry_seconds,
+ ensure_cas_session_active_from_authorization,
+ resolve_tenant_id_from_user_tenant_record,
)
-from consts.const import INVITE_CODE, SUPABASE_URL, SUPABASE_KEY, DEFAULT_TENANT_ID
-from consts.exceptions import NoInviteCodeException, IncorrectInviteCodeException, UserRegistrationException, UnauthorizedError
+from consts.const import (
+ INVITE_CODE,
+ SUPABASE_URL,
+ SUPABASE_KEY,
+ DEFAULT_TENANT_ID,
+ ASSET_OWNER_TENANT_ID,
+ ASSET_OWNER_INVITE_CODE_TYPE,
+ ASSET_OWNER_ROLE,
+ ASSET_OWNER_SIGNUP_USE_OAUTH_DETAIL,
+)
+
+from services.asset_owner_visibility import (
+ filter_accessible_routes_for_asset_owner_feature,
+ require_asset_owner_enabled,
+)
+from consts.exceptions import (
+ NoInviteCodeException,
+ IncorrectInviteCodeException,
+ UserRegistrationException,
+ UnauthorizedError,
+ ValidationError,
+)
+from consts.error_code import ErrorCode
+from consts.exceptions import AppException
from database.model_management_db import create_model_record
from database.user_tenant_db import insert_user_tenant, get_user_tenant_by_user_id
@@ -29,7 +54,7 @@
from services.invitation_service import use_invitation_code, check_invitation_available, get_invitation_by_code
from services.group_service import add_user_to_groups
from services.tool_configuration_service import init_tool_list_for_tenant
-
+from services.skill_service import init_skill_list_for_tenant
logging.getLogger("user_management_service").setLevel(logging.DEBUG)
@@ -83,6 +108,7 @@ def validate_token(token: str) -> Tuple[bool, Optional[Any]]:
try:
user = get_current_user_from_client(client, token)
if user:
+ ensure_cas_session_active_from_authorization(token)
return True, user
return False, None
except Exception as e:
@@ -133,6 +159,12 @@ async def signup_user_with_invitation(email: EmailStr,
auto_login: Optional[bool] = True):
"""User registration with invitation code support"""
client = get_supabase_client()
+
+ # Validate password strength before registration
+ if not validate_password_strength(password):
+ raise AppException(ErrorCode.PROFILE_PASSWORD_WEAK,
+ "Password must be at least 8 characters with uppercase, lowercase, and digit.")
+
logging.info(
f"Receive registration request: email={email}, invite_code={'provided' if invite_code else 'not provided'}, auto_login={auto_login}")
@@ -163,12 +195,17 @@ async def signup_user_with_invitation(email: EmailStr,
user_role = "ADMIN"
elif code_type == "DEV_INVITE":
user_role = "DEV"
+ elif code_type == ASSET_OWNER_INVITE_CODE_TYPE:
+ require_asset_owner_enabled()
+ raise ValidationError(ASSET_OWNER_SIGNUP_USE_OAUTH_DETAIL)
logging.info(
f"Invitation code {invite_code} validated successfully, will assign role: {user_role}")
except IncorrectInviteCodeException:
raise
+ except ValidationError:
+ raise
except Exception as e:
logging.error(
f"Invitation code {invite_code} validation failed: {str(e)}")
@@ -187,14 +224,20 @@ async def signup_user_with_invitation(email: EmailStr,
# Determine tenant_id based on invitation code
if invitation_info:
tenant_id = invitation_info["tenant_id"]
+ if invitation_info.get("code_type") == ASSET_OWNER_INVITE_CODE_TYPE:
+ tenant_id = ASSET_OWNER_TENANT_ID
else:
tenant_id = DEFAULT_TENANT_ID
+ is_asset_owner_registration = user_role == ASSET_OWNER_ROLE
+
# Create user tenant relationship
- logging.debug(f"Creating user tenant relationship: user_id={user_id}, tenant_id={tenant_id}, user_role={user_role}")
+ logging.debug(
+ f"Creating user tenant relationship: user_id={user_id}, tenant_id={tenant_id}, user_role={user_role}")
insert_user_tenant(
user_id=user_id, tenant_id=tenant_id, user_role=user_role, user_email=email)
- logging.debug(f"User tenant relationship created successfully for user {user_id}")
+ logging.debug(
+ f"User tenant relationship created successfully for user {user_id}")
# Use invitation code now that we have the real user_id
if invitation_info:
@@ -205,7 +248,7 @@ async def signup_user_with_invitation(email: EmailStr,
# Add user to groups specified in invitation code
group_ids = invitation_result.get("group_ids", [])
- if group_ids:
+ if group_ids and not is_asset_owner_registration:
try:
# Convert group_ids from string to list if needed
if isinstance(group_ids, str):
@@ -213,7 +256,8 @@ async def signup_user_with_invitation(email: EmailStr,
group_ids = convert_string_to_list(group_ids)
if group_ids:
- group_results = add_user_to_groups(user_id, group_ids, user_id)
+ group_results = add_user_to_groups(
+ user_id, group_ids, user_id)
successful_adds = [
r for r in group_results if not r.get("error")]
logging.info(
@@ -235,7 +279,9 @@ async def signup_user_with_invitation(email: EmailStr,
await generate_tts_stt_4_admin(tenant_id, user_id)
# Initialize tool list for the new tenant (only once per tenant)
- await init_tool_list_for_tenant(tenant_id, user_id)
+ if not is_asset_owner_registration:
+ await init_tool_list_for_tenant(tenant_id, user_id)
+ await init_skill_list_for_tenant(tenant_id, user_id)
return await parse_supabase_response(False, response, user_role, auto_login)
else:
@@ -330,14 +376,24 @@ async def signin_user(email: EmailStr,
"password": password
})
+ user_tenant = get_user_tenant_by_user_id(response.user.id)
+ if user_tenant and user_tenant.get("user_role") == ASSET_OWNER_ROLE:
+ try:
+ require_asset_owner_enabled()
+ except ValidationError:
+ client.auth.sign_out()
+ raise
+
# Get actual expiration time from access_token
expiry_seconds = get_jwt_expiry_seconds(response.session.access_token)
expires_at = calculate_expires_at(response.session.access_token)
- # Get role information from user metadata
- user_role = "user" # Default role
- if 'role' in response.user.user_metadata: # Adapt to historical user data
- user_role = response.user.user_metadata['role']
+ # Prefer user_tenant_t role; fall back to Supabase metadata for legacy users
+ user_role = "user"
+ if user_tenant and user_tenant.get("user_role"):
+ user_role = user_tenant["user_role"]
+ elif "role" in response.user.user_metadata:
+ user_role = response.user.user_metadata["role"]
logging.info(
f"User {email} logged in successfully, session validity is {expiry_seconds} seconds, role: {user_role}")
@@ -374,7 +430,8 @@ async def refresh_user_token(authorization, refresh_token: str):
async def get_session_by_authorization(authorization):
# Extract clean token from authorization header
- clean_token = authorization.replace("Bearer ", "") if authorization.startswith("Bearer ") else authorization
+ clean_token = authorization.replace(
+ "Bearer ", "") if authorization.startswith("Bearer ") else authorization
# Use the unified token validation function
is_valid, user = validate_token(clean_token)
@@ -411,9 +468,27 @@ async def get_user_info(user_id: str) -> Optional[Dict[str, Any]]:
# Get user tenant relationship
user_tenant = get_user_tenant_by_user_id(user_id)
if not user_tenant:
+ # User exists in Supabase but not in local database - this is an inconsistent state.
+ # Delete the orphaned Supabase account and return None to trigger 401.
+ logging.warning(
+ f"User {user_id} not found in local database, cleaning up orphaned Supabase account"
+ )
+ try:
+ admin_client = get_supabase_admin_client()
+ if admin_client and hasattr(admin_client.auth, "admin"):
+ admin_client.auth.admin.delete_user(user_id)
+ logging.info(f"Deleted orphaned Supabase user {user_id}")
+ else:
+ logging.warning(
+ f"Could not get Supabase admin client to delete user {user_id}"
+ )
+ except Exception as delete_err:
+ logging.error(
+ f"Failed to delete orphaned Supabase user {user_id}: {str(delete_err)}"
+ )
return None
- tenant_id = user_tenant["tenant_id"]
+ tenant_id = resolve_tenant_id_from_user_tenant_record(user_tenant)
user_role = user_tenant["user_role"]
user_email = user_tenant["user_email"]
@@ -437,7 +512,7 @@ async def get_user_info(user_id: str) -> Optional[Dict[str, Any]]:
"user_email": user_email,
"user_role": user_role,
"permissions": permissions_data["permissions"],
- "accessibleRoutes": permissions_data["accessibleRoutes"]
+ "accessibleRoutes": permissions_data["accessibleRoutes"],
}
}
@@ -476,9 +551,13 @@ def format_role_permissions(permissions: List[Dict[str, Any]]) -> Dict[str, List
# Add permission_subtype to accessible routes for LEFT_NAV_MENU type
accessible_routes.append(permission_subtype)
+ accessible_routes = filter_accessible_routes_for_asset_owner_feature(
+ accessible_routes
+ )
+
return {
"permissions": formatted_permissions,
- "accessibleRoutes": accessible_routes
+ "accessibleRoutes": accessible_routes,
}
@@ -522,3 +601,85 @@ def delete_token(token_id: int, user_id: str) -> bool:
True if the token was deleted, False if not found or not owned by user.
"""
return delete_token_record(token_id, user_id)
+
+
+# -----------------------------
+# Password Management
+# -----------------------------
+
+def validate_password_strength(password: str) -> bool:
+    """Check a password against the minimum strength policy.
+
+    Args:
+        password: Candidate password to evaluate.
+
+    Returns:
+        True when it has at least 8 characters and contains an uppercase letter,
+        a lowercase letter and a digit; False otherwise.
+    """
+    # Evaluated in policy order and short-circuits on the first failed rule.
+    return (len(password) >= 8
+            and any(ch.isupper() for ch in password)
+            and any(ch.islower() for ch in password)
+            and any(ch.isdigit() for ch in password))
+
+
+async def update_password(user_id: str, old_password: str, new_password: str) -> bool:
+ """Update user password with old password verification.
+
+ The strength and same-as-old checks run first; the user's email is then looked up,
+ the old password is verified by signing in, and the new password is set on that same client.
+
+ Args:
+ user_id: The user ID to update password for.
+ old_password: The current password for verification.
+ new_password: The new password to set.
+
+ Returns:
+ True if password was updated successfully.
+
+ Raises:
+ UnauthorizedError: If the user's email cannot be retrieved or the sign-in with the old password fails.
+ AppException (PROFILE_PASSWORD_WEAK): If new password does not meet requirements.
+ AppException (PROFILE_PASSWORD_SAME_AS_OLD): If new password is the same as old password.
+ """
+ if not validate_password_strength(new_password):
+ raise AppException(ErrorCode.PROFILE_PASSWORD_WEAK)
+
+ if old_password == new_password:
+ raise AppException(ErrorCode.PROFILE_PASSWORD_SAME_AS_OLD)
+
+ admin_client = get_supabase_admin_client()
+ # NOTE(review): other call sites guard against a falsy admin client; here a None client would surface as "Invalid old password" - confirm
+ try:
+ user_tenant = get_user_tenant_by_user_id(user_id)
+ if not user_tenant or not user_tenant.get("user_email"):
+ raise UnauthorizedError("Unable to retrieve user email")
+
+ user_email = user_tenant["user_email"]
+
+ # Verify identity by signing in with the old password; any sign-in exception is reported as a bad password
+ try:
+ admin_client.auth.sign_in_with_password({
+ "email": user_email,
+ "password": old_password
+ })
+ except Exception as auth_err:
+ logging.warning(
+ f"Password verification failed for user {user_id}: {str(auth_err)}")
+ raise UnauthorizedError("Invalid old password")
+
+ # NOTE(review): update_user presumably acts on the session set by the sign-in above - confirm this client is not shared across requests
+ admin_client.auth.update_user({"password": new_password})
+
+ logging.info(f"Password updated successfully for user {user_id}")
+ return True
+
+ except UnauthorizedError:
+ raise
+ except AppException:
+ raise
+ except Exception as exc:
+ logging.error(
+ f"Failed to update password for user {user_id}: {str(exc)}")
+ raise
diff --git a/backend/services/user_service.py b/backend/services/user_service.py
index ceb471844..6f4edcb1a 100644
--- a/backend/services/user_service.py
+++ b/backend/services/user_service.py
@@ -11,6 +11,7 @@
from database.group_db import remove_user_from_all_groups
from database.memory_config_db import soft_delete_all_configs_by_user_id
from database.conversation_db import soft_delete_all_conversations_by_user
+from database.oauth_account_db import soft_delete_all_oauth_accounts_by_user_id
from utils.auth_utils import get_supabase_admin_client
from utils.memory_utils import build_memory_config
@@ -174,7 +175,14 @@ async def delete_user_and_cleanup(user_id: str, tenant_id: str) -> None:
except Exception as e:
logger.error(f"Failed clearing memory for user {user_id}: {e}")
- # 5) Delete from Supabase
+ # 5) Soft-delete OAuth account bindings
+ try:
+ deleted_oauth = soft_delete_all_oauth_accounts_by_user_id(user_id, user_id)
+ logger.debug(f"\t{deleted_oauth} OAuth account bindings deleted.")
+ except Exception as e:
+ logger.error(f"Failed deleting OAuth accounts for user {user_id}: {e}")
+
+ # 6) Delete from Supabase
try:
admin_client = get_supabase_admin_client()
if admin_client and hasattr(admin_client.auth, "admin"):
diff --git a/backend/services/vectordatabase_service.py b/backend/services/vectordatabase_service.py
index 5639103de..dd2f6e51a 100644
--- a/backend/services/vectordatabase_service.py
+++ b/backend/services/vectordatabase_service.py
@@ -10,6 +10,7 @@
4. Health check interface
"""
import asyncio
+import hashlib
import json
import logging
import os
@@ -20,15 +21,15 @@
from fastapi import Body, Depends, Path, Query
from fastapi.responses import StreamingResponse
-from nexent.core.models.embedding_model import OpenAICompatibleEmbedding, JinaEmbedding, BaseEmbedding
+from nexent.core.models.embedding_model import OpenAICompatibleEmbedding, JinaEmbedding, DashScopeMultimodalEmbedding, BaseEmbedding
from nexent.core.models.rerank_model import OpenAICompatibleRerank, BaseRerank
from nexent.vector_database.base import VectorDatabaseCore
from nexent.vector_database.elasticsearch_core import ElasticSearchCore
from nexent.vector_database.datamate_core import DataMateCore
-from consts.const import DATAMATE_URL, ES_API_KEY, ES_HOST, LANGUAGE, VectorDatabaseType, IS_SPEED_MODE, PERMISSION_EDIT, PERMISSION_READ
+from consts.const import DATAMATE_URL, ES_API_KEY, ES_HOST, LANGUAGE, VectorDatabaseType, IS_SPEED_MODE, PERMISSION_EDIT, PERMISSION_READ, ASSET_OWNER_TENANT_ID
from consts.model import ChunkCreateRequest, ChunkUpdateRequest
-from database.attachment_db import delete_file
+from database.attachment_db import delete_file, file_exists, get_file_stream
from database.knowledge_db import (
create_knowledge_record,
delete_knowledge_record,
@@ -36,13 +37,17 @@
update_knowledge_record,
get_knowledge_info_by_tenant_id,
update_model_name_by_index_name,
+ update_last_doc_update_time,
+ update_last_summary_time,
+ update_embedding_model_by_index_name,
)
from utils.str_utils import convert_list_to_string
from database.user_tenant_db import get_user_tenant_by_user_id
from database.group_db import query_group_ids_by_user
-from database.model_management_db import get_model_records
+from database.model_management_db import get_model_by_display_name, get_model_by_model_id, get_model_records
from services.redis_service import get_redis_service
from services.group_service import get_tenant_default_group_id
+from services.asset_owner_visibility import postprocess_knowledge_visibility
from utils.config_utils import tenant_config_manager, get_model_name_from_config
from utils.file_management_utils import get_all_files_status, get_file_size
from utils.str_utils import convert_string_to_list
@@ -76,6 +81,134 @@ def _update_progress(task_id: str, processed: int, total: int):
f"[PROGRESS CALLBACK] Exception updating progress for task {task_id}: {str(e)}")
+def _get_embedding_model_display_name(model_id: Optional[int], tenant_id: str) -> str:
+    """
+    Look up the display_name of an embedding model by its model_id.
+
+    Args:
+        model_id: The model ID to look up; None short-circuits to "".
+        tenant_id: Tenant ID passed through to the model lookup.
+
+    Returns:
+        The model's display_name, or "" when the model is missing, the lookup
+        raises (logged as a warning), or the stored display_name is empty/None.
+    """
+    if model_id is None:
+        return ""
+    try:
+        model = get_model_by_model_id(model_id, tenant_id)
+        return (model.get("display_name") or "") if model else ""  # None-valued key -> ""
+    except Exception as e:
+        logger.warning(f"Failed to get display_name for model_id {model_id}: {e}")
+        return ""
+
+
+def _is_multimodal_by_model_id(model_id: Optional[int], tenant_id: str) -> bool:
+    """
+    Report whether the embedding model behind model_id is a multimodal one.
+
+    Args:
+        model_id: Embedding model ID; None is treated as "not multimodal".
+        tenant_id: Tenant ID handed to the model lookup.
+
+    Returns:
+        True only when the looked-up record's model_type is `multi_embedding`.
+    """
+    if model_id is None:
+        return False
+    try:
+        record = get_model_by_model_id(model_id, tenant_id)
+        # A missing record is simply "not multimodal".
+        return bool(record) and record.get("model_type") == "multi_embedding"
+    except Exception as e:
+        logger.warning(f"Failed to determine multimodal flag for model_id {model_id}: {e}")
+        return False
+
+
+class KnowledgeBaseNeedsModelConfigError(Exception):
+    """Signals that a knowledge base still needs an embedding model; exposes `index_name` and `message`."""
+    def __init__(self, index_name: str, message: str = None):
+        self.message = message if message else f"Knowledge base '{index_name}' needs an embedding model to be configured"
+        self.index_name = index_name
+        super().__init__(self.message)
+
+
+def get_embedding_model_by_index_name(tenant_id: str, index_name: str) -> tuple[Optional[Any], Optional[int], dict]:
+ """
+ Get the embedding model for a knowledge base by its index_name.
+
+ Args:
+ tenant_id: Tenant ID
+ index_name: The index name of the knowledge base
+
+ Returns:
+ Tuple of (embedding model instance or None, model_id or None, metadata dict)
+ metadata contains: {
+ "status": str, # "ok" | "needs_config" | "error"
+ "needs_update": bool, # Always False in every return of this function
+ "message": str # Status message
+ }
+ No "update_info" key is populated. Exceptions are caught, logged and returned as status "error".
+
+ Design principles:
+ - Force explicit configuration: model_id must be explicitly set by user
+ - No auto-fix: never automatically use tenant default model
+ - Clear error guidance: return needs_config status for user action
+ """
+ try:
+ knowledge_record = get_knowledge_record({
+ "index_name": index_name,
+ "tenant_id": tenant_id,
+ "include_asset_owner_assets": True,
+ })
+
+ if not knowledge_record:
+ return None, None, {
+ "status": "error",
+ "needs_update": False,
+ "message": f"Knowledge base '{index_name}' not found"
+ }
+
+ model_id = knowledge_record.get("embedding_model_id")
+
+ # Case 1: model_id exists and is valid, use it
+ if model_id:
+ model, _ = get_embedding_model_by_id(tenant_id, model_id)
+ if model:
+ return model, model_id, {
+ "status": "ok",
+ "needs_update": False,
+ "message": "Embedding model found"
+ }
+ # Model ID exists but model not found - fall through to the needs_config handling below
+ logger.warning(f"Model ID {model_id} specified for index '{index_name}' but model not found")
+
+ # Case 2: model_id does not exist or is invalid
+ # Design principle: Force explicit configuration, no auto-fix
+ # Return needs_config to guide user to select a model
+ embedding_model_name = knowledge_record.get("embedding_model_name")
+ if embedding_model_name:
+ # Has model_name but no valid model_id (legacy data)
+ logger.warning(f"Index '{index_name}' has embedding_model_name but no valid model_id, needs explicit configuration")
+ else:
+ # No model configured at all
+ logger.error(f"Index '{index_name}' has no embedding model configured")
+
+ return None, None, {
+ "status": "needs_config",
+ "needs_update": False,
+ "message": f"No embedding model configured for knowledge base '{index_name}'. Please select a model."
+ }
+
+ except Exception as e:
+ logger.warning(f"Failed to get embedding model for index {index_name}: {e}")
+ return None, None, {
+ "status": "error",
+ "needs_update": False,
+ "message": str(e)
+ }
+
+
ALLOWED_CHUNK_FIELDS = {
"id",
"title",
@@ -175,71 +308,142 @@ def check_knowledge_base_exist_impl(knowledge_name: str, vdb_core: VectorDatabas
# Case B: Name is available in this tenant
return {"status": "available"}
-
-def get_embedding_model(tenant_id: str, model_name: Optional[str] = None):
+def _normalize_model_type(raw_model_type: Optional[str]) -> Optional[str]:
+    """Map an embedding type alias to its canonical name; None if unrecognized."""
+    if raw_model_type == "embedding":
+        return "embedding"
+    is_multi = raw_model_type in ("multiEmbedding", "multi_embedding")
+    return "multi_embedding" if is_multi else None
+
+def _build_model_config(model: dict) -> dict:
+    """Project a model record onto the config keys used to build an embedding client."""
+    optional_defaults = (
+        ("api_key", ""), ("base_url", ""), ("model_type", "embedding"),
+        ("max_tokens", 1024), ("ssl_verify", True),
+    )
+    # "model_name" is mandatory: a record without it raises KeyError.
+    config = {"model_repo": model.get("model_repo", ""), "model_name": model["model_name"]}
+    config.update((key, model.get(key, default)) for key, default in optional_defaults)
+    return config
+
+def _create_embedding_model(model: dict) -> Any:
+    """Instantiate the embedding client for a model record, chosen by type and factory."""
+    model_config = _build_model_config(model)
+    common_kwargs = {
+        "api_key": model_config.get("api_key", ""),
+        "base_url": model_config.get("base_url", ""),
+        "model_name": get_model_name_from_config(model_config) or "",
+        "embedding_dim": model_config.get("max_tokens", 1024),
+        "ssl_verify": model_config.get("ssl_verify", True),
+    }
+    if model.get("model_type", "embedding") != "multi_embedding":
+        return OpenAICompatibleEmbedding(**common_kwargs)
+    # "model_factory" may be stored as None; coerce before lower() to avoid AttributeError.
+    is_dashscope = (model.get("model_factory") or "").lower() == "dashscope"
+    return (DashScopeMultimodalEmbedding if is_dashscope else JinaEmbedding)(**common_kwargs)
+
+def get_embedding_model(
+ tenant_id: str,
+ model_name: Optional[str] = None,
+ model_type: Optional[str] = None
+) -> tuple[Optional[Any], Optional[int]]:
"""
Get the embedding model for the tenant, optionally using a specific model name.
Args:
tenant_id: Tenant ID
- model_name: Optional specific model name to use (format: "model_repo/model_name" or just "model_name")
- If provided, will try to find the model in the tenant's model list.
+ model_name: Optional display name of the embedding model to use.
+ If provided, will find the model by display_name in the tenant's model list.
+ model_type: Optional model type filter. When model_name is omitted, queries tenant
+ model records by this type; when model_type is also omitted, prefers
+ embedding models, then multi_embedding models.
Returns:
- Embedding model instance or None
+ Tuple of (embedding model instance or None, model_id or None)
"""
- # If model_name is provided, try to find it in the tenant's models
if model_name:
try:
- models = get_model_records({"model_type": "embedding"}, tenant_id)
- for model in models:
- model_display_name = model.get("model_repo") + "/" + model["model_name"] if model.get("model_repo") else model["model_name"]
- if model_display_name == model_name:
- # Found the model, create embedding instance
- model_config = {
- "model_repo": model.get("model_repo", ""),
- "model_name": model["model_name"],
- "api_key": model.get("api_key", ""),
- "base_url": model.get("base_url", ""),
- "model_type": "embedding",
- "max_tokens": model.get("max_tokens", 1024),
- "ssl_verify": model.get("ssl_verify", True),
- }
- return OpenAICompatibleEmbedding(
- api_key=model_config.get("api_key", ""),
- base_url=model_config.get("base_url", ""),
- model_name=get_model_name_from_config(model_config) or "",
- embedding_dim=model_config.get("max_tokens", 1024),
- ssl_verify=model_config.get("ssl_verify", True),
- )
+ model_type = _normalize_model_type(model_type)
+ if model_type:
+ model = get_model_by_display_name(model_name, tenant_id, model_type)
+ else:
+ model = get_model_by_display_name(model_name, tenant_id)
+
+ if not model or model.get("model_type") not in ["embedding", "multi_embedding"]:
+ logger.warning(f"Model '{model_name}' not found or is not an embedding model")
+ return None, None
+
+ return _create_embedding_model(model), model.get("model_id")
except Exception as e:
logger.warning(f"Failed to get embedding model by name {model_name}: {e}")
+ else:
+ try:
+ if model_type:
+ records = get_model_records({"model_type": model_type}, tenant_id)
+ else:
+ records = get_model_records({"model_type": "embedding"}, tenant_id)
+ if not records:
+ records = get_model_records({"model_type": "multi_embedding"}, tenant_id)
+
+ if records:
+ model = records[0]
+ if model.get("model_type") in ["embedding", "multi_embedding"]:
+ return _create_embedding_model(model), model.get("model_id")
+ logger.warning(
+ f"Resolved model is not an embedding model: {model.get('model_type')}"
+ )
+ except Exception as e:
+ logger.warning(f"Failed to get default embedding model for tenant {tenant_id}: {e}")
- # Fall back to default embedding model (current behavior)
- model_config = tenant_config_manager.get_model_config(
- key="EMBEDDING_ID", tenant_id=tenant_id)
+ return None, None
- model_type = model_config.get("model_type", "")
- if model_type == "embedding":
- # Get the es core
- return OpenAICompatibleEmbedding(
- api_key=model_config.get("api_key", ""),
- base_url=model_config.get("base_url", ""),
- model_name=get_model_name_from_config(model_config) or "",
- embedding_dim=model_config.get("max_tokens", 1024),
- ssl_verify=model_config.get("ssl_verify", True),
- )
- elif model_type == "multi_embedding":
- return JinaEmbedding(
- api_key=model_config.get("api_key", ""),
- base_url=model_config.get("base_url", ""),
- model_name=get_model_name_from_config(model_config) or "",
- embedding_dim=model_config.get("max_tokens", 1024),
- ssl_verify=model_config.get("ssl_verify", True),
- )
- else:
- return None
+def get_embedding_model_by_id(tenant_id: str, model_id: int) -> tuple[Optional[Any], Optional[int]]:
+    """
+    Get the embedding model by model_id.
+
+    The record is fetched with get_model_by_model_id and accepted only when its
+    model_type is "embedding" or "multi_embedding". The client instance is built
+    by _create_embedding_model, the same helper get_embedding_model() uses, so a
+    lookup by id and a lookup by display name yield the same client class for a
+    given record (including DashScopeMultimodalEmbedding for multi_embedding
+    records whose model_factory is "dashscope").
+
+    This function does not raise on lookup or construction errors: any
+    Exception is logged as a warning and reported as (None, None), as is a
+    missing record or a record of another model type.
+
+    Args:
+        tenant_id: Tenant ID
+        model_id: Model ID to query
+
+    Returns:
+        Tuple of (embedding model instance or None, model_id or None). On
+        success the id is the "model_id" field of the fetched record, not the
+        model_id argument.
+    """
+    try:
+        model = get_model_by_model_id(model_id, tenant_id)
+        if not model or model.get("model_type") not in ["embedding", "multi_embedding"]:
+            logger.warning(
+                f"Model with id {model_id} not found or is not an embedding model"
+            )
+            return None, None
+
+        # Delegate construction to the shared helper instead of duplicating it
+        # here, so both lookup paths pick the same client class.
+        # model_factory is normalized to "" so a record storing None for that
+        # field is handled like a record without it.
+        normalized_model = {
+            **model,
+            "model_factory": model.get("model_factory") or "",
+        }
+        embedding_model = _create_embedding_model(normalized_model)
+        return embedding_model, model.get("model_id")
+    except Exception as e:
+        # Best-effort lookup: errors are downgraded to a warning.
+        logger.warning(f"Failed to get embedding model by id {model_id}: {e}")
+    return None, None
def get_rerank_model(tenant_id: str, model_name: Optional[str] = None):
@@ -415,11 +619,19 @@ def create_index(
None, description="ID of the user creating the knowledge base"),
tenant_id: Optional[str] = Body(
None, description="ID of the tenant creating the knowledge base"),
+ model_id: Optional[int] = Body(
+ None, description="ID of the embedding model to use"),
):
try:
if vdb_core.check_index_exists(index_name):
raise Exception(f"Index {index_name} already exists")
- embedding_model = get_embedding_model(tenant_id)
+
+ # Get embedding model by model_id if provided
+ if model_id:
+ embedding_model, actual_model_id = get_embedding_model_by_id(tenant_id, model_id)
+ else:
+ embedding_model, actual_model_id = None, None
+
success = vdb_core.create_index(index_name, embedding_dim=embedding_dim or (
embedding_model.embedding_dim if embedding_model else 1024))
if not success:
@@ -427,7 +639,8 @@ def create_index(
knowledge_data = {"index_name": index_name,
"created_by": user_id,
"tenant_id": tenant_id,
- "embedding_model_name": embedding_model.model}
+ "embedding_model_name": embedding_model.model if embedding_model else None,
+ "embedding_model_id": actual_model_id}
create_knowledge_record(knowledge_data)
return {"status": "success", "message": f"Index {index_name} created successfully"}
except Exception as e:
@@ -443,6 +656,8 @@ def create_knowledge_base(
ingroup_permission: Optional[str] = None,
group_ids: Optional[List[int]] = None,
embedding_model_name: Optional[str] = None,
+ is_multimodal: Optional[bool] = None,
+ preserve_source_file: Optional[bool] = None,
):
"""
Create a new knowledge base with a user-facing name and an internal Elasticsearch index name.
@@ -462,13 +677,25 @@ def create_knowledge_base(
group_ids: List of group IDs (optional)
embedding_model_name: Specific embedding model name to use (optional).
If provided, will use this model instead of tenant default.
+ preserve_source_file: Whether to preserve uploaded source documents after
+ vectorization (optional; defaults to True when omitted).
For backward compatibility, legacy callers can still use create_index() directly
with an explicit index_name.
"""
try:
# Get embedding model - use user-selected model if provided, otherwise use tenant default
- embedding_model = get_embedding_model(tenant_id, embedding_model_name)
+ selected_model_type = None
+ if is_multimodal is True:
+ selected_model_type = "multi_embedding"
+ elif is_multimodal is False and embedding_model_name:
+ selected_model_type = "embedding"
+
+ embedding_model, model_id = get_embedding_model(
+ tenant_id,
+ embedding_model_name,
+ selected_model_type
+ )
# Determine the embedding model name to save: use user-provided name if available,
# otherwise use the model's display name
@@ -483,6 +710,7 @@ def create_knowledge_base(
"user_id": user_id,
"tenant_id": tenant_id,
"embedding_model_name": saved_embedding_model_name,
+ "embedding_model_id": model_id,
}
# Add group permission and group IDs if provided
@@ -490,6 +718,8 @@ def create_knowledge_base(
knowledge_data["ingroup_permission"] = ingroup_permission
if group_ids is not None:
knowledge_data["group_ids"] = group_ids
+ if preserve_source_file is not None:
+ knowledge_data["preserve_source_file"] = preserve_source_file
record_info = create_knowledge_record(knowledge_data)
index_name = record_info["index_name"]
@@ -570,6 +800,77 @@ def update_knowledge_base(
return result
+    @staticmethod
+    def update_embedding_model(
+        index_name: str,
+        model_id: int,
+        tenant_id: str,
+        user_id: Optional[str] = None,
+    ) -> Dict[str, Any]:
+        """
+        Bind a knowledge base to a different embedding model.
+
+        The model is looked up with get_model_by_model_id and must be of type
+        "embedding" or "multi_embedding". Its display_name is then persisted,
+        together with model_id, via update_embedding_model_by_index_name.
+
+        Args:
+            index_name: Internal index name of the knowledge base
+            model_id: ID of the embedding model to use
+            tenant_id: Tenant ID
+            user_id: ID of the user making the update; None is passed on as ""
+
+        Returns:
+            Dict with "status", "index_name", "model_id", "model_name",
+            "model_display_name" and "message"; both name fields hold the
+            model's display_name.
+
+        Raises:
+            ValueError: If model is not found or is not an embedding model
+            Exception: If the record update reports failure or any other error occurs
+        """
+        try:
+            model = get_model_by_model_id(model_id, tenant_id)
+            # Guard clauses: reject unknown models and non-embedding models.
+            if not model:
+                raise ValueError(f"Model with id {model_id} not found")
+            if model.get("model_type") not in ["embedding", "multi_embedding"]:
+                raise ValueError(
+                    f"Model '{model.get('display_name', model_id)}' is not an embedding model. "
+                    f"Please select an embedding model."
+                )
+
+            # display_name doubles as the stored embedding_model_name.
+            display_name = model.get("display_name")
+            updated = update_embedding_model_by_index_name(
+                index_name=index_name,
+                embedding_model_id=model_id,
+                embedding_model_name=display_name,
+                tenant_id=tenant_id,
+                user_id=user_id or ""
+            )
+            if not updated:
+                raise Exception(f"Failed to update embedding model for index '{index_name}'")
+
+            logger.info(
+                f"Embedding model updated for knowledge base '{index_name}' "
+                f"to model '{model.get('display_name', model_id)}' (id: {model_id}) by user '{user_id}'"
+            )
+            return {
+                "status": "success",
+                "index_name": index_name,
+                "model_id": model_id,
+                "model_name": display_name,
+                "model_display_name": display_name,
+                "message": f"Embedding model updated successfully to '{display_name}'"
+            }
+        except ValueError:
+            # Validation errors propagate unchanged (not wrapped like other failures).
+            raise
+        except Exception as e:
+            logger.error(f"Failed to update embedding model for index '{index_name}': {e}")
+            raise Exception(f"Failed to update embedding model: {str(e)}")
+
@staticmethod
async def delete_index(
index_name: str = Path(...,
@@ -631,7 +932,9 @@ def list_indices(
Permission logic:
- SU: All knowledgebases visible, all editable
- ADMIN: Knowledgebases from same tenant visible, all editable
- - USER/DEV: Knowledgebases where user belongs to intersecting groups, permission determined by:
+ - DEV on ASSET_OWNER-scoped records: all visible, read-only (READ_ONLY)
+ - SU/ADMIN/SPEED cross-tenant view of ASSET_OWNER records: read-only
+ - USER/DEV (non-ASSET_OWNER records): group intersection required; permission by:
* If user is creator: editable
* If ingroup_permission=EDIT: editable
* If ingroup_permission=READ_ONLY: read-only
@@ -663,7 +966,9 @@ def list_indices(
es_indices_list = vdb_core.get_user_indices(pattern)
# Get all knowledgebase records from database (for cleanup and permission checking)
- all_db_records = get_knowledge_info_by_tenant_id(target_tenant_id)
+ all_db_records = get_knowledge_info_by_tenant_id(
+ target_tenant_id
+ )
# Filter visible knowledgebases based on user role and permissions
visible_knowledgebases = []
@@ -679,6 +984,8 @@ def list_indices(
# Check permission based on user role
permission = None
+ record_tenant_id = str(record.get("tenant_id") or "")
+ is_asset_owner_record = record_tenant_id == ASSET_OWNER_TENANT_ID
# Fallback logic: if user_id equals user_tenant_id, treat as legacy admin user
# even if user_role is None or empty
@@ -690,7 +997,12 @@ def list_indices(
effective_user_role = "SPEED"
logger.info("User under SPEED version is treated as admin")
- if effective_user_role in ["SU", "ADMIN", "SPEED"]:
+ if is_asset_owner_record:
+ if effective_user_role in ["ASSET_OWNER"]:
+ permission = PERMISSION_EDIT
+ elif effective_user_role in ["SU", "ADMIN", "SPEED", "DEV"]:
+ permission = PERMISSION_READ
+ elif effective_user_role in ["SU", "ADMIN", "SPEED", "ASSET_OWNER"]:
# SU, ADMIN and SPEED roles can see all knowledgebases
permission = PERMISSION_EDIT
elif effective_user_role in ["USER", "DEV"]:
@@ -756,6 +1068,11 @@ def list_indices(
model_name_is_none_list.append(index_name)
# Build response
+ visible_knowledgebases = postprocess_knowledge_visibility(
+ visible_knowledgebases,
+ caller_role=user_role,
+ caller_tenant_id=target_tenant_id,
+ )
indices = [record["index_name"] for record in visible_knowledgebases]
response = {
@@ -774,6 +1091,12 @@ def list_indices(
index_name = record["index_name"]
index_stats = indice_stats.get(index_name, {})
+ # Get embedding model display_name from model_id
+ model_id = record.get("embedding_model_id")
+ tenant_id = record.get("tenant_id") or target_tenant_id
+ embedding_model_display_name = _get_embedding_model_display_name(model_id, tenant_id)
+ is_multimodal = _is_multimodal_by_model_id(model_id, tenant_id)
+
stats_info.append({
# Internal index name (used as ID)
"name": index_name,
@@ -784,9 +1107,17 @@ def list_indices(
# knowledge source and ingroup permission from DB record
"knowledge_sources": record["knowledge_sources"],
"ingroup_permission": record["ingroup_permission"],
+ "is_multimodal": is_multimodal,
"tenant_id": record.get("tenant_id"),
+ # Embedding model info: display_name from model_id
+ "embedding_model_name": embedding_model_display_name or record.get("embedding_model_name", ""),
+ "embedding_model_id": model_id,
# Update time for sorting and display
"update_time": record.get("update_time"),
+ # Auto-summary settings
+ "summary_frequency": record.get("summary_frequency"),
+ "last_summary_time": record.get("last_summary_time"),
+ "preserve_source_file": record.get("preserve_source_file", True),
"stats": index_stats,
})
@@ -812,6 +1143,9 @@ def index_documents(
] = Body(..., description="Document List to process"),
vdb_core: VectorDatabaseCore = Depends(get_vector_db_core),
task_id: Optional[str] = None,
+ model_id: Optional[int] = Body(
+ None, description="ID of the embedding model to use"),
+ large_mode: bool = False,
):
"""
Index documents and create vector embeddings, create index if it doesn't exist
@@ -821,6 +1155,8 @@ def index_documents(
index_name: Index name
data: List containing document data to be indexed
vdb_core: VectorDatabaseCore instance
+ task_id: Optional task ID for progress tracking
+ model_id: Optional model ID for the embedding model
Returns:
IndexingResponse object containing indexing result information
@@ -833,7 +1169,7 @@ def index_documents(
if not vdb_core.check_index_exists(index_name):
try:
ElasticSearchService.create_index(
- index_name, vdb_core=vdb_core)
+ index_name, vdb_core=vdb_core, model_id=model_id)
logger.info(f"Created new index {index_name}")
except Exception as create_error:
raise Exception(
@@ -882,12 +1218,27 @@ def index_documents(
"author": author,
"date": date,
"content": text,
- "process_source": "Unstructured",
+ "process_source": metadata.get("process_source", "Unstructured"),
"file_size": file_size,
"create_time": create_time,
"languages": metadata.get("languages", []),
"embedding_model_name": embedding_model_name
}
+
+ image_url = metadata.get("image_url", "")
+ if len(image_url) > 0:
+ # Fetch image bytes from MinIO (supports s3://bucket/key or /bucket/key)
+ try:
+ file_stream = get_file_stream(
+ object_name=image_url)
+ if file_stream is None:
+ raise FileNotFoundError(
+ f"Unable to fetch file from URL: {image_url}")
+ document["image_bytes"] = file_stream.read()
+ except Exception as e:
+ logger.error(
+ f"Failed to fetch file from {image_url}: {e}")
+ raise
documents.append(document)
@@ -908,8 +1259,9 @@ def index_documents(
'tenant_id') if knowledge_record else None
if tenant_id:
+ model_type = "EMBEDDING_ID" if embedding_model.model_type == "text" else "MULTI_EMBEDDING_ID"
model_config = tenant_config_manager.get_model_config(
- key="EMBEDDING_ID", tenant_id=tenant_id)
+ key=model_type, tenant_id=tenant_id)
embedding_batch_size = model_config.get("chunk_batch", 10)
if embedding_batch_size is None:
embedding_batch_size = 10
@@ -939,6 +1291,7 @@ def index_documents(
embedding_model=embedding_model,
documents=documents,
embedding_batch_size=embedding_batch_size,
+ large_mode=large_mode,
progress_callback=lambda processed, total: _update_progress(
task_id, processed, total) if task_id else None
)
@@ -959,6 +1312,9 @@ def index_documents(
logger.warning(
f"[REDIS PROGRESS] Exception updating final progress for task {task_id}: {str(e)}")
+ # Update last_doc_update_time for auto-summary tracking
+ update_last_doc_update_time(index_name)
+
return {
"success": True,
"message": f"Successfully indexed {total_indexed} documents",
@@ -993,35 +1349,33 @@ async def list_files(
"""
try:
files_map: Dict[str, Dict[str, Any]] = {}
- # Get existing files from ES
+ total_start_time = time.time()
+
+ logger.info(f"[list_files] index={index_name}, include_chunks={include_chunks}")
+
+ # Step 1: Get existing files from ES (includes chunk_count via aggregation)
+ step1_start = time.time()
existing_files = vdb_core.get_documents_detail(index_name)
+ step1_duration = time.time() - step1_start
+ logger.info(f"[list_files:step1] ES get_documents_detail: {len(existing_files)} files in {step1_duration:.3f}s")
- # Get unique celery files list and the status of each file
+ # Step 2: Get celery task statuses from external service
+ step2_start = time.time()
celery_task_files = await get_all_files_status(index_name)
+ step2_duration = time.time() - step2_start
+ logger.info(f"[list_files:step2] Celery task status: {len(celery_task_files)} tasks in {step2_duration:.3f}s")
- # For files already stored in ES, add to files list
+ # Step 3: Build files_map from ES data
+ step3_start = time.time()
for file_info in existing_files:
utc_create_time_str = file_info.get('create_time', '')
- # Try to parse the create_time string, fallback to current timestamp if format is invalid
try:
utc_create_timestamp = datetime.strptime(utc_create_time_str, '%Y-%m-%dT%H:%M:%S').replace(
tzinfo=timezone.utc).timestamp()
except (ValueError, TypeError):
utc_create_timestamp = time.time()
- # Always re-query chunk count to ensure accuracy (aggregation may be stale)
path_or_url = file_info.get('path_or_url')
- chunk_count = file_info.get('chunk_count', 0)
- try:
- count_result = vdb_core.client.count(
- index=index_name,
- body={"query": {"term": {"path_or_url": path_or_url}}}
- )
- chunk_count = count_result.get("count", chunk_count)
- except Exception as count_err:
- logger.warning(
- f"Failed to get chunk count for {path_or_url}: {count_err}, using aggregation value {chunk_count}")
-
file_data = {
'path_or_url': path_or_url,
'file': file_info.get('filename', ''),
@@ -1029,65 +1383,40 @@ async def list_files(
'create_time': int(utc_create_timestamp * 1000),
'status': "COMPLETED",
'latest_task_id': '',
- 'chunk_count': chunk_count,
+ 'chunk_count': file_info.get('chunk_count', 0),
'error_reason': None,
'has_error_info': False
}
files_map[path_or_url] = file_data
+ step3_duration = time.time() - step3_start
+ logger.info(f"[list_files:step3] Build files_map from ES: {len(existing_files)} files in {step3_duration:.3f}s")
- # For files not yet stored in ES (files currently being processed)
+ # Step 4: Merge celery task data (Redis progress already fetched in get_all_files_status)
+ step4_start = time.time()
+ celery_file_count = 0
for path_or_url, status_info in celery_task_files.items():
- status_dict = status_info if isinstance(
- status_info, dict) else {}
+ celery_file_count += 1
+ status_dict = status_info if isinstance(status_info, dict) else {}
- # Get source_type and original_filename, with defaults
- source_type = status_dict.get('source_type') if status_dict.get(
- 'source_type') else 'minio'
+ source_type = status_dict.get('source_type') if status_dict.get('source_type') else 'minio'
original_filename = status_dict.get('original_filename')
+ filename = original_filename or (os.path.basename(path_or_url) if path_or_url else '')
- # Determine the filename
- filename = original_filename or (
- os.path.basename(path_or_url) if path_or_url else '')
-
- # Safely get file size; default to 0 on any error
file_size = 0
if path_or_url in files_map:
file_size = files_map[path_or_url].get('file_size', 0)
else:
try:
- file_size = get_file_size(
- source_type or 'minio', path_or_url)
+ file_size = get_file_size(source_type or 'minio', path_or_url)
except Exception as size_err:
- logger.error(
- f"Failed to get file size for '{path_or_url}': {size_err}")
+ logger.error(f"Failed to get file size for '{path_or_url}': {size_err}")
file_size = 0
- # Get progress from status_dict first, then try Redis for real-time updates
+ # Get progress from celery_task_files (already includes Redis batch data)
processed_chunks = status_dict.get('processed_chunks')
total_chunks = status_dict.get('total_chunks')
task_id = status_dict.get('latest_task_id', '')
- # Always try to get latest progress from Redis if task_id exists
- # Redis has the most up-to-date progress during vectorization
- if task_id:
- try:
- redis_service = get_redis_service()
- progress_info = redis_service.get_progress_info(
- task_id)
- if progress_info:
- redis_processed = progress_info.get(
- 'processed_chunks')
- redis_total = progress_info.get('total_chunks')
- if redis_processed is not None:
- processed_chunks = redis_processed
- if redis_total is not None:
- total_chunks = redis_total
- logger.debug(
- f"Retrieved progress from Redis for task {task_id}: {processed_chunks}/{total_chunks}")
- except Exception as e:
- logger.debug(
- f"Failed to get progress from Redis for task {task_id}: {str(e)}")
-
if path_or_url in files_map:
file_data = files_map[path_or_url]
else:
@@ -1102,13 +1431,12 @@ async def list_files(
}
files_map[path_or_url] = file_data
- file_data['status'] = status_dict.get('state', file_data.get(
- 'status', 'UNKNOWN'))
+ file_data['status'] = status_dict.get('state', file_data.get('status', 'UNKNOWN'))
file_data['latest_task_id'] = task_id
file_data['processed_chunk_num'] = processed_chunks
file_data['total_chunk_num'] = total_chunks
- # Get error reason for failed documents
+ # Get error reason for failed documents (fetch from Redis batch if needed)
if task_id and status_dict.get('state') in ['PROCESS_FAILED', 'FORWARD_FAILED']:
try:
redis_service = get_redis_service()
@@ -1116,17 +1444,20 @@ async def list_files(
if error_reason:
file_data['error_reason'] = error_reason
file_data['has_error_info'] = True
- except Exception as e:
- logger.debug(
- f"Failed to get error info for task {task_id}: {str(e)}")
+ except Exception:
+ pass # Error info is optional, don't fail the request
+ step4_duration = time.time() - step4_start
+ logger.info(f"[list_files:step4] Merge celery tasks: {celery_file_count} tasks in {step4_duration:.3f}s")
files = list(files_map.values())
+ logger.info(f"[list_files:step4] Total files built: {len(files)}")
# Unified chunks processing for all files
if include_chunks:
- # Prepare msearch body for all completed files
+ step5_start = time.time()
completed_files_map = {
f['path_or_url']: f for f in files if f['status'] == "COMPLETED"}
+ completed_count = len(completed_files_map)
msearch_body = []
for path_or_url in completed_files_map.keys():
@@ -1137,7 +1468,6 @@ async def list_files(
"_source": ["id", "title", "content", "create_time"]
})
- # Initialize chunks for all files
for file_data in files:
file_data['chunks'] = []
file_data['chunk_count'] = file_data.get('chunk_count', 0)
@@ -1169,46 +1499,30 @@ async def list_files(
})
file_data['chunks'] = chunks
- # Get accurate chunk count using count query instead of len(chunks)
- # because msearch may have size limits
- try:
- count_result = vdb_core.client.count(
- index=index_name,
- body={
- "query": {"term": {"path_or_url": file_path}}}
- )
- file_data['chunk_count'] = count_result.get(
- "count", len(chunks))
- except Exception as count_err:
- logger.warning(
- f"Failed to get chunk count for {file_path}: {count_err}, using len(chunks)")
- file_data['chunk_count'] = len(chunks)
+ # chunk_count from aggregation is already accurate
+ # no need for additional count queries
except Exception as e:
logger.error(
f"Error during msearch for chunks: {str(e)}")
+ step5_duration = time.time() - step5_start
+ logger.info(f"[list_files:step5] ES msearch chunks: {completed_count} files in {step5_duration:.3f}s")
else:
- # When include_chunks=False, ensure chunk_count is accurate for completed files
+ # When include_chunks=False, chunk_count is already accurate from ES aggregation
+ # No need for additional count queries - doc_count from terms aggregation is accurate
for file_data in files:
file_data['chunks'] = []
- if file_data.get('status') == "COMPLETED":
- # Always re-query chunk count for completed files to ensure accuracy
- try:
- count_result = vdb_core.client.count(
- index=index_name,
- body={
- "query": {"term": {"path_or_url": file_data.get('path_or_url')}}}
- )
- file_data['chunk_count'] = count_result.get(
- "count", 0)
- except Exception as count_err:
- logger.warning(
- f"Failed to get chunk count for {file_data.get('path_or_url')}: {count_err}")
- file_data['chunk_count'] = file_data.get(
- 'chunk_count', 0)
- else:
- file_data['chunk_count'] = file_data.get(
- 'chunk_count', 0)
+ # chunk_count is already set from ES aggregation (doc_count)
+ file_data['chunk_count'] = file_data.get('chunk_count', 0)
+
+ for file_data in files:
+ file_data["source_available"] = (
+ ElasticSearchService._compute_source_available(file_data)
+ )
+
+ total_duration = time.time() - total_start_time
+ logger.info(f"[list_files:complete] index={index_name}, total_files={len(files)}, "
+ f"total_duration={total_duration:.3f}s")
return {"files": files}
@@ -1216,6 +1530,100 @@ async def list_files(
raise Exception(
f"Error getting file list for index {index_name}: {str(e)}")
+    DOCUMENT_DELETE_SCOPES = ("source_only", "full")  # scope values accepted by delete_document_by_scope
+
+    @staticmethod
+    def _preview_pdf_cache_object_name(object_name: str) -> str:
+        """Object key for Office-to-PDF preview cache (matches file_management_service)."""
+        # rsplit yields the whole name when there is no "." so no guard is needed.
+        stem = object_name.rsplit(".", 1)[0]
+        # Short digest of the full object name, extension included.
+        digest = hashlib.md5(object_name.encode()).hexdigest()
+        return f"preview/converted/{stem}_{digest[:8]}.pdf"
+
+    @staticmethod
+    def _compute_source_available(file_data: Dict[str, Any]) -> bool:
+        """Return file_exists(path) for COMPLETED files under "knowledge_base/", else True."""
+        location = file_data.get("path_or_url") or ""
+        is_completed = file_data.get("status", "") == "COMPLETED"
+        # Non-COMPLETED files and paths outside "knowledge_base/" are never
+        # looked up; they are always reported as available.
+        needs_storage_check = is_completed and location.startswith("knowledge_base/")
+        return file_exists(location) if needs_storage_check else True
+
+    @staticmethod
+    def delete_source_file(path_or_url: str) -> Dict[str, Any]:
+        """
+        Remove MinIO source (and preview cache); does not touch Elasticsearch.
+
+        Returns {"deleted_minio": bool} taken from the "success" field of the
+        delete_file result. Preview-cache cleanup is only attempted for paths
+        under "knowledge_base/"; its failures are logged and never raised.
+        """
+        outcome = {"deleted_minio": bool(delete_file(path_or_url).get("success"))}
+        if not path_or_url.startswith("knowledge_base/"):
+            return outcome
+
+        cache_key = ElasticSearchService._preview_pdf_cache_object_name(path_or_url)
+        try:
+            if file_exists(cache_key):
+                delete_file(cache_key)
+        except Exception as exc:
+            message = "Failed to delete preview cache for '%s': %s"
+            logger.warning(message, path_or_url, exc)
+        return outcome
+
+    @staticmethod
+    async def _assert_source_only_deletable(
+        index_name: str, path_or_url: str
+    ) -> None:
+        """Raise ValueError when the document's reported task state is non-empty and not COMPLETED."""
+        all_statuses = await get_all_files_status(index_name)
+        entry = all_statuses.get(path_or_url)
+        # No usable status entry, or an empty state, means nothing blocks deletion.
+        state = entry.get("state") if entry and isinstance(entry, dict) else None
+        if state and state != "COMPLETED":
+            raise ValueError(
+                f"Cannot delete source file while document is in state '{state}'. "
+                "Wait until processing completes or use scope=full to remove the document."
+            )
+
+    @staticmethod
+    async def delete_document_by_scope(
+        index_name: str,
+        path_or_url: str,
+        scope: str,
+        vdb_core: VectorDatabaseCore,
+    ) -> Dict[str, Any]:
+        """
+        Delete only a document's source file, or the document as a whole.
+
+        scope="source_only" runs the _assert_source_only_deletable pre-check and
+        then delete_source_file; scope="full" delegates to delete_documents.
+        Both result dicts carry "scope" and "source_available": False.
+
+        Raises:
+            ValueError: If scope is invalid or the source_only pre-check rejects.
+        """
+        allowed_scopes = ElasticSearchService.DOCUMENT_DELETE_SCOPES
+        if scope not in allowed_scopes:
+            raise ValueError(
+                f"Invalid scope '{scope}'. Must be one of: {allowed_scopes}"
+            )
+
+        if scope != "source_only":
+            result = ElasticSearchService.delete_documents(index_name, path_or_url, vdb_core)
+            result.update(scope=scope, source_available=False)
+            return result
+
+        await ElasticSearchService._assert_source_only_deletable(index_name, path_or_url)
+        minio_part = ElasticSearchService.delete_source_file(path_or_url)
+        return {
+            "status": "success", "scope": scope, "deleted_es_count": 0,
+            "deleted_minio": minio_part.get("deleted_minio", False), "source_available": False,
+            "message": "Source file deleted; index chunks and vectors preserved.",
+        }
+
@staticmethod
def delete_documents(
index_name: str = Path(..., description="Name of the index"),
@@ -1228,6 +1636,10 @@ def delete_documents(
index_name, path_or_url)
# 2. Delete MinIO file
minio_result = delete_file(path_or_url)
+
+ # Update last_doc_update_time for auto-summary tracking
+ update_last_doc_update_time(index_name)
+
return {"status": "success", "deleted_es_count": deleted_count, "deleted_minio": minio_result.get("success")}
@staticmethod
@@ -1450,6 +1862,8 @@ def change_summary(
"index_name": index_name
}
update_knowledge_record(update_data)
+ # Update last_summary_time for auto-summary tracking
+ update_last_summary_time(index_name)
return {"status": "success", "message": f"Index {index_name} summary updated successfully",
"summary": summary_result}
except Exception as e:
@@ -1550,23 +1964,23 @@ def create_chunk(
Automatically generates and stores embedding for semantic search.
"""
try:
- # Get knowledge base's embedding model name
- embedding_model_name = None
+ # Get knowledge base's embedding model by model_id
+ embedding_model_id = None
if tenant_id:
try:
knowledge_record = get_knowledge_record({
"index_name": index_name,
"tenant_id": tenant_id
})
- embedding_model_name = knowledge_record.get("embedding_model_name") if knowledge_record else None
+ embedding_model_id = knowledge_record.get("embedding_model_id") if knowledge_record else None
except Exception as e:
- logger.warning(f"Failed to get embedding model name for index {index_name}: {e}")
+ logger.warning(f"Failed to get embedding model id for index {index_name}: {e}")
# Generate embedding if we have content and can get embedding model
embedding_vector = None
if chunk_request.content:
try:
- embedding_model = get_embedding_model(tenant_id, embedding_model_name) if tenant_id else None
+ embedding_model = get_embedding_model_by_id(tenant_id, embedding_model_id)[0] if tenant_id and embedding_model_id else None
if embedding_model:
embeddings = embedding_model.get_embeddings(chunk_request.content)
if embeddings and len(embeddings) > 0:
@@ -1596,8 +2010,8 @@ def create_chunk(
# Add embedding if generated
if embedding_vector:
chunk_payload["embedding"] = embedding_vector
- if embedding_model_name:
- chunk_payload["embedding_model_name"] = embedding_model_name
+ if embedding_model_id:
+ chunk_payload["embedding_model_id"] = embedding_model_id
result = vdb_core.create_chunk(index_name, chunk_payload)
return {
@@ -1617,6 +2031,7 @@ def update_chunk(
chunk_request: ChunkUpdateRequest,
vdb_core: VectorDatabaseCore = Depends(get_vector_db_core),
user_id: Optional[str] = None,
+ tenant_id: Optional[str] = None,
):
"""
Update a chunk document.
@@ -1700,10 +2115,23 @@ def search_hybrid(
if weight_accurate < 0 or weight_accurate > 1:
raise ValueError("weight_accurate must be between 0 and 1")
- embedding_model = get_embedding_model(tenant_id)
+ # Get embedding model from the first index's knowledge base record
+ if not index_names:
+ raise ValueError("At least one index name is required")
+
+ embedding_model, model_id, meta = get_embedding_model_by_index_name(tenant_id, index_names[0])
+
if not embedding_model:
- raise ValueError(
- "No embedding model configured for the current tenant")
+ if meta.get("status") == "needs_config":
+ # Return a clear error indicating model needs to be configured
+ raise KnowledgeBaseNeedsModelConfigError(
+ index_name=index_names[0],
+ message=f"Knowledge base '{index_names[0]}' does not have an embedding model configured. Please select a model in the knowledge base settings."
+ )
+ else:
+ raise ValueError(
+ f"No embedding model found for index '{index_names[0]}'. "
+ f"Please configure an embedding model for this knowledge base.")
start_time = time.perf_counter()
raw_results = vdb_core.hybrid_search(
@@ -1729,6 +2157,8 @@ def search_hybrid(
"total": len(formatted_results),
"query_time_ms": elapsed_ms,
}
+ except KnowledgeBaseNeedsModelConfigError:
+ raise
except ValueError:
raise
except Exception as exc:
diff --git a/backend/services/voice_service.py b/backend/services/voice_service.py
index 05dba6231..5a08e1f8b 100644
--- a/backend/services/voice_service.py
+++ b/backend/services/voice_service.py
@@ -1,17 +1,22 @@
import asyncio
import logging
-from typing import Any, Optional
+from typing import Any, Dict, Optional
-from nexent.core.models.stt_model import STTConfig, STTModel
-from nexent.core.models.tts_model import TTSConfig, TTSModel
+from nexent.core.models.stt_model import BaseSTTModel
+from nexent.core.models.tts_model import BaseTTSModel
+from nexent.core.models.volc_stt_model import VolcSTTConfig, VolcSTTModel
+from nexent.core.models.ali_stt_model import AliSTTConfig, AliSTTModel
+from nexent.core.models.volc_tts_model import VolcTTSConfig, VolcTTSModel
+from nexent.core.models.ali_tts_model import AliTTSConfig, AliTTSModel
-from consts.const import APPID, CLUSTER, SPEED_RATIO, TEST_VOICE_PATH, TOKEN, VOICE_TYPE
+from consts.const import TEST_VOICE_PATH, TEST_PCM_PATH
from consts.exceptions import (
VoiceServiceException,
STTConnectionException,
TTSConnectionException,
- VoiceConfigException
)
+from database.model_management_db import get_model_records
+from utils.config_utils import tenant_config_manager
logger = logging.getLogger("voice_service")
@@ -19,56 +24,311 @@
class VoiceService:
"""Voice service that handles STT and TTS operations"""
- def __init__(self):
- """Initialize the voice service with configurations from const.py"""
- try:
- # Initialize STT configuration
- self.stt_config = STTConfig(
- appid=APPID,
- token=TOKEN
- )
+    def _get_stt_model_from_config(
+        self,
+        model_factory: Optional[str] = None,
+        model_name: Optional[str] = None,
+        api_key: Optional[str] = None,
+        model_appid: Optional[str] = None,
+        access_token: Optional[str] = None,
+        base_url: Optional[str] = None,
+        language: str = "zh"
+    ) -> BaseSTTModel:
+        """
+        Build a Volcano Engine or Ali Cloud STT model from explicit settings.
-            # Initialize TTS configuration
-            self.tts_config = TTSConfig(
-                appid=APPID,
-                token=TOKEN,
-                cluster=CLUSTER,
-                voice_type=VOICE_TYPE,
-                speed_ratio=SPEED_RATIO
+        Args:
+            model_factory: Vendor name; a Volcano alias (case-insensitive) selects Volcano, anything else Ali
+            model_name: Ali model name (defaults to "qwen3-asr-flash-realtime"); unused for Volcano
+            api_key: API key (for Ali STT)
+            model_appid: Application ID (for Volcano STT)
+            access_token: Access token (for Volcano STT)
+            base_url: Custom WebSocket URL; when empty Volcano uses its bigmodel endpoint and Ali gets None
+            language: Recognition language, passed to the Ali config only
+
+        Returns:
+            VolcSTTModel or AliSTTModel, both constructed with TEST_PCM_PATH
+        """
+        # Ali Cloud is the fallback for every factory value outside the Volcano aliases, including None
+        use_volc = model_factory and model_factory.lower() in ["volc", "volcano", "volcengine", "火山引擎"]
+        # Missing credentials are forwarded as empty strings rather than rejected here
+        if use_volc:
+            # Use Volcano Engine STT
+            volc_config = VolcSTTConfig(
+                appid=model_appid or "",
+                access_token=access_token or "",
+                ws_url=base_url if base_url else "wss://openspeech.bytedance.com/api/v3/sauc/bigmodel",
+                format="pcm",
+                rate=16000
             )
+            return VolcSTTModel(volc_config, TEST_PCM_PATH)
+        else:
+            # Use Ali Cloud STT (default)
+            ali_config = AliSTTConfig(
+                api_key=api_key or "",
+                model=model_name or "qwen3-asr-flash-realtime",
+                language=language,
+                ws_url=base_url if base_url else None,
+                format="pcm",
+                rate=16000,
+                enable_vad=True,
+                timeout=5
+            )
+            return AliSTTModel(ali_config, TEST_PCM_PATH)
+
+    def _get_stt_model_from_tenant_config(
+        self,
+        tenant_id: str,
+        language: str = "zh"
+    ) -> BaseSTTModel:
+        """
+        Build an STT model from the tenant's stored configuration, falling back to defaults.
-            # Initialize models
-            self.stt_model = STTModel(self.stt_config, TEST_VOICE_PATH)
-            self.tts_model = TTSModel(self.tts_config)
+        Args:
+            tenant_id: Tenant ID
+            language: Language for speech recognition
+
+        Returns:
+            STT model from the tenant config, else the first "stt" model record, else a default-config model
+        """
+        try:
+            # First source: the tenant config entry for "stt"
+            stt_config = tenant_config_manager.get_model_config(tenant_id, "stt")
+
+            if stt_config:
+                model_factory = stt_config.get("model_factory", "")
+                model_name = stt_config.get("model_name", "")
+                api_key = stt_config.get("api_key", "")
+                base_url = stt_config.get("base_url", "")
+                model_appid = stt_config.get("model_appid", "")
+                access_token_val = stt_config.get("access_token", "")
+
+                return self._get_stt_model_from_config(
+                    model_factory=model_factory,
+                    model_name=model_name,
+                    api_key=api_key,
+                    model_appid=model_appid,
+                    access_token=access_token_val,
+                    base_url=base_url,
+                    language=language
+                )
+
+            # Second source: the first "stt" model record returned for this tenant
+            model_records = get_model_records({"model_type": "stt"}, tenant_id)
+            if model_records:
+                record = model_records[0]
+                model_factory = record.get("model_factory", "")
+                model_name = record.get("model_name", "")
+                api_key = record.get("api_key", "")
+                base_url = record.get("base_url", "")
+                model_appid = record.get("model_appid", "")
+                access_token_val = record.get("access_token", "")
+
+                return self._get_stt_model_from_config(
+                    model_factory=model_factory,
+                    model_name=model_name,
+                    api_key=api_key,
+                    model_appid=model_appid,
+                    access_token=access_token_val,
+                    base_url=base_url,
+                    language=language
+                )
+            # Neither source had an entry: build the model from default arguments
+            logger.warning(f"No STT model configuration found for tenant {tenant_id}, using default config")
+            return self._get_stt_model_from_config(language=language)
         except Exception as e:
-            logger.error(f"Failed to initialize voice service: {str(e)}")
-            raise VoiceConfigException(f"Voice service initialization failed: {str(e)}") from e
+            logger.error(f"Error getting STT model config for tenant {tenant_id}: {str(e)}")
+            return self._get_stt_model_from_config(language=language)
+
+    def _get_tts_model_from_config(
+        self,
+        model_factory: Optional[str] = None,
+        api_key: Optional[str] = None,
+        model_appid: Optional[str] = None,
+        access_token: Optional[str] = None,
+        speed_ratio: float = 1.0,
+        base_url: Optional[str] = None,
+        model: Optional[str] = None
+    ) -> BaseTTSModel:
+        """
+        Build a Volcano Engine or Ali Cloud TTS model from explicit settings.
+
+        Args:
+            model_factory: Vendor name; a Volcano alias (case-insensitive) selects Volcano, anything else Ali
+            api_key: API key (for Ali TTS)
+            model_appid: Application ID (for Volcano TTS)
+            access_token: Access token (for Volcano TTS)
+            speed_ratio: Speech speed, passed as speed_ratio (Volcano) or speech_rate (Ali)
+            base_url: Custom WebSocket URL (optional); an empty value is passed on as None
+            model: Ali model name (defaults to "qwen3-tts-flash"); unused for Volcano
-    async def start_stt_streaming_session(self, websocket) -> None:
+        Returns:
+            VolcTTSModel or AliTTSModel built from the matching config object
         """
-        Start STT streaming session
+        use_volc = model_factory and model_factory.lower() in ["volc", "volcano", "volcengine", "火山引擎"]
+        # Missing credentials are forwarded as empty strings rather than rejected here
+        if use_volc:
+            volc_config = VolcTTSConfig(
+                appid=model_appid or "",
+                token=access_token or "",
+                speed_ratio=speed_ratio,
+                ws_url=base_url or None,
+            )
+            return VolcTTSModel(volc_config)
+        else:
+            ali_config = AliTTSConfig(
+                api_key=api_key or "",
+                model=model or "qwen3-tts-flash",
+                voice="Cherry",
+                speech_rate=speed_ratio,
+                ws_url=base_url if base_url else None
+            )
+            return AliTTSModel(ali_config)
+
+    def _get_tts_model_from_tenant_config(
+        self,
+        tenant_id: str
+    ) -> BaseTTSModel:
+        """
+        Build a TTS model from the tenant's stored model configuration.
+
+        The tenant config entry for "tts" is tried first, then the first "tts"
+        model record. If neither exists, or anything raises along the way, a
+        model built from the default configuration is returned instead.
+
+        Args:
+            tenant_id: Tenant ID
+
+        Returns:
+            TTS model instance based on tenant's configuration
+        """
+        try:
+            tts_config = tenant_config_manager.get_model_config(tenant_id, "tts")
+            if tts_config:
+                model_factory = tts_config.get("model_factory", "")
+                api_key = tts_config.get("api_key", "")
+                model_appid = tts_config.get("model_appid", "")
+                access_token_val = tts_config.get("access_token", "")
+                # `or 1.0` keeps a stored null/empty speed_ratio from making float() raise
+                speed_ratio = float(tts_config.get("speed_ratio") or 1.0)
+                base_url = tts_config.get("base_url", "")
+                model = tts_config.get("model") or tts_config.get("model_name", "")
+                return self._get_tts_model_from_config(
+                    model_factory=model_factory,
+                    api_key=api_key,
+                    model_appid=model_appid,
+                    access_token=access_token_val,
+                    speed_ratio=speed_ratio,
+                    base_url=base_url if base_url else None,
+                    model=model if model else None
+                )
+
+            model_records = get_model_records({"model_type": "tts"}, tenant_id)
+            if model_records:
+                record = model_records[0]
+                model_factory = record.get("model_factory", "")
+                api_key = record.get("api_key", "")
+                model_appid = record.get("model_appid", "")
+                access_token_val = record.get("access_token", "")
+                speed_ratio = float(record.get("speed_ratio") or 1.0)
+                base_url = record.get("base_url", "")
+                model = record.get("model_name", "")
+                return self._get_tts_model_from_config(
+                    model_factory=model_factory,
+                    api_key=api_key,
+                    model_appid=model_appid,
+                    access_token=access_token_val,
+                    speed_ratio=speed_ratio,
+                    base_url=base_url if base_url else None,
+                    model=model if model else None
+                )
+            logger.warning(f"No TTS model configuration found for tenant {tenant_id}, using default config")
+            return self._get_tts_model_from_config()
+        except Exception as e:
+            logger.error(f"Error getting TTS model config for tenant {tenant_id}: {str(e)}")
+            return self._get_tts_model_from_config()
+
+    async def start_stt_streaming_session(
+        self,
+        websocket,
+        stt_config: Optional[Dict[str, Any]] = None,
+        tenant_id: Optional[str] = None,
+        language: str = "zh"
+    ) -> None:
+        """
+        Start an STT streaming session on a model chosen from client config, tenant config, or defaults.
         Args:
             websocket: WebSocket connection for real-time audio streaming
+            stt_config: Client-supplied settings; used directly when they include a factory, API key or app id
+            tenant_id: Tenant whose stored STT config is used when stt_config does not select a model
+            language: Language for speech recognition (default: zh); stt_config["language"] overrides it
         Raises:
             STTConnectionException: If STT streaming fails
         """
         try:
-            logger.info("Starting STT streaming session")
-            await self.stt_model.start_streaming_session(websocket)
+            model_factory = None
+            model_name = None
+            api_key = None
+            model_appid = None
+            access_token = None
+            base_url = None
+            # Read client values, accepting the alternate key spellings checked below
+            if stt_config:
+                model_factory = stt_config.get("model_factory")
+                model_name = stt_config.get("model") or stt_config.get("model_name")
+                api_key = stt_config.get("api_key") or stt_config.get("apiKey")
+                model_appid = stt_config.get("model_appid") or stt_config.get("appid")
+                access_token = stt_config.get("access_token")
+                base_url = stt_config.get("base_url") or stt_config.get("baseUrl")
+                language = stt_config.get("language", language)
+            else:
+                logger.warning("No stt_config provided, will use tenant model config if available")
+
+            # Precedence: explicit client config, then the tenant's stored config, then the default model
+            if model_factory or api_key or model_appid:
+                stt_model = self._get_stt_model_from_config(
+                    model_factory=model_factory,
+                    model_name=model_name,
+                    api_key=api_key,
+                    model_appid=model_appid,
+                    access_token=access_token,
+                    base_url=base_url,
+                    language=language
+                )
+            elif tenant_id:
+                stt_model = self._get_stt_model_from_tenant_config(tenant_id, language)
+            else:
+                logger.warning("No tenant_id provided and no explicit config, using default Ali STT")
+                stt_model = self._get_stt_model_from_config(
+                    api_key=api_key,
+                    language=language
+                )
+
+            await stt_model.start_streaming_session(websocket)
         except Exception as e:
             logger.error(f"STT streaming session failed: {str(e)}")
             raise STTConnectionException(f"STT streaming failed: {str(e)}") from e
- async def generate_tts_speech(self, text: str, stream: bool = True) -> Any:
+ async def generate_tts_speech(
+ self,
+ text: str,
+ stream: bool = True,
+ tts_config: Optional[Dict[str, Any]] = None,
+ tenant_id: Optional[str] = None,
+ model_name_override: Optional[str] = None
+ ) -> Any:
"""
Generate TTS speech from text
Args:
text: Text to convert to speech
stream: Whether to stream the audio or return complete audio
+ tts_config: TTS configuration dict from client (preferred)
+ tenant_id: Tenant ID for model lookup
+ model_name_override: Model name override
Returns:
Audio data (streaming or complete)
@@ -81,67 +341,145 @@ async def generate_tts_speech(self, text: str, stream: bool = True) -> Any:
try:
logger.info(f"Generating TTS speech for text: {text[:50]}...")
- speech_result = await self.tts_model.generate_speech(text, stream=stream)
+
+ model_factory = None
+ api_key = None
+ model_appid = None
+ access_token = None
+ speed_ratio = 1.0
+ base_url = None
+ model_name = None
+
+ if tts_config:
+ model_factory = tts_config.get("model_factory")
+ api_key = tts_config.get("api_key") or tts_config.get("apiKey")
+ model_appid = tts_config.get("model_appid") or tts_config.get("appid")
+ access_token = tts_config.get("access_token")
+ speed_ratio = float(tts_config.get("speed_ratio", 1.0))
+ base_url = tts_config.get("base_url") or tts_config.get("baseUrl")
+ model_name = tts_config.get("model") or tts_config.get("model_name")
+
+ # If model_name is provided directly, use it
+ effective_model = model_name_override or model_name
+ logger.info(f"TTS config - api_key: {bool(api_key)}, model_name_override: {model_name_override}, "
+ f"model_name from config: {model_name}, effective_model: {effective_model}")
+
+
+ # Determine model factory and create appropriate TTS model
+ use_volc = model_factory and model_factory.lower() in ["volc", "volcano", "volcengine", "火山引擎"]
+
+ if use_volc:
+ # Use Volcano TTS
+ tts_model = self._get_tts_model_from_config(
+ model_factory=model_factory,
+ api_key=api_key,
+ model_appid=model_appid,
+ access_token=access_token,
+ speed_ratio=speed_ratio,
+ base_url=base_url,
+ model=effective_model
+ )
+ logger.info(f"TTS model created: Volcano TTS (factory={model_factory})")
+ elif api_key:
+ # Use Ali TTS with provided api_key
+ tts_model = self._get_tts_model_from_config(
+ model_factory=model_factory,
+ api_key=api_key,
+ model_appid=model_appid,
+ access_token=access_token,
+ speed_ratio=speed_ratio,
+ base_url=base_url,
+ model=effective_model
+ )
+ logger.info(f"TTS model created: Ali TTS (api_key provided)")
+ elif tenant_id:
+ tts_model = self._get_tts_model_from_tenant_config(tenant_id)
+ logger.info(f"TTS model created from tenant config for tenant_id={tenant_id}")
+ else:
+ logger.warning("No api_key, model_name, or tenant_id provided, using default TTS model")
+ tts_model = self._get_tts_model_from_config()
+
+ speech_result = await tts_model.generate_speech(text, stream=stream)
return speech_result
except Exception as e:
logger.error(f"TTS generation failed: {str(e)}")
raise TTSConnectionException(f"TTS generation failed: {str(e)}") from e
- async def stream_tts_to_websocket(self, websocket, text: str) -> None:
+    async def stream_tts_to_websocket(
+        self,
+        websocket,
+        text: str,
+        tenant_id: Optional[str] = None,
+        model_name: Optional[str] = None,
+        tts_config: Optional[Dict[str, Any]] = None,
+    ) -> None:
         """
         Stream TTS audio to WebSocket with proper error handling and fallback
         Args:
             websocket: WebSocket connection to stream to
             text: Text to convert to speech
+            tenant_id: Optional tenant ID for model selection
+            model_name: Optional model name override
+            tts_config: Optional TTS configuration dict with model_factory, api_key, model_appid, access_token, base_url
         Raises:
             TTSConnectionException: If TTS service connection fails
             VoiceServiceException: If TTS streaming fails
         """
-        try:
-            # Generate and stream audio chunks
-            speech_result = await self.generate_tts_speech(text, stream=True)
-
-            # Check if it's an async iterator or a regular iterable
-            if hasattr(speech_result, '__aiter__'):
-                # It's an async iterator, use async for
-                async for chunk in speech_result:
-                    if websocket.client_state.name == "CONNECTED":
-                        await websocket.send_bytes(chunk)
-                    else:
-                        break
-            elif hasattr(speech_result, '__iter__'):
-                # It's a regular iterator, use normal for
-                for chunk in speech_result:
-                    if websocket.client_state.name == "CONNECTED":
-                        await websocket.send_bytes(chunk)
-                    else:
-                        break
-            else:
-                # It's a single chunk, send it directly
+        speech_result = await self.generate_tts_speech(
+            text,
+            stream=True,
+            tenant_id=tenant_id,
+            model_name_override=model_name,
+            tts_config=tts_config
+        )
+
+        # Dispatch on the result type: async iterator, sync iterable, or one bytes-like chunk
+        if hasattr(speech_result, '__aiter__'):
+            # It's an async iterator, use async for
+            async for chunk in speech_result:
                 if websocket.client_state.name == "CONNECTED":
-                    await websocket.send_bytes(speech_result)
-
-            await asyncio.sleep(0.1)
-
-        except TypeError as te:
-            # If speech_result is still a coroutine, try calling it directly without stream=True
-            if "async for" in str(te) and "requires an object with __aiter__" in str(te):
-                logger.error("Falling back to non-streaming TTS")
-                speech_data = await self.generate_tts_speech(text, stream=False)
+                    await websocket.send_bytes(chunk)
+                else:
+                    break
+        elif hasattr(speech_result, '__iter__') and not isinstance(speech_result, (bytes, bytearray)):
+            # A sync iterable of chunks; bytes/bytearray are excluded because iterating them yields ints
+            for chunk in speech_result:
                 if websocket.client_state.name == "CONNECTED":
-                    await websocket.send_bytes(speech_data)
-            else:
-                raise
+                    await websocket.send_bytes(chunk)
+                else:
+                    break
+        else:
+            # A single chunk (including raw bytes/bytearray), sent as-is
+            if websocket.client_state.name == "CONNECTED":
+                await websocket.send_bytes(speech_result)
         # Send end marker after successful TTS generation
         if websocket.client_state.name == "CONNECTED":
             await websocket.send_json({"status": "completed"})
- async def check_stt_connectivity(self) -> bool:
+ async def check_stt_connectivity(
+ self,
+ model_factory: Optional[str] = None,
+ api_key: Optional[str] = None,
+ model_appid: Optional[str] = None,
+ access_token: Optional[str] = None,
+ language: str = "zh",
+ model: str = "qwen3-asr-flash-realtime",
+ base_url: Optional[str] = None
+ ) -> bool:
"""
- Check STT service connectivity
+ Check STT service connectivity.
+
+ Args:
+ model_factory: Model factory/vendor name (e.g., "volc", "dashscope")
+ api_key: API key for Ali STT
+ model_appid: Application ID for Volcano STT
+ access_token: Access token for Volcano STT
+ language: Language for speech recognition (default: zh)
+ model: STT model name (default: qwen3-asr-flash-realtime)
+ base_url: Custom WebSocket URL (optional)
Returns:
bool: True if STT service is connected, False otherwise
@@ -150,8 +488,20 @@ async def check_stt_connectivity(self) -> bool:
STTConnectionException: If connectivity check fails
"""
try:
- logger.info(f"Checking STT connectivity with config: {self.stt_config}")
- connected = await self.stt_model.check_connectivity()
+ # Get STT model based on factory
+ stt_model = self._get_stt_model_from_config(
+ model_factory=model_factory,
+ model_name=model,
+ api_key=api_key,
+ model_appid=model_appid,
+ access_token=access_token,
+ base_url=base_url,
+ language=language
+ )
+
+
+ connected = await stt_model.check_connectivity()
+
if not connected:
logger.error("STT service connection failed")
raise STTConnectionException("STT service connection failed")
@@ -162,9 +512,27 @@ async def check_stt_connectivity(self) -> bool:
logger.error(f"STT connectivity check failed: {str(e)}")
raise STTConnectionException(f"STT connectivity check failed: {str(e)}") from e
- async def check_tts_connectivity(self) -> bool:
+ async def check_tts_connectivity(
+ self,
+ model_factory: Optional[str] = None,
+ api_key: Optional[str] = None,
+ model_appid: Optional[str] = None,
+ access_token: Optional[str] = None,
+ speed_ratio: float = 1.0,
+ base_url: Optional[str] = None,
+ model: Optional[str] = None
+ ) -> bool:
"""
- Check TTS service connectivity
+ Check TTS service connectivity.
+
+ Args:
+ model_factory: Model factory/vendor name (e.g., "volc", "dashscope")
+ api_key: API key for Ali TTS
+ model_appid: Application ID for Volcano TTS
+ access_token: Access token for Volcano TTS
+ speed_ratio: Speech speed ratio
+ base_url: Custom WebSocket URL (optional)
+ model: Model name (e.g., "qwen3-tts-flash")
Returns:
bool: True if TTS service is connected, False otherwise
@@ -173,11 +541,21 @@ async def check_tts_connectivity(self) -> bool:
TTSConnectionException: If connectivity check fails
"""
try:
- logger.info(f"Checking TTS connectivity with config: {self.tts_config}")
- connected = await self.tts_model.check_connectivity()
+ tts_model = self._get_tts_model_from_config(
+ model_factory=model_factory,
+ api_key=api_key,
+ model_appid=model_appid,
+ access_token=access_token,
+ speed_ratio=speed_ratio,
+ base_url=base_url,
+ model=model
+ )
+
+ connected = await tts_model.check_connectivity()
if not connected:
- logger.error("TTS service connection failed")
- raise TTSConnectionException("TTS service connection failed")
+ msg = "TTS service connectivity check returned False"
+ logger.warning(msg)
+ raise TTSConnectionException(msg)
return connected
except TTSConnectionException:
raise
@@ -185,12 +563,17 @@ async def check_tts_connectivity(self) -> bool:
logger.error(f"TTS connectivity check failed: {str(e)}")
raise TTSConnectionException(f"TTS connectivity check failed: {str(e)}") from e
- async def check_voice_connectivity(self, model_type: str) -> bool:
+ async def check_voice_connectivity(
+ self,
+ model_type: str,
+ stt_config: Optional[Dict[str, Any]] = None
+ ) -> bool:
"""
- Check voice service connectivity based on model type
+ Check voice service connectivity based on model type.
Args:
model_type: Type of model to check ('stt' or 'tts')
+ stt_config: Optional STT configuration dict
Returns:
bool: True if the specified service is connected, False otherwise
@@ -202,9 +585,44 @@ async def check_voice_connectivity(self, model_type: str) -> bool:
"""
try:
if model_type == 'stt':
- return await self.check_stt_connectivity()
+ model_factory = stt_config.get("model_factory") if stt_config else None
+ api_key = stt_config.get("api_key") if stt_config else None
+ model_appid = stt_config.get("model_appid") if stt_config else None
+ access_token = stt_config.get("access_token") if stt_config else None
+ language = stt_config.get("language", "zh") if stt_config else "zh"
+ model = stt_config.get("model", "qwen3-asr-flash-realtime") if stt_config else "qwen3-asr-flash-realtime"
+ base_url = stt_config.get("base_url") if stt_config else None
+
+ return await self.check_stt_connectivity(
+ model_factory=model_factory,
+ api_key=api_key,
+ model_appid=model_appid,
+ access_token=access_token,
+ language=language,
+ model=model,
+ base_url=base_url
+ )
elif model_type == 'tts':
- return await self.check_tts_connectivity()
+ model_factory = stt_config.get("model_factory") if stt_config else None
+ api_key = stt_config.get("api_key") if stt_config else None
+ model_appid = stt_config.get("model_appid") if stt_config else None
+ access_token = stt_config.get("access_token") if stt_config else None
+ speed_ratio = float(stt_config.get("speed_ratio", 1.0)) if stt_config else 1.0
+ base_url = stt_config.get("base_url") if stt_config else None
+ model = stt_config.get("model", "qwen3-tts-flash") if stt_config else "qwen3-tts-flash"
+
+ connected = await self.check_tts_connectivity(
+ model_factory=model_factory,
+ api_key=api_key,
+ model_appid=model_appid,
+ access_token=access_token,
+ speed_ratio=speed_ratio,
+ base_url=base_url,
+ model=model
+ )
+ if not connected:
+ raise TTSConnectionException("TTS service connectivity check returned False")
+ return connected
else:
logger.error(f"Unknown model type: {model_type}")
raise VoiceServiceException(f"Unknown model type: {model_type}")
diff --git a/backend/utils/a2a_http_client.py b/backend/utils/a2a_http_client.py
index 2bc829403..8b7c55d9f 100644
--- a/backend/utils/a2a_http_client.py
+++ b/backend/utils/a2a_http_client.py
@@ -134,6 +134,7 @@ async def get_json(
"User-Agent": "Nexent-A2A-Client/1.0",
"Accept": CONTENT_TYPE_JSON,
"Connection": "close",
+ "A2A-Version": "1.0",
}
if headers:
request_headers.update(headers)
@@ -141,14 +142,24 @@ async def get_json(
logger.debug(f"A2A GET request: url={url}")
try:
- _, body = await self._request_with_retry(
+ status, body = await self._request_with_retry(
"GET",
url,
headers=request_headers
)
+ # Decode body and handle empty responses
+ body_text = body.decode('utf-8') if body else ""
+
+ if not body_text.strip():
+ logger.error(
+ f"A2A GET received empty response for {url}: HTTP status={status}. "
+ f"Expected JSON response but got empty body."
+ )
+ raise ValueError(f"Empty response from {url} (HTTP {status})")
+
# Parse JSON from body
import json
- data = json.loads(body.decode('utf-8'))
+ data = json.loads(body_text)
return data
except asyncio.TimeoutError as e:
logger.error(f"A2A GET timeout for {url}: {e}")
@@ -156,6 +167,9 @@ async def get_json(
except aiohttp.ClientResponseError as e:
logger.error(f"A2A GET HTTP error for {url}: {e.status}")
raise
+ except ValueError:
+ # Re-raise empty response errors without wrapping
+ raise
except Exception as e:
import traceback
logger.error(f"A2A GET request failed for {url}: {type(e).__name__}: {e}\n{traceback.format_exc()}")
@@ -176,6 +190,7 @@ async def post_json(
"Content-Type": CONTENT_TYPE_JSON,
"Accept": CONTENT_TYPE_JSON,
"Connection": "close",
+ "A2A-Version": "1.0",
}
if headers:
request_headers.update(headers)
@@ -183,15 +198,29 @@ async def post_json(
logger.info(f"A2A POST request: url={url}, payload={payload}")
try:
- _, body = await self._request_with_retry(
+ status, body = await self._request_with_retry(
"POST",
url,
json=payload,
headers=request_headers
)
+ # Decode body and handle empty responses
+ body_text = body.decode('utf-8') if body else ""
+
+ if not body_text.strip():
+ logger.error(
+ f"A2A POST received empty response for {url}: HTTP status={status}. "
+ f"This usually indicates the remote agent is not responding correctly. "
+ f"Check that the agent URL '{url}' is correct and the agent is running."
+ )
+ raise ValueError(
+ f"Empty response from agent at {url} (HTTP {status}). "
+ f"The agent may be unreachable, still processing, or the endpoint URL is incorrect."
+ )
+
# Parse JSON from body
import json
- data = json.loads(body.decode('utf-8'))
+ data = json.loads(body_text)
return data
except asyncio.TimeoutError as e:
logger.error(f"A2A POST timeout for {url}: {e}")
@@ -199,6 +228,9 @@ async def post_json(
except aiohttp.ClientResponseError as e:
logger.error(f"A2A POST HTTP error for {url}: {e.status}")
raise
+ except ValueError:
+ # Re-raise empty response errors without wrapping
+ raise
except Exception as e:
import traceback
logger.error(f"A2A POST request failed for {url}: {type(e).__name__}: {e}\n{traceback.format_exc()}")
@@ -249,6 +281,7 @@ def build_a2a_headers(api_key: Optional[str] = None) -> Dict[str, str]:
headers = {
"Content-Type": CONTENT_TYPE_JSON,
"Accept": CONTENT_TYPE_JSON,
+ "A2A-Version": "1.0",
}
if api_key:
headers["Authorization"] = f"Bearer {api_key}"
diff --git a/backend/utils/auth_utils.py b/backend/utils/auth_utils.py
index 7b40576e2..4ade6f211 100644
--- a/backend/utils/auth_utils.py
+++ b/backend/utils/auth_utils.py
@@ -3,13 +3,17 @@
import hmac
import hashlib
from datetime import datetime, timedelta
-from typing import Dict, Optional, Tuple
+from typing import Any, Dict, Optional, Tuple
import jwt
+import httpx
from fastapi import Request
from supabase import create_client
+from supabase.lib.client_options import SyncClientOptions
from consts.const import (
+ ASSET_OWNER_ROLE,
+ ASSET_OWNER_TENANT_ID,
DEFAULT_TENANT_ID,
DEFAULT_USER_ID,
IS_SPEED_MODE,
@@ -42,7 +46,9 @@
TIMESTAMP_VALIDITY_WINDOW = 5 * 60
-def calculate_hmac_signature(secret_key: str, access_key: str, timestamp: str, body: str) -> str:
+def calculate_hmac_signature(
+ secret_key: str, access_key: str, timestamp: str, body: str
+) -> str:
"""
Calculate HMAC-SHA256 signature for AK/SK authentication.
@@ -84,7 +90,9 @@ def get_aksk_config(tenant_id: str) -> Tuple[str, str]:
raise UnauthorizedError("AK/SK authentication is not configured")
-def verify_aksk_signature(access_key: str, timestamp: str, signature: str, body: str, tenant_id: str = None) -> bool:
+def verify_aksk_signature(
+ access_key: str, timestamp: str, signature: str, body: str, tenant_id: str = None
+) -> bool:
"""Verify AK/SK signature; returns False instead of raising on mismatch."""
tenant = tenant_id or DEFAULT_TENANT_ID
try:
@@ -95,17 +103,22 @@ def verify_aksk_signature(access_key: str, timestamp: str, signature: str, body:
if access_key != expected_access_key:
return False
- expected_sig = calculate_hmac_signature(secret_key, access_key, timestamp, body)
+ expected_sig = calculate_hmac_signature(
+ secret_key, access_key, timestamp, body)
return hmac.compare_digest(expected_sig, signature)
-def validate_aksk_authentication(headers: Dict[str, str], body: str, tenant_id: str = None) -> bool:
+def validate_aksk_authentication(
+ headers: Dict[str, str], body: str, tenant_id: str = None
+) -> bool:
"""
Validate AK/SK authentication.
Returns True when valid, otherwise raises domain exceptions.
"""
- from consts.exceptions import SignatureValidationError # imported lazily for test-time stubbing
+ from consts.exceptions import (
+ SignatureValidationError,
+ ) # imported lazily for test-time stubbing
try:
access_key, ts, sig = extract_aksk_headers(headers)
@@ -129,6 +142,7 @@ def validate_aksk_authentication(headers: Dict[str, str], body: str, tenant_id:
logger.exception("Unexpected error during AK/SK authentication")
raise UnauthorizedError("Authentication failed") from exc
+
# ---------------------------------------------------------------------------
# Bearer Token (API Key) authentication
# ---------------------------------------------------------------------------
@@ -151,7 +165,11 @@ def validate_bearer_token(authorization: Optional[str]) -> Tuple[bool, Optional[
return False, None
# Extract token from "Bearer " format
- token = authorization.replace("Bearer ", "") if authorization.startswith("Bearer ") else authorization
+ token = (
+ authorization.replace("Bearer ", "")
+ if authorization.startswith("Bearer ")
+ else authorization
+ )
if not token:
logger.warning("Empty bearer token")
@@ -161,7 +179,9 @@ def validate_bearer_token(authorization: Optional[str]) -> Tuple[bool, Optional[
try:
token_info = get_token_by_access_key(token)
if token_info and token_info.get("delete_flag") != "Y":
- logger.debug(f"Token validated successfully for user {token_info.get('user_id')}")
+ logger.debug(
+ f"Token validated successfully for user {token_info.get('user_id')}"
+ )
return True, token_info
else:
logger.warning(f"Invalid or inactive token: {token[:20]}...")
@@ -202,19 +222,59 @@ def get_user_and_tenant_by_access_key(access_key: str) -> Dict[str, str]:
tenant_id = user_tenant_record["tenant_id"]
else:
tenant_id = DEFAULT_TENANT_ID
- logger.warning(f"No tenant relationship found for user {user_id}, using default tenant")
+ logger.warning(
+ f"No tenant relationship found for user {user_id}, using default tenant"
+ )
return {
"user_id": user_id,
"tenant_id": tenant_id,
- "token_id": token_info.get("token_id")
+ "token_id": token_info.get("token_id"),
}
+def resolve_tenant_id_from_user_tenant_record(user_tenant: Dict[str, Any]) -> str:
+    """
+    Map a user_tenant_t record to the tenant_id that should be used for it.
+
+    A non-empty ``tenant_id`` on the record always wins. Otherwise a record
+    whose upper-cased ``user_role`` equals ASSET_OWNER_ROLE resolves to
+    ASSET_OWNER_TENANT_ID, and every other record to DEFAULT_TENANT_ID.
+    """
+    explicit_tenant = user_tenant.get("tenant_id")
+    if not explicit_tenant:
+        role = user_tenant.get("user_role")
+        normalized_role = role.upper() if role else ""
+        if normalized_role == ASSET_OWNER_ROLE:
+            return ASSET_OWNER_TENANT_ID
+        return DEFAULT_TENANT_ID
+    return explicit_tenant
+
+
+def _build_supabase_options() -> SyncClientOptions:
+    """Return Supabase client options whose HTTP client ignores proxy settings.
+
+    httpx 0.28 picks up the Windows system proxy (e.g. Clash on
+    127.0.0.1:7897) by default and sends every request through it. If that
+    proxy cannot reach a local service such as GoTrue on
+    http://localhost:8000, the request hangs until it times out and login fails.
+
+    To avoid that, a fresh ``httpx.Client`` built with ``trust_env=False`` and
+    ``proxy=None`` is created on each call and handed to Supabase.
+    """
+    request_timeout = httpx.Timeout(30.0, connect=10.0)
+    direct_client = httpx.Client(
+        trust_env=False, proxy=None,
+        timeout=request_timeout, follow_redirects=True,
+    )
+    options = SyncClientOptions(httpx_client=direct_client)
+    return options
+
+
def get_supabase_client():
"""Get Supabase client instance with regular key (user-context operations)."""
try:
- return create_client(SUPABASE_URL, SUPABASE_KEY)
+ return create_client(SUPABASE_URL, SUPABASE_KEY, options=_build_supabase_options())
except Exception as e:
logging.error(f"Failed to create Supabase client: {str(e)}")
return None
@@ -223,7 +283,7 @@ def get_supabase_client():
def get_supabase_admin_client():
"""Get Supabase client instance with service role key for admin operations."""
try:
- return create_client(SUPABASE_URL, SERVICE_ROLE_KEY)
+ return create_client(SUPABASE_URL, SERVICE_ROLE_KEY, options=_build_supabase_options())
except Exception as e:
logging.error(f"Failed to create Supabase admin client: {str(e)}")
return None
@@ -245,8 +305,10 @@ def get_jwt_expiry_seconds(token: str) -> int:
# 10 years in seconds
return 10 * 365 * 24 * 60 * 60
# Ensure token is pure JWT, remove possible Bearer prefix
- jwt_token = token.replace(
- "Bearer ", "") if token.startswith("Bearer ") else token
+ jwt_token = (
+ token.replace("Bearer ", "") if token.startswith(
+ "Bearer ") else token
+ )
# If debug expiration time is set, return directly for quick debugging
if DEBUG_JWT_EXPIRE_SECONDS > 0:
@@ -286,41 +348,38 @@ def calculate_expires_at(token: Optional[str] = None) -> int:
return int((datetime.now() + timedelta(seconds=expiry_seconds)).timestamp())
-def _extract_user_id_from_jwt_token(authorization: str) -> Optional[str]:
+def _decode_jwt_token(authorization: str) -> dict:
"""
-    Extract user ID from JWT token after verifying signature and expiration.
+    Decode a JWT after verifying signature and expiration and return its claims.
Args:
authorization: Authorization header value
- Returns:
- Optional[str]: User ID, return None if parsing fails
-
Raises:
UnauthorizedError: If token is invalid, expired, or signature verification fails
"""
if not SUPABASE_JWT_SECRET:
- logging.error("SUPABASE_JWT_SECRET (or JWT_SECRET) is not configured; cannot verify JWT")
+ logging.error(
+ "SUPABASE_JWT_SECRET (or JWT_SECRET) is not configured; cannot verify JWT"
+ )
raise UnauthorizedError("JWT verification is not configured")
try:
# Format authorization header
- token = authorization.replace("Bearer ", "") if authorization.startswith(
- "Bearer ") else authorization
+ token = (
+ authorization.replace("Bearer ", "")
+ if authorization.startswith("Bearer ")
+ else authorization
+ )
# Decode and verify JWT (signature + expiration)
# verify_aud=False: allow tokens with aud claim (e.g. test JWT, Supabase) without strict audience check
- decoded = jwt.decode(
+ return jwt.decode(
token,
SUPABASE_JWT_SECRET,
algorithms=["HS256"],
options={"verify_exp": True, "verify_aud": False},
)
-
- # Extract user ID from JWT claims
- user_id = decoded.get("sub")
-
- return user_id
except jwt.ExpiredSignatureError:
logging.warning("Token expired")
raise UnauthorizedError("Token has expired")
@@ -333,10 +392,47 @@ def _extract_user_id_from_jwt_token(authorization: str) -> Optional[str]:
except UnauthorizedError:
raise
except Exception as e:
- logging.error(f"Failed to extract user ID from token: {str(e)}")
+ logging.error(f"Failed to decode token: {str(e)}")
raise UnauthorizedError("Invalid or expired authentication token")
+def _extract_user_id_from_jwt_token(authorization: str) -> Optional[str]:
+    """Return the ``sub`` claim of a verified JWT, or None when it is absent.
+
+    Verification (and any UnauthorizedError) is delegated to _decode_jwt_token.
+    """
+    return _decode_jwt_token(authorization).get("sub")
+
+
+def extract_session_id_from_authorization(authorization: Optional[str]) -> Optional[str]:
+    """Best-effort read of the ``sid`` claim without verifying the token, for idempotent logout.
+
+    The signature is NOT verified, so never treat the result as proof of
+    identity. Returns None for a missing header, an unparsable token, or no ``sid``.
+    """
+    if not authorization:
+        return None
+    # Strip only a leading scheme; str.replace would also delete "Bearer " inside the value.
+    token = authorization.removeprefix("Bearer ")
+    try:
+        sid = jwt.decode(token, options={"verify_signature": False}).get("sid")
+    except Exception:
+        return None
+    return str(sid) if sid else None
+
+
+def ensure_cas_session_active_from_authorization(authorization: Optional[str]) -> None:
+    """Raise UnauthorizedError when the token's ``sid`` refers to an inactive CAS session.
+
+    When no ``sid`` can be extracted, nothing is checked and the call returns normally.
+    """
+    session_id = extract_session_id_from_authorization(authorization)
+    if session_id:
+        from database.cas_session_db import is_cas_session_active
+        if not is_cas_session_active(str(session_id)):
+            raise UnauthorizedError("CAS session has expired or been revoked")
+
+
def get_current_user_id(authorization: Optional[str] = None) -> tuple[str, str]:
"""
Get current user ID and tenant ID from authorization token
@@ -354,25 +450,33 @@ def get_current_user_id(authorization: Optional[str] = None) -> tuple[str, str]:
return DEFAULT_USER_ID, DEFAULT_TENANT_ID
# In normal mode, missing auth header means unauthorized - return 401, not default user
- if authorization is None or (isinstance(authorization, str) and not authorization.strip()):
+ if authorization is None or (
+ isinstance(authorization, str) and not authorization.strip()
+ ):
raise UnauthorizedError("No authorization header provided")
try:
- user_id = _extract_user_id_from_jwt_token(authorization)
+ decoded = _decode_jwt_token(authorization)
+ user_id = decoded.get("sub")
if not user_id:
raise UnauthorizedError("Invalid or expired authentication token")
+ ensure_cas_session_active_from_authorization(authorization)
+
user_tenant_record = get_user_tenant_by_user_id(user_id)
- if user_tenant_record and user_tenant_record.get('tenant_id'):
- tenant_id = user_tenant_record['tenant_id']
+ if user_tenant_record and user_tenant_record.get("tenant_id"):
+ tenant_id = user_tenant_record["tenant_id"]
logging.debug(f"Found tenant ID for user {user_id}: {tenant_id}")
else:
tenant_id = DEFAULT_TENANT_ID
logging.warning(
- f"No tenant relationship found for user {user_id}, using default tenant")
+ f"No tenant relationship found for user {user_id}, using default tenant"
+ )
return user_id, tenant_id
+ except UnauthorizedError:
+ raise
except Exception as e:
logging.error(f"Failed to get user ID and tenant ID: {str(e)}")
raise UnauthorizedError("Invalid or expired authentication token")
@@ -393,8 +497,8 @@ def get_user_language(request: Request = None) -> str:
# Read language setting from cookie
if request:
try:
- if hasattr(request, 'cookies') and request.cookies:
- cookie_locale = request.cookies.get('NEXT_LOCALE')
+ if hasattr(request, "cookies") and request.cookies:
+ cookie_locale = request.cookies.get("NEXT_LOCALE")
if cookie_locale and cookie_locale in [LANGUAGE["ZH"], LANGUAGE["EN"]]:
return cookie_locale
except (AttributeError, TypeError) as e:
@@ -407,6 +511,7 @@ def get_user_language(request: Request = None) -> str:
# Simple JWT helpers for tests and tooling
# ---------------------------------------------------------------------------
+
def generate_test_jwt(user_id: str, expires_in: int = 3600) -> str:
"""
Generate a simple unsigned JWT for testing purposes (HS256 with dummy secret)
@@ -423,7 +528,25 @@ def generate_test_jwt(user_id: str, expires_in: int = 3600) -> str:
return jwt.encode(payload, MOCK_JWT_SECRET_KEY, algorithm="HS256")
-def get_current_user_info(authorization: Optional[str] = None, request: Request = None) -> tuple[str, str, str]:
+def generate_session_jwt(user_id: str, expires_in: int = 3600, session_id: Optional[str] = None) -> str:
+    """Generate an HS256 JWT signed with SUPABASE_JWT_SECRET for the existing auth flow.
+
+    ``sid`` is added only when session_id is truthy. Raises UnauthorizedError when no
+    signing secret is configured, mirroring the check in _decode_jwt_token.
+    """
+    if not SUPABASE_JWT_SECRET:
+        raise UnauthorizedError("JWT signing secret is not configured")
+    now = int(time.time())
+    payload = {"sub": user_id, "role": "authenticated", "aud": "authenticated",
+               "iat": now, "exp": now + expires_in, "iss": SUPABASE_URL}
+    if session_id:
+        payload["sid"] = session_id
+    return jwt.encode(payload, SUPABASE_JWT_SECRET, algorithm="HS256")
+
+
+def get_current_user_info(
+ authorization: Optional[str] = None, request: Request = None
+) -> tuple[str, str, str]:
"""
Get current user information, including user ID, tenant ID, and language preference
diff --git a/backend/utils/content_classifier_utils.py b/backend/utils/content_classifier_utils.py
new file mode 100644
index 000000000..fcdb33f70
--- /dev/null
+++ b/backend/utils/content_classifier_utils.py
@@ -0,0 +1,197 @@
+"""Content classification utilities for streaming LLM output parsing."""
+
+import re
+from typing import Any, Dict, List, Optional
+
+
+class ContentClassifier:
+ """Parse XML tags from LLM output and classify streaming content in real-time.
+
+ Uses tag pool matching with state machine for elegant streaming XML parsing.
+ Classifies content into:
+ - skill_body: SKILL.md content (including frontmatter - detected by frontend)
+ - file_content: Additional file content with path information
+ - summary: Summary text after
+ - others: Content outside all tags (LLM reasoning process)
+
+ Includes DoS protection to prevent resource exhaustion from malicious input.
+ """
+
+ MAX_BUFFER_SIZE = 1024 * 1024 # 1MB
+ MAX_TAG_LENGTH = 256 # Single tag max length
+ MAX_PATH_LENGTH = 512 # File path max length
+ MAX_TAG_COUNT = 100 # Max tags before stopping
+
+ def __init__(self):
+ self.state = "others" # others | skill_body | file | summary
+ self.current_file_path: Optional[str] = None
+ self.buffer = ""
+ self.tag_count = 0
+ self._known_tags = {
+ "",
+ "",
+ "",
+ "",
+ "",
+ }
+ self._pending_file_path: Optional[str] = None
+
+    def classify(self, chunk: str) -> List[Dict[str, Any]]:
+        """Buffer chunk and return classified events; over-long unterminated '<' runs are flushed as text."""
+        results = []
+        self.buffer += chunk
+        while self.buffer:
+            if not self.buffer.startswith("<"):
+                results.extend(self._process_non_tag_content())
+            elif ">" in self.buffer:
+                results.extend(self._process_tag_start())
+            elif len(self.buffer) > self.MAX_TAG_LENGTH + self.MAX_PATH_LENGTH:
+                results.extend(self._emit_dos_protected_content())
+            else:
+                break
+        return results
+
+    def _process_tag_start(self) -> List[Dict[str, Any]]:
+        """Dispatch a '<'-led buffer that already contains '>'.
+
+        The text through the first '>' is the candidate tag: a recognised tag goes
+        to _handle_matched_tag, anything else has its leading '<' emitted as plain
+        content (through the DoS path when longer than MAX_TAG_LENGTH).
+        """
+        close_idx = self.buffer.index(">")
+        candidate = self.buffer[: close_idx + 1]
+        known = self._match_known_tag_with_buffer(candidate)
+        if known:
+            return list(self._handle_matched_tag(close_idx, candidate, known))
+        if len(candidate) > self.MAX_TAG_LENGTH:
+            return list(self._emit_dos_protected_content())
+        return list(self._emit_potential_tag_start())
+
+    def _handle_matched_tag(self, gt_pos: int, potential_tag: str, matched_tag: str) -> List[Dict[str, Any]]:
+        """Consume a recognised tag from the buffer and process the text behind it.
+
+        Once MAX_TAG_COUNT tags have been handled, further tags are dropped from
+        the buffer without changing state or emitting anything.
+        """
+        remainder = self.buffer[gt_pos + 1:]
+        if self.tag_count >= self.MAX_TAG_COUNT:
+            self.buffer = remainder
+            return []
+        self.tag_count += 1
+        self.buffer = ""
+        events = []
+        tag_event = self._handle_tag(matched_tag)
+        if tag_event:
+            events.append(tag_event)
+        if remainder:
+            events += self._process_content_after_tag(remainder)
+        return events
+
+    def _process_content_after_tag(self, content: str) -> List[Dict[str, Any]]:
+        """Emit the text that follows a tag, up to the next '<'.
+
+        Everything before the first '<' is emitted under the current state; the
+        rest (starting at that '<') is put back into the buffer. When there is
+        no '<' the whole content is emitted and the buffer is left untouched.
+        """
+        # partition() yields lt == "" when content holds no '<'.
+        head, lt, tail = content.partition("<")
+
+        events = []
+        head_event = self._create_event(head)
+        if head_event:
+            events.append(head_event)
+
+        if lt:
+            self.buffer = lt + tail
+        return events
+
+    def _emit_dos_protected_content(self) -> List[Dict[str, Any]]:
+        """Give up on an over-long tag candidate (DoS protection).
+
+        The leading '<' is emitted as content for the current state and dropped from the buffer.
+        """
+        self.buffer = self.buffer[1:]
+        lt_event = self._create_event("<")
+        return [lt_event] if lt_event else []
+
+    def _emit_potential_tag_start(self) -> List[Dict[str, Any]]:
+        """Treat a '<' that does not open any known tag as literal text.
+
+        The '<' is emitted as content for the current state and dropped from the buffer.
+        """
+        self.buffer = self.buffer[1:]
+        lt_event = self._create_event("<")
+        return [lt_event] if lt_event else []
+
+    def _process_non_tag_content(self) -> List[Dict[str, Any]]:
+        """Emit at most 64 leading chars of plain text, stopping before the next '<'."""
+        lt_pos = self.buffer.find("<")
+        # lt_pos <= 0 means no later '<' to protect (or a direct call on a '<'-led buffer).
+        limit = lt_pos if lt_pos > 0 else len(self.buffer)
+        emit_len = min(limit, 64)
+        event = self._create_event(self.buffer[:emit_len])
+        self.buffer = self.buffer[emit_len:]
+        return [event] if event else []
+
+ def _match_known_tag_with_buffer(self, buffer_content: str) -> Optional[str]:
+ """Check if buffer content matches a known complete tag."""
+ # Check exact match for simple tags
+ if buffer_content in self._known_tags:
+ return buffer_content
+
+ # Check pattern
+ if buffer_content.startswith(""):
+ match = re.match(
+ r'$',
+ buffer_content
+ )
+ if match:
+ self._pending_file_path = match.group(1)
+ return ""
+
+ return None
+
+    def _create_event(self, content: str) -> Dict[str, Any]:
+        """Wrap content in an event typed by the current parser state.
+
+        Empty content yields {}. In the "file" state the event is of type
+        "file_content" and carries the current file path; "skill_body" and
+        "summary" use the state name as type; any other state maps to "others".
+        """
+        if not content:
+            return {}
+        if self.state == "file":
+            return {"type": "file_content", "content": content, "path": self.current_file_path}
+        kind = self.state if self.state in ("skill_body", "summary") else "others"
+        return {"type": kind, "content": content}
+
+ def _handle_tag(self, tag: str) -> Optional[Dict[str, Any]]:
+ """Handle matched tag and update state."""
+ if tag == "":
+ self.state = "skill_body"
+ return None
+
+ elif tag == "":
+ self.state = "summary"
+ return None
+
+ elif tag == "" or tag == "":
+ if tag == "":
+ self.state = "summary"
+ else:
+ self.state = "others"
+ return None
+
+ elif tag == "":
+ self.state = "file"
+ self.current_file_path = self._pending_file_path
+ self._pending_file_path = None
+ return {"type": "file_content", "content": "", "path": self.current_file_path, "is_new_file": True}
+
+ elif tag == "":
+ self.state = "skill_body"
+ self.current_file_path = None
+ return None
+
+ return None
diff --git a/backend/utils/context_utils.py b/backend/utils/context_utils.py
new file mode 100644
index 000000000..0c3af8915
--- /dev/null
+++ b/backend/utils/context_utils.py
@@ -0,0 +1,1338 @@
+"""Context component building utilities for system prompt assembly.
+
+Provides build_context_components() to convert agent configuration data
+into ContextComponent instances for use with ContextManager.
+
+This module implements the piecewise component architecture where each
+semantic section of the system prompt is emitted by a dedicated function,
+allowing ContextManager to assemble them in the correct order.
+"""
+
+from typing import TYPE_CHECKING, Any, Dict, List, Optional
+
+if TYPE_CHECKING:
+ from nexent.core.agents.agent_model import (
+ ContextComponent,
+ ToolsComponent,
+ SkillsComponent,
+ MemoryComponent,
+ KnowledgeBaseComponent,
+ ManagedAgentsComponent,
+ ExternalAgentsComponent,
+ SystemPromptComponent,
+ ToolConfig,
+ AgentConfig,
+ ExternalA2AAgentConfig,
+ )
+
+
+# =============================================================================
+# SECTION 1: Long-text format functions (expanded from Jinja2 templates)
+# Each function accepts language and is_manager params for variant-specific text
+# =============================================================================
+
+
+def _format_memory_context(
+ memory_list: List[Any],
+ language: str = "zh",
+) -> str:
+ """Format memory search results with full usage guidelines.
+
+ Jinja2 templates have ~30 lines of "记忆使用准则" text that must be
+ included here for semantic equivalence.
+ """
+ if not memory_list:
+ return ""
+
+ # Group memories by level in correct order: tenant, user_agent, user, agent
+ level_order = ["tenant", "user_agent", "user", "agent"]
+ memory_by_level: Dict[str, List[Any]] = {}
+ for mem in memory_list:
+ if isinstance(mem, dict):
+ level = mem.get("memory_level", "user")
+ if level not in memory_by_level:
+ memory_by_level[level] = []
+ memory_by_level[level].append(mem)
+
+ lines = []
+
+ if language == "zh":
+ lines.append("### 上下文记忆")
+ lines.append("基于之前的交互记录,以下是按作用域和重要程度排序的最相关记忆:")
+ lines.append("")
+
+ for level in level_order:
+ if level in memory_by_level:
+ level_title = {
+ "tenant": "Tenant",
+ "user_agent": "User_agent",
+ "user": "User",
+ "agent": "Agent",
+ }.get(level, level.title())
+ lines.append(f"**{level_title} 层级记忆:**")
+ for item in memory_by_level[level]:
+ content = item.get("memory", "") or item.get("content", "")
+ score = item.get("score", 0.0)
+ lines.append(f"- {content} `({score:.2f})`")
+ lines.append("")
+
+ lines.append("**记忆使用准则:**")
+ lines.append("1. **冲突处理优先级**:当记忆信息存在矛盾时,严格按以下顺序处理:")
+ lines.append("- **最优先**:在上述列表中位置靠前的记忆具有优先权")
+ lines.append("- **次优先**:当前对话内容与记忆直接冲突时,以当前对话为准")
+ lines.append("- **次优先**:相关度分数越高,表示记忆越可信")
+ lines.append("")
+ lines.append("2. **记忆整合最佳实践**:")
+ lines.append(" - 自然地将相关记忆融入回答中,避免显式使用\"根据记忆\"、\"根据上下文\"或\"根据交互记忆\"等语言")
+ lines.append(" - 利用记忆信息调整回答的语调、方式和技术深度以适应用户")
+ lines.append(" - 让记忆指导您对用户偏好和上下文的理解")
+ lines.append("")
+ lines.append("3. **级别特定说明**:")
+ lines.append(" - **tenant(租户级)**:组织层面的约束和政策(不可违背)")
+ lines.append(" - **user_agent(用户-代理级)**:特定用户在代理中的交互模式和既定工作流程")
+ lines.append(" - **user(用户级)**:用户的个人偏好、技能水平和历史上下文")
+ lines.append(" - **agent(代理级)**:您的既定行为模式和能力特征,通常对所有用户共享(重要性最低)")
+ else:
+ lines.append("### Contextual Memory")
+ lines.append("Based on previous interactions, here are the most relevant memories organized by scope and importance:")
+ lines.append("")
+
+ for level in level_order:
+ if level in memory_by_level:
+ lines.append(f"**{level.title()} Level Memory:**")
+ for item in memory_by_level[level]:
+ content = item.get("memory", "") or item.get("content", "")
+ score = item.get("score", 0.0)
+ lines.append(f"- {content} `({score:.2f})`")
+ lines.append("")
+
+ lines.append("**Memory Usage Guidelines:**")
+ lines.append("1. **Conflict Resolution Priority**: When memories contradict each other, follow this strict order:")
+ lines.append(" - **Primary**: Information appearing EARLIER in the above numbered list takes precedence")
+ lines.append(" - **Secondary**: Current conversation context overrides historical memory when directly contradicted")
+ lines.append(" - **Tertiary**: Higher relevance scores indicate more trustworthy information")
+ lines.append("")
+ lines.append("2. **Memory Integration Best Practices**:")
+ lines.append(" - Seamlessly weave relevant memories into your responses without explicitly saying \"I remember\", \"based on memory\" or \"based on context\"")
+ lines.append(" - Use memories to inform your tone, approach, and technical level appropriate for this user")
+ lines.append(" - Let memories guide your assumptions about user preferences and context")
+ lines.append("")
+ lines.append("3. **Level-Specific Considerations**:")
+ lines.append(" - **tenant**: Organizational constraints and policies (non-negotiable)")
+ lines.append(" - **user_agent**: Specific interaction dynamics and established workflow patterns")
+ lines.append(" - **user**: Individual preferences, skills, and historical context")
+ lines.append(" - **agent**: Your established behavioral patterns and capabilities, usually shared by all users (least important)")
+
+ return "\n".join(lines)
+
+
+def _format_skills_description(
+ skills: List[Dict[str, str]],
+ language: str = "zh",
+) -> str:
+ """Format skill descriptions with full 6-step usage process.
+
+ Jinja2 templates have ~50 lines of "技能使用流程" text that must be
+ included here for semantic equivalence.
+ """
+ if not skills:
+ return ""
+
+ lines = []
+
+ # Build the block
+ skills_block_lines = [""]
+ for skill in skills:
+ name = skill.get("name", "")
+ desc = skill.get("description", "")
+ skills_block_lines.append(" ")
+ skills_block_lines.append(f" {name}")
+ skills_block_lines.append(f" {desc}")
+ skills_block_lines.append(" ")
+ skills_block_lines.append("")
+ skills_block = "\n".join(skills_block_lines)
+
+ if language == "zh":
+ lines.append("### 可用技能")
+ lines.append("")
+ lines.append("你拥有以下技能(Skills)。技能是预定义的专业能力模块,包含详细执行指南和可选的附加脚本。")
+ lines.append("")
+ lines.append(skills_block)
+ lines.append("")
+ lines.append("**技能使用流程**:")
+ lines.append("1. 收到用户请求后,首先审视 `` 中每个技能的 description,判断是否有匹配的技能。")
+ lines.append("2. **加载技能**:根据不同场景选择读取方式:")
+ lines.append(" - **首次加载**:调用 `read_skill_md(\"skill_name\")` 读取技能的完整执行指南(默认读取 SKILL.md)")
+ lines.append(" - **精确读取**:如只需特定文件(如示例、参考文档),可指定 additional_files:")
+ lines.append(" ")
+ lines.append(" skill_content = read_skill_md(\"skill_name\", [\"examples.md\", \"reference/api_doc\"])")
+ lines.append(" print(skill_content)")
+ lines.append(" ")
+ lines.append(" 注意:当 additional_files 非空时,默认不再自动读取 SKILL.md,如需同时读取请显式指定。")
+ lines.append("")
+ lines.append(" - **加载技能配置**:如果技能需要读取配置变量,可先调用 `read_skill_config(\"skill_name\")` 读取配置字符串,通过 `json.loads` 方法转化为配置字典,再从中获取所需值:")
+ lines.append(" ")
+ lines.append(" import json")
+ lines.append(" config = json.loads(read_skill_config(\"skill_name\"))")
+ lines.append(" # 返回示例: {\"key_a\": {\"key2\": \"value2\"}, \"others\": {...}}")
+ lines.append(" value = config[\"key1\"][\"key2\"]")
+ lines.append(" print(value)")
+ lines.append(" ")
+ lines.append("")
+ lines.append("3. **遵循技能指南**:技能内容注入后,严格按其中的步骤执行。不要跳过技能指南中的步骤,也不要用自行编写的代码替代技能定义的流程。")
+ lines.append("")
+ lines.append("4. **执行技能脚本**:如果技能指南中引用了附加脚本(形如 ``),使用以下格式调用:")
+ lines.append(" 代码:")
+ lines.append(" ")
+ lines.append(" result = run_skill_script(\"skill_name\", \"script_path\")")
+ lines.append(" print(result)")
+ lines.append(" ")
+ lines.append(" 对于需要附加参数的脚本,需要参照脚本调用说明,将参数直接以字符串形式传递。")
+ lines.append(" 例如对于希望附加的参数:--param1 value1 --flag,则使用以下格式调用run_skill_script:")
+ lines.append(" ")
+ lines.append(" result = run_skill_script(\"skill_name\", \"script_path\", \"--param1 value1 --flag\")")
+ lines.append(" print(result)")
+ lines.append(" ")
+ lines.append(" 注意:只执行技能指南中明确声明的脚本路径,绝不自行构造脚本路径。")
+ lines.append("")
+ lines.append("5. **整合输出**:根据技能指南要求的输出格式,结合脚本执行结果生成最终回答。")
+ lines.append("")
+ lines.append("6. **引用场景处理**:当技能内容中出现引用标记或需要引用其他文件时,需要识别并再次调用 read_skill_md:")
+ lines.append(" - **引用模板识别**:注意技能内容中形如 `` 或自然语言式的引用声明(如\"详见 examples.md\"、\"请参考 reference/api_doc\")")
+ lines.append(" - **自动补全**:发现引用后,尝试读取被引用的文件获取更多信息")
+ lines.append(" - **示例**:")
+ lines.append(" ")
+ lines.append(" # 技能内容提示\"请参考 examples.md 获取详细示例\"")
+ lines.append(" additional_info = read_skill_md(\"skill_name\", [\"examples.md\"])")
+ lines.append(" print(additional_info)")
+ lines.append(" ")
+ else:
+ lines.append("### Available Skills")
+ lines.append("")
+ lines.append("You have the following Skills. Skills are predefined professional capability modules with detailed execution guides and optional additional scripts.")
+ lines.append("")
+ lines.append(skills_block)
+ lines.append("")
+ lines.append("**Skill Usage Process**:")
+ lines.append("1. After receiving a user request, first examine the description of each skill in `` to determine if there is a matching skill.")
+ lines.append("2. **Load Skill**: Choose the appropriate reading method based on the scenario:")
+ lines.append(" - **First-time load**: Call `read_skill_md(\"skill_name\")` to read the complete execution guide (defaults to reading SKILL.md)")
+ lines.append(" - **Precise read**: If you only need specific files (like examples, reference docs), specify additional_files:")
+ lines.append(" ")
+ lines.append(" skill_content = read_skill_md(\"skill_name\", [\"examples.md\", \"reference/api_doc\"])")
+ lines.append(" print(skill_content)")
+ lines.append(" ")
+ lines.append(" Note: When additional_files is non-empty, SKILL.md is no longer auto-read. If you need both, explicitly specify it.")
+ lines.append("")
+ lines.append(" - **Load skill config**: If the skill needs configuration variables, call `read_skill_config(\"skill_name\")` to read the config string, convert to dict via `json.loads`, then access values:")
+ lines.append(" ")
+ lines.append(" import json")
+ lines.append(" config = json.loads(read_skill_config(\"skill_name\"))")
+ lines.append(" # Example: {\"key_a\": {\"key2\": \"value2\"}, \"others\": {...}}")
+ lines.append(" value = config[\"key1\"][\"key2\"]")
+ lines.append(" print(value)")
+ lines.append(" ")
+ lines.append("")
+ lines.append("3. **Follow Skill Guide**: After skill content is injected, strictly follow its steps. Do not skip steps or replace with your own code.")
+ lines.append("")
+ lines.append("4. **Execute Skill Script**: If the skill guide references additional scripts (like ``), call:")
+ lines.append(" ")
+ lines.append(" result = run_skill_script(\"skill_name\", \"script_path\")")
+ lines.append(" print(result)")
+ lines.append(" ")
+ lines.append(" For scripts needing extra params, pass them as a command-line string per the script's calling instructions.")
+ lines.append(" Example for --param1 value1 --flag:")
+ lines.append(" ")
+ lines.append(" result = run_skill_script(\"skill_name\", \"script_path\", \"--param1 value1 --flag\")")
+ lines.append(" print(result)")
+ lines.append(" ")
+ lines.append(" Note: Only execute script paths explicitly declared in the skill guide. Never construct paths yourself.")
+ lines.append("")
+ lines.append("5. **Integrate Output**: Generate the final answer based on the skill guide's output format and script execution results.")
+ lines.append("")
+ lines.append("6. **Handle References**: When the skill content has reference markers or needs to reference other files, identify and call read_skill_md again:")
+ lines.append(" - **Reference template recognition**: Look for patterns like `` or natural-language references (\"see examples.md\", \"refer to reference/api_doc\")")
+ lines.append(" - **Auto-complete**: After discovering a reference, try reading the referenced file for more info")
+ lines.append(" - **Example**:")
+ lines.append(" ")
+ lines.append(" # Skill content says \"see examples.md for detailed examples\"")
+ lines.append(" additional_info = read_skill_md(\"skill_name\", [\"examples.md\"])")
+ lines.append(" print(additional_info)")
+ lines.append(" ")
+
+ return "\n".join(lines)
+
+
+def _format_tools_description(
+ tools: Dict[str, Any],
+ knowledge_base_summary: Optional[str] = None,
+ language: str = "zh",
+ is_manager: bool = True,
+) -> str:
+ """Format tool descriptions with file URL usage guide.
+
+ Jinja2 templates have ~10 lines of "文件链接使用指南" text that must be
+ included here for semantic equivalence.
+
+ Note: Managed agents use different presigned_url guidance than manager agents.
+ """
+ if not tools:
+ no_tools_msg = "- 当前没有可用的工具" if language == "zh" else "- No tools are currently available"
+ return no_tools_msg
+
+ lines = []
+
+ if language == "zh":
+ lines.append("- 你只能使用以下工具,不得使用任何其他工具:")
+ else:
+ lines.append("- You can only use the following tools and may not use any other tools:")
+
+ for name, tool in tools.items():
+ if hasattr(tool, 'description'):
+ desc = tool.description
+ inputs = tool.inputs
+ output_type = tool.output_type
+ source = getattr(tool, 'source', 'local')
+ else:
+ desc = tool.get('description', '')
+ inputs = tool.get('inputs', '')
+ output_type = tool.get('output_type', '')
+ source = tool.get('source', 'local')
+
+ # MCP tools have [MCP] prefix
+ if source == 'mcp':
+ if language == "zh":
+ lines.append(f"- [MCP] {name}: {desc}")
+ lines.append(f" 接受输入: {inputs}")
+ lines.append(f" 返回输出类型: {output_type}")
+ else:
+ lines.append(f"- [MCP] {name}: {desc}")
+ lines.append(f" Accepts input: {inputs}")
+ lines.append(f" Returns output type: {output_type}")
+ else:
+ if language == "zh":
+ lines.append(f"- {name}: {desc}")
+ lines.append(f" 接受输入: {inputs}")
+ lines.append(f" 返回输出类型: {output_type}")
+ else:
+ lines.append(f"- {name}: {desc}")
+ lines.append(f" Accepts input: {inputs}")
+ lines.append(f" Returns output type: {output_type}")
+
+ # Knowledge base summary
+ if knowledge_base_summary:
+ if language == "zh":
+ lines.append("- knowledge_base_search工具只能使用以下知识库索引,请根据用户问题选择最相关的一个或多个知识库索引:")
+ lines.append(f" {knowledge_base_summary}")
+ else:
+ lines.append("- knowledge_base_search tool can only use the following knowledge base indexes, please select the most relevant one or more knowledge base indexes based on the user's question:")
+ lines.append(f" {knowledge_base_summary}")
+
+ # File URL usage guide
+ lines.append("")
+ if language == "zh":
+ lines.append("### 文件链接使用指南")
+ lines.append("当处理用户上传的文件时,请根据工具类型选择正确的 URL:")
+ lines.append("1. **调用标记为 [MCP] 的工具**(外部工具,运行在 Nexent 之外):")
+ if is_manager:
+ lines.append(" → 使用 **Download URL**(格式:`https://minio.example.com/...?token=xxx`)")
+ lines.append(" 原因:MCP 工具运行在外部服务,无法访问内部 S3 存储")
+ else:
+ lines.append(" → 使用 **presigned_url**(已包含代理前缀,格式:`http://.../api/nb/v1/file/fetch?presigned_url=...`)")
+ lines.append(" 直接使用用户上传文件信息中提供的 **presigned_url** 字段,无需拼接。")
+ lines.append("2. **调用其他所有工具**(内部工具,如 analyze_text_file、analyze_image 等):")
+ lines.append(" → 使用 **S3 URL**(格式:`s3:/nexent/attachments/xxx.pdf`)")
+ lines.append(" 原因:内部工具运行在 Nexent 内部,可以直接访问 MinIO 存储")
+ else:
+ lines.append("### File URL Usage Guide")
+ lines.append("When processing user-uploaded files, choose the correct URL based on tool type:")
+ lines.append("1. **Calling tools marked with [MCP]** (external tools that run outside Nexent):")
+ if is_manager:
+ lines.append(" → Use **Download URL** (format: `https://minio.example.com/...?token=xxx`)")
+ lines.append(" Reason: MCP tools run on external services and cannot access internal S3 storage")
+ else:
+ lines.append(" → Use **presigned_url** (already includes proxy prefix, format: `http://.../api/nb/v1/file/fetch?presigned_url=...`)")
+ lines.append(" Directly use the **presigned_url** field provided in the user's uploaded file info. No need to construct or append anything.")
+ lines.append("2. **Calling all other tools** (internal tools like analyze_text_file, analyze_image):")
+ lines.append(" → Use **S3 URL** (format: `s3:/nexent/attachments/xxx.pdf`)")
+ lines.append(" Reason: Internal tools run inside Nexent and can directly access MinIO storage")
+
+ return "\n".join(lines)
+
+
+def _format_managed_agents_description(
+ managed_agents: Dict[str, Any],
+ language: str = "zh",
+) -> str:
+ """Format managed sub-agent descriptions with calling specifications.
+
+ Returns an empty string when managed_agents is falsy; text is Chinese for language == "zh", English otherwise.
+ The "internal agent calling specifications" lines are included for semantic equivalence with the Jinja2 templates.
+ """
+ if not managed_agents:  # None or empty mapping: nothing to render
+ return ""
+
+ lines = []  # output lines, joined with newlines at the end
+
+ if language == "zh":
+ lines.append("你可以使用以下内部助手(通过函数调用方式协作):")
+ for name, agent in managed_agents.items():
+ desc = agent.description if hasattr(agent, 'description') else agent.get('description', '')  # config object or plain dict
+ lines.append(f" - {name}: {desc}")
+ lines.append("")
+ lines.append("内部助手调用规范:")
+ lines.append(" 1. 调用方式:")
+ lines.append(" - 接受输入:{\"task\": {\"type\": \"string\", \"description\": \"任务描述\"}}")
+ lines.append(" - 返回输出类型:{\"type\": \"string\", \"description\": \"执行结果\"}")
+ lines.append(" 2. 使用策略:")
+ lines.append(" - 任务分解:单次调用中不要让助手一次做过多的事情,任务拆分是你的工作,你需要将复杂任务分解为可管理的子任务")
+ lines.append(" - 专业匹配:根据助手的专长分配任务")
+ lines.append(" - 信息整合:整合不同助手的输出生成连贯解决方案")
+ lines.append(" - 效率优化:避免重复工作")
+ lines.append(" 3. 协作要求:")
+ lines.append(" - 评估助手返回的结果")
+ lines.append(" - 必要时提供额外指导或重新分配任务")
+ lines.append(" - 在助手结果基础上进行工作,避免重复工作")
+ lines.append(" - 注意保留子助手回答中的特殊符号,如索引溯源信息等")
+ else:  # every non-"zh" language value falls back to English
+ lines.append("You can use the following internal agents (via function calls):")
+ for name, agent in managed_agents.items():
+ desc = agent.description if hasattr(agent, 'description') else agent.get('description', '')  # config object or plain dict
+ lines.append(f" - {name}: {desc}")
+ lines.append("")
+ lines.append("Internal agent calling specifications:")
+ lines.append(" 1. Calling method:")
+ lines.append(" - Accepts input: {\"task\": {\"type\": \"string\", \"description\": \"task description\"}}")
+ lines.append(" - Returns output type: {\"type\": \"string\", \"description\": \"execution result\"}")
+ lines.append(" 2. Usage strategy:")
+ lines.append(" - Task decomposition: Don't let agents do too many things in a single call, task breakdown is your job, you need to decompose complex tasks into manageable subtasks")
+ lines.append(" - Professional matching: Assign tasks based on agent expertise")
+ lines.append(" - Information integration: Integrate outputs from different agents to generate coherent solutions")
+ lines.append(" - Efficiency optimization: Avoid duplicate work")
+ lines.append(" 3. Collaboration requirements:")
+ lines.append(" - Evaluate agent returned results")
+ lines.append(" - Provide additional guidance or reassign tasks when necessary")
+ lines.append(" - Work based on agent results, avoid duplicate work")
+ lines.append(" - Pay attention to preserving special symbols in sub-agent answers, such as index traceability information")
+
+ return "\n".join(lines)
+
+
+def _format_external_agents_description(
+ external_a2a_agents: Dict[str, Any],
+ language: str = "zh",
+) -> str:
+ """Format external A2A agent descriptions with calling specifications.
+
+ Returns an empty string when external_a2a_agents is falsy; text is Chinese for language == "zh", English otherwise.
+ The "external agent calling specifications" lines are included for semantic equivalence with the Jinja2 templates.
+ """
+ if not external_a2a_agents:  # None or empty mapping: nothing to render
+ return ""
+
+ lines = []  # output lines, joined with newlines at the end
+
+ if language == "zh":
+ lines.append("你还可以使用以下外部助手(通过 A2A 协议远程调用):")
+ for agent_id, agent in external_a2a_agents.items():  # agent_id (the mapping key) is not used in the rendered text
+ name = agent.name if hasattr(agent, 'name') else agent.get('name', '')  # config object or plain dict
+ desc = agent.description if hasattr(agent, 'description') else agent.get('description', '')
+ lines.append(f" - {name}: {desc}")
+ lines.append("")
+ lines.append("外部助手调用规范:")
+ lines.append(" 1. 调用格式:`agent_name(task=\"自然语言任务描述\")`,注意:只需要 task 参数,不需要其他参数")
+ lines.append(" 2. 例如:`tool_assistant(task=\"北京天气怎么样\")`")
+ lines.append(" 3. 任务描述使用自然语言,让外部助手自动识别和处理")
+ else:  # every non-"zh" language value falls back to English
+ lines.append("You can also use the following external agents (called via A2A protocol remotely):")
+ for agent_id, agent in external_a2a_agents.items():  # agent_id (the mapping key) is not used in the rendered text
+ name = agent.name if hasattr(agent, 'name') else agent.get('name', '')  # config object or plain dict
+ desc = agent.description if hasattr(agent, 'description') else agent.get('description', '')
+ lines.append(f" - {name}: {desc}")
+ lines.append("")
+ lines.append("External agent calling specifications:")
+ lines.append(" 1. Call format: `agent_name(task=\"natural language task description\")`, NOTE: only task parameter is needed, no other parameters")
+ lines.append(" 2. Example: `tool_assistant(task=\"What's the weather in Beijing?\")`")
+ lines.append(" 3. Use natural language for task description, let the external agent handle the rest")
+
+ return "\n".join(lines)
+
+
+def _format_skills_usage_requirements(
+ skills: List[Dict[str, str]],
+ language: str = "zh",
+) -> str:
+ """Format the "Skill Usage Requirements" section of the system prompt.
+
+ Returns a single "no skills" bullet when skills is falsy; otherwise an intro bullet, a heading and five numbered rules.
+ Intended to follow the skills reference in the Available Resources section of the prompt.
+ """
+ if not skills:  # None or empty list: emit only the fallback bullet
+ no_skills_msg = "- 当前没有可用的技能" if language == "zh" else "- No skills are currently available"
+ return no_skills_msg
+
+ lines = []  # output lines, joined with newlines at the end
+
+ if language == "zh":
+ lines.append("- 你拥有上述 `` 中列出的技能。技能中引用的脚本通过 `run_skill_script()` 函数调用,该函数由平台提供,不需要导入。")  # NOTE(review): the backticks are empty — a tag name may be missing; confirm against the template
+ lines.append("")
+ lines.append("### 技能使用要求")
+ lines.append("1. **技能优先**:如果用户请求匹配了某个技能的 description,必须先调用 `read_skill_md()` 加载技能指南,再按指南执行。不得跳过技能自行编写代码解决。")
+ lines.append("2. **忠实执行**:读取技能内容后,严格按技能指南中的步骤操作。不要自行修改流程、跳过步骤或用通用代码替代技能定义的流程。")
+ lines.append("3. **脚本调用规范**:只使用 `run_skill_script` 工具执行技能指南中明确要求的脚本。传入的 `skill_name` 和 `script_path` 必须与技能指南中的声明完全一致,不要自行拼接或猜测路径。如果需要附加参数,将参数以命令行字符串形式传递给`run_skill_script`。")
+ lines.append("4. **失败回退**:如果 `read_skill_md` 返回错误或 `run_skill_script` 执行失败,向用户说明情况,并尝试用通用推理模式提供替代方案。")
+ lines.append("5. **技能组合**:如果一个任务需要多个技能配合,按逻辑依赖顺序依次加载和执行,前一个技能的输出可作为后一个技能的输入。")
+ else:  # every non-"zh" language value falls back to English
+ lines.append("- You have the skills listed in `` above. Scripts referenced in skills are called via the `run_skill_script()` function, which is provided by the platform and does not need to be imported.")  # NOTE(review): empty backticks here too — confirm
+ lines.append("")
+ lines.append("### Skill Usage Requirements")
+ lines.append("1. **Skill Priority**: If a user request matches a skill's description, you must first call `read_skill_md()` to load the skill guide, then execute per the guide. Do not skip skills and write your own code.")
+ lines.append("2. **Faithful Execution**: After reading skill content, strictly follow the skill guide's steps. Do not modify the flow, skip steps, or replace with generic code.")
+ lines.append("3. **Script Calling Specification**: Only use `run_skill_script` to execute scripts explicitly required in the skill guide. The `skill_name` and `script_path` must match the skill guide's declaration exactly. Do not construct or guess paths. For extra params, pass them as a command-line string to `run_skill_script`.")
+ lines.append("4. **Failure Fallback**: If `read_skill_md` returns an error or `run_skill_script` fails, explain to the user and try to provide an alternative via general reasoning mode.")
+ lines.append("5. **Skill Combination**: If a task needs multiple skills, load and execute in logical dependency order. The output of one skill can be input to the next.")
+
+ return "\n".join(lines)
+
+
+def _format_agent_fallback(
+ managed_agents: Dict[str, Any],
+ external_a2a_agents: Dict[str, Any],
+ language: str = "zh",
+) -> str:
+ """Return a "no agents available" bullet (zh or en), or an empty string when either agent mapping is non-empty."""
+ if managed_agents or external_a2a_agents:  # at least one agent exists, so no fallback text is needed
+ return ""
+
+ return "- 当前没有可用的助手" if language == "zh" else "- No agents are currently available"
+
+
+def _format_app_context(app_name: str, app_description: str, user_id: str) -> str:
+ """Format application context (app name, description, current user) as three newline-joined lines."""
+ lines = [
+ f"Application: {app_name}",
+ f"Description: {app_description}",
+ f"Current user: {user_id}",
+ ]  # labels are English only; this helper takes no language parameter
+ return "\n".join(lines)
+
+
+# =============================================================================
+# SECTION 2: Skeleton component builders
+# These build SystemPromptComponent instances for fixed text sections
+# =============================================================================
+
+
+def build_skeleton_header_component(
+ app_name: str,
+ app_description: str,
+ user_id: str,
+ language: str = "zh",
+ priority: int = 100,
+) -> "SystemPromptComponent":
+ """Build the SystemPromptComponent for the header section (template_name="header").
+
+ Section: "### 基本信息" / "### Basic Information"
+ Content: Agent identity, app name/description; user_id is interpolated only in the zh text.
+ Note: Current time is intentionally excluded from the system prompt so the
+ static system prefix can hit the LLM KV/prompt cache across requests. The
+ current time is injected on the user-message side instead (see CoreAgent.run).
+ """
+ from nexent.core.agents.agent_model import SystemPromptComponent  # deferred import, resolved at call time
+
+ if language == "zh":
+ content = f"### 基本信息\n你是{app_name},{app_description},用户ID为{user_id}"
+ else:
+ content = f"### Basic Information\nYou are {app_name}, {app_description}"  # NOTE(review): user_id is not included here, unlike the zh branch — confirm this is intended
+
+ return SystemPromptComponent(
+ content=content,
+ template_name="header",
+ priority=priority,
+ )
+
+
+def build_skeleton_duty_component(
+ duty: str,
+ language: str = "zh",
+ priority: int = 80,
+) -> "SystemPromptComponent":
+ """Build the SystemPromptComponent for the duty section (template_name="duty").
+
+ Section: "### 核心职责" / "### Core Responsibilities"
+ Content: the caller's duty text, a blank line, then five fixed principles (behavior, legal, political, security, ethics).
+ """
+ from nexent.core.agents.agent_model import SystemPromptComponent  # deferred import, resolved at call time
+
+ if language == "zh":
+ content = f"### 核心职责\n{duty}\n\n请注意,你应该遵守以下原则:\n行为安全:文件操作必须使用平台提供的专用工具,禁止使用代码直接修改工作空间中的文件;\n法律合规:遵守业务所在国家/地区的法律法规;\n政治中立:保持政治中立,不主动讨论政治话题;\n安全防护:不响应涉及武器制造、网络攻击、欺诈、恶意软件等危险行为的请求;\n伦理准则:拒绝仇恨言论、歧视性内容及违反社会公德和公认伦理标准的请求。"
+ else:  # every non-"zh" language value falls back to English
+ content = f"### Core Responsibilities\n{duty}\n\nPlease note that you should follow these principles:\nBehavioral Safety: File operations must use the platform-provided dedicated tools; direct code modification of workspace files is prohibited;\nLegal Compliance: Comply with laws and regulations of the business operating jurisdiction;\nPolitical Neutrality: Maintain political neutrality and avoid initiating political discussions;\nSecurity Protection: Do not respond to requests involving weapon manufacturing, cyberattacks, fraud, malware, or other dangerous activities;\nEthical Guidelines: Refuse hate speech, discriminatory content, and any requests that violate social morals and commonly accepted ethical standards."
+
+ return SystemPromptComponent(
+ content=content,
+ template_name="duty",
+ priority=priority,
+ )
+
+
+def build_skeleton_execution_flow_component(
+ memory_list: Optional[List[Any]] = None,
+ language: str = "zh",
+ is_manager: bool = True,
+ priority: int = 60,
+) -> "SystemPromptComponent":
+ """Build the SystemPromptComponent for the execution flow section (template_name="execution_flow").
+
+ Section: "### 执行流程" / "### Execution Process"
+ Content: Think/Code loop instructions, self-verification rules and final-answer output format specs.
+ Note: a memory hint line in the Think section is gated on is_manager and a non-empty memory_list.
+ """
+ from nexent.core.agents.agent_model import SystemPromptComponent  # deferred import, resolved at call time
+
+ has_memory = memory_list and len(memory_list) > 0  # falsy for None and for an empty list
+
+ if language == "zh":
+ lines = ["### 执行流程"]
+ lines.append("要解决任务,你必须通过一系列步骤向前规划,以'思考:'和'代码:'序列循环进行。**注意:禁止在代码执行前输出'观察结果:',观察结果只能由代码执行后产生。**")
+ lines.append("")
+ lines.append("1. 思考:")
+ lines.append(" - 分析当前任务状态和进展")
+ if is_manager and has_memory:
+ lines.append(" - 合理参考之前交互中的上下文记忆信息")
+ lines.append(" - 定下一步最佳行动(使用工具或分配给助手)")  # NOTE(review): possibly meant "确定下一步" — confirm against the template
+ lines.append(" - 解释你的决策逻辑和预期结果")
+ lines.append("")
+ lines.append("2. 代码:")
+ lines.append(" - 用简单的Python编写代码")
+ lines.append(" - 遵循python代码规范和python语法")
+ lines.append(" - 正确调用工具或助手解决问题")
+ lines.append(" - 考虑到代码执行与展示用户代码的区别,使用'代码'表达运行代码,使用'代码'表达展示代码")  # NOTE(review): both quoted markers are identical — distinct run/display markers may be missing; confirm
+ lines.append(" - 注意运行的代码不会被用户看到,所以如果用户需要看到代码,你需要使用'代码'表达展示代码。")
+ lines.append(" - **重要**:代码执行后,系统会返回 \"Observation:\" 标记的内容(这是真实的执行结果)。请基于这些真实结果继续下一步思考,**不要在代码执行前自行编造观察结果**。")
+ lines.append("")
+ lines.append("3. 自验证:")
+ lines.append(" - 关键事件(工具调用、检索结果、代码执行、助手返回、准备最终回答)后,系统会进行显式自验证。")
+ lines.append(" - 如果自验证提示存在错误、证据不足、参数不完整或结果不可靠,必须优先修正、补充证据、重新调用工具,或清晰说明无法完成的部分。")
+ lines.append(" - 最终回答只有在自验证通过后才会展示给用户;如果系统返回 Verification feedback,请把它视为真实观察结果继续修正,不要忽略。")
+ lines.append("")
+ lines.append("在思考结束后,当你认为可以回答用户问题,那么可以不生成代码,直接生成最终回答给到用户并停止循环。")
+ lines.append("")
+ lines.append("生成最终回答时,你需要遵循以下规范:")
+ lines.append("1. Markdown格式要求:")
+ lines.append(" - 使用标准Markdown语法格式化输出,支持标题、列表、表格、代码块、链接等")
+ lines.append(" - 展示图片和视频使用链接方式,不需要外套代码块,格式:[链接文本](URL),图片格式:,视频格式:")  # NOTE(review): image/video formats are empty — confirm against the template
+ lines.append(" - 段落之间使用单个空行分隔,避免多个连续空行")
+ lines.append(" - 数学公式使用标准Markdown格式:行内公式用 $公式$,块级公式用 $$公式$$")
+ lines.append("")
+ lines.append("2. 引用标记规范(仅在使用了检索工具时):")
+ lines.append(" - 引用标记格式必须严格为:`[[字母+数字]]`,例如:`[[a1]]`、`[[b2]]`、`[[c3]]`")
+ lines.append(" - 字母部分必须是单个小写字母(a-e),数字部分必须是整数")
+ lines.append(" - 引用标记的字母和数字必须与检索工具的检索结果一一对应")
+ lines.append(" - 引用标记应紧跟在相关信息或句子之后,通常放在句末或段落末尾")
+ lines.append(" - 多个引用标记可以连续使用,例如:`[[a1]][[b2]]`")
+ lines.append(" - **重要**:仅添加引用标记,不要添加链接、参考文献列表等多余内容")
+ lines.append(" - 如果检索结果中没有匹配的引用,则不显示该引用标记")
+ lines.append("")
+ lines.append("3. 格式细节要求:")
+ lines.append(" - 避免在Markdown中使用HTML标签,优先使用Markdown原生语法")
+ lines.append(" - 代码块中的代码应保持原始格式,不要添加额外的转义字符")
+ lines.append(" - 若未使用检索工具,则不添加任何引用标记")
+ else:  # every non-"zh" language value falls back to English
+ lines = ["### Execution Process"]
+ lines.append("To solve tasks, you must plan forward through a series of steps in a loop of 'Think:' and 'Code:' sequences. **IMPORTANT: You must NOT output 'Observe Results:' before code execution. Observation results can ONLY be generated after code execution.**")
+ lines.append("")
+ lines.append("1. Think:")
+ lines.append(" - Analyze current task status and progress")
+ if is_manager and has_memory:
+ lines.append(" - Reference relevant contextual memories from previous interactions when applicable")
+ lines.append(" - Determine the best next action (use tools or delegate to agents)")
+ lines.append(" - Explain your decision logic and expected results")
+ lines.append("")
+ lines.append("2. Code:")
+ lines.append(" - Write code in simple Python")
+ lines.append(" - Follow Python coding standards and Python syntax")
+ lines.append(" - Correctly call tools or agents to solve problems")
+ lines.append(" - To distinguish between code execution and displaying user code, use 'code' for executing code and 'code' for displaying code")  # NOTE(review): both quoted markers are identical — confirm
+ lines.append(" - Note that executed code is not visible to users. If users need to see the code, use 'code' for displaying code.")
+ lines.append(" - **IMPORTANT**: After code execution, the system will return content with \"Observation:\" marker (this is the real execution result). Please continue your next thinking based on these real results. **Do NOT fabricate observation results before code execution.**")
+ lines.append("")
+ lines.append("3. Self-verification:")
+ lines.append(" - After critical events (tool calls, retrieval results, code execution, agent handoffs, and final-answer preparation), the system may run explicit verification.")
+ lines.append(" - If verification reports errors, insufficient evidence, incomplete parameters, or unreliable results, you must repair the issue, gather more evidence, call tools again, or clearly state what cannot be completed.")
+ lines.append(" - The final answer is shown to the user only after verification passes. If the system returns Verification feedback, treat it as a real observation and continue revising.")
+ lines.append("")
+ lines.append("After thinking, when you believe you can answer the user's question, you can generate a final answer directly to the user without generating code and stop the loop.")
+ lines.append("")
+ lines.append("When generating the final answer, you need to follow these specifications:")
+ lines.append("1. **Markdown Format Requirements**:")
+ lines.append(" - Use standard Markdown syntax to format your output, supporting headings, lists, tables, code blocks, and links.")
+ lines.append(" - Display images and videos using links instead of wrapping them in code blocks. Use `[link text](URL)` for links, `` for images, and `` for videos.")  # NOTE(review): image/video formats are empty backticks — confirm
+ lines.append(" - Use a single blank line between paragraphs, avoid multiple consecutive blank lines")
+ lines.append(" - Mathematical formulas use standard Markdown format: inline formulas use $formula$, block formulas use $$formula$$")
+ lines.append("")
+ lines.append("2. **Reference Mark Specifications** (only when retrieval tools are used):")
+ lines.append(" - Reference mark format must strictly be: `[[letter+number]]`, for example: `[[a1]]`, `[[b2]]`, `[[c3]]`")
+ lines.append(" - The letter part must be a single lowercase letter (a-e), the number part must be an integer")
+ lines.append(" - The letters and numbers of reference marks must correspond one-to-one with the retrieval results of retrieval tools")
+ lines.append(" - Reference marks should be placed immediately after relevant information or sentences, usually at the end of sentences or paragraphs")
+ lines.append(" - Multiple reference marks can be used consecutively, for example: `[[a1]][[b2]]`")
+ lines.append(" - **Important**: Only add reference marks, do not add links, reference lists, or other extraneous content")
+ lines.append(" - If there is no matching reference in the retrieval results, do not display that reference mark")
+ lines.append("")
+ lines.append("3. **Format Detail Requirements**:")
+ lines.append(" - Avoid using HTML tags in Markdown, prioritize native Markdown syntax")
+ lines.append(" - Code in code blocks should maintain original format, do not add extra escape characters")
+ lines.append(" - If no retrieval tools are used, do not add any reference marks")
+
+ content = "\n".join(lines)
+
+ return SystemPromptComponent(
+ content=content,
+ template_name="execution_flow",
+ priority=priority,
+ )
+
+
+def build_skeleton_constraint_component(
+ constraint: str,
+ language: str = "zh",
+ priority: int = 30,
+) -> "SystemPromptComponent":
+ """Build the SystemPromptComponent for the constraint section (template_name="constraint").
+
+ Section: "### 资源使用要求" / "### Resource Usage Requirements"
+ Content: the heading followed by the caller-supplied constraint text, inserted verbatim.
+ """
+ from nexent.core.agents.agent_model import SystemPromptComponent  # deferred import, resolved at call time
+
+ if language == "zh":
+ content = f"### 资源使用要求\n{constraint}"
+ else:  # every non-"zh" language value falls back to English
+ content = f"### Resource Usage Requirements\n{constraint}"
+
+ return SystemPromptComponent(
+ content=content,
+ template_name="constraint",
+ priority=priority,
+ )
+
+
+def build_skeleton_code_norms_component(
+ language: str = "zh",
+ is_manager: bool = True,
+ priority: int = 20,
+) -> "SystemPromptComponent":
+ """Build the SystemPromptComponent for the Python code norms section (template_name="code_norms").
+
+ Section: "### python代码规范" / "### Python Code Specifications"
+ Content: fixed numbered code rules; the agent-call rule (zh "11.", en "10.") is gated on is_manager.
+ """
+ from nexent.core.agents.agent_model import SystemPromptComponent  # deferred import, resolved at call time
+
+ if language == "zh":
+ lines = ["### python代码规范"]
+ lines.append("1. 如果认为是需要执行的代码,使用'代码'格式;如果是不需要执行仅用于展示的代码,使用'代码'格式,其中语言类型例如python、java、javascript等;")
+ lines.append("2. 只使用已定义的变量,变量将在多次调用之间持续保持;")
+ lines.append("3. 使用\"print()\"函数让下一次的模型调用看到对应变量信息;")
+ lines.append("4. 正确使用工具/助手的入参,使用关键字参数,不要用字典形式;")
+ lines.append("5. 避免在一轮对话中进行过多的工具/助手调用,这会导致输出格式难以预测;")
+ lines.append("6. 只在需要时调用工具/助手,不重复相同参数的调用;")
+ lines.append("7. 使用变量名保存函数调用结果,在每个中间步骤中,您可以使用\"print()\"来保存您需要的任何重要信息。被保存的信息在代码执行之间保持。print()输出的内容应被视为字符串,不要对其进行字典相关操作如.get()、[]等,避免类型错误;")
+ lines.append("9. 示例中的代码避免出现**if**、**for**等逻辑,仅调用工具/助手,示例中的每一次的行动都是确定事件。如果有不同的条件,你应该给出不同条件下的示例;")  # NOTE(review): zh labels skip "8." (the en list runs 1-11) — confirm against the template
+ lines.append("10. 工具调用使用关键字参数,如:tool_name(param1=\"value1\", param2=\"value2\");")
+ if is_manager:  # the agent-call rule only applies to agents that can delegate
+ lines.append("11. 助手调用必须使用task参数,如:assistant_name(task=\"任务描述\");")
+ lines.append("12. 不要放弃!你负责解决任务,而不是提供解决方向。")
+ else:  # every non-"zh" language value falls back to English
+ lines = ["### Python Code Specifications"]
+ lines.append("1. If it is considered to be code that needs to be executed, use 'code'. If the code does not need to be executed for display only, use 'code', where language_type can be python, java, javascript, etc;")
+ lines.append("2. Only use defined variables, variables will persist between multiple calls;")
+ lines.append("3. Use \"print()\" function to let the next model call see corresponding variable information;")
+ lines.append("4. Use tool/agent input parameters correctly, use keyword arguments, not dictionary format;")
+ lines.append("5. Avoid making too many tool/agent calls in one round of conversation, as this will make the output format unpredictable;")
+ lines.append("6. Only call tools/agents when needed, do not repeat calls with the same parameters;")
+ lines.append("7. Use variable names to save function call results. In each intermediate step, you can use \"print()\" to save any important information you need. The saved information persists between code executions. The content printed by print() should be treated as a string, do not perform dictionary-related operations such as .get(), [] etc., to avoid type errors;")
+ lines.append("8. Avoid **if**, **for** and other logic in example code, only call tools/agents. Each action in the example is a deterministic event. If there are different conditions, you should provide examples under different conditions;")
+ lines.append("9. Tool calls use keyword arguments, such as: tool_name(param1=\"value1\", param2=\"value2\");")
+ if is_manager:  # the agent-call rule only applies to agents that can delegate
+ lines.append("10. Agent calls must use task parameter, such as: agent_name(task=\"task description\");")
+ lines.append("11. Don't give up! You are responsible for solving the task, not providing solution directions.")
+
+ content = "\n".join(lines)
+
+ return SystemPromptComponent(
+ content=content,
+ template_name="code_norms",
+ priority=priority,
+ )
+
+
+def build_skeleton_footer_component(
+ few_shots: str,
+ language: str = "zh",
+ priority: int = 10,
+) -> "SystemPromptComponent":
+ """Build the SystemPromptComponent for the footer section (template_name="footer").
+
+ Section: "### 示例模板" / "### Example Templates", followed by a closing sentence
+ Content: the caller-supplied few_shots text, a blank line, then the fixed "1 million dollars reward" ending
+ """
+ from nexent.core.agents.agent_model import SystemPromptComponent  # deferred import, resolved at call time
+
+ if language == "zh":
+ content = f"### 示例模板\n{few_shots}\n\n现在开始!如果你正确解决任务,你将获得100万美元的奖励。"
+ else:  # every non-"zh" language value falls back to English
+ content = f"### Example Templates\n{few_shots}\n\nNow start! If you solve the task correctly, you will receive a reward of 1 million dollars."
+
+ return SystemPromptComponent(
+ content=content,
+ template_name="footer",
+ priority=priority,
+ )
+
+
+# =============================================================================
+# SECTION 3: Piecewise component builders (existing, enhanced)
+# =============================================================================
+
+
+def build_tools_component(
+ tools: Dict[str, Any],
+ knowledge_base_summary: Optional[str] = None,
+ language: str = "zh",
+ is_manager: bool = True,
+ priority: int = 50,
+) -> "ToolsComponent":
+ """Build ToolsComponent from tool configurations.
+
+ Args:
+ tools: Dict of tool name -> ToolConfig or tool dict
+ knowledge_base_summary: Summary text from knowledge bases, forwarded to the description formatter
+ language: Language code ('zh' or 'en'), forwarded to the description formatter
+ is_manager: Whether this is a manager agent, forwarded to the description formatter
+ priority: Component priority for selection
+
+ Returns:
+ ToolsComponent holding the normalized tool dicts and the formatted description text
+ """
+ from nexent.core.agents.agent_model import ToolsComponent  # deferred import, resolved at call time
+
+ tool_list = []  # one normalized dict per tool: name, description, inputs, output_type, source
+ for name, tool in tools.items():
+ if hasattr(tool, 'description'):  # object-style config: read fields as attributes
+ tool_dict = {
+ "name": name,
+ "description": tool.description,
+ "inputs": getattr(tool, 'inputs', ''),
+ "output_type": getattr(tool, 'output_type', ''),
+ "source": getattr(tool, 'source', 'local'),
+ }
+ else:  # otherwise the entry is read as a dict
+ tool_dict = {
+ "name": name,
+ "description": tool.get('description', ''),
+ "inputs": tool.get('inputs', ''),
+ "output_type": tool.get('output_type', ''),
+ "source": tool.get('source', 'local'),
+ }
+ tool_list.append(tool_dict)
+
+ formatted_desc = _format_tools_description(  # NOTE(review): receives the raw tools mapping, not tool_list — confirm it handles object-style tools
+ tools,
+ knowledge_base_summary=knowledge_base_summary,
+ language=language,
+ is_manager=is_manager,
+ )
+ return ToolsComponent(
+ tools=tool_list,
+ formatted_description=formatted_desc,
+ priority=priority,
+ )
+
+
+def build_skills_component(
+ skills: List[Dict[str, str]],
+ language: str = "zh",
+ priority: int = 70,
+) -> "SkillsComponent":
+ """Build SkillsComponent from skill configurations.
+
+ Args:
+ skills: List of skill dicts with name and description; stored on the component unchanged
+ language: Language code ('zh' or 'en'), forwarded to _format_skills_description
+ priority: Component priority for selection
+
+ Returns:
+ SkillsComponent holding the skills list and its formatted description text
+ """
+ from nexent.core.agents.agent_model import SkillsComponent  # deferred import, resolved at call time
+
+ formatted_desc = _format_skills_description(skills, language=language)
+ return SkillsComponent(
+ skills=skills,
+ formatted_description=formatted_desc,
+ priority=priority,
+ )
+
+
+def build_memory_component(
+ memory_list: List[Any],
+ search_query: Optional[str] = None,
+ language: str = "zh",
+ priority: int = 90,
+) -> "MemoryComponent":
+ """Build MemoryComponent from memory search results.
+
+ Args:
+ memory_list: List of memory search results; dict and str entries are normalized, other types are not copied
+ search_query: Query used to search memory; stored on the component unchanged
+ language: Language code ('zh' or 'en'), forwarded to _format_memory_context
+ priority: Component priority for selection
+
+ Returns:
+ MemoryComponent holding the normalized memories and the formatted context text
+ """
+ from nexent.core.agents.agent_model import MemoryComponent  # deferred import, resolved at call time
+
+ memories = []  # normalized entries: content, memory_type, metadata
+ for mem in memory_list:
+ if isinstance(mem, dict):
+ memories.append({
+ "content": mem.get('memory', '') or mem.get('content', ''),  # falls back to 'content' when 'memory' is missing or empty
+ "memory_type": mem.get('memory_type', 'user'),
+ "metadata": mem.get('metadata', {}),
+ })
+ elif isinstance(mem, str):  # a bare string becomes a 'user' memory with empty metadata
+ memories.append({
+ "content": mem,
+ "memory_type": "user",
+ "metadata": {},
+ })
+
+ formatted_content = _format_memory_context(memory_list, language=language)  # formatter gets the raw list, not the normalized entries
+ return MemoryComponent(
+ memories=memories,
+ formatted_content=formatted_content,
+ search_query=search_query,
+ priority=priority,
+ )
+
+
+def build_knowledge_base_component(
+ knowledge_base_summary: str,
+ kb_ids: Optional[List[str]] = None,
+ priority: int = 10,
+) -> "KnowledgeBaseComponent":
+ """Build KnowledgeBaseComponent from knowledge base summary.
+
+ Args:
+ knowledge_base_summary: Summary text from knowledge bases; stored as the component's summary
+ kb_ids: List of knowledge base IDs used; None or an empty list is stored as []
+ priority: Component priority for selection
+
+ Returns:
+ KnowledgeBaseComponent instance
+ """
+ from nexent.core.agents.agent_model import KnowledgeBaseComponent  # deferred import, resolved at call time
+
+ return KnowledgeBaseComponent(
+ summary=knowledge_base_summary,
+ kb_ids=kb_ids or [],
+ priority=priority,
+ )
+
+
+def build_managed_agents_component(
+ managed_agents: Dict[str, Any],
+ language: str = "zh",
+ priority: int = 45,
+) -> "ManagedAgentsComponent":
+ """Build ManagedAgentsComponent from managed sub-agent configurations.
+
+ Args:
+ managed_agents: Dict of agent name -> AgentConfig (entries without a description attribute are read as dicts)
+ language: Language code ('zh' or 'en'), forwarded to _format_managed_agents_description
+ priority: Component priority for selection
+
+ Returns:
+ ManagedAgentsComponent holding the normalized agent dicts and the formatted description text
+ """
+ from nexent.core.agents.agent_model import ManagedAgentsComponent  # deferred import, resolved at call time
+
+ agent_list = []  # one normalized dict per agent: name, description, tools
+ for name, agent in managed_agents.items():
+ if hasattr(agent, 'description'):  # object-style config: read fields as attributes
+ agent_dict = {
+ "name": name,
+ "description": agent.description,
+ "tools": [],
+ }
+ if hasattr(agent, 'tools'):
+ agent_dict["tools"] = [t.name for t in agent.tools if hasattr(t, 'name')]  # NOTE(review): if agent.tools is a name->tool dict this iterates keys, which lack .name — confirm
+ else:  # dict-style entry: the tools list is left empty
+ agent_dict = {
+ "name": name,
+ "description": agent.get('description', ''),
+ "tools": [],
+ }
+ agent_list.append(agent_dict)
+
+ formatted_desc = _format_managed_agents_description(managed_agents, language=language)
+ return ManagedAgentsComponent(
+ agents=agent_list,
+ formatted_description=formatted_desc,
+ priority=priority,
+ )
+
+
+def build_external_agents_component(
+ external_a2a_agents: Dict[str, Any],
+ language: str = "zh",
+ priority: int = 44,
+) -> "ExternalAgentsComponent":
+ """Build ExternalAgentsComponent from external A2A agent configurations.
+
+ Args:
+ external_a2a_agents: Dict of agent_id -> ExternalA2AAgentConfig (entries without an agent_id attribute are read as dicts)
+ language: Language code ('zh' or 'en'), forwarded to _format_external_agents_description
+ priority: Component priority for selection
+
+ Returns:
+ ExternalAgentsComponent holding the normalized agent dicts and the formatted description text
+ """
+ from nexent.core.agents.agent_model import ExternalAgentsComponent  # deferred import, resolved at call time
+
+ agent_list = []  # one normalized dict per agent: agent_id, name, description, url
+ for agent_id, agent in external_a2a_agents.items():
+ if hasattr(agent, 'agent_id'):  # object-style config: its own agent_id is used, not the mapping key
+ agent_dict = {
+ "agent_id": str(agent.agent_id),
+ "name": agent.name,
+ "description": agent.description,
+ "url": getattr(agent, 'url', ''),
+ }
+ else:  # dict-style entry: the mapping key supplies the agent_id
+ agent_dict = {
+ "agent_id": str(agent_id),
+ "name": agent.get('name', ''),
+ "description": agent.get('description', ''),
+ "url": agent.get('url', ''),
+ }
+ agent_list.append(agent_dict)
+
+ formatted_desc = _format_external_agents_description(external_a2a_agents, language=language)
+ return ExternalAgentsComponent(
+ agents=agent_list,
+ formatted_description=formatted_desc,
+ priority=priority,
+ )
+
+
+def build_system_prompt_component(
+ content: str,
+ template_name: Optional[str] = None,
+ priority: int = 100,
+) -> "SystemPromptComponent":
+ """Build SystemPromptComponent with rendered content.
+
+ Args:
+ content: Rendered system prompt content, stored unchanged
+ template_name: Source template name for reference (may be None)
+ priority: Component priority (defaults to 100)
+
+ Returns:
+ SystemPromptComponent instance
+ """
+ from nexent.core.agents.agent_model import SystemPromptComponent  # deferred import, resolved at call time
+
+ return SystemPromptComponent(
+ content=content,
+ template_name=template_name,
+ priority=priority,
+ )
+
+
+def build_skills_usage_component(
+ skills: List[Dict[str, str]],
+ language: str = "zh",
+ priority: int = 40,
+) -> "SystemPromptComponent":
+ """Build SystemPromptComponent for skills usage requirements (template_name="skills_usage").
+
+ This is a skeleton-like component but its content depends on
+ whether skills exist, so it's built dynamically via _format_skills_usage_requirements.
+
+ Args:
+ skills: List of skill dicts; an empty list yields the "no skills" fallback bullet
+ language: Language code ('zh' or 'en')
+ priority: Component priority
+
+ Returns:
+ SystemPromptComponent instance
+ """
+ from nexent.core.agents.agent_model import SystemPromptComponent  # deferred import, resolved at call time
+
+ content = _format_skills_usage_requirements(skills, language=language)
+ return SystemPromptComponent(
+ content=content,
+ template_name="skills_usage",
+ priority=priority,
+ )
+
+
+def build_agent_fallback_component(
+ managed_agents: Dict[str, Any],
+ external_a2a_agents: Dict[str, Any],
+ language: str = "zh",
+ priority: int = 5,
+) -> "SystemPromptComponent":
+ """Build SystemPromptComponent for agent fallback message (template_name="agent_fallback").
+
+ Only emits content when both agent mappings are empty; the component is still returned otherwise.
+
+ Args:
+ managed_agents: Dict of managed agents
+ external_a2a_agents: Dict of external agents
+ language: Language code ('zh' selects Chinese, anything else English)
+ priority: Component priority
+
+ Returns:
+ SystemPromptComponent instance (may have empty content)
+ """
+ from nexent.core.agents.agent_model import SystemPromptComponent  # deferred import, resolved at call time
+
+ content = _format_agent_fallback(managed_agents, external_a2a_agents, language=language)
+ return SystemPromptComponent(
+ content=content,
+ template_name="agent_fallback",
+ priority=priority,
+ )
+
+
+# =============================================================================
+# SECTION 4: Main assembly function - build_context_components
+# =============================================================================
+
+
+def build_context_components(
+ # Raw params for piecewise assembly (NEW in Goal 3)
+ duty: Optional[str] = None,
+ constraint: Optional[str] = None,
+ few_shots: Optional[str] = None,
+ app_name: Optional[str] = None,
+ app_description: Optional[str] = None,
+ user_id: Optional[str] = None,
+ language: str = "zh",
+ is_manager: bool = True,
+ # Piecewise data sources
+ tools: Optional[Dict[str, Any]] = None,
+ skills: Optional[List[Dict[str, str]]] = None,
+ managed_agents: Optional[Dict[str, Any]] = None,
+ external_a2a_agents: Optional[Dict[str, Any]] = None,
+ memory_list: Optional[List[Any]] = None,
+ memory_search_query: Optional[str] = None,
+ knowledge_base_summary: Optional[str] = None,
+ kb_ids: Optional[List[str]] = None,
+ # Legacy param (short-circuit removed in Goal 3); accepted but never read in this function
+ system_prompt: Optional[str] = None,
+ # Inclusion flags (kept for backward compatibility)
+ include_tools: bool = True,
+ include_skills: bool = True,
+ include_memory: bool = True,
+ include_knowledge_base: bool = True,
+ include_managed_agents: bool = True,
+ include_external_agents: bool = True,
+ include_app_context: bool = True,
+) -> List["ContextComponent"]:
+ """Build list of ContextComponents from agent configuration data.
+
+ Piecewise assembly: Each semantic section is emitted as a dedicated
+ ContextComponent, assembled in the exact order matching Jinja2 templates.
+
+ Assembly order (13 sections):
+ 1. Header (basic information) - if app_name, app_description and user_id are all set
+ 2. Memory (contextual memory) - if include_memory and memory_list exists
+ 3. Duty (core duties + safety principles) - if duty is set
+ 4. Skills (available skills + 6-step process) - if include_skills and skills exist
+ 5. Execution Flow (execution flow + output specification) - always
+ 6. Tools (available resources / 1. tools + file link guide) - if include_tools and tools exist
+ 7. Managed Agents (available resources / 2. assistants) - manager only, if include_managed_agents and managed_agents exist
+ 8. External Agents (external assistants) - manager only, if include_external_agents and external_a2a_agents exist
+ 9. Agent Fallback (no assistants currently available) - manager only, if no agents and the fallback text is non-empty
+ 10. Skills Usage (available resources / 3. skills + usage requirements) - if include_skills
+ 11. Constraint (resource usage requirements) - if constraint is set
+ 12. Code Norms (Python code norms) - always
+ 13. Footer (example templates + closing) - if few_shots is set
+
+ Note: The a330d815 short-circuit (if system_prompt: return [single])
+ has been REMOVED. All callers must provide raw params for piecewise assembly.
+ The system_prompt param is kept for future fallback use but not currently
+ used in the piecewise path.
+
+ Args:
+ duty: Agent's primary duty text
+ constraint: Resource usage constraint text
+ few_shots: Example templates text
+ app_name: Application name
+ app_description: Application description
+ user_id: Current user ID
+ language: Language code ('zh' or 'en')
+ is_manager: Whether this is a manager agent
+ tools: Dict of tool name -> ToolConfig
+ skills: List of skill dicts with name and description
+ managed_agents: Dict of agent name -> AgentConfig
+ external_a2a_agents: Dict of agent_id -> ExternalA2AAgentConfig
+ memory_list: List of memory search results
+ memory_search_query: Query used to search memory
+ knowledge_base_summary: Summary text from knowledge bases (only forwarded to the tools component)
+ kb_ids: List of knowledge base IDs (accepted but not read in this function)
+ system_prompt: (Legacy) Pre-rendered system prompt - NOT USED in piecewise path
+ include_*: Per-section switches; include_knowledge_base and include_app_context are accepted but not read here
+
+ Returns:
+ List of ContextComponent instances ready for ContextManager
+ """
+ components: List = []
+
+ # 1. Header - requires app_name, app_description and user_id to all be truthy
+ if app_name and app_description and user_id:
+ components.append(
+ build_skeleton_header_component(
+ app_name=app_name,
+ app_description=app_description,
+ user_id=user_id,
+ language=language,
+ )
+ )
+
+ # 2. Memory (if exists)
+ if include_memory and memory_list:
+ components.append(
+ build_memory_component(
+ memory_list=memory_list,
+ search_query=memory_search_query,
+ language=language,
+ )
+ )
+
+ # 3. Duty + Safety Principles
+ if duty:
+ components.append(
+ build_skeleton_duty_component(
+ duty=duty,
+ language=language,
+ )
+ )
+
+ # 4. Skills (if exists) - includes 6-step process
+ if include_skills and skills:
+ components.append(
+ build_skills_component(
+ skills=skills,
+ language=language,
+ )
+ )
+
+ # 5. Execution Flow - always emitted; memory_list is passed regardless of include_memory
+ components.append(
+ build_skeleton_execution_flow_component(
+ memory_list=memory_list,
+ language=language,
+ is_manager=is_manager,
+ )
+ )
+
+ # 6. Tools + File URL Guide - the only place knowledge_base_summary is forwarded
+ if include_tools and tools:
+ components.append(
+ build_tools_component(
+ tools=tools,
+ knowledge_base_summary=knowledge_base_summary,
+ language=language,
+ is_manager=is_manager,
+ )
+ )
+
+ # 7. Managed Agents (if exists) - manager only
+ if is_manager and include_managed_agents and managed_agents:
+ components.append(
+ build_managed_agents_component(
+ managed_agents=managed_agents,
+ language=language,
+ )
+ )
+
+ # 8. External Agents (if exists) - manager only
+ if is_manager and include_external_agents and external_a2a_agents:
+ components.append(
+ build_external_agents_component(
+ external_a2a_agents=external_a2a_agents,
+ language=language,
+ )
+ )
+
+ # 9. Agent Fallback (if no agents available) - manager only; the include_*_agents flags are not consulted here
+ if is_manager and not managed_agents and not external_a2a_agents:
+ fallback_comp = build_agent_fallback_component(
+ managed_agents=managed_agents or {},
+ external_a2a_agents=external_a2a_agents or {},
+ language=language,
+ )
+ if fallback_comp.content: # Only add if has content
+ components.append(fallback_comp)
+
+ # 10. Skills Usage Requirements - emitted even when skills is empty/None (an empty list is passed)
+ if include_skills:
+ components.append(
+ build_skills_usage_component(
+ skills=skills or [],
+ language=language,
+ )
+ )
+
+ # 11. Constraint
+ if constraint:
+ components.append(
+ build_skeleton_constraint_component(
+ constraint=constraint,
+ language=language,
+ )
+ )
+
+ # 12. Code Norms - always emitted
+ components.append(
+ build_skeleton_code_norms_component(
+ language=language,
+ is_manager=is_manager,
+ )
+ )
+
+ # 13. Footer
+ if few_shots:
+ components.append(
+ build_skeleton_footer_component(
+ few_shots=few_shots,
+ language=language,
+ )
+ )
+
+ return components
+
+
+def build_app_context_string(
+ app_name: str,
+ app_description: str,
+ user_id: str,
+) -> str:
+ """Public wrapper that delegates to _format_app_context and returns its result unchanged.
+
+ Args:
+ app_name: Application name
+ app_description: Application description
+ user_id: Current user ID
+
+ Returns:
+ The string produced by _format_app_context(app_name, app_description, user_id)
+ """
+ return _format_app_context(app_name, app_description, user_id)
diff --git a/backend/utils/file_management_utils.py b/backend/utils/file_management_utils.py
index 7d31a74bb..83c3957e7 100644
--- a/backend/utils/file_management_utils.py
+++ b/backend/utils/file_management_utils.py
@@ -2,6 +2,7 @@
import logging
import os
import subprocess
+import time
import traceback
from pathlib import Path
from typing import List
@@ -15,7 +16,6 @@
from consts.model import ProcessParams
from database.attachment_db import get_file_size_from_minio
from utils.auth_utils import get_current_user_id
-from utils.config_utils import tenant_config_manager
logger = logging.getLogger("file_management_utils")
@@ -45,18 +45,13 @@ async def trigger_data_process(files: List[dict], process_params: ProcessParams)
if not files:
return None
- # Get chunking size according to the embedding model
- embedding_model_id = None
+ # Get tenant_id from authorization for downstream task processing
+ embedding_model_id = process_params.model_id
tenant_id = None
try:
_, tenant_id = get_current_user_id(process_params.authorization)
- # Get embedding model ID from tenant config
- tenant_config = tenant_config_manager.load_config(tenant_id)
- embedding_model_id_str = tenant_config.get("EMBEDDING_ID") if tenant_config else None
- if embedding_model_id_str:
- embedding_model_id = int(embedding_model_id_str)
except Exception as e:
- logger.warning(f"Failed to get embedding model ID for tenant: {e}")
+ logger.warning(f"Failed to get tenant_id from authorization: {e}")
# Build headers with authorization
headers = {
@@ -134,19 +129,23 @@ async def trigger_data_process(files: List[dict], process_params: ProcessParams)
async def get_all_files_status(index_name: str):
"""
- Get status for all files according to index_name, matching corresponding tasks,
+ Get status for all files according to index_name, matching corresponding tasks,
and then convert to custom state
-
+
Args:
index_name: Index name to filter tasks
-
+
Returns:
Dictionary with path_or_url as keys and dict values: {state, latest_task_id}
"""
+ start_time = time.time()
try:
try:
async with httpx.AsyncClient() as client:
response = await client.get(f"{DATA_PROCESS_SERVICE}/tasks/indices/{index_name}", timeout=10.0)
+ http_duration = time.time() - start_time
+ logger.info(f"[get_all_files_status] HTTP request to {DATA_PROCESS_SERVICE}/tasks/indices/{index_name} "
+ f"completed in {http_duration:.3f}s, status={response.status_code}")
if response.status_code == 200:
tasks_list = response.json()
else:
@@ -214,41 +213,46 @@ async def get_all_files_status(index_name: str):
file_state['total_chunks'] = task_info.get(
'total_chunks', file_state.get('total_chunks'))
result = {}
+ # Use local fallback logic for state conversion (avoiding HTTP call to external service)
+ # The conversion logic is simple and can be done locally
+ step_local_start = time.time()
+
+ # Batch fetch progress info from Redis for all task_ids (single round-trip)
+ redis_progress_batch = {}
+ if file_states:
+ try:
+ from services.redis_service import get_redis_service
+ redis_service = get_redis_service()
+ all_task_ids = [fs.get('latest_task_id', '') for fs in file_states.values()]
+ all_task_ids = [tid for tid in all_task_ids if tid]
+ if all_task_ids:
+ redis_progress_batch = redis_service.batch_get_progress_info(all_task_ids) or {}
+ except Exception as e:
+ logger.debug(f"Failed to batch get Redis progress info: {e}")
+
for path_or_url, file_state in file_states.items():
- # Call remote state conversion API so this service no longer depends on Celery
- custom_state = await _convert_to_custom_state(
+ custom_state = _convert_to_custom_state_local(
process_celery_state=file_state['process_state'] or '',
forward_celery_state=file_state['forward_state'] or ''
)
- # Try to get progress from Redis - always check Redis for real-time progress
- # especially when task is in progress (FORWARDING or PROCESSING)
+
+ # Get progress from pre-fetched batch Redis data
processed_chunks = file_state.get('processed_chunks')
total_chunks = file_state.get('total_chunks')
task_id = file_state['latest_task_id'] or ''
- # Always try to get latest progress from Redis if task_id exists
- # Redis has the most up-to-date progress during vectorization
- if task_id:
- try:
- from services.redis_service import get_redis_service
- redis_service = get_redis_service()
- progress_info = redis_service.get_progress_info(task_id)
- if progress_info:
- # Use Redis progress as primary source (it's updated in real-time)
- redis_processed = progress_info.get('processed_chunks')
- redis_total = progress_info.get('total_chunks')
- if redis_processed is not None:
- processed_chunks = redis_processed
- if redis_total is not None:
- total_chunks = redis_total
- logger.debug(
- f"Retrieved progress from Redis for task {task_id}: {processed_chunks}/{total_chunks}")
- else:
- logger.debug(
- f"No progress info in Redis for task {task_id}, using task state values: {processed_chunks}/{total_chunks}")
- except Exception as e:
+ # Use pre-fetched batch Redis data for progress
+ if task_id and task_id in redis_progress_batch:
+ progress_info = redis_progress_batch.get(task_id)
+ if progress_info:
+ redis_processed = progress_info.get('processed_chunks')
+ redis_total = progress_info.get('total_chunks')
+ if redis_processed is not None:
+ processed_chunks = redis_processed
+ if redis_total is not None:
+ total_chunks = redis_total
logger.debug(
- f"Failed to get progress from Redis for task {task_id}: {str(e)}")
+ f"Retrieved progress from batch Redis for task {task_id}: {processed_chunks}/{total_chunks}")
result[path_or_url] = {
'state': custom_state,
@@ -259,41 +263,26 @@ async def get_all_files_status(index_name: str):
'processed_chunks': processed_chunks,
'total_chunks': total_chunks,
}
+ step_local_duration = time.time() - step_local_start
+ logger.info(f"[get_all_files_status] Local processing: {len(result)} files in {step_local_duration:.3f}s")
+ total_duration = time.time() - start_time
+ logger.info(f"[get_all_files_status] Complete: {len(result)} files processed in {total_duration:.3f}s")
return result
except Exception as e:
logger.error(f"Error getting all files status for index {index_name}, details: {str(e)} {traceback.format_exc()}")
return {} # Return empty dict on error
-async def _convert_to_custom_state(process_celery_state: str, forward_celery_state: str) -> str:
- """Delegates Celery-state conversion to the data-process service.
-
- This removes the direct dependency on the *celery* package for callers of
- `file_management_utils`.
+def _convert_to_custom_state_local(process_celery_state: str, forward_celery_state: str) -> str:
+ """
+ Local state conversion logic - handles all known Celery states.
+ Returns "UNKNOWN" only if the states are not recognized.
"""
- try:
- payload = {
- "process_state": process_celery_state,
- "forward_state": forward_celery_state,
- }
-
- async with httpx.AsyncClient() as client:
- response = await client.post(f"{DATA_PROCESS_SERVICE}/tasks/convert_state", json=payload, timeout=5.0)
-
- if response.status_code == 200:
- return response.json().get("state", "WAIT_FOR_PROCESSING")
- else:
- logger.warning(
- "State conversion service error: %s - %s", response.status_code, response.text
- )
- except Exception as e:
- logger.warning("Failed to convert state via service: %s", str(e))
-
- # Fallback mapping without Celery dependency (string comparison only)
success = "SUCCESS"
failure = "FAILURE"
pending = "PENDING"
started = "STARTED"
+ unknown = "UNKNOWN"
if process_celery_state == failure:
return "PROCESS_FAILED"
@@ -304,6 +293,11 @@ async def _convert_to_custom_state(process_celery_state: str, forward_celery_sta
if not process_celery_state and not forward_celery_state:
return "WAIT_FOR_PROCESSING"
+ # Check if states are known Celery states
+ known_states = {success, failure, pending, started, ""}
+ if process_celery_state not in known_states or forward_celery_state not in known_states:
+ return unknown
+
forward_state_map = {
pending: "WAIT_FOR_FORWARDING",
started: "FORWARDING",
diff --git a/backend/utils/http_client_utils.py b/backend/utils/http_client_utils.py
new file mode 100644
index 000000000..fd215c067
--- /dev/null
+++ b/backend/utils/http_client_utils.py
@@ -0,0 +1,22 @@
+"""HTTP client factory utilities shared across services."""
+
+import httpx
+from httpx import AsyncClient
+
+
+def create_httpx_client(
+ headers: dict[str, str] | None = None,
+ timeout: httpx.Timeout | None = None,
+ auth: httpx.Auth | None = None,
+ follow_redirects: bool = True,
+ **extra_kwargs,
+) -> AsyncClient:
+ return AsyncClient(
+ headers=headers,
+ timeout=timeout,
+ auth=auth,
+ follow_redirects=follow_redirects,
+ trust_env=False,
+ verify=False,
+ **extra_kwargs,
+ )
diff --git a/backend/utils/llm_utils.py b/backend/utils/llm_utils.py
index d1aa6fcf3..f7caba37d 100644
--- a/backend/utils/llm_utils.py
+++ b/backend/utils/llm_utils.py
@@ -6,6 +6,7 @@
from consts.exceptions import AppException
from database.model_management_db import get_model_by_model_id
from nexent.core.models import OpenAIModel
+from nexent.monitor import set_monitoring_context, set_monitoring_operation
from utils.config_utils import get_model_name_from_config
logger = logging.getLogger("llm_utils")
@@ -66,6 +67,14 @@ def call_llm_for_system_prompt(
"""
llm_model_config = get_model_by_model_id(model_id=model_id, tenant_id=tenant_id)
+ display_name = llm_model_config.get("display_name", "") if llm_model_config else ""
+ if tenant_id:
+ set_monitoring_context(tenant_id=tenant_id)
+ set_monitoring_operation("system_prompt_generation",
+ display_name=display_name or None)
+
+ timeout_seconds = llm_model_config.get("timeout_seconds") if llm_model_config else None
+
llm = OpenAIModel(
model_id=get_model_name_from_config(llm_model_config) if llm_model_config else "",
api_base=llm_model_config.get("base_url", "") if llm_model_config else "",
@@ -74,6 +83,8 @@ def call_llm_for_system_prompt(
top_p=0.95,
model_factory=llm_model_config.get("model_factory") if llm_model_config else None,
ssl_verify=llm_model_config.get("ssl_verify", True) if llm_model_config else True,
+ display_name=display_name or None,
+ timeout_seconds=timeout_seconds,
)
messages = [
{"role": MESSAGE_ROLE["SYSTEM"], "content": system_prompt},
@@ -92,9 +103,21 @@ def call_llm_for_system_prompt(
reasoning_content_seen = False
content_tokens_seen = 0
for chunk in current_request:
- delta = chunk.choices[0].delta
+ choices = getattr(chunk, "choices", None)
+ if choices is None:
+ logger.warning("Received non-standard chunk without choices during prompt generation.")
+ continue
+ if not choices:
+ logger.debug("Received empty choices chunk during prompt generation; skipping.")
+ continue
+
+ delta = getattr(choices[0], "delta", None)
+ if delta is None:
+ logger.debug("Skipping LLM stream chunk without delta")
+ continue
+
reasoning_content = getattr(delta, "reasoning_content", None)
- new_token = delta.content
+ new_token = getattr(delta, "content", None)
# Note: reasoning_content is separate metadata and doesn't affect content filtering
# We only filter content based on tags in delta.content
diff --git a/backend/utils/memory_utils.py b/backend/utils/memory_utils.py
index ada7019a1..e3ba01d6d 100644
--- a/backend/utils/memory_utils.py
+++ b/backend/utils/memory_utils.py
@@ -1,4 +1,5 @@
import logging
+import re
from typing import Dict, Any
from urllib.parse import urlparse
@@ -9,6 +10,11 @@
logger = logging.getLogger("memory_utils")
+def _sanitize_index_component(value: str) -> str:
+ """Lowercase value and replace every character outside [a-z0-9_.-] with "_" for use in an Elasticsearch index name."""
+ return re.sub(r"[^a-z0-9_.-]", "_", value.lower())
+
+
def build_memory_config(tenant_id: str) -> Dict[str, Any]:
"""Return a fully-validated configuration dictionary for *mem0* ``Memory``.
"""
@@ -30,9 +36,8 @@ def build_memory_config(tenant_id: str) -> Dict[str, Any]:
es_host = f"{parsed.scheme}://{parsed.hostname}"
es_port = parsed.port
# Normalize repo/name to avoid problematic characters in index names
- safe_repo = embed_raw["model_repo"].lower().replace(
- "/", "_") if embed_raw["model_repo"] else ""
- safe_name = embed_raw["model_name"].lower().replace("/", "_")
+ safe_repo = _sanitize_index_component(embed_raw["model_repo"]) if embed_raw["model_repo"] else ""
+ safe_name = _sanitize_index_component(embed_raw["model_name"])
index_name = (
f"mem0_{safe_repo}_{safe_name}_{embed_raw['max_tokens']}"
if embed_raw["model_repo"]
@@ -73,4 +78,4 @@ def build_memory_config(tenant_id: str) -> Dict[str, Any]:
},
"telemetry": {"enabled": False},
}
- return memory_config
\ No newline at end of file
+ return memory_config
diff --git a/backend/utils/monitoring.py b/backend/utils/monitoring.py
index eb20d88ec..e6da57041 100644
--- a/backend/utils/monitoring.py
+++ b/backend/utils/monitoring.py
@@ -2,12 +2,12 @@
Global Monitoring Manager for Backend
This module initializes and configures the global monitoring manager instance
-with backend environment variables. All other backend modules should import
-`monitoring_manager` directly from this module.
+with backend environment variables using OTLP protocol. All other backend modules
+should import `monitoring_manager` directly from this module.
Usage:
from utils.monitoring import monitoring_manager
-
+
@monitoring_manager.monitor_endpoint("my_service.my_function")
async def my_function():
return {"status": "ok"}
@@ -17,67 +17,88 @@ async def my_function():
MonitoringConfig,
get_monitoring_manager
)
-# Import configuration from backend (support both relative and absolute imports)
try:
- # Try relative import first (when running from backend directory)
from consts.const import (
ENABLE_TELEMETRY,
- SERVICE_NAME,
- JAEGER_ENDPOINT,
- PROMETHEUS_PORT,
- TELEMETRY_SAMPLE_RATE,
- LLM_SLOW_REQUEST_THRESHOLD_SECONDS,
- LLM_SLOW_TOKEN_RATE_THRESHOLD
+ MONITORING_PROVIDER,
+ MONITORING_PROJECT_NAME,
+ OTEL_SERVICE_NAME,
+ OTEL_EXPORTER_OTLP_ENDPOINT,
+ OTEL_EXPORTER_OTLP_TRACES_ENDPOINT,
+ OTEL_EXPORTER_OTLP_METRICS_ENDPOINT,
+ OTEL_EXPORTER_OTLP_PROTOCOL,
+ OTEL_EXPORTER_OTLP_METRICS_ENABLED,
+ MONITORING_INSTRUMENT_REQUESTS,
+ MONITORING_FASTAPI_INCLUDED_URLS,
+ MONITORING_FASTAPI_EXCLUDED_URLS,
+ MONITORING_FASTAPI_EXCLUDE_SPANS,
+ MONITORING_TRACE_CONTENT_MODE,
+ MONITORING_TRACE_MAX_CHARS,
+ MONITORING_TRACE_MAX_ITEMS,
+ OTLP_HEADERS,
+ TELEMETRY_SAMPLE_RATE
)
except ImportError:
- # Fallback to absolute import (when running from project root)
from backend.consts.const import (
ENABLE_TELEMETRY,
- SERVICE_NAME,
- JAEGER_ENDPOINT,
- PROMETHEUS_PORT,
- TELEMETRY_SAMPLE_RATE,
- LLM_SLOW_REQUEST_THRESHOLD_SECONDS,
- LLM_SLOW_TOKEN_RATE_THRESHOLD
+ MONITORING_PROVIDER,
+ MONITORING_PROJECT_NAME,
+ OTEL_SERVICE_NAME,
+ OTEL_EXPORTER_OTLP_ENDPOINT,
+ OTEL_EXPORTER_OTLP_TRACES_ENDPOINT,
+ OTEL_EXPORTER_OTLP_METRICS_ENDPOINT,
+ OTEL_EXPORTER_OTLP_PROTOCOL,
+ OTEL_EXPORTER_OTLP_METRICS_ENABLED,
+ MONITORING_INSTRUMENT_REQUESTS,
+ MONITORING_FASTAPI_INCLUDED_URLS,
+ MONITORING_FASTAPI_EXCLUDED_URLS,
+ MONITORING_FASTAPI_EXCLUDE_SPANS,
+ MONITORING_TRACE_CONTENT_MODE,
+ MONITORING_TRACE_MAX_CHARS,
+ MONITORING_TRACE_MAX_ITEMS,
+ OTLP_HEADERS,
+ TELEMETRY_SAMPLE_RATE
)
import logging
logger = logging.getLogger(__name__)
-# ============================================================================
-# Global Monitoring Manager Instance
-# ============================================================================
-
-# Get the global monitoring manager instance
monitoring_manager = get_monitoring_manager()
-# Initialize monitoring configuration immediately when this module is imported
-
def _initialize_monitoring():
- """Initialize monitoring configuration with backend environment variables."""
+ """Initialize monitoring configuration with OTLP settings."""
config = MonitoringConfig(
enable_telemetry=ENABLE_TELEMETRY,
- service_name=SERVICE_NAME,
- jaeger_endpoint=JAEGER_ENDPOINT,
- prometheus_port=PROMETHEUS_PORT,
+ service_name=OTEL_SERVICE_NAME,
+ provider=MONITORING_PROVIDER or "otlp",
+ otlp_endpoint=OTEL_EXPORTER_OTLP_ENDPOINT,
+ otlp_traces_endpoint=OTEL_EXPORTER_OTLP_TRACES_ENDPOINT or None,
+ otlp_metrics_endpoint=OTEL_EXPORTER_OTLP_METRICS_ENDPOINT or None,
+ otlp_protocol=OTEL_EXPORTER_OTLP_PROTOCOL,
+ otlp_headers=OTLP_HEADERS,
+ export_metrics=OTEL_EXPORTER_OTLP_METRICS_ENABLED,
+ instrument_requests=MONITORING_INSTRUMENT_REQUESTS,
+ fastapi_included_urls=MONITORING_FASTAPI_INCLUDED_URLS,
+ fastapi_excluded_urls=MONITORING_FASTAPI_EXCLUDED_URLS,
+ fastapi_exclude_spans=MONITORING_FASTAPI_EXCLUDE_SPANS,
+ project_name=MONITORING_PROJECT_NAME or None,
telemetry_sample_rate=TELEMETRY_SAMPLE_RATE,
- llm_slow_request_threshold_seconds=LLM_SLOW_REQUEST_THRESHOLD_SECONDS,
- llm_slow_token_rate_threshold=LLM_SLOW_TOKEN_RATE_THRESHOLD
+ trace_content_mode=MONITORING_TRACE_CONTENT_MODE,
+ trace_max_chars=MONITORING_TRACE_MAX_CHARS,
+ trace_max_items=MONITORING_TRACE_MAX_ITEMS
)
- # Configure the SDK monitoring system using the singleton
monitoring_manager.configure(config)
logger.info(
- f"Global monitoring initialized: service_name={SERVICE_NAME}, enable_telemetry={ENABLE_TELEMETRY}")
+ f"OTLP monitoring initialized: service_name={OTEL_SERVICE_NAME}, "
+ f"enable_telemetry={config.enable_telemetry}, provider={config.provider}, "
+ f"endpoint={config.otlp_endpoint}, trace_endpoint={config.get_trace_endpoint()}, "
+ f"protocol={OTEL_EXPORTER_OTLP_PROTOCOL}"
+ )
-# Initialize monitoring when module is imported
_initialize_monitoring()
-
-# Export the global monitoring manager instance
-__all__ = [
- 'monitoring_manager'
-]
+__all__ = ['monitoring_manager']
diff --git a/backend/utils/nacos_client.py b/backend/utils/nacos_client.py
new file mode 100644
index 000000000..0fa87410a
--- /dev/null
+++ b/backend/utils/nacos_client.py
@@ -0,0 +1,624 @@
+"""
+Nacos Client for service discovery.
+
+Provides functionality to query service instances from Nacos service registry.
+Used by A2A agent discovery to find external A2A agents registered in Nacos.
+"""
+import logging
+from typing import Any, Dict, Optional
+
+import aiohttp
+
+logger = logging.getLogger(__name__)
+
+
+class NacosClientError(Exception):
+ """Base class for the Nacos client exceptions defined in this module; catch it to handle any of them."""
+ pass
+
+
+class NacosConnectionError(NacosClientError):
+ """Raised when a request to Nacos fails (aiohttp.ClientError) or returns an unexpected HTTP status."""
+ pass
+
+
+class NacosServiceNotFoundError(NacosClientError):
+ """Signals that a requested service is missing. NOTE(review): query_service_instance returns None on 404 - confirm where this is raised."""
+ pass
+
+
+class NacosClient:
+ """Async client for Nacos service registry operations.
+
+ Provides methods to query service instances for A2A agent discovery.
+ """
+
+ def __init__(
+ self,
+ nacos_addr: str,
+ username: Optional[str] = None,
+ password: Optional[str] = None
+ ):
+ """Store connection settings; this constructor performs no network I/O.
+
+ Args:
+ nacos_addr: Nacos server address (e.g., http://nacos-server:8848); trailing "/" is stripped.
+ username: Optional Nacos username for authentication.
+ password: Optional Nacos password for authentication.
+ """
+ self.nacos_addr = nacos_addr.rstrip("/")
+ self.username = username
+ self.password = password
+ self._session: Optional[aiohttp.ClientSession] = None # created lazily by _get_session()
+ self._access_token: Optional[str] = None # NOTE(review): not used by the methods reviewed here - confirm usage
+
+ async def _get_session(self) -> aiohttp.ClientSession:
+ """Return the cached aiohttp session, creating a new one if none exists or the cached one is closed."""
+ if self._session is None or self._session.closed:
+ timeout = aiohttp.ClientTimeout(total=30) # 30-second total timeout for each request
+ self._session = aiohttp.ClientSession(timeout=timeout)
+ return self._session
+
+ async def close(self) -> None:
+ """Close the cached session if it is still open and reset the stored reference to None."""
+ if self._session and not self._session.closed:
+ await self._session.close()
+ self._session = None
+
+ def _build_auth_params(self) -> Dict[str, str]:
+ """Return "username"/"password" entries for whichever credentials are set; an empty dict if neither is."""
+ params = {}
+ if self.username:
+ params["username"] = self.username
+ if self.password:
+ params["password"] = self.password
+ return params # the query methods send these as URL query parameters
+
+ async def query_a2a_agent(
+ self,
+ agent_name: str,
+ namespace: str = "public"
+ ) -> Optional[Dict[str, Any]]:
+ """Query A2A agent info from Nacos via GET {nacos_addr}/nacos/v3/admin/ai/a2a.
+
+ Args:
+ agent_name: The name of the A2A agent to query (surrounding whitespace is stripped).
+ namespace: Nacos namespace ID; stripped, and "public" is used when it is empty.
+
+ Returns:
+ The "data" field of the Nacos JSON response, returned unmodified.
+ NOTE(review): presumably holds the agent name, endpoint URL and
+ metadata - confirm against the Nacos A2A API.
+ None on HTTP 404, when the response "code" is not 0,
+ or when "data" is empty.
+
+ Raises:
+ NacosConnectionError: On aiohttp.ClientError or an HTTP status other than 200/404.
+ """
+ params = self._build_auth_params()
+ agent_name = agent_name.strip()
+ params["agentName"] = agent_name
+ params["namespaceId"] = namespace.strip() if namespace else "public"
+
+ url = f"{self.nacos_addr}/nacos/v3/admin/ai/a2a"
+
+ try:
+ session = await self._get_session()
+ async with session.get(url, params=params) as response:
+ text = await response.text() # body text is quoted in the error raised for unexpected statuses
+
+ if response.status == 200:
+ data = await response.json()
+ return self._parse_a2a_response(data, agent_name)
+ elif response.status == 404:
+ logger.warning(
+ f"A2A agent '{agent_name}' not found in Nacos namespace '{namespace}'"
+ )
+ return None
+ else:
+ raise NacosConnectionError(
+ f"Nacos A2A API returned status {response.status}: {text}"
+ )
+
+ except aiohttp.ClientError as e:
+ logger.error(f"Failed to connect to Nacos at {self.nacos_addr}: {e}")
+ raise NacosConnectionError(f"Failed to connect to Nacos: {e}") from e
+
+ def _parse_a2a_response(
+ self,
+ response_data: Dict[str, Any],
+ agent_name: str
+ ) -> Optional[Dict[str, Any]]:
+ """Extract the "data" payload from a Nacos A2A response envelope.
+
+ Args:
+ response_data: Decoded JSON body from the Nacos A2A API; its "code", "message" and "data" keys are read.
+ agent_name: Agent name, used only in log messages.
+
+ Returns:
+ The "data" value as-is, or None when "code" is not 0 or "data" is missing/empty.
+ """
+ if response_data.get("code") != 0:
+ msg = response_data.get("message", "unknown error")
+ logger.warning(f"Nacos A2A API error for '{agent_name}': {msg}")
+ return None
+
+ data = response_data.get("data")
+ if not data:
+ logger.info(f"No A2A agent data found for '{agent_name}'")
+ return None
+
+ logger.info(f"[Nacos A2A Parse] Found agent: {data}")
+ return data
+
+ async def query_service_instance(
+ self,
+ service_name: str,
+ namespace: str = "public",
+ clusters: Optional[str] = None,
+ healthy_only: bool = False,
+ group_name: str = "DEFAULT_GROUP"
+ ) -> Optional[Dict[str, Any]]:
+ """Query one service instance from Nacos via GET {nacos_addr}/nacos/v3/client/ns/instance/list.
+
+ Args:
+ service_name: The name of the service to query (surrounding whitespace is stripped).
+ namespace: Nacos namespace ID; stripped, and "public" is used when it is empty.
+ clusters: Comma-separated cluster names (optional); sent as "clusterName".
+ healthy_only: If True, send healthyOnly=true with the request.
+ group_name: Nacos group name (defaults to "DEFAULT_GROUP").
+
+ Returns:
+ A dict for one instance (built by _parse_v3_instance_response) with keys:
+ - ip, port: Instance address
+ - healthy, weight, enabled: Instance status fields
+ - metadata: Instance metadata dict (may contain 'a2a_card_url')
+ Or None on HTTP 404, a non-zero response "code", or when no instance is listed.
+
+ Raises:
+ NacosConnectionError: On aiohttp.ClientError or an HTTP status other than 200/404.
+ NOTE: a 404 returns None; NacosServiceNotFoundError is not raised by this method.
+ """
+ params = self._build_auth_params()
+ service_name = service_name.strip()
+ params["serviceName"] = service_name
+ params["namespaceId"] = namespace.strip() if namespace else "public"
+ params["groupName"] = group_name
+ if clusters:
+ params["clusterName"] = clusters
+ if healthy_only:
+ params["healthyOnly"] = "true"
+
+ url = f"{self.nacos_addr}/nacos/v3/client/ns/instance/list"
+
+ # Credentials from _build_auth_params() are deliberately not part of this log line.
+ logger.info(
+ f"[Nacos Query] URL: {url}, params: "
+ f"serviceName='{service_name}', namespaceId='{namespace}', groupName='{group_name}'"
+ )
+
+ try:
+ session = await self._get_session()
+ async with session.get(url, params=params) as response:
+ text = await response.text()
+ logger.info(
+ f"[Nacos Response] status={response.status}, "
+ f"body_len={len(text)}, body={text[:300]}"
+ )
+
+ if response.status == 200:
+ data = await response.json()
+ return self._parse_v3_instance_response(data, service_name)
+ elif response.status == 404:
+ logger.warning(
+ f"Service '{service_name}' not found in Nacos namespace '{namespace}'"
+ )
+ return None
+ else:
+ raise NacosConnectionError(
+ f"Nacos API returned status {response.status}: {text}"
+ )
+
+ except aiohttp.ClientError as e:
+ logger.error(f"Failed to connect to Nacos at {self.nacos_addr}: {e}")
+ raise NacosConnectionError(f"Failed to connect to Nacos: {e}") from e
+
+ def _parse_v3_instance_response(
+ self,
+ response_data: Dict[str, Any],
+ service_name: str
+ ) -> Optional[Dict[str, Any]]:
+ """Pick one instance from a Nacos v3 client API instance list response.
+
+ Nacos v3 API returns: { "code": 0, "message": "success", "data": [...] }
+
+ Args:
+ response_data: Response data from Nacos v3 API.
+ service_name: Service name, used only in log messages.
+
+ Returns:
+ The first enabled and healthy instance, else the first listed instance; None if "code" is not 0 or no instances are listed.
+ """
+ if response_data.get("code") != 0:
+ msg = response_data.get("message", "unknown error")
+ logger.warning(f"Nacos API error for '{service_name}': {msg}")
+ return None
+
+ data = response_data.get("data")
+ if data is None:
+ logger.info(f"[Nacos Parse] No data field in response for service '{service_name}'")
+ return None
+
+ hosts = data if isinstance(data, list) else [] # a non-list "data" is treated as no instances
+ logger.info(f"[Nacos Parse] Found {len(hosts)} instances for service '{service_name}'")
+
+ if not hosts:
+ logger.info(f"[Nacos Parse] No hosts found for service '{service_name}'")
+ return None
+
+ for instance in hosts:
+ instance_data = {
+ "ip": instance.get("ip"),
+ "port": instance.get("port"),
+ "healthy": instance.get("healthy", False),
+ "weight": instance.get("weight", 1.0),
+ "enabled": instance.get("enabled", True),
+ "metadata": instance.get("metadata") or {}
+ }
+
+ if instance_data["enabled"] and instance_data.get("healthy", False):
+ logger.info(
+ f"[Nacos Parse] Found healthy instance for '{service_name}': "
+ f"{instance_data['ip']}:{instance_data['port']}"
+ )
+ return instance_data
+
+ first_instance = hosts[0] # fallback: this instance may be unhealthy or disabled
+ logger.info(
+ f"[Nacos Parse] No healthy instance found, returning first instance for '{service_name}': "
+ f"{first_instance.get('ip')}:{first_instance.get('port')}"
+ )
+ return {
+ "ip": first_instance.get("ip"),
+ "port": first_instance.get("port"),
+ "healthy": first_instance.get("healthy", False),
+ "weight": first_instance.get("weight", 1.0),
+ "enabled": first_instance.get("enabled", True),
+ "metadata": first_instance.get("metadata") or {}
+ }
+
+ def _parse_instance_response(
+ self,
+ data: Dict[str, Any],
+ service_name: str
+ ) -> Optional[Dict[str, Any]]:
+ """Parse Nacos instance list response (v1 API legacy format).
+
+ Args:
+ data: Response data from Nacos /instance/list API.
+ service_name: Service name for fallback metadata.
+
+ Returns:
+ First instance as a dict or None if no instances exist.
+ """
+ hosts = data.get("hosts") or []
+
+ if not hosts:
+ logger.debug(f"No hosts found for service '{service_name}'")
+ return None
+
+ for instance in hosts:
+ instance_data = {
+ "ip": instance.get("ip"),
+ "port": instance.get("port"),
+ "healthy": instance.get("healthy", False),
+ "weight": instance.get("weight", 1.0),
+ "enabled": instance.get("enabled", True),
+ "metadata": instance.get("metadata") or {}
+ }
+
+ if instance_data["enabled"] and instance_data.get("healthy", False):
+ logger.debug(
+ f"Found healthy instance for '{service_name}': "
+ f"{instance_data['ip']}:{instance_data['port']}"
+ )
+ return instance_data
+
+ first_instance = hosts[0]
+ return {
+ "ip": first_instance.get("ip"),
+ "port": first_instance.get("port"),
+ "healthy": first_instance.get("healthy", False),
+ "weight": first_instance.get("weight", 1.0),
+ "enabled": first_instance.get("enabled", True),
+ "metadata": first_instance.get("metadata") or {}
+ }
+
+ async def list_services(
+     self,
+     namespace: str = "public",
+     page_no: int = 1,
+     page_size: int = 100,
+     group_name: str = "DEFAULT_GROUP"
+ ) -> Dict[str, Any]:
+     """List the services of a namespace through the Nacos v3 Admin API.
+
+     When both username and password are configured, an access token is
+     obtained first and sent in the ``AccessToken`` header.
+
+     Args:
+         namespace: Nacos namespace ID (defaults to "public").
+         page_no: Page number (1-indexed).
+         page_size: Number of services per page.
+         group_name: Group name filter (defaults to "DEFAULT_GROUP").
+
+     Returns:
+         Dict containing:
+         - count: ``data.count`` from the response (0 when absent)
+         - services: ``data.doms`` from the response ([] when absent)
+
+     Raises:
+         NacosConnectionError: On authentication failure, a non-zero API code,
+             a non-200 HTTP status, or an aiohttp client error.
+     """
+     session = await self._get_session()
+     access_token = None
+     if self.username and self.password:
+         access_token = await self._get_access_token(session)
+         if not access_token:
+             raise NacosConnectionError("Authentication failed. Please check username and password.")
+
+     params = {
+         "pageNo": page_no,
+         "pageSize": page_size,
+         "namespaceId": namespace,
+         "groupName": group_name
+     }
+     headers = {}
+     if access_token:
+         headers["AccessToken"] = access_token
+
+     # NOTE(review): get_service_detail() requests this same path. Confirm
+     # against the Nacos v3 Admin API reference that the paged listing is
+     # served here and that it answers with "count"/"doms" - TODO verify.
+     url = f"{self.nacos_addr}/nacos/v3/admin/ns/service"
+
+     try:
+         async with session.get(url, params=params, headers=headers) as response:
+             if response.status == 200:
+                 data = await response.json()
+                 if data.get("code") == 0:
+                     # "data" can be present but null (or not an object);
+                     # .get("data", {}) would then return that value and the
+                     # chained .get() would raise AttributeError.
+                     payload = data.get("data")
+                     if not isinstance(payload, dict):
+                         payload = {}
+                     return {
+                         "count": payload.get("count", 0),
+                         "services": payload.get("doms", [])
+                     }
+                 elif data.get("code") == 403:
+                     # Drop the cached token so the next call logs in again.
+                     self._clear_access_token()
+                     raise NacosConnectionError("Authentication failed. Please check username and password.")
+                 else:
+                     raise NacosConnectionError(
+                         f"Nacos API error: {data.get('message', 'unknown')}"
+                     )
+             elif response.status == 403:
+                 self._clear_access_token()
+                 raise NacosConnectionError("Authentication failed. Please check username and password.")
+             else:
+                 text = await response.text()
+                 raise NacosConnectionError(
+                     f"Nacos API returned status {response.status}: {text}"
+                 )
+
+     except aiohttp.ClientError as e:
+         logger.error(f"Failed to list services from Nacos: {e}")
+         raise NacosConnectionError(f"Failed to list services from Nacos: {e}") from e
+
+ async def get_service_detail(
+     self,
+     service_name: str,
+     namespace: str = "public",
+     group_name: str = "DEFAULT_GROUP"
+ ) -> Optional[Dict[str, Any]]:
+     """Fetch the detail record of one service through the Nacos v3 Admin API.
+
+     When both username and password are configured, an access token is
+     obtained first and sent in the ``AccessToken`` header.
+
+     Args:
+         service_name: The name of the service.
+         namespace: Nacos namespace ID (defaults to "public").
+         group_name: Nacos group name (defaults to "DEFAULT_GROUP").
+
+     Returns:
+         The ``data`` member of the response, or ``None`` when the server
+         answers HTTP 404 or an error message containing "not found" /
+         "not exist".
+
+     Raises:
+         NacosConnectionError: On authentication failure, any other API error,
+             an unexpected HTTP status, or an aiohttp client error.
+     """
+     session = await self._get_session()
+     access_token = None
+     if self.username and self.password:
+         access_token = await self._get_access_token(session)
+         if not access_token:
+             raise NacosConnectionError("Authentication failed. Please check username and password.")
+
+     params = {
+         "serviceName": service_name,
+         "namespaceId": namespace,
+         "groupName": group_name
+     }
+     headers = {}
+     if access_token:
+         headers["AccessToken"] = access_token
+
+     url = f"{self.nacos_addr}/nacos/v3/admin/ns/service"
+
+     try:
+         async with session.get(url, params=params, headers=headers) as response:
+             if response.status == 200:
+                 data = await response.json()
+                 if data.get("code") == 0:
+                     return data.get("data")
+                 elif data.get("code") == 403:
+                     # Drop the cached token so the next call logs in again.
+                     self._clear_access_token()
+                     raise NacosConnectionError("Authentication failed. Please check username and password.")
+                 else:
+                     # "message" may be present but null; fall back to "" so the
+                     # substring checks below cannot hit None.lower().
+                     msg = data.get("message") or ""
+                     if "not found" in msg.lower() or "not exist" in msg.lower():
+                         return None
+                     raise NacosConnectionError(
+                         f"Nacos API error: {msg}"
+                     )
+             elif response.status == 404:
+                 return None
+             elif response.status == 403:
+                 self._clear_access_token()
+                 raise NacosConnectionError("Authentication failed. Please check username and password.")
+             else:
+                 text = await response.text()
+                 raise NacosConnectionError(
+                     f"Nacos API returned status {response.status}: {text}"
+                 )
+
+     except aiohttp.ClientError as e:
+         logger.error(f"Failed to get service detail from Nacos: {e}")
+         raise NacosConnectionError(
+             f"Failed to get service detail from Nacos: {e}"
+         ) from e
+
+ async def check_health(
+     self,
+     host: str,
+     port: int,
+     namespace: str = "public"
+ ) -> bool:
+     """Ask Nacos whether an instance is healthy.
+
+     Issues a GET to ``/nacos/v1/ns/instance/health`` with the auth parameters
+     from ``_build_auth_params()`` plus the instance coordinates.
+
+     Args:
+         host: Instance IP address.
+         port: Instance port.
+         namespace: Nacos namespace ID, sent as ``namespaceId``.
+
+     Returns:
+         True only when the server answers HTTP 200 with a body equal to "ok"
+         (case-insensitive, surrounding whitespace ignored). Any other status,
+         any other body, or an aiohttp client error yields False; client errors
+         are logged rather than raised.
+     """
+     params = self._build_auth_params()
+     params["serviceName"] = "__nacos^naming*"
+     params["ip"] = host
+     params["port"] = port
+     params["namespaceId"] = namespace
+
+     url = f"{self.nacos_addr}/nacos/v1/ns/instance/health"
+
+     try:
+         session = await self._get_session()
+         async with session.get(url, params=params) as response:
+             if response.status == 200:
+                 text = await response.text()
+                 # Tolerate a trailing newline or padding around the body.
+                 return text.strip().lower() == "ok"
+             return False
+
+     except aiohttp.ClientError as e:
+         logger.error(f"Failed to check instance health: {e}")
+         return False
+
+ async def test_connectivity(
+     self,
+     namespace: str = "public"
+ ) -> Dict[str, Any]:
+     """Probe the Nacos server and report the outcome as a dict instead of raising.
+
+     Logs in first when username and password are configured, then requests
+     the v3 Admin metrics endpoint.
+
+     Args:
+         namespace: Nacos namespace ID. Currently not sent with the request.
+
+     Returns:
+         Dict containing:
+         - success: Whether the connection was successful
+         - message: Human-readable message about the result
+     """
+     # Local import: timeouts surface as asyncio.TimeoutError, which is not an
+     # aiohttp.ClientError and would otherwise escape this method.
+     import asyncio
+
+     try:
+         session = await self._get_session()
+
+         access_token = None
+         if self.username and self.password:
+             access_token = await self._get_access_token(session)
+             if not access_token:
+                 return {
+                     "success": False,
+                     "message": "Authentication failed. Please check username and password."
+                 }
+
+         url = f"{self.nacos_addr}/nacos/v3/admin/ns/ops/metrics"
+         headers = {}
+         if access_token:
+             headers["AccessToken"] = access_token
+
+         async with session.get(url, headers=headers) as response:
+             if response.status == 200:
+                 data = await response.json()
+                 if data.get("code") == 0:
+                     return {
+                         "success": True,
+                         "message": "Successfully connected to Nacos server"
+                     }
+                 if data.get("code") == 403:
+                     # Same handling as the other API methods: a rejected
+                     # token must not stay cached.
+                     self._clear_access_token()
+                 return {
+                     "success": False,
+                     "message": f"Nacos API error: {data.get('message', 'unknown')}"
+                 }
+             elif response.status == 403:
+                 self._clear_access_token()
+                 return {
+                     "success": False,
+                     "message": "Authentication failed. Please check username and password."
+                 }
+             else:
+                 text = await response.text()
+                 return {
+                     "success": False,
+                     "message": f"Nacos server returned status {response.status}: {text}"
+                 }
+
+     except (aiohttp.ClientError, asyncio.TimeoutError) as e:
+         logger.error(f"Failed to connect to Nacos at {self.nacos_addr}: {e}")
+         return {
+             "success": False,
+             "message": f"Failed to connect to Nacos server: {e}"
+         }
+
+ async def _get_access_token(self, session: aiohttp.ClientSession) -> Optional[str]:
+     """Return a Nacos access token, logging in only when none is cached.
+
+     Args:
+         session: aiohttp session used for the login request.
+
+     Returns:
+         The cached or freshly issued token, or None when the login request
+         fails, returns a non-200 status, or carries no ``accessToken``.
+     """
+     cached = self._access_token
+     if cached:
+         return cached
+
+     login_url = f"{self.nacos_addr}/nacos/v1/auth/login"
+     credentials = aiohttp.FormData()
+     credentials.add_field("username", self.username)
+     credentials.add_field("password", self.password)
+
+     try:
+         async with session.post(login_url, data=credentials) as response:
+             if response.status != 200:
+                 text = await response.text()
+                 logger.warning(f"Nacos login request returned status {response.status}: {text}")
+                 return None
+
+             result = await response.json()
+             token = result.get("accessToken")
+             if not token:
+                 logger.warning(f"Nacos login failed: {result.get('message', 'unknown')}")
+                 return None
+
+             # Remember the token so later calls skip the login round trip.
+             self._access_token = token
+             return token
+
+     except aiohttp.ClientError as e:
+         logger.error(f"Failed to login to Nacos: {e}")
+         return None
+
+ def _clear_access_token(self) -> None:
+     """Forget the cached access token so the next token request logs in again."""
+     self._access_token = None
+
+ async def __aenter__(self) -> "NacosClient":
+     """Enter ``async with``: hand back this client unchanged."""
+     return self
+
+ async def __aexit__(self, exc_type, exc_val, exc_tb) -> None:
+     """Leave ``async with``: close the client; exceptions are not suppressed."""
+     await self.close()
diff --git a/backend/utils/prompt_template_utils.py b/backend/utils/prompt_template_utils.py
index 643e6cd40..299d3bf94 100644
--- a/backend/utils/prompt_template_utils.py
+++ b/backend/utils/prompt_template_utils.py
@@ -5,9 +5,56 @@
import yaml
from consts.const import LANGUAGE
+from consts.prompt_template import (
+ PROMPT_GENERATE_TEMPLATE_FIELD_ALIAS_MAP,
+ PROMPT_GENERATE_TEMPLATE_FIELDS,
+)
logger = logging.getLogger("prompt_template_utils")
+# Aliases for the two constants imported from consts.prompt_template, exposed
+# under this module's KEY_MAP / KEYS names.
+# NOTE(review): presumably kept for callers that import the older names - confirm.
+PROMPT_GENERATE_TEMPLATE_KEY_MAP = PROMPT_GENERATE_TEMPLATE_FIELD_ALIAS_MAP
+PROMPT_GENERATE_TEMPLATE_KEYS = PROMPT_GENERATE_TEMPLATE_FIELDS
+
+
+def get_prompt_generate_template_keys() -> list[str]:
+    """Return a new list holding the supported prompt generation template keys."""
+    return [*PROMPT_GENERATE_TEMPLATE_FIELDS]
+
+
+def normalize_prompt_generate_template_content(
+    template_content: Optional[Dict[str, Any]]
+) -> Dict[str, str]:
+    """Reduce a raw template mapping to its supported, non-blank string fields.
+
+    Each supported key is looked up in ``template_content``; when that lookup
+    yields ``None``, the key's alias from the alias map is tried instead. A
+    value is kept, unmodified, only if it is a string with non-whitespace
+    content. Input that is not a dict produces an empty result.
+    """
+    if not isinstance(template_content, dict):
+        return {}
+
+    result: Dict[str, str] = {}
+    for field in PROMPT_GENERATE_TEMPLATE_FIELDS:
+        alias = PROMPT_GENERATE_TEMPLATE_FIELD_ALIAS_MAP[field]
+        candidate = template_content.get(field)
+        if candidate is None:
+            candidate = template_content.get(alias)
+        if not isinstance(candidate, str) or not candidate.strip():
+            continue
+        result[field] = candidate
+
+    return result
+
+
+def merge_prompt_generate_templates(
+    *template_contents: Optional[Dict[str, Any]]
+) -> Dict[str, str]:
+    """Combine templates so that, per key, the earliest non-empty value wins.
+
+    Every argument is normalized first; later templates only fill keys that
+    no earlier template supplied.
+    """
+    combined: Dict[str, str] = {}
+
+    for raw in template_contents:
+        cleaned = normalize_prompt_generate_template_content(raw)
+        for field in PROMPT_GENERATE_TEMPLATE_FIELDS:
+            text = cleaned.get(field)
+            if not text or field in combined:
+                continue
+            combined[field] = text
+
+    return combined
+
def get_prompt_template(template_type: str, language: str = LANGUAGE["ZH"], **kwargs) -> Dict[str, Any]:
"""
@@ -16,6 +63,7 @@ def get_prompt_template(template_type: str, language: str = LANGUAGE["ZH"], **kw
Args:
template_type: Template type, supports the following values:
- 'prompt_generate': Prompt generation template
+ - 'prompt_optimize': Prompt section optimization template
- 'agent': Agent template including manager and managed agents
- 'generate_title': Title generation template
- 'document_summary': Document summary template (Map stage)
@@ -33,6 +81,10 @@ def get_prompt_template(template_type: str, language: str = LANGUAGE["ZH"], **kw
LANGUAGE["ZH"]: 'backend/prompts/utils/prompt_generate_zh.yaml',
LANGUAGE["EN"]: 'backend/prompts/utils/prompt_generate_en.yaml'
},
+ 'prompt_optimize': {
+ LANGUAGE["ZH"]: 'backend/prompts/utils/prompt_optimize_zh.yaml',
+ LANGUAGE["EN"]: 'backend/prompts/utils/prompt_optimize_en.yaml'
+ },
'agent': {
LANGUAGE["ZH"]: {
'manager': 'backend/prompts/manager_system_prompt_template_zh.yaml',
@@ -47,6 +99,10 @@ def get_prompt_template(template_type: str, language: str = LANGUAGE["ZH"], **kw
LANGUAGE["ZH"]: 'backend/prompts/utils/generate_title_zh.yaml',
LANGUAGE["EN"]: 'backend/prompts/utils/generate_title_en.yaml'
},
+ 'greeting_generate': {
+ LANGUAGE["ZH"]: 'backend/prompts/utils/greeting_generate_zh.yaml',
+ LANGUAGE["EN"]: 'backend/prompts/utils/greeting_generate_en.yaml'
+ },
'document_summary': {
LANGUAGE["ZH"]: 'backend/prompts/document_summary_agent_zh.yaml',
LANGUAGE["EN"]: 'backend/prompts/document_summary_agent_en.yaml'
@@ -58,6 +114,10 @@ def get_prompt_template(template_type: str, language: str = LANGUAGE["ZH"], **kw
'skill_creation_simple': {
LANGUAGE["ZH"]: 'backend/prompts/skill_creation_simple_zh.yaml',
LANGUAGE["EN"]: 'backend/prompts/skill_creation_simple_en.yaml'
+ },
+ 'skill_creation_complicated': {
+ LANGUAGE["ZH"]: 'backend/prompts/skill_creation_complicate_zh.yaml',
+ LANGUAGE["EN"]: 'backend/prompts/skill_creation_complicate_en.yaml'
}
}
@@ -77,7 +137,7 @@ def get_prompt_template(template_type: str, language: str = LANGUAGE["ZH"], **kw
# Go up one level from utils to backend, then use the template path
backend_dir = os.path.dirname(current_dir)
absolute_template_path = os.path.join(backend_dir, template_path.replace('backend/', ''))
-
+
# Read and return template content
with open(absolute_template_path, 'r', encoding='utf-8') as f:
return yaml.safe_load(f)
@@ -97,6 +157,19 @@ def get_prompt_generate_prompt_template(language: str = LANGUAGE["ZH"]) -> Dict[
return get_prompt_template('prompt_generate', language)
+def get_prompt_optimize_prompt_template(language: str = LANGUAGE["ZH"]) -> Dict[str, Any]:
+    """
+    Load the prompt section optimization template.
+
+    Thin wrapper around get_prompt_template for the 'prompt_optimize' type.
+
+    Args:
+        language: Language code ('zh' or 'en')
+
+    Returns:
+        dict: Loaded prompt optimization template configuration
+    """
+    template = get_prompt_template('prompt_optimize', language)
+    return template
+
+
def get_agent_prompt_template(is_manager: bool, language: str = LANGUAGE["ZH"]) -> Dict[str, Any]:
"""
Get agent prompt template
@@ -152,30 +225,42 @@ def get_cluster_summary_reduce_prompt_template(language: str = LANGUAGE["ZH"]) -
def get_skill_creation_simple_prompt_template(
language: str = LANGUAGE["ZH"],
- existing_skill: Optional[Dict[str, Any]] = None
+ existing_skill: Optional[Dict[str, Any]] = None,
+ complexity: str = "simple"
) -> Dict[str, str]:
"""
- Get skill creation simple prompt template with Jinja2 rendering.
+ Get skill creation prompt template with Jinja2 rendering.
This template is structured YAML with system_prompt and user_prompt sections.
Supports Jinja2 template syntax for dynamic content based on existing_skill.
+ Supports both simple and complicated skill creation templates.
Args:
language: Language code ('zh' or 'en')
existing_skill: Optional dict containing existing skill info for update scenarios.
Expected keys: name, description, tags, content
+ complexity: Complexity level ('simple' or 'complicated')
Returns:
Dict[str, str]: Template with keys 'system_prompt' and 'user_prompt', rendered with variables
"""
from jinja2 import Template
+ # Select template based on complexity
template_path_map = {
- LANGUAGE["ZH"]: 'backend/prompts/skill_creation_simple_zh.yaml',
- LANGUAGE["EN"]: 'backend/prompts/skill_creation_simple_en.yaml'
+ "simple": {
+ LANGUAGE["ZH"]: 'backend/prompts/skill_creation_simple_zh.yaml',
+ LANGUAGE["EN"]: 'backend/prompts/skill_creation_simple_en.yaml'
+ },
+ "complicated": {
+ LANGUAGE["ZH"]: 'backend/prompts/skill_creation_complicate_zh.yaml',
+ LANGUAGE["EN"]: 'backend/prompts/skill_creation_complicate_en.yaml'
+ }
}
- template_path = template_path_map.get(language, template_path_map[LANGUAGE["ZH"]])
+ # Default to simple if complexity is not recognized
+ template_type = template_path_map.get(complexity, template_path_map["simple"])
+ template_path = template_type.get(language, template_type[LANGUAGE["ZH"]])
current_dir = os.path.dirname(os.path.abspath(__file__))
backend_dir = os.path.dirname(current_dir)
diff --git a/backend/utils/tool_utils.py b/backend/utils/tool_utils.py
index f06f36bc3..f1d9147e3 100644
--- a/backend/utils/tool_utils.py
+++ b/backend/utils/tool_utils.py
@@ -46,7 +46,8 @@ def get_local_tools_description_zh() -> Dict[str, Dict]:
if hasattr(param.default, 'exclude') and param.default.exclude:
continue
- param_description_zh = param.default.description_zh if hasattr(param.default, 'description_zh') else None
+ # Note: Pydantic Field doesn't have description_zh attribute
+ param_description_zh = getattr(param.default, 'description_zh', None) if hasattr(param.default, 'description_zh') else None
if param_description_zh is None and param_name in init_param_descriptions:
param_description_zh = init_param_descriptions[param_name].get('description_zh')
diff --git a/doc/docs/.vitepress/config.mts b/doc/docs/.vitepress/config.mts
index 6ee76ff5d..87e79a831 100644
--- a/doc/docs/.vitepress/config.mts
+++ b/doc/docs/.vitepress/config.mts
@@ -385,6 +385,7 @@ export default defineConfig({
],
},
{ text: "性能监控", link: "/zh/sdk/monitoring" },
+ { text: "OpenTelemetry 设计", link: "/zh/sdk/opentelemetry-design" },
{ text: "向量数据库", link: "/zh/sdk/vector-database" },
{ text: "数据处理", link: "/zh/sdk/data-process" },
],
diff --git a/doc/docs/en/backend/overview.md b/doc/docs/en/backend/overview.md
index 962233f18..d77dfee3c 100644
--- a/doc/docs/en/backend/overview.md
+++ b/doc/docs/en/backend/overview.md
@@ -202,4 +202,6 @@ python backend/mcp_service.py # MCP service
- Resource pool management
- Auto-scaling capabilities
-For detailed backend development guidelines, see the [Developer Guide](../developer-guide/overview).
\ No newline at end of file
+For detailed backend development guidelines, see the [Developer Guide](../developer-guide/overview).
+
+For skill development and management, see the [Skills System Documentation](./skills/index).
\ No newline at end of file
diff --git a/doc/docs/en/backend/skills/index.md b/doc/docs/en/backend/skills/index.md
new file mode 100644
index 000000000..7824260fa
--- /dev/null
+++ b/doc/docs/en/backend/skills/index.md
@@ -0,0 +1,37 @@
+# Backend Skills Documentation
+
+This section covers Nexent's Skills system in the backend infrastructure, including skill definitions, skill package structures, and system architecture.
+
+## Available Documentation
+
+### Overview and Architecture
+- [Skills System Overview](./overview): Skill types, lifecycle, and version management
+
+## Skills vs. Tools
+
+In Nexent, **Tools** and **Skills** are two distinct layers:
+
+- **Tool**: A single atomic operation the agent can call, such as `read_file` or `tavily_search`. Every enabled tool appears in the tool list the LLM scans on each turn, so even a tool that is completely unnecessary for the current conversation still consumes context tokens just to be "seen".
+- **Skill**: A workflow of multiple tools bundled with parameter configuration and usage documentation via `SKILL.md`. The LLM does not need to "see" all tools in advance; it decides whether to activate a skill based on the user's actual needs. The corresponding toolset is loaded only when the skill is activated, which reduces token consumption.
+
+## Quick Start
+
+1. **Explore capabilities**: Read [Skills System Overview](./overview) to understand the supported skill types
+2. **Try creation**: Experience NL-to-Skill creation on the [Skill Management](../../user-guide/skills) page
+3. **Create manually**: Upload `SKILL.md` or a ZIP package to create a custom skill
+4. **Configure for agents**: Enable skills in the agent's tool configuration
+
+## Related References
+
+- [Skill Management (User Guide)](../../user-guide/skills)
+- [Agent Development Guide](../../user-guide/agent-development)
+- [Local Tools Overview](../../user-guide/local-tools/index)
+- [SDK Tool Development Guide](../../sdk/core/tools)
+- [MCP Tool Development](../tools/mcp)
+- [FAQ](../../quick-start/faq)
+
+## Getting Help
+
+- Check the [FAQ](../../quick-start/faq) for common skill usage questions
+- Ask questions in [GitHub Discussions](https://github.com/ModelEngine-Group/nexent/discussions)
+- Review [GitHub Issues](https://github.com/ModelEngine-Group/nexent/issues) for known issues
diff --git a/doc/docs/en/backend/skills/overview.md b/doc/docs/en/backend/skills/overview.md
new file mode 100644
index 000000000..34fbd2f97
--- /dev/null
+++ b/doc/docs/en/backend/skills/overview.md
@@ -0,0 +1,138 @@
+# Skills System Overview
+
+A Skill is Nexent's way of extending an agent's capabilities. Each skill consists of:
+
+- **Skill description**: What this skill does and when to use it
+- **Tool bundle**: A package of one or more Nexent SDK methods or user-defined tools
+- **Parameter template**: Which parameters users can fill in for this skill
+- **Usage examples**: How this skill is typically used
+
+Compared to selecting tools one by one, skills make configuring complex capabilities simple — install one skill package instead of configuring each tool separately.
+
+## Skill Package Structure
+
+A skill can be a single `SKILL.md` file or a ZIP package with multiple files:
+
+```
+skill-name/
+├── SKILL.md # Skill definition file (required)
+├── config/
+│ ├── config.yaml # Default parameter values (optional)
+│ └── schema.yaml # Parameter types and descriptions (optional)
+├── scripts/
+│ └── *.py # Python scripts (optional)
+├── examples.md # Usage examples (optional)
+└── assets/ # Static assets (optional)
+```
+
+### SKILL.md Structure
+
+Each skill must have a `SKILL.md` file, consisting of two parts:
+
+**Part 1: YAML Frontmatter (required)**
+
+```yaml
+---
+name: skill-name
+description: |
+ A description of what this skill does and when to use it.
+ Write in third person, e.g., "This skill is used for..."
+tags:
+ - tag1
+ - tag2
+---
+```
+
+**Part 2: Skill Body**
+
+Below the frontmatter, you can write Markdown content including:
+- Detailed usage instructions and guidelines
+- Example code for tool invocation
+- Error handling instructions
+- Usage limits and caveats
+
+### Two Skill Types
+
+Skills fall into two categories based on their purpose:
+
+**Tool Skills**: Used to expose the capabilities of one or more Nexent SDK methods. The body should include tool parameter descriptions, usage examples, return formats, and error handling. Once the user configures the parameters, the agent can call these tools directly.
+
+**Agent Skills**: Used to teach an agent how to perform a complex task. The body should provide detailed step-by-step workflow guidance, together with domain knowledge, best practices, and, where needed, helper scripts.
+
+## Official Skills Overview
+
+### File Operations
+
+| Skill Name | Description |
+|-----------|-------------|
+| `read-file` | Read file content and metadata within the workspace |
+| `create-file-directory` | Create files or directories |
+| `delete-file-directory` | Delete files or directories |
+| `move-file-directory` | Move or rename files/directories |
+| `list-directory` | List directory structure in a tree view |
+
+### Knowledge Base Search
+
+| Skill Name | Description |
+|-----------|-------------|
+| `search-knowledge-base` | Local knowledge base semantic search (supports hybrid / accurate / semantic modes) |
+| `search-dify` | Dify knowledge base search |
+| `search-idata` | iData knowledge base search |
+| `search-datamate` | DataMate knowledge base search (with similarity threshold control) |
+
+### Web Search
+
+| Skill Name | Description |
+|-----------|-------------|
+| `search-web-tavily` | Tavily real-time web search |
+| `search-web-linkup` | Linkup mixed image-and-text search |
+| `search-web-exa` | Exa deep web search |
+
+### Multimodal Analysis
+
+| Skill Name | Description |
+|-----------|-------------|
+| `analyze-image` | VLM-based image content analysis and Q&A |
+| `analyze-text-file` | PDF/Word/Excel file content extraction and Q&A |
+
+### Communication and Remote Operations
+
+| Skill Name | Description |
+|-----------|-------------|
+| `email-utils` | IMAP receive / SMTP send (supports HTML / CC / BCC) |
+| `run-shell-ssh` | Persistent SSH session for remote command execution |
+
+## Skill Lifecycle
+
+### Version Management
+
+Each skill supports two version states:
+
+- **Draft version (version=0)**: Development and debugging stage, changes take effect immediately, suitable for iterative adjustments
+- **Published version (version>=1)**: Production use, parameters locked to prevent accidental changes
+
+### Skill Instances
+
+The same skill can be configured independently for each agent, with different parameter values per agent.
+
+For example, a search skill can be configured for a "Technical Documentation Agent" to search only the technical knowledge base, and for a "Customer Service Agent" to search only the customer service knowledge base.
+
+### Common Workflow
+
+```
+Create skill → Configure parameters → Select skill for agent → Debug → Publish
+ ↓
+ Edit draft version
+```
+
+## Security Notes
+
+- **Path isolation**: Files within a skill package can only be accessed within the skill directory scope
+- **Parameter validation**: Parameters defined in schema.yaml are validated by the frontend form
+- **Permission control**: Skill instances are tenant-isolated; APIs require authentication tokens
+
+## Related References
+
+- [Skill Management (User Guide)](../../user-guide/skills)
+- [Agent Development Guide](../../user-guide/agent-development)
+- [Local Tools Overview](../../user-guide/local-tools/index)
diff --git a/doc/docs/en/backend/tools/index.md b/doc/docs/en/backend/tools/index.md
index 2d2d2c185..82d73b82c 100644
--- a/doc/docs/en/backend/tools/index.md
+++ b/doc/docs/en/backend/tools/index.md
@@ -12,6 +12,10 @@ Integrate with the LangChain ecosystem for advanced AI workflows.
Model Context Protocol tools for standardized AI agent communication.
→ [MCP Tools Development](./mcp)
+### Skills System
+Create reusable skill packages through natural language or ZIP files, giving agents more flexible tool-calling capabilities.
+→ [Skills Documentation](../skills/index)
+
## Quick Start
1. **Choose your tool type**: LangChain for general AI workflows, MCP for standardized agent communication
diff --git a/doc/docs/en/deployment/devcontainer.md b/doc/docs/en/deployment/devcontainer.md
index 84a49f47e..ce6efe7be 100644
--- a/doc/docs/en/deployment/devcontainer.md
+++ b/doc/docs/en/deployment/devcontainer.md
@@ -25,7 +25,7 @@ This development container configuration sets up a complete Nexent development e
1. Clone the project locally
2. Open project folder in Cursor/VS Code
-3. Run `docker/deploy.sh` script in `infrastructure` mode to start containers
+3. Run `./deploy.sh --components infrastructure,application --port-policy development` from the `docker` directory to start base containers
4. Enter `nexent-minio` and `nexent-elasticsearch` containers, copy `MINIO_ACCESS_KEY`, `MINIO_SECRET_KEY`, `ELASTICSEARCH_API_KEY` environment variables to corresponding positions in `docker/docker-compose.dev.yml`
5. Press `F1` or `Ctrl+Shift+P`, type `Dev Containers: Reopen in Container ...`
6. Cursor will start the development container based on configuration in `.devcontainer` directory
diff --git a/doc/docs/en/deployment/docker-build.md b/doc/docs/en/deployment/docker-build.md
index 47f51d891..bf36dc5d4 100644
--- a/doc/docs/en/deployment/docker-build.md
+++ b/doc/docs/en/deployment/docker-build.md
@@ -178,6 +178,11 @@ Notes:
## 🚀 Deployment Recommendations
-After building is complete, you can use the docker/deploy.sh script for deployment, or directly start the services using docker-compose.
+After building is complete, you can deploy local images from the `docker` directory:
-> When starting a test of locally built images, you need to change APP_VERSION="$(get_app_version)" to APP_VERSION="latest" in docker/deploy.sh, because the deployment will default to using the image corresponding to the current version.
+```bash
+cd docker
+bash deploy.sh --image-source local-latest
+```
+
+> `local-latest` uses local `latest` Nexent application images and avoids pulling those images again. You do not need to modify `docker/deploy.sh`.
diff --git a/doc/docs/en/developer-guide/environment-setup.md b/doc/docs/en/developer-guide/environment-setup.md
index 21f3cb6af..e2b0b9ed3 100644
--- a/doc/docs/en/developer-guide/environment-setup.md
+++ b/doc/docs/en/developer-guide/environment-setup.md
@@ -23,7 +23,7 @@ Before backend work, start core services (PostgreSQL, Redis, Elasticsearch, MinI
```bash
# Run from the docker directory at the project root
cd docker
-./deploy.sh --mode infrastructure
+./deploy.sh --components infrastructure --port-policy development
```
:::: info Important Notes
@@ -139,4 +139,3 @@ This adds:
- Testing framework (pytest)
- Data processing dependencies (unstructured)
- Other developer utilities
-
diff --git a/doc/docs/en/getting-started/features.md b/doc/docs/en/getting-started/features.md
index e699c1f8c..2216d7163 100644
--- a/doc/docs/en/getting-started/features.md
+++ b/doc/docs/en/getting-started/features.md
@@ -1,78 +1,73 @@
# Key Features
-Nexent provides powerful capabilities for building and deploying AI agents with minimal effort. Here are the core features that make Nexent unique.
+Nexent v2.0 delivers powerful capabilities for building and deploying AI agents. Here are the core features that make Nexent unique.
-## 🧠 Smart Agent Prompt Generation
+## ⚙️ Multi-Model Integration
-Turn plain language into runnable prompts. Nexent automatically chooses the right tools and plans the best action path for every request.
+Nexent is compatible with any OpenAI-compatible model provider, offering one-stop coverage for LLM, Embedding, VLM, STT, and TTS model types. It supports seamless synchronization with the ModelEngine platform, with built-in connection monitoring and automatic failover. The platform supports connecting to any service that follows the OpenAI API protocol, making it easy to diversify models or switch to domestic alternatives.
-
+## 🤖 Zero-Code Agent Generation
-## ⚡ Scalable Data Process Engine
+Describe your needs in natural language and Nexent automatically transforms them into executable agent configurations. The system intelligently selects appropriate tools, plans the optimal execution path, and generates professional prompts. No code, no drag-and-drop configuration — experience true "what you imagine is what you get" agent creation. Agents can also be imported and exported for easy sharing and reuse. Built-in debugging provides online testing so you can iterate and refine rapidly.
-Process 20+ data formats with fast OCR and table structure extraction, scaling smoothly from a single process to large-batch pipelines.
+## 🤝 A2A Protocol & Agent Collaboration
-
+Nexent supports the **Agent-to-Agent (A2A)** communication protocol, enabling seamless multi-agent collaboration. A main agent can invoke sub-agents to complete specific tasks; once a sub-agent finishes execution, results are aggregated back to the main agent. Multiple collaborative sub-agents can be configured, each with its own toolset, model configuration, and execution strategy — making it easy to build complex distributed agent workflows.
-## 📚 Personal-Grade Knowledge Base
+## 🧠 Layered Memory Architecture
-Import files in real time, auto-summarise them, and let agents access both personal and global knowledge instantly, also knowing what it can get from each knowledge base.
+Intelligent context management is the key to agents that truly understand you. Nexent provides a two-tier memory system:
-
+- **User-Level Memory**: Personal preferences, habits, and usage patterns
+- **User-Agent Memory**: Collaboration history and context for a specific user with a specific agent
-## 🌐 Internet Knowledge Search
+The system automatically extracts key information from conversations to generate memory entries — no manual input required. Memory entries can also be added or modified manually for greater flexibility. Smart retrieval ensures every conversation automatically pulls in the most relevant contextual memories, enabling truly personalized service.
-Connect to 5+ web search providers so agents can mix fresh internet facts with your private data.
+## 📝 Progressive Skill Disclosure
-
+Nexent introduces a **Progressive Skill Disclosure** mechanism. As users input tasks, the system dynamically reveals the most relevant Skill suggestions based on the current context — helping users quickly find the tools and methods best suited to the current task. This mechanism helps prevent context explosion and maximize context window efficiency.
-## 🔍 Knowledge-Level Traceability
+## 🗄️ Personal-Grade Knowledge Base
-Serve answers with precise citations from web and knowledge-base sources, making every fact verifiable.
+Create personal knowledge bases on the Nexent platform. Import files in real time with automatic parsing and vectorization, enabling agents to access private data instantly. Supports 20+ document formats including text, PDF, Word, PowerPoint, Excel, and CSV — with fast OCR and table structure extraction built in. Each knowledge base automatically generates its own summary, helping the agent accurately determine when to retrieve from it. Fine-grained access controls can be set: private, department-wide, or organization-wide visibility.
-
+## 🔧 MCP Tool Ecosystem
-## 🎭 Multimodal Understanding & Dialogue
+Nexent builds its tool ecosystem on the **Model Context Protocol (MCP)** — described as the "USB-C of AI" — a universal interface standard for connecting AI agents to the external world.
-Speak, type, files, or show images. Nexent understands voice, text, and pictures, and can even generate new images on demand.
+- Add third-party MCP services quickly via URL or JSON configuration
+- Develop local MCP tools with LangChain integrations and custom Python plugins
+- Hot-swap tools, models, and toolchains without touching core code
+- Built-in tool testing lets you verify whether tools work as expected before building an agent
-
+## 🌐 Internet Knowledge Integration
-## 🔧 MCP Tool Ecosystem
+Connect to multiple web search providers so agents can blend the freshest internet information with your private data. Hybrid search mode balances real-time accuracy with relevance.
-Drop in or build Python plug-ins that follow the MCP spec; swap models, tools, and chains without touching core code.
+## 🔍 Knowledge Traceability & Citations
-
+Every answer comes with precise citations from web search results or knowledge base documents, making every fact transparent and verifiable. Source information is fully traceable with one click, building trust in agent responses.
-## 🏗️ Architecture Benefits
+## 🎭 Multimodal Interaction
-### ⚡ Distributed Processing Capabilities
-- **Asynchronous Architecture**: High-performance asynchronous processing based on asyncio
-- **Multi-threading Safety**: Thread-safe concurrent processing mechanisms
-- **Celery Integration**: Optimized for distributed task queues
-- **Batch Optimization**: Intelligent batch operations to reduce network overhead
+Supports multiple input modes: voice, text, images, and files. Agents can understand voice, text, and images, and can generate new images on demand — delivering a truly natural multimodal conversation experience.
-### 🏢 Enterprise-grade Scalability
-- **Modular Design**: Loose-coupled module architecture for easy extension
-- **Plugin-based Tools**: Standardized tool interfaces for rapid integration
-- **Configuration Management**: Flexible configuration system supporting multi-environment deployment
-- **Monitoring Friendly**: Comprehensive logging and status monitoring
+## 🔢 Agent Version Management
-### 🚀 High-performance Optimization
-- **Connection Pooling**: Intelligent reuse of database and HTTP connections
-- **Memory Management**: Stream processing of large files and memory optimization
-- **Concurrency Control**: Intelligent concurrency limiting and load balancing
-- **Caching Strategy**: Multi-layer caching to improve response speed
+A comprehensive version control system supports agent iteration and historical rollback. Every version is independently archived; view change history, compare versions, and roll back whenever needed. Agent configurations can also be imported and exported in JSON format, enabling seamless migration across environments and smooth team collaboration.
-For detailed information about Nexent's software architecture and technical advantages, see our **[Software Architecture](./software-architecture)** guide.
+## 🏪 Agent Market
-## 🎯 Use Cases
+A built-in agent marketplace brings together high-quality agents from both official and community creators. Download with one click to use immediately, or integrate them as sub-agents into your own agent workflows to rapidly build complex applications.
-Nexent is designed for various scenarios including:
-- **Business Intelligence**: Automated data analysis and reporting
-- **Customer Support**: Intelligent chat agents with knowledge base integration
-- **Content Processing**: Document analysis, summarization, and extraction
-- **Research Assistance**: Academic paper analysis and information synthesis
-- **Personal Productivity**: Smart assistants for daily tasks and information management
+## 👥 Multi-Tenant RBAC & User Management
-For detailed agent scenarios and real-world implementations, see our **[MCP Ecosystem Use Cases](../mcp-ecosystem/use-cases)**.
\ No newline at end of file
+Nexent provides a complete multi-tenant, role-based permission management system:
+
+- **Four Roles**: Super Administrator, Tenant Administrator, Developer, and Regular User — each with clearly defined responsibilities
+- **Multi-Tenant Isolation**: Complete data isolation between tenants, with platform-wide management support
+- **User Group Mechanism**: Manage resources and access permissions through groups, supporting flexible permission delegation
+- **Invitation Code Mechanism**: Controlled registration safeguards platform security
+- **Resource-Level Permissions**: Fine-grained access control on agents, knowledge bases, and more — down to the user group level
+
+For detailed information about Nexent's software architecture and technical advantages, see our **[Software Architecture](./software-architecture)** guide.
diff --git a/doc/docs/en/getting-started/overview.md b/doc/docs/en/getting-started/overview.md
index 0f3936ed0..e77107eb4 100644
--- a/doc/docs/en/getting-started/overview.md
+++ b/doc/docs/en/getting-started/overview.md
@@ -17,10 +17,10 @@ Nexent is a zero-code platform for auto-generating production-grade AI agents, b
> *If you want to go fast, go alone; if you want to go far, go together.*
-We have released **Nexent v1**, and the platform is now relatively stable. However, there may still be some bugs, and we are continuously improving and adding new features. Stay tuned: we will announce **v2.0** soon!
+We have released **Nexent v2.0** — a major upgrade over v1.0. This release brings A2A protocol support, progressive Skill disclosure, layered memory architecture, full-featured user management with RBAC, agent version management, and the Agent Market. Core capabilities like knowledge base integration, multimodal interaction, and the MCP tool ecosystem have been significantly enhanced. The platform is maturing rapidly and we welcome your feedback.
-* **🗺️ Check our [Feature Map](https://github.com/orgs/ModelEngine-Group/projects/6)** to explore current and upcoming features.
-* **🔍 Try the current build** and leave ideas or bugs in the [Issues](https://github.com/ModelEngine-Group/nexent/issues) tab.
+- **🗺️ Check our [Feature Map](https://github.com/orgs/ModelEngine-Group/projects/6)** to explore current and upcoming features.
+- **🔍 Try the current build** and leave ideas or bugs in the [Issues](https://github.com/ModelEngine-Group/nexent/issues) tab.
> *Rome wasn't built in a day.*
@@ -32,15 +32,21 @@ Most of all, we need visibility. Star ⭐ and watch the [GitHub repository](http
## ✨ Key Features
-Nexent offers a comprehensive set of features for building powerful AI agents:
-
-- **🤖 Smart Agent Generation** - Zero-code agent creation using natural language
-- **📊 Scalable Data Processing** - Handle 20+ file formats with intelligent extraction
-- **🧠 Personal Knowledge Base** - Real-time file import with auto-summarization
-- **🌐 Internet Integration** - Connect to multiple search providers and web sources
-- **🔍 Knowledge Traceability** - Precise citation and source verification
-- **🎭 Multimodal Support** - Voice, text, images, and file processing
-- **🔧 MCP Ecosystem** - Extensible tool integration and custom development
+Nexent v2.0 delivers a comprehensive feature set for building powerful AI agents:
+
+- **⚙️ Multi-Model Integration** — Any OpenAI-compatible provider, with full Embedding/VLM/STT/TTS support
+- **🤖 Zero-Code Agent Generation** — Describe in plain language, deploy in one click
+- **🤝 A2A Agent Collaboration** — Agent-to-Agent protocol for seamless multi-agent workflows
+- **🧠 Layered Memory Architecture** — Two-tier memory system with cross-conversation context accumulation
+- **📝 Progressive Skill Disclosure** — Context-aware tool suggestions that reveal as you go
+- **🗄️ Personal-Grade Knowledge Base** — 20+ format document import with intelligent retrieval
+- **🔧 MCP Tool Ecosystem** — Plug-and-play extensibility with custom tool development
+- **🌐 Internet Knowledge Integration** — Multi-source hybrid search blending real-time web with private data
+- **🔍 Knowledge-Level Traceability** — Precise citations and verifiable sources on every answer
+- **🎭 Multimodal Interaction** — Voice, text, images, and files for fully natural conversations
+- **🔢 Agent Version Management** — Version iteration and rollback for safe, controlled deployments
+- **🏪 Agent Market** — Official and community agents ready to install and use
+- **👥 Multi-Tenant RBAC** — Tenant isolation, role-based permissions, and fine-grained resource access
For detailed feature information and examples, see our **[Features Guide](./features)**.
@@ -49,20 +55,23 @@ For detailed feature information and examples, see our **[Features Guide](./feat
Nexent adopts a modern distributed microservices architecture designed to provide high-performance, scalable AI agent platform. The entire system is based on containerized deployment, supporting cloud-native and enterprise-grade application scenarios.
### 🌐 Layered Architecture Design
-- **Frontend Layer** - Modern user interface built with Next.js + React + TypeScript
-- **API Gateway Layer** - FastAPI high-performance web framework for request routing and load balancing
-- **Business Logic Layer** - Agent management, conversation management, knowledge base management, and model management
-- **Data Layer** - Distributed storage architecture with PostgreSQL, Elasticsearch, Redis, and MinIO
+
+- **Frontend Layer** — Modern user interface built with Next.js + React + TypeScript
+- **API Gateway Layer** — FastAPI high-performance web framework for request routing and load balancing
+- **Business Logic Layer** — Agent management, conversation management, knowledge base management, and model management
+- **Data Layer** — Distributed storage architecture with PostgreSQL, Elasticsearch, Redis, and MinIO
### 🚀 Core Service Architecture
-- **Agent Services** - Agent generation and execution based on SmolAgents framework
-- **Data Processing Services** - Real-time and batch processing supporting 20+ file formats
-- **MCP Ecosystem** - Standardized tool interfaces and plugin architecture
+
+- **Agent Services** — Agent generation and execution based on SmolAgents framework
+- **Data Processing Services** — Real-time and batch processing supporting 20+ file formats
+- **MCP Ecosystem** — Standardized tool interfaces and plugin architecture
### ⚡ Distributed Features
-- **Asynchronous Processing** - High-performance async processing architecture based on asyncio
-- **Microservices Design** - Service decoupling with independent scaling and deployment
-- **Containerized Deployment** - Docker Compose service orchestration supporting cloud-native deployment
+
+- **Asynchronous Processing** — High-performance async processing architecture based on asyncio
+- **Microservices Design** — Service decoupling with independent scaling and deployment
+- **Containerized Deployment** — Docker Compose service orchestration supporting cloud-native deployment
For detailed architectural design and technical implementation, see our **[Software Architecture](./software-architecture)**.
@@ -70,9 +79,9 @@ For detailed architectural design and technical implementation, see our **[Softw
Ready to get started? Here are your next steps:
-1. **📋 [Installation & Deployment](../quick-start/installation)** - System requirements and deployment guide
-2. **🔧 [Developer Guide](../developer-guide/overview)** - Build from source and customize
-3. **❓ [FAQ](../quick-start/faq)** - Common questions and troubleshooting
+1. **📋 [Installation & Deployment](../quick-start/installation)** — System requirements and deployment guide
+2. **🔧 [Developer Guide](../developer-guide/overview)** — Build from source and customize
+3. **❓ [FAQ](../quick-start/faq)** — Common questions and troubleshooting
## 💬 Community & contact
diff --git a/doc/docs/en/getting-started/software-architecture.md b/doc/docs/en/getting-started/software-architecture.md
index 701d89319..99e38a5f9 100644
--- a/doc/docs/en/getting-started/software-architecture.md
+++ b/doc/docs/en/getting-started/software-architecture.md
@@ -1,8 +1,8 @@
# Software Architecture
-Nexent adopts a modern distributed microservices architecture designed to provide high-performance, scalable AI agent platform. The entire system is based on containerized deployment, supporting cloud-native and enterprise-grade application scenarios.
+Nexent adopts a modern distributed microservices architecture designed to provide a high-performance, scalable AI agent platform. The entire system is containerized with Docker and supports cloud-native and enterprise-grade deployment scenarios.
-
+
## 🏗️ Overall Architecture Design
@@ -11,156 +11,284 @@ Nexent's software architecture follows layered design principles, structured int
### 🌐 Frontend Layer
- **Technology Stack**: Next.js + React + TypeScript
- **Functions**: User interface, agent interaction, multimodal input processing
-- **Features**: Responsive design, real-time communication, internationalization support
+- **Features**: Responsive design, real-time WebSocket communication, internationalization (i18n)
### 🔌 API Gateway Layer
-- **Core Service**: FastAPI high-performance web framework
-- **Responsibilities**: Request routing, authentication, API version management, load balancing
-- **Ports**: 5010 (main service), 5012 (data processing service)
+Distributed API services built on FastAPI:
+
+| Service | Port | Description |
+|---------|------|-------------|
+| **nexent-config** | 5010 | Main API service - agent CRUD, configuration management |
+| **nexent-runtime** | 5014 | Runtime service - agent execution, streaming responses |
+| **nexent-mcp** | 5011/5015 | MCP service - tool protocol management, FastMCP server |
+| **nexent-northbound** | 5013 | External API service - A2A protocol, partner integrations |
+| **nexent-data-process** | 5012 | Data processing service - document parsing, vectorization |
### 🧠 Business Logic Layer
-- **Agent Management**: Agent generation, execution, monitoring
-- **Conversation Management**: Multi-turn dialogue, context maintenance, history tracking
-- **Knowledge Base Management**: Document processing, vectorization, retrieval
-- **Model Management**: Multi-model support, health checks, load balancing
+The backend implements a clean layered architecture:
+
+#### App Layer (`backend/apps/`)
+- **Purpose**: HTTP boundary layer - parse/validate inputs, call services, map errors to HTTP
+- **Key Modules**:
+ - `agent_app.py` - Agent CRUD, version management, streaming execution
+ - `conversation_management_app.py` - Multi-turn dialogue, history tracking
+ - `model_managment_app.py` - Model configuration, health checks
+ - `skill_app.py` - Skill creation and management
+ - `knowledge_summary_app.py` - Knowledge base operations
+ - `remote_mcp_app.py` - Remote MCP tool management
+ - `a2a_client_app.py` / `a2a_server_app.py` - A2A protocol support
+
+#### Service Layer (`backend/services/`)
+- **Purpose**: Core business logic orchestration, coordinate repositories/SDKs
+- **Key Modules**:
+ - `agent_service.py` - Agent lifecycle, execution orchestration, memory management
+ - `agent_version_service.py` - Version publishing, rollback, comparison
+ - `model_management_service.py` - Multi-model support, load balancing
+ - `memory_config_service.py` - Memory configuration, context building
+ - `conversation_management_service.py` - Session management, history persistence
+ - `skill_service.py` - Skill generation, template processing
+ - `data_process_service.py` - Document processing pipeline
+ - `mcp_container_service.py` - MCP container lifecycle management
+ - `remote_mcp_service.py` - Remote MCP server integration
+ - `a2a_client_service.py` / `a2a_server_service.py` - A2A agent communication
+ - `redis_service.py` - Caching, distributed locks, session storage
+
+#### Agent Core (`backend/agents/`)
+- **Purpose**: Agent execution framework built on SmolAgents
+- **Key Components**:
+ - `agent_run_manager.py` - Agent run lifecycle, streaming coordination
+ - `create_agent_info.py` - Agent configuration builder, tool integration
+ - `preprocess_manager.py` - Document preprocessing orchestration
+ - `skill_creation_agent.py` - LLM-powered skill generation
### 📊 Data Layer
Distributed data storage architecture with multiple specialized databases:
#### 🗄️ Structured Data Storage
-- **PostgreSQL**: Primary database storing user information, agent configurations, conversation records
-- **Port**: 5434
-- **Features**: ACID transactions, relational data integrity
-
-#### 🔍 Search Engine
-- **Elasticsearch**: Vector database and full-text search engine
-- **Port**: 9210
-- **Functions**: Vector similarity search, hybrid search, large-scale optimization
+- **PostgreSQL** (port 5434): Primary relational database
+ - User and tenant management (`user_tenant_db.py`)
+ - Agent configuration and versions (`agent_db.py`, `agent_version_db.py`)
+ - Tool definitions and instances (`tool_db.py`)
+ - Conversation history (`conversation_db.py`)
+ - Group and permission management (`group_db.py`, `role_permission_db.py`)
+ - Memory configuration (`memory_config_db.py`)
+ - Skill definitions (`skill_db.py`)
+- **Features**: ACID transactions, relational integrity, multi-tenancy support
+
+#### 🔍 Vector Search & Full-Text Search
+- **Elasticsearch** (port 9210): Vector and full-text search engine
+ - Knowledge base storage (`knowledge_db.py`)
+ - Vector similarity search, hybrid search
+ - Semantic chunking and indexing
+- **Features**: Scalable search, relevance ranking, large-scale optimization
#### 💾 Cache Layer
-- **Redis**: High-performance in-memory database
-- **Port**: 6379
-- **Usage**: Session caching, temporary data, distributed locks
+- **Redis** (port 6379): High-performance in-memory database
+ - Session caching
+ - Temporary data storage
+ - Distributed locks (`redis_service.py`)
+ - Celery task broker for async jobs
+- **Features**: Sub-millisecond latency, persistence with AOF
#### 📁 Object Storage
-- **MinIO**: Distributed object storage service
-- **Port**: 9010
-- **Functions**: File storage, multimedia resource management, large file processing
+- **MinIO** (port 9010/9011): Distributed object storage
+ - File uploads and attachments (`attachment_db.py`)
+ - Document storage for knowledge base
+ - Preview generation and temporary files
+- **Features**: S3-compatible API, large file handling
## 🔧 Core Service Architecture
### 🤖 Agent Services
```
-Agent framework based on SmolAgents, providing:
-├── Agent generation and configuration
-├── Tool calling and integration
-├── Reasoning and decision execution
-└── Lifecycle management
+Agent Framework (SmolAgents-based):
+├── Agent Creation & Configuration
+│ ├── Name/display name generation (LLM-powered)
+│ ├── Tool integration and selection
+│ ├── Sub-agent relationship management
+│ └── Version control and publishing
+├── Agent Execution Engine
+│ ├── Streaming response (SSE)
+│ ├── Tool calling and orchestration
+│ ├── Multi-model support (LLM + Business logic)
+│ └── Memory context building
+├── Version Management
+│ ├── Publishing and rollback
+│ ├── Version comparison
+│ └── A2A agent card registration
+└── Lifecycle Management
+ ├── Run registration and tracking
+ ├── Stop and cleanup
+ └── Preprocessing coordination
```
### 📈 Data Processing Services
```
-Distributed data processing architecture:
-├── Real-time document processing (20+ format support)
-├── Batch data processing pipelines
-├── OCR and table structure extraction
-└── Vectorization and index construction
+Distributed Data Processing Pipeline:
+├── Document Ingestion
+│ ├── Multi-format support (20+ formats)
+│ ├── PDF parsing with OCR
+│ └── Table structure extraction
+├── Chunking & Processing
+│ ├── Semantic chunking algorithms
+│ ├── Batch processing with Celery
+│ └── Ray distributed computing
+├── Vectorization & Indexing
+│ ├── Embedding generation
+│ ├── Elasticsearch indexing
+│ └── Incremental updates
+└── Preview Generation
+ ├── PDF to preview conversion
+ └── Image thumbnail generation
```
### 🌐 MCP Ecosystem
```
-Model Context Protocol tool integration:
-├── Standardized tool interfaces
-├── Plugin architecture
-├── Third-party service integration
-└── Custom tool development
+Model Context Protocol Integration:
+├── Local MCP Service
+│ ├── Stable built-in tools
+│ └── Docker-based tool containers
+├── Remote MCP Service
+│ ├── Dynamic remote MCP server proxy
+│ └── Outer API tool integration
+├── MCP Container Management
+│ ├── Container lifecycle (Docker)
+│ ├── Log aggregation
+│ └── Resource monitoring
+└── FastMCP Server
+ ├── Tool registration and discovery
+ └── Standardized tool interfaces
+```
+
+### 🔄 A2A Protocol Support
+```
+Agent-to-Agent Communication:
+├── A2A Client
+│ ├── Agent card discovery
+│ ├── Task submission and streaming
+│ └── Response handling
+├── A2A Server
+│ ├── Agent card registration
+│ ├── Task processing
+│ └── Message streaming
+└── Agent Adapter
+ ├── Nexent ↔ A2A protocol translation
+ └── Skill execution coordination
```
## 🚀 Distributed Architecture Features
### ⚡ Asynchronous Processing Architecture
-- **Foundation Framework**: High-performance async processing based on asyncio
+- **Foundation**: asyncio-based high-performance async processing
+- **Task Queue**: Celery + Redis for distributed task execution
+- **Computing Framework**: Ray for distributed computing in data processing
+- **Stream Processing**: Server-Sent Events (SSE) for real-time streaming
- **Concurrency Control**: Thread-safe concurrent processing mechanisms
-- **Task Queue**: Celery + Ray distributed task execution
-- **Stream Processing**: Real-time data and response streaming
### 🔄 Microservices Design
```
-Service decomposition strategy:
-├── nexent (main service) - Agent core logic
-├── nexent-data-process (data processing) - Document processing pipeline
-├── nexent-mcp-service (MCP service) - Tool protocol service
-└── Optional services (SSH, monitoring, etc.)
+Service Decomposition Strategy:
+├── nexent-config (5010)
+│ └── Agent CRUD, configuration, user management
+├── nexent-runtime (5014)
+│ └── Agent execution, streaming responses
+├── nexent-mcp (5011/5015)
+│ └── MCP tool protocol, container management
+├── nexent-northbound (5013)
+│ └── External APIs, A2A protocol, partner integration
+├── nexent-data-process (5012)
+│ └── Document processing, vectorization, Celery workers
+├── nexent-web (3000)
+│ └── Frontend Next.js application
+└── Infrastructure Services
+ ├── nexent-redis (6379) - Caching and message broker
+ ├── nexent-elasticsearch (9210) - Vector search
+ ├── nexent-postgresql (5434) - Relational data
+ └── nexent-minio (9010) - Object storage
```
### 🌍 Containerized Deployment
```
-Docker Compose service orchestration:
-├── Application service containerization
-├── Database service isolation
-├── Network layer security configuration
-└── Volume mounting for data persistence
+Docker Compose Orchestration:
+├── Application Services Containerization
+├── Database Service Isolation
+├── Network Layer Security (bridge network)
+├── Volume Mounting for Data Persistence
+├── Health Checks and Auto-restart
+└── Kubernetes Support (IS_DEPLOYED_BY_KUBERNETES)
```
## 🔐 Security and Scalability
### 🛡️ Security Architecture
- **Authentication**: Multi-tenant support, user permission management
-- **Data Security**: End-to-end encryption, secure transmission protocols
-- **Network Security**: Inter-service secure communication, firewall configuration
+- **Authorization**: Role-based access control (RBAC), group-based permissions
+- **Data Security**: Tenant data isolation, secure transmission (HTTPS)
+- **Network Security**: Secure inter-service communication, Docker network isolation
### 📈 Scalability Design
- **Horizontal Scaling**: Independent microservice scaling, load balancing
- **Vertical Scaling**: Resource pool management, intelligent scheduling
-- **Storage Scaling**: Distributed storage, data sharding
+- **Storage Scaling**: Distributed storage (MinIO), data sharding (Elasticsearch)
+- **Cache Scaling**: Redis clustering for session and data caching
### 🔧 Modular Architecture
-- **Loose Coupling Design**: Low inter-service dependencies, standardized interfaces
+- **Loose Coupling**: Low inter-service dependencies, standardized interfaces
- **Plugin Architecture**: Hot-swappable tools and models
-- **Configuration Management**: Environment isolation, dynamic configuration updates
+- **Configuration Management**: Environment-based configuration, dynamic updates
+- **Single Source of Truth**: Environment variables centralized in `backend/consts/const.py`
## 🔄 Data Flow Architecture
### 📥 User Request Flow
```
-User Input → Frontend Validation → API Gateway → Route Distribution → Business Service → Data Access → Database
+User Input → Frontend Validation → API Gateway (nexent-config)
+ → Route Distribution → Business Service (Service Layer)
+ → Data Access (Database Layer) → PostgreSQL/Elasticsearch/Redis/MinIO
```
### 🤖 Agent Execution Flow
```
-User Message → Agent Creation → Tool Calling → Model Inference → Streaming Response → Result Storage
+User Message → nexent-runtime → Agent Service
+ → Memory Context Build → Tool Resolution
+ → Model Inference (Streaming) → SSE Response
+ → Conversation Save → History Storage
```
### 📚 Knowledge Base Processing Flow
```
-File Upload → Temporary Storage → Data Processing → Vectorization → Knowledge Base Storage → Index Update
+File Upload → nexent-config → nexent-data-process
+ → Document Parsing → Chunking → Vectorization
+ → Elasticsearch Index → Search Ready
```
### ⚡ Real-time Processing Flow
```
-Real-time Input → Instant Processing → Agent Response → Streaming Output
+Real-time Input → Streaming Endpoint → Async Processing
+ → SSE Stream → Frontend Display
```
## 🎯 Architecture Advantages
### 🏢 Enterprise-grade Features
-- **High Availability**: Multi-layer redundancy, failover capabilities
-- **High Performance**: Asynchronous processing, intelligent caching
+- **High Availability**: Multi-service redundancy, health checks, auto-restart
+- **High Performance**: Async processing, Redis caching, vector search optimization
- **High Concurrency**: Distributed architecture, load balancing
-- **Monitoring Friendly**: Comprehensive logging and status monitoring
+- **Monitoring Friendly**: OpenTelemetry observability, Grafana Tempo tracing, structured logging
### 🔧 Developer Friendly
-- **Modular Development**: Clear hierarchical structure
-- **Standardized Interfaces**: Unified API design
-- **Flexible Configuration**: Environment adaptation, feature toggles
-- **Easy Testing**: Unit testing and integration testing support
+- **Modular Development**: Clean layered architecture (App → Service → Database)
+- **Standardized Interfaces**: Unified API design with FastAPI
+- **Flexible Configuration**: Environment-based configuration, hot-reload
+- **Easy Testing**: Comprehensive test suites, dependency injection
### 🌱 Ecosystem Compatibility
-- **MCP Standard**: Compliant with Model Context Protocol
-- **Open Source Ecosystem**: Integration with rich open source tools
-- **Cloud Native**: Support for Kubernetes and Docker deployment
+- **MCP Standard**: Full Model Context Protocol implementation
+- **A2A Protocol**: Agent-to-agent communication support
+- **Open Source Ecosystem**: Integration with SmolAgents, FastMCP, LangChain
+- **Cloud Native**: Docker Compose and Kubernetes deployment support
- **Multi-model Support**: Compatible with mainstream AI model providers
---
-This architectural design ensures that Nexent can provide a stable, scalable AI agent service platform while maintaining high performance. Whether for individual users or enterprise-level deployments, it delivers excellent user experience and technical assurance.
\ No newline at end of file
+This architectural design ensures that Nexent can provide a stable, scalable AI agent service platform while maintaining high performance. Whether for individual users or enterprise-level deployments, it delivers excellent user experience and technical assurance.
diff --git a/doc/docs/en/quick-start/installation.md b/doc/docs/en/quick-start/installation.md
index f01576513..7b6a9cb76 100644
--- a/doc/docs/en/quick-start/installation.md
+++ b/doc/docs/en/quick-start/installation.md
@@ -1,13 +1,16 @@
-# Installation & Deployment
+# Docker Installation & Deployment
## 🎯 Prerequisites
-| Resource | Minimum |
-|----------|---------|
-| **CPU** | 2 cores |
-| **RAM** | 6 GiB |
-| **Architecture** | x86_64 / ARM64 |
-| **Software** | Docker & Docker Compose installed |
+| Resource | Minimum | Recommended |
+|----------|---------|-------------|
+| **CPU** | 4 cores | 8 cores |
+| **RAM** | 8 GiB | 16 GiB |
+| **Disk** | 40 GiB | 100 GiB |
+| **Architecture** | x86_64 / ARM64 | |
+| **Software** | Docker & Docker Compose installed | Docker 24+, Docker Compose v2+ |
+
+> **💡 Note**: The recommended configuration of **8 cores and 16 GiB RAM** provides good performance for production workloads.
## 🚀 Quick Start
@@ -16,10 +19,9 @@
```bash
git clone https://github.com/ModelEngine-Group/nexent.git
cd nexent/docker
-cp .env.example .env # Configure environment variables
```
-> **💡 Tip**: If there are no special requirements, you can directly use `.env.example` for deployment without making any changes. If you need to configure voice models (STT/TTS), you will need to set the relevant parameters in `.env`. We will work on making this configuration available through the frontend soon—stay tuned.
+> **💡 Tip**: `deploy.sh` automatically copies `.env.example` to `docker/.env` when `docker/.env` does not exist. If you need to configure voice models (STT/TTS), update the related values in `docker/.env` before or after deployment.
### 2. Deployment Options
@@ -29,27 +31,53 @@ Run the following command to start deployment:
bash deploy.sh
```
-After executing this command, the system will provide two different versions for you to choose from:
+After running the command, the script opens Bash TUI menus for deployment options. Use arrow keys or `j/k` to move, Space to toggle multi-select items, Enter to confirm, `b`/Backspace to go back, and `q` to quit.
+
+**Deployment Components:**
+- **infrastructure (required)**: Elasticsearch, PostgreSQL, Redis, MinIO
+- **application (selected by default, optional)**: config, runtime, mcp, northbound, web
+- **data-process (optional)**: data processing service
+- **supabase (optional)**: enables user, tenant, and authentication features
+- **terminal (optional)**: enables the OpenSSH terminal tool
+- **monitoring (optional)**: enables observability components and then prompts for a provider
+
+**Port Policy:**
+- **development (default)**: publishes debug and internal service ports for local troubleshooting
+- **production**: publishes only production entry ports
+
+**Image Source:**
+- **general (default)**: uses standard public registries
+- **mainland**: uses mainland China mirrors
+- **local-latest**: uses local `latest` Nexent images and avoids pulling Nexent application images
+
+You can also pass options directly:
-**Version Selection:**
-- **Speed version (Lightweight & Fast Deployment, Default)**: Quick startup of core features, suitable for individual users and small teams
-- **Full version (Complete Feature Edition)**: Provides enterprise-level tenant management and resource isolation features, but takes longer to install, suitable for enterprise users
+```bash
+# Default component set, development port policy, standard image source
+bash deploy.sh --components infrastructure,application --port-policy development --image-source general
+
+# Enable user/tenant features, data processing, and terminal
+bash deploy.sh --components infrastructure,application,supabase,data-process,terminal
+
+# Use mainland China image sources
+bash deploy.sh --image-source mainland
+
+# Use local latest images
+bash deploy.sh --image-source local-latest
+```
-**Deployment Modes:**
-- **Development mode (default)**: Exposes all service ports for debugging
-- **Infrastructure mode**: Only starts infrastructure services
-- **Production mode**: Only exposes port 3000 for security
+After a successful deployment, non-sensitive choices are saved to `docker/deploy.options`. The next interactive deployment can reuse the local config or run a full reconfiguration.
-**Optional Components:**
-- **Terminal Tool**: Enables openssh-server for AI agent shell command execution
-- **Regional optimization**: Mainland China users can use optimized image sources
+#### ⚠️ Important Notes
-### ⚠️ Important Notes
1️⃣ **When deploying v1.8.0 or later for the first time**, please pay special attention to the `suadmin` super administrator account information output in the Docker logs. This account has the highest system privileges, and the password is only displayed upon first generation. It cannot be viewed again later, so please be sure to save it securely.
+> This account is used for permission management only and cannot develop agents or create knowledge bases. Log in with this account and complete: Access tenant resources → Create tenant → Create tenant administrator, then log in with the tenant administrator account to use all features. For role permissions, see [User Management](../user-guide/user-management).
+
2️⃣ Forgot to note the `suadmin` account password? Follow these steps:
+
```bash
-# Step1: Delete su account record in supabase container
+# Step 1: Delete su account record in supabase container
docker exec -it supabase-db-mini bash
psql -U postgres
select id, email from auth.users;
@@ -57,12 +85,12 @@ select id, email from auth.users;
delete from auth.users where id = 'your_user_id';
delete from auth.identities where user_id = 'your_user_id';
-# Step2: Delete su account record in nexent database
+# Step 2: Delete su account record in nexent database
docker exec -it nexent-postgresql bash
psql -U root -d nexent
delete from nexent.user_tenant_t where user_id = 'your_user_id';
-# Step3: Redeploy and record the su account password
+# Step 3: Redeploy and record the su account password
```
### 3. Access Your Installation
@@ -77,21 +105,54 @@ When deployment completes successfully:
## 🏗️ Service Architecture
-Nexent uses a microservices architecture with the following core services:
+Nexent uses a microservices architecture deployed via Docker Compose.
-**Core Services:**
-- `nexent`: Backend service (port 5010)
-- `nexent-web`: Frontend interface (port 3000)
-- `nexent-data-process`: Data processing service (port 5012)
+**Application Services:**
+| Service | Description | Default Port |
+|---------|-------------|--------------|
+| nexent | Backend service | 5010 |
+| nexent-web | Web frontend | 3000 |
+| nexent-data-process | Data processing service | 5012 |
+| nexent-northbound | Northbound API service | 5013 |
**Infrastructure Services:**
-- `nexent-postgresql`: Database (port 5434)
-- `nexent-elasticsearch`: Search engine (port 9210)
-- `nexent-minio`: Object storage (port 9010, console 9011)
-- `redis`: Cache service (port 6379)
+| Service | Description |
+|---------|-------------|
+| nexent-postgresql | Relational database |
+| nexent-elasticsearch | Search and indexing engine |
+| nexent-minio | S3-compatible object storage |
+| redis | Caching layer |
+
+**Supabase Services (when `supabase` is selected):**
+| Service | Description |
+|---------|-------------|
+| supabase-kong | API Gateway |
+| supabase-auth | Authentication service |
+| supabase-db-mini | Database service |
**Optional Services:**
-- `nexent-openssh-server`: SSH server for Terminal tool (port 2222)
+| Service | Description |
+|---------|-------------|
+| nexent-openssh-server | SSH terminal for AI agents |
+| nexent-monitoring | Optional observability stack |
+
+Internal services communicate using the Docker internal network.
+
+## 💾 Data Persistence
+
+Nexent uses Docker volumes for data persistence:
+
+| Data Type | Volume Name | Default Host Path |
+|-----------|------------------|-------------------|
+| PostgreSQL | nexent-postgresql-data | `{dataDir}/postgresql` |
+| Elasticsearch | nexent-elasticsearch-data | `{dataDir}/elasticsearch` |
+| Redis | nexent-redis-data | `{dataDir}/redis` |
+| MinIO | nexent-minio-data | `{dataDir}/minio` |
+| Supabase DB (when `supabase` is selected) | nexent-supabase-db-data | `{dataDir}/supabase-db` |
+
+Default `dataDir` is `./volumes` (configurable via `ROOT_DIR` in `.env`).
+
+Uninstall is handled by `docker/uninstall.sh`. It prompts before deleting persistent data by default; you can also pass `--delete-volumes true|false`, `--remove-volumes`, `--keep-volumes`, or use `bash uninstall.sh delete-all` to remove containers and persistent data.
## 🔌 Port Mapping
@@ -100,6 +161,7 @@ Nexent uses a microservices architecture with the following core services:
| Web Interface | 3000 | 3000 | Main application access |
| Backend API | 5010 | 5010 | Backend service |
| Data Processing | 5012 | 5012 | Data processing API |
+| Northbound API | 5013 | 5013 | Northbound interface service (A2A/MCP integration) |
| PostgreSQL | 5432 | 5434 | Database connection |
| Elasticsearch | 9200 | 9210 | Search engine API |
| MinIO API | 9000 | 9010 | Object storage API |
@@ -109,6 +171,240 @@ Nexent uses a microservices architecture with the following core services:
For complete port mapping details, see our [Dev Container Guide](../deployment/devcontainer.md#port-mapping).
+## 🔧 Advanced Configuration
+
+### Monitoring Configuration
+
+Select the `monitoring` component in the deployment script UI to enable OpenTelemetry monitoring. The script synchronizes `ENABLE_TELEMETRY`, `MONITORING_PROVIDER`, and `MONITORING_DASHBOARD_URL` in `docker/.env`, then starts the matching observability services from `docker/docker-compose-monitoring.yml`.
+
+```bash
+cd nexent/docker
+bash deploy.sh
+```
+
+If `docker/deploy.options` already exists, the script asks whether to reuse local configuration. Choose to reconfigure/overwrite local configuration, then select `monitoring` in the component menu and manually choose `grafana`, `phoenix`, `langfuse`, `langsmith`, `zipkin`, or `otlp` in the provider menu.
+
+Supported providers:
+
+| Provider | Purpose | Default URL |
+|----------|---------|-------------|
+| `otlp` | OpenTelemetry Collector only, useful for forwarding to an external platform | No dashboard |
+| `phoenix` | Local Phoenix trace analysis | `http://localhost:6006` |
+| `langfuse` | Local Langfuse observability stack | `http://localhost:3001` |
+| `langsmith` | Forwarding to hosted LangSmith | `https://smith.langchain.com/` |
+| `grafana` | Local Grafana + Tempo | `http://localhost:3002/d/nexent-llm-agent/nexent-agent-trace-monitoring?orgId=1` |
+| `zipkin` | Local Zipkin | `http://localhost:9411` |
+
+To change ports, image versions, or local Langfuse bootstrap credentials, copy and edit the monitoring environment file first:
+
+```bash
+cp docker/monitoring/monitoring.env.example docker/monitoring/monitoring.env
+```
+
+Common variables:
+
+| Variable | Description |
+|----------|-------------|
+| `MONITORING_PROVIDER` | Default monitoring provider; updated when you choose a provider in the deployment script |
+| `OTEL_COLLECTOR_HTTP_PORT` / `OTEL_COLLECTOR_GRPC_PORT` | Published OTLP HTTP/gRPC ports |
+| `LANGSMITH_API_KEY` / `LANGSMITH_PROJECT` | LangSmith forwarding configuration |
+| `LANGFUSE_INIT_USER_EMAIL` / `LANGFUSE_INIT_USER_PASSWORD` | Local Langfuse bootstrap admin |
+| `GRAFANA_ADMIN_USER` / `GRAFANA_ADMIN_PASSWORD` | Local Grafana admin |
+
+Before choosing the `langsmith` provider, configure `LANGSMITH_API_KEY` in `docker/monitoring/monitoring.env`. If you only need to connect to an existing external Collector, adjust the OTLP target in `docker/.env`:
+
+```bash
+ENABLE_TELEMETRY=true
+MONITORING_PROVIDER=otlp
+OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4318
+OTEL_EXPORTER_OTLP_PROTOCOL=http
+MONITORING_DASHBOARD_URL=
+```
+
+> **Production note**: Replace default passwords, secrets, and the Langfuse `ENCRYPTION_KEY`. Restrict dashboard and Collector access with a reverse proxy or firewall.
+
+### OAuth Login Configuration
+
+OAuth login requires the `supabase` component. When enabling third-party login, deploy `supabase` and set `OAUTH_CALLBACK_BASE_URL` to the browser-accessible Nexent Web URL.
+
+```bash
+bash deploy.sh --components infrastructure,application,supabase
+```
+
+For Docker, configure OAuth in `docker/.env`:
+
+```bash
+# Web entry URL. The full callback path is generated as:
+# {OAUTH_CALLBACK_BASE_URL}/api/user/oauth/callback?provider={provider}
+OAUTH_CALLBACK_BASE_URL=http://localhost:3000
+
+# GitHub OAuth
+GITHUB_OAUTH_CLIENT_ID=
+GITHUB_OAUTH_CLIENT_SECRET=
+
+# GDE OAuth
+GDE_URL=
+GDE_OAUTH_CLIENT_ID=
+GDE_OAUTH_CLIENT_SECRET=
+
+# Link App OAuth
+LINK_APP_URL=
+LINK_APP_OAUTH_CLIENT_ID=
+LINK_APP_OAUTH_CLIENT_SECRET=
+
+# WeChat OAuth
+ENABLE_WECHAT_OAUTH=false
+WECHAT_OAUTH_APP_ID=
+WECHAT_OAUTH_APP_SECRET=
+
+# TLS verification when contacting OAuth providers
+OAUTH_SSL_VERIFY=true
+OAUTH_CA_BUNDLE=
+```
+
+Provider enablement rules:
+
+| Provider | Required variables | Callback URL |
+|----------|--------------------|--------------|
+| GitHub | `GITHUB_OAUTH_CLIENT_ID`, `GITHUB_OAUTH_CLIENT_SECRET` | `{OAUTH_CALLBACK_BASE_URL}/api/user/oauth/callback?provider=github` |
+| GDE | `GDE_URL`, `GDE_OAUTH_CLIENT_ID`, `GDE_OAUTH_CLIENT_SECRET` | `{OAUTH_CALLBACK_BASE_URL}/api/user/oauth/callback?provider=gde` |
+| Link App | `LINK_APP_URL`, `LINK_APP_OAUTH_CLIENT_ID`, `LINK_APP_OAUTH_CLIENT_SECRET` | `{OAUTH_CALLBACK_BASE_URL}/api/user/oauth/callback?provider=link_app` |
+| WeChat | `ENABLE_WECHAT_OAUTH=true`, `WECHAT_OAUTH_APP_ID`, `WECHAT_OAUTH_APP_SECRET` | `{OAUTH_CALLBACK_BASE_URL}/api/user/oauth/callback?provider=wechat` |
+
+For local Docker, a GitHub callback example is `http://localhost:3000/api/user/oauth/callback?provider=github`. In production, use a public HTTPS domain such as `https://nexent.example.com/api/user/oauth/callback?provider=github` and register the exact same URL in the OAuth provider console.
+
+### CAS Login Configuration
+
+CAS SSO does not require the `supabase` component. Set `CAS_CALLBACK_BASE_URL` to the browser-accessible Nexent Web URL without a trailing `/`. `CAS_SERVER_URL` is the CAS Server root URL and should also not include a trailing `/`.
+
+For Docker, configure CAS in `docker/.env`:
+
+```bash
+CAS_ENABLED=true
+CAS_SERVER_URL=http://localhost:8080/cas
+CAS_VALIDATE_PATH=/p3/serviceValidate
+CAS_CALLBACK_BASE_URL=http://localhost:3000
+
+# disabled: disable the CAS login entry and automatic redirects
+# button: show CAS as an optional login button
+# force: redirect unauthenticated Nexent users to CAS automatically
+CAS_LOGIN_MODE=force
+
+# Empty means use the principal returned in <cas:user>; set userName to read the userName attribute instead
+CAS_USER_ATTRIBUTE=
+CAS_EMAIL_ATTRIBUTE=email
+CAS_ROLE_ATTRIBUTE=role
+CAS_TENANT_ATTRIBUTE=tenant_id
+CAS_ROLE_MAP_JSON={"cas-admin":"ADMIN","cas-user":"USER"}
+CAS_SESSION_MAX_AGE_SECONDS=3600
+LOCAL_SESSION_MAX_AGE_SECONDS=3600
+CAS_RENEW_BEFORE_SECONDS=300
+CAS_RENEW_TIMEOUT_SECONDS=10
+CAS_SYNTHETIC_EMAIL_DOMAIN=cas.local
+
+# Empty means Nexent logout will not call the CAS Server logout endpoint.
+# /logout is resolved against CAS_SERVER_URL.
+CAS_LOGOUT_URL=/logout
+CAS_SSL_VERIFY=true
+CAS_CA_BUNDLE=
+```
+
+Common CAS URLs:
+
+| Purpose | URL |
+|---------|-----|
+| Nexent login entry | `{CAS_CALLBACK_BASE_URL}/api/user/cas/login?redirect=/` |
+| CAS service callback | `{CAS_CALLBACK_BASE_URL}/api/user/cas/callback` |
+| CAS silent renewal callback | `{CAS_CALLBACK_BASE_URL}/api/user/cas/renew_callback` |
+| CAS single logout callback | `POST {CAS_CALLBACK_BASE_URL}/api/user/cas/logout_callback` |
+
+For Apereo CAS JSON Service Registry, create a service registration file such as `Nexent-10001.json` in the service registry directory configured by your CAS deployment. The `id` must be globally unique. This is a local Docker example:
+
+```json
+{
+ "@class": "org.apereo.cas.services.RegexRegisteredService",
+ "serviceId": "http://localhost:3000.*",
+ "name": "Nexent CAS Client",
+ "id": 10001,
+ "description": "Nexent CAS SSO client",
+ "evaluationOrder": 1,
+ "logoutType": "BACK_CHANNEL",
+ "logoutUrl": "http://localhost:3000/api/user/cas/logout_callback"
+}
+```
+
+In production, keep `CAS_SSL_VERIFY=true`; for self-signed certificates, prefer `CAS_CA_BUNDLE` and only use `CAS_SSL_VERIFY=false` for local testing.
+
+#### CAS Integration with ModelEngine
+
+When integrating with ModelEngine through the CAS protocol, deploy Nexent with the following configuration:
+
+```bash
+CAS_ENABLED=true
+CAS_SERVER_URL=https://your-modelengine-host:5443/SSOSvr
+CAS_VALIDATE_PATH=/p3/serviceValidate
+CAS_CALLBACK_BASE_URL=http://your-nexent-host:3000
+CAS_LOGIN_MODE=force
+CAS_USER_ATTRIBUTE=userName
+CAS_EMAIL_ATTRIBUTE=email
+CAS_ROLE_ATTRIBUTE=userType
+CAS_TENANT_ATTRIBUTE=tenant_id
+CAS_ROLE_MAP_JSON={"1":"ADMIN","3":"DEV"}
+CAS_SESSION_MAX_AGE_SECONDS=3600
+LOCAL_SESSION_MAX_AGE_SECONDS=3600
+CAS_RENEW_BEFORE_SECONDS=300
+CAS_RENEW_TIMEOUT_SECONDS=10
+CAS_SYNTHETIC_EMAIL_DOMAIN=cas.local
+CAS_LOGOUT_URL=/logout?service=http://your-nexent-host:3000
+CAS_SSL_VERIFY=false
+CAS_CA_BUNDLE=
+```
+
+You also need to add a CAS client service registration file in the OMS container. Use the following steps as a reference:
+
+```bash
+# Create the registration file, paste the JSON content into it, and save it.
+vim Nexent-10000001.json
+{
+ "@class": "org.apereo.cas.services.CasRegisteredService",
+ "serviceId": "http://your-nexent-host:3000.*",
+ "name": "Nexent CAS Client",
+ "id": 10000001,
+ "description": "Nexent CAS SSO client",
+ "evaluationOrder": 1,
+ "logoutType": "BACK_CHANNEL",
+ "logoutUrl": "http://your-nexent-host:3000/api/user/cas/logout_callback"
+}
+
+# Run the following command to copy the registration file into the container.
+kubectl cp Nexent-10000001.json model-engine/$(kubectl get pods -n model-engine -l app=oms --no-headers | awk '{print $1}'):/opt/huawei/fce/apps/platform/webapps/SSOSvr/WEB-INF/classes/services/Nexent-10000001.json
+kubectl exec -i -n model-engine $(kubectl get pods -n model-engine -l app=oms --no-headers | awk '{print $1}') -- chown tomcat:fusioncube /opt/huawei/fce/apps/platform/webapps/SSOSvr/WEB-INF/classes/services/Nexent-10000001.json
+```
+
+### Northbound Interface Configuration (NORTHBOUND_EXTERNAL_URL)
+
+If you need to use any of the following features, configure the `NORTHBOUND_EXTERNAL_URL` environment variable:
+
+1. **A2A Protocol Integration** - Third-party systems calling Nexent agents via A2A protocol
+2. **MCP Tool Access** - Using MCP protocol to access Nexent resources like documents
+
+**Configuration:**
+
+Set the publicly accessible URL in your `.env` file:
+
+```bash
+# Format: protocol://host:port/api
+# Local development (default):
+NORTHBOUND_EXTERNAL_URL=http://localhost:5013/api
+
+# Production - use your public IP or domain:
+NORTHBOUND_EXTERNAL_URL=http://your-public-ip:5013/api
+# or
+NORTHBOUND_EXTERNAL_URL=https://api.yourdomain.com/api
+```
+
+> **Important**: The URL must include the `/api` suffix because the Northbound service uses FastAPI's `root_path="/api"` configuration.
+
## 💡 Need Help
- Browse the [FAQ](./faq) for common install issues
@@ -119,4 +415,4 @@ For complete port mapping details, see our [Dev Container Guide](../deployment/d
Want to build from source or add new features? Check the [Docker Build Guide](../deployment/docker-build) for step-by-step instructions.
-For detailed setup instructions and customization options, see our [Developer Guide](../developer-guide/overview).
\ No newline at end of file
+For detailed setup instructions and customization options, see our [Developer Guide](../developer-guide/overview).
diff --git a/doc/docs/en/quick-start/kubernetes-installation.md b/doc/docs/en/quick-start/kubernetes-installation.md
index 44ca3c993..a10873c7c 100644
--- a/doc/docs/en/quick-start/kubernetes-installation.md
+++ b/doc/docs/en/quick-start/kubernetes-installation.md
@@ -35,21 +35,29 @@ cd nexent/k8s/helm
Run the deployment script:
```bash
-./deploy-helm.sh apply
+./deploy.sh
```
-After executing this command, the system will prompt for configuration options:
+After running the command, the script opens Bash TUI menus for configuration. Use arrow keys or `j/k` to move, Space to toggle multi-select items, Enter to confirm, `b`/Backspace to go back, and `q` to quit.
-**Version Selection:**
-- **Speed version (Lightweight & Fast Deployment, Default)**: Quick startup of core features, suitable for individual users and small teams
-- **Full version (Complete Feature Edition)**: Provides enterprise-level tenant management and resource isolation features, includes Supabase authentication
+**Deployment Components:**
+- **infrastructure (required)**: Elasticsearch, PostgreSQL, Redis, MinIO
+- **application (selected by default, optional)**: config, runtime, mcp, northbound, web
+- **data-process (optional)**: data processing service
+- **supabase (optional)**: enables user, tenant, and authentication features
+- **terminal (optional)**: enables the OpenSSH terminal tool
+- **monitoring (optional)**: enables observability components and then prompts for a provider
-**Image Source Selection:**
-- **Mainland China**: Uses optimized regional mirrors for faster image pulling
-- **General**: Uses standard Docker Hub registries
+**Port Policy:**
+- **development (default)**: uses NodePort for Web and selected debug/internal services
+- **production**: keeps internal services as ClusterIP and exposes only production entrypoints
-**Optional Components:**
-- **Terminal Tool**: Enables openssh-server for AI agent shell command execution
+**Image Source:**
+- **general (default)**: uses standard public registries
+- **mainland**: uses mainland China mirrors
+- **local-latest**: uses local `latest` images and local-friendly pull policies for Nexent application images
+
+After a successful deployment, non-sensitive choices are saved to `k8s/helm/deploy.options`. The next interactive deployment can reuse the local config or run a full reconfiguration.
### ⚠️ Important Notes
@@ -72,7 +80,7 @@ kubectl exec -it -n nexent deploy/nexent-postgresql -- psql -U root -d nexent -c
"DELETE FROM nexent.user_tenant_t WHERE user_id='your_user_id';"
# Step 3: Re-deploy and record the su account password
-./deploy-helm.sh apply
+./deploy.sh
```
### 4. Access Your Installation
@@ -113,7 +121,7 @@ Nexent uses a microservices architecture deployed via Helm charts:
| nexent-redis | Caching layer |
| nexent-minio | S3-compatible object storage |
-**Supabase Services (Full Version Only):**
+**Supabase Services (when `supabase` is selected):**
| Service | Description |
|---------|-------------|
| nexent-supabase-kong | API Gateway |
@@ -124,13 +132,14 @@ Nexent uses a microservices architecture deployed via Helm charts:
| Service | Description |
|---------|-------------|
| nexent-openssh-server | SSH terminal for AI agents |
+| nexent-monitoring | Optional observability stack |
## 🔌 Port Mapping
| Service | Internal Port | NodePort | Description |
|---------|---------------|----------|-------------|
| Web Interface | 3000 | 30000 | Main application access |
-| Northbound API | 5010 | 30013 | Northbound API service |
+| Northbound API | 5013 | 30013 | Northbound API service |
| SSH Server | 22 | 30022 | Terminal tool access |
For internal service communication, services use Kubernetes internal DNS (e.g., `http://nexent-config:5010`).
@@ -141,34 +150,261 @@ Nexent uses PersistentVolumes for data persistence:
| Data Type | PersistentVolume | Default Host Path |
|-----------|------------------|-------------------|
-| Elasticsearch | nexent-elasticsearch-pv | `{dataDir}/elasticsearch` |
-| PostgreSQL | nexent-postgresql-pv | `{dataDir}/postgresql` |
-| Redis | nexent-redis-pv | `{dataDir}/redis` |
-| MinIO | nexent-minio-pv | `{dataDir}/minio` |
-| Supabase DB (Full) | nexent-supabase-db-pv | `{dataDir}/supabase-db` |
+| Elasticsearch | nexent-elasticsearch-pv | `/var/lib/nexent-data/nexent-elasticsearch` |
+| PostgreSQL | nexent-postgresql-pv | `/var/lib/nexent-data/nexent-postgresql` |
+| Redis | nexent-redis-pv | `/var/lib/nexent-data/nexent-redis` |
+| MinIO | nexent-minio-pv | `/var/lib/nexent-data/nexent-minio` |
+| Supabase DB (when `supabase` is selected) | nexent-supabase-db-pv | `/var/lib/nexent-data/nexent-supabase-db` |
-Default `dataDir` is `/var/lib/nexent-data` (configurable in `values.yaml`).
+Helm uninstall does not delete local hostPath data by default. Use `./uninstall.sh --delete-local-data true` to delete known Nexent local volume contents under `/var/lib/nexent-data/nexent-*`, or `--keep-local-data` to preserve them explicitly.
## 🔧 Deployment Commands
```bash
# Deploy with interactive prompts
-./deploy-helm.sh apply
+./deploy.sh
+
+# Non-interactive deployment with the default component set
+./deploy.sh --components infrastructure,application --port-policy development --image-source general
+
+# Enable user/tenant features, data processing, and terminal
+./deploy.sh --components infrastructure,application,supabase,data-process,terminal
# Deploy with mainland China image sources
-./deploy-helm.sh apply --is-mainland Y
+./deploy.sh --image-source mainland
-# Deploy full version (with Supabase)
-./deploy-helm.sh apply --deployment-version full
+# Use local latest images
+./deploy.sh --image-source local-latest
# Clean helm state only (fixes stuck releases)
-./deploy-helm.sh clean
+./uninstall.sh clean
+
+# Uninstall; local data is preserved by default, with interactive prompts for namespace and local data deletion
+./uninstall.sh
+
+# Uninstall and delete the namespace
+./uninstall.sh --delete-namespace true
+
+# Uninstall and delete local hostPath data
+./uninstall.sh --delete-local-data true
+
+# Complete uninstall including namespace and local hostPath data
+./uninstall.sh delete-all
+
+# Complete uninstall but preserve local hostPath data
+./uninstall.sh delete-all --keep-local-data
+```
+
+## 🔧 Advanced Configuration
+
+### Monitoring Configuration
+
+Kubernetes deployments enable monitoring through the `monitoring` component in the deployment script UI. The deployment script renders runtime Helm values for `global.monitoring.enabled`, `global.monitoring.provider`, and `global.monitoring.dashboardUrl`, and enables the `nexent-monitoring` subchart.
+
+```bash
+cd nexent/k8s/helm
+./deploy.sh
+```
+
+If `k8s/helm/deploy.options` already exists, the script asks whether to reuse local configuration. Choose to reconfigure/overwrite local configuration, then select `monitoring` in the component menu and manually choose `grafana`, `phoenix`, `langfuse`, `langsmith`, `zipkin`, or `otlp` in the provider menu.
+
+Supported providers:
+
+| Provider | Purpose | Default URL |
+|----------|---------|-------------|
+| `otlp` | OpenTelemetry Collector only, useful for forwarding to an external platform | No dashboard |
+| `phoenix` | Local Phoenix trace analysis | `http://localhost:30006` |
+| `langfuse` | Local Langfuse observability stack | `http://localhost:30001` |
+| `langsmith` | Forwarding to hosted LangSmith | `https://smith.langchain.com/` |
+| `grafana` | Local Grafana + Tempo | `http://localhost:30002/d/nexent-llm-agent/nexent-agent-trace-monitoring?orgId=1` |
+| `zipkin` | Local Zipkin | `http://localhost:30011` |
+
+Before choosing the `langsmith` provider, configure `global.monitoring.langsmithApiKey` and `global.monitoring.langsmithProject` in `k8s/helm/nexent/values.yaml`. To change local Grafana, Langfuse, or dashboard ports, adjust the values file first, then re-run the deployment script, choose to reconfigure, and manually select `monitoring`.
+
+Common Helm values:
+
+| Value | Description |
+|-------|-------------|
+| `global.monitoring.enabled` | Enables OpenTelemetry export in the Nexent backend |
+| `global.monitoring.provider` | Backend provider label: `otlp`, `phoenix`, `langfuse`, `langsmith`, `grafana`, `zipkin` |
+| `global.monitoring.otlpEndpoint` | Backend OTLP HTTP endpoint, default `http://nexent-otel-collector:4318` |
+| `global.monitoring.dashboardUrl` | Frontend monitoring entry URL; leave empty to hide the entry |
+| `global.monitoring.traceContentMode` | Trace content capture mode: `summary`, `metrics`, or `full` |
+| `nexent-monitoring.<provider>.service.nodePort` | NodePort override for provider dashboards |
+| `nexent-monitoring.langfuse.init.*` | Local Langfuse bootstrap organization, project, and admin account |
+| `nexent-monitoring.grafana.adminUser` / `adminPassword` | Local Grafana admin credentials |
+
+Check monitoring status:
+
+```bash
+kubectl get pods -n nexent | grep -E 'otel|phoenix|grafana|tempo|zipkin|langfuse'
+kubectl get svc -n nexent | grep -E 'otel|phoenix|grafana|zipkin|langfuse'
+```
+
+> **Production note**: Replace default passwords, secrets, and the Langfuse `encryptionKey`. Prefer ClusterIP services or a controlled Ingress for dashboards.
+
+### OAuth Login Configuration
-# Uninstall but preserve data
-./deploy-helm.sh delete
+OAuth login requires the `supabase` component. When enabling third-party login, deploy `supabase` and set `config.oauth.callbackBaseUrl` to the browser-accessible Nexent Web URL.
-# Complete uninstall including all data
-./deploy-helm.sh delete-all
+```bash
+./deploy.sh --components infrastructure,application,supabase
+```
+
+Kubernetes writes OAuth settings into backend environment variables through `nexent-common` `config.oauth.*` values:
+
+```bash
+helm upgrade --install nexent nexent \
+ --namespace nexent --create-namespace \
+ --set global.deploymentComponents.supabase=true \
+ --set nexent-supabase-kong.enabled=true \
+ --set nexent-supabase-auth.enabled=true \
+ --set nexent-supabase-db.enabled=true \
+ --set nexent-common.config.oauth.callbackBaseUrl=https://nexent.example.com \
+ --set nexent-common.config.oauth.githubClientId=your_github_client_id \
+ --set nexent-common.config.oauth.githubClientSecret=your_github_client_secret
+```
+
+Configurable OAuth values:
+
+| Value | Environment variable | Description |
+|-------|----------------------|-------------|
+| `nexent-common.config.oauth.callbackBaseUrl` | `OAUTH_CALLBACK_BASE_URL` | Web entry URL; the callback path is appended automatically |
+| `nexent-common.config.oauth.githubClientId` | `GITHUB_OAUTH_CLIENT_ID` | GitHub OAuth Client ID |
+| `nexent-common.config.oauth.githubClientSecret` | `GITHUB_OAUTH_CLIENT_SECRET` | GitHub OAuth Client Secret |
+| `nexent-common.config.oauth.gdeUrl` | `GDE_URL` | GDE OAuth service URL |
+| `nexent-common.config.oauth.gdeClientId` | `GDE_OAUTH_CLIENT_ID` | GDE OAuth Client ID |
+| `nexent-common.config.oauth.gdeClientSecret` | `GDE_OAUTH_CLIENT_SECRET` | GDE OAuth Client Secret |
+| `nexent-common.config.oauth.enableWechat` | `ENABLE_WECHAT_OAUTH` | Enables WeChat OAuth |
+| `nexent-common.config.oauth.wechatClientId` | `WECHAT_OAUTH_APP_ID` | WeChat App ID |
+| `nexent-common.config.oauth.wechatClientSecret` | `WECHAT_OAUTH_APP_SECRET` | WeChat App Secret |
+| `nexent-common.config.oauth.sslVerify` | `OAUTH_SSL_VERIFY` | Whether to verify provider TLS certificates |
+| `nexent-common.config.oauth.caBundle` | `OAUTH_CA_BUNDLE` | Custom CA bundle path |
+
+Provider callback URLs:
+
+| Provider | Callback URL |
+|----------|--------------|
+| GitHub | `{OAUTH_CALLBACK_BASE_URL}/api/user/oauth/callback?provider=github` |
+| GDE | `{OAUTH_CALLBACK_BASE_URL}/api/user/oauth/callback?provider=gde` |
+| WeChat | `{OAUTH_CALLBACK_BASE_URL}/api/user/oauth/callback?provider=wechat` |
+
+For local NodePort, a GitHub callback example is `http://localhost:30000/api/user/oauth/callback?provider=github`. In production, use a public HTTPS domain and register the exact same URL in the OAuth provider console.
+
+### CAS Login Configuration
+
+CAS SSO does not require the `supabase` component. Set `nexent-common.config.cas.callbackBaseUrl` to the browser-accessible Nexent Web URL without a trailing `/`. `nexent-common.config.cas.serverUrl` is the CAS Server root URL and should also not include a trailing `/`.
+
+Kubernetes writes CAS settings into backend environment variables through `nexent-common` `config.cas.*` values:
+
+```bash
+helm upgrade --install nexent nexent \
+ --namespace nexent --create-namespace \
+ --set nexent-common.config.cas.enabled=true \
+ --set nexent-common.config.cas.serverUrl=https://cas.example.com/cas \
+ --set nexent-common.config.cas.callbackBaseUrl=https://nexent.example.com \
+ --set nexent-common.config.cas.loginMode=force \
+ --set nexent-common.config.cas.logoutUrl=/logout
+```
+
+Configurable CAS values:
+
+| Value | Environment variable | Description |
+|-------|----------------------|-------------|
+| `nexent-common.config.cas.enabled` | `CAS_ENABLED` | Enables CAS |
+| `nexent-common.config.cas.serverUrl` | `CAS_SERVER_URL` | CAS Server root URL |
+| `nexent-common.config.cas.validatePath` | `CAS_VALIDATE_PATH` | serviceValidate path, default `/p3/serviceValidate` |
+| `nexent-common.config.cas.callbackBaseUrl` | `CAS_CALLBACK_BASE_URL` | Web entry URL; CAS callback paths are appended automatically |
+| `nexent-common.config.cas.loginMode` | `CAS_LOGIN_MODE` | `disabled`, `button`, or `force` |
+| `nexent-common.config.cas.userAttribute` | `CAS_USER_ATTRIBUTE` | User identifier attribute. Empty means use `<cas:user>` |
+| `nexent-common.config.cas.emailAttribute` | `CAS_EMAIL_ATTRIBUTE` | Email attribute |
+| `nexent-common.config.cas.roleAttribute` | `CAS_ROLE_ATTRIBUTE` | Role attribute |
+| `nexent-common.config.cas.tenantAttribute` | `CAS_TENANT_ATTRIBUTE` | Tenant attribute |
+| `nexent-common.config.cas.roleMapJson` | `CAS_ROLE_MAP_JSON` | JSON mapping from CAS roles to Nexent roles |
+| `nexent-common.config.cas.sessionMaxAgeSeconds` | `CAS_SESSION_MAX_AGE_SECONDS` | Maximum local CAS session lifetime |
+| `nexent-common.config.cas.localSessionMaxAgeSeconds` | `LOCAL_SESSION_MAX_AGE_SECONDS` | Nexent local session lifetime |
+| `nexent-common.config.cas.renewBeforeSeconds` | `CAS_RENEW_BEFORE_SECONDS` | Trigger silent renewal within this many seconds before expiry |
+| `nexent-common.config.cas.renewTimeoutSeconds` | `CAS_RENEW_TIMEOUT_SECONDS` | Silent renewal timeout |
+| `nexent-common.config.cas.syntheticEmailDomain` | `CAS_SYNTHETIC_EMAIL_DOMAIN` | Domain used when CAS does not return an email |
+| `nexent-common.config.cas.logoutUrl` | `CAS_LOGOUT_URL` | CAS logout URL. Empty means Nexent logout will not call the CAS Server logout endpoint |
+| `nexent-common.config.cas.sslVerify` | `CAS_SSL_VERIFY` | Whether to verify CAS Server TLS certificates |
+| `nexent-common.config.cas.caBundle` | `CAS_CA_BUNDLE` | Custom CA bundle path |
+
+Common CAS URLs:
+
+| Purpose | URL |
+|---------|-----|
+| Nexent login entry | `{CAS_CALLBACK_BASE_URL}/api/user/cas/login?redirect=/` |
+| CAS service callback | `{CAS_CALLBACK_BASE_URL}/api/user/cas/callback` |
+| CAS silent renewal callback | `{CAS_CALLBACK_BASE_URL}/api/user/cas/renew_callback` |
+| CAS single logout callback | `POST {CAS_CALLBACK_BASE_URL}/api/user/cas/logout_callback` |
+
+For Apereo CAS JSON Service Registry, create a service registration file such as `Nexent-10001.json` in the service registry directory configured by your CAS deployment. The `id` must be globally unique. This is a local NodePort example:
+
+```json
+{
+ "@class": "org.apereo.cas.services.RegexRegisteredService",
+ "serviceId": "http://localhost:30000.*",
+ "name": "Nexent CAS Client",
+ "id": 10001,
+ "description": "Nexent CAS SSO client",
+ "evaluationOrder": 1,
+ "logoutType": "BACK_CHANNEL",
+ "logoutUrl": "http://localhost:30000/api/user/cas/logout_callback"
+}
+```
+
+In production, keep `CAS_SSL_VERIFY=true`; for self-signed certificates, prefer `CAS_CA_BUNDLE` and only use `CAS_SSL_VERIFY=false` for local testing.
+
+#### CAS Integration with ModelEngine
+
+When integrating with ModelEngine through the CAS protocol, use a values file to configure Nexent. This avoids complex command-line escaping for `CAS_ROLE_MAP_JSON`.
+
+Create `cas-modelengine-values.yaml`:
+
+```yaml
+nexent-common:
+ config:
+ cas:
+ enabled: true
+ serverUrl: "https://<modelengine-host>:5443/SSOSvr"
+ validatePath: "/p3/serviceValidate"
+ callbackBaseUrl: "http://<nexent-host>:30000"
+ loginMode: "force"
+ userAttribute: "userName"
+ emailAttribute: "email"
+ roleAttribute: "userType"
+ tenantAttribute: "tenant_id"
+ roleMapJson: '{"1":"ADMIN","3":"DEV"}'
+ sessionMaxAgeSeconds: 3600
+ localSessionMaxAgeSeconds: 3600
+ renewBeforeSeconds: 300
+ renewTimeoutSeconds: 10
+ syntheticEmailDomain: "cas.local"
+ logoutUrl: "/logout?service=http://<nexent-host>:30000"
+ sslVerify: false
+ caBundle: ""
+```
+
+You also need to add a CAS client service registration file in the OMS container. Use the following steps as a reference:
+
+```bash
+# Create the registration file, paste the JSON content into it, and save it.
+vim Nexent-10000001.json
+{
+ "@class": "org.apereo.cas.services.CasRegisteredService",
+ "serviceId": "http://<nexent-host>:30000.*",
+ "name": "Nexent CAS Client",
+ "id": 10000001,
+ "description": "Nexent CAS SSO client",
+ "evaluationOrder": 1,
+ "logoutType": "BACK_CHANNEL",
+ "logoutUrl": "http://<nexent-host>:30000/api/user/cas/logout_callback"
+}
+
+# Run the following command to copy the registration file into the container.
+kubectl cp Nexent-10000001.json model-engine/$(kubectl get pods -n model-engine -l app=oms --no-headers | awk '{print $1}'):/opt/huawei/fce/apps/platform/webapps/SSOSvr/WEB-INF/classes/services/Nexent-10000001.json
+kubectl exec -i -n model-engine $(kubectl get pods -n model-engine -l app=oms --no-headers | awk '{print $1}') -- chown tomcat:fusioncube /opt/huawei/fce/apps/platform/webapps/SSOSvr/WEB-INF/classes/services/Nexent-10000001.json
```
## 🔍 Troubleshooting
diff --git a/doc/docs/en/quick-start/kubernetes-upgrade-guide.md b/doc/docs/en/quick-start/kubernetes-upgrade-guide.md
index 293358d2f..75afcfba9 100644
--- a/doc/docs/en/quick-start/kubernetes-upgrade-guide.md
+++ b/doc/docs/en/quick-start/kubernetes-upgrade-guide.md
@@ -15,7 +15,7 @@ Follow these steps to upgrade Nexent on Kubernetes safely:
Before updating, record the current deployment version and data directory information.
- Current Deployment Version Location: `APP_VERSION` in `backend/consts/const.py`
-- Data Directory Location: `global.dataDir` in `k8s/helm/nexent/values.yaml`
+- Local volume directories: each Helm sub-chart's `storage.hostPath`, defaulting to `/var/lib/nexent-data/nexent-*`
**Code downloaded via git**
@@ -28,7 +28,7 @@ git pull
**Code downloaded via ZIP package or other means**
1. Re-download the latest code from GitHub and extract it.
-2. Copy the `.deploy.options` file from the `k8s/helm` directory of your previous deployment to the new code directory. (If the file doesn't exist, you can ignore this step).
+2. Copy the `deploy.options` file from the `k8s/helm` directory of your previous deployment to the new code directory. (If the file doesn't exist, you can ignore this step).
## 🔄 Step 2: Execute the Upgrade
@@ -36,10 +36,10 @@ Navigate to the k8s/helm directory of the updated code and run the deployment sc
```bash
cd k8s/helm
-./deploy-helm.sh apply
+./deploy.sh
```
-The script will detect your previous deployment settings (version, image source, etc.) from the `.deploy.options` file. If the file is missing, you will be prompted to enter configuration details.
+The script will detect your saved deployment settings (components, port policy, image source, etc.) from `deploy.options`. If the file is missing, you will be prompted to enter configuration details.
> 💡 Tip
> If you need to configure voice models (STT/TTS), please edit the corresponding values in `values.yaml` or pass them via command line.
@@ -137,7 +137,7 @@ kubectl exec -i $POSTGRES_POD -n nexent -- psql -U root -d nexent < ./sql/v2.0.0
kubectl exec nexent/$POSTGRES_POD -n nexent -- pg_dump -U root nexent > backup_$(date +%F).sql
```
-> - For Supabase database (full version only), use `nexent-supabase-db` pod instead:
+> - For the Supabase database (when `supabase` is selected), use the `nexent-supabase-db` pod instead:
```bash
SUPABASE_POD=$(kubectl get pods -n nexent -l app=nexent-supabase-db -o jsonpath='{.items[0].metadata.name}')
diff --git a/doc/docs/en/quick-start/upgrade-guide.md b/doc/docs/en/quick-start/upgrade-guide.md
index 497212e06..3bc22f254 100644
--- a/doc/docs/en/quick-start/upgrade-guide.md
+++ b/doc/docs/en/quick-start/upgrade-guide.md
@@ -38,11 +38,11 @@ Navigate to the docker directory of the updated code and run the upgrade script:
bash upgrade.sh
```
-If deploy.options is missing, the script will prompt you to manually enter configuration details from the previous deployment, such as the current version and data directory. Enter the information you recorded earlier.
+If deploy.options is missing, the script will prompt you to select deployment settings again, such as components, port policy, and image source. Choose the same options you used for the previous deployment.
>💡 Tip
-> The default scenario is quick deployment, which uses .env.example.
-> If you need to configure voice models (STT/TTS), please add the relevant variables to .env.example in advance. We will provide a front-end configuration interface as soon as possible.
+> If `docker/.env` is missing, the deploy script automatically copies it from `.env.example`.
+> If you need to configure voice models (STT/TTS), add the relevant variables to `docker/.env`. We will provide a front-end configuration interface as soon as possible.
## 🌐 Step 3: Verify the deployment
diff --git a/doc/docs/en/sdk/data-process.md b/doc/docs/en/sdk/data-process.md
index 2d11202b1..614c4b438 100644
--- a/doc/docs/en/sdk/data-process.md
+++ b/doc/docs/en/sdk/data-process.md
@@ -43,10 +43,10 @@ def file_process(self,
## 📁 Supported File Formats
-- **Text files**: .txt, .md, .csv
-- **Documents**: .pdf, .docx, .pptx
+- **Text files**: .txt, .md, .csv, .json
+- **Documents**: .pdf, .docx, .pptx, .epub
- **Images**: .jpg, .png, .gif (with OCR)
-- **Web content**: HTML, URLs
+- **Web content**: HTML, URLs, XML
- **Archives**: .zip, .tar
## 💡 Usage Examples
diff --git a/doc/docs/en/sdk/monitoring.md b/doc/docs/en/sdk/monitoring.md
index 4aa625132..bb7c1db13 100644
--- a/doc/docs/en/sdk/monitoring.md
+++ b/doc/docs/en/sdk/monitoring.md
@@ -1,289 +1,327 @@
-# 🚀 Nexent LLM Monitoring System
+# Nexent Agent Observability (OTLP)
-Enterprise-grade monitoring solution specifically designed for monitoring LLM token generation speed and performance.
+Enterprise-grade observability for AI agents using the OpenTelemetry OTLP protocol. Supports integration with observability platforms such as Arize Phoenix, Langfuse, LangSmith, Grafana Tempo, Zipkin, and more.
-## 📊 System Architecture
+## Architecture
```
-┌─────────────────────────────────────────────────────────┐
-│ Nexent LLM Monitoring System │
-├─────────────────────────────────────────────────────────┤
-│ │
-│ Nexent API ──► OpenTelemetry ──► Jaeger (Tracing) │
-│ │ │ │
-│ │ └──────► Prometheus (Metrics) │
-│ │ │ │
-│ └─► OpenAI LLM └──► Grafana (Visualization) │
-│ (Token Monitoring) │
-└─────────────────────────────────────────────────────────┘
+NexentAgent ──► OpenTelemetry SDK ──► OTLP Collector ──► Arize Phoenix / Langfuse / LangSmith / Grafana Tempo / Zipkin / OTLP Backend
+ │ │
+ │ OpenInference Semantics │
+ │ (llm.*, agent.* attributes) │
+ └────────────────────────────────────────┘
```
-## ⚡ Quick Start (5 minutes)
+## Quick Start
```bash
-# 1. Start monitoring services
-./docker/start-monitoring.sh
+cd docker
+[ -f .env ] || cp .env.example .env
+cp monitoring/monitoring.env.example monitoring/monitoring.env
-# 2. Install performance monitoring dependencies
-uv sync --extra performance
+vim .env  # set the following values in .env:
+ENABLE_TELEMETRY=true
+MONITORING_PROVIDER=otlp
+OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4318
+OTEL_EXPORTER_OTLP_PROTOCOL=http
-# 3. Enable monitoring
-export ENABLE_TELEMETRY=true
+vim monitoring/monitoring.env  # set the following value in monitoring.env:
+MONITORING_PROVIDER=otlp
-# 4. Start backend service
-python backend/config_service.py
-python backend/runtime_service.py
+./start-monitoring.sh --stack collector
```
-## 📊 Access Monitoring Interfaces
+## AI Observability Platforms
-| Interface | URL | Purpose |
-|-----------|-----|---------|
-| **Grafana Dashboard** | http://localhost:3005 | LLM Performance Monitoring |
-| **Jaeger Tracing** | http://localhost:16686 | Request Trace Analysis |
-| **Prometheus Metrics** | http://localhost:9090 | Raw Monitoring Data |
+### Arize Phoenix
-### 🔐 Grafana Login Information
+Arize Phoenix provides AI-specific observability with OpenInference semantic support.
-When first accessing Grafana (http://localhost:3005), you need to login:
+**Configuration:**
+```bash
+MONITORING_PROVIDER=phoenix
+OTEL_EXPORTER_OTLP_ENDPOINT=https://app.phoenix.arize.com/s/YOUR_SPACE
+OTEL_EXPORTER_OTLP_AUTHORIZATION="Bearer YOUR_PHOENIX_API_KEY"
+OTEL_EXPORTER_OTLP_PROTOCOL=http
+OTEL_EXPORTER_OTLP_METRICS_ENABLED=false
```
-Username: admin
-Password: admin
-```
-
-**After first login, you'll be prompted to change password:**
-- Set a new password (recommended)
-- Click "Skip" to skip (development environment)
-**After login, you can see:**
-- 📊 **LLM Performance Dashboard** - Pre-configured performance dashboard
-- 📈 **Data Source Configuration** - Auto-connected to Prometheus and Jaeger
-- 🎯 **Real-time Monitoring Panel** - Key metrics like token generation speed, latency
+**Features:**
+- LLM trace visualization with prompt/completion
+- Token-level performance metrics
+- Agent step tracing
+- Cost analysis
-## 🎯 Core Features
+### Langfuse
-### ⚡ LLM-Specific Monitoring
-- **Token Generation Speed**: Real-time monitoring of tokens generated per second
-- **TTFT (Time to First Token)**: First token return latency
-- **Streaming Response Analysis**: Generation timestamp for each token
-- **Model Performance Comparison**: Performance benchmarks across different models
+Langfuse offers prompt management and LLM observability with OTLP support.
-### 🔍 Distributed Tracing
-- **Complete Request Chain**: End-to-end tracing from HTTP to LLM
-- **Performance Bottleneck Detection**: Automatically identify slow queries and anomalies
-- **Error Root Cause Analysis**: Quickly locate problem sources
+**Configuration:**
-### 🛠️ Developer-Friendly Design
-- **One-Line Integration**: Quick monitoring with decorators
-- **Zero-Dependency Degradation**: Auto-skip when monitoring dependencies are missing
-- **Zero-Touch Usage**: No need to manually check monitoring status, handled automatically
-- **Flexible Configuration**: Environment variable controlled behavior
-
-## 🛠️ Adding Monitoring to Code
+```bash
+MONITORING_PROVIDER=langfuse
+OTEL_EXPORTER_OTLP_ENDPOINT=https://cloud.langfuse.com/api/public/otel
-### 🎯 Recommended Approach: Singleton Pattern (v2.1+)
+LANGFUSE_PUBLIC_KEY=pk-xxx
+LANGFUSE_SECRET_KEY=sk-xxx
-```python
-# Backend service usage - directly use globally configured monitoring_manager
-from utils.monitoring import monitoring_manager
-
-# API endpoint monitoring
-@monitoring_manager.monitor_endpoint("my_service.my_function")
-async def my_api_function():
- return {"status": "ok"}
+OTEL_EXPORTER_OTLP_AUTHORIZATION="Basic BASE64_ENCODED_KEY"
+OTEL_EXPORTER_OTLP_LANGFUSE_INGESTION_VERSION=4
+```
-# LLM call monitoring
-@monitoring_manager.monitor_llm_call("gpt-4", "chat_completion")
-def call_llm(messages):
- # Automatically get token-level monitoring
- return llm_response
+Generate the encoded key:
-# Manual monitoring events
-monitoring_manager.add_span_event("custom_event", {"key": "value"})
-monitoring_manager.set_span_attributes(user_id="123", action="process")
+```bash
+echo -n "$LANGFUSE_PUBLIC_KEY:$LANGFUSE_SECRET_KEY" | base64 | tr -d '\n'
```
-### 📦 Direct SDK Usage
+**Features:**
+- Prompt versioning and management
+- Session-based trace grouping
+- User feedback collection
+- Model cost tracking
-```python
-from nexent.monitor import get_monitoring_manager
-
-# Get global monitoring manager - already configured in backend
-monitor = get_monitoring_manager()
-
-# Use decorators
-@monitor.monitor_llm_call("claude-3", "completion")
-def my_llm_function():
- return "response"
-
-# Or use directly in business logic
-with monitor.trace_llm_request("custom_operation", "my_model") as span:
- # Execute business logic
- result = process_data()
- monitor.add_span_event("processing_completed")
- return result
-```
+### LangSmith
-### ✨ Global Configuration Automation
+LangSmith supports online OTLP trace ingestion through the OpenTelemetry endpoint. Nexent can send traces to a local Collector first, and the Collector forwards them to LangSmith.
-Monitoring configuration is auto-initialized in `backend/utils/monitoring.py`:
+**Collector forwarding:**
-```python
-# No manual configuration needed - auto-completed at system startup
-# monitoring_manager already configured with environment variables
-from utils.monitoring import monitoring_manager
+```bash
+cd docker
+vim monitoring/monitoring.env  # set the following values in monitoring.env:
-# Direct usage without checking if enabled
-@monitoring_manager.monitor_endpoint("my_function")
-def my_function():
- pass
+MONITORING_PROVIDER=langsmith
+LANGSMITH_API_KEY=lsv2_xxx
+LANGSMITH_PROJECT=nexent
+LANGSMITH_OTLP_TRACES_ENDPOINT=https://api.smith.langchain.com/otel/v1/traces
-# FastAPI application initialization
-monitoring_manager.setup_fastapi_app(app)
+./start-monitoring.sh --stack langsmith
```
-### 🔒 Auto Start/Stop Design
-
-- **Smart Monitoring**: Auto start/stop based on `ENABLE_TELEMETRY` environment variable
-- **Zero-Touch Usage**: External code doesn't need to check monitoring status, use all features directly
-- **Graceful Degradation**: Silent no-effect when disabled, normal operation when enabled
-- **Default Off**: Auto-disabled when not configured
+Nexent backend configuration when it sends OTLP to the Collector:
```bash
-# Enable monitoring
-export ENABLE_TELEMETRY=true
-
-# Disable monitoring
-export ENABLE_TELEMETRY=false
+ENABLE_TELEMETRY=true
+MONITORING_PROVIDER=langsmith
+OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4318
+OTEL_EXPORTER_OTLP_PROTOCOL=http
+OTEL_EXPORTER_OTLP_METRICS_ENABLED=false
```
-## 📊 Core Monitoring Metrics
+For direct backend-to-LangSmith export, set `OTEL_EXPORTER_OTLP_ENDPOINT=https://api.smith.langchain.com/otel`, `LANGSMITH_API_KEY`, and optionally `LANGSMITH_PROJECT`.
-| Metric | Description | Importance |
-|--------|-------------|------------|
-| `llm_token_generation_rate` | Token generation speed (tokens/s) | ⭐⭐⭐ |
-| `llm_time_to_first_token_seconds` | First token latency | ⭐⭐⭐ |
-| `llm_request_duration_seconds` | Complete request duration | ⭐⭐⭐ |
-| `llm_total_tokens` | Input/output token count | ⭐⭐ |
-| `llm_error_count` | LLM call error count | ⭐⭐⭐ |
+### Zipkin
-## 🔧 Environment Configuration
+Zipkin provides a lightweight local trace query UI. For local deployment, Nexent sends OTLP to the Collector, and the Collector forwards traces to Zipkin.
```bash
-# Add to .env file
-cat >> .env << EOF
-ENABLE_TELEMETRY=true
-SERVICE_NAME=nexent-backend
-JAEGER_ENDPOINT=http://localhost:14268/api/traces
-LLM_SLOW_REQUEST_THRESHOLD_SECONDS=5.0
-LLM_SLOW_TOKEN_RATE_THRESHOLD=10.0
-TELEMETRY_SAMPLE_RATE=1.0 # Development environment, production recommended 0.1
-EOF
+MONITORING_PROVIDER=zipkin
+OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4318
+OTEL_EXPORTER_OTLP_PROTOCOL=http
+MONITORING_DASHBOARD_URL=http://localhost:9411
```
-## 🛠️ System Verification
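+Set `MONITORING_DASHBOARD_URL` to the browser-accessible monitoring UI URL. The backend returns this value to the frontend top bar as-is, without deriving a provider-specific path. The block below lists example values for different monitoring UIs; set only the one that matches your deployment.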
```bash
-# Check metrics endpoint
-curl http://localhost:8000/metrics
-
-# Verify dependency installation
-python -c "from backend.utils.monitoring import MONITORING_AVAILABLE; print(f'Monitoring Available: {MONITORING_AVAILABLE}')"
+MONITORING_DASHBOARD_URL=http://localhost:6006
+MONITORING_DASHBOARD_URL=http://localhost:3001/project/nexent
+MONITORING_DASHBOARD_URL=http://localhost:3002/d/nexent-llm-agent/nexent-agent-trace-monitoring?orgId=1
+MONITORING_DASHBOARD_URL=http://localhost:9411
```
-## 🆘 Troubleshooting
+## Environment Variables
+
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `ENABLE_TELEMETRY` | `false` | Enable/disable monitoring |
+| `MONITORING_PROVIDER` | `otlp` | Provider profile: `otlp`, `phoenix`, `langfuse`, `langsmith`, `grafana`, `zipkin` |
+| `MONITORING_DASHBOARD_URL` | (empty) | Browser-accessible monitoring UI URL used by the frontend top bar |
+| `MONITORING_PROJECT_NAME` | `nexent` | Observability platform project name |
+| `MONITORING_TRACE_CONTENT_MODE` | `summary` | Trace payload mode: `summary` records bounded previews plus metadata, `metrics` records only structure/size metadata, `full` keeps full payloads subject to `MONITORING_TRACE_MAX_CHARS` |
+| `MONITORING_TRACE_MAX_CHARS` | `4000` | Maximum characters for each payload preview written to trace attributes |
+| `MONITORING_TRACE_MAX_ITEMS` | `20` | Maximum dict keys/list items included in payload previews |
+| `OTEL_SERVICE_NAME` | `nexent-backend` | Service identifier |
+| `OTEL_EXPORTER_OTLP_ENDPOINT` | `http://localhost:4318` | OTLP base endpoint; SDK derives `/v1/traces` and `/v1/metrics` |
+| `OTEL_EXPORTER_OTLP_TRACES_ENDPOINT` | (empty) | Optional trace-specific endpoint |
+| `OTEL_EXPORTER_OTLP_METRICS_ENDPOINT` | (empty) | Optional metric-specific endpoint |
+| `OTEL_EXPORTER_OTLP_PROTOCOL` | `http` | Protocol: `http` or `grpc` |
+| `OTEL_EXPORTER_OTLP_HEADERS` | (empty) | Generic auth headers as comma-separated `key=value` pairs |
+| `OTEL_EXPORTER_OTLP_AUTHORIZATION` | (empty) | `Authorization` header, commonly used by Phoenix bearer auth and Langfuse |
+| `OTEL_EXPORTER_OTLP_X_API_KEY` | (empty) | `x-api-key` header for platforms that require it |
+| `OTEL_EXPORTER_OTLP_LANGFUSE_INGESTION_VERSION` | (empty) | Langfuse ingestion version, for example `4` |
+| `OTEL_EXPORTER_OTLP_METRICS_ENABLED` | `true` | Whether to export OTLP metrics |
+| `LANGSMITH_API_KEY` | (empty) | LangSmith API key; mapped to the `x-api-key` OTLP header |
+| `LANGSMITH_PROJECT` | (empty) | Optional LangSmith project header |
+| `LANGSMITH_OTLP_TRACES_ENDPOINT` | `https://api.smith.langchain.com/otel/v1/traces` | Collector trace endpoint for online LangSmith |
+
+## Code Integration
+
+### Agent Boundary Context
+
+At the request boundary, business code only binds the resolved user and Agent metadata once. The SDK then creates Agent, LLM, and Tool spans from the runtime lifecycle:
-### No monitoring data?
-```bash
-# Check service status
-docker-compose -f docker/docker-compose-monitoring.yml ps
+```python
+from nexent.monitor.agent_observability import AgentRunMetadata
+from utils.monitoring import monitoring_manager
-# Check dependency installation
-python -c "import opentelemetry; print('✅ Monitoring dependencies installed')"
+monitoring_manager.bind_agent_context(AgentRunMetadata(
+ tenant_id=tenant_id,
+ user_id=user_id,
+ agent_id=agent_request.agent_id,
+ conversation_id=agent_request.conversation_id,
+ query=agent_request.query,
+ is_debug=agent_request.is_debug,
+ language=language,
+))
```
-### Port conflicts?
-```bash
-# Check port usage
-lsof -i :3005 -i :9090 -i :16686
+`monitor_endpoint` is still kept as a compatibility API and low-level escape hatch, but it is no longer the recommended way to add normal Agent observability.
+
+### Trace Payload Policy
+
+Tool input/output, retriever output, and Langfuse-compatible `input.value` / `output.value` attributes share the same payload policy. By default Nexent writes a bounded preview plus structured metadata such as `type`, `size_chars`, `item_count`, `truncated`, and `keys`. Memory search spans intentionally record only result summaries and statistics, not full memory text bodies.
+
+Agent context metrics are emitted from the SDK lifecycle. Each action step records an `agent.step.metrics` event with estimated context tokens, compression calls, cache hits, compression ratio, and token threshold. The final Agent span also receives aggregate step count, max context size, average compression ratio, total compression calls, and cache hit totals.
+
+### LLM Call Monitoring
+
+```python
+@monitoring_manager.monitor_llm_call("gpt-4", "chat_completion")
+def call_llm(messages):
+ return llm_response
```
-### Dependency installation issues?
-```bash
-# Reinstall performance dependencies
-uv sync --extra performance
+### Agent Step Tracing
-# Check performance configuration in pyproject.toml
-cat backend/pyproject.toml | grep -A 20 "performance"
+```python
+with monitoring_manager.trace_agent_step("agent.run.loop", step_type="agent_loop") as span:
+ result = execute_tool()
+ monitoring_manager.set_tool_output(result)
```
-### Service name shows as unknown_service?
-```bash
-# Check environment variable configuration
-echo "SERVICE_NAME: $SERVICE_NAME"
+### Tool Call Tracing
-# Restart monitoring service to apply new configuration
-./docker/start-monitoring.sh
+```python
+with monitoring_manager.trace_tool_call("web_search", "agent_name", {"query": "test"}) as span:
+ results = search_web("test")
+ monitoring_manager.set_tool_output({"results": results})
```
-## 🧹 Data Management
+### Retriever Call Tracing
-### Clean Jaeger Trace Data
-```bash
-# Method 1: Restart Jaeger container (simplest)
-docker-compose -f docker/docker-compose-monitoring.yml restart nexent-jaeger
+Knowledge-base search tools are classified as retriever spans automatically by the SDK. Custom retriever integrations can use the same semantics directly:
-# Method 2: Completely rebuild Jaeger container and data
-docker-compose -f docker/docker-compose-monitoring.yml stop nexent-jaeger
-docker-compose -f docker/docker-compose-monitoring.yml rm -f nexent-jaeger
-docker-compose -f docker/docker-compose-monitoring.yml up -d nexent-jaeger
+```python
+with monitoring_manager.trace_retriever_call("knowledge_base_search", "agent_name", {"query": "test"}) as span:
+ documents = search_knowledge_base("test")
+ monitoring_manager.set_retriever_output(documents)
+```
-# Method 3: Clean all monitoring data (rebuild all containers)
-docker-compose -f docker/docker-compose-monitoring.yml down
-docker-compose -f docker/docker-compose-monitoring.yml up -d
+## OpenInference Semantic Attributes
+
+The system uses OpenInference semantic conventions for AI-specific observability:
+
+### LLM Attributes
+
+| Attribute | Description |
+|-----------|-------------|
+| `llm.model_name` | Model identifier (e.g., `gpt-4`) |
+| `llm.operation.name` | Operation type (e.g., `chat_completion`) |
+| `llm.token_count.prompt` | Input token count |
+| `llm.token_count.completion` | Output token count |
+| `llm.invocation_parameters` | Model parameters (JSON) |
+| `llm.time_to_first_token` | TTFT in seconds |
+
+### Agent Attributes
+
+| Attribute | Description |
+|-----------|-------------|
+| `agent.name` | Agent identifier |
+| `agent.step.name` | Step name (e.g., `web_search`) |
+| `agent.step.type` | Step type: `tool_call`, `reasoning`, `action_selection` |
+| `agent.tool.name` | Tool name |
+| `agent.tool.input` | Tool input preview using the configured trace payload policy |
+| `agent.tool.input.*` | Structured tool input metadata: type, size, item count, truncation, keys |
+| `agent.tool.output` | Tool output preview using the configured trace payload policy |
+| `agent.tool.output.*` | Structured tool output metadata: type, size, item count, truncation, keys |
+| `agent.tool.success` | Whether the tool call completed successfully |
+| `agent.tool.duration_ms` | Tool call duration |
+| `retriever.name` | Retriever name |
+| `retrieval.query` | Retriever query |
+| `retrieval.results.count` | Retriever result count |
+| `retrieval.top_score` | Highest numeric result score when available |
+| `retriever.input.*` | Structured retriever input metadata |
+| `retriever.output` | Retriever output preview using the configured trace payload policy |
+| `retriever.output.*` | Structured retriever output metadata |
+| `context.tokens.estimated_input` | Estimated context input tokens per Agent step event |
+| `context.tokens.uncompressed_estimated` | Estimated uncompressed context tokens per Agent step event |
+| `context.compression.calls` | Compression calls per Agent step event |
+| `context.compression.cache_hits` | Compression cache hits per Agent step event |
+| `context.compression.ratio` | Compression ratio per Agent step event |
+
+## Metrics
+
+| Metric | Description |
+|--------|-------------|
+| `llm.request.duration` | Request latency |
+| `llm.token.generation_rate` | Tokens per second |
+| `llm.time_to_first_token` | TTFT |
+| `llm.token_count.prompt` | Input tokens |
+| `llm.token_count.completion` | Output tokens |
+| `agent.step.count` | Agent step count |
+| `agent.execution.duration` | Agent execution time |
+| `agent.error.count` | Agent errors |
+
+## Collector Configuration
+
+By default, the OpenTelemetry Collector only logs data through the debug exporter. This avoids forwarding data back into itself when no external backend is configured. To forward through the Collector, add a platform exporter:
+
+```yaml
+exporters:
+ otlphttp/langsmith:
+ traces_endpoint: https://api.smith.langchain.com/otel/v1/traces
+ headers:
+ x-api-key: YOUR_LANGSMITH_API_KEY
+ Langsmith-Project: nexent
+
+service:
+ pipelines:
+ traces:
+ exporters: [otlphttp/langsmith, debug]
```
-### Clean Prometheus Metrics Data
-```bash
-# Restart Prometheus container
-docker-compose -f docker/docker-compose-monitoring.yml restart nexent-prometheus
+See `docker/monitoring/otel-collector-config.yml` for full configuration with platform examples.
-# Completely clean Prometheus data
-docker-compose -f docker/docker-compose-monitoring.yml stop nexent-prometheus
-docker volume rm docker_prometheus_data 2>/dev/null || true
-docker-compose -f docker/docker-compose-monitoring.yml up -d nexent-prometheus
-```
+## Graceful Degradation
-### Clean Grafana Configuration
-```bash
-# Reset Grafana configuration and dashboards
-docker-compose -f docker/docker-compose-monitoring.yml stop nexent-grafana
-docker volume rm docker_grafana_data 2>/dev/null || true
-docker-compose -f docker/docker-compose-monitoring.yml up -d nexent-grafana
+When OpenTelemetry dependencies are not installed, monitoring is disabled gracefully:
+
+```bash
+pip install nexent # Basic package - no monitoring
+pip install nexent[performance] # With OTLP support
```
-## 📈 Typical Problem Analysis
+All monitoring methods work without errors when disabled - decorators pass through, context managers yield None.
-### Slow token generation (< 5 tokens/s)
-1. **Analysis**: Grafana → Token Generation Rate panel
-2. **Solution**: Check model service load, optimize input prompt length
+## Troubleshooting
-### Slow request response (> 10s)
-1. **Analysis**: Jaeger → View complete trace chain
-2. **Solution**: Locate bottleneck (database/LLM/network)
+### No data appearing
-### Error rate spike (> 10%)
-1. **Analysis**: Prometheus → llm_error_count metric
-2. **Solution**: Check model service availability, verify API keys
+1. Check `ENABLE_TELEMETRY=true` in `.env`
+2. Verify OTLP endpoint is reachable
+3. Check authentication headers are correct
-## 🎉 Getting Started
+### Connection errors
-After setup completion, you can:
+1. Test endpoint: `curl -v $OTEL_EXPORTER_OTLP_ENDPOINT/v1/traces`
+2. Verify protocol matches endpoint (`http` vs `grpc`)
+3. Check Collector logs: `docker logs nexent-otel-collector`
-1. 📊 View **LLM Performance Dashboard** in Grafana
-2. 🔍 Trace complete request chains in Jaeger
-3. 📈 Analyze token generation speed and performance bottlenecks
-4. 🚨 Set performance alerts and thresholds
+### Wrong attributes
-Enjoy efficient LLM performance monitoring! 🚀
+1. Verify OpenInference attributes in platform UI
+2. Check span attribute naming: `llm.model_name` not `model_name`
+3. Review platform-specific attribute requirements
diff --git a/doc/docs/en/user-guide/agent-development.md b/doc/docs/en/user-guide/agent-development.md
index db2614f7d..8e6b47d4f 100644
--- a/doc/docs/en/user-guide/agent-development.md
+++ b/doc/docs/en/user-guide/agent-development.md
@@ -31,15 +31,98 @@ You can configure other collaborative agents for your created agent, as well as
### 🤝 Collaborative Agents
+Collaborative agents help the current agent complete complex tasks. The sources of collaborative agents are divided into two categories:
+
+- **Internal Agents**: Published agents on the platform
+- **External A2A Agents**: Third-party agents discovered through the A2A protocol
+
1. Click the plus sign under the "Collaborative Agent" tab to open the selectable agent list
-2. Select the agents you want to add from the dropdown list
-3. Multiple collaborative agents can be selected
-4. Click × to remove an agent from the selection
+2. The agent list is divided into two tabs: "Internal Agent" and "External A2A Agent". You can choose based on your needs
+3. Select the agent you want to add from the dropdown list
+4. Multiple collaborative agents can be selected
+5. Click × to remove an agent from the selection
+
+
+
+
+
+#### 🌐 Add External A2A Agents
+
+Nexent supports communication with third-party agents through the A2A protocol. You can discover external A2A agents in the following two ways:
+
+##### Discover Agent via URL
+
+If you know the Agent Card address of the target agent, you can use the URL discovery method:
-
+
+1. In the External A2A Agent list, click the "Add External Agent" button
+2. Select the "URL Discovery" tab
+3. Fill in the Agent Card URL address, for example: `https://example.com/.well-known/agent-card.json`
+4. Click the "Discover" button; the system will automatically retrieve the agent's related information
+5. After successful discovery, you can view the agent's name, description, capabilities and other information
+6. Click "Add to List" to complete the addition
+
+> 💡 **Tip**: The Agent Card is an Agent description file that complies with the A2A 1.0 specification, containing the agent's name, description, calling address, capabilities and other information.
+
+##### Discover Agent via Nacos
+
+If your agent is registered with the Nacos service discovery platform, you can use the Nacos discovery method:
+
+
+
+
+
+1. In the External A2A Agent list, click the "Add External Agent" button
+2. Select the "Nacos Discovery" tab
+3. For first-time use, you need to configure the Nacos connection information:
+ - **Nacos Server Address**: Fill in the Nacos server address, such as `http://127.0.0.1:8848`
+ - **Namespace ID**: Fill in the Nacos namespace ID (optional)
+ - **Group Name**: Fill in the service group name, default is `DEFAULT_GROUP`
+ - **Username/Password**: Fill in the Nacos access credentials (optional)
+4. Click "Save Configuration" to save the Nacos connection information
+5. Fill in the Agent service name to scan
+6. Click the "Scan" button; the system will obtain matching Agent information from Nacos
+7. The scan results will list all matching Agents. You can select the agents you need and add them to the list
+
+> ⚠️ **Note**: Make sure the Nacos service is running properly and the target Agent is correctly registered with Nacos.
+
+##### Manage Discovered External Agents
+
+In the External A2A Agent list, you can view and manage all discovered external agents:
+
+
+
+
+
+1. **View Agent Details**: Click on the agent card to view its complete information, including name, description, URL, capability list, etc.
+2. **Test Agent**: Click the "Test" button to send a test message to the agent and verify if it is working properly
+3. **Chat with Agent**: Click the "Chat" button to open a chat window and interact with the agent in real time
+4. **Configure Calling Protocol**: Click the "Protocol Configuration" button to select the calling protocol for this agent:
+ - **HTTP + JSON**: Use REST API style calls
+ - **JSON-RPC**: Use JSON-RPC protocol calls
+5. **Refresh Agent Information**: If the agent information changes, click the "Refresh" button to re-fetch the latest Agent Card
+6. **Remove Agent**: Click the "Remove" button to delete the agent from the discovered list
+
+> 💡 **Use Cases**:
+> - Quickly integrate known third-party agent services through URL discovery
+> - Batch integrate all agents from the same service registry through Nacos discovery
+> - Configure protocols to meet the requirements of different agent service providers
+
+###### Integrate [DataAgent](https://gitcode.com/datagallery/dataagent) A2A Agent via URL
+
+1. Refer to the [DataAgent documentation](https://gitcode.com/datagallery/dataagent#%F0%9F%8C%90-a2a-10-%E6%9C%8D%E5%8A%A1%E6%A8%A1%E5%BC%8F) and start DataAgent in A2A service mode.
+ > Nexent does not currently support agents that require authentication. Do not set `auth-token` when starting DataAgent.
+
+
+
+
+
+2. Refer to [Discover Agent via URL](#discover-agent-via-url) to integrate the agent. The URL is `http://<dataagent-host>:9999/.well-known/agent-card.json`, where `<dataagent-host>` is the address of the machine running DataAgent.
+3. Refer to [Manage Discovered External Agents](#manage-discovered-external-agents) to configure the invocation protocol, and select HTTP + JSON for integration.
+
### 🛠️ Select Agent Tools
Agents can use various tools to complete tasks, such as knowledge base search, file parsing, image parsing, email sending/receiving, file management, and other local tools. They can also integrate third-party MCP tools or custom tools.
@@ -60,6 +143,8 @@ Agents can use various tools to complete tasks, such as knowledge base search, f
> 2. Please select the `analyze_text_file` tool to enable the parsing function for document and text files.
> 3. Please select the `analyze_image` tool to enable the parsing function for image files.
>
+> ⚠️ **Embedding Model Configuration**: When using the `knowledge_base_search` tool, ensure that the knowledge base has an embedding model configured. For existing knowledge bases, the system will prompt you to select an embedding model. Make sure to select **the same embedding model used when creating the knowledge base**. If the selected model differs from the one used during knowledge base creation, it may cause search failures or inaccurate results.
+>
> 📚 Want to learn about all the built-in local tools available in the system? Please refer to [Local Tools Overview](./local-tools/index.md).
### 🔌 Add MCP Tools
@@ -108,6 +193,39 @@ You can add MCP services to Nexent in the following two ways:
Many third-party services such as [ModelScope](https://www.modelscope.cn/mcp) provide MCP services, which you can quickly integrate and use.
You can also develop your own MCP services and connect them to Nexent; see [MCP Tool Development](../backend/tools/mcp).
+**3️⃣ Convert Existing API to MCP Service**
+
+🔔 This method is suitable for quickly converting existing REST API endpoints into MCP tools without additional development, allowing agents to call existing API capabilities:
+
+>1. In the MCP Config module, select **"API to MCP"** as the access type
+>
+>2. Fill in the API basic information in the input box below:
+> - **Service Name**: Display name for the MCP service
+> - **OpenAPI JSON**: OpenAPI 3.x specification in JSON format
+> - **Base Service URL**: Base address of the API service (supports http/https)
+>
+>3. Click the **+ Add** button in the lower right corner to complete the MCP service conversion
+
+
+
+
+
+>4. After conversion, you can view all externally converted MCP tools in the **Outer APIs** tab
+
+
+
+
+
+
+
+
+
+>💡 **Use Cases**:
+>- Quickly integrate internal enterprise REST API endpoints
+>- Convert third-party service HTTP APIs into MCP tools
+>- Generate tools directly from OpenAPI specifications without writing MCP Server code
+
+
### ⚙️ Custom Tools
You can refer to the following guides to develop your own tools and integrate them into Nexent to enrich agent capabilities:
@@ -129,7 +247,7 @@ Nexent provides a "Tool Testing" capability for all types of tools—whether the
- The test `query`, such as "benefits of vitamin C"
- The search `search_mode` (default is `hybrid`)
- The target index list `index_names`, such as `["Medical", "Vitamin Encyclopedia"]`
- - If `index_names` is not entered, it will default to searching all knowledge bases selected on the knowledge base page
+ - If `index_names` is not entered, it will default to searching all knowledge bases selected on the knowledge base page
6. After entering the parameters, click "Execute Test" to start the test and view the test results below
@@ -181,6 +299,134 @@ After completing the initial agent configuration, you can debug the agent and fi
After successful debugging, click the "Save" button in the lower right corner, and the agent will be saved and appear in the agent list.
+## 📋 Version Management
+
+Nexent supports agent version management. You can save different versions of agent configurations during the debugging process.
+
+Once the agent configuration is verified, you can publish the agent. After publishing, the agent will be visible in the Agent Space and Start Chat pages.
+
+
+
+If you need to roll back to a previous version, click the "Rollback" button on the version management page.
+
+
+
+### 🚀 Publish as A2A Agent
+
+Nexent supports exposing published agents as A2A Agents for external systems to call. When publishing a version, you can check the "Publish as A2A Agent" option to register the current agent as an A2A 1.0 compliant Agent.
+
+
+
+
+
+After successful publishing, the system will display the A2A Agent's call information:
+
+
+
+
+
+| Field | Description |
+|-------|-------------|
+| **Endpoint ID** | Unique identifier for the A2A Agent |
+| **Agent Card URL** | Agent discovery endpoint; external systems use this address to retrieve Agent descriptions |
+| **Protocol Version** | A2A protocol version; currently 1.0 |
+| **REST Endpoints** | REST-style API endpoints |
+| **JSON-RPC Endpoint** | JSON-RPC 2.0 protocol calling endpoint |
+
+#### Calling Methods
+
+The published A2A Agent supports the following two calling protocols:
+
+##### REST API
+
+```bash
+# Get Agent Card (for Agent discovery)
+GET /nb/a2a/{endpoint_id}/.well-known/agent-card.json
+
+# Send synchronous message
+POST /nb/a2a/{endpoint_id}/message:send
+Content-Type: application/json
+
+{
+ "message": {
+ "role": "user",
+ "content": "Please help me complete a task"
+ }
+}
+
+# Send streaming message (SSE)
+POST /nb/a2a/{endpoint_id}/message:stream
+Content-Type: application/json
+
+{
+ "message": {
+ "role": "user",
+ "content": "Please help me complete a task"
+ }
+}
+
+# Get task status
+GET /nb/a2a/{endpoint_id}/tasks/{task_id}
+```
+
+##### JSON-RPC 2.0
+
+```bash
+POST /nb/a2a/{endpoint_id}/v1
+Content-Type: application/json
+
+# Send synchronous message
+{
+ "jsonrpc": "2.0",
+ "method": "SendMessage",
+ "params": {
+ "message": {
+ "role": "user",
+ "content": "Please help me complete a task"
+ }
+ },
+ "id": 1
+}
+
+# Send streaming message
+{
+ "jsonrpc": "2.0",
+ "method": "SendStreamingMessage",
+ "params": {
+ "message": {
+ "role": "user",
+ "content": "Please help me complete a task"
+ }
+ },
+ "id": 2
+}
+
+# Get task status
+{
+ "jsonrpc": "2.0",
+ "method": "GetTask",
+ "params": {
+ "taskId": "task_abc123"
+ },
+ "id": 3
+}
+```
+
+> 💡 **Tips**:
+> - For local development, replace the `/nb/a2a` prefix with `http://localhost:5013/nb/a2a`
+> - For production environments, replace the prefix with your server domain name or public IP address
+
+> ⚠️ **Notes**:
+> - Calling A2A Agents requires carrying valid authentication information in the request headers
+> - Agent Card information is cached with a refresh interval of 1 hour
+> - If you need to update Agent information, you need to republish the agent version
+
+When an agent is published as an A2A-compliant Agent, users can view the detailed A2A Agent calling information by clicking the button shown below in the agent list:
+
+
+
+
+
## 📋 Manage Agents
In the agent list on the left, you can perform the following operations on existing agents:
diff --git a/doc/docs/en/user-guide/assets/agent-development/a2a-detail.jpg b/doc/docs/en/user-guide/assets/agent-development/a2a-detail.jpg
new file mode 100644
index 000000000..399af1c56
Binary files /dev/null and b/doc/docs/en/user-guide/assets/agent-development/a2a-detail.jpg differ
diff --git a/doc/docs/en/user-guide/assets/agent-development/a2a-discovery-list.jpg b/doc/docs/en/user-guide/assets/agent-development/a2a-discovery-list.jpg
new file mode 100644
index 000000000..5c523f7b1
Binary files /dev/null and b/doc/docs/en/user-guide/assets/agent-development/a2a-discovery-list.jpg differ
diff --git a/doc/docs/en/user-guide/assets/agent-development/a2a-find-detail.jpg b/doc/docs/en/user-guide/assets/agent-development/a2a-find-detail.jpg
new file mode 100644
index 000000000..4c42104ec
Binary files /dev/null and b/doc/docs/en/user-guide/assets/agent-development/a2a-find-detail.jpg differ
diff --git a/doc/docs/en/user-guide/assets/agent-development/a2a-nacos-discovery.jpg b/doc/docs/en/user-guide/assets/agent-development/a2a-nacos-discovery.jpg
new file mode 100644
index 000000000..fdfa2e826
Binary files /dev/null and b/doc/docs/en/user-guide/assets/agent-development/a2a-nacos-discovery.jpg differ
diff --git a/doc/docs/en/user-guide/assets/agent-development/a2a-published-as.jpg b/doc/docs/en/user-guide/assets/agent-development/a2a-published-as.jpg
new file mode 100644
index 000000000..5c523f7b1
Binary files /dev/null and b/doc/docs/en/user-guide/assets/agent-development/a2a-published-as.jpg differ
diff --git a/doc/docs/en/user-guide/assets/agent-development/a2a-url-discovery.jpg b/doc/docs/en/user-guide/assets/agent-development/a2a-url-discovery.jpg
new file mode 100644
index 000000000..4632206fb
Binary files /dev/null and b/doc/docs/en/user-guide/assets/agent-development/a2a-url-discovery.jpg differ
diff --git a/doc/docs/en/user-guide/assets/agent-development/add_mcp_from_api.png b/doc/docs/en/user-guide/assets/agent-development/add_mcp_from_api.png
new file mode 100644
index 000000000..2cce2a44a
Binary files /dev/null and b/doc/docs/en/user-guide/assets/agent-development/add_mcp_from_api.png differ
diff --git a/doc/docs/en/user-guide/assets/agent-development/add_mcp_from_api_1.png b/doc/docs/en/user-guide/assets/agent-development/add_mcp_from_api_1.png
new file mode 100644
index 000000000..12e9358c5
Binary files /dev/null and b/doc/docs/en/user-guide/assets/agent-development/add_mcp_from_api_1.png differ
diff --git a/doc/docs/en/user-guide/assets/agent-development/add_mcp_from_api_2.png b/doc/docs/en/user-guide/assets/agent-development/add_mcp_from_api_2.png
new file mode 100644
index 000000000..4221b41f5
Binary files /dev/null and b/doc/docs/en/user-guide/assets/agent-development/add_mcp_from_api_2.png differ
diff --git a/doc/docs/en/user-guide/assets/agent-development/dataagent_deploy.png b/doc/docs/en/user-guide/assets/agent-development/dataagent_deploy.png
new file mode 100644
index 000000000..46fa9fde3
Binary files /dev/null and b/doc/docs/en/user-guide/assets/agent-development/dataagent_deploy.png differ
diff --git a/doc/docs/en/user-guide/assets/agent-development/set-collaboration.jpg b/doc/docs/en/user-guide/assets/agent-development/set-collaboration.jpg
new file mode 100644
index 000000000..fdfa2e826
Binary files /dev/null and b/doc/docs/en/user-guide/assets/agent-development/set-collaboration.jpg differ
diff --git a/doc/docs/en/user-guide/assets/agent-development/set-collaboration.png b/doc/docs/en/user-guide/assets/agent-development/set-collaboration.png
deleted file mode 100644
index 7f47ba1a2..000000000
Binary files a/doc/docs/en/user-guide/assets/agent-development/set-collaboration.png and /dev/null differ
diff --git a/doc/docs/en/user-guide/knowledge-base.md b/doc/docs/en/user-guide/knowledge-base.md
index e5e5714ff..05456e5fa 100644
--- a/doc/docs/en/user-guide/knowledge-base.md
+++ b/doc/docs/en/user-guide/knowledge-base.md
@@ -26,12 +26,14 @@ Create and manage knowledge bases, upload documents, and generate summaries. Kno
### Supported File Formats
Nexent supports multiple file formats, including:
-- **Text:** .txt, .md
+- **Text:** .txt, .md, .csv, .json
- **PDF:** .pdf
- **Word:** .docx
- **PowerPoint:** .pptx
+- **EPUB:** .epub
- **Excel:** .xlsx
- **Data files:** .csv
+- **Web content:** .html, .xml
## 📊 Knowledge Base Summary
diff --git a/doc/docs/en/user-guide/local-tools/index.md b/doc/docs/en/user-guide/local-tools/index.md
index 27dc72ebc..9006f415c 100644
--- a/doc/docs/en/user-guide/local-tools/index.md
+++ b/doc/docs/en/user-guide/local-tools/index.md
@@ -9,6 +9,8 @@ Local tools let agents interact with the workspace, remote hosts, and external s
- [Search Tools](./search-tools): Local/DataMate KB search plus Exa/Tavily/Linkup web search.
- [Multimodal Tools](./multimodal-tools): Download/parse/analyze text files and images.
- [Terminal Tool](./terminal-tool): Persistent SSH sessions for remote commands.
+- [SQL Tools](./sql-tools): Connect to MySQL, PostgreSQL, SQL Server to execute SQL queries.
+- [Skills](../skills): Nexent's built-in tool combinations or custom capability packs with NL generation and version management.
## ⚙️ Configuration Entry
diff --git a/doc/docs/en/user-guide/local-tools/multimodal-tools.md b/doc/docs/en/user-guide/local-tools/multimodal-tools.md
index 6780f5f1e..986682c40 100644
--- a/doc/docs/en/user-guide/local-tools/multimodal-tools.md
+++ b/doc/docs/en/user-guide/local-tools/multimodal-tools.md
@@ -4,18 +4,22 @@ title: Multimodal Tools
# Multimodal Tools
-Multimodal tools analyze text files and images with model support. URLs can be S3, HTTP, or HTTPS.
+Multimodal tools analyze text files, images, videos, and audio with model support. URLs can be S3, HTTP, or HTTPS.
## 🧭 Tool List
- `analyze_text_file`: Download and extract text, then analyze per question
- `analyze_image`: Download images and interpret them with a vision-language model
+- `analyze_video`: Download videos and analyze them with a video understanding model
+- `analyze_audio`: Download audio and analyze it with an audio understanding model
## 🧰 Example Use Cases
- Summarize documents stored in buckets
- Explain screenshots, product photos, or chart images
-- Produce per-file or per-image answers aligned with the input order
+- Understand video content, such as extracting key frame information, human actions, or scene descriptions
+- Analyze audio content, such as transcription, speaker identification, or content summarization
+- Produce per-file or per-image/video/audio answers aligned with the input order
## 🧾 Parameters & Behavior
@@ -29,16 +33,26 @@ Multimodal tools analyze text files and images with model support. URLs can be S
- `query`: User focus/question.
- Downloads each image, runs VLM analysis, and returns an array matching input order.
+### analyze_video
+- `video_url`: Video URL (`s3://bucket/key`, `/bucket/key`, `http(s)://`).
+- `query`: User focus/question.
+- Downloads the video, runs video understanding model analysis, and returns the result.
+
+### analyze_audio
+- `audio_url`: Audio URL (`s3://bucket/key`, `/bucket/key`, `http(s)://`).
+- `query`: User focus/question.
+- Downloads the audio, runs audio understanding model analysis, and returns the result.
+
## ⚙️ Prerequisites
- Configure storage access (e.g., MinIO/S3) and data processing service to fetch files.
-- Provide an LLM for `analyze_text_file` and a VLM for `analyze_image`.
+- Provide an LLM for `analyze_text_file`, a VLM for `analyze_image`, and an audio/video understanding model for `analyze_video` and `analyze_audio` (must support audio/video input, e.g., Qwen3-Omni series).
## 🛠️ How to Use
-1. Prepare accessible URLs and confirm permissions.
-2. Call the corresponding tool with the URL list and question; multiple resources are supported at once.
-3. Use results in the same order as inputs for display or follow-up steps.
+1. Prepare accessible URLs for files, images, videos, or audio; confirm permissions.
+2. Call the corresponding tool with the URL and question; multiple resources are supported at once.
+3. Verify results before using them in follow-up steps.
## 💡 Best Practices
diff --git a/doc/docs/en/user-guide/local-tools/sql-tools.md b/doc/docs/en/user-guide/local-tools/sql-tools.md
new file mode 100644
index 000000000..859b5fbba
--- /dev/null
+++ b/doc/docs/en/user-guide/local-tools/sql-tools.md
@@ -0,0 +1,78 @@
+---
+title: SQL Database Tools
+---
+
+# SQL Database Tools
+
+The SQL database toolset enables AI agents to connect to and query relational databases such as MySQL, PostgreSQL, and SQL Server, allowing direct data access and manipulation.
+
+## Tool List
+
+- `mysql_database`: Connect to MySQL and execute SQL queries
+- `postgres_database`: Connect to PostgreSQL and execute SQL queries
+- `mssql_database`: Connect to SQL Server and execute SQL queries
+
+## Usage Scenarios
+
+- Query report data from business databases for agent analysis and summarization
+- Cross-table joins to retrieve related information scattered across multiple tables
+- Real-time queries of business status to provide agents with up-to-date data
+
+## Parameters and Behavior
+
+### Common Parameters
+
+- `sql`: The SQL query to execute (required)
+- `parameters`: Parameter values for parameterized queries (optional)
+- `max_rows`: Maximum number of rows to return (default: 100)
+- `timeout`: Query timeout in seconds (default: 10)
+
+### Database Connection Parameters
+
+| Database | Connection Parameters |
+|-------------|---------------------------------------------------------------------------|
+| MySQL | `host`, `user`, `password`, `database`, `port` (default 3306) |
+| PostgreSQL | `host`, `user`, `password`, `database`, `port` (default 5432) |
+| SQL Server | `host`, `user`, `password`, `database`, `port` (default 1433) |
+
+### Security Restrictions
+
+- Forbidden operations: `DROP DATABASE`, `GRANT`, `REVOKE`, `CREATE USER`, `INTO OUTFILE`, `LOAD DATA INFILE`
+- `UPDATE` and `DELETE` statements must include a `WHERE` clause
+- `LIMIT` is automatically added to restrict returned rows
+
+### Response Format
+
+```json
+{
+ "status": "success",
+ "columns": ["id", "name", "email"],
+ "rows": [[1, "John Doe", "john@example.com"]],
+ "row_count": 1,
+ "execution_time_ms": 45.23
+}
+```
+
+## Getting Started
+
+1. **Prepare connection info**: Obtain host address, port, database name, username, and password
+2. **Configure the tool**: Add the appropriate database tool in agent configuration and fill in connection parameters
+3. **Test connection**: Use a simple query to verify connectivity
+4. **Construct queries**: Let the agent understand natural language requirements and generate corresponding SQL
+
+## Security Best Practices
+
+- Use read-only accounts in production to limit operation permissions
+- Store sensitive information like database passwords in a key management service
+- Set reasonable `max_rows` values to avoid returning excessive data at once
+- Enable SSL/TLS encryption for database connections
+
+## Common Database Connection Examples
+
+| Database | Connection Example | Parameter Placeholder |
+|-------------|-------------------|---------------------|
+| MySQL | `localhost:3306` | `?` |
+| PostgreSQL | `localhost:5432` | `$1, $2, ...` |
+| SQL Server | `localhost:1433` | `?` |
+
+> Note: Different databases use different parameter placeholder formats. PostgreSQL uses `$1, $2`, while others use `?`.
diff --git a/doc/docs/en/user-guide/mcp-tools.md b/doc/docs/en/user-guide/mcp-tools.md
index b55859cbe..cd1190e0e 100644
--- a/doc/docs/en/user-guide/mcp-tools.md
+++ b/doc/docs/en/user-guide/mcp-tools.md
@@ -1,28 +1,159 @@
# MCP Tools
-The upcoming MCP Tools management module will let you centrally manage MCP servers and tools on a single page, easily completing connection configuration, tool synchronization, and health status monitoring.
+In the MCP Tools module, you can centrally manage all MCP (Model Context Protocol) servers and tools. It supports custom addition, Registry import, and Community import, covering connection configuration, tool synchronization, health monitoring, and community sharing.
-## 🎯 Feature Preview
+The MCP Tools page has two parallel tabs:
-1. Register and manage multiple MCP servers
-2. Quickly sync, view, and organize MCP tool lists
-3. Monitor MCP connection status and usage in real time
+- **Imported Services**: Manage MCP services already connected by the current tenant — configure, monitor, and maintain your MCP services here.
+- **Published Services**: Manage the MCP services you have published to the community — browse, edit, and unpublish.
-## ⏳ Stay Tuned
+---
-The MCP Tools management feature is under development. We are committed to building an efficient and intuitive management platform that enables you to:
+## ➕ Add MCP Services
-1. Centrally manage all MCP servers
-2. Conveniently sync and organize tools
-3. Monitor server connections and tool runtime status in real time
+Click the **Add MCP Service** button to open the add dialog. The dialog provides three tabs, each corresponding to a different source.
-## 🚀 Related Features
+### Local Add
-While waiting for **MCP Tools** to launch, you can:
+The **Local Add** tab lets you manually configure an MCP service with two transport types.
-1. Manage your MCP tools in **[Agent Development](./agent-development)**
-2. View agent and MCP collaboration relationships through **[Agent Space](./agent-space)**
-3. Experience platform features in **[Start Chat](./start-chat)**
+#### Add via URL
-If you encounter any issues during use, please refer to our **[FAQ](../quick-start/faq)** or ask for support in [GitHub Discussions](https://github.com/ModelEngine-Group/nexent/discussions).
+For independently deployed MCP services (HTTP / SSE), connect by entering the endpoint URL.
+
+1. In the **Local Add** tab, set **Transport Type** to "URL"
+2. Fill in the service details:
+ - **Service Name (required)**: A recognizable name for the MCP service
+ - **Service URL (required)**: The MCP service endpoint address
+ - **Description** (optional): A brief description of the service
+ - **Authorization Token** (optional): Bearer token if the service requires authentication
+3. Click **Confirm** — the system will connect to the service and retrieve the available tool list
+
+#### Add via Container Configuration
+
+For MCP services that need to run locally in a container (e.g., services launched via npx), the system automatically creates and manages a container based on your JSON configuration.
+
+1. In the **Local Add** tab, set **Transport Type** to "Container"
+2. Fill in the container configuration:
+ - **Service Name (required)**: A recognizable name for the MCP service
+ - **Description** (optional): A brief description of the service
+ - **Container Configuration JSON (required)**: Enter the standard MCP configuration format, for example:
+ ```json
+ {
+ "mcpServers": {
+ "service-name": {
+ "args": ["mcp-package-name@version"],
+ "command": "npx",
+ "env": {
+ "API_KEY": "xxxx"
+ }
+ }
+ }
+ }
+ ```
+ - **Port**: The port exposed by the container service — the system automatically detects port conflicts and suggests available ports
+3. Click **Confirm** — the system parses the JSON, creates the container, and registers the service
+
+### Import from MCP Registry
+
+Nexent integrates with the MCP Registry, allowing you to browse and import community-maintained MCP services in one click.
+
+1. Switch to the **MCP Registry** tab
+2. Browse the available MCP services — search by name or tags
+3. Click a service to view its details (description, version, required parameters, etc.)
+4. Configure required parameters (e.g., API Key and other environment variables)
+5. Click **Import** — the system automatically installs and configures the service
+
+### Import from Community
+
+Browse MCP services published by other Nexent users and quickly import them.
+
+1. Switch to the **Community Market** tab
+2. Browse published community MCP services — filter by name, tags, or transport type
+3. Click a service to view details, then click **Import** to add it to your service list
+
+---
+
+## 📋 Imported Services
+
+The **Imported Services** tab displays all MCP services connected by the current tenant as cards. View, edit, monitor, and publish your services here.
+
+### View & Filter
+
+Each service card shows:
+
+- Service name and description
+- Source indicator (Custom / Registry / Community)
+- Enable / Disable toggle
+- Tags
+
+Use the filter bar at the top to filter by **Source**, **Transport Type**, and **Tags**, or use the search box to quickly locate services by name.
+
+### Edit Service Details
+
+Click any service card to open the detail modal, where you can:
+
+- **Edit basic info**: Modify name, description, URL, Authorization Token, and tags
+- **Enable / Disable**: Toggle the service on or off — tools from a disabled service will not appear in agent tool selection
+- **Delete**: Remove the MCP service record — containerized services will also have their container resources cleaned up
+
+### View Tool List
+
+In the service detail modal, click **Tool List** to view all tools provided by this MCP service.
+
+### Health Check
+
+Click the **Health Check** button in the detail modal to test the connection to the MCP service. Possible statuses:
+
+- **Healthy**: The service is reachable
+- **Unhealthy**: The service cannot be reached or responded abnormally
+- **Unchecked**: A health check has not been performed yet
+
+### Container Management
+
+For containerized MCP services, the detail modal also provides:
+
+- **View Container Logs**: Real-time logs from the running container for troubleshooting
+- **View Container Config**: The configuration JSON used when creating the container
+
+### Publish to Community
+
+In the service detail modal, click **Publish to Community**:
+
+1. Review or edit the publication info (name, description, tags, etc.)
+2. Click **Confirm Publish** — the service will be published to the community
+3. Other users can then browse and import it from the **Community Market** tab in the add dialog
+
+---
+
+## 🌐 Published Services
+
+The **Published Services** tab shows all MCP services you have published to the community. Manage your published content here.
+
+Each card shows the service name, description, version, and tags. Filter by name, tags, and transport type.
+
+Click a service card to view details, where you can:
+
+- **Edit published service**: Modify the published service's name, description, and tags
+- **Delete published service**: Withdraw the service from the community — it will no longer be visible to other users
+
+---
+
+## 🔗 Integrating with Agents
+
+Once an MCP service is added, its tools are automatically synced to the agent tool selection list. When configuring an agent on the **[Agent Development](./agent-development)** page:
+
+1. In the **Select Agent Tools** tab, locate the corresponding MCP service group
+2. Click a tool name to enable it
+3. Click ⚙️ to view the tool description and configure its parameters
+
+## 🚀 Next Steps
+
+After configuring MCP services, we recommend:
+
+1. **[Agent Development](./agent-development)** — Assign MCP tools to your agents
+2. **[Agent Space](./agent-space)** — View collaboration between agents and MCP services
+3. **[Start Chat](./start-chat)** — Experience agents calling MCP tools in conversations
+
+If you encounter any issues, please refer to our **[FAQ](../quick-start/faq)** or ask for support in [GitHub Discussions](https://github.com/ModelEngine-Group/nexent/discussions).
diff --git a/doc/docs/en/user-guide/skills.md b/doc/docs/en/user-guide/skills.md
new file mode 100644
index 000000000..0cdc2a288
--- /dev/null
+++ b/doc/docs/en/user-guide/skills.md
@@ -0,0 +1,572 @@
+---
+title: Skill Management
+---
+
+# Skill Management
+
+A Skill is a core mechanism in Nexent for extending agent capabilities. Each skill packages multiple tools together with usage documentation into a reusable unit of capability, letting agents tackle complex tasks by combining skills like building blocks — without consuming excessive context space.
+
+## Table of Contents
+
+- [Skills vs. Tools](#the-relationship-between-skills-and-tools): Understanding the core concepts
+- [Using Skills](#using-skills): How to use skills in agent development
+- [Skill Management](#-skill-management): Create, edit, import, and export skills
+- [Skill Upload Guide](#-skill-upload-guide): SKILL.md format, ZIP structure, special tags, and writing standards
+- [NL-to-Skill](#-nl-to-skill): Automatically generate skills from natural language descriptions
+- [Official Skills Overview](#-official-skills-overview): Built-in skills and their capabilities
+
+## The Relationship Between Skills and Tools
+
+In Nexent, **Tools** and **Skills** are two distinct layers. Understanding their differences helps you configure agent capabilities more effectively.
+
+A **Tool** is a single atomic operation the agent can call, such as `read_file` or `tavily_search`. When a tool is enabled for an agent, the LLM searches through the tool list on every turn — meaning even if a tool is completely unnecessary for the current conversation, the LLM still consumes context tokens to "see" it.
+
+A **Skill** bundles the capabilities of multiple tools into a complete workflow, along with parameter configuration and usage documentation provided via `SKILL.md`. The LLM does not need to "see" all tools in advance; instead, it decides whether to activate a skill based on the user's actual needs. Only when a skill is activated does the system load the corresponding toolset — effectively reducing token consumption.
+
+| Dimension | Tool | Skill |
+|-----------|------|-------|
+| Granularity | Single atomic operation | Bundle of multiple tools + configuration + documentation |
+| Token consumption | Occupies context on every turn | Loaded only when activated |
+| Parameters | Fixed parameter schema | Customizable parameter templates |
+| Versioning | No version management | Supports draft/published versions |
+| Distribution | Code-level | ZIP package distribution, plug-and-play |
+
+**Analogy**: Tools are individual items like a screwdriver, hammer, or saw. A Skill is a toolbox — with tools pre-matched for a work scenario and accompanied by usage instructions. Open the right toolbox for the task at hand.
+
+## Using Skills
+
+### Configuring Skills for an Agent
+
+1. Open the **[Agent Development](./agent-development)** page
+2. On the "Select Tools" tab, find the **Skills** group
+3. Click a skill name to select it; click again to deselect
+4. After selecting a skill, click the ⚙️ button next to it to configure skill parameters
+5. Save the agent configuration
+
+
- {/* Model tag - only show when model is not "unknown" */}
+{/* Model tag - only show when model is not "unknown" */}
{kb.embeddingModel !== "unknown" && (
= ({
})}
)}
+ {kb.is_multimodal &&
+ hasIndexedDocumentsAndChunks(kb) && (
+
+ multimodal
+
+ )}
+ {isModelMismatch(kb) && (
+
+ {t("knowledgeBase.tag.modelMismatch")}
+
+ )}
{/* User group tags - only show when not PRIVATE */}
@@ -592,6 +638,13 @@ const KnowledgeBaseList: React.FC = ({
))}
+ {kb.preserve_source_file === false && (
+
+ {t("knowledgeBase.tag.noPreserveSourceFile")}
+
+ )}
>
)}